summaryrefslogtreecommitdiff
path: root/app/scrapers/buzzfront-yokohama.ts
blob: d206e1dc80a9c054cfddf5477a45c4d8a72c59a8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/**
 * BuzzFront YOKOHAMA — https://buzzfront.net
 *
 * meets-otsuka と同じ omatsuri.tech プラットフォーム。構造:
 *   <div class="blog-entry event-wrap" event-date="YYYY-MM-DD">
 *     <h2><a href="/events/ID">タイトル</a></h2>
 *     <p class="act"><span>アーティスト</span></p>
 *     <p class="time">OPEN 19:00 / START 19:30</p>
 *     <span class="ticket-price__label">前売券(税込)</span>
 *     <span class="ticket-price__amount">3,000円</span>
 *     <div class="image-bg" style="background-image: url(...)">
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/**
 * Static metadata describing the BuzzFront YOKOHAMA venue.
 *
 * `id` is stamped onto every scraped event as `venue_id`, and `url` is the
 * site origin used as the base when building each event's `source_url`.
 */
export const venue: VenueMeta = {
  id: "buzzfront-yokohama",
  name: "BuzzFront YOKOHAMA",
  url: "https://buzzfront.net",
  area: "横浜",
  // NOTE(review): presumably the audience capacity in people — confirm against VenueMeta.
  capacity: 300,
};

/** Shape required of the `event-date` attribute: an ISO calendar date (YYYY-MM-DD). */
const EVENT_DATE_RE = /^\d{4}-\d{2}-\d{2}$/;

/** "OPEN 19:00"-style door time; tolerates a one-digit hour and a full-width colon. */
const OPEN_TIME_RE = /OPEN\s*(\d{1,2})[:：](\d{2})/i;

/** "START 19:30"-style show time; same tolerance as OPEN_TIME_RE. */
const START_TIME_RE = /START\s*(\d{1,2})[:：](\d{2})/i;

/** Captures the URL inside an inline `background-image: url(...)` declaration. */
const BACKGROUND_URL_RE = /url\(["']?([^"')]+)["']?\)/;

/**
 * Anchors whose href mentions a known ticketing keyword.
 * NOTE(review): these are substring matches, so e.g. `pia` also matches
 * unrelated hrefs that merely contain those letters.
 */
const TICKET_LINK_SELECTOR =
  "a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='tiget'], a[href*='ticket']";

/**
 * Extracts a time from `text` using `pattern` and normalises it to `HH:MM`.
 *
 * @param text - Raw text of the time paragraph.
 * @param pattern - Regex with the hour in group 1 and the minutes in group 2.
 * @returns The zero-padded time, or `null` when the pattern does not match.
 */
function extractTime(text: string, pattern: RegExp): string | null {
  const found = pattern.exec(text);
  if (!found) return null;
  const [, hour, minute] = found;
  if (hour === undefined || minute === undefined) return null;
  return `${hour.padStart(2, "0")}:${minute}`;
}

/**
 * Resolves a possibly-relative href against `base`.
 *
 * @param href - The raw `href` attribute value, if any.
 * @param base - Absolute URL that relative hrefs are resolved against.
 * @returns An absolute http(s) URL, or `null` when the href is missing,
 *   blank, unparsable, or uses another scheme (e.g. `javascript:`).
 */
function resolveHttpUrl(href: string | null | undefined, base: string): string | null {
  const trimmed = href?.trim();
  if (!trimmed) return null;
  try {
    const resolved = new URL(trimmed, base);
    const isHttp = resolved.protocol === "http:" || resolved.protocol === "https:";
    return isHttp ? resolved.href : null;
  } catch {
    // `new URL` throws on malformed input; treat that as "no usable link".
    return null;
  }
}

/**
 * Parses one monthly listing page into event records.
 *
 * Each `div.blog-entry.event-wrap` element becomes one event. Entries are
 * skipped when their `event-date` attribute is not `YYYY-MM-DD` or when the
 * `h2 a` title is empty. All other fields are optional and fall back to `null`.
 *
 * @param html - Raw HTML of a listing page; an empty string yields no events.
 * @returns The events found on the page, in document order.
 */
function parseHtml(html: string): EventInput[] {
  const $ = cheerio.load(html);
  const events: EventInput[] = [];

  $("div.blog-entry.event-wrap").each((_, el) => {
    const $el = $(el);

    const date = $el.attr("event-date") ?? "";
    if (!EVENT_DATE_RE.test(date)) return;

    const $link = $el.find("h2 a").first();
    const title = $link.text().trim();
    if (!title) return;

    // Resolve instead of concatenating so absolute or slash-less hrefs
    // do not produce a malformed URL.
    const sourceUrl = resolveHttpUrl($link.attr("href"), venue.url);

    // Drop blank spans so the joined list has no dangling separators.
    const artistNames: string[] = $el
      .find("p.act span")
      .map((_, s) => $(s).text().trim())
      .get();
    const artist = artistNames.filter((name: string) => name.length > 0).join("、") || null;

    const timeText = $el.find("p.time").first().text();

    // The amount is the sibling span immediately following the first label
    // whose text contains the given keyword.
    const amountFor = (keyword: string): string =>
      $el
        .find("span.ticket-price__label")
        .filter((_, s) => $(s).text().includes(keyword))
        .first()
        .next("span.ticket-price__amount")
        .text()
        .trim();

    const advAmount = amountFor("前売");
    const doorAmount = amountFor("当日");
    const price =
      advAmount && doorAmount
        ? `前売 ${advAmount} / 当日 ${doorAmount}`
        : advAmount || doorAmount || null;

    const bgStyle = $el.find("div.image-bg").attr("style") ?? "";
    const imageUrl = BACKGROUND_URL_RE.exec(bgStyle)?.[1] ?? null;

    const ticketUrl = $el.find(TICKET_LINK_SELECTOR).first().attr("href") ?? null;

    events.push({
      venue_id: venue.id,
      title,
      artist,
      date,
      open_time: extractTime(timeText, OPEN_TIME_RE),
      start_time: extractTime(timeText, START_TIME_RE),
      price,
      ticket_url: ticketUrl,
      image_url: imageUrl,
      source_url: sourceUrl,
    });
  });

  return events;
}

/**
 * Scraper for BuzzFront YOKOHAMA.
 *
 * `scrape` requests the monthly listing for the current month and the two
 * months after it concurrently, parses every page, and returns the events
 * with duplicates (same date and title) removed, keeping the first one seen.
 * A non-OK HTTP response counts as an empty page; a rejected fetch
 * propagates to the caller.
 */
export const scraper: Scraper = {
  venue,
  async scrape(): Promise<EventInput[]> {
    const today = new Date();

    // Build one listing URL per month; the Date constructor rolls the month
    // index over into the next year when needed.
    const pageUrls: string[] = [];
    for (let monthsAhead = 0; monthsAhead <= 2; monthsAhead += 1) {
      const firstOfMonth = new Date(today.getFullYear(), today.getMonth() + monthsAhead, 1);
      const month = String(firstOfMonth.getMonth() + 1).padStart(2, "0");
      const yearMonth = `${firstOfMonth.getFullYear()}/${month}`;
      pageUrls.push(`https://buzzfront.net/events?date=${encodeURIComponent(yearMonth)}`);
    }

    const loadPage = async (pageUrl: string): Promise<string> => {
      const response = await fetch(pageUrl);
      return response.ok ? response.text() : "";
    };
    const pages = await Promise.all(pageUrls.map((pageUrl) => loadPage(pageUrl)));

    // Keep only the first event for each date/title pair.
    const seenKeys = new Set<string>();
    const uniqueEvents: EventInput[] = [];
    for (const page of pages) {
      for (const event of parseHtml(page)) {
        const key = `${event.date}|${event.title}`;
        if (seenKeys.has(key)) continue;
        seenKeys.add(key);
        uniqueEvents.push(event);
      }
    }
    return uniqueEvents;
  },
};