summary refs log tree commit diff
path: root/app/scrapers/club-quattro.ts
blob: cbb898e0ffafc2a20924b91a825f733adcb8d4b1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/**
 * Static metadata for the CLUB QUATTRO venue.
 *
 * Within this module, `id` is stamped onto every scraped event as `venue_id`
 * and `url` is the base against which relative image/link URLs are resolved.
 */
export const venue: VenueMeta = {
  id: "club-quattro",
  name: "CLUB QUATTRO",
  url: "https://www.club-quattro.com",
  // "渋谷" is Shibuya; the schedule pages fetched by the scraper live under /shibuya/.
  area: "渋谷",
  // NOTE(review): presumably audience capacity in persons — confirm against VenueMeta.
  capacity: 750,
};

/**
 * Extracts events from the HTML of a single schedule page.
 *
 * Every `li[data-event-date]` element is treated as one event. An item is
 * skipped when its `data-event-date` attribute is empty or when it has no
 * title text (`p.txt-02`). The date is passed through verbatim from the
 * attribute; no format validation is done here.
 *
 * Door times are read from `dl.detail-list .bundle` rows whose `<dt>` label
 * mentions 開場 (doors open) and/or 開演 (show start):
 * - a label containing 開場 yields open time = first `HH:MM`, start = second;
 * - a label containing only 開演 yields start time = first `HH:MM`.
 * A row that lacks a given time never erases a value found in another row.
 *
 * @param html - Raw HTML of a schedule page. An empty string yields `[]`.
 * @returns Events in document order, with relative image and link URLs
 *   resolved against `venue.url`.
 */
function parseHtml(html: string): EventInput[] {
  const $ = cheerio.load(html);
  const events: EventInput[] = [];

  $("li[data-event-date]").each((_, el) => {
    const $el = $(el);

    const date = $el.attr("data-event-date") ?? "";
    if (!date) return;

    const title = $el.find("p.txt-02").text().trim();
    if (!title) return;

    // An empty artist string is stored as null rather than "".
    const artist = $el.find("p.txt-01 span").text().trim() || null;

    let openTime: string | null = null;
    let startTime: string | null = null;
    $el.find("dl.detail-list .bundle").each((_, bundle) => {
      const label = $(bundle).find("dt").text().trim();
      const mentionsOpen = label.includes("開場");
      const mentionsStart = label.includes("開演");
      if (!mentionsOpen && !mentionsStart) return;

      const times = $(bundle).find("dd").text().trim().match(/\d{2}:\d{2}/g) ?? [];
      if (mentionsOpen) {
        // Combined "開場/開演" row (or an open-only row): open comes first,
        // an optional second time is the start.
        openTime = times[0] ?? openTime;
        startTime = times[1] ?? startTime;
      } else {
        // Start-only row: its single time is the start, not the doors-open time.
        startTime = times[0] ?? startTime;
      }
    });

    const href = $el.find("a").first().attr("href") ?? null;
    const imageSrc = $el.find(".front img").attr("src") ?? null;

    events.push({
      venue_id: venue.id,
      title,
      artist,
      date,
      open_time: openTime,
      start_time: startTime,
      image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null,
      source_url: href ? absoluteUrl(href, venue.url) : null,
    });
  });

  return events;
}

export const scraper: Scraper = {
  venue,
  /**
   * Downloads the Shibuya schedule pages for the current month and the two
   * months after it (computed in the process's local time zone), parses each
   * page, and returns the combined events with duplicates removed.
   *
   * Two events are considered duplicates when they share the same date and
   * title; the first occurrence wins. A page that responds with a non-OK
   * status contributes no events. The pages are requested in parallel, so a
   * fetch that rejects causes the whole call to reject.
   *
   * @returns De-duplicated events from all three monthly pages, in page order.
   */
  async scrape(): Promise<EventInput[]> {
    const today = new Date();
    const pageUrls: string[] = [];
    for (let monthOffset = 0; monthOffset <= 2; monthOffset += 1) {
      // Day 1 avoids end-of-month rollover; Date normalizes month values past December.
      const firstOfMonth = new Date(today.getFullYear(), today.getMonth() + monthOffset, 1);
      const year = firstOfMonth.getFullYear();
      const month = String(firstOfMonth.getMonth() + 1).padStart(2, "0");
      pageUrls.push(`https://www.club-quattro.com/shibuya/schedule/?ym=${year}${month}`);
    }

    const pages = await Promise.all(
      pageUrls.map(async (pageUrl) => {
        const response = await fetch(pageUrl);
        return response.ok ? response.text() : "";
      })
    );

    const emittedKeys = new Set<string>();
    const uniqueEvents: EventInput[] = [];
    for (const page of pages) {
      for (const event of parseHtml(page)) {
        const dedupeKey = `${event.date}|${event.title}`;
        if (emittedKeys.has(dedupeKey)) continue;
        emittedKeys.add(dedupeKey);
        uniqueEvents.push(event);
      }
    }
    return uniqueEvents;
  },
};

/**
 * Turns a possibly-relative URL taken from a page into an absolute one.
 *
 * - A URL that already carries a scheme (`https:`, `http:`, `data:`, ...) is
 *   returned unchanged.
 * - A protocol-relative URL (`//host/path`) inherits the scheme of `base`,
 *   falling back to `https:` when `base` has none.
 * - Anything else is appended to `base` with exactly one `/` between them.
 *   Paths are joined to `base` as given; no `..` or page-relative resolution
 *   is attempted.
 *
 * @param url - The URL or path to resolve.
 * @param base - Site root to resolve against, with or without a trailing slash.
 * @returns The absolute URL string.
 */
function absoluteUrl(url: string, base: string): string {
  // RFC 3986 scheme: a letter followed by letters, digits, "+", "-" or ".", then ":".
  const schemePattern = /^[a-z][a-z0-9+.-]*:/i;
  if (schemePattern.test(url)) return url;

  if (url.startsWith("//")) {
    const baseScheme = schemePattern.exec(base)?.[0] ?? "https:";
    return baseScheme + url;
  }

  // Strip trailing slashes so the join never produces "//" in the path.
  const root = base.replace(/\/+$/, "");
  return url.startsWith("/") ? root + url : `${root}/${url}`;
}