summaryrefslogtreecommitdiff
path: root/app/scrapers/liquid-room.ts
blob: a1265c81e43a126e68e5449c0b12fef0e443c9a5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/**
 * Static metadata for the LIQUID ROOM venue.
 *
 * `id` is reused as the `venue_id` of every event produced by this module's
 * HTML parser, and the whole object is exposed through the exported scraper.
 */
export const venue: VenueMeta = {
  id: "liquid-room",
  name: "LIQUID ROOM",
  url: "https://www.liquidroom.net",
  area: "恵比寿",
  capacity: 1000,
};

/**
 * Extracts event records from the HTML of one schedule page.
 *
 * Each `<article>` element is treated as a candidate event. A candidate is
 * skipped when its `a.s_link` href does not end in `_YYYYMMDD`, or when it has
 * no non-empty `<h2>` text.
 *
 * Title/artist rule: when a `p.subtitle` is present, the subtitle becomes the
 * title and the `<h2>` text becomes the artist; otherwise the `<h2>` text is
 * the title and the artist is `null`.
 *
 * @param html Raw HTML of a schedule page (an empty string yields no events).
 * @returns The events found, in document order.
 */
function parseHtml(html: string): EventInput[] {
  const $ = cheerio.load(html);
  const collected: EventInput[] = [];

  for (const node of $("article").toArray()) {
    const $article = $(node);

    // The event date is encoded as a trailing `_YYYYMMDD` on the detail link.
    const link = $article.find("a.s_link").attr("href") ?? "";
    const dateParts = link.match(/_(\d{4})(\d{2})(\d{2})$/);
    if (!dateParts) continue;
    const [, year, month, day] = dateParts;
    const isoDate = `${year}-${month}-${day}`;

    const heading = $article.find("h2").first().text().trim();
    if (!heading) continue;

    const subheading = $article.find("p.subtitle").first().text().trim();
    const hasSubheading = Boolean(subheading);

    // Reads the first HH:MM found in the <dd> text of any <dl> whose <dt> text
    // contains the given label; null when there is no such time.
    const timeFor = (label: string): string | null => {
      const ddText = $article
        .find("dl")
        .filter((_, dl) => $(dl).find("dt").text().includes(label))
        .find("dd")
        .text()
        .trim();
      const hit = ddText.match(/\d{2}:\d{2}/);
      return hit?.[0] ?? null;
    };

    const doorsAt = timeFor("OPEN");
    const showAt = timeFor("START");
    const imageSrc = $article.find("div.left img").attr("src") ?? null;

    collected.push({
      venue_id: venue.id,
      title: hasSubheading ? subheading : heading,
      artist: hasSubheading ? heading : null,
      date: isoDate,
      open_time: doorsAt,
      start_time: showAt,
      image_url: imageSrc,
      source_url: link,
    });
  }

  return collected;
}

/**
 * Scraper for the LIQUID ROOM monthly schedule pages.
 *
 * `scrape` downloads the schedule pages for the current month and the two
 * following months in parallel, parses each with `parseHtml`, and returns the
 * combined events with duplicates removed. Two events are duplicates when they
 * share a `source_url` (or a `title`, when `source_url` is null/undefined);
 * the first occurrence wins.
 *
 * A page whose response is not OK contributes no events. A rejected fetch is
 * not caught here and rejects the whole scrape.
 */
export const scraper: Scraper = {
  venue,
  async scrape(): Promise<EventInput[]> {
    const today = new Date();
    const baseYear = today.getFullYear();
    const baseMonth = today.getMonth();

    const pageUrls: string[] = [];
    for (let offset = 0; offset <= 2; offset += 1) {
      // Date rolls a month index past 11 over into the following year.
      const firstOfMonth = new Date(baseYear, baseMonth + offset, 1);
      const yyyy = firstOfMonth.getFullYear();
      const mm = String(firstOfMonth.getMonth() + 1).padStart(2, "0");
      pageUrls.push(`https://www.liquidroom.net/schedule/${yyyy}/${mm}`);
    }

    // Non-OK responses are treated as an empty page rather than an error.
    const download = async (url: string): Promise<string> => {
      const response = await fetch(url);
      return response.ok ? response.text() : "";
    };
    const pages = await Promise.all(pageUrls.map((url) => download(url)));

    const seenKeys = new Set<string>();
    const uniqueEvents: EventInput[] = [];
    for (const event of pages.flatMap((page) => parseHtml(page))) {
      const key = event.source_url ?? event.title;
      if (seenKeys.has(key)) continue;
      seenKeys.add(key);
      uniqueEvents.push(event);
    }
    return uniqueEvents;
  },
};