summaryrefslogtreecommitdiff
path: root/app/scrapers/liquid-room.ts
blob: b497759ef16e370f3e126f462f2680d8b5656dde (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
/**
 * Liquid Room (恵比寿) — https://www.liquidroom.net/schedule
 *
 * The scraper reads the schedule page's HTML markup only (no JSON-LD parsing).
 * Expected structure: each event is an <article class="p-schedule__item">
 * (or one of the fallback selectors used below) containing date, title, etc.
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/**
 * Static metadata for the LIQUID ROOM venue.
 *
 * `id` is stamped onto every scraped event as `venue_id`, and `url` doubles as
 * the base against which relative image/source links are resolved.
 */
export const venue: VenueMeta = {
  id: "liquid-room",
  name: "LIQUID ROOM",
  url: "https://www.liquidroom.net",
  area: "恵比寿",
};

/**
 * Scraper for the LIQUID ROOM schedule page.
 *
 * `scrape()` downloads the schedule HTML, visits every element matching one of
 * the known event-card selectors and converts each card into an `EventInput`.
 * Cards with no title, or whose date text cannot be parsed, are skipped.
 *
 * All link-like fields (`ticket_url`, `image_url`, `source_url`) are resolved
 * against `venue.url` so that relative hrefs are stored as absolute URLs.
 *
 * @throws Error with message `HTTP <status>` when the schedule request does
 *         not return a successful response.
 */
export const scraper: Scraper = {
  venue,
  async scrape(): Promise<EventInput[]> {
    const res = await fetch("https://www.liquidroom.net/schedule");
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const html = await res.text();
    const $ = cheerio.load(html);
    const events: EventInput[] = [];

    $("article.p-schedule__item, .schedule-list__item, .c-event-item").each(
      (_, el) => {
        const $el = $(el);

        const title =
          $el.find(".p-schedule__title, .event-title, h3, h2").first().text().trim();
        if (!title) return;

        // Prefer the visible date text; fall back to the machine-readable
        // `datetime` attribute when the element has no text.
        const dateStr =
          $el.find(".p-schedule__date, .event-date, time").first().text().trim() ||
          $el.find("time").attr("datetime") ||
          "";
        const date = parseJapaneseDate(dateStr);
        if (!date) return;

        const artist =
          $el.find(".p-schedule__artist, .artist").first().text().trim() || null;

        // First clock time found in the time element. Single-digit hours
        // ("9:00") are accepted and zero-padded so the stored value is
        // always HH:MM.
        const rawTime =
          $el.find(".p-schedule__time, .open-time").first().text().trim().match(/\d{1,2}:\d{2}/)?.[0];
        const startTime = rawTime ? rawTime.padStart(5, "0") : null;

        const ticketHref =
          $el.find("a[href*='ticket'], a[href*='eplus'], a[href*='pia']").first().attr("href");
        const imageSrc = $el.find("img").first().attr("src");
        const sourceHref = $el.find("a").first().attr("href");

        events.push({
          venue_id: venue.id,
          title,
          artist,
          date,
          start_time: startTime,
          // Ticket links get the same normalisation as the other URLs so a
          // relative href is not stored in an unusable form.
          ticket_url: ticketHref ? absoluteUrl(ticketHref, venue.url) : null,
          image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null,
          source_url: sourceHref ? absoluteUrl(sourceHref, venue.url) : null,
        });
      }
    );

    return events;
  },
};

/**
 * Extracts a calendar date from free-form schedule text and returns it as
 * `YYYY-MM-DD`.
 *
 * Recognised shapes:
 *  - with a year: "2025.06.15", "2025/06/15", "2025年06月15日", and the ISO form
 *    "2025-06-15" (including the date part of a `datetime` attribute value
 *    such as "2025-06-15T19:00");
 *  - without a year: "06.15", "6/15", "6月15日" — the year is taken from `now`.
 *
 * Matches must not be embedded in a longer digit run, so a fragment such as
 * "2025.6" is rejected instead of being read as month 25, day 6.
 *
 * NOTE(review): year-less dates are always placed in the year of `now`; a
 * listing that spans the new year may need a rollover rule — confirm against
 * the real page.
 *
 * @param raw Text to search for a date.
 * @param now Reference instant that supplies the year for year-less input.
 *            Defaults to the current time.
 * @returns The date as `YYYY-MM-DD`, or `null` when no date is found or the
 *          numbers do not form a real calendar day (e.g. "13.45", "02.30").
 */
function parseJapaneseDate(raw: string, now: Date = new Date()): string | null {
  const withYear = raw.match(
    /(?<!\d)(\d{4})[./\-年](\d{1,2})[./\-月](\d{1,2})(?!\d)/
  );
  // Only try the year-less form when the full form did not match. "-" is
  // deliberately not a separator here: "6-15" is too ambiguous without a year.
  const withoutYear = withYear
    ? null
    : raw.match(/(?<!\d)(\d{1,2})[./月](\d{1,2})(?!\d)/);

  let year: number;
  let month: number;
  let day: number;
  if (withYear) {
    year = Number(withYear[1]);
    month = Number(withYear[2]);
    day = Number(withYear[3]);
  } else if (withoutYear) {
    year = now.getFullYear();
    month = Number(withoutYear[1]);
    day = Number(withoutYear[2]);
  } else {
    return null;
  }

  // Round-trip through Date: out-of-range months/days overflow into a
  // different month or day, which exposes impossible dates.
  const probe = new Date(0);
  probe.setUTCFullYear(year, month - 1, day);
  if (probe.getUTCMonth() !== month - 1 || probe.getUTCDate() !== day) {
    return null;
  }

  const pad = (value: number, width: number): string =>
    String(value).padStart(width, "0");
  return `${pad(year, 4)}-${pad(month, 2)}-${pad(day, 2)}`;
}

/**
 * Resolves a possibly-relative link against a base URL.
 *
 * - Absolute `http://` / `https://` input is returned verbatim.
 * - Everything else is resolved with the WHATWG `URL` constructor, so
 *   root-relative ("/a"), path-relative ("a/b"), protocol-relative
 *   ("//cdn.example/a") and non-http schemes such as `data:` are all handled.
 * - `base` is treated as a directory (a trailing slash is added when absent),
 *   so a path-relative link is appended beneath it.
 *
 * @param url  Link as found in the page markup.
 * @param base Base URL to resolve against, e.g. the venue's site root.
 * @returns An absolute URL string. If `base` is not a parsable URL, falls back
 *          to joining `base` and `url` with a single slash.
 */
function absoluteUrl(url: string, base: string): string {
  if (/^https?:\/\//i.test(url)) return url;

  const baseDir = base.endsWith("/") ? base : `${base}/`;
  try {
    return new URL(url, baseDir).href;
  } catch {
    // Unparsable base: keep the best-effort concatenation without producing
    // a doubled slash.
    return baseDir + url.replace(/^\/+/, "");
  }
}