summary | refs | log | tree | commit | diff
path: root/app/scrapers/shibuya-o.ts
blob: 1ad8d8c327ef1932d927d890e911aedddc546d14 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/**
 * Shibuya O-East / O-West / O-Crest / O-Nest (渋谷)
 * https://www.shibuya-o.com/schedule/
 *
 * The page uses a unified schedule listing for all O venues.
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/**
 * Static metadata for the Shibuya O venue group.
 *
 * `id` is stamped onto every scraped event as `venue_id`, and `url` is the
 * base against which relative image and detail links are resolved.
 */
export const venue: VenueMeta = {
  id: "shibuya-o",
  name: "渋谷 O-EAST / O-WEST",
  url: "https://www.shibuya-o.com",
  area: "渋谷",
};

export const scraper: Scraper = {
  venue,
  /**
   * Downloads the shared schedule page and turns each listing into an
   * `EventInput`.
   *
   * Listings without a title or without a parseable date are skipped.
   *
   * @returns One entry per usable listing, in document order.
   * @throws Error with message `HTTP <status>` when the response is not OK.
   */
  async scrape(): Promise<EventInput[]> {
    const res = await fetch("https://www.shibuya-o.com/schedule/");
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const $ = cheerio.load(await res.text());
    const events: EventInput[] = [];

    // Relative links are resolved against the venue site; missing/empty ones become null.
    const resolve = (href: string | undefined): string | null =>
      href ? absoluteUrl(href, venue.url) : null;

    // Each selector list names several alternative class names — presumably to
    // tolerate differing markup on the page (not verifiable from this file).
    $(".schedule_list li, .c-schedule__item, .event-item").each((_, el) => {
      const $el = $(el);

      const title = $el.find(".schedule_title, .title, h3").first().text().trim();
      if (!title) return;

      // Prefer the visible date text; fall back to the machine-readable
      // `datetime` attribute when the text is empty OR cannot be parsed.
      const dateText = $el.find(".schedule_date, .date, time").first().text().trim();
      const dateAttr = $el.find("time").attr("datetime") ?? "";
      const date = parseJapaneseDate(dateText) ?? parseJapaneseDate(dateAttr);
      if (!date) return;

      const hall = $el.find(".schedule_hall, .hall, .venue-name").first().text().trim() || null;

      // NFKC folds full-width letters, digits and the full-width colon to ASCII
      // so "ＯＰＥＮ １８：００" is matched like "OPEN 18:00".
      const timeText = $el.find(".schedule_time, .time").first().text().normalize("NFKC");

      const detailHref = $el.find("a[href]").first().attr("href");
      const ticketHref = $el
        .find("a[href*='eplus'], a[href*='lawson'], a[href*='ticket']")
        .first()
        .attr("href");
      const imageSrc = $el.find("img").first().attr("src");

      events.push({
        venue_id: venue.id,
        title,
        // NOTE(review): the hall name is stored in `artist`; confirm this
        // mapping is intentional for this venue group.
        artist: hall,
        date,
        open_time: extractLabeledTime(timeText, "OPEN"),
        start_time: extractLabeledTime(timeText, "START"),
        ticket_url: resolve(ticketHref),
        image_url: resolve(imageSrc),
        source_url: resolve(detailHref),
      });
    });

    return events;
  },
};

/**
 * Finds the clock time that follows `label` (case-insensitive) in `text`.
 *
 * Colons and whitespace between the label and the time are skipped, and a
 * single-digit hour is zero-padded so the result is always `HH:MM`.
 *
 * @returns The time as `HH:MM`, or `null` when the label has no time after it.
 */
function extractLabeledTime(text: string, label: "OPEN" | "START"): string | null {
  const m = text.match(new RegExp(`${label}[:\\s]*(\\d{1,2}):(\\d{2})`, "i"));
  if (!m) return null;
  return `${(m[1] ?? "").padStart(2, "0")}:${m[2] ?? ""}`;
}

/**
 * Extracts the first calendar date from `raw` and returns it as `YYYY-MM-DD`.
 *
 * Accepted shapes (full-width digits/punctuation are folded to ASCII first):
 * - with a year: `2024年1月5日`, `2024.01.05`, `2024/1/5`, `2024-01-05` (ISO,
 *   as found in a `<time datetime>` attribute);
 * - without a year: `1月5日`, `1/5`, `1.5`.
 *
 * For year-less dates the year is inferred from `now`: the current year is
 * used unless that would place the date more than six months in the past, in
 * which case the following year is assumed (e.g. `1/15` seen in December).
 *
 * @param raw Text that may contain a date.
 * @param now Reference instant for year inference; defaults to the current time.
 * @returns The ISO date string, or `null` when no valid calendar date is found.
 */
function parseJapaneseDate(raw: string, now: Date = new Date()): string | null {
  // NFKC maps full-width digits and separators ("２０２４／０３") to ASCII.
  const text = raw.normalize("NFKC");

  // The digit look-arounds stop the patterns from matching inside longer numbers.
  const full = text.match(/(?<!\d)(\d{4})[./年-](\d{1,2})[./月-](\d{1,2})(?!\d)/);
  if (full) {
    return formatIsoDate(Number(full[1]), Number(full[2]), Number(full[3]));
  }

  const partial = text.match(/(?<!\d)(\d{1,2})[./月](\d{1,2})(?!\d)/);
  if (!partial) return null;

  const month = Number(partial[1]);
  const day = Number(partial[2]);

  let year = now.getFullYear();
  // Schedule listings look forward: a month far behind the current one is
  // taken to belong to next year rather than to the distant past.
  const monthsBehind = now.getMonth() + 1 - month;
  if (monthsBehind > 6) year += 1;

  return formatIsoDate(year, month, day);
}

/**
 * Formats a year/month/day triple as `YYYY-MM-DD`, or returns `null` when the
 * triple is not a real calendar date (month 13, February 30, NaN, ...).
 */
function formatIsoDate(year: number, month: number, day: number): string | null {
  // Round-trip through Date: out-of-range parts overflow into neighbouring
  // fields, so any mismatch means the input was not a real date.
  const probe = new Date(Date.UTC(year, month - 1, day));
  const isRealDate =
    probe.getUTCFullYear() === year &&
    probe.getUTCMonth() === month - 1 &&
    probe.getUTCDate() === day;
  if (!isRealDate) return null;

  const pad = (n: number, width: number): string => String(n).padStart(width, "0");
  return `${pad(year, 4)}-${pad(month, 2)}-${pad(day, 2)}`;
}

/**
 * Turns a link found in the page into an absolute URL.
 *
 * - A URL that already has a scheme (`https:`, `http:`, `data:`, `mailto:`, ...)
 *   is returned as-is (after trimming surrounding whitespace).
 * - A protocol-relative URL (`//host/path`) inherits the scheme of `base`.
 * - Anything else is appended to `base`, which is treated as a directory;
 *   exactly one `/` separates the two parts.
 *
 * @param url  The href/src value as found in the markup.
 * @param base Absolute URL to resolve against, e.g. the venue's site root.
 * @returns The absolute URL.
 */
function absoluteUrl(url: string, base: string): string {
  const href = url.trim();
  const schemePattern = /^[a-z][a-z0-9+.-]*:/i;

  // Already absolute (any scheme, not just http/https).
  if (schemePattern.test(href)) return href;

  // Protocol-relative: reuse the base's scheme rather than gluing onto its path.
  if (href.startsWith("//")) {
    const scheme = base.match(schemePattern)?.[0] ?? "https:";
    return scheme + href;
  }

  // Drop trailing slashes on the base so the join never yields "//".
  const root = base.replace(/\/+$/, "");
  return href.startsWith("/") ? root + href : `${root}/${href}`;
}