summaryrefslogtreecommitdiff
path: root/app/scrapers/shibuya-o.ts
blob: 6d394ffeb91379dadf0d0f19491cc96e89228606 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/**
 * 渋谷 O-EAST / O-WEST / O-Crest / O-nest — https://shibuya-o.com
 *
 * 各ベニューのスケジュールページを個別に取得して統合する。
 * DOM 構造 (共通):
 *   <div class="p-scheduled-card">
 *     <a href="https://shibuya-o.com/{venue}/schedule/{slug}/">
 *     <span class="p-scheduled-card__date-item">05 / 01</span>
 *     <span class="p-scheduled-card__date-open">OPEN 18:00 / START 19:00</span>
 *     <span class="p-scheduled-card__title-main">タイトル</span>
 *     <li class="p-scheduled-card__artist-item">アーティスト</li>
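 *   The card itself carries no year: the year comes from the ?y=&m= query the page was requested with (same form as the nav link <a href="/east/schedule/?y=2026&m=6">), adjusted when a card's month falls across a year boundary.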
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/**
 * Venue metadata for the Shibuya O-group halls (O-EAST / O-WEST / O-Crest / O-nest).
 *
 * All four halls share this single record; its `id` is written as `venue_id`
 * on every event this module produces.
 */
export const venue: VenueMeta = {
  id: "shibuya-o",
  name: "渋谷 O-EAST / O-WEST / O-Crest / O-nest",
  url: "https://shibuya-o.com",
  area: "渋谷",
  // NOTE(review): one figure for four halls — presumably the largest hall's capacity; confirm.
  capacity: 1300,
};

// URL path segment of each hall's schedule page (`/{segment}/schedule/`); one entry per hall that is scraped.
const SUB_VENUES = ["east", "west", "crest", "nest"];
// Site origin that the schedule page URLs are built from.
const BASE = "https://shibuya-o.com";

/** Matches the "MM / DD" label of a schedule card, e.g. "05 / 01". */
const CARD_DATE_RE = /(\d{1,2})\s*\/\s*(\d{1,2})/;
/** Door-open time; the hour may have one or two digits ("OPEN 9:30", "OPEN 18:00"). */
const OPEN_TIME_RE = /OPEN\s*(\d{1,2}):(\d{2})/i;
/** Show-start time; same shape as {@link OPEN_TIME_RE}. */
const START_TIME_RE = /START\s*(\d{1,2}):(\d{2})/i;

/**
 * Infers the calendar year of a card whose label only carries month and day.
 *
 * The card is assumed to belong to the occurrence of `cardMonth` closest to the
 * requested page month: a December page listing a January show resolves to the
 * following year, a January page listing a December show to the previous one.
 * When the two candidates are equally far away (six months), the future one wins.
 */
function resolveCardYear(cardMonth: number, pageYear: number, pageMonth: number): number {
  const delta = cardMonth - pageMonth;
  if (delta > 6) return pageYear - 1;
  if (delta <= -6) return pageYear + 1;
  return pageYear;
}

/**
 * Formats a date as `YYYY-MM-DD`, or returns `null` when the combination is not
 * a real calendar day (month 13, day 0, February 30, ...).
 */
function toIsoDate(year: number, month: number, day: number): string | null {
  // Date.UTC silently rolls out-of-range parts over, so a round-trip mismatch means the input was invalid.
  const probe = new Date(Date.UTC(year, month - 1, day));
  const isRealDay =
    probe.getUTCFullYear() === year &&
    probe.getUTCMonth() === month - 1 &&
    probe.getUTCDate() === day;
  if (!isRealDay) return null;
  return `${year}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`;
}

/**
 * Pulls a clock time out of `text` using `pattern` (hour in group 1, minutes in
 * group 2) and normalises it to `HH:MM`. Returns `null` when the label is absent.
 */
function extractTime(text: string, pattern: RegExp): string | null {
  const m = pattern.exec(text);
  if (!m) return null;
  return `${m[1].padStart(2, "0")}:${m[2]}`;
}

/**
 * Fetches one hall's schedule page for one month and converts every
 * `div.p-scheduled-card` on it into an event record.
 *
 * Cards without a parseable, valid date or without a title are skipped.
 *
 * @param subVenue - Path segment of the hall (`/{subVenue}/schedule/`).
 * @param year - Year sent as the `y` query parameter.
 * @param month - Month (1-12) sent as the `m` query parameter.
 * @returns The events found on the page, in document order.
 * @throws Error when the response status is not OK; network failures from `fetch` propagate as-is.
 */
async function scrapeVenueMonth(subVenue: string, year: number, month: number): Promise<EventInput[]> {
  const url = `${BASE}/${subVenue}/schedule/?y=${year}&m=${month}`;
  const res = await fetch(url);
  if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
  const $ = cheerio.load(await res.text());
  const events: EventInput[] = [];

  $("div.p-scheduled-card").each((_, el) => {
    const $el = $(el);

    const dateRaw = $el.find("span.p-scheduled-card__date-item").first().text().trim();
    const dateMatch = CARD_DATE_RE.exec(dateRaw);
    if (!dateMatch) return;
    const cardMonth = parseInt(dateMatch[1], 10);
    const day = parseInt(dateMatch[2], 10);
    if (cardMonth < 1 || cardMonth > 12) return;
    // The label has no year, so derive it from the requested page (handles Dec/Jan rollover both ways).
    const date = toIsoDate(resolveCardYear(cardMonth, year, month), cardMonth, day);
    if (!date) return;

    const title = $el.find("span.p-scheduled-card__title-main").first().text().trim();
    if (!title) return;

    const openText = $el.find("span.p-scheduled-card__date-open").first().text().trim();

    // Drop blank list items so the joined string never contains empty segments.
    const artists = $el.find("li.p-scheduled-card__artist-item")
      .map((_, s) => $(s).text().trim()).get()
      .filter((name) => name !== "")
      .join("、") || null;

    // The card may be wrapped by its link or contain it; try the ancestor first.
    const detailHref = $el.closest("a[href]").attr("href") ??
      $el.find("a[href]").first().attr("href") ?? null;
    const sourceUrl = detailHref
      ? (detailHref.startsWith("http") ? detailHref : `${BASE}${detailHref}`)
      : null;

    const imageUrl = $el.find("figure img").first().attr("src") ?? null;

    events.push({
      venue_id: venue.id,
      title,
      artist: artists,
      date,
      open_time: extractTime(openText, OPEN_TIME_RE),
      start_time: extractTime(openText, START_TIME_RE),
      image_url: imageUrl,
      source_url: sourceUrl,
    });
  });

  return events;
}

/**
 * Scraper for all Shibuya O-group halls.
 *
 * Every hall in `SUB_VENUES` is fetched for the current month and the two
 * following months, and the results are merged into one de-duplicated list.
 */
export const scraper: Scraper = {
  venue,
  /**
   * Scrapes every hall/month page concurrently.
   *
   * A page that fails is logged and skipped so the remaining pages still
   * contribute; this method itself does not reject because of a failed page.
   *
   * @returns Events from all pages that loaded, with duplicates (same date and
   *   same title) collapsed to their first occurrence.
   */
  async scrape(): Promise<EventInput[]> {
    const now = new Date();
    const months = [0, 1, 2].map((offset) => {
      // Anchoring on day 1 avoids end-of-month overflow; Date normalises month indexes past 11 into the next year.
      const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
      return { year: d.getFullYear(), month: d.getMonth() + 1 };
    });

    // Keep the request descriptors so a failure can be reported with its hall and month.
    const targets = SUB_VENUES.flatMap((sub) =>
      months.map(({ year, month }) => ({ sub, year, month }))
    );
    const results = await Promise.allSettled(
      targets.map(({ sub, year, month }) => scrapeVenueMonth(sub, year, month))
    );

    const all: EventInput[] = [];
    results.forEach((r, i) => {
      if (r.status === "fulfilled") {
        all.push(...r.value);
        return;
      }
      // Best-effort: report the failed page instead of dropping it silently.
      const { sub, year, month } = targets[i];
      const reason = r.reason instanceof Error ? r.reason.message : String(r.reason);
      console.warn(
        `[${venue.id}] failed to scrape ${sub} ${year}-${String(month).padStart(2, "0")}: ${reason}`
      );
    });

    // The same event can surface on more than one page; the first occurrence wins.
    const seen = new Set<string>();
    return all.filter((e) => {
      const key = `${e.date}|${e.title}`;
      if (seen.has(key)) return false;
      seen.add(key);
      return true;
    });
  },
};