summaryrefslogtreecommitdiff
path: root/app/scrapers/www-shibuya.ts
blob: 2c850803d7b23dd59e7690c9602da6f61fcbdd9c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/**
 * Static metadata for the venue covered by this module.
 *
 * `id` is stamped onto every scraped event as `venue_id`, and `url` is the
 * base against which relative event links are resolved.
 */
export const venue: VenueMeta = {
  id: "www-shibuya",
  name: "WWW / WWW X",
  url: "https://www-shibuya.jp",
  area: "渋谷", // "Shibuya"
  // NOTE(review): presumably a headcount; the name lists two halls (WWW and
  // WWW X), so confirm which of them this figure refers to.
  capacity: 700,
};

/**
 * Scraper for the schedule page at https://www-shibuya.jp/schedule/.
 *
 * `scrape()` downloads the page, reads the starting year/month from the first
 * `li.month` element, then walks every `article.column` in document order and
 * turns each one that has both a day number and an `h3` title into an
 * `EventInput`.
 *
 * @throws Error with message `HTTP <status>` when the response is not ok.
 */
export const scraper: Scraper = {
  venue,
  async scrape(): Promise<EventInput[]> {
    const response = await fetch("https://www-shibuya.jp/schedule/");
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    const $ = cheerio.load(await response.text());

    // The nav entry looks like "202605May": four year digits followed by two
    // month digits. Without a match, start from the current local year/month.
    const navMatch = $("li.month").first().text().trim().match(/(\d{4})(\d{2})/);
    let year: number;
    let month: number;
    if (navMatch) {
      year = Number.parseInt(navMatch[1], 10);
      month = Number.parseInt(navMatch[2], 10);
    } else {
      year = new Date().getFullYear();
      month = new Date().getMonth() + 1;
    }

    const pad2 = (value: number): string => String(value).padStart(2, "0");
    const collected: EventInput[] = [];
    let lastDay = 0;

    for (const node of $("article.column").toArray()) {
      const $article = $(node);

      // Articles without a usable day number (missing, 0, NaN) are skipped
      // and do not take part in rollover tracking.
      const day = parseInt($article.find(".date .day").text().trim(), 10);
      if (!day) continue;

      // A day number lower than the previous one means the listing has moved
      // on to the next month (and to the next year once past December).
      if (lastDay > 0 && day < lastDay) {
        if (month >= 12) {
          month = 1;
          year += 1;
        } else {
          month += 1;
        }
      }
      // Recorded before the title check, so untitled articles still count
      // towards rollover detection.
      lastDay = day;

      const title = $article.find("h3").text().trim();
      if (!title) continue;

      // First HH:MM found is taken as the open time, the second as the start.
      const [openTime = null, startTime = null] =
        $article.find(".openstart").text().match(/\d{2}:\d{2}/g) ?? [];

      const link = $article.find("a").first().attr("href");

      collected.push({
        venue_id: venue.id,
        title,
        artist: null,
        date: [year, pad2(month), pad2(day)].join("-"),
        open_time: openTime,
        start_time: startTime,
        image_url: null,
        source_url: link ? absoluteUrl(link, venue.url) : null,
      });
    }

    return collected;
  },
};

/**
 * Resolves an `href` scraped from a page into an absolute URL.
 *
 * - Hrefs that already start with `http://` or `https://` are returned
 *   unchanged apart from trimming surrounding whitespace.
 * - Everything else (root-relative `/a`, path-relative `a/b`, `../a`, and
 *   protocol-relative `//host/a`) is resolved against `base` with the WHATWG
 *   `URL` parser, so slashes are joined correctly and dot segments collapse.
 * - If `base` cannot be parsed as an absolute URL, falls back to plain string
 *   joining instead of throwing.
 *
 * @param url  Raw href value as found in the markup.
 * @param base Absolute URL to resolve relative hrefs against.
 * @returns The resolved URL string.
 */
function absoluteUrl(url: string, base: string): string {
  const href = url.trim();

  // Require the full "scheme://" prefix: a bare startsWith("http") would also
  // treat relative paths such as "httpfoo/bar" as already absolute.
  if (/^https?:\/\//i.test(href)) return href;

  try {
    return new URL(href, base).href;
  } catch {
    // `base` is not a valid absolute URL; keep the simple join so the
    // function never throws.
    return href.startsWith("/") ? base + href : `${base}/${href}`;
  }
}