summaryrefslogtreecommitdiff
path: root/app/scrapers/mod-shibasaki.ts
blob: 0e2a96b452b9b65a8a0ad44ba62ec21127f3bc6d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
/**
 * shibasaki mod (調布市柴崎) — https://shibasakimod.com/schedule
 *
 * Squarespace イベントリスト。静的 HTML として配信される。
 * 一覧ページに出演者情報はないため、各イベントの詳細ページを並列取得する。
 * DOM 構造 (一覧):
 *   <article class="eventlist-event eventlist-event--upcoming">
 *     <time class="event-date" datetime="YYYY-MM-DD">
 *     <h1 class="eventlist-title"><a class="eventlist-title-link" href="/schedule/YYYYMMDD">
 * DOM 構造 (詳細):
 *   <div class="sqs-html-content"><p style="white-space:pre-wrap;">
 *     ...
 *     live:\nアーティスト名\n... (または 出演:)
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/**
 * Static metadata for the shibasaki mod venue.
 * `id` is written to every scraped event as `venue_id`, and `url` is the site
 * origin used as the base when turning relative event links into absolute ones.
 */
export const venue: VenueMeta = {
  id: "mod-shibasaki",
  name: "shibasaki mod",
  url: "https://shibasakimod.com",
  area: "柴崎",
};

/** Event list page that the scraper fetches to discover upcoming events. */
const SCHEDULE_URL = "https://shibasakimod.com/schedule";

/**
 * Data collected for one event from the list page, before the detail page is
 * fetched to look up the artists.
 */
interface EventStub {
  /** Event date in `YYYY-MM-DD` form, taken from the `datetime` attribute. */
  date: string;
  /** Event title text from the list entry's heading link. */
  title: string;
  /** Start time text as shown on the list page, or null when absent. */
  startTime: string | null;
  /** Absolute URL of the event detail page, or null when the entry has no link. */
  sourceUrl: string | null;
  /** Image URL from the list entry (`data-src` preferred over `src`), or null. */
  imageUrl: string | null;
}

/**
 * Marks the first line of the time / price / ticket details that follow the
 * artist list. Escapes spell out both the ASCII and the full-width forms of
 * ":" (U+FF1A) and the yen sign (U+00A5 / U+FFE5) so the pattern keeps
 * matching even if the source file goes through Unicode normalization.
 */
const ARTIST_LIST_END =
  /\d{1,2}[:\uFF1A]\d{2}|[\u00A5\uFFE5]|yen|ticket|チケット|予約|adv|door/i;

/**
 * Finds a "live:", "出演:" or "act:" label and captures everything after it.
 * The lookbehind rejects labels that are merely the tail of a longer Latin
 * word (e.g. "contact:" or "olive:"), which would otherwise be mistaken for
 * the artist section.
 */
const ARTIST_LABEL = /(?<![A-Za-z])(?:live|出演|act)[:\uFF1A]\s*([\s\S]+)/i;

/**
 * Collects artist names from the text that follows an artist label.
 *
 * Each line is cleaned of "@handle" mentions and a leading bullet; blank lines
 * and bare URLs are skipped; collection stops at the first line that looks
 * like time, price or ticket information.
 *
 * @param section Text starting at the first candidate artist line.
 * @returns Artist names in page order (possibly empty).
 */
function extractArtistLines(section: string): string[] {
  const artists: string[] = [];
  for (const raw of section.split(/\n/)) {
    const line = raw
      .replace(/\s*@\S+/g, "") // drop "@handle" social mentions
      .trim() // trim first so an indented bullet is still recognised
      .replace(/^[・•]\s*/, "")
      .trim();
    if (!line) continue;
    if (/^https?:\/\//i.test(line)) continue; // skip social links between artists
    if (ARTIST_LIST_END.test(line)) break;
    artists.push(line);
  }
  return artists;
}

/**
 * Extracts the artist list from the plain text of an event detail page.
 *
 * Tries the labelled section first ("live:", "出演:", "act:"); if that yields
 * nothing, falls back to the first bulleted line ("・" or "•").
 *
 * @param text Detail page text with one logical line per "\n".
 * @returns Artist names joined with " / ", or null when none were found.
 */
function parseArtistsFromText(text: string): string | null {
  const labelled = ARTIST_LABEL.exec(text)?.[1];
  if (labelled) {
    const artists = extractArtistLines(labelled);
    if (artists.length > 0) return artists.join(" / ");
  }

  // Fallback: start from the first bulleted line.
  const lines = text.split(/\n/);
  const firstBullet = lines.findIndex((l) => /^[・•]/.test(l.trim()));
  if (firstBullet >= 0) {
    const artists = extractArtistLines(lines.slice(firstBullet).join("\n"));
    if (artists.length > 0) return artists.join(" / ");
  }
  return null;
}

/**
 * Fetches an event detail page and extracts its artist list.
 *
 * This is best-effort: any HTTP error, network failure or parse failure
 * results in null so that one bad detail page does not fail the whole scrape.
 *
 * @param url Absolute URL of the event detail page.
 * @returns Artist names joined with " / ", or null when unavailable.
 */
async function fetchArtist(url: string): Promise<string | null> {
  try {
    const res = await fetch(url);
    if (!res.ok) return null;
    const $ = cheerio.load(await res.text());
    // Insert newlines at block boundaries before extracting text, because
    // .text() would otherwise glue adjacent lines and paragraphs together.
    $(".sqs-html-content br").replaceWith("\n");
    $(".sqs-html-content p").each((_, el) => {
      $(el).append("\n");
    });
    return parseArtistsFromText($(".sqs-html-content").text());
  } catch {
    // Deliberately swallowed: artist info is optional enrichment.
    return null;
  }
}

/**
 * Resolves an event link against the venue origin.
 *
 * Uses the URL parser instead of string concatenation so that hrefs without a
 * leading slash and protocol-relative hrefs resolve correctly.
 *
 * @param href Raw `href` attribute value, if any.
 * @param base Absolute URL to resolve relative hrefs against.
 * @returns Absolute http(s) URL, or null when the href is missing, malformed
 *          or uses another scheme.
 */
function resolveEventUrl(href: string | undefined, base: string): string | null {
  if (!href) return null;
  try {
    const resolved = new URL(href, base);
    return /^https?:$/.test(resolved.protocol) ? resolved.href : null;
  } catch {
    // Malformed href: treat the event as having no detail page.
    return null;
  }
}

/**
 * Scraper for the shibasaki mod schedule.
 *
 * `scrape()` reads the upcoming entries from the event list page, then fetches
 * every entry's detail page in parallel to look up the artists, because the
 * list page itself carries no artist information.
 *
 * @throws Error with message `HTTP <status>` when the list page request fails.
 */
export const scraper: Scraper = {
  venue,
  async scrape(): Promise<EventInput[]> {
    const res = await fetch(SCHEDULE_URL);
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const $ = cheerio.load(await res.text());
    const stubs: EventStub[] = [];

    $("article.eventlist-event--upcoming").each((_, el) => {
      const $el = $(el);

      // Entries without a well-formed ISO date or a title are skipped.
      const date = $el.find("time.event-date").first().attr("datetime") ?? "";
      if (!/^\d{4}-\d{2}-\d{2}$/.test(date)) return;

      const title = $el.find("h1.eventlist-title a, h2.eventlist-title a").first().text().trim();
      if (!title) return;

      const startTime = $el.find("time.event-time-localized-start").first().text().trim() || null;

      const sourceUrl = resolveEventUrl(
        $el.find("a.eventlist-title-link").first().attr("href"),
        venue.url
      );

      // Prefer data-src over src when both are present.
      const imageUrl =
        $el.find("img[data-src]").first().attr("data-src") ??
        $el.find("img[src]").first().attr("src") ?? null;

      stubs.push({ date, title, startTime, sourceUrl, imageUrl });
    });

    // fetchArtist never rejects (it returns null on failure), so Promise.all
    // cannot short-circuit here; results stay index-aligned with `stubs`.
    const artists = await Promise.all(
      stubs.map((s) => (s.sourceUrl ? fetchArtist(s.sourceUrl) : Promise.resolve(null)))
    );

    return stubs.map((s, i) => ({
      venue_id: venue.id,
      title: s.title,
      date: s.date,
      open_time: null,
      start_time: s.startTime,
      price: null,
      image_url: s.imageUrl,
      source_url: s.sourceUrl,
      artist: artists[i],
    }));
  },
};