summaryrefslogtreecommitdiff
path: root/app/scrapers/warp-kichijoji.ts
blob: 8929fef5d0381cc022f6767c1d9937b14d750fde (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/**
 * 吉祥寺 WARP — http://warp.rinky.info/schedules
 *
 * WordPress カスタムテーマ。年月は:
 *   <h3>2026<br /><span>05</span></h3>
 *
 * イベント構造:
 *   <article id="box-DD-ID" class="schedules-box">
 *     <section class="date-box[-sun|-sat]">DD<span class="dayofweek">...</span></section>
 *     <h4>タイトル</h4>
 *     <section class="notes-wrapper">
 *       <p>OPEN / START<br/><span class="strong">HH:MM / HH:MM</span></p>
 *       <p>ADV / DOOR<br/><span class="strong">¥XXXX / ¥XXXX</span></p>
 *     </section>
 *     <section class="flyer"><img data-src="..." /></section>
 *   </article>
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/**
 * Static metadata for the Kichijoji WARP venue.
 *
 * Exposed on the scraper object below, and `id` is stamped onto every
 * scraped event as its `venue_id`.
 */
export const venue: VenueMeta = {
  id: "warp-kichijoji",
  name: "吉祥寺 WARP",
  url: "http://warp.rinky.info",
  area: "吉祥寺",
};

/**
 * Scraper for the Kichijoji WARP schedule page.
 *
 * The page lists a single year/month heading followed by one
 * `article.schedules-box` per event; each article is converted into an
 * `EventInput`.
 */
export const scraper: Scraper = {
  venue,
  /**
   * Fetches the schedule page and extracts its events.
   *
   * @returns One entry per article that has a parsable `box-DD-…` id and a
   *   non-empty title. Returns an empty list when the year/month heading
   *   cannot be parsed.
   * @throws Error with message `HTTP <status>` when the response is not OK.
   */
  async scrape(): Promise<EventInput[]> {
    const res = await fetch("http://warp.rinky.info/schedules");
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const $ = cheerio.load(await res.text());
    const events: EventInput[] = [];

    // A line matching this pattern ends the artist list: section bullets,
    // ticket/reservation notes, URLs, clock times, or a price. Both the
    // halfwidth (U+00A5) and fullwidth (U+FFE5) yen signs count as a price.
    // Hoisted out of the per-article loop because it never changes.
    const artistStopPattern =
      /^[■▼◼▶◆]|チケット|ticket|予約|http|\d{1,2}:\d{2}|[\u00A5\uFFE5]/i;

    // Extract year + month from <h3>2026<br/><span>05</span></h3>
    const h3Text = $("h3").first().text().trim(); // e.g. "2026\n05"
    const yearMonthMatch = h3Text.match(/(\d{4})\D*(\d{2})/);
    if (!yearMonthMatch) return events;
    const year = yearMonthMatch[1];
    const month = yearMonthMatch[2];

    $("article.schedules-box").each((_, el) => {
      const $el = $(el);

      // Day from article id: "box-03-23546" → "03"
      const id = $el.attr("id") ?? "";
      const dayMatch = id.match(/^box-(\d{2})-/);
      if (!dayMatch) return;
      const day = dayMatch[1];
      const date = `${year}-${month}-${day}`;

      // .text() drops tags, so <br> must be turned into a separator on the
      // DOM (on a clone, to leave the article untouched) before reading the
      // text; otherwise the title lines run together. ASCII whitespace runs
      // are then collapsed the way HTML rendering would.
      const $title = $el.find("h4").first().clone();
      $title.find("br").replaceWith(" ");
      const title = $title.text().replace(/[ \t\r\n]+/g, " ").trim();
      if (!title) return;

      // First notes-wrapper <p> contains OPEN/START times
      const $notes = $el.find("section.notes-wrapper p");
      const timeStrong = $notes.eq(0).find("span.strong").text().trim();
      // e.g. "18:30 / 19:00"
      const [openTime, startTime] = timeStrong.split("/").map((s) => s.trim());

      // Second <p> contains ADV/DOOR price
      const priceStrong = $notes.eq(1).find("span.strong").text().trim();
      // e.g. "¥3,000 / ¥3,500"; an empty or "TBA / TBA" value is stored as null
      const price = priceStrong !== "TBA / TBA" && priceStrong ? priceStrong : null;

      // Image: prefer data-src (lazy), fall back to noscript img src
      const $flyer = $el.find("section.flyer img").first();
      const rawImg =
        $flyer.attr("data-src") ??
        $el.find("section.flyer noscript img").first().attr("src") ??
        null;
      // Strip ShortPixel CDN prefix if present
      const imageUrl = rawImg
        ? rawImg.replace(/^https?:\/\/sp-ao\.shortpixel\.ai\/client\/[^/]+\//, "")
        : null;

      // Artists in <div class="w-flyer"> separated by <br>
      // notes-wrapper and detail-texts are nested inside w-flyer — clone and strip them
      const $wFlyer = $el.find("div.w-flyer").first().clone();
      $wFlyer.find("section.notes-wrapper, div.detail-texts").remove();
      $wFlyer.find("br").replaceWith("\n");
      const rawArtist = $wFlyer.text();
      const artistLines: string[] = [];
      for (const raw of rawArtist.split("\n")) {
        const l = raw.trim();
        if (!l) {
          if (artistLines.length > 0) break; // stop at first blank line after artists
          continue;
        }
        if (artistStopPattern.test(l)) break;
        artistLines.push(l);
      }
      const artist = artistLines.length > 0 ? artistLines.join(" / ") : null;

      events.push({
        venue_id: venue.id,
        title,
        artist,
        date,
        // Only well-formed HH:MM values are stored; anything else becomes null
        open_time: isTime(openTime) ? openTime : null,
        start_time: isTime(startTime) ? startTime : null,
        price,
        ticket_url: $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia']").first().attr("href") ?? null,
        image_url: imageUrl,
        source_url: null,
      });
    });

    return events;
  },
};

/**
 * Reports whether `s` is a clock time written as exactly two digits, a colon,
 * and two digits (surrounding whitespace is ignored).
 *
 * Only the shape is checked, not the numeric range, so "99:99" is accepted
 * while "9:00" is not.
 *
 * @param s Candidate string; `undefined` and the empty string yield `false`.
 * @returns `true` when the trimmed value has the `DD:DD` shape.
 */
function isTime(s: string | undefined): boolean {
  if (!s) {
    return false;
  }
  const candidate = s.trim();
  const clockShape = /^\d{2}:\d{2}$/;
  return clockShape.test(candidate);
}