summary | refs | log | tree | commit | diff
path: root/app/scrapers/warp-kichijoji.ts
diff options
context:
space:
mode:
Diffstat (limited to 'app/scrapers/warp-kichijoji.ts')
-rw-r--r-- app/scrapers/warp-kichijoji.ts 99
1 files changed, 99 insertions, 0 deletions
diff --git a/app/scrapers/warp-kichijoji.ts b/app/scrapers/warp-kichijoji.ts
new file mode 100644
index 0000000..8a828ea
--- /dev/null
+++ b/app/scrapers/warp-kichijoji.ts
@@ -0,0 +1,99 @@
+/**
+ * 吉祥寺 WARP — http://warp.rinky.info/schedules
+ *
+ * WordPress カスタムテーマ。年月は:
+ * <h3>2026<br /><span>05</span></h3>
+ *
+ * イベント構造:
+ * <article id="box-DD-ID" class="schedules-box">
+ * <section class="date-box[-sun|-sat]">DD<span class="dayofweek">...</span></section>
+ * <h4>タイトル</h4>
+ * <section class="notes-wrapper">
+ * <p>OPEN / START<br/><span class="strong">HH:MM / HH:MM</span></p>
+ * <p>ADV / DOOR<br/><span class="strong">¥XXXX / ¥XXXX</span></p>
+ * </section>
+ * <section class="flyer"><img data-src="..." /></section>
+ * </article>
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
/**
 * Static metadata for the Kichijoji WARP venue.
 *
 * `scraper` exposes this object as its `venue` field and copies `venue.id`
 * into the `venue_id` of every event it produces.
 */
export const venue: VenueMeta = {
  id: "warp-kichijoji",
  name: "吉祥寺 WARP",
  url: "http://warp.rinky.info",
  area: "吉祥寺",
};
+
/** Schedule page that `scraper.scrape()` downloads. */
const SCHEDULES_URL = "http://warp.rinky.info/schedules";

/** ShortPixel CDN prefix that is stripped from flyer image URLs. */
const SHORTPIXEL_PREFIX = /^https?:\/\/sp-ao\.shortpixel\.ai\/client\/[^/]+\//;

/**
 * Scraper for the Kichijoji WARP schedule page.
 *
 * `scrape()` downloads the schedule HTML, reads the year and month from the
 * first `<h3>` on the page, and converts every `article.schedules-box` into
 * one event. Articles whose id does not start with `box-DD-` or that have no
 * title are skipped. When the heading carries no year/month an empty list is
 * returned.
 *
 * @throws Error carrying the HTTP status when the response is not OK.
 */
export const scraper: Scraper = {
  venue,
  async scrape(): Promise<EventInput[]> {
    const res = await fetch(SCHEDULES_URL);
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const $ = cheerio.load(await res.text());
    const events: EventInput[] = [];

    // Heading markup is <h3>2026<br/><span>05</span></h3>. `.text()` drops the
    // <br>, so the digits may arrive fused ("202605") or separated by
    // whitespace from the source; `\D*` accepts both.
    const headingText = $("h3").first().text().trim();
    const yearMonthMatch = headingText.match(/(\d{4})\D*(\d{2})/);
    if (!yearMonthMatch) return events;
    const year = yearMonthMatch[1];
    const month = yearMonthMatch[2];

    $("article.schedules-box").each((_, el) => {
      const $el = $(el);

      // Day of month comes from the article id: "box-03-23546" -> "03".
      const dayMatch = ($el.attr("id") ?? "").match(/^box-(\d{2})-/);
      if (!dayMatch) return;
      const date = `${year}-${month}-${dayMatch[1]}`;

      // `.text()` discards tags, so a <br> inside the title would glue the
      // two lines together. Swap each <br> for a space before reading the
      // text, then collapse the resulting whitespace runs.
      const $title = $el.find("h4").first();
      $title.find("br").replaceWith(" ");
      const title = $title.text().replace(/\s+/g, " ").trim();
      if (!title) return;

      const $notes = $el.find("section.notes-wrapper p");

      // First <p>: OPEN / START, e.g. "18:30 / 19:00".
      const timeText = $notes.eq(0).find("span.strong").text().trim();
      const [openTime, startTime] = timeText.split("/").map((part) => part.trim());

      // Second <p>: ADV / DOOR, e.g. "¥3,000 / ¥3,500". An empty value or the
      // "TBA / TBA" placeholder is stored as null.
      const priceText = $notes.eq(1).find("span.strong").text().trim();
      const price = priceText && priceText !== "TBA / TBA" ? priceText : null;

      // Flyer image: prefer the lazy-load data-src, else the <noscript> copy.
      const rawImg =
        $el.find("section.flyer img").first().attr("data-src") ??
        $el.find("section.flyer noscript img").first().attr("src") ??
        null;
      const imageUrl = rawImg ? rawImg.replace(SHORTPIXEL_PREFIX, "") : null;

      // First link whose href mentions one of the known ticket vendors.
      const ticketUrl =
        $el
          .find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia']")
          .first()
          .attr("href") ?? null;

      events.push({
        venue_id: venue.id,
        title,
        artist: null,
        date,
        open_time: isTime(openTime) ? openTime : null,
        start_time: isTime(startTime) ? startTime : null,
        price,
        ticket_url: ticketUrl,
        image_url: imageUrl,
        source_url: null,
      });
    });

    return events;
  },
};
+
/**
 * Reports whether `s` is a clock string of the form `HH:MM`.
 *
 * Surrounding whitespace is ignored. Both parts must be exactly two digits
 * and the minutes must lie in `00`–`59`, so values such as `"18:75"` are
 * rejected. The hour part is not range-checked.
 *
 * @param s Candidate string; `undefined` and `""` yield `false`.
 * @returns `true` when `s` matches the format described above.
 */
function isTime(s: string | undefined): boolean {
  if (!s) return false;
  return /^\d{2}:[0-5]\d$/.test(s.trim());
}