1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
/**
 * Kichijoji WARP (吉祥寺 WARP) — http://warp.rinky.info/schedules
 *
 * The site uses a custom WordPress theme. The year and month appear as:
 *   <h3>2026<br /><span>05</span></h3>
 *
 * Event structure:
 *   <article id="box-DD-ID" class="schedules-box">
 *     <section class="date-box[-sun|-sat]">DD<span class="dayofweek">...</span></section>
 *     <h4>Title</h4>
 *     <section class="notes-wrapper">
 *       <p>OPEN / START<br/><span class="strong">HH:MM / HH:MM</span></p>
 *       <p>ADV / DOOR<br/><span class="strong">¥XXXX / ¥XXXX</span></p>
 *     </section>
 *     <section class="flyer"><img data-src="..." /></section>
 *   </article>
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";
/**
 * Static metadata for the 吉祥寺 WARP venue.
 *
 * `id` is the value the scraper in this file writes into each event's
 * `venue_id` field.
 */
export const venue: VenueMeta = {
  id: "warp-kichijoji",
  name: "吉祥寺 WARP",
  url: "http://warp.rinky.info",
  area: "吉祥寺",
};
/**
 * Scraper for the 吉祥寺 WARP schedule page.
 *
 * Fetches http://warp.rinky.info/schedules, reads the year and month from the
 * first <h3> on the page, and emits one event per
 * <article class="schedules-box"> whose id has the form "box-DD-ID".
 */
export const scraper: Scraper = {
  venue,

  /**
   * Downloads and parses the schedule page.
   *
   * @returns The events found on the page. The array is empty when no
   *   year/month pair can be read from the first <h3>.
   * @throws Error with message `HTTP <status>` when the response is not OK.
   */
  async scrape(): Promise<EventInput[]> {
    const res = await fetch("http://warp.rinky.info/schedules");
    if (!res.ok) throw new Error(`HTTP ${res.status}`);

    const $ = cheerio.load(await res.text());
    const events: EventInput[] = [];

    // Extract year + month from <h3>2026<br/><span>05</span></h3>
    const h3Text = $("h3").first().text().trim(); // e.g. "2026\n05"
    const yearMonthMatch = h3Text.match(/(\d{4})\D*(\d{2})/);
    if (!yearMonthMatch) return events;
    const year = yearMonthMatch[1];
    const month = yearMonthMatch[2];

    $("article.schedules-box").each((_, el) => {
      const $el = $(el);

      // Day from article id: "box-03-23546" → "03"
      const id = $el.attr("id") ?? "";
      const dayMatch = id.match(/^box-(\d{2})-/);
      if (!dayMatch) return; // skip articles that do not carry a day in their id
      const day = dayMatch[1];
      const date = `${year}-${month}-${day}`;

      // .text() returns text content only, so <br> tags never appear in its
      // output and a regex over that string cannot turn them into spaces.
      // Swap each <br> element for a space in the DOM first so that
      // "A<br/>B" becomes "A B" rather than "AB".
      const $title = $el.find("h4").first();
      $title.find("br").replaceWith(" ");
      const title = $title.text().trim();
      if (!title) return; // an event without a title is not recorded

      // First notes-wrapper <p> contains OPEN/START times
      const $notes = $el.find("section.notes-wrapper p");
      const timeStrong = $notes.eq(0).find("span.strong").text().trim();
      // e.g. "18:30 / 19:00"
      const [openTime, startTime] = timeStrong.split("/").map((s) => s.trim());

      // Second <p> contains ADV/DOOR price
      const priceStrong = $notes.eq(1).find("span.strong").text().trim();
      // e.g. "¥3,000 / ¥3,500"; an empty string or the "TBA / TBA" placeholder is stored as null
      const price = priceStrong !== "TBA / TBA" && priceStrong ? priceStrong : null;

      // Image: prefer data-src (lazy), fall back to noscript img src
      const $flyer = $el.find("section.flyer img").first();
      const rawImg =
        $flyer.attr("data-src") ??
        $el.find("section.flyer noscript img").first().attr("src") ??
        null;
      // Strip ShortPixel CDN prefix if present
      const imageUrl = rawImg
        ? rawImg.replace(/^https?:\/\/sp-ao\.shortpixel\.ai\/client\/[^/]+\//, "")
        : null;

      events.push({
        venue_id: venue.id,
        title,
        artist: null,
        date,
        // Values that are not exactly "HH:MM" are stored as null.
        open_time: isTime(openTime) ? openTime : null,
        start_time: isTime(startTime) ? startTime : null,
        price,
        // First link inside the article whose href contains one of the ticket-vendor substrings.
        ticket_url: $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia']").first().attr("href") ?? null,
        image_url: imageUrl,
        source_url: null,
      });
    });

    return events;
  },
};
/**
 * Reports whether `s`, ignoring surrounding whitespace, has the shape
 * "DD:DD" (two digits, a colon, two digits).
 *
 * Only the shape is checked; the digits are not range-validated, so "99:99"
 * is accepted.
 *
 * @param s Candidate string; `undefined` and the empty string yield `false`.
 * @returns `true` when the trimmed input matches the two-digit clock shape.
 */
function isTime(s: string | undefined): boolean {
  if (!s) {
    return false;
  }
  const clockShape = /^\d{2}:\d{2}$/;
  const candidate = s.trim();
  return clockShape.test(candidate);
}
|