/**
 * 吉祥寺 WARP — http://warp.rinky.info/schedules
 *
 * WordPress カスタムテーマ。年月は:
 *   <h3>2026<br /><span>05</span></h3>
 *
 * イベント構造:
 *   <article id="box-DD-ID" class="schedules-box">
 *     <section class="date-box[-sun|-sat]">DD<span class="dayofweek">...</span></section>
 *     <h4>タイトル</h4>
 *     <section class="notes-wrapper">
 *       <p>OPEN / START<br/><span class="strong">HH:MM / HH:MM</span></p>
 *       <p>ADV / DOOR<br/><span class="strong">¥XXXX / ¥XXXX</span></p>
 *     </section>
 *     <section class="flyer"><img data-src="..." /></section>
 *   </article>
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";
/**
 * Static metadata for the 吉祥寺 WARP venue.
 *
 * Exported on its own and also attached to `scraper`; `venue.id` is the value
 * written to `venue_id` on every event this module produces.
 */
export const venue: VenueMeta = {
// Identifier copied into each scraped event's `venue_id`.
id: "warp-kichijoji",
// Display name of the venue.
name: "吉祥寺 WARP",
// Venue site root (the schedule page itself lives under /schedules).
url: "http://warp.rinky.info",
// Area label for the venue.
area: "吉祥寺",
};
/**
 * Scraper for the 吉祥寺 WARP schedule page.
 *
 * Fetches `http://warp.rinky.info/schedules`, reads the year and month from the
 * first `<h3>` on the page, and converts every `article.schedules-box` element
 * into an `EventInput`.
 */
export const scraper: Scraper = {
  venue,

  /**
   * Download and parse the schedule page.
   *
   * @returns One entry per article that has a `box-DD-…` id and a non-empty
   *   `<h4>` title. Returns an empty array when the first `<h3>` does not
   *   contain a four-digit year followed by a two-digit month.
   * @throws Error with message `HTTP <status>` when the response is not OK.
   */
  async scrape(): Promise<EventInput[]> {
    const res = await fetch("http://warp.rinky.info/schedules");
    if (!res.ok) throw new Error(`HTTP ${res.status}`);

    const $ = cheerio.load(await res.text());
    const events: EventInput[] = [];

    // Year + month come from <h3>2026<br/><span>05</span></h3>. The regex
    // tolerates any non-digit characters (or none) between the two numbers.
    const h3Text = $("h3").first().text().trim();
    const yearMonthMatch = h3Text.match(/(\d{4})\D*(\d{2})/);
    if (!yearMonthMatch) return events;
    const year = yearMonthMatch[1];
    const month = yearMonthMatch[2];

    // A line matching this pattern ends the artist list: a leading bullet
    // symbol, or anywhere in the line a ticket/reservation keyword, a URL, a
    // clock time, or a yen sign (halfwidth U+00A5 or fullwidth U+FFE5).
    // Built once here rather than once per line inside the loop.
    const artistStopLine =
      /^[■▼◼▶◆]|チケット|ticket|予約|http|\d{1,2}:\d{2}|[\u00a5\uffe5]/i;

    $("article.schedules-box").each((_, el) => {
      const $el = $(el);

      // Day from article id: "box-03-23546" → "03"
      const id = $el.attr("id") ?? "";
      const dayMatch = id.match(/^box-(\d{2})-/);
      if (!dayMatch) return;
      const day = dayMatch[1];
      const date = `${year}-${month}-${day}`;

      // .text() drops tags, so <br> must be turned into a space on a clone
      // *before* reading the text; otherwise multi-line titles are glued
      // together ("FOO<br>BAR" → "FOOBAR").
      const $title = $el.find("h4").first().clone();
      $title.find("br").replaceWith(" ");
      const title = $title.text().trim();
      if (!title) return;

      // First notes-wrapper <p> contains OPEN/START times, e.g. "18:30 / 19:00"
      const $notes = $el.find("section.notes-wrapper p");
      const timeStrong = $notes.eq(0).find("span.strong").text().trim();
      const [openTime, startTime] = timeStrong.split("/").map((s) => s.trim());

      // Second <p> contains ADV/DOOR price, e.g. "¥3,000 / ¥3,500".
      // An empty value or the "TBA / TBA" placeholder is stored as null.
      const priceStrong = $notes.eq(1).find("span.strong").text().trim();
      const price = priceStrong !== "TBA / TBA" && priceStrong ? priceStrong : null;

      // Image: prefer data-src on the flyer <img>, fall back to the
      // <noscript> copy's src.
      const $flyer = $el.find("section.flyer img").first();
      const rawImg =
        $flyer.attr("data-src") ??
        $el.find("section.flyer noscript img").first().attr("src") ??
        null;
      // Strip ShortPixel CDN prefix if present, leaving the URL that follows it.
      const imageUrl = rawImg
        ? rawImg.replace(/^https?:\/\/sp-ao\.shortpixel\.ai\/client\/[^/]+\//, "")
        : null;

      // Artists are listed in <div class="w-flyer">, separated by <br>.
      // notes-wrapper and detail-texts are nested inside w-flyer, so work on a
      // clone with those removed and with <br> converted to newlines.
      const $wFlyer = $el.find("div.w-flyer").first().clone();
      $wFlyer.find("section.notes-wrapper, div.detail-texts").remove();
      $wFlyer.find("br").replaceWith("\n");
      const rawArtist = $wFlyer.text();

      const artistLines: string[] = [];
      for (const raw of rawArtist.split("\n")) {
        const l = raw.trim();
        if (!l) {
          if (artistLines.length > 0) break; // stop at first blank line after artists
          continue; // skip leading blank lines
        }
        if (artistStopLine.test(l)) break;
        artistLines.push(l);
      }
      const artist = artistLines.length > 0 ? artistLines.join(" / ") : null;

      events.push({
        venue_id: venue.id,
        title,
        artist,
        date,
        // Only keep values that look like HH:MM; anything else becomes null.
        open_time: isTime(openTime) ? openTime : null,
        start_time: isTime(startTime) ? startTime : null,
        price,
        // First link inside the article whose href mentions a known ticket vendor.
        ticket_url:
          $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia']").first().attr("href") ?? null,
        image_url: imageUrl,
        source_url: null,
      });
    });

    return events;
  },
};
/**
 * Report whether `s` is a clock time written as exactly two digits, a colon,
 * and two digits (surrounding whitespace is ignored).
 *
 * Only the shape is checked, not the numeric range, so "25:99" passes.
 *
 * @param s Candidate text; may be undefined when the source had no value.
 * @returns `true` for an `NN:NN` string, `false` for undefined, empty, or
 *   anything else.
 */
function isTime(s: string | undefined): boolean {
  // Undefined and "" are rejected before trimming.
  if (!s) {
    return false;
  }
  const candidate = s.trim();
  const hhmmShape = /^\d{2}:\d{2}$/;
  return hhmmShape.test(candidate);
}
|