summary | refs | log | tree | commit | diff
path: root/app/scrapers/liquid-room.ts
diff options
context:
space:
mode:
author yyamashita <yyamashita@mosquit.one> 2026-05-10 23:22:17 +0900
committer yyamashita <yyamashita@mosquit.one> 2026-05-10 23:22:17 +0900
commit b56e79b5b288b7c9e2fef396b303afc32c9baf5d (patch)
tree 28080f7f019889659ef1682f4d3661ed9650da54 /app/scrapers/liquid-room.ts
parent 05d2b35a85a46dde9a1264d3002ba86e02e3d5eb (diff)
Fix multi-month scrape coverage and add duo MUSIC EXCHANGE
- Extend 8 scrapers (liquid-room, shibuya-o, club-quattro, meets-otsuka, nishieifuku-jam, fever-shindaita, fad-yokohama, and new duo-music-exchange) to fetch 3 calendar months instead of 1-2, covering the full 65-day window
- Add duo MUSIC EXCHANGE scraper (渋谷, ~700 cap, /schedule/YYYY/index_YYYY-MM.html)
- Add npm test: Node.js built-in test runner verifies each scraper fetches all required month URLs via mocked fetch (10 tests, no extra deps)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app/scrapers/liquid-room.ts')
-rw-r--r-- app/scrapers/liquid-room.ts 110
1 files changed, 62 insertions, 48 deletions
diff --git a/app/scrapers/liquid-room.ts b/app/scrapers/liquid-room.ts
index 1eeade6..a1265c8 100644
--- a/app/scrapers/liquid-room.ts
+++ b/app/scrapers/liquid-room.ts
@@ -10,62 +10,76 @@ export const venue: VenueMeta = {
capacity: 1000,
};
-export const scraper: Scraper = {
- venue,
- async scrape(): Promise<EventInput[]> {
- const res = await fetch("https://www.liquidroom.net/schedule");
- if (!res.ok) throw new Error(`HTTP ${res.status}`);
- const html = await res.text();
- const $ = cheerio.load(html);
- const events: EventInput[] = [];
+function parseHtml(html: string): EventInput[] {
+ const $ = cheerio.load(html);
+ const events: EventInput[] = [];
- $("article").each((_, el) => {
- const $el = $(el);
+ $("article").each((_, el) => {
+ const $el = $(el);
- const href = $el.find("a.s_link").attr("href") ?? "";
- // Date is encoded in the URL: e.g. /schedule/eventname_20260501
- const dateMatch = href.match(/_(\d{4})(\d{2})(\d{2})$/);
- if (!dateMatch) return;
- const date = `${dateMatch[1]}-${dateMatch[2]}-${dateMatch[3]}`;
+ const href = $el.find("a.s_link").attr("href") ?? "";
+ const dateMatch = href.match(/_(\d{4})(\d{2})(\d{2})$/);
+ if (!dateMatch) return;
+ const date = `${dateMatch[1]}-${dateMatch[2]}-${dateMatch[3]}`;
- const h2 = $el.find("h2").first().text().trim();
- if (!h2) return;
+ const h2 = $el.find("h2").first().text().trim();
+ if (!h2) return;
- const subtitle = $el.find("p.subtitle").first().text().trim();
- // h2 is the artist/band name; subtitle (if present) is the event title
- const title = subtitle || h2;
- const artist = subtitle ? h2 : null;
+ const subtitle = $el.find("p.subtitle").first().text().trim();
+ const title = subtitle || h2;
+ const artist = subtitle ? h2 : null;
- const openTime =
- $el
- .find("dl")
- .filter((_, dl) => $(dl).find("dt").text().includes("OPEN"))
- .find("dd")
- .text()
- .trim()
- .match(/\d{2}:\d{2}/)?.[0] ?? null;
+ const openTime =
+ $el
+ .find("dl")
+ .filter((_, dl) => $(dl).find("dt").text().includes("OPEN"))
+ .find("dd")
+ .text()
+ .trim()
+ .match(/\d{2}:\d{2}/)?.[0] ?? null;
- const startTime =
- $el
- .find("dl")
- .filter((_, dl) => $(dl).find("dt").text().includes("START"))
- .find("dd")
- .text()
- .trim()
- .match(/\d{2}:\d{2}/)?.[0] ?? null;
+ const startTime =
+ $el
+ .find("dl")
+ .filter((_, dl) => $(dl).find("dt").text().includes("START"))
+ .find("dd")
+ .text()
+ .trim()
+ .match(/\d{2}:\d{2}/)?.[0] ?? null;
- events.push({
- venue_id: venue.id,
- title,
- artist,
- date,
- open_time: openTime,
- start_time: startTime,
- image_url: $el.find("div.left img").attr("src") ?? null,
- source_url: href,
- });
+ events.push({
+ venue_id: venue.id,
+ title,
+ artist,
+ date,
+ open_time: openTime,
+ start_time: startTime,
+ image_url: $el.find("div.left img").attr("src") ?? null,
+ source_url: href,
});
+ });
- return events;
+ return events;
+}
+
+export const scraper: Scraper = {
+ venue,
+ async scrape(): Promise<EventInput[]> {
+ const now = new Date();
+ const urls = [0, 1, 2].map((offset) => {
+ const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
+ return `https://www.liquidroom.net/schedule/${d.getFullYear()}/${String(d.getMonth() + 1).padStart(2, "0")}`;
+ });
+
+ const htmls = await Promise.all(
+ urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : "")))
+ );
+
+ const seen = new Set<string>();
+ return htmls.flatMap(parseHtml).filter((e) => {
+ if (seen.has(e.source_url ?? e.title)) return false;
+ seen.add(e.source_url ?? e.title);
+ return true;
+ });
},
};