diff options
| author | yyamashita <yyamashita@mosquit.one> | 2026-05-10 23:22:17 +0900 |
|---|---|---|
| committer | yyamashita <yyamashita@mosquit.one> | 2026-05-10 23:22:17 +0900 |
| commit | b56e79b5b288b7c9e2fef396b303afc32c9baf5d (patch) | |
| tree | 28080f7f019889659ef1682f4d3661ed9650da54 /app/scrapers/liquid-room.ts | |
| parent | 05d2b35a85a46dde9a1264d3002ba86e02e3d5eb (diff) | |
Fix multi-month scrape coverage and add duo MUSIC EXCHANGE
- Extend 8 scrapers (liquid-room, shibuya-o, club-quattro, meets-otsuka,
nishieifuku-jam, fever-shindaita, fad-yokohama, and new duo-music-exchange)
to fetch 3 calendar months instead of 1-2, covering the full 65-day window
- Add duo MUSIC EXCHANGE scraper (渋谷, ~700 cap, /schedule/YYYY/index_YYYY-MM.html)
- Add npm test: Node.js built-in test runner verifies each scraper fetches
all required month URLs via mocked fetch (10 tests, no extra deps)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app/scrapers/liquid-room.ts')
| -rw-r--r-- | app/scrapers/liquid-room.ts | 110 |
1 file changed, 62 insertions, 48 deletions
```diff
diff --git a/app/scrapers/liquid-room.ts b/app/scrapers/liquid-room.ts
index 1eeade6..a1265c8 100644
--- a/app/scrapers/liquid-room.ts
+++ b/app/scrapers/liquid-room.ts
@@ -10,62 +10,76 @@ export const venue: VenueMeta = {
   capacity: 1000,
 };
 
-export const scraper: Scraper = {
-  venue,
-  async scrape(): Promise<EventInput[]> {
-    const res = await fetch("https://www.liquidroom.net/schedule");
-    if (!res.ok) throw new Error(`HTTP ${res.status}`);
-    const html = await res.text();
-    const $ = cheerio.load(html);
-    const events: EventInput[] = [];
+function parseHtml(html: string): EventInput[] {
+  const $ = cheerio.load(html);
+  const events: EventInput[] = [];
 
-    $("article").each((_, el) => {
-      const $el = $(el);
+  $("article").each((_, el) => {
+    const $el = $(el);
 
-      const href = $el.find("a.s_link").attr("href") ?? "";
-      // Date is encoded in the URL: e.g. /schedule/eventname_20260501
-      const dateMatch = href.match(/_(\d{4})(\d{2})(\d{2})$/);
-      if (!dateMatch) return;
-      const date = `${dateMatch[1]}-${dateMatch[2]}-${dateMatch[3]}`;
+    const href = $el.find("a.s_link").attr("href") ?? "";
+    const dateMatch = href.match(/_(\d{4})(\d{2})(\d{2})$/);
+    if (!dateMatch) return;
+    const date = `${dateMatch[1]}-${dateMatch[2]}-${dateMatch[3]}`;
 
-      const h2 = $el.find("h2").first().text().trim();
-      if (!h2) return;
+    const h2 = $el.find("h2").first().text().trim();
+    if (!h2) return;
 
-      const subtitle = $el.find("p.subtitle").first().text().trim();
-      // h2 is the artist/band name; subtitle (if present) is the event title
-      const title = subtitle || h2;
-      const artist = subtitle ? h2 : null;
+    const subtitle = $el.find("p.subtitle").first().text().trim();
+    const title = subtitle || h2;
+    const artist = subtitle ? h2 : null;
 
-      const openTime =
-        $el
-          .find("dl")
-          .filter((_, dl) => $(dl).find("dt").text().includes("OPEN"))
-          .find("dd")
-          .text()
-          .trim()
-          .match(/\d{2}:\d{2}/)?.[0] ?? null;
+    const openTime =
+      $el
+        .find("dl")
+        .filter((_, dl) => $(dl).find("dt").text().includes("OPEN"))
+        .find("dd")
+        .text()
+        .trim()
+        .match(/\d{2}:\d{2}/)?.[0] ?? null;
 
-      const startTime =
-        $el
-          .find("dl")
-          .filter((_, dl) => $(dl).find("dt").text().includes("START"))
-          .find("dd")
-          .text()
-          .trim()
-          .match(/\d{2}:\d{2}/)?.[0] ?? null;
+    const startTime =
+      $el
+        .find("dl")
+        .filter((_, dl) => $(dl).find("dt").text().includes("START"))
+        .find("dd")
+        .text()
+        .trim()
+        .match(/\d{2}:\d{2}/)?.[0] ?? null;
 
-      events.push({
-        venue_id: venue.id,
-        title,
-        artist,
-        date,
-        open_time: openTime,
-        start_time: startTime,
-        image_url: $el.find("div.left img").attr("src") ?? null,
-        source_url: href,
-      });
+    events.push({
+      venue_id: venue.id,
+      title,
+      artist,
+      date,
+      open_time: openTime,
+      start_time: startTime,
+      image_url: $el.find("div.left img").attr("src") ?? null,
+      source_url: href,
     });
+  });
 
-    return events;
+  return events;
+}
+
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    const now = new Date();
+    const urls = [0, 1, 2].map((offset) => {
+      const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
+      return `https://www.liquidroom.net/schedule/${d.getFullYear()}/${String(d.getMonth() + 1).padStart(2, "0")}`;
+    });
+
+    const htmls = await Promise.all(
+      urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : "")))
+    );
+
+    const seen = new Set<string>();
+    return htmls.flatMap(parseHtml).filter((e) => {
+      if (seen.has(e.source_url ?? e.title)) return false;
+      seen.add(e.source_url ?? e.title);
+      return true;
+    });
   },
 };
```
