From b56e79b5b288b7c9e2fef396b303afc32c9baf5d Mon Sep 17 00:00:00 2001
From: yyamashita
Date: Sun, 10 May 2026 23:22:17 +0900
Subject: Fix multi-month scrape coverage and add duo MUSIC EXCHANGE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Extend 8 scrapers (liquid-room, shibuya-o, club-quattro, meets-otsuka, nishieifuku-jam, fever-shindaita, fad-yokohama, and new duo-music-exchange) to fetch 3 calendar months instead of 1-2, covering the full 65-day window
- Add duo MUSIC EXCHANGE scraper (渋谷, ~700 cap, /schedule/YYYY/index_YYYY-MM.html)
- Add npm test: Node.js built-in test runner verifies each scraper fetches all required month URLs via mocked fetch (10 tests, no extra deps)

Co-Authored-By: Claude Sonnet 4.6
---
 app/scrapers/club-quattro.ts | 94 ++++++++++++++++++++++++++------------------
 1 file changed, 56 insertions(+), 38 deletions(-)

(limited to 'app/scrapers/club-quattro.ts')

diff --git a/app/scrapers/club-quattro.ts b/app/scrapers/club-quattro.ts
index 10b60e9..cbb898e 100644
--- a/app/scrapers/club-quattro.ts
+++ b/app/scrapers/club-quattro.ts
@@ -10,53 +10,71 @@ export const venue: VenueMeta = {
   capacity: 750,
 };
 
-export const scraper: Scraper = {
-  venue,
-  async scrape(): Promise<EventInput[]> {
-    const res = await fetch("https://www.club-quattro.com/shibuya/schedule/");
-    if (!res.ok) throw new Error(`HTTP ${res.status}`);
-    const html = await res.text();
-    const $ = cheerio.load(html);
-    const events: EventInput[] = [];
+function parseHtml(html: string): EventInput[] {
+  const $ = cheerio.load(html);
+  const events: EventInput[] = [];
 
-    $("li[data-event-date]").each((_, el) => {
-      const $el = $(el);
+  $("li[data-event-date]").each((_, el) => {
+    const $el = $(el);
 
-      const date = $el.attr("data-event-date") ?? "";
-      if (!date) return;
+    const date = $el.attr("data-event-date") ?? "";
+    if (!date) return;
 
-      const title = $el.find("p.txt-02").text().trim();
-      if (!title) return;
+    const title = $el.find("p.txt-02").text().trim();
+    if (!title) return;
 
-      const artist = $el.find("p.txt-01 span").text().trim() || null;
+    const artist = $el.find("p.txt-01 span").text().trim() || null;
 
-      let openTime: string | null = null;
-      let startTime: string | null = null;
-      $el.find("dl.detail-list .bundle").each((_, bundle) => {
-        const label = $(bundle).find("dt").text().trim();
-        if (label.includes("開場") || label.includes("開演")) {
-          const times = $(bundle).find("dd").text().trim().match(/\d{2}:\d{2}/g) ?? [];
-          openTime = times[0] ?? null;
-          startTime = times[1] ?? null;
-        }
-      });
+    let openTime: string | null = null;
+    let startTime: string | null = null;
+    $el.find("dl.detail-list .bundle").each((_, bundle) => {
+      const label = $(bundle).find("dt").text().trim();
+      if (label.includes("開場") || label.includes("開演")) {
+        const times = $(bundle).find("dd").text().trim().match(/\d{2}:\d{2}/g) ?? [];
+        openTime = times[0] ?? null;
+        startTime = times[1] ?? null;
+      }
+    });
 
-      const href = $el.find("a").first().attr("href") ?? null;
-      const imageSrc = $el.find(".front img").attr("src") ?? null;
+    const href = $el.find("a").first().attr("href") ?? null;
+    const imageSrc = $el.find(".front img").attr("src") ?? null;
 
-      events.push({
-        venue_id: venue.id,
-        title,
-        artist,
-        date,
-        open_time: openTime,
-        start_time: startTime,
-        image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null,
-        source_url: href ? absoluteUrl(href, venue.url) : null,
-      });
+    events.push({
+      venue_id: venue.id,
+      title,
+      artist,
+      date,
+      open_time: openTime,
+      start_time: startTime,
+      image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null,
+      source_url: href ? absoluteUrl(href, venue.url) : null,
     });
+  });
 
-    return events;
+  return events;
+}
+
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    const now = new Date();
+    const urls = [0, 1, 2].map((offset) => {
+      const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
+      const ym = `${d.getFullYear()}${String(d.getMonth() + 1).padStart(2, "0")}`;
+      return `https://www.club-quattro.com/shibuya/schedule/?ym=${ym}`;
+    });
+
+    const htmls = await Promise.all(
+      urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : "")))
+    );
+
+    const seen = new Set();
+    return htmls.flatMap(parseHtml).filter((e) => {
+      const key = `${e.date}|${e.title}`;
+      if (seen.has(key)) return false;
+      seen.add(key);
+      return true;
+    });
   },
 };
-- 
cgit v1.2.3