From b56e79b5b288b7c9e2fef396b303afc32c9baf5d Mon Sep 17 00:00:00 2001
From: yyamashita
Date: Sun, 10 May 2026 23:22:17 +0900
Subject: Fix multi-month scrape coverage and add duo MUSIC EXCHANGE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Extend 8 scrapers (liquid-room, shibuya-o, club-quattro, meets-otsuka,
  nishieifuku-jam, fever-shindaita, fad-yokohama, and new duo-music-exchange)
  to fetch 3 calendar months instead of 1-2, covering the full 65-day window
- Add duo MUSIC EXCHANGE scraper (渋谷, ~700 cap, /schedule/YYYY/index_YYYY-MM.html)
- Add npm test: Node.js built-in test runner verifies each scraper fetches
  all required month URLs via mocked fetch (10 tests, no extra deps)

Co-Authored-By: Claude Sonnet 4.6
---
 app/scrapers/shibuya-o.ts | 45 +++++++++++++++++++--------------------------
 1 file changed, 19 insertions(+), 26 deletions(-)

(limited to 'app/scrapers/shibuya-o.ts')

diff --git a/app/scrapers/shibuya-o.ts b/app/scrapers/shibuya-o.ts
index c674cfc..6d394ff 100644
--- a/app/scrapers/shibuya-o.ts
+++ b/app/scrapers/shibuya-o.ts
@@ -26,42 +26,25 @@ export const venue: VenueMeta = {
 const SUB_VENUES = ["east", "west", "crest", "nest"];
 const BASE = "https://shibuya-o.com";
 
-async function scrapeVenue(subVenue: string): Promise<EventInput[]> {
-  const url = `${BASE}/${subVenue}/schedule/`;
+async function scrapeVenueMonth(subVenue: string, year: number, month: number): Promise<EventInput[]> {
+  const url = `${BASE}/${subVenue}/schedule/?y=${year}&m=${month}`;
   const res = await fetch(url);
   if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
   const $ = cheerio.load(await res.text());
   const events: EventInput[] = [];
 
-  // Extract year: try "next" nav link (?y=YYYY&m=MM)
-  const nextHref = $("a[href*='?y='][href*='&m=']").last().attr("href") ?? "";
-  const nextYearMatch = nextHref.match(/[?&]y=(\d{4})/);
-  const nextMonthMatch = nextHref.match(/[?&]m=(\d{1,2})/);
-  const currentMonthRaw = $("div.p-schedule__month").first().text().trim();
-  const currentMonth = parseInt(currentMonthRaw, 10);
-
-  let year = new Date().getFullYear();
-  if (nextYearMatch && nextMonthMatch) {
-    const nextYear = parseInt(nextYearMatch[1], 10);
-    const nextMonth = parseInt(nextMonthMatch[1], 10);
-    // If next month == current month + 1 (normal case), year == nextYear
-    // If current month == 12 and next month == 1, year == nextYear - 1
-    year = nextMonth === currentMonth + 1 ? nextYear : nextYear - 1;
-  }
-
   $("div.p-scheduled-card").each((_, el) => {
     const $el = $(el);
     const dateRaw = $el.find("span.p-scheduled-card__date-item").first().text().trim();
-    // "05 / 01" → month=5, day=1
     const dateMatch = dateRaw.match(/(\d{1,2})\s*\/\s*(\d{1,2})/);
     if (!dateMatch) return;
-    const month = parseInt(dateMatch[1], 10);
+    const cardMonth = parseInt(dateMatch[1], 10);
     const day = parseInt(dateMatch[2], 10);
-    if (!currentMonth || !month) return;
-    // Handle year rollover (December cards on January page, etc.)
-    const cardYear = month < currentMonth ? year + 1 : year;
-    const date = `${cardYear}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`;
+    if (!cardMonth) return;
+    // Handle year rollover (e.g. December page showing January events)
+    const cardYear = cardMonth < month ? year + 1 : year;
+    const date = `${cardYear}-${String(cardMonth).padStart(2, "0")}-${String(day).padStart(2, "0")}`;
 
     const title = $el.find("span.p-scheduled-card__title-main").first().text().trim();
     if (!title) return;
 
@@ -99,12 +82,22 @@ async function scrapeVenue(subVenue: string): Promise<EventInput[]> {
 export const scraper: Scraper = {
   venue,
   async scrape(): Promise<EventInput[]> {
-    const results = await Promise.allSettled(SUB_VENUES.map(scrapeVenue));
+    const now = new Date();
+    const months = [0, 1, 2].map((offset) => {
+      const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
+      return { year: d.getFullYear(), month: d.getMonth() + 1 };
+    });
+
+    const tasks = SUB_VENUES.flatMap((sub) =>
+      months.map(({ year, month }) => scrapeVenueMonth(sub, year, month))
+    );
+    const results = await Promise.allSettled(tasks);
+
     const all: EventInput[] = [];
     for (const r of results) {
       if (r.status === "fulfilled") all.push(...r.value);
     }
-    // Deduplicate by date + title
+
     const seen = new Set();
     return all.filter((e) => {
       const key = `${e.date}|${e.title}`;
-- 
cgit v1.2.3