diff options
| author | yyamashita <yyamashita@mosquit.one> | 2026-05-07 22:06:24 +0900 |
|---|---|---|
| committer | yyamashita <yyamashita@mosquit.one> | 2026-05-07 22:06:24 +0900 |
| commit | b4823d4c124160dcba8e5fed1e424cf2cc12e72c (patch) | |
| tree | 573de8cf82704320b3f308eaa0112e590e5f0aa5 /app/scrapers/www-shibuya.ts | |
| parent | b8537eabe94b24e8530b4c1511456dc94cf8ec4c (diff) | |
Fix scrapers returning 0 events for 4 venues
Rewrote selectors to match actual HTML structure after inspecting each site:
- LIQUID ROOM: article selector, date extracted from URL (_YYYYMMDD suffix)
- WWW/WWW X: article.column selector, month from li.month nav, day rollover detection
- 新宿 LOFT: section.block_schedule_list links, full date from time div.year/month/day
- CLUB QUATTRO: li[data-event-date] with ISO date attribute, jp-label time parsing
Result: 0件 → LIQUID ROOM 25件 / WWW 48件 / LOFT 13件 / QUATTRO 24件
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app/scrapers/www-shibuya.ts')
| -rw-r--r-- | app/scrapers/www-shibuya.ts | 70 |
1 file changed, 32 insertions, 38 deletions
diff --git a/app/scrapers/www-shibuya.ts b/app/scrapers/www-shibuya.ts index 905fc61..d561332 100644 --- a/app/scrapers/www-shibuya.ts +++ b/app/scrapers/www-shibuya.ts @@ -1,6 +1,3 @@ -/** - * WWW / WWW X (渋谷) — https://www-shibuya.jp/schedule/ - */ import * as cheerio from "cheerio"; import type { Scraper, VenueMeta } from "./base"; import type { EventInput } from "~/lib/db.server"; @@ -21,39 +18,48 @@ export const scraper: Scraper = { const $ = cheerio.load(html); const events: EventInput[] = []; - $(".schedule-list li, .p-schedule-item, article").each((_, el) => { + // Month from nav: "202605May" → year=2026, month=5 + const monthText = $("li.month").first().text().trim(); + const monthMatch = monthText.match(/(\d{4})(\d{2})/); + let year = monthMatch ? parseInt(monthMatch[1]) : new Date().getFullYear(); + let month = monthMatch ? parseInt(monthMatch[2]) : new Date().getMonth() + 1; + let prevDay = 0; + + $("article.column").each((_, el) => { const $el = $(el); - const title = $el.find(".schedule-title, .title, h3, h2").first().text().trim(); - if (!title) return; + const day = parseInt($el.find(".date .day").text().trim(), 10); + if (!day) return; - const rawDate = - $el.find(".schedule-date, .date, time").first().text().trim() || - $el.find("time").attr("datetime") || - ""; - const date = parseJapaneseDate(rawDate); - if (!date) return; + // Detect month rollover when day numbers reset + if (prevDay > 0 && day < prevDay) { + month++; + if (month > 12) { + month = 1; + year++; + } + } + prevDay = day; - const timeText = $el.find(".schedule-time, .time").first().text(); - const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i); - const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i); + const date = `${year}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`; + + const title = $el.find("h3").text().trim(); + if (!title) return; - const detailHref = $el.find("a").first().attr("href") ?? 
null; + const timeText = $el.find(".openstart").text(); + const times = timeText.match(/\d{2}:\d{2}/g) ?? []; + + const href = $el.find("a").first().attr("href") ?? null; events.push({ venue_id: venue.id, title, - artist: $el.find(".artist").first().text().trim() || null, + artist: null, date, - open_time: openMatch?.[1] ?? null, - start_time: startMatch?.[1] ?? null, - ticket_url: - $el.find("a[href*='eplus'], a[href*='pia'], a[href*='ticket']").first().attr("href") ?? null, - image_url: - $el.find("img").first().attr("src") - ? absoluteUrl($el.find("img").first().attr("src")!, venue.url) - : null, - source_url: detailHref ? absoluteUrl(detailHref, venue.url) : null, + open_time: times[0] ?? null, + start_time: times[1] ?? null, + image_url: null, + source_url: href ? absoluteUrl(href, venue.url) : null, }); }); @@ -61,18 +67,6 @@ export const scraper: Scraper = { }, }; -function parseJapaneseDate(raw: string): string | null { - const m = - raw.match(/(\d{4})[./年](\d{1,2})[./月](\d{1,2})/) || - raw.match(/(\d{1,2})[./月](\d{1,2})/); - if (!m) return null; - if (m.length === 4) { - return `${m[1]}-${m[2].padStart(2, "0")}-${m[3].padStart(2, "0")}`; - } - const year = new Date().getFullYear(); - return `${year}-${m[1].padStart(2, "0")}-${m[2].padStart(2, "0")}`; -} - function absoluteUrl(url: string, base: string): string { if (url.startsWith("http")) return url; return url.startsWith("/") ? base + url : `${base}/${url}`; |
