diff options
| author | yyamashita <yyamashita@mosquit.one> | 2026-05-10 23:22:17 +0900 |
|---|---|---|
| committer | yyamashita <yyamashita@mosquit.one> | 2026-05-10 23:22:17 +0900 |
| commit | b56e79b5b288b7c9e2fef396b303afc32c9baf5d (patch) | |
| tree | 28080f7f019889659ef1682f4d3661ed9650da54 /app/scrapers/meets-otsuka.ts | |
| parent | 05d2b35a85a46dde9a1264d3002ba86e02e3d5eb (diff) | |
Fix multi-month scrape coverage and add duo MUSIC EXCHANGE
- Extend 8 scrapers (liquid-room, shibuya-o, club-quattro, meets-otsuka,
nishieifuku-jam, fever-shindaita, fad-yokohama, and the new duo-music-exchange)
to fetch 3 calendar months instead of 1-2, covering the full 65-day window
- Add duo MUSIC EXCHANGE scraper (渋谷, ~700 cap, /schedule/YYYY/index_YYYY-MM.html)
- Add `npm test`: the Node.js built-in test runner verifies that each scraper fetches
all required month URLs via a mocked fetch (10 tests, no extra dependencies)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app/scrapers/meets-otsuka.ts')
| -rw-r--r-- | app/scrapers/meets-otsuka.ts | 116 |
1 file changed, 66 insertions, 50 deletions
diff --git a/app/scrapers/meets-otsuka.ts b/app/scrapers/meets-otsuka.ts index 0b56251..0acc925 100644 --- a/app/scrapers/meets-otsuka.ts +++ b/app/scrapers/meets-otsuka.ts @@ -21,60 +21,76 @@ export const venue: VenueMeta = { capacity: 100, }; +function parseHtml(html: string): EventInput[] { + const $ = cheerio.load(html); + const events: EventInput[] = []; + + $("div.blog-entry.event-wrap").each((_, el) => { + const $el = $(el); + + const date = $el.attr("event-date") ?? ""; + if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return; + + const $link = $el.find("h2 a").first(); + const title = $link.text().trim(); + if (!title) return; + + const detailPath = $link.attr("href") ?? null; + const sourceUrl = detailPath ? `${venue.url}${detailPath}` : null; + + const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null; + + const timeText = $el.find("p.time").first().text(); + const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i); + const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i); + + const price = $el.find("span.ticket-price__label").first().text().trim() || null; + + const bgStyle = $el.find("div.image-bg").attr("style") ?? ""; + const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/); + const imageUrl = imgMatch?.[1] ?? null; + + const ticketUrl = + $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket']") + .first().attr("href") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? 
null, + price, + ticket_url: ticketUrl, + image_url: imageUrl, + source_url: sourceUrl, + }); + }); + + return events; +} + export const scraper: Scraper = { venue, async scrape(): Promise<EventInput[]> { - const res = await fetch("https://meets.rinky.info/events"); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - const $ = cheerio.load(await res.text()); - const events: EventInput[] = []; - - $("div.blog-entry.event-wrap").each((_, el) => { - const $el = $(el); - - const date = $el.attr("event-date") ?? ""; - if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return; - - const $link = $el.find("h2 a").first(); - const title = $link.text().trim(); - if (!title) return; - - const detailPath = $link.attr("href") ?? null; - const sourceUrl = detailPath - ? `${venue.url}${detailPath}` - : null; - - const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null; - - const timeText = $el.find("p.time").first().text(); - const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i); - const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i); - - const price = $el.find("span.ticket-price__label").first().text().trim() || null; - - // background-image: url("...") - const bgStyle = $el.find("div.image-bg").attr("style") ?? ""; - const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/); - const imageUrl = imgMatch?.[1] ?? null; - - const ticketUrl = - $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket']") - .first().attr("href") ?? null; - - events.push({ - venue_id: venue.id, - title, - artist, - date, - open_time: openMatch?.[1] ?? null, - start_time: startMatch?.[1] ?? 
null, - price, - ticket_url: ticketUrl, - image_url: imageUrl, - source_url: sourceUrl, - }); + const now = new Date(); + const urls = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + const ym = `${d.getFullYear()}/${String(d.getMonth() + 1).padStart(2, "0")}`; + return `https://meets.rinky.info/events?date=${encodeURIComponent(ym)}`; }); - return events; + const htmls = await Promise.all( + urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : ""))) + ); + + const seen = new Set<string>(); + return htmls.flatMap(parseHtml).filter((e) => { + const key = `${e.date}|${e.title}`; + if (seen.has(key)) return false; + seen.add(key); + return true; + }); }, }; |
