diff options
| author | yyamashita <yyamashita@mosquit.one> | 2026-05-10 23:22:17 +0900 |
|---|---|---|
| committer | yyamashita <yyamashita@mosquit.one> | 2026-05-10 23:22:17 +0900 |
| commit | b56e79b5b288b7c9e2fef396b303afc32c9baf5d (patch) | |
| tree | 28080f7f019889659ef1682f4d3661ed9650da54 /app | |
| parent | 05d2b35a85a46dde9a1264d3002ba86e02e3d5eb (diff) | |
Fix multi-month scrape coverage and add duo MUSIC EXCHANGE
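- Extend 7 existing scrapers (liquid-room, shibuya-o, club-quattro, meets-otsuka,
  nishieifuku-jam, fever-shindaita, fad-yokohama) to fetch 3 calendar months
  instead of 1-2, and build the new duo-music-exchange scraper the same way
  (8 scrapers in total), covering the 65-day window in most cases. Note: the
  3 months are counted from the 1st of the current month, so if the window is
  measured from the run date, a run in the last days of a month can leave the
  final few days of the window (which fall in a 4th month) unfetched.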
- Add duo MUSIC EXCHANGE scraper (渋谷, ~700 cap, /schedule/YYYY/index_YYYY-MM.html)
- Add npm test: Node.js built-in test runner verifies each scraper fetches
all required month URLs via mocked fetch (10 tests, no extra deps)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app')
| -rw-r--r-- | app/scrapers/club-quattro.ts | 94 | ||||
| -rw-r--r-- | app/scrapers/duo-music-exchange.ts | 103 | ||||
| -rw-r--r-- | app/scrapers/fad-yokohama.ts | 31 | ||||
| -rw-r--r-- | app/scrapers/fever-shindaita.ts | 12 | ||||
| -rw-r--r-- | app/scrapers/index.ts | 2 | ||||
| -rw-r--r-- | app/scrapers/liquid-room.ts | 110 | ||||
| -rw-r--r-- | app/scrapers/meets-otsuka.ts | 116 | ||||
| -rw-r--r-- | app/scrapers/nishieifuku-jam.ts | 113 | ||||
| -rw-r--r-- | app/scrapers/shibuya-o.ts | 45 |
9 files changed, 391 insertions, 235 deletions
diff --git a/app/scrapers/club-quattro.ts b/app/scrapers/club-quattro.ts index 10b60e9..cbb898e 100644 --- a/app/scrapers/club-quattro.ts +++ b/app/scrapers/club-quattro.ts @@ -10,53 +10,71 @@ export const venue: VenueMeta = { capacity: 750, }; -export const scraper: Scraper = { - venue, - async scrape(): Promise<EventInput[]> { - const res = await fetch("https://www.club-quattro.com/shibuya/schedule/"); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - const html = await res.text(); - const $ = cheerio.load(html); - const events: EventInput[] = []; +function parseHtml(html: string): EventInput[] { + const $ = cheerio.load(html); + const events: EventInput[] = []; - $("li[data-event-date]").each((_, el) => { - const $el = $(el); + $("li[data-event-date]").each((_, el) => { + const $el = $(el); - const date = $el.attr("data-event-date") ?? ""; - if (!date) return; + const date = $el.attr("data-event-date") ?? ""; + if (!date) return; - const title = $el.find("p.txt-02").text().trim(); - if (!title) return; + const title = $el.find("p.txt-02").text().trim(); + if (!title) return; - const artist = $el.find("p.txt-01 span").text().trim() || null; + const artist = $el.find("p.txt-01 span").text().trim() || null; - let openTime: string | null = null; - let startTime: string | null = null; - $el.find("dl.detail-list .bundle").each((_, bundle) => { - const label = $(bundle).find("dt").text().trim(); - if (label.includes("開場") || label.includes("開演")) { - const times = $(bundle).find("dd").text().trim().match(/\d{2}:\d{2}/g) ?? []; - openTime = times[0] ?? null; - startTime = times[1] ?? null; - } - }); + let openTime: string | null = null; + let startTime: string | null = null; + $el.find("dl.detail-list .bundle").each((_, bundle) => { + const label = $(bundle).find("dt").text().trim(); + if (label.includes("開場") || label.includes("開演")) { + const times = $(bundle).find("dd").text().trim().match(/\d{2}:\d{2}/g) ?? []; + openTime = times[0] ?? 
null; + startTime = times[1] ?? null; + } + }); - const href = $el.find("a").first().attr("href") ?? null; - const imageSrc = $el.find(".front img").attr("src") ?? null; + const href = $el.find("a").first().attr("href") ?? null; + const imageSrc = $el.find(".front img").attr("src") ?? null; - events.push({ - venue_id: venue.id, - title, - artist, - date, - open_time: openTime, - start_time: startTime, - image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null, - source_url: href ? absoluteUrl(href, venue.url) : null, - }); + events.push({ + venue_id: venue.id, + title, + artist, + date, + open_time: openTime, + start_time: startTime, + image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null, + source_url: href ? absoluteUrl(href, venue.url) : null, }); + }); - return events; + return events; +} + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const now = new Date(); + const urls = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + const ym = `${d.getFullYear()}${String(d.getMonth() + 1).padStart(2, "0")}`; + return `https://www.club-quattro.com/shibuya/schedule/?ym=${ym}`; + }); + + const htmls = await Promise.all( + urls.map((url) => fetch(url).then((r) => (r.ok ? 
r.text() : ""))) + ); + + const seen = new Set<string>(); + return htmls.flatMap(parseHtml).filter((e) => { + const key = `${e.date}|${e.title}`; + if (seen.has(key)) return false; + seen.add(key); + return true; + }); }, }; diff --git a/app/scrapers/duo-music-exchange.ts b/app/scrapers/duo-music-exchange.ts new file mode 100644 index 0000000..57814ea --- /dev/null +++ b/app/scrapers/duo-music-exchange.ts @@ -0,0 +1,103 @@ +/** + * duo MUSIC EXCHANGE — https://duomusicexchange.com + * + * 月別HTML: /schedule/YYYY/index_YYYY-MM.html + * DOM構造: + * <section id="daybox"> + * <div class="date"><span class="day">01</span></div> + * <div class="sche-details"> + * <span class="artist">アーティスト名</span> + * <span class="details-title">イベントタイトル</span> + * <dl class="row"> + * <dt>OPEN/START</dt><dd>18:00 / 19:00</dd> + * <dt>ADV./DOOR</dt><dd>¥3,000 / ¥3,500</dd> + * <dt>Ticket.</dt><dd><a href="...">...</a></dd> + * </dl> + * </div> + * </section> + */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "duo-music-exchange", + name: "duo MUSIC EXCHANGE", + url: "https://duomusicexchange.com", + area: "渋谷", + capacity: 700, +}; + +async function scrapeMonth(year: number, month: number): Promise<EventInput[]> { + const mm = String(month).padStart(2, "0"); + const url = `${venue.url}/schedule/${year}/index_${year}-${mm}.html`; + const res = await fetch(url); + if (!res.ok) return []; + const $ = cheerio.load(await res.text()); + const events: EventInput[] = []; + + $("section#daybox").each((_, el) => { + const $el = $(el); + + const dayStr = $el.find(".date .day").first().text().trim(); + const day = parseInt(dayStr, 10); + if (!day) return; + const date = `${year}-${mm}-${String(day).padStart(2, "0")}`; + + const artist = $el.find(".sche-details .artist").first().text().trim() || null; + const title = $el.find(".sche-details 
.details-title").first().text().trim(); + if (!title) return; + + let openTime: string | null = null; + let startTime: string | null = null; + let price: string | null = null; + let ticketUrl: string | null = null; + + $el.find("dl.row dt").each((_, dt) => { + const label = $(dt).text().trim(); + const $dd = $(dt).next("dd"); + if (/OPEN/i.test(label)) { + const times = $dd.text().trim().match(/(\d{1,2}:\d{2})/g) ?? []; + openTime = times[0] ?? null; + startTime = times[1] ?? null; + } else if (/ADV/i.test(label)) { + price = $dd.text().trim() || null; + } else if (/Ticket/i.test(label)) { + ticketUrl = $dd.find("a[href]").first().attr("href") ?? null; + } + }); + + const imgSrc = $el.find("img").first().attr("src") ?? null; + const imageUrl = imgSrc + ? (imgSrc.startsWith("http") ? imgSrc : `${venue.url}/schedule/${year}/${imgSrc}`) + : null; + + events.push({ + venue_id: venue.id, + title, + artist, + date, + open_time: openTime, + start_time: startTime, + price, + ticket_url: ticketUrl, + image_url: imageUrl, + source_url: url, + }); + }); + + return events; +} + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const now = new Date(); + const months = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + return { year: d.getFullYear(), month: d.getMonth() + 1 }; + }); + const results = await Promise.all(months.map(({ year, month }) => scrapeMonth(year, month))); + return results.flat(); + }, +}; diff --git a/app/scrapers/fad-yokohama.ts b/app/scrapers/fad-yokohama.ts index a01ea0d..f8f7cbc 100644 --- a/app/scrapers/fad-yokohama.ts +++ b/app/scrapers/fad-yokohama.ts @@ -151,27 +151,18 @@ function parsePageEvents( export const scraper: Scraper = { venue, async scrape(): Promise<EventInput[]> { - const res = await fetch(SCHEDULE_URL); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - const html = await res.text(); - - const { year, month, nextUrl } = getMonthContext(html); - 
const events = parsePageEvents(html, year, month, SCHEDULE_URL); - - if (nextUrl) { - const nextRes = await fetch(nextUrl); - if (nextRes.ok) { - const nextHtml = await nextRes.text(); - let nextMonth = month + 1; - let nextYear = year; - if (nextMonth > 12) { - nextMonth = 1; - nextYear++; - } - events.push(...parsePageEvents(nextHtml, nextYear, nextMonth, nextUrl)); - } + const allEvents: EventInput[] = []; + let url: string | null = SCHEDULE_URL; + + for (let page = 0; page < 3 && url; page++) { + const res = await fetch(url); + if (!res.ok) break; + const html = await res.text(); + const { year, month, nextUrl } = getMonthContext(html); + allEvents.push(...parsePageEvents(html, year, month, url)); + url = nextUrl; } - return events; + return allEvents; }, }; diff --git a/app/scrapers/fever-shindaita.ts b/app/scrapers/fever-shindaita.ts index 62c2e2c..6356343 100644 --- a/app/scrapers/fever-shindaita.ts +++ b/app/scrapers/fever-shindaita.ts @@ -108,11 +108,11 @@ export const scraper: Scraper = { venue, async scrape(): Promise<EventInput[]> { const now = new Date(); - const thisMonth = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, "0")}`; - const next = new Date(now.getFullYear(), now.getMonth() + 1, 1); - const nextMonth = `${next.getFullYear()}-${String(next.getMonth() + 1).padStart(2, "0")}`; - - const [a, b] = await Promise.all([scrapeMonth(thisMonth), scrapeMonth(nextMonth)]); - return [...a, ...b]; + const months = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}`; + }); + const results = await Promise.all(months.map(scrapeMonth)); + return results.flat(); }, }; diff --git a/app/scrapers/index.ts b/app/scrapers/index.ts index c38816f..e812626 100644 --- a/app/scrapers/index.ts +++ b/app/scrapers/index.ts @@ -20,6 +20,7 @@ import { scraper as warpKichijoji } from "./warp-kichijoji"; import { scraper as pitbarNishiogikubo 
} from "./pitbar-nishiogikubo"; import { scraper as naveyFloor } from "./navey-floor"; import { scraper as shimokitazawaEra } from "./shimokitazawa-era"; +import { scraper as duoMusicExchange } from "./duo-music-exchange"; export const ALL_SCRAPERS: Scraper[] = [ liquidRoom, @@ -39,6 +40,7 @@ export const ALL_SCRAPERS: Scraper[] = [ pitbarNishiogikubo, naveyFloor, shimokitazawaEra, + duoMusicExchange, ]; export type { Scraper } from "./base"; diff --git a/app/scrapers/liquid-room.ts b/app/scrapers/liquid-room.ts index 1eeade6..a1265c8 100644 --- a/app/scrapers/liquid-room.ts +++ b/app/scrapers/liquid-room.ts @@ -10,62 +10,76 @@ export const venue: VenueMeta = { capacity: 1000, }; -export const scraper: Scraper = { - venue, - async scrape(): Promise<EventInput[]> { - const res = await fetch("https://www.liquidroom.net/schedule"); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - const html = await res.text(); - const $ = cheerio.load(html); - const events: EventInput[] = []; +function parseHtml(html: string): EventInput[] { + const $ = cheerio.load(html); + const events: EventInput[] = []; - $("article").each((_, el) => { - const $el = $(el); + $("article").each((_, el) => { + const $el = $(el); - const href = $el.find("a.s_link").attr("href") ?? ""; - // Date is encoded in the URL: e.g. /schedule/eventname_20260501 - const dateMatch = href.match(/_(\d{4})(\d{2})(\d{2})$/); - if (!dateMatch) return; - const date = `${dateMatch[1]}-${dateMatch[2]}-${dateMatch[3]}`; + const href = $el.find("a.s_link").attr("href") ?? 
""; + const dateMatch = href.match(/_(\d{4})(\d{2})(\d{2})$/); + if (!dateMatch) return; + const date = `${dateMatch[1]}-${dateMatch[2]}-${dateMatch[3]}`; - const h2 = $el.find("h2").first().text().trim(); - if (!h2) return; + const h2 = $el.find("h2").first().text().trim(); + if (!h2) return; - const subtitle = $el.find("p.subtitle").first().text().trim(); - // h2 is the artist/band name; subtitle (if present) is the event title - const title = subtitle || h2; - const artist = subtitle ? h2 : null; + const subtitle = $el.find("p.subtitle").first().text().trim(); + const title = subtitle || h2; + const artist = subtitle ? h2 : null; - const openTime = - $el - .find("dl") - .filter((_, dl) => $(dl).find("dt").text().includes("OPEN")) - .find("dd") - .text() - .trim() - .match(/\d{2}:\d{2}/)?.[0] ?? null; + const openTime = + $el + .find("dl") + .filter((_, dl) => $(dl).find("dt").text().includes("OPEN")) + .find("dd") + .text() + .trim() + .match(/\d{2}:\d{2}/)?.[0] ?? null; - const startTime = - $el - .find("dl") - .filter((_, dl) => $(dl).find("dt").text().includes("START")) - .find("dd") - .text() - .trim() - .match(/\d{2}:\d{2}/)?.[0] ?? null; + const startTime = + $el + .find("dl") + .filter((_, dl) => $(dl).find("dt").text().includes("START")) + .find("dd") + .text() + .trim() + .match(/\d{2}:\d{2}/)?.[0] ?? null; - events.push({ - venue_id: venue.id, - title, - artist, - date, - open_time: openTime, - start_time: startTime, - image_url: $el.find("div.left img").attr("src") ?? null, - source_url: href, - }); + events.push({ + venue_id: venue.id, + title, + artist, + date, + open_time: openTime, + start_time: startTime, + image_url: $el.find("div.left img").attr("src") ?? 
null, + source_url: href, }); + }); - return events; + return events; +} + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const now = new Date(); + const urls = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + return `https://www.liquidroom.net/schedule/${d.getFullYear()}/${String(d.getMonth() + 1).padStart(2, "0")}`; + }); + + const htmls = await Promise.all( + urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : ""))) + ); + + const seen = new Set<string>(); + return htmls.flatMap(parseHtml).filter((e) => { + if (seen.has(e.source_url ?? e.title)) return false; + seen.add(e.source_url ?? e.title); + return true; + }); }, }; diff --git a/app/scrapers/meets-otsuka.ts b/app/scrapers/meets-otsuka.ts index 0b56251..0acc925 100644 --- a/app/scrapers/meets-otsuka.ts +++ b/app/scrapers/meets-otsuka.ts @@ -21,60 +21,76 @@ export const venue: VenueMeta = { capacity: 100, }; +function parseHtml(html: string): EventInput[] { + const $ = cheerio.load(html); + const events: EventInput[] = []; + + $("div.blog-entry.event-wrap").each((_, el) => { + const $el = $(el); + + const date = $el.attr("event-date") ?? ""; + if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return; + + const $link = $el.find("h2 a").first(); + const title = $link.text().trim(); + if (!title) return; + + const detailPath = $link.attr("href") ?? null; + const sourceUrl = detailPath ? `${venue.url}${detailPath}` : null; + + const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null; + + const timeText = $el.find("p.time").first().text(); + const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i); + const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i); + + const price = $el.find("span.ticket-price__label").first().text().trim() || null; + + const bgStyle = $el.find("div.image-bg").attr("style") ?? 
""; + const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/); + const imageUrl = imgMatch?.[1] ?? null; + + const ticketUrl = + $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket']") + .first().attr("href") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? null, + price, + ticket_url: ticketUrl, + image_url: imageUrl, + source_url: sourceUrl, + }); + }); + + return events; +} + export const scraper: Scraper = { venue, async scrape(): Promise<EventInput[]> { - const res = await fetch("https://meets.rinky.info/events"); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - const $ = cheerio.load(await res.text()); - const events: EventInput[] = []; - - $("div.blog-entry.event-wrap").each((_, el) => { - const $el = $(el); - - const date = $el.attr("event-date") ?? ""; - if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return; - - const $link = $el.find("h2 a").first(); - const title = $link.text().trim(); - if (!title) return; - - const detailPath = $link.attr("href") ?? null; - const sourceUrl = detailPath - ? `${venue.url}${detailPath}` - : null; - - const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null; - - const timeText = $el.find("p.time").first().text(); - const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i); - const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i); - - const price = $el.find("span.ticket-price__label").first().text().trim() || null; - - // background-image: url("...") - const bgStyle = $el.find("div.image-bg").attr("style") ?? ""; - const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/); - const imageUrl = imgMatch?.[1] ?? null; - - const ticketUrl = - $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket']") - .first().attr("href") ?? 
null; - - events.push({ - venue_id: venue.id, - title, - artist, - date, - open_time: openMatch?.[1] ?? null, - start_time: startMatch?.[1] ?? null, - price, - ticket_url: ticketUrl, - image_url: imageUrl, - source_url: sourceUrl, - }); + const now = new Date(); + const urls = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + const ym = `${d.getFullYear()}/${String(d.getMonth() + 1).padStart(2, "0")}`; + return `https://meets.rinky.info/events?date=${encodeURIComponent(ym)}`; }); - return events; + const htmls = await Promise.all( + urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : ""))) + ); + + const seen = new Set<string>(); + return htmls.flatMap(parseHtml).filter((e) => { + const key = `${e.date}|${e.title}`; + if (seen.has(key)) return false; + seen.add(key); + return true; + }); }, }; diff --git a/app/scrapers/nishieifuku-jam.ts b/app/scrapers/nishieifuku-jam.ts index 7408e02..094d5fe 100644 --- a/app/scrapers/nishieifuku-jam.ts +++ b/app/scrapers/nishieifuku-jam.ts @@ -20,57 +20,76 @@ export const venue: VenueMeta = { capacity: 250, }; +function parseHtml(html: string): EventInput[] { + const $ = cheerio.load(html); + const events: EventInput[] = []; + + $("div.blog-entry.event-wrap").each((_, el) => { + const $el = $(el); + + const date = $el.attr("event-date") ?? ""; + if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return; + + const $link = $el.find("h2 a").first(); + const title = $link.text().trim(); + if (!title) return; + + const detailPath = $link.attr("href") ?? null; + const sourceUrl = detailPath ? 
`${venue.url}${detailPath}` : null; + + const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null; + + const timeText = $el.find("p.time").first().text(); + const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i); + const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i); + + const price = $el.find("span.ticket-price__label").first().text().trim() || null; + + const bgStyle = $el.find("div.image-bg").attr("style") ?? ""; + const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/); + const imageUrl = imgMatch?.[1] ?? null; + + const ticketUrl = + $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket'], a[href*='tiget']") + .first().attr("href") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? null, + price, + ticket_url: ticketUrl, + image_url: imageUrl, + source_url: sourceUrl, + }); + }); + + return events; +} + export const scraper: Scraper = { venue, async scrape(): Promise<EventInput[]> { - const res = await fetch("https://jam.rinky.info/events"); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - const $ = cheerio.load(await res.text()); - const events: EventInput[] = []; - - $("div.blog-entry.event-wrap").each((_, el) => { - const $el = $(el); - - const date = $el.attr("event-date") ?? ""; - if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return; - - const $link = $el.find("h2 a").first(); - const title = $link.text().trim(); - if (!title) return; - - const detailPath = $link.attr("href") ?? null; - const sourceUrl = detailPath ? 
`${venue.url}${detailPath}` : null; - - const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null; - - const timeText = $el.find("p.time").first().text(); - const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i); - const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i); - - const price = $el.find("span.ticket-price__label").first().text().trim() || null; - - const bgStyle = $el.find("div.image-bg").attr("style") ?? ""; - const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/); - const imageUrl = imgMatch?.[1] ?? null; - - const ticketUrl = - $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket'], a[href*='tiget']") - .first().attr("href") ?? null; - - events.push({ - venue_id: venue.id, - title, - artist, - date, - open_time: openMatch?.[1] ?? null, - start_time: startMatch?.[1] ?? null, - price, - ticket_url: ticketUrl, - image_url: imageUrl, - source_url: sourceUrl, - }); + const now = new Date(); + const urls = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + const ym = `${d.getFullYear()}/${String(d.getMonth() + 1).padStart(2, "0")}`; + return `https://jam.rinky.info/events?date=${encodeURIComponent(ym)}`; }); - return events; + const htmls = await Promise.all( + urls.map((url) => fetch(url).then((r) => (r.ok ? 
r.text() : ""))) + ); + + const seen = new Set<string>(); + return htmls.flatMap(parseHtml).filter((e) => { + const key = `${e.date}|${e.title}`; + if (seen.has(key)) return false; + seen.add(key); + return true; + }); }, }; diff --git a/app/scrapers/shibuya-o.ts b/app/scrapers/shibuya-o.ts index c674cfc..6d394ff 100644 --- a/app/scrapers/shibuya-o.ts +++ b/app/scrapers/shibuya-o.ts @@ -26,42 +26,25 @@ export const venue: VenueMeta = { const SUB_VENUES = ["east", "west", "crest", "nest"]; const BASE = "https://shibuya-o.com"; -async function scrapeVenue(subVenue: string): Promise<EventInput[]> { - const url = `${BASE}/${subVenue}/schedule/`; +async function scrapeVenueMonth(subVenue: string, year: number, month: number): Promise<EventInput[]> { + const url = `${BASE}/${subVenue}/schedule/?y=${year}&m=${month}`; const res = await fetch(url); if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`); const $ = cheerio.load(await res.text()); const events: EventInput[] = []; - // Extract year: try "next" nav link (?y=YYYY&m=MM) - const nextHref = $("a[href*='?y='][href*='&m=']").last().attr("href") ?? ""; - const nextYearMatch = nextHref.match(/[?&]y=(\d{4})/); - const nextMonthMatch = nextHref.match(/[?&]m=(\d{1,2})/); - const currentMonthRaw = $("div.p-schedule__month").first().text().trim(); - const currentMonth = parseInt(currentMonthRaw, 10); - - let year = new Date().getFullYear(); - if (nextYearMatch && nextMonthMatch) { - const nextYear = parseInt(nextYearMatch[1], 10); - const nextMonth = parseInt(nextMonthMatch[1], 10); - // If next month == current month + 1 (normal case), year == nextYear - // If current month == 12 and next month == 1, year == nextYear - 1 - year = nextMonth === currentMonth + 1 ? 
nextYear : nextYear - 1; - } - $("div.p-scheduled-card").each((_, el) => { const $el = $(el); const dateRaw = $el.find("span.p-scheduled-card__date-item").first().text().trim(); - // "05 / 01" → month=5, day=1 const dateMatch = dateRaw.match(/(\d{1,2})\s*\/\s*(\d{1,2})/); if (!dateMatch) return; - const month = parseInt(dateMatch[1], 10); + const cardMonth = parseInt(dateMatch[1], 10); const day = parseInt(dateMatch[2], 10); - if (!currentMonth || !month) return; - // Handle year rollover (December cards on January page, etc.) - const cardYear = month < currentMonth ? year + 1 : year; - const date = `${cardYear}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`; + if (!cardMonth) return; + // Handle year rollover (e.g. December page showing January events) + const cardYear = cardMonth < month ? year + 1 : year; + const date = `${cardYear}-${String(cardMonth).padStart(2, "0")}-${String(day).padStart(2, "0")}`; const title = $el.find("span.p-scheduled-card__title-main").first().text().trim(); if (!title) return; @@ -99,12 +82,22 @@ async function scrapeVenue(subVenue: string): Promise<EventInput[]> { export const scraper: Scraper = { venue, async scrape(): Promise<EventInput[]> { - const results = await Promise.allSettled(SUB_VENUES.map(scrapeVenue)); + const now = new Date(); + const months = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + return { year: d.getFullYear(), month: d.getMonth() + 1 }; + }); + + const tasks = SUB_VENUES.flatMap((sub) => + months.map(({ year, month }) => scrapeVenueMonth(sub, year, month)) + ); + const results = await Promise.allSettled(tasks); + const all: EventInput[] = []; for (const r of results) { if (r.status === "fulfilled") all.push(...r.value); } - // Deduplicate by date + title + const seen = new Set<string>(); return all.filter((e) => { const key = `${e.date}|${e.title}`; |
