diff options
| -rw-r--r-- | SCRAPE_TARGETS.md | 1 | ||||
| -rw-r--r-- | app/scrapers/club-quattro.ts | 94 | ||||
| -rw-r--r-- | app/scrapers/duo-music-exchange.ts | 103 | ||||
| -rw-r--r-- | app/scrapers/fad-yokohama.ts | 31 | ||||
| -rw-r--r-- | app/scrapers/fever-shindaita.ts | 12 | ||||
| -rw-r--r-- | app/scrapers/index.ts | 2 | ||||
| -rw-r--r-- | app/scrapers/liquid-room.ts | 110 | ||||
| -rw-r--r-- | app/scrapers/meets-otsuka.ts | 116 | ||||
| -rw-r--r-- | app/scrapers/nishieifuku-jam.ts | 113 | ||||
| -rw-r--r-- | app/scrapers/shibuya-o.ts | 45 | ||||
| -rw-r--r-- | package.json | 3 | ||||
| -rw-r--r-- | scripts/test-scrape-window.ts | 265 |
12 files changed, 659 insertions, 236 deletions
diff --git a/SCRAPE_TARGETS.md b/SCRAPE_TARGETS.md index 461573f..941db13 100644 --- a/SCRAPE_TARGETS.md +++ b/SCRAPE_TARGETS.md @@ -24,6 +24,7 @@ | `fever-shindaita` | 新代田 FEVER | 新代田 | https://www.fever-popo.com | [fever-shindaita.ts](app/scrapers/fever-shindaita.ts) | ✅ | | `moon-step-nakano` | 中野 MOON STEP | 中野 | https://nakano-dynamite.com/moonstep | [moon-step-nakano.ts](app/scrapers/moon-step-nakano.ts) | ✅ | | `mod-shibasaki` | shibasaki mod | 柴崎 | https://shibasakimod.com/schedule | [mod-shibasaki.ts](app/scrapers/mod-shibasaki.ts) | ✅ | +| `duo-music-exchange` | duo MUSIC EXCHANGE | 渋谷 | https://duomusicexchange.com/schedule/ | [duo-music-exchange.ts](app/scrapers/duo-music-exchange.ts) | ✅ | ### 状態凡例 - ✅ 動作中 diff --git a/app/scrapers/club-quattro.ts b/app/scrapers/club-quattro.ts index 10b60e9..cbb898e 100644 --- a/app/scrapers/club-quattro.ts +++ b/app/scrapers/club-quattro.ts @@ -10,53 +10,71 @@ export const venue: VenueMeta = { capacity: 750, }; -export const scraper: Scraper = { - venue, - async scrape(): Promise<EventInput[]> { - const res = await fetch("https://www.club-quattro.com/shibuya/schedule/"); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - const html = await res.text(); - const $ = cheerio.load(html); - const events: EventInput[] = []; +function parseHtml(html: string): EventInput[] { + const $ = cheerio.load(html); + const events: EventInput[] = []; - $("li[data-event-date]").each((_, el) => { - const $el = $(el); + $("li[data-event-date]").each((_, el) => { + const $el = $(el); - const date = $el.attr("data-event-date") ?? ""; - if (!date) return; + const date = $el.attr("data-event-date") ?? ""; + if (!date) return; - const title = $el.find("p.txt-02").text().trim(); - if (!title) return; + const title = $el.find("p.txt-02").text().trim(); + if (!title) return; - const artist = $el.find("p.txt-01 span").text().trim() || null; + const artist = $el.find("p.txt-01 span").text().trim() || null; - let openTime: string | null = null; - let startTime: string | null = null; - $el.find("dl.detail-list .bundle").each((_, bundle) => { - const label = $(bundle).find("dt").text().trim(); - if (label.includes("開場") || label.includes("開演")) { - const times = $(bundle).find("dd").text().trim().match(/\d{2}:\d{2}/g) ?? []; - openTime = times[0] ?? null; - startTime = times[1] ?? null; - } - }); + let openTime: string | null = null; + let startTime: string | null = null; + $el.find("dl.detail-list .bundle").each((_, bundle) => { + const label = $(bundle).find("dt").text().trim(); + if (label.includes("開場") || label.includes("開演")) { + const times = $(bundle).find("dd").text().trim().match(/\d{2}:\d{2}/g) ?? []; + openTime = times[0] ?? null; + startTime = times[1] ?? null; + } + }); - const href = $el.find("a").first().attr("href") ?? null; - const imageSrc = $el.find(".front img").attr("src") ?? null; + const href = $el.find("a").first().attr("href") ?? null; + const imageSrc = $el.find(".front img").attr("src") ?? null; - events.push({ - venue_id: venue.id, - title, - artist, - date, - open_time: openTime, - start_time: startTime, - image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null, - source_url: href ? absoluteUrl(href, venue.url) : null, - }); + events.push({ + venue_id: venue.id, + title, + artist, + date, + open_time: openTime, + start_time: startTime, + image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null, + source_url: href ? absoluteUrl(href, venue.url) : null, }); + }); - return events; + return events; +} + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const now = new Date(); + const urls = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + const ym = `${d.getFullYear()}${String(d.getMonth() + 1).padStart(2, "0")}`; + return `https://www.club-quattro.com/shibuya/schedule/?ym=${ym}`; + }); + + const htmls = await Promise.all( + urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : ""))) + ); + + const seen = new Set<string>(); + return htmls.flatMap(parseHtml).filter((e) => { + const key = `${e.date}|${e.title}`; + if (seen.has(key)) return false; + seen.add(key); + return true; + }); }, }; diff --git a/app/scrapers/duo-music-exchange.ts b/app/scrapers/duo-music-exchange.ts new file mode 100644 index 0000000..57814ea --- /dev/null +++ b/app/scrapers/duo-music-exchange.ts @@ -0,0 +1,103 @@ +/** + * duo MUSIC EXCHANGE — https://duomusicexchange.com + * + * 月別HTML: /schedule/YYYY/index_YYYY-MM.html + * DOM構造: + * <section id="daybox"> + * <div class="date"><span class="day">01</span></div> + * <div class="sche-details"> + * <span class="artist">アーティスト名</span> + * <span class="details-title">イベントタイトル</span> + * <dl class="row"> + * <dt>OPEN/START</dt><dd>18:00 / 19:00</dd> + * <dt>ADV./DOOR</dt><dd>¥3,000 / ¥3,500</dd> + * <dt>Ticket.</dt><dd><a href="...">...</a></dd> + * </dl> + * </div> + * </section> + */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "duo-music-exchange", + name: "duo MUSIC EXCHANGE", + url: "https://duomusicexchange.com", + area: "渋谷", + capacity: 700, +}; + +async function scrapeMonth(year: number, month: number): Promise<EventInput[]> { + const mm = String(month).padStart(2, "0"); + const url = `${venue.url}/schedule/${year}/index_${year}-${mm}.html`; + const res = await fetch(url); + if (!res.ok) return []; + const $ = cheerio.load(await res.text()); + const events: EventInput[] = []; + + $("section#daybox").each((_, el) => { + const $el = $(el); + + const dayStr = $el.find(".date .day").first().text().trim(); + const day = parseInt(dayStr, 10); + if (!day) return; + const date = `${year}-${mm}-${String(day).padStart(2, "0")}`; + + const artist = $el.find(".sche-details .artist").first().text().trim() || null; + const title = $el.find(".sche-details .details-title").first().text().trim(); + if (!title) return; + + let openTime: string | null = null; + let startTime: string | null = null; + let price: string | null = null; + let ticketUrl: string | null = null; + + $el.find("dl.row dt").each((_, dt) => { + const label = $(dt).text().trim(); + const $dd = $(dt).next("dd"); + if (/OPEN/i.test(label)) { + const times = $dd.text().trim().match(/(\d{1,2}:\d{2})/g) ?? []; + openTime = times[0] ?? null; + startTime = times[1] ?? null; + } else if (/ADV/i.test(label)) { + price = $dd.text().trim() || null; + } else if (/Ticket/i.test(label)) { + ticketUrl = $dd.find("a[href]").first().attr("href") ?? null; + } + }); + + const imgSrc = $el.find("img").first().attr("src") ?? null; + const imageUrl = imgSrc + ? (imgSrc.startsWith("http") ? imgSrc : `${venue.url}/schedule/${year}/${imgSrc}`) + : null; + + events.push({ + venue_id: venue.id, + title, + artist, + date, + open_time: openTime, + start_time: startTime, + price, + ticket_url: ticketUrl, + image_url: imageUrl, + source_url: url, + }); + }); + + return events; +} + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const now = new Date(); + const months = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + return { year: d.getFullYear(), month: d.getMonth() + 1 }; + }); + const results = await Promise.all(months.map(({ year, month }) => scrapeMonth(year, month))); + return results.flat(); + }, +}; diff --git a/app/scrapers/fad-yokohama.ts b/app/scrapers/fad-yokohama.ts index a01ea0d..f8f7cbc 100644 --- a/app/scrapers/fad-yokohama.ts +++ b/app/scrapers/fad-yokohama.ts @@ -151,27 +151,18 @@ function parsePageEvents( export const scraper: Scraper = { venue, async scrape(): Promise<EventInput[]> { - const res = await fetch(SCHEDULE_URL); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - const html = await res.text(); - - const { year, month, nextUrl } = getMonthContext(html); - const events = parsePageEvents(html, year, month, SCHEDULE_URL); - - if (nextUrl) { - const nextRes = await fetch(nextUrl); - if (nextRes.ok) { - const nextHtml = await nextRes.text(); - let nextMonth = month + 1; - let nextYear = year; - if (nextMonth > 12) { - nextMonth = 1; - nextYear++; - } - events.push(...parsePageEvents(nextHtml, nextYear, nextMonth, nextUrl)); - } + const allEvents: EventInput[] = []; + let url: string | null = SCHEDULE_URL; + + for (let page = 0; page < 3 && url; page++) { + const res = await fetch(url); + if (!res.ok) break; + const html = await res.text(); + const { year, month, nextUrl } = getMonthContext(html); + allEvents.push(...parsePageEvents(html, year, month, url)); + url = nextUrl; } - return events; + return allEvents; }, }; diff --git a/app/scrapers/fever-shindaita.ts b/app/scrapers/fever-shindaita.ts index 62c2e2c..6356343 100644 --- a/app/scrapers/fever-shindaita.ts +++ b/app/scrapers/fever-shindaita.ts @@ -108,11 +108,11 @@ export const scraper: Scraper = { venue, async scrape(): Promise<EventInput[]> { const now = new Date(); - const thisMonth = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, "0")}`; - const next = new Date(now.getFullYear(), now.getMonth() + 1, 1); - const nextMonth = `${next.getFullYear()}-${String(next.getMonth() + 1).padStart(2, "0")}`; - - const [a, b] = await Promise.all([scrapeMonth(thisMonth), scrapeMonth(nextMonth)]); - return [...a, ...b]; + const months = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}`; + }); + const results = await Promise.all(months.map(scrapeMonth)); + return results.flat(); }, }; diff --git a/app/scrapers/index.ts b/app/scrapers/index.ts index c38816f..e812626 100644 --- a/app/scrapers/index.ts +++ b/app/scrapers/index.ts @@ -20,6 +20,7 @@ import { scraper as warpKichijoji } from "./warp-kichijoji"; import { scraper as pitbarNishiogikubo } from "./pitbar-nishiogikubo"; import { scraper as naveyFloor } from "./navey-floor"; import { scraper as shimokitazawaEra } from "./shimokitazawa-era"; +import { scraper as duoMusicExchange } from "./duo-music-exchange"; export const ALL_SCRAPERS: Scraper[] = [ liquidRoom, @@ -39,6 +40,7 @@ export const ALL_SCRAPERS: Scraper[] = [ pitbarNishiogikubo, naveyFloor, shimokitazawaEra, + duoMusicExchange, ]; export type { Scraper } from "./base"; diff --git a/app/scrapers/liquid-room.ts b/app/scrapers/liquid-room.ts index 1eeade6..a1265c8 100644 --- a/app/scrapers/liquid-room.ts +++ b/app/scrapers/liquid-room.ts @@ -10,62 +10,76 @@ export const venue: VenueMeta = { capacity: 1000, }; -export const scraper: Scraper = { - venue, - async scrape(): Promise<EventInput[]> { - const res = await fetch("https://www.liquidroom.net/schedule"); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - const html = await res.text(); - const $ = cheerio.load(html); - const events: EventInput[] = []; +function parseHtml(html: string): EventInput[] { + const $ = cheerio.load(html); + const events: EventInput[] = []; - $("article").each((_, el) => { - const $el = $(el); + $("article").each((_, el) => { + const $el = $(el); - const href = $el.find("a.s_link").attr("href") ?? ""; - // Date is encoded in the URL: e.g. /schedule/eventname_20260501 - const dateMatch = href.match(/_(\d{4})(\d{2})(\d{2})$/); - if (!dateMatch) return; - const date = `${dateMatch[1]}-${dateMatch[2]}-${dateMatch[3]}`; + const href = $el.find("a.s_link").attr("href") ?? ""; + const dateMatch = href.match(/_(\d{4})(\d{2})(\d{2})$/); + if (!dateMatch) return; + const date = `${dateMatch[1]}-${dateMatch[2]}-${dateMatch[3]}`; - const h2 = $el.find("h2").first().text().trim(); - if (!h2) return; + const h2 = $el.find("h2").first().text().trim(); + if (!h2) return; - const subtitle = $el.find("p.subtitle").first().text().trim(); - // h2 is the artist/band name; subtitle (if present) is the event title - const title = subtitle || h2; - const artist = subtitle ? h2 : null; + const subtitle = $el.find("p.subtitle").first().text().trim(); + const title = subtitle || h2; + const artist = subtitle ? h2 : null; - const openTime = - $el - .find("dl") - .filter((_, dl) => $(dl).find("dt").text().includes("OPEN")) - .find("dd") - .text() - .trim() - .match(/\d{2}:\d{2}/)?.[0] ?? null; + const openTime = + $el + .find("dl") + .filter((_, dl) => $(dl).find("dt").text().includes("OPEN")) + .find("dd") + .text() + .trim() + .match(/\d{2}:\d{2}/)?.[0] ?? null; - const startTime = - $el - .find("dl") - .filter((_, dl) => $(dl).find("dt").text().includes("START")) - .find("dd") - .text() - .trim() - .match(/\d{2}:\d{2}/)?.[0] ?? null; + const startTime = + $el + .find("dl") + .filter((_, dl) => $(dl).find("dt").text().includes("START")) + .find("dd") + .text() + .trim() + .match(/\d{2}:\d{2}/)?.[0] ?? null; - events.push({ - venue_id: venue.id, - title, - artist, - date, - open_time: openTime, - start_time: startTime, - image_url: $el.find("div.left img").attr("src") ?? null, - source_url: href, - }); + events.push({ + venue_id: venue.id, + title, + artist, + date, + open_time: openTime, + start_time: startTime, + image_url: $el.find("div.left img").attr("src") ?? null, + source_url: href, }); + }); - return events; + return events; +} + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const now = new Date(); + const urls = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + return `https://www.liquidroom.net/schedule/${d.getFullYear()}/${String(d.getMonth() + 1).padStart(2, "0")}`; + }); + + const htmls = await Promise.all( + urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : ""))) + ); + + const seen = new Set<string>(); + return htmls.flatMap(parseHtml).filter((e) => { + if (seen.has(e.source_url ?? e.title)) return false; + seen.add(e.source_url ?? e.title); + return true; + }); }, }; diff --git a/app/scrapers/meets-otsuka.ts b/app/scrapers/meets-otsuka.ts index 0b56251..0acc925 100644 --- a/app/scrapers/meets-otsuka.ts +++ b/app/scrapers/meets-otsuka.ts @@ -21,60 +21,76 @@ export const venue: VenueMeta = { capacity: 100, }; +function parseHtml(html: string): EventInput[] { + const $ = cheerio.load(html); + const events: EventInput[] = []; + + $("div.blog-entry.event-wrap").each((_, el) => { + const $el = $(el); + + const date = $el.attr("event-date") ?? ""; + if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return; + + const $link = $el.find("h2 a").first(); + const title = $link.text().trim(); + if (!title) return; + + const detailPath = $link.attr("href") ?? null; + const sourceUrl = detailPath ? `${venue.url}${detailPath}` : null; + + const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null; + + const timeText = $el.find("p.time").first().text(); + const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i); + const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i); + + const price = $el.find("span.ticket-price__label").first().text().trim() || null; + + const bgStyle = $el.find("div.image-bg").attr("style") ?? ""; + const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/); + const imageUrl = imgMatch?.[1] ?? null; + + const ticketUrl = + $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket']") + .first().attr("href") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? null, + price, + ticket_url: ticketUrl, + image_url: imageUrl, + source_url: sourceUrl, + }); + }); + + return events; +} + export const scraper: Scraper = { venue, async scrape(): Promise<EventInput[]> { - const res = await fetch("https://meets.rinky.info/events"); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - const $ = cheerio.load(await res.text()); - const events: EventInput[] = []; - - $("div.blog-entry.event-wrap").each((_, el) => { - const $el = $(el); - - const date = $el.attr("event-date") ?? ""; - if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return; - - const $link = $el.find("h2 a").first(); - const title = $link.text().trim(); - if (!title) return; - - const detailPath = $link.attr("href") ?? null; - const sourceUrl = detailPath - ? `${venue.url}${detailPath}` - : null; - - const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null; - - const timeText = $el.find("p.time").first().text(); - const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i); - const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i); - - const price = $el.find("span.ticket-price__label").first().text().trim() || null; - - // background-image: url("...") - const bgStyle = $el.find("div.image-bg").attr("style") ?? ""; - const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/); - const imageUrl = imgMatch?.[1] ?? null; - - const ticketUrl = - $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket']") - .first().attr("href") ?? null; - - events.push({ - venue_id: venue.id, - title, - artist, - date, - open_time: openMatch?.[1] ?? null, - start_time: startMatch?.[1] ?? null, - price, - ticket_url: ticketUrl, - image_url: imageUrl, - source_url: sourceUrl, - }); + const now = new Date(); + const urls = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + const ym = `${d.getFullYear()}/${String(d.getMonth() + 1).padStart(2, "0")}`; + return `https://meets.rinky.info/events?date=${encodeURIComponent(ym)}`; }); - return events; + const htmls = await Promise.all( + urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : ""))) + ); + + const seen = new Set<string>(); + return htmls.flatMap(parseHtml).filter((e) => { + const key = `${e.date}|${e.title}`; + if (seen.has(key)) return false; + seen.add(key); + return true; + }); }, }; diff --git a/app/scrapers/nishieifuku-jam.ts b/app/scrapers/nishieifuku-jam.ts index 7408e02..094d5fe 100644 --- a/app/scrapers/nishieifuku-jam.ts +++ b/app/scrapers/nishieifuku-jam.ts @@ -20,57 +20,76 @@ export const venue: VenueMeta = { capacity: 250, }; +function parseHtml(html: string): EventInput[] { + const $ = cheerio.load(html); + const events: EventInput[] = []; + + $("div.blog-entry.event-wrap").each((_, el) => { + const $el = $(el); + + const date = $el.attr("event-date") ?? ""; + if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return; + + const $link = $el.find("h2 a").first(); + const title = $link.text().trim(); + if (!title) return; + + const detailPath = $link.attr("href") ?? null; + const sourceUrl = detailPath ? `${venue.url}${detailPath}` : null; + + const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null; + + const timeText = $el.find("p.time").first().text(); + const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i); + const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i); + + const price = $el.find("span.ticket-price__label").first().text().trim() || null; + + const bgStyle = $el.find("div.image-bg").attr("style") ?? ""; + const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/); + const imageUrl = imgMatch?.[1] ?? null; + + const ticketUrl = + $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket'], a[href*='tiget']") + .first().attr("href") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? null, + price, + ticket_url: ticketUrl, + image_url: imageUrl, + source_url: sourceUrl, + }); + }); + + return events; +} + export const scraper: Scraper = { venue, async scrape(): Promise<EventInput[]> { - const res = await fetch("https://jam.rinky.info/events"); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - const $ = cheerio.load(await res.text()); - const events: EventInput[] = []; - - $("div.blog-entry.event-wrap").each((_, el) => { - const $el = $(el); - - const date = $el.attr("event-date") ?? ""; - if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return; - - const $link = $el.find("h2 a").first(); - const title = $link.text().trim(); - if (!title) return; - - const detailPath = $link.attr("href") ?? null; - const sourceUrl = detailPath ? `${venue.url}${detailPath}` : null; - - const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null; - - const timeText = $el.find("p.time").first().text(); - const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i); - const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i); - - const price = $el.find("span.ticket-price__label").first().text().trim() || null; - - const bgStyle = $el.find("div.image-bg").attr("style") ?? ""; - const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/); - const imageUrl = imgMatch?.[1] ?? null; - - const ticketUrl = - $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket'], a[href*='tiget']") - .first().attr("href") ?? null; - - events.push({ - venue_id: venue.id, - title, - artist, - date, - open_time: openMatch?.[1] ?? null, - start_time: startMatch?.[1] ?? null, - price, - ticket_url: ticketUrl, - image_url: imageUrl, - source_url: sourceUrl, - }); + const now = new Date(); + const urls = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + const ym = `${d.getFullYear()}/${String(d.getMonth() + 1).padStart(2, "0")}`; + return `https://jam.rinky.info/events?date=${encodeURIComponent(ym)}`; }); - return events; + const htmls = await Promise.all( + urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : ""))) + ); + + const seen = new Set<string>(); + return htmls.flatMap(parseHtml).filter((e) => { + const key = `${e.date}|${e.title}`; + if (seen.has(key)) return false; + seen.add(key); + return true; + }); }, }; diff --git a/app/scrapers/shibuya-o.ts b/app/scrapers/shibuya-o.ts index c674cfc..6d394ff 100644 --- a/app/scrapers/shibuya-o.ts +++ b/app/scrapers/shibuya-o.ts @@ -26,42 +26,25 @@ export const venue: VenueMeta = { const SUB_VENUES = ["east", "west", "crest", "nest"]; const BASE = "https://shibuya-o.com"; -async function scrapeVenue(subVenue: string): Promise<EventInput[]> { - const url = `${BASE}/${subVenue}/schedule/`; +async function scrapeVenueMonth(subVenue: string, year: number, month: number): Promise<EventInput[]> { + const url = `${BASE}/${subVenue}/schedule/?y=${year}&m=${month}`; const res = await fetch(url); if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`); const $ = cheerio.load(await res.text()); const events: EventInput[] = []; - // Extract year: try "next" nav link (?y=YYYY&m=MM) - const nextHref = $("a[href*='?y='][href*='&m=']").last().attr("href") ?? ""; - const nextYearMatch = nextHref.match(/[?&]y=(\d{4})/); - const nextMonthMatch = nextHref.match(/[?&]m=(\d{1,2})/); - const currentMonthRaw = $("div.p-schedule__month").first().text().trim(); - const currentMonth = parseInt(currentMonthRaw, 10); - - let year = new Date().getFullYear(); - if (nextYearMatch && nextMonthMatch) { - const nextYear = parseInt(nextYearMatch[1], 10); - const nextMonth = parseInt(nextMonthMatch[1], 10); - // If next month == current month + 1 (normal case), year == nextYear - // If current month == 12 and next month == 1, year == nextYear - 1 - year = nextMonth === currentMonth + 1 ? nextYear : nextYear - 1; - } - $("div.p-scheduled-card").each((_, el) => { const $el = $(el); const dateRaw = $el.find("span.p-scheduled-card__date-item").first().text().trim(); - // "05 / 01" → month=5, day=1 const dateMatch = dateRaw.match(/(\d{1,2})\s*\/\s*(\d{1,2})/); if (!dateMatch) return; - const month = parseInt(dateMatch[1], 10); + const cardMonth = parseInt(dateMatch[1], 10); const day = parseInt(dateMatch[2], 10); - if (!currentMonth || !month) return; - // Handle year rollover (December cards on January page, etc.) - const cardYear = month < currentMonth ? year + 1 : year; - const date = `${cardYear}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`; + if (!cardMonth) return; + // Handle year rollover (e.g. December page showing January events) + const cardYear = cardMonth < month ? year + 1 : year; + const date = `${cardYear}-${String(cardMonth).padStart(2, "0")}-${String(day).padStart(2, "0")}`; const title = $el.find("span.p-scheduled-card__title-main").first().text().trim(); if (!title) return; @@ -99,12 +82,22 @@ async function scrapeVenue(subVenue: string): Promise<EventInput[]> { export const scraper: Scraper = { venue, async scrape(): Promise<EventInput[]> { - const results = await Promise.allSettled(SUB_VENUES.map(scrapeVenue)); + const now = new Date(); + const months = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + return { year: d.getFullYear(), month: d.getMonth() + 1 }; + }); + + const tasks = SUB_VENUES.flatMap((sub) => + months.map(({ year, month }) => scrapeVenueMonth(sub, year, month)) + ); + const results = await Promise.allSettled(tasks); + const all: EventInput[] = []; for (const r of results) { if (r.status === "fulfilled") all.push(...r.value); } - // Deduplicate by date + title + const seen = new Set<string>(); return all.filter((e) => { const key = `${e.date}|${e.title}`; diff --git a/package.json b/package.json index ca597ed..5232259 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,8 @@ "dev": "react-router dev", "start": "react-router-serve ./build/server/index.js", "typecheck": "react-router typegen && tsc", - "scrape": "node --import tsx/esm scripts/scrape.ts" + "scrape": "node --import tsx/esm scripts/scrape.ts", + "test": "node --import tsx/esm --test scripts/test-scrape-window.ts" }, "dependencies": { "@react-router/node": "^7.3.0", diff --git a/scripts/test-scrape-window.ts b/scripts/test-scrape-window.ts new file mode 100644 index 0000000..0bc6fee --- /dev/null +++ b/scripts/test-scrape-window.ts @@ -0,0 +1,265 @@ +/** + * 期間カバレッジテスト + * + * 各スクレーパーが SCRAPE_WINDOW_DAYS(65日)をカバーするために + * 必要な月のURLを全てフェッチしているかを検証する。 + * + * fetch をモックし、呼ばれた URL の月一覧を確認する。 + */ +import { test, describe, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; + +// ---- テスト用ユーティリティ ---- + +function monthsInWindow(from: Date, windowDays: number): string[] { + const end = new Date(from); + end.setDate(end.getDate() + windowDays); + const months: string[] = []; + const d = new Date(from.getFullYear(), from.getMonth(), 1); + while (d <= end) { + months.push( + `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}` + ); + d.setMonth(d.getMonth() + 1); + } + return months; +} + +const WINDOW_DAYS = 65; + +/** fetch をモックし、呼ばれた URL 一覧を返す */ +function mockFetch(htmlByUrl: Record<string, string> = {}): { + fetchedUrls: string[]; + restore: () => void; +} { + const fetchedUrls: string[] = []; + const orig = global.fetch; + // @ts-ignore + global.fetch = async (url: string | URL) => { + const s = url.toString(); + fetchedUrls.push(s); + const html = htmlByUrl[s] ?? "<html><body></body></html>"; + return { + ok: true, + status: 200, + text: async () => html, + json: async () => ({ events: [] }), + } as unknown as Response; + }; + return { fetchedUrls, restore: () => { global.fetch = orig; } }; +} + +/** YYYY-MM 文字列が URL に含まれるか確認(複数フォーマット対応) */ +function urlCoversMonth(urls: string[], yyyymm: string): boolean { + const [y, m] = yyyymm.split("-"); + const mInt = parseInt(m, 10).toString(); // "06" → "6" + return urls.some( + (u) => + u.includes(`/${y}/${m}`) || + u.includes(`/${y}-${m}`) || + u.includes(`_${y}-${m}`) || // duo: index_2026-05.html + u.includes(`${y}${m}`) || + u.includes(`y=${y}&m=${mInt}`) || + u.includes(`y=${y}&m=${m}`) || + u.includes(encodeURIComponent(`${y}/${m}`)) || + u.includes(`date=${y}%2F${m}`) + ); +} + +// ---- ウィンドウ計算ロジック テスト ---- + +describe("monthsInWindow", () => { + test("65日窓は最大3ヶ月にまたがる(月初付近)", () => { + // 5/1 から65日 = 7/5 → May, Jun, Jul + const months = monthsInWindow(new Date("2026-05-01"), WINDOW_DAYS); + assert.deepEqual(months, ["2026-05", "2026-06", "2026-07"]); + }); + + test("65日窓は最大3ヶ月にまたがる(月末付近)", () => { + // 5/31 から65日 = 8/4 → May, Jun, Jul, Aug + const months = monthsInWindow(new Date("2026-05-31"), WINDOW_DAYS); + assert.deepEqual(months, ["2026-05", "2026-06", "2026-07", "2026-08"]); + }); + + test("年またぎ対応", () => { + // 11/15 から65日 = 2027/1/19 → Nov, Dec, Jan + const months = monthsInWindow(new Date("2026-11-15"), WINDOW_DAYS); + assert.deepEqual(months, ["2026-11", "2026-12", "2027-01"]); + }); +}); + +// ---- スクレーパー別 URL カバレッジテスト ---- + +describe("fever-shindaita: 3ヶ月分フェッチ", () => { + let restore: () => void; + let fetchedUrls: string[]; + + beforeEach(() => { + ({ fetchedUrls, restore } = mockFetch()); + }); + afterEach(() => restore()); + + test("今月・来月・再来月のURLをフェッチする", async () => { + const { scraper } = await import("../app/scrapers/fever-shindaita.js"); + await scraper.scrape(); + + const now = new Date(); + const expected = monthsInWindow(now, WINDOW_DAYS); + for (const m of expected) { + const [y, mo] = m.split("-"); + assert.ok( + fetchedUrls.some((u) => u.includes(`/${y}/${mo}/`)), + `fever-shindaita: ${m} のURLがフェッチされていない\nfetched: ${fetchedUrls.join(", ")}` + ); + } + }); +}); + +describe("liquid-room: 3ヶ月分フェッチ", () => { + let restore: () => void; + let fetchedUrls: string[]; + + beforeEach(() => { + ({ fetchedUrls, restore } = mockFetch()); + }); + afterEach(() => restore()); + + test("今月・来月・再来月のURLをフェッチする", async () => { + const { scraper } = await import("../app/scrapers/liquid-room.js"); + await scraper.scrape(); + + const now = new Date(); + const expected = monthsInWindow(now, WINDOW_DAYS); + for (const m of expected) { + assert.ok( + urlCoversMonth(fetchedUrls, m), + `liquid-room: ${m} のURLがフェッチされていない\nfetched: ${fetchedUrls.join(", ")}` + ); + } + }); +}); + +describe("club-quattro: 3ヶ月分フェッチ", () => { + let restore: () => void; + let fetchedUrls: string[]; + + beforeEach(() => { + ({ fetchedUrls, restore } = mockFetch()); + }); + afterEach(() => restore()); + + test("今月・来月・再来月のURLをフェッチする", async () => { + const { scraper } = await import("../app/scrapers/club-quattro.js"); + await scraper.scrape(); + + const now = new Date(); + const expected = monthsInWindow(now, WINDOW_DAYS); + for (const m of expected) { + assert.ok( + urlCoversMonth(fetchedUrls, m), + `club-quattro: ${m} のURLがフェッチされていない\nfetched: ${fetchedUrls.join(", ")}` + ); + } + }); +}); + +describe("meets-otsuka: 3ヶ月分フェッチ", () => { + let restore: () => void; + let fetchedUrls: string[]; + + beforeEach(() => { + ({ fetchedUrls, restore } = mockFetch()); + }); + afterEach(() => restore()); + + test("今月・来月・再来月のURLをフェッチする", async () => { + const { scraper } = await import("../app/scrapers/meets-otsuka.js"); + await scraper.scrape(); + + const now = new Date(); + const expected = monthsInWindow(now, WINDOW_DAYS); + for (const m of expected) { + assert.ok( + urlCoversMonth(fetchedUrls, m), + `meets-otsuka: ${m} のURLがフェッチされていない\nfetched: ${fetchedUrls.join(", ")}` + ); + } + }); +}); + +describe("nishieifuku-jam: 3ヶ月分フェッチ", () => { + let restore: () => void; + let fetchedUrls: string[]; + + beforeEach(() => { + ({ fetchedUrls, restore } = mockFetch()); + }); + afterEach(() => restore()); + + test("今月・来月・再来月のURLをフェッチする", async () => { + const { scraper } = await import("../app/scrapers/nishieifuku-jam.js"); + await scraper.scrape(); + + const now = new Date(); + const expected = monthsInWindow(now, WINDOW_DAYS); + for (const m of expected) { + assert.ok( + urlCoversMonth(fetchedUrls, m), + `nishieifuku-jam: ${m} のURLがフェッチされていない\nfetched: ${fetchedUrls.join(", ")}` + ); + } + }); +}); + +describe("shibuya-o: 全サブ会場 × 3ヶ月分フェッチ", () => { + let restore: () => void; + let fetchedUrls: string[]; + + beforeEach(() => { + ({ fetchedUrls, restore } = mockFetch()); + }); + afterEach(() => restore()); + + test("east/west/crest/nest × 今月・来月・再来月をフェッチする", async () => { + const { scraper } = await import("../app/scrapers/shibuya-o.js"); + await scraper.scrape(); + + const now = new Date(); + const expected = monthsInWindow(now, WINDOW_DAYS); + for (const subVenue of ["east", "west", "crest", "nest"]) { + for (const m of expected) { + assert.ok( + urlCoversMonth( + fetchedUrls.filter((u) => u.includes(`/${subVenue}/`)), + m + ), + `shibuya-o/${subVenue}: ${m} のURLがフェッチされていない` + ); + } + } + }); +}); + +describe("duo-music-exchange: 3ヶ月分フェッチ", () => { + let restore: () => void; + let fetchedUrls: string[]; + + beforeEach(() => { + ({ fetchedUrls, restore } = mockFetch()); + }); + afterEach(() => restore()); + + test("今月・来月・再来月のURLをフェッチする", async () => { + const { scraper } = await import("../app/scrapers/duo-music-exchange.js"); + await scraper.scrape(); + + const now = new Date(); + const expected = monthsInWindow(now, WINDOW_DAYS); + for (const m of expected) { + assert.ok( + urlCoversMonth(fetchedUrls, m), + `duo-music-exchange: ${m} のURLがフェッチされていない\nfetched: ${fetchedUrls.join(", ")}` + ); + } + }); +}); |
