diff options
| author | yyamashita <yyamashita@mosquit.one> | 2026-05-10 23:22:17 +0900 |
|---|---|---|
| committer | yyamashita <yyamashita@mosquit.one> | 2026-05-10 23:22:17 +0900 |
| commit | b56e79b5b288b7c9e2fef396b303afc32c9baf5d (patch) | |
| tree | 28080f7f019889659ef1682f4d3661ed9650da54 /app/scrapers/duo-music-exchange.ts | |
| parent | 05d2b35a85a46dde9a1264d3002ba86e02e3d5eb (diff) | |
Fix multi-month scrape coverage and add duo MUSIC EXCHANGE
- Extend 8 scrapers (liquid-room, shibuya-o, club-quattro, meets-otsuka,
nishieifuku-jam, fever-shindaita, fad-yokohama, and new duo-music-exchange)
to fetch 3 calendar months instead of 1-2, covering the full 65-day window
- Add duo MUSIC EXCHANGE scraper (渋谷, ~700 cap, /schedule/YYYY/index_YYYY-MM.html)
- Add npm test: Node.js built-in test runner verifies each scraper fetches
all required month URLs via mocked fetch (10 tests, no extra deps)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app/scrapers/duo-music-exchange.ts')
| -rw-r--r-- | app/scrapers/duo-music-exchange.ts | 103 |
1 files changed, 103 insertions, 0 deletions
// app/scrapers/duo-music-exchange.ts — new file (blob 57814ea), shown as plain source.
/**
 * duo MUSIC EXCHANGE — https://duomusicexchange.com
 *
 * Monthly HTML: /schedule/YYYY/index_YYYY-MM.html
 * DOM structure:
 *   <section id="daybox">
 *     <div class="date"><span class="day">01</span></div>
 *     <div class="sche-details">
 *       <span class="artist">Artist name</span>
 *       <span class="details-title">Event title</span>
 *       <dl class="row">
 *         <dt>OPEN/START</dt><dd>18:00 / 19:00</dd>
 *         <dt>ADV./DOOR</dt><dd>¥3,000 / ¥3,500</dd>
 *         <dt>Ticket.</dt><dd><a href="...">...</a></dd>
 *       </dl>
 *     </div>
 *   </section>
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/** Static venue metadata; `url` is also the base for every schedule page URL. */
export const venue: VenueMeta = {
  id: "duo-music-exchange",
  name: "duo MUSIC EXCHANGE",
  url: "https://duomusicexchange.com",
  area: "渋谷",
  capacity: 700,
};

/**
 * Turns an `<img src>` value into an absolute URL.
 *
 * Absolute http(s) URLs are returned verbatim. Anything else (document-relative,
 * root-relative `/x.jpg`, protocol-relative `//host/x.jpg`) is resolved against
 * the page it was found on, so it no longer ends up glued onto the schedule
 * directory with a doubled slash.
 *
 * @param src - Raw attribute value, or `undefined` when the attribute is absent.
 * @param pageUrl - Absolute URL of the page the image reference came from.
 * @returns The absolute URL, or `null` when `src` is missing, empty or unparseable.
 */
function resolveImageUrl(src: string | undefined, pageUrl: string): string | null {
  if (!src) return null;
  if (/^https?:\/\//i.test(src)) return src;
  try {
    return new URL(src, pageUrl).href;
  } catch {
    // Malformed reference: storing no image is better than storing a broken link.
    return null;
  }
}

/**
 * Scrapes one monthly schedule page.
 *
 * @param year - Four-digit calendar year used in the page path.
 * @param month - Calendar month, 1-12 (zero-padded when building the URL).
 * @returns One entry per `section#daybox` that has a parseable day number and a
 *   non-empty title; an empty array when the page responds with a non-2xx status.
 */
async function scrapeMonth(year: number, month: number): Promise<EventInput[]> {
  const mm = String(month).padStart(2, "0");
  const url = `${venue.url}/schedule/${year}/index_${year}-${mm}.html`;
  const res = await fetch(url);
  // A missing or failing month page is treated as "no events" rather than an error.
  if (!res.ok) return [];
  const $ = cheerio.load(await res.text());
  const events: EventInput[] = [];

  for (const el of $("section#daybox").toArray()) {
    const $el = $(el);

    const dayStr = $el.find(".date .day").first().text().trim();
    const day = parseInt(dayStr, 10);
    // Rejects both NaN (no number found) and 0.
    if (!day) continue;
    const date = `${year}-${mm}-${String(day).padStart(2, "0")}`;

    const artist = $el.find(".sche-details .artist").first().text().trim() || null;
    const title = $el.find(".sche-details .details-title").first().text().trim();
    if (!title) continue;

    let openTime: string | null = null;
    let startTime: string | null = null;
    let price: string | null = null;
    let ticketUrl: string | null = null;

    // Each <dt> labels the <dd> that immediately follows it.
    for (const dt of $el.find("dl.row dt").toArray()) {
      const label = $(dt).text().trim();
      const $dd = $(dt).next("dd");
      if (/OPEN/i.test(label)) {
        // "18:00 / 19:00" -> first time is doors open, second is start.
        const times = $dd.text().trim().match(/(\d{1,2}:\d{2})/g) ?? [];
        openTime = times[0] ?? null;
        startTime = times[1] ?? null;
      } else if (/ADV/i.test(label)) {
        price = $dd.text().trim() || null;
      } else if (/Ticket/i.test(label)) {
        ticketUrl = $dd.find("a[href]").first().attr("href") ?? null;
      }
    }

    const imageUrl = resolveImageUrl($el.find("img").first().attr("src"), url);

    events.push({
      venue_id: venue.id,
      title,
      artist,
      date,
      open_time: openTime,
      start_time: startTime,
      price,
      ticket_url: ticketUrl,
      image_url: imageUrl,
      source_url: url,
    });
  }

  return events;
}

/** Scraper entry point for duo MUSIC EXCHANGE. */
export const scraper: Scraper = {
  venue,
  /**
   * Fetches the current calendar month and the two that follow, in parallel,
   * and returns their events concatenated in month order.
   */
  async scrape(): Promise<EventInput[]> {
    const now = new Date();
    const months = [0, 1, 2].map((offset) => {
      // Date normalises month overflow, so December + 1 rolls into next January.
      const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
      return { year: d.getFullYear(), month: d.getMonth() + 1 };
    });
    const results = await Promise.all(months.map(({ year, month }) => scrapeMonth(year, month)));
    return results.flat();
  },
};
