summaryrefslogtreecommitdiff
path: root/app/scrapers/duo-music-exchange.ts
diff options
context:
space:
mode:
authoryyamashita <yyamashita@mosquit.one>2026-05-10 23:22:17 +0900
committeryyamashita <yyamashita@mosquit.one>2026-05-10 23:22:17 +0900
commitb56e79b5b288b7c9e2fef396b303afc32c9baf5d (patch)
tree28080f7f019889659ef1682f4d3661ed9650da54 /app/scrapers/duo-music-exchange.ts
parent05d2b35a85a46dde9a1264d3002ba86e02e3d5eb (diff)
Fix multi-month scrape coverage and add duo MUSIC EXCHANGE
- Extend 8 scrapers (liquid-room, shibuya-o, club-quattro, meets-otsuka, nishieifuku-jam, fever-shindaita, fad-yokohama, and new duo-music-exchange) to fetch 3 calendar months instead of 1-2, covering the full 65-day window
- Add duo MUSIC EXCHANGE scraper (渋谷, ~700 cap, /schedule/YYYY/index_YYYY-MM.html)
- Add npm test: Node.js built-in test runner verifies each scraper fetches all required month URLs via mocked fetch (10 tests, no extra deps)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app/scrapers/duo-music-exchange.ts')
-rw-r--r--app/scrapers/duo-music-exchange.ts103
1 files changed, 103 insertions, 0 deletions
diff --git a/app/scrapers/duo-music-exchange.ts b/app/scrapers/duo-music-exchange.ts
new file mode 100644
index 0000000..57814ea
--- /dev/null
+++ b/app/scrapers/duo-music-exchange.ts
@@ -0,0 +1,103 @@
+/**
+ * duo MUSIC EXCHANGE — https://duomusicexchange.com
+ *
+ * 月別HTML: /schedule/YYYY/index_YYYY-MM.html
+ * DOM構造:
+ * <section id="daybox">
+ * <div class="date"><span class="day">01</span></div>
+ * <div class="sche-details">
+ * <span class="artist">アーティスト名</span>
+ * <span class="details-title">イベントタイトル</span>
+ * <dl class="row">
+ * <dt>OPEN/START</dt><dd>18:00 / 19:00</dd>
+ * <dt>ADV./DOOR</dt><dd>¥3,000 / ¥3,500</dd>
+ * <dt>Ticket.</dt><dd><a href="...">...</a></dd>
+ * </dl>
+ * </div>
+ * </section>
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+export const venue: VenueMeta = { // Static metadata describing the duo MUSIC EXCHANGE venue.
+ id: "duo-music-exchange", // Copied onto every scraped event as its `venue_id`.
+ name: "duo MUSIC EXCHANGE",
+ url: "https://duomusicexchange.com", // Site root (no trailing slash); monthly schedule page URLs are built from it.
+ area: "渋谷", // Shibuya
+ capacity: 700, // Approximate figure.
+};
+
+/**
+ * Resolves an image `src` found on a schedule page to an absolute http(s) URL.
+ *
+ * @param src - Raw `src` attribute value as it appears in the HTML.
+ * @param pageUrl - URL of the page the attribute was read from; used as the
+ *   base when `src` is a relative reference.
+ * @returns The absolute URL, or `null` when `src` is empty, cannot be parsed,
+ *   or does not resolve to an http(s) resource (e.g. a `data:` URI).
+ */
+function resolveImageUrl(src: string, pageUrl: string): string | null {
+  const trimmed = src.trim();
+  if (!trimmed) return null;
+  // Absolute http(s) URLs are passed through verbatim so they are not re-encoded.
+  if (/^https?:\/\//i.test(trimmed)) return trimmed;
+  try {
+    // Standard URL resolution handles "img/a.jpg", "../a.jpg", "/a.jpg" and "//host/a.jpg".
+    const resolved = new URL(trimmed, pageUrl);
+    return resolved.protocol === "http:" || resolved.protocol === "https:" ? resolved.href : null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Fetches one monthly schedule page and extracts the events listed on it.
+ *
+ * @param year - Four-digit calendar year; used in the page URL and in each event date.
+ * @param month - Calendar month, 1-12.
+ * @returns One event per `section#daybox` that carries a valid day of the month
+ *   and a non-empty title. An empty array is returned when the HTTP response
+ *   status is not OK (for example, a month that has not been published yet).
+ * @throws Whatever `fetch` rejects with on a network-level failure; it is not caught here.
+ */
+async function scrapeMonth(year: number, month: number): Promise<EventInput[]> {
+  const mm = String(month).padStart(2, "0");
+  const url = `${venue.url}/schedule/${year}/index_${year}-${mm}.html`;
+  const res = await fetch(url);
+  if (!res.ok) return [];
+  const $ = cheerio.load(await res.text());
+  const events: EventInput[] = [];
+  // `month` is 1-based, so it is the 0-based index of the *next* month;
+  // day 0 of that month is the last day of the requested one.
+  const daysInMonth = new Date(year, month, 0).getDate();
+
+  $("section#daybox").each((_, el) => {
+    const $el = $(el);
+
+    const dayStr = $el.find(".date .day").first().text().trim();
+    const day = parseInt(dayStr, 10);
+    // Skip boxes without a usable day number so no malformed date string is emitted.
+    if (!Number.isInteger(day) || day < 1 || day > daysInMonth) return;
+    const date = `${year}-${mm}-${String(day).padStart(2, "0")}`;
+
+    const artist = $el.find(".sche-details .artist").first().text().trim() || null;
+    const title = $el.find(".sche-details .details-title").first().text().trim();
+    if (!title) return;
+
+    let openTime: string | null = null;
+    let startTime: string | null = null;
+    let price: string | null = null;
+    let ticketUrl: string | null = null;
+
+    // Each <dt> label decides how the <dd> that follows it is interpreted.
+    $el.find("dl.row dt").each((_, dt) => {
+      const label = $(dt).text().trim();
+      const $dd = $(dt).next("dd");
+      if (/OPEN/i.test(label)) {
+        // "18:00 / 19:00" → first time is doors open, second is start.
+        const times = $dd.text().trim().match(/(\d{1,2}:\d{2})/g) ?? [];
+        openTime = times[0] ?? null;
+        startTime = times[1] ?? null;
+      } else if (/ADV/i.test(label)) {
+        price = $dd.text().trim() || null;
+      } else if (/Ticket/i.test(label)) {
+        ticketUrl = $dd.find("a[href]").first().attr("href") ?? null;
+      }
+    });
+
+    const imgSrc = $el.find("img").first().attr("src") ?? null;
+    // Resolve against the page URL rather than concatenating path segments.
+    const imageUrl = imgSrc ? resolveImageUrl(imgSrc, url) : null;
+
+    events.push({
+      venue_id: venue.id,
+      title,
+      artist,
+      date,
+      open_time: openTime,
+      start_time: startTime,
+      price,
+      ticket_url: ticketUrl,
+      image_url: imageUrl,
+      source_url: url,
+    });
+  });
+
+  return events;
+}
+
+export const scraper: Scraper = {
+  venue,
+  /**
+   * Scrapes the current calendar month and the two months after it.
+   *
+   * All three month pages are requested before any is awaited, and the
+   * per-month results are concatenated in chronological order.
+   *
+   * @returns Every event found across the three monthly pages.
+   */
+  async scrape(): Promise<EventInput[]> {
+    const today = new Date();
+    const pending: Promise<EventInput[]>[] = [];
+    for (let ahead = 0; ahead < 3; ahead += 1) {
+      // The Date constructor rolls month indexes past 11 over into the next year.
+      const firstOfMonth = new Date(today.getFullYear(), today.getMonth() + ahead, 1);
+      pending.push(scrapeMonth(firstOfMonth.getFullYear(), firstOfMonth.getMonth() + 1));
+    }
+    const perMonth = await Promise.all(pending);
+    return perMonth.flat();
+  },
+};