import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/** Entry page of the venue's schedule; further months are reached via its navigation links. */
const SCHEDULE_URL = "http://www.fad-music.com/fad/?page_id=3";

/** Upper bound on the number of schedule pages followed in one scrape run. */
const MAX_PAGES = 3;

/** Navigation link text of the form "YYYY.MM". */
const NAV_MONTH_RE = /^(\d{4})\.(\d{2})$/;

// NOTE(review): the delimiter tag was missing from the original regex literal
// (it read `//i`); `<hr>` is reconstructed from the accompanying comment
// "Split into blocks by ... tags" — confirm against the live page markup.
const BLOCK_SEPARATOR_RE = /<hr\b[^>]*>/i;

/** Matches `<br>`, `<br/>` and `<br />` so they can be turned into newlines. */
const LINE_BREAK_RE = /<br\b[^>]*>/gi;

/** First line of an event paragraph: "MM.DD weekday" (tag stripping may insert spaces around the dot). */
const DATE_LINE_RE = /^(\d{1,2})\s*\.\s*(\d{2})\s+(mon|tue|wed|thu|fri|sat|sun)/i;

/** Line carrying door/start times and prices; it begins with "OPEN hh:mm". */
const TIME_LINE_RE = /^OPEN\s*\d{2}:\d{2}/;

/** Advance price; accepts both the half-width (U+00A5) and full-width (U+FFE5) yen sign. */
const ADV_PRICE_RE = /ADV\s*[\u00A5\uFFE5]([0-9,]+)/;

/** Substrings identifying links that point at a ticket vendor. */
const TICKET_DOMAINS = ["eplus.jp", "pia.jp", "lawson-ticket", "zaiko.io"];

export const venue: VenueMeta = {
  id: "fad-yokohama",
  name: "F.A.D YOKOHAMA",
  url: "http://www.fad-music.com/fad/",
  area: "横浜",
  capacity: 380,
};

/**
 * Derives which month a schedule page shows from its navigation links.
 *
 * Links inside `div.entry-content` whose href contains `page_id` and whose
 * text looks like "2026.04" are scanned in document order. The first one is
 * taken to be the previous month, so the page's own month is the month after
 * it; the href of the second one is returned as `nextUrl`.
 *
 * NOTE(review): a page that only has a "next" link would have that link
 * misread as the previous month — confirm the first page always links back.
 *
 * @param html Raw HTML of a schedule page.
 * @returns The page's year and month (1-12) plus the href of the next page,
 *   or the current local year/month with `nextUrl: null` when no month link
 *   is found.
 */
function getMonthContext(html: string): {
  year: number;
  month: number;
  nextUrl: string | null;
} {
  const $ = cheerio.load(html);

  let prevYear = 0;
  let prevMonth = 0;
  let nextUrl: string | null = null;

  $("div.entry-content a[href*='page_id']").each((_, el) => {
    const link = $(el);
    const m = NAV_MONTH_RE.exec(link.text().trim());
    if (!m) return true; // not a month link: keep scanning

    if (!prevYear) {
      prevYear = parseInt(m[1], 10);
      prevMonth = parseInt(m[2], 10);
      return true;
    }

    // Second month link: treat it as "next" and stop iterating.
    nextUrl = link.attr("href") ?? null;
    return false;
  });

  if (prevYear) {
    let month = prevMonth + 1;
    let year = prevYear;
    if (month > 12) {
      // December -> January of the following year.
      month = 1;
      year++;
    }
    return { year, month, nextUrl };
  }

  const now = new Date();
  return { year: now.getFullYear(), month: now.getMonth() + 1, nextUrl: null };
}

/**
 * Extracts the events listed on one schedule page.
 *
 * The content of `div.entry-content` is split into blocks on the separator
 * tag; the first block (navigation header) is dropped. For each remaining
 * block only the first `<p>` is parsed: its first line must be a date whose
 * month equals `month`, otherwise the block is skipped.
 *
 * @param html Raw HTML of the schedule page.
 * @param year Year used to build each event's ISO date.
 * @param month Month (1-12) the page shows; blocks dated in another month are ignored.
 * @param pageUrl URL stored as each event's `source_url`.
 * @returns One `EventInput` per block that could be parsed.
 */
function parsePageEvents(
  html: string,
  year: number,
  month: number,
  pageUrl: string
): EventInput[] {
  const $ = cheerio.load(html);
  const events: EventInput[] = [];
  const contentHtml = $("div.entry-content").html() ?? "";

  // Split into blocks on separator tags; skip the first block (navigation header).
  const blocks = contentHtml
    .split(BLOCK_SEPARATOR_RE)
    .slice(1)
    .filter((b) => b.trim() !== "");

  for (const block of blocks) {
    const $b = cheerio.load(block);
    const $firstP = $b("p").first();
    if (!$firstP.length) continue;

    // Turn <br> into newlines, strip every other tag, and normalise whitespace.
    const rawHtml = $firstP.html() ?? "";
    const lines = rawHtml
      .replace(LINE_BREAK_RE, "\n")
      .replace(/<[^>]+>/g, " ")
      .split("\n")
      .map((l) => l.replace(/\s+/g, " ").trim())
      .filter(Boolean);

    const firstLine = lines[0];
    if (firstLine === undefined) continue;

    // First line must be a date: "MM.DD weekday".
    const dateMatch = firstLine.match(DATE_LINE_RE);
    if (!dateMatch) continue;

    const pMonth = parseInt(dateMatch[1], 10);
    const day = parseInt(dateMatch[2], 10);
    if (pMonth !== month) continue;
    // Reject impossible day numbers rather than emitting an invalid date string.
    if (day < 1 || day > 31) continue;

    const date = `${year}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`;

    // Artist names are the spans whose inline style mentions "medium".
    const artistParts: string[] = [];
    $firstP.find("span").each((_, el) => {
      if (($b(el).attr("style") ?? "").includes("medium")) {
        const t = $b(el).text().trim();
        if (t) artistParts.push(t);
      }
    });
    const artist = artistParts.join(" ") || null;

    // Locate the "OPEN hh:mm ..." line; everything before it (after the date) is title text.
    const timePriceIdx = lines.findIndex((l) => TIME_LINE_RE.test(l));
    const end = timePriceIdx >= 0 ? timePriceIdx : lines.length;

    // Title: lines between the date and the time line, excluding exact artist matches.
    const titleLines = lines
      .slice(1, end)
      .filter((l) => !artistParts.includes(l));
    const title = titleLines.join(" ").trim() || lines[1] || "Event";

    // Parse OPEN/START times and the ADV price from the time line.
    const tpLine = timePriceIdx >= 0 ? lines[timePriceIdx] ?? "" : "";
    const openTime = tpLine.match(/OPEN\s*(\d{2}:\d{2})/)?.[1] ?? null;
    const startTime = tpLine.match(/START\s*(\d{2}:\d{2})/)?.[1] ?? null;
    const advMatch = tpLine.match(ADV_PRICE_RE);
    const price = advMatch ? `ADV¥${advMatch[1]}` : null;

    // Ticket URL: first absolute link anywhere in the block that points at a known vendor.
    let ticketUrl: string | null = null;
    $b("a[href]").each((_, el) => {
      const href = $b(el).attr("href") ?? "";
      if (href.startsWith("http") && TICKET_DOMAINS.some((d) => href.includes(d))) {
        ticketUrl = href;
        return false; // first match wins
      }
      return true;
    });

    events.push({
      venue_id: venue.id,
      title,
      artist,
      date,
      open_time: openTime,
      start_time: startTime,
      price,
      ticket_url: ticketUrl,
      source_url: pageUrl,
    });
  }

  return events;
}

export const scraper: Scraper = {
  venue,
  /**
   * Fetches the schedule starting at `SCHEDULE_URL` and follows the "next
   * month" link for up to `MAX_PAGES` pages, collecting events from each.
   * Stops early on a non-OK HTTP response or when a page has no next link.
   *
   * @returns All events parsed from the visited pages.
   */
  async scrape(): Promise<EventInput[]> {
    const allEvents: EventInput[] = [];
    let url: string | null = SCHEDULE_URL;

    for (let page = 0; page < MAX_PAGES && url; page++) {
      const res = await fetch(url);
      if (!res.ok) break;

      const html = await res.text();
      const { year, month, nextUrl } = getMonthContext(html);
      allEvents.push(...parsePageEvents(html, year, month, url));

      // Resolve against the current page so a relative href is still fetchable.
      url = nextUrl ? new URL(nextUrl, url).toString() : null;
    }

    return allEvents;
  },
};