diff options
| author | yyamashita <yyamashita@mosquit.one> | 2026-05-07 22:33:16 +0900 |
|---|---|---|
| committer | yyamashita <yyamashita@mosquit.one> | 2026-05-07 22:33:16 +0900 |
| commit | 5c317d8a75cd398719b1e14d50ac95ecf5dafd02 (patch) | |
| tree | 6f494a4ddd3d93034a095c528353d108ea6ae951 /app/scrapers | |
| parent | fc5e9630ddff9c1864c7027e2c14d71b4f789e0a (diff) | |
Add F.A.D YOKOHAMA scraper
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app/scrapers')
| -rw-r--r-- | app/scrapers/fad-yokohama.ts | 176 | ||||
| -rw-r--r-- | app/scrapers/index.ts | 2 |
2 files changed, 178 insertions, 0 deletions
// app/scrapers/fad-yokohama.ts (new file in this commit, index 0000000..e1aa95c)
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

const SCHEDULE_URL = "http://www.fad-music.com/fad/?page_id=3";

/** Substrings that mark a link as pointing at a ticket vendor. */
const TICKET_DOMAINS = ["eplus.jp", "pia.jp", "lawson-ticket", "zaiko.io"];

/** Text of a month navigation link, e.g. "2026.04". */
const NAV_MONTH_RE = /^(\d{4})\.(\d{2})$/;

/**
 * First line of an event block: "MM.DD weekday". Whitespace around the dot is
 * tolerated because tag stripping may insert spaces there.
 */
const DATE_LINE_RE =
  /^(\d{1,2})\s*\.\s*(\d{2})\s+(mon|tue|wed|thu|fri|sat|sun)/i;

/** Line carrying the door/start times and price; it starts with "OPEN hh:mm". */
const OPEN_LINE_RE = /^OPEN\s*\d{2}:\d{2}/;

export const venue: VenueMeta = {
  id: "fad-yokohama",
  name: "F.A.D YOKOHAMA",
  url: "http://www.fad-music.com/fad/",
  area: "横浜",
};

/** Returns the calendar month that follows `year`/`month` (month is 1-based). */
function followingMonth(
  year: number,
  month: number
): { year: number; month: number } {
  return month >= 12 ? { year: year + 1, month: 1 } : { year, month: month + 1 };
}

/** Collapses every whitespace run (including NBSP) to one space and trims. */
function normalizeWhitespace(text: string): string {
  return text.replace(/\s+/g, " ").trim();
}

/**
 * Resolves `href` against `base`, so that relative links can be fetched.
 * Returns null when the result is not a valid URL.
 */
function toAbsoluteUrl(href: string, base: string): string | null {
  try {
    return new URL(href, base).toString();
  } catch {
    return null;
  }
}

/**
 * Converts an HTML fragment into trimmed, non-empty text lines.
 *
 * `<br>` becomes a line break and every other tag becomes a space. The result
 * is then re-parsed so that entities left in the serialized HTML (`&amp;`,
 * `&nbsp;`, ...) are decoded into the characters they stand for.
 */
function htmlToLines(fragmentHtml: string): string[] {
  const tagless = fragmentHtml
    .replace(/<br\b[^>]*>/gi, "\n")
    .replace(/<[^>]+>/g, " ");
  const decoded = cheerio.load(tagless).root().text();
  return decoded.split("\n").map(normalizeWhitespace).filter(Boolean);
}

/**
 * Works out which month the schedule page shows and where the next page is.
 *
 * The page navigation inside `div.entry-content` contains links whose text is
 * "YYYY.MM". The first such link is taken to be the previous month, so the
 * displayed month is the one after it; the second is taken to be the link to
 * the next month.
 * NOTE(review): a page that has no "previous" link would be misread, because
 * its first link would then be the next month — confirm against the site.
 *
 * @param html - HTML of the schedule page.
 * @returns The displayed year and 1-based month, plus the absolute URL of the
 *   next month's page (null when there is none). Falls back to the current
 *   local date when no navigation link is found.
 */
function getMonthContext(html: string): {
  year: number;
  month: number;
  nextUrl: string | null;
} {
  const $ = cheerio.load(html);
  let previous: { year: number; month: number } | null = null;
  let nextUrl: string | null = null;

  for (const el of $("div.entry-content a[href*='page_id']").toArray()) {
    const m = $(el).text().trim().match(NAV_MONTH_RE);
    if (!m) continue;

    if (!previous) {
      previous = { year: parseInt(m[1], 10), month: parseInt(m[2], 10) };
      continue;
    }

    // Second month link: resolve it so a relative href is still fetchable.
    const href = $(el).attr("href");
    nextUrl = href ? toAbsoluteUrl(href, SCHEDULE_URL) : null;
    break;
  }

  if (previous) {
    return { ...followingMonth(previous.year, previous.month), nextUrl };
  }

  const now = new Date();
  return { year: now.getFullYear(), month: now.getMonth() + 1, nextUrl: null };
}

/**
 * Extracts the events listed on one month page.
 *
 * The content of `div.entry-content` is split on `<hr>`; the first chunk is
 * the navigation header and is skipped. Within each remaining chunk only the
 * first `<p>` is read: its first line must be a "MM.DD weekday" date for the
 * requested month, otherwise the chunk is ignored.
 *
 * @param html - HTML of the month page.
 * @param year - Year to stamp on the events.
 * @param month - 1-based month the page is expected to show; blocks dated in
 *   any other month are skipped.
 * @param pageUrl - URL recorded as each event's `source_url`.
 * @returns One `EventInput` per recognised event block.
 */
function parsePageEvents(
  html: string,
  year: number,
  month: number,
  pageUrl: string
): EventInput[] {
  const $ = cheerio.load(html);
  const contentHtml = $("div.entry-content").html() ?? "";
  const blocks = contentHtml
    .split(/<hr\b[^>]*>/i)
    .slice(1)
    .filter((b) => b.trim());

  // Day 0 of the following month is the last day of this one.
  const daysInMonth = new Date(year, month, 0).getDate();
  const events: EventInput[] = [];

  for (const block of blocks) {
    const $b = cheerio.load(block);
    const $firstP = $b("p").first();
    if (!$firstP.length) continue;

    const lines = htmlToLines($firstP.html() ?? "");
    const dateMatch = lines[0]?.match(DATE_LINE_RE);
    if (!dateMatch) continue;

    const pMonth = parseInt(dateMatch[1], 10);
    const day = parseInt(dateMatch[2], 10);
    // Skip blocks from another month and impossible days such as "05.00".
    if (pMonth !== month || day < 1 || day > daysInMonth) continue;

    const date = `${year}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`;

    // Artist names are the spans styled with a "medium" font size. They are
    // normalised the same way as `lines` so the exact-match filter below works.
    const artistParts = $firstP
      .find("span")
      .toArray()
      .filter((el) => ($b(el).attr("style") ?? "").includes("medium"))
      .map((el) => normalizeWhitespace($b(el).text()))
      .filter(Boolean);
    const artist = artistParts.join(" ") || null;

    // Everything between the date line and the OPEN line describes the event.
    const timePriceIdx = lines.findIndex((l) => OPEN_LINE_RE.test(l));
    const bodyLines = lines.slice(
      1,
      timePriceIdx >= 0 ? timePriceIdx : lines.length
    );

    // Title: body lines that are not exactly an artist name. If nothing is
    // left, fall back to the first body line, then the artist — never the
    // OPEN/START line itself.
    const titleLines = bodyLines.filter((l) => !artistParts.includes(l));
    const title =
      titleLines.join(" ").trim() || bodyLines[0] || artist || "Event";

    // Parse OPEN/START times and ADV price from the time line.
    const tpLine = timePriceIdx >= 0 ? (lines[timePriceIdx] ?? "") : "";
    const openTime = tpLine.match(/OPEN\s*(\d{2}:\d{2})/)?.[1] ?? null;
    const startTime = tpLine.match(/START\s*(\d{2}:\d{2})/)?.[1] ?? null;
    // Accept both the half-width (U+00A5) and full-width (U+FFE5) yen sign.
    const advMatch = tpLine.match(/ADV\s*[\u00A5\uFFE5]\s*([0-9,]+)/);
    const price = advMatch ? `ADV¥${advMatch[1]}` : null;

    // First absolute link anywhere in the block that points at a ticket vendor.
    const ticketUrl =
      $b("a[href]")
        .toArray()
        .map((el) => $b(el).attr("href") ?? "")
        .find(
          (href) =>
            href.startsWith("http") &&
            TICKET_DOMAINS.some((d) => href.includes(d))
        ) ?? null;

    events.push({
      venue_id: venue.id,
      title,
      artist,
      date,
      open_time: openTime,
      start_time: startTime,
      price,
      ticket_url: ticketUrl,
      source_url: pageUrl,
    });
  }

  return events;
}

export const scraper: Scraper = {
  venue,
  /**
   * Scrapes the currently displayed month and, when the page links to one,
   * the following month.
   *
   * @returns Events from both pages, current month first.
   * @throws Error `HTTP <status>` when the main schedule page cannot be
   *   fetched. Failures on the optional next-month page are logged and
   *   ignored so the current month's events are still returned.
   */
  async scrape(): Promise<EventInput[]> {
    const res = await fetch(SCHEDULE_URL);
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const html = await res.text();

    const { year, month, nextUrl } = getMonthContext(html);
    const events = parsePageEvents(html, year, month, SCHEDULE_URL);

    if (nextUrl) {
      const next = followingMonth(year, month);
      try {
        const nextRes = await fetch(nextUrl);
        if (nextRes.ok) {
          const nextHtml = await nextRes.text();
          events.push(
            ...parsePageEvents(nextHtml, next.year, next.month, nextUrl)
          );
        }
      } catch (err: unknown) {
        // Best effort: a non-OK status is already tolerated above, so a
        // network error must not throw away the current month either.
        const reason = err instanceof Error ? err.message : String(err);
        console.warn(`[${venue.id}] next-month fetch failed: ${reason}`);
      }
    }

    return events;
  },
};

/*
 * Companion change in the same commit (unchanged here):
 *
 * diff --git a/app/scrapers/index.ts b/app/scrapers/index.ts
 * index 2d7f1d1..0f17469 100644
 * --- a/app/scrapers/index.ts
 * +++ b/app/scrapers/index.ts
 * @@ -15,6 +15,7 @@ import { scraper as nishieifukuJam } from "./nishieifuku-jam";
 *  import { scraper as feverShindaita } from "./fever-shindaita";
 *  import { scraper as moonStepNakano } from "./moon-step-nakano";
 *  import { scraper as modShibasaki } from "./mod-shibasaki";
 * +import { scraper as fadYokohama } from "./fad-yokohama";
 *
 *  export const ALL_SCRAPERS: Scraper[] = [
 *    liquidRoom,
 * @@ -29,6 +30,7 @@ export const ALL_SCRAPERS: Scraper[] = [
 *    feverShindaita,
 *    moonStepNakano,
 *    modShibasaki,
 * +  fadYokohama,
 *  ];
 *
 *  export type { Scraper } from "./base";
 */
