import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";
/**
 * Schedule page that `scraper.scrape()` fetches first. It is also recorded as
 * the `source_url` of every event parsed from that first page.
 */
const SCHEDULE_URL = "http://www.fad-music.com/fad/?page_id=3";
/**
 * Static metadata for the F.A.D YOKOHAMA venue. Its `id` is written to the
 * `venue_id` field of every event this module produces.
 */
export const venue: VenueMeta = {
id: "fad-yokohama",
name: "F.A.D YOKOHAMA",
url: "http://www.fad-music.com/fad/",
// "横浜" is Yokohama.
area: "横浜",
// NOTE(review): presumably the audience capacity in people — confirm against VenueMeta.
capacity: 380,
};
/**
 * Works out which month a schedule page shows by reading its month navigation.
 *
 * The navigation inside `div.entry-content` consists of links whose text looks
 * like "2026.04". The first such link is taken to be the previous month and
 * the second one the next month, so the displayed month is "previous + 1".
 * NOTE(review): a page that only has a "next" link would be misread as having
 * a "previous" link — confirm this cannot happen on the live site.
 *
 * @param html Full HTML of a schedule page.
 * @returns The displayed year and month (1-12) and the href of the next-month
 *   link, or `null` when no second navigation link exists. When no navigation
 *   link is found at all, the current local year/month is returned with a
 *   `null` `nextUrl`.
 */
function getMonthContext(html: string): {
  year: number;
  month: number;
  nextUrl: string | null;
} {
  const $ = cheerio.load(html);
  const monthLabel = /^(\d{4})\.(\d{2})$/;

  let prevYear = 0;
  let prevMonth = 0;
  let nextUrl: string | null = null;

  for (const el of $("div.entry-content a[href*='page_id']").toArray()) {
    const link = $(el);
    const parts = link.text().trim().match(monthLabel);
    if (parts === null) continue;

    if (prevYear === 0) {
      // First month-shaped link: remember it as the previous month.
      prevYear = Number.parseInt(parts[1], 10);
      prevMonth = Number.parseInt(parts[2], 10);
      continue;
    }

    // Second month-shaped link: this is the next-month page; nothing else is needed.
    nextUrl = link.attr("href") ?? null;
    break;
  }

  if (prevYear === 0) {
    // No navigation found: fall back to today's month.
    const today = new Date();
    return {
      year: today.getFullYear(),
      month: today.getMonth() + 1,
      nextUrl: null,
    };
  }

  // Step one month forward from the previous month, rolling over the year end.
  const wraps = prevMonth + 1 > 12;
  return {
    year: wraps ? prevYear + 1 : prevYear,
    month: wraps ? 1 : prevMonth + 1,
    nextUrl,
  };
}
/**
 * Extracts the events of one calendar month from a schedule page.
 *
 * The content of `div.entry-content` is cut into per-event blocks; the first
 * block (the navigation header) is discarded. Within each block only the first
 * `<p>` is read: its first line must be a date ("MM.DD weekday"), the lines up
 * to the "OPEN hh:mm" line form the title, and the OPEN line supplies the
 * open/start times and the advance price. Blocks whose date does not belong to
 * `month` are skipped.
 *
 * @param html Full HTML of the schedule page.
 * @param year Year used to build each event's ISO date.
 * @param month Month (1-12) the page is expected to show.
 * @param pageUrl URL stored as `source_url` on every returned event.
 * @returns One `EventInput` per recognised block, in page order.
 */
function parsePageEvents(
  html: string,
  year: number,
  month: number,
  pageUrl: string
): EventInput[] {
  const $ = cheerio.load(html);
  const events: EventInput[] = [];
  // Substrings that identify a link as pointing at a ticket vendor.
  const ticketDomains = ["eplus.jp", "pia.jp", "lawson-ticket", "zaiko.io"];
  // Collapses every whitespace run (including non-breaking spaces) to one space.
  const collapse = (s: string): string => s.replace(/\s+/g, " ").trim();

  const contentHtml = $("div.entry-content").html() ?? "";
  // Split into blocks on <hr> tags; skip the first block (navigation header).
  // NOTE(review): assumes events are separated by <hr> elements — confirm
  // against the live markup.
  const blocks = contentHtml
    .split(/<hr\b[^>]*>/i)
    .slice(1)
    .filter((b) => b.trim());

  for (const block of blocks) {
    const $b = cheerio.load(block);
    const $firstP = $b("p").first();
    if (!$firstP.length) continue;

    // Turn <br> into newlines and every other tag into a space, so that
    // adjacent inline elements do not fuse their text together.
    const flattened = ($firstP.html() ?? "")
      .replace(/<br\b[^>]*>/gi, "\n")
      .replace(/<[^>]+>/g, " ");
    // `.html()` returns serialised markup, so "&" arrives as "&amp;" and a
    // non-breaking space as "&nbsp;". Round-trip through a scratch element to
    // decode entities before matching on the text.
    const lines = $b("<div></div>")
      .html(flattened)
      .text()
      .split("\n")
      .map(collapse)
      .filter(Boolean);
    if (!lines.length) continue;

    // First line must be a date: "MM.DD weekday" or "MM . DD weekday"
    // (tag stripping may insert spaces).
    const dateMatch = lines[0].match(
      /^(\d{1,2})\s*\.\s*(\d{2})\s+(mon|tue|wed|thu|fri|sat|sun)/i
    );
    if (!dateMatch) continue;
    const pMonth = parseInt(dateMatch[1], 10);
    const day = parseInt(dateMatch[2], 10);
    if (pMonth !== month) continue;
    const date = `${year}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`;

    // Artist names are the spans styled with a "medium" font size. Their text
    // is normalised like `lines` so the exact-match exclusion below works.
    const artistParts = $firstP
      .find("span")
      .toArray()
      .filter((el) => ($b(el).attr("style") ?? "").includes("medium"))
      .map((el) => collapse($b(el).text()))
      .filter(Boolean);
    const artist = artistParts.join(" ") || null;

    // Locate the "OPEN hh:mm ..." line; everything before it is title material.
    const timePriceIdx = lines.findIndex((l) => /^OPEN\s*\d{2}:\d{2}/.test(l));
    const end = timePriceIdx >= 0 ? timePriceIdx : lines.length;

    // Title: lines between the date and the time line, excluding lines that
    // are exactly an artist name.
    const titleLines = lines
      .slice(1, end)
      .filter((l) => !artistParts.includes(l));
    const title = titleLines.join(" ").trim() || lines[1] || "Event";

    // Parse OPEN/START times and the ADV (advance) price.
    const tpLine = timePriceIdx >= 0 ? lines[timePriceIdx] : "";
    const openTime = tpLine.match(/OPEN\s*(\d{2}:\d{2})/)?.[1] ?? null;
    const startTime = tpLine.match(/START\s*(\d{2}:\d{2})/)?.[1] ?? null;
    // Accept both the half-width (U+00A5) and full-width (U+FFE5) yen sign.
    const advMatch = tpLine.match(/ADV\s*[\u00A5\uFFE5]([0-9,]+)/);
    const price = advMatch ? `ADV¥${advMatch[1]}` : null;

    // Ticket URL: first absolute link anywhere in the block that points at a
    // known ticket vendor.
    const ticketUrl =
      $b("a[href]")
        .toArray()
        .map((el) => $b(el).attr("href") ?? "")
        .find(
          (href) =>
            href.startsWith("http") &&
            ticketDomains.some((d) => href.includes(d))
        ) ?? null;

    events.push({
      venue_id: venue.id,
      title,
      artist,
      date,
      open_time: openTime,
      start_time: startTime,
      price,
      ticket_url: ticketUrl,
      source_url: pageUrl,
    });
  }
  return events;
}
/**
 * Scraper for F.A.D YOKOHAMA: reads the schedule page and, when the page links
 * to a following month, that page as well.
 */
export const scraper: Scraper = {
  venue,
  /**
   * Fetches and parses the current schedule page plus the next-month page.
   *
   * @returns Events of the displayed month followed by those of the next
   *   month. The next month is skipped (not an error) when the page has no
   *   next-month link or that page answers with a non-2xx status.
   * @throws Error `HTTP <status>` when the first schedule page answers with a
   *   non-2xx status; network failures from `fetch` propagate unchanged.
   */
  async scrape(): Promise<EventInput[]> {
    const res = await fetch(SCHEDULE_URL);
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const html = await res.text();

    const { year, month, nextUrl } = getMonthContext(html);
    const events = parsePageEvents(html, year, month, SCHEDULE_URL);
    if (!nextUrl) return events;

    // The href comes straight from the page and may be relative; Node's fetch
    // rejects relative URLs, so resolve it against the page it was found on.
    const nextPageUrl = new URL(nextUrl, SCHEDULE_URL).href;
    const nextRes = await fetch(nextPageUrl);
    if (!nextRes.ok) return events;

    const nextHtml = await nextRes.text();
    // The linked page shows the month after the current one.
    const nextMonth = month === 12 ? 1 : month + 1;
    const nextYear = month === 12 ? year + 1 : year;
    events.push(...parsePageEvents(nextHtml, nextYear, nextMonth, nextPageUrl));
    return events;
  },
};