import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/**
 * Static metadata for the LIQUID ROOM venue. `venue.id` is written to every
 * scraped event as its `venue_id`.
 */
export const venue: VenueMeta = {
  id: "liquid-room",
  name: "LIQUID ROOM",
  url: "https://www.liquidroom.net",
  area: "恵比寿",
  capacity: 1000,
};

/** Matches a trailing `_YYYYMMDD` suffix on an event link's href. */
const HREF_DATE_PATTERN = /_(\d{4})(\d{2})(\d{2})$/;

/** Matches the first `HH:MM` (two digits each) occurrence in a string. */
const TIME_PATTERN = /\d{2}:\d{2}/;

/** Month offsets (relative to the current month) whose schedule pages are fetched. */
const MONTH_OFFSETS = [0, 1, 2];

/**
 * Extracts events from the HTML of one schedule page.
 *
 * Every `<article>` element is treated as a candidate event. An article is
 * skipped when the href of its `a.s_link` does not end in `_YYYYMMDD`, or when
 * its first `<h2>` has no text.
 *
 * When a `p.subtitle` with text is present, the subtitle becomes the event
 * title and the `<h2>` text becomes the artist; otherwise the `<h2>` text is
 * the title and the artist is `null`.
 *
 * @param html Raw HTML of a schedule page. An empty string yields no events.
 * @returns The events found on the page, in document order.
 */
function parseHtml(html: string): EventInput[] {
  const $ = cheerio.load(html);
  const events: EventInput[] = [];

  $("article").each((_, el) => {
    const $el = $(el);

    // The event date is taken from the link itself rather than from page text.
    const href = $el.find("a.s_link").attr("href") ?? "";
    const dateMatch = href.match(HREF_DATE_PATTERN);
    if (!dateMatch) return;
    const date = `${dateMatch[1]}-${dateMatch[2]}-${dateMatch[3]}`;

    const h2 = $el.find("h2").first().text().trim();
    if (!h2) return;

    const subtitle = $el.find("p.subtitle").first().text().trim();
    const title = subtitle || h2;
    const artist = subtitle ? h2 : null;

    // Returns the first HH:MM found in the <dd> text of every <dl> whose <dt>
    // text contains `label`, or null when there is none.
    const timeFor = (label: string): string | null =>
      $el
        .find("dl")
        .filter((_, dl) => $(dl).find("dt").text().includes(label))
        .find("dd")
        .text()
        .trim()
        .match(TIME_PATTERN)?.[0] ?? null;

    events.push({
      venue_id: venue.id,
      title,
      artist,
      date,
      open_time: timeFor("OPEN"),
      start_time: timeFor("START"),
      image_url: $el.find("div.left img").attr("src") ?? null,
      source_url: href,
    });
  });

  return events;
}

/**
 * Builds the schedule page URL for the month `offset` months after `now`.
 * Year rollover is handled by the `Date` constructor; the month is derived
 * from the runtime's local time zone.
 */
function scheduleUrl(now: Date, offset: number): string {
  const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
  const month = String(d.getMonth() + 1).padStart(2, "0");
  return `https://www.liquidroom.net/schedule/${d.getFullYear()}/${month}`;
}

/**
 * Fetches a page and resolves to its body text. A non-OK HTTP status resolves
 * to an empty string (which parses to zero events); a failed request rejects.
 */
async function fetchHtml(url: string): Promise<string> {
  const response = await fetch(url);
  return response.ok ? response.text() : "";
}

/** Scraper for the LIQUID ROOM schedule pages. */
export const scraper: Scraper = {
  venue,

  /**
   * Fetches the schedule pages for the current month and the following two
   * months in parallel, parses each one, and returns the combined events with
   * duplicates removed.
   *
   * Events are de-duplicated by `source_url`, falling back to `title` when
   * `source_url` is nullish; the first occurrence is kept.
   *
   * @returns The de-duplicated events from all fetched pages.
   * @throws Rejects if any of the page requests fails (e.g. a network error).
   */
  async scrape(): Promise<EventInput[]> {
    const now = new Date();
    const urls = MONTH_OFFSETS.map((offset) => scheduleUrl(now, offset));
    const htmls = await Promise.all(urls.map(fetchHtml));

    const seen = new Set<string>();
    return htmls.flatMap(parseHtml).filter((e) => {
      const key = e.source_url ?? e.title;
      if (seen.has(key)) return false;
      seen.add(key);
      return true;
    });
  },
};