import { randomUUID } from "crypto"; import { upsertVenue, upsertEvent, insertScrapeLog, updateScrapeLog, type ScrapeLog, } from "./db.server"; import { generateVenueMarkdown, generateAllVenueMarkdown } from "./markdown-writer.server"; import { closeBrowser } from "./playwright.server"; import { ALL_SCRAPERS } from "~/scrapers/index"; import type { EventInput } from "./db.server"; const SCRAPE_WINDOW_DAYS = 65; function scrapeWindow(): { from: string; to: string } { const from = new Date(); from.setHours(0, 0, 0, 0); const to = new Date(from); to.setDate(to.getDate() + SCRAPE_WINDOW_DAYS); return { from: from.toISOString().slice(0, 10), to: to.toISOString().slice(0, 10), }; } function withinWindow(event: EventInput, from: string, to: string): boolean { return event.date >= from && event.date <= to; } export interface ScrapeResult { run_id: string; venue_id: string; venue_name: string; status: "ok" | "error"; events_saved: number; error?: string; } /** Fire-and-forget: start all scrapers in the background, return run_id immediately. */ export function startAllScrapersAsync(): string { const run_id = randomUUID(); // Don't await — runs in background void runAllScrapers(run_id); return run_id; } export function startScraperAsync(venueId: string): string { const run_id = randomUUID(); void runScraper(venueId, run_id); return run_id; } /** Runs all scrapers, writes logs to DB. Can be awaited (e.g. from CLI). */ export async function runAllScrapers(run_id = randomUUID()): Promise { const results: ScrapeResult[] = []; const successIds: string[] = []; for (const scraper of ALL_SCRAPERS) { const { venue } = scraper; upsertVenue(venue.id, venue.name, venue.url, venue.area, venue.capacity); const logId = insertScrapeLog(run_id, venue.id, venue.name); try { const { from, to } = scrapeWindow(); const events = (await scraper.scrape()).filter((e) => withinWindow(e, from, to)); for (const event of events) { upsertEvent(event); } updateScrapeLog(logId, "ok", events.length); successIds.push(venue.id); results.push({ run_id, venue_id: venue.id, venue_name: venue.name, status: "ok", events_saved: events.length }); } catch (err) { const error = err instanceof Error ? err.message : String(err); updateScrapeLog(logId, "error", 0, error); results.push({ run_id, venue_id: venue.id, venue_name: venue.name, status: "error", events_saved: 0, error }); } } generateAllVenueMarkdown(successIds); // Close shared Playwright browser if it was opened by any scraper await closeBrowser(); return results; } /** Runs a single scraper by venue ID. */ export async function runScraper(venueId: string, run_id = randomUUID()): Promise { const scraper = ALL_SCRAPERS.find((s) => s.venue.id === venueId); if (!scraper) { return { run_id, venue_id: venueId, venue_name: venueId, status: "error", events_saved: 0, error: "Scraper not found" }; } const { venue } = scraper; upsertVenue(venue.id, venue.name, venue.url, venue.area, venue.capacity); const logId = insertScrapeLog(run_id, venue.id, venue.name); try { const { from, to } = scrapeWindow(); const events = (await scraper.scrape()).filter((e) => withinWindow(e, from, to)); for (const event of events) { upsertEvent(event); } updateScrapeLog(logId, "ok", events.length); generateVenueMarkdown(venue.id); return { run_id, venue_id: venue.id, venue_name: venue.name, status: "ok", events_saved: events.length }; } catch (err) { const error = err instanceof Error ? 
err.message : String(err); updateScrapeLog(logId, "error", 0, error); return { run_id, venue_id: venue.id, venue_name: venue.name, status: "error", events_saved: 0, error }; } finally { await closeBrowser(); } }
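
/*
 * Usage sketch (illustrative only, not part of this module): how a CLI entry
 * point might await runAllScrapers and report failures, per the "can be
 * awaited (e.g. from CLI)" note above. The script path, the import path for
 * this module, and the exit-code handling are assumptions, not something this
 * repo necessarily defines.
 *
 *   // scripts/scrape.ts (hypothetical)
 *   import { runAllScrapers } from "~/lib/scrape-runner.server";
 *
 *   async function main() {
 *     const results = await runAllScrapers();
 *     const failed = results.filter((r) => r.status === "error");
 *     for (const r of failed) {
 *       console.error(`${r.venue_name}: ${r.error}`);
 *     }
 *     process.exitCode = failed.length > 0 ? 1 : 0;
 *   }
 *   void main();
 */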