diff options
Diffstat (limited to 'app/lib')
| -rw-r--r-- | app/lib/db.server.ts | 102 |
| -rw-r--r-- | app/lib/scraper-runner.server.ts | 81 |
2 files changed, 145 insertions, 38 deletions
diff --git a/app/lib/db.server.ts b/app/lib/db.server.ts index 0c55991..26735c6 100644 --- a/app/lib/db.server.ts +++ b/app/lib/db.server.ts @@ -45,6 +45,21 @@ function initSchema(db: Database.Database) { CREATE INDEX IF NOT EXISTS idx_events_date ON events(date); CREATE INDEX IF NOT EXISTS idx_events_venue_id ON events(venue_id); + + CREATE TABLE IF NOT EXISTS scrape_logs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + run_id TEXT NOT NULL, + venue_id TEXT NOT NULL, + venue_name TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'running', -- running | ok | error + events_saved INTEGER NOT NULL DEFAULT 0, + error TEXT, + started_at TEXT NOT NULL DEFAULT (datetime('now')), + finished_at TEXT + ); + + CREATE INDEX IF NOT EXISTS idx_scrape_logs_run_id ON scrape_logs(run_id); + CREATE INDEX IF NOT EXISTS idx_scrape_logs_venue_id ON scrape_logs(venue_id); `); } @@ -102,7 +117,19 @@ export function upsertVenue( .run(id, name, url, area ?? null); } -export function upsertEvent(event: EventInput) { +export function upsertEvent(raw: EventInput) { + // Ensure all named parameters exist (better-sqlite3 requires them all) + const event = { + artist: null, + start_time: null, + open_time: null, + ticket_url: null, + price: null, + image_url: null, + description: null, + source_url: null, + ...raw, + }; getDb() .prepare( `INSERT INTO events @@ -190,3 +217,76 @@ export function getVenues(): Venue[] { ) .all() as Venue[]; } + +// ---------- Scrape logs ---------- + +export interface ScrapeLog { + id: number; + run_id: string; + venue_id: string; + venue_name: string; + status: "running" | "ok" | "error"; + events_saved: number; + error: string | null; + started_at: string; + finished_at: string | null; +} + +export function insertScrapeLog( + run_id: string, + venue_id: string, + venue_name: string +): number { + const result = getDb() + .prepare( + `INSERT INTO scrape_logs (run_id, venue_id, venue_name, status) + VALUES (?, ?, ?, 'running')` + ) + .run(run_id, venue_id, venue_name); + 
return result.lastInsertRowid as number; +} + +export function updateScrapeLog( + id: number, + status: "ok" | "error", + events_saved: number, + error?: string +) { + getDb() + .prepare( + `UPDATE scrape_logs + SET status = ?, events_saved = ?, error = ?, finished_at = datetime('now') + WHERE id = ?` + ) + .run(status, events_saved, error ?? null, id); +} + +export function getLatestScrapeRun(): ScrapeLog[] { + return getDb() + .prepare( + `SELECT * FROM scrape_logs + WHERE run_id = (SELECT run_id FROM scrape_logs ORDER BY started_at DESC LIMIT 1) + ORDER BY id ASC` + ) + .all() as ScrapeLog[]; +} + +export function getScrapeRunById(run_id: string): ScrapeLog[] { + return getDb() + .prepare( + "SELECT * FROM scrape_logs WHERE run_id = ? ORDER BY id ASC" + ) + .all(run_id) as ScrapeLog[]; +} + +export function getLastScrapePerVenue(): ScrapeLog[] { + return getDb() + .prepare( + `SELECT s.* FROM scrape_logs s + INNER JOIN ( + SELECT venue_id, MAX(started_at) AS latest FROM scrape_logs GROUP BY venue_id + ) t ON s.venue_id = t.venue_id AND s.started_at = t.latest + ORDER BY s.venue_name ASC` + ) + .all() as ScrapeLog[]; +} diff --git a/app/lib/scraper-runner.server.ts b/app/lib/scraper-runner.server.ts index 191dd00..87dd16c 100644 --- a/app/lib/scraper-runner.server.ts +++ b/app/lib/scraper-runner.server.ts @@ -1,9 +1,16 @@ -import { upsertVenue, upsertEvent } from "./db.server"; +import { randomUUID } from "crypto"; +import { + upsertVenue, + upsertEvent, + insertScrapeLog, + updateScrapeLog, + type ScrapeLog, +} from "./db.server"; import { generateVenueMarkdown, generateAllVenueMarkdown } from "./markdown-writer.server"; import { ALL_SCRAPERS } from "~/scrapers/index"; import type { EventInput } from "./db.server"; -const SCRAPE_WINDOW_DAYS = 35; // ~1 month +const SCRAPE_WINDOW_DAYS = 35; function scrapeWindow(): { from: string; to: string } { const from = new Date(); @@ -21,81 +28,81 @@ function withinWindow(event: EventInput, from: string, to: string): 
boolean { } export interface ScrapeResult { + run_id: string; venue_id: string; venue_name: string; + status: "ok" | "error"; events_saved: number; - markdown_path?: string; error?: string; } -export async function runAllScrapers(): Promise<ScrapeResult[]> { +/** Fire-and-forget: start all scrapers in the background, return run_id immediately. */ +export function startAllScrapersAsync(): string { + const run_id = randomUUID(); + // Don't await — runs in background + void runAllScrapers(run_id); + return run_id; +} + +export function startScraperAsync(venueId: string): string { + const run_id = randomUUID(); + void runScraper(venueId, run_id); + return run_id; +} + +/** Runs all scrapers, writes logs to DB. Can be awaited (e.g. from CLI). */ +export async function runAllScrapers(run_id = randomUUID()): Promise<ScrapeResult[]> { const results: ScrapeResult[] = []; const successIds: string[] = []; for (const scraper of ALL_SCRAPERS) { const { venue } = scraper; upsertVenue(venue.id, venue.name, venue.url, venue.area); + const logId = insertScrapeLog(run_id, venue.id, venue.name); try { const { from, to } = scrapeWindow(); - const events = (await scraper.scrape()).filter((e) => - withinWindow(e, from, to) - ); + const events = (await scraper.scrape()).filter((e) => withinWindow(e, from, to)); for (const event of events) { upsertEvent(event); } + updateScrapeLog(logId, "ok", events.length); successIds.push(venue.id); - results.push({ - venue_id: venue.id, - venue_name: venue.name, - events_saved: events.length, - }); + results.push({ run_id, venue_id: venue.id, venue_name: venue.name, status: "ok", events_saved: events.length }); } catch (err) { - results.push({ - venue_id: venue.id, - venue_name: venue.name, - events_saved: 0, - error: err instanceof Error ? err.message : String(err), - }); + const error = err instanceof Error ? 
err.message : String(err); + updateScrapeLog(logId, "error", 0, error); + results.push({ run_id, venue_id: venue.id, venue_name: venue.name, status: "error", events_saved: 0, error }); } } - // Generate Markdown files for all venues that scraped successfully generateAllVenueMarkdown(successIds); - return results; } -export async function runScraper(venueId: string): Promise<ScrapeResult> { +/** Runs a single scraper by venue ID. */ +export async function runScraper(venueId: string, run_id = randomUUID()): Promise<ScrapeResult> { const scraper = ALL_SCRAPERS.find((s) => s.venue.id === venueId); if (!scraper) { - return { venue_id: venueId, venue_name: venueId, events_saved: 0, error: "Scraper not found" }; + return { run_id, venue_id: venueId, venue_name: venueId, status: "error", events_saved: 0, error: "Scraper not found" }; } const { venue } = scraper; upsertVenue(venue.id, venue.name, venue.url, venue.area); + const logId = insertScrapeLog(run_id, venue.id, venue.name); try { const { from, to } = scrapeWindow(); - const events = (await scraper.scrape()).filter((e) => - withinWindow(e, from, to) - ); + const events = (await scraper.scrape()).filter((e) => withinWindow(e, from, to)); for (const event of events) { upsertEvent(event); } + updateScrapeLog(logId, "ok", events.length); generateVenueMarkdown(venue.id); - return { - venue_id: venue.id, - venue_name: venue.name, - events_saved: events.length, - markdown_path: `events/${venue.id}.md`, - }; + return { run_id, venue_id: venue.id, venue_name: venue.name, status: "ok", events_saved: events.length }; } catch (err) { - return { - venue_id: venue.id, - venue_name: venue.name, - events_saved: 0, - error: err instanceof Error ? err.message : String(err), - }; + const error = err instanceof Error ? err.message : String(err); + updateScrapeLog(logId, "error", 0, error); + return { run_id, venue_id: venue.id, venue_name: venue.name, status: "error", events_saved: 0, error }; } } |
