summaryrefslogtreecommitdiff
path: root/app/lib/scraper-runner.server.ts
diff options
context:
space:
mode:
authoryyamashita <yyamashita@mosquit.one>2026-05-06 22:24:38 +0900
committeryyamashita <yyamashita@mosquit.one>2026-05-06 22:24:38 +0900
commit538fd636e25595d88a958344d285c0e7cf44e530 (patch)
treeeb2999f355570224fa96877d5043af2ef3ec76ef /app/lib/scraper-runner.server.ts
parentf817604858891edb79e26459dae884b158774db1 (diff)
Async scraping, scrape_logs, and CLI
Background scraping: - POST /api/scrape returns 202 immediately with run_id; scraping runs async - GET /api/scrape-status?run_id=xxx polls for results per venue - scrape_logs table: per-venue status (running/ok/error), events_saved, error, timestamps CLI (npm run scrape): - npm run scrape — 全会場をスクレイプ、結果を色付きで出力 - npm run scrape liquid-room — 特定会場のみ - npm run scrape -- --list — 登録済み会場一覧を表示 - エラー時は exit code 1 + エラーメッセージを dim 表示 Venues page: - 最終スクレイプ日時・成否をインラインで表示 - 会場ごとの「更新」ボタンを追加 Bug fix: upsertEvent に description/optional fields のデフォルト値を設定し better-sqlite3 の "Missing named parameter" エラーを解消 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app/lib/scraper-runner.server.ts')
-rw-r--r--app/lib/scraper-runner.server.ts81
1 file changed, 44 insertions, 37 deletions
diff --git a/app/lib/scraper-runner.server.ts b/app/lib/scraper-runner.server.ts
index 191dd00..87dd16c 100644
--- a/app/lib/scraper-runner.server.ts
+++ b/app/lib/scraper-runner.server.ts
@@ -1,9 +1,16 @@
-import { upsertVenue, upsertEvent } from "./db.server";
+import { randomUUID } from "crypto";
+import {
+ upsertVenue,
+ upsertEvent,
+ insertScrapeLog,
+ updateScrapeLog,
+ type ScrapeLog,
+} from "./db.server";
import { generateVenueMarkdown, generateAllVenueMarkdown } from "./markdown-writer.server";
import { ALL_SCRAPERS } from "~/scrapers/index";
import type { EventInput } from "./db.server";
-const SCRAPE_WINDOW_DAYS = 35; // ~1 month
+const SCRAPE_WINDOW_DAYS = 35;
function scrapeWindow(): { from: string; to: string } {
const from = new Date();
@@ -21,81 +28,81 @@ function withinWindow(event: EventInput, from: string, to: string): boolean {
}
export interface ScrapeResult {
+ run_id: string;
venue_id: string;
venue_name: string;
+ status: "ok" | "error";
events_saved: number;
- markdown_path?: string;
error?: string;
}
-export async function runAllScrapers(): Promise<ScrapeResult[]> {
+/** Fire-and-forget: start all scrapers in the background, return run_id immediately. */
+export function startAllScrapersAsync(): string {
+ const run_id = randomUUID();
+ // Don't await — runs in background
+ void runAllScrapers(run_id);
+ return run_id;
+}
+
+export function startScraperAsync(venueId: string): string {
+ const run_id = randomUUID();
+ void runScraper(venueId, run_id);
+ return run_id;
+}
+
+/** Runs all scrapers, writes logs to DB. Can be awaited (e.g. from CLI). */
+export async function runAllScrapers(run_id = randomUUID()): Promise<ScrapeResult[]> {
const results: ScrapeResult[] = [];
const successIds: string[] = [];
for (const scraper of ALL_SCRAPERS) {
const { venue } = scraper;
upsertVenue(venue.id, venue.name, venue.url, venue.area);
+ const logId = insertScrapeLog(run_id, venue.id, venue.name);
try {
const { from, to } = scrapeWindow();
- const events = (await scraper.scrape()).filter((e) =>
- withinWindow(e, from, to)
- );
+ const events = (await scraper.scrape()).filter((e) => withinWindow(e, from, to));
for (const event of events) {
upsertEvent(event);
}
+ updateScrapeLog(logId, "ok", events.length);
successIds.push(venue.id);
- results.push({
- venue_id: venue.id,
- venue_name: venue.name,
- events_saved: events.length,
- });
+ results.push({ run_id, venue_id: venue.id, venue_name: venue.name, status: "ok", events_saved: events.length });
} catch (err) {
- results.push({
- venue_id: venue.id,
- venue_name: venue.name,
- events_saved: 0,
- error: err instanceof Error ? err.message : String(err),
- });
+ const error = err instanceof Error ? err.message : String(err);
+ updateScrapeLog(logId, "error", 0, error);
+ results.push({ run_id, venue_id: venue.id, venue_name: venue.name, status: "error", events_saved: 0, error });
}
}
- // Generate Markdown files for all venues that scraped successfully
generateAllVenueMarkdown(successIds);
-
return results;
}
-export async function runScraper(venueId: string): Promise<ScrapeResult> {
+/** Runs a single scraper by venue ID. */
+export async function runScraper(venueId: string, run_id = randomUUID()): Promise<ScrapeResult> {
const scraper = ALL_SCRAPERS.find((s) => s.venue.id === venueId);
if (!scraper) {
- return { venue_id: venueId, venue_name: venueId, events_saved: 0, error: "Scraper not found" };
+ return { run_id, venue_id: venueId, venue_name: venueId, status: "error", events_saved: 0, error: "Scraper not found" };
}
const { venue } = scraper;
upsertVenue(venue.id, venue.name, venue.url, venue.area);
+ const logId = insertScrapeLog(run_id, venue.id, venue.name);
try {
const { from, to } = scrapeWindow();
- const events = (await scraper.scrape()).filter((e) =>
- withinWindow(e, from, to)
- );
+ const events = (await scraper.scrape()).filter((e) => withinWindow(e, from, to));
for (const event of events) {
upsertEvent(event);
}
+ updateScrapeLog(logId, "ok", events.length);
generateVenueMarkdown(venue.id);
- return {
- venue_id: venue.id,
- venue_name: venue.name,
- events_saved: events.length,
- markdown_path: `events/${venue.id}.md`,
- };
+ return { run_id, venue_id: venue.id, venue_name: venue.name, status: "ok", events_saved: events.length };
} catch (err) {
- return {
- venue_id: venue.id,
- venue_name: venue.name,
- events_saved: 0,
- error: err instanceof Error ? err.message : String(err),
- };
+ const error = err instanceof Error ? err.message : String(err);
+ updateScrapeLog(logId, "error", 0, error);
+ return { run_id, venue_id: venue.id, venue_name: venue.name, status: "error", events_saved: 0, error };
}
}