diff options
| author | yyamashita <yyamashita@mosquit.one> | 2026-05-06 22:24:38 +0900 |
|---|---|---|
| committer | yyamashita <yyamashita@mosquit.one> | 2026-05-06 22:24:38 +0900 |
| commit | 538fd636e25595d88a958344d285c0e7cf44e530 (patch) | |
| tree | eb2999f355570224fa96877d5043af2ef3ec76ef /app | |
| parent | f817604858891edb79e26459dae884b158774db1 (diff) | |
Async scraping, scrape_logs, and CLI
Background scraping:
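- POST /api/scrape starts scraping in the background and returns 202 with a run_id immediately (form posts that send a Referer header are redirected back instead; GET /api/scrape starts a run the same way)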
- GET /api/scrape-status?run_id=xxx returns the per-venue results of that run and a `running` flag, for polling; without run_id it returns the latest run
- scrape_logs table: per-venue status (running/ok/error), events_saved, error, timestamps
CLI (npm run scrape):
- npm run scrape — 全会場をスクレイプ、結果を色付きで出力
- npm run scrape liquid-room — 特定会場のみ
- npm run scrape -- --list — 登録済み会場一覧を表示
- エラー時は exit code 1 + エラーメッセージを dim 表示
Venues page:
- 最終スクレイプ日時・成否をインラインで表示
- 会場ごとの「更新」ボタンを追加
Bug fix: upsertEvent で description などの省略可能フィールドに既定値 (null) を設定し、
better-sqlite3 の "Missing named parameter" エラーを解消
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app')
| -rw-r--r-- | app/lib/db.server.ts | 102 | ||||
| -rw-r--r-- | app/lib/scraper-runner.server.ts | 81 | ||||
| -rw-r--r-- | app/routes.ts | 1 | ||||
| -rw-r--r-- | app/routes/api.scrape-status.ts | 16 | ||||
| -rw-r--r-- | app/routes/api.scrape.ts | 34 | ||||
| -rw-r--r-- | app/routes/venues.tsx | 115 |
6 files changed, 263 insertions, 86 deletions
diff --git a/app/lib/db.server.ts b/app/lib/db.server.ts index 0c55991..26735c6 100644 --- a/app/lib/db.server.ts +++ b/app/lib/db.server.ts @@ -45,6 +45,21 @@ function initSchema(db: Database.Database) { CREATE INDEX IF NOT EXISTS idx_events_date ON events(date); CREATE INDEX IF NOT EXISTS idx_events_venue_id ON events(venue_id); + + CREATE TABLE IF NOT EXISTS scrape_logs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + run_id TEXT NOT NULL, + venue_id TEXT NOT NULL, + venue_name TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'running', -- running | ok | error + events_saved INTEGER NOT NULL DEFAULT 0, + error TEXT, + started_at TEXT NOT NULL DEFAULT (datetime('now')), + finished_at TEXT + ); + + CREATE INDEX IF NOT EXISTS idx_scrape_logs_run_id ON scrape_logs(run_id); + CREATE INDEX IF NOT EXISTS idx_scrape_logs_venue_id ON scrape_logs(venue_id); `); } @@ -102,7 +117,19 @@ export function upsertVenue( .run(id, name, url, area ?? null); } -export function upsertEvent(event: EventInput) { +export function upsertEvent(raw: EventInput) { + // Ensure all named parameters exist (better-sqlite3 requires them all) + const event = { + artist: null, + start_time: null, + open_time: null, + ticket_url: null, + price: null, + image_url: null, + description: null, + source_url: null, + ...raw, + }; getDb() .prepare( `INSERT INTO events @@ -190,3 +217,76 @@ export function getVenues(): Venue[] { ) .all() as Venue[]; } + +// ---------- Scrape logs ---------- + +export interface ScrapeLog { + id: number; + run_id: string; + venue_id: string; + venue_name: string; + status: "running" | "ok" | "error"; + events_saved: number; + error: string | null; + started_at: string; + finished_at: string | null; +} + +export function insertScrapeLog( + run_id: string, + venue_id: string, + venue_name: string +): number { + const result = getDb() + .prepare( + `INSERT INTO scrape_logs (run_id, venue_id, venue_name, status) + VALUES (?, ?, ?, 'running')` + ) + .run(run_id, venue_id, venue_name); + 
return result.lastInsertRowid as number; +} + +export function updateScrapeLog( + id: number, + status: "ok" | "error", + events_saved: number, + error?: string +) { + getDb() + .prepare( + `UPDATE scrape_logs + SET status = ?, events_saved = ?, error = ?, finished_at = datetime('now') + WHERE id = ?` + ) + .run(status, events_saved, error ?? null, id); +} + +export function getLatestScrapeRun(): ScrapeLog[] { + return getDb() + .prepare( + `SELECT * FROM scrape_logs + WHERE run_id = (SELECT run_id FROM scrape_logs ORDER BY started_at DESC LIMIT 1) + ORDER BY id ASC` + ) + .all() as ScrapeLog[]; +} + +export function getScrapeRunById(run_id: string): ScrapeLog[] { + return getDb() + .prepare( + "SELECT * FROM scrape_logs WHERE run_id = ? ORDER BY id ASC" + ) + .all(run_id) as ScrapeLog[]; +} + +export function getLastScrapePerVenue(): ScrapeLog[] { + return getDb() + .prepare( + `SELECT s.* FROM scrape_logs s + INNER JOIN ( + SELECT venue_id, MAX(started_at) AS latest FROM scrape_logs GROUP BY venue_id + ) t ON s.venue_id = t.venue_id AND s.started_at = t.latest + ORDER BY s.venue_name ASC` + ) + .all() as ScrapeLog[]; +} diff --git a/app/lib/scraper-runner.server.ts b/app/lib/scraper-runner.server.ts index 191dd00..87dd16c 100644 --- a/app/lib/scraper-runner.server.ts +++ b/app/lib/scraper-runner.server.ts @@ -1,9 +1,16 @@ -import { upsertVenue, upsertEvent } from "./db.server"; +import { randomUUID } from "crypto"; +import { + upsertVenue, + upsertEvent, + insertScrapeLog, + updateScrapeLog, + type ScrapeLog, +} from "./db.server"; import { generateVenueMarkdown, generateAllVenueMarkdown } from "./markdown-writer.server"; import { ALL_SCRAPERS } from "~/scrapers/index"; import type { EventInput } from "./db.server"; -const SCRAPE_WINDOW_DAYS = 35; // ~1 month +const SCRAPE_WINDOW_DAYS = 35; function scrapeWindow(): { from: string; to: string } { const from = new Date(); @@ -21,81 +28,81 @@ function withinWindow(event: EventInput, from: string, to: string): 
boolean { } export interface ScrapeResult { + run_id: string; venue_id: string; venue_name: string; + status: "ok" | "error"; events_saved: number; - markdown_path?: string; error?: string; } -export async function runAllScrapers(): Promise<ScrapeResult[]> { +/** Fire-and-forget: start all scrapers in the background, return run_id immediately. */ +export function startAllScrapersAsync(): string { + const run_id = randomUUID(); + // Don't await — runs in background + void runAllScrapers(run_id); + return run_id; +} + +export function startScraperAsync(venueId: string): string { + const run_id = randomUUID(); + void runScraper(venueId, run_id); + return run_id; +} + +/** Runs all scrapers, writes logs to DB. Can be awaited (e.g. from CLI). */ +export async function runAllScrapers(run_id = randomUUID()): Promise<ScrapeResult[]> { const results: ScrapeResult[] = []; const successIds: string[] = []; for (const scraper of ALL_SCRAPERS) { const { venue } = scraper; upsertVenue(venue.id, venue.name, venue.url, venue.area); + const logId = insertScrapeLog(run_id, venue.id, venue.name); try { const { from, to } = scrapeWindow(); - const events = (await scraper.scrape()).filter((e) => - withinWindow(e, from, to) - ); + const events = (await scraper.scrape()).filter((e) => withinWindow(e, from, to)); for (const event of events) { upsertEvent(event); } + updateScrapeLog(logId, "ok", events.length); successIds.push(venue.id); - results.push({ - venue_id: venue.id, - venue_name: venue.name, - events_saved: events.length, - }); + results.push({ run_id, venue_id: venue.id, venue_name: venue.name, status: "ok", events_saved: events.length }); } catch (err) { - results.push({ - venue_id: venue.id, - venue_name: venue.name, - events_saved: 0, - error: err instanceof Error ? err.message : String(err), - }); + const error = err instanceof Error ? 
err.message : String(err); + updateScrapeLog(logId, "error", 0, error); + results.push({ run_id, venue_id: venue.id, venue_name: venue.name, status: "error", events_saved: 0, error }); } } - // Generate Markdown files for all venues that scraped successfully generateAllVenueMarkdown(successIds); - return results; } -export async function runScraper(venueId: string): Promise<ScrapeResult> { +/** Runs a single scraper by venue ID. */ +export async function runScraper(venueId: string, run_id = randomUUID()): Promise<ScrapeResult> { const scraper = ALL_SCRAPERS.find((s) => s.venue.id === venueId); if (!scraper) { - return { venue_id: venueId, venue_name: venueId, events_saved: 0, error: "Scraper not found" }; + return { run_id, venue_id: venueId, venue_name: venueId, status: "error", events_saved: 0, error: "Scraper not found" }; } const { venue } = scraper; upsertVenue(venue.id, venue.name, venue.url, venue.area); + const logId = insertScrapeLog(run_id, venue.id, venue.name); try { const { from, to } = scrapeWindow(); - const events = (await scraper.scrape()).filter((e) => - withinWindow(e, from, to) - ); + const events = (await scraper.scrape()).filter((e) => withinWindow(e, from, to)); for (const event of events) { upsertEvent(event); } + updateScrapeLog(logId, "ok", events.length); generateVenueMarkdown(venue.id); - return { - venue_id: venue.id, - venue_name: venue.name, - events_saved: events.length, - markdown_path: `events/${venue.id}.md`, - }; + return { run_id, venue_id: venue.id, venue_name: venue.name, status: "ok", events_saved: events.length }; } catch (err) { - return { - venue_id: venue.id, - venue_name: venue.name, - events_saved: 0, - error: err instanceof Error ? err.message : String(err), - }; + const error = err instanceof Error ? 
err.message : String(err); + updateScrapeLog(logId, "error", 0, error); + return { run_id, venue_id: venue.id, venue_name: venue.name, status: "error", events_saved: 0, error }; } } diff --git a/app/routes.ts b/app/routes.ts index 028da16..c0096e1 100644 --- a/app/routes.ts +++ b/app/routes.ts @@ -8,4 +8,5 @@ export default [ ]), route("venues", "routes/venues.tsx"), route("api/scrape", "routes/api.scrape.ts"), + route("api/scrape-status", "routes/api.scrape-status.ts"), ] satisfies RouteConfig; diff --git a/app/routes/api.scrape-status.ts b/app/routes/api.scrape-status.ts new file mode 100644 index 0000000..28d08d4 --- /dev/null +++ b/app/routes/api.scrape-status.ts @@ -0,0 +1,16 @@ +/** + * GET /api/scrape-status?run_id=xxx — 指定 run_id の結果を返す + * GET /api/scrape-status — 最新 run の結果を返す + */ +import type { Route } from "./+types/api.scrape-status"; +import { getScrapeRunById, getLatestScrapeRun } from "~/lib/db.server"; + +export async function loader({ request }: Route.LoaderArgs) { + const url = new URL(request.url); + const run_id = url.searchParams.get("run_id"); + + const logs = run_id ? getScrapeRunById(run_id) : getLatestScrapeRun(); + const running = logs.some((l) => l.status === "running"); + + return Response.json({ running, results: logs }); +} diff --git a/app/routes/api.scrape.ts b/app/routes/api.scrape.ts index 4071985..f9daa5c 100644 --- a/app/routes/api.scrape.ts +++ b/app/routes/api.scrape.ts @@ -1,37 +1,37 @@ /** - * Resource route: POST /api/scrape - * Triggers scraping for all venues (or a specific one via ?venue_id=xxx). - * Returns JSON results and redirects back if called from a form. 
+ * Resource route: /api/scrape + * + * POST (form action) — バックグラウンドでスクレイプ開始、202 を即時返却 + * GET ?venue_id=xxx — 特定会場のみバックグラウンド開始 + * GET (パラメータなし) — 全会場をバックグラウンド開始 + * + * ステータス確認は /api/scrape-status?run_id=xxx */ import { redirect } from "react-router"; import type { Route } from "./+types/api.scrape"; -import { runAllScrapers, runScraper } from "~/lib/scraper-runner.server"; +import { startAllScrapersAsync, startScraperAsync } from "~/lib/scraper-runner.server"; export async function action({ request }: Route.ActionArgs) { const formData = await request.formData(); const venueId = formData.get("venue_id"); - const results = venueId - ? [await runScraper(String(venueId))] - : await runAllScrapers(); + const run_id = venueId + ? startScraperAsync(String(venueId)) + : startAllScrapersAsync(); - // If called from a browser form, redirect back const referer = request.headers.get("Referer"); - if (referer) { - return redirect(referer); - } + if (referer) return redirect(referer); - return Response.json({ results }); + return Response.json({ run_id, status: "started" }, { status: 202 }); } -// Allow GET for quick testing in the browser export async function loader({ request }: Route.LoaderArgs) { const url = new URL(request.url); const venueId = url.searchParams.get("venue_id"); - const results = venueId - ? [await runScraper(venueId)] - : await runAllScrapers(); + const run_id = venueId + ? 
startScraperAsync(venueId) + : startAllScrapersAsync(); - return Response.json({ results }); + return Response.json({ run_id, status: "started" }, { status: 202 }); } diff --git a/app/routes/venues.tsx b/app/routes/venues.tsx index 23b052f..affa72a 100644 --- a/app/routes/venues.tsx +++ b/app/routes/venues.tsx @@ -1,17 +1,19 @@ -import { useLoaderData, Link } from "react-router"; +import { useLoaderData, Link, Form } from "react-router"; import type { Route } from "./+types/venues"; -import { getVenues } from "~/lib/db.server"; +import { getVenues, getLastScrapePerVenue, type ScrapeLog } from "~/lib/db.server"; import { getScraperIds } from "~/lib/venue-meta.server"; export async function loader(_: Route.LoaderArgs) { const venues = getVenues(); const scraperIds = getScraperIds(); - return { venues, scraperIds }; + const scrapeStatus = getLastScrapePerVenue(); + return { venues, scraperIds, scrapeStatus }; } export default function Venues() { - const { venues, scraperIds: scraperIdList } = useLoaderData<typeof loader>(); + const { venues, scraperIds: scraperIdList, scrapeStatus } = useLoaderData<typeof loader>(); const scraperIds = new Set(scraperIdList); + const statusMap = new Map<string, ScrapeLog>(scrapeStatus.map((s) => [s.venue_id, s])); return ( <div className="min-h-screen bg-gray-950 text-gray-100"> @@ -26,43 +28,94 @@ export default function Venues() { </header> <main className="max-w-4xl mx-auto px-4 py-10"> - <div className="mb-8"> - <h1 className="text-2xl font-bold">会場一覧</h1> - <p className="mt-1 text-sm text-gray-400"> - 現在 {scraperIdList.length} 会場のスクレイパーが登録されています。 - 新しい会場を追加するには <code className="bg-gray-800 px-1 rounded">app/scrapers/</code> に - モジュールを追加して <code className="bg-gray-800 px-1 rounded">index.ts</code> に登録してください。 - </p> + <div className="mb-8 flex items-start justify-between gap-4 flex-wrap"> + <div> + <h1 className="text-2xl font-bold">会場一覧</h1> + <p className="mt-1 text-sm text-gray-400"> + 現在 {scraperIdList.length} 
会場のスクレイパーが登録されています。 + </p> + </div> + <Form method="post" action="/api/scrape"> + <button + type="submit" + className="rounded-md bg-indigo-600 px-4 py-2 text-sm font-medium hover:bg-indigo-500 transition-colors" + > + 全会場を更新 + </button> + </Form> </div> {venues.length === 0 ? ( - <p className="text-gray-500">まだ会場データがありません。「情報を更新」してください。</p> + <p className="text-gray-500">まだ会場データがありません。「全会場を更新」してください。</p> ) : ( - <div className="grid gap-4 sm:grid-cols-2"> - {venues.map((v) => ( - <Link - key={v.id} - to={`/events?venue_id=${v.id}`} - className="flex items-center justify-between rounded-xl bg-gray-800/60 p-4 hover:bg-gray-800 transition-colors border border-gray-700/50" - > - <div> - <p className="font-semibold">{v.name}</p> - {v.area && <p className="text-sm text-gray-400">{v.area}</p>} + <div className="grid gap-3"> + {venues.map((v) => { + const log = statusMap.get(v.id); + return ( + <div + key={v.id} + className="flex items-center gap-4 rounded-xl bg-gray-800/60 border border-gray-700/40 p-4" + > + {/* 会場名 + エリア */} + <div className="flex-1 min-w-0"> + <Link + to={`/events?venue_id=${v.id}`} + className="font-semibold hover:text-indigo-300 transition-colors" + > + {v.name} + </Link> + {v.area && <p className="text-xs text-gray-400">{v.area}</p>} + </div> + + {/* イベント件数 */} + <span className="text-sm text-gray-400 whitespace-nowrap"> + <span className="text-lg font-bold text-gray-200">{v.event_count ?? 0}</span> 件 + </span> + + {/* 最終スクレイプ状態 */} + {log ? 
( + <ScrapeStatus log={log} /> + ) : ( + <span className="text-xs text-gray-600 whitespace-nowrap">未実行</span> + )} + + {/* 個別更新ボタン */} {scraperIds.has(v.id) && ( - <span className="mt-1 inline-block rounded-full bg-emerald-700/40 px-2 py-0.5 text-xs text-emerald-300"> - スクレイパー登録済 - </span> + <Form method="post" action="/api/scrape"> + <input type="hidden" name="venue_id" value={v.id} /> + <button + type="submit" + className="rounded bg-gray-700 px-3 py-1 text-xs hover:bg-gray-600 transition-colors whitespace-nowrap" + > + 更新 + </button> + </Form> )} </div> - <span className="text-2xl font-bold text-gray-500"> - {v.event_count ?? 0} - <span className="text-sm font-normal ml-1">件</span> - </span> - </Link> - ))} + ); + })} </div> )} </main> </div> ); } + +function ScrapeStatus({ log }: { log: ScrapeLog }) { + if (log.status === "running") { + return <span className="text-xs text-yellow-400 whitespace-nowrap">⟳ 実行中...</span>; + } + if (log.status === "error") { + return ( + <span className="text-xs text-red-400 whitespace-nowrap" title={log.error ?? ""}> + ✖ エラー + </span> + ); + } + const time = log.finished_at?.slice(0, 16).replace("T", " ") ?? ""; + return ( + <span className="text-xs text-emerald-400 whitespace-nowrap" title={time}> + ✔ {time} + </span> + ); +} |
