summary refs log tree commit diff
path: root/app
diff options
context:
space:
mode:
author yyamashita <yyamashita@mosquit.one> 2026-05-06 22:24:38 +0900
committer yyamashita <yyamashita@mosquit.one> 2026-05-06 22:24:38 +0900
commit 538fd636e25595d88a958344d285c0e7cf44e530 (patch)
tree eb2999f355570224fa96877d5043af2ef3ec76ef /app
parent f817604858891edb79e26459dae884b158774db1 (diff)
Async scraping, scrape_logs, and CLI
Background scraping:
- POST /api/scrape returns 202 immediately with run_id; scraping runs async
- GET /api/scrape-status?run_id=xxx polls for results per venue
- scrape_logs table: per-venue status (running/ok/error), events_saved, error, timestamps

CLI (npm run scrape):
- npm run scrape — 全会場をスクレイプ、結果を色付きで出力
- npm run scrape liquid-room — 特定会場のみ
- npm run scrape -- --list — 登録済み会場一覧を表示
- エラー時は exit code 1 + エラーメッセージを dim 表示

Venues page:
- 最終スクレイプ日時・成否をインラインで表示
- 会場ごとの「更新」ボタンを追加

Bug fix: upsertEvent に description/optional fields のデフォルト値を設定し better-sqlite3 の "Missing named parameter" エラーを解消

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app')
-rw-r--r-- app/lib/db.server.ts 102
-rw-r--r-- app/lib/scraper-runner.server.ts 81
-rw-r--r-- app/routes.ts 1
-rw-r--r-- app/routes/api.scrape-status.ts 16
-rw-r--r-- app/routes/api.scrape.ts 34
-rw-r--r-- app/routes/venues.tsx 115
6 files changed, 263 insertions, 86 deletions
diff --git a/app/lib/db.server.ts b/app/lib/db.server.ts
index 0c55991..26735c6 100644
--- a/app/lib/db.server.ts
+++ b/app/lib/db.server.ts
@@ -45,6 +45,21 @@ function initSchema(db: Database.Database) {
CREATE INDEX IF NOT EXISTS idx_events_date ON events(date);
CREATE INDEX IF NOT EXISTS idx_events_venue_id ON events(venue_id);
+
+ CREATE TABLE IF NOT EXISTS scrape_logs (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ run_id TEXT NOT NULL,
+ venue_id TEXT NOT NULL,
+ venue_name TEXT NOT NULL,
+ status TEXT NOT NULL DEFAULT 'running', -- running | ok | error
+ events_saved INTEGER NOT NULL DEFAULT 0,
+ error TEXT,
+ started_at TEXT NOT NULL DEFAULT (datetime('now')),
+ finished_at TEXT
+ );
+
+ CREATE INDEX IF NOT EXISTS idx_scrape_logs_run_id ON scrape_logs(run_id);
+ CREATE INDEX IF NOT EXISTS idx_scrape_logs_venue_id ON scrape_logs(venue_id);
`);
}
@@ -102,7 +117,19 @@ export function upsertVenue(
.run(id, name, url, area ?? null);
}
-export function upsertEvent(event: EventInput) {
+export function upsertEvent(raw: EventInput) {
+ // Ensure all named parameters exist (better-sqlite3 requires them all)
+ const event = {
+ artist: null,
+ start_time: null,
+ open_time: null,
+ ticket_url: null,
+ price: null,
+ image_url: null,
+ description: null,
+ source_url: null,
+ ...raw,
+ };
getDb()
.prepare(
`INSERT INTO events
@@ -190,3 +217,76 @@ export function getVenues(): Venue[] {
)
.all() as Venue[];
}
+
+// ---------- Scrape logs ----------
+
+export interface ScrapeLog {
+ id: number;
+ run_id: string;
+ venue_id: string;
+ venue_name: string;
+ status: "running" | "ok" | "error";
+ events_saved: number;
+ error: string | null;
+ started_at: string;
+ finished_at: string | null;
+}
+
+export function insertScrapeLog(
+ run_id: string,
+ venue_id: string,
+ venue_name: string
+): number {
+ const result = getDb()
+ .prepare(
+ `INSERT INTO scrape_logs (run_id, venue_id, venue_name, status)
+ VALUES (?, ?, ?, 'running')`
+ )
+ .run(run_id, venue_id, venue_name);
+ return result.lastInsertRowid as number;
+}
+
+export function updateScrapeLog(
+ id: number,
+ status: "ok" | "error",
+ events_saved: number,
+ error?: string
+) {
+ getDb()
+ .prepare(
+ `UPDATE scrape_logs
+ SET status = ?, events_saved = ?, error = ?, finished_at = datetime('now')
+ WHERE id = ?`
+ )
+ .run(status, events_saved, error ?? null, id);
+}
+
+export function getLatestScrapeRun(): ScrapeLog[] {
+ return getDb()
+ .prepare(
+ `SELECT * FROM scrape_logs
+ WHERE run_id = (SELECT run_id FROM scrape_logs ORDER BY started_at DESC LIMIT 1)
+ ORDER BY id ASC`
+ )
+ .all() as ScrapeLog[];
+}
+
+export function getScrapeRunById(run_id: string): ScrapeLog[] {
+ return getDb()
+ .prepare(
+ "SELECT * FROM scrape_logs WHERE run_id = ? ORDER BY id ASC"
+ )
+ .all(run_id) as ScrapeLog[];
+}
+
+export function getLastScrapePerVenue(): ScrapeLog[] {
+ return getDb()
+ .prepare(
+ `SELECT s.* FROM scrape_logs s
+ INNER JOIN (
+ SELECT venue_id, MAX(started_at) AS latest FROM scrape_logs GROUP BY venue_id
+ ) t ON s.venue_id = t.venue_id AND s.started_at = t.latest
+ ORDER BY s.venue_name ASC`
+ )
+ .all() as ScrapeLog[];
+}
diff --git a/app/lib/scraper-runner.server.ts b/app/lib/scraper-runner.server.ts
index 191dd00..87dd16c 100644
--- a/app/lib/scraper-runner.server.ts
+++ b/app/lib/scraper-runner.server.ts
@@ -1,9 +1,16 @@
-import { upsertVenue, upsertEvent } from "./db.server";
+import { randomUUID } from "crypto";
+import {
+ upsertVenue,
+ upsertEvent,
+ insertScrapeLog,
+ updateScrapeLog,
+ type ScrapeLog,
+} from "./db.server";
import { generateVenueMarkdown, generateAllVenueMarkdown } from "./markdown-writer.server";
import { ALL_SCRAPERS } from "~/scrapers/index";
import type { EventInput } from "./db.server";
-const SCRAPE_WINDOW_DAYS = 35; // ~1 month
+const SCRAPE_WINDOW_DAYS = 35;
function scrapeWindow(): { from: string; to: string } {
const from = new Date();
@@ -21,81 +28,81 @@ function withinWindow(event: EventInput, from: string, to: string): boolean {
}
export interface ScrapeResult {
+ run_id: string;
venue_id: string;
venue_name: string;
+ status: "ok" | "error";
events_saved: number;
- markdown_path?: string;
error?: string;
}
-export async function runAllScrapers(): Promise<ScrapeResult[]> {
+/** Fire-and-forget: start all scrapers in the background, return run_id immediately. */
+export function startAllScrapersAsync(): string {
+ const run_id = randomUUID();
+ // Don't await — runs in background
+ void runAllScrapers(run_id);
+ return run_id;
+}
+
+export function startScraperAsync(venueId: string): string {
+ const run_id = randomUUID();
+ void runScraper(venueId, run_id);
+ return run_id;
+}
+
+/** Runs all scrapers, writes logs to DB. Can be awaited (e.g. from CLI). */
+export async function runAllScrapers(run_id = randomUUID()): Promise<ScrapeResult[]> {
const results: ScrapeResult[] = [];
const successIds: string[] = [];
for (const scraper of ALL_SCRAPERS) {
const { venue } = scraper;
upsertVenue(venue.id, venue.name, venue.url, venue.area);
+ const logId = insertScrapeLog(run_id, venue.id, venue.name);
try {
const { from, to } = scrapeWindow();
- const events = (await scraper.scrape()).filter((e) =>
- withinWindow(e, from, to)
- );
+ const events = (await scraper.scrape()).filter((e) => withinWindow(e, from, to));
for (const event of events) {
upsertEvent(event);
}
+ updateScrapeLog(logId, "ok", events.length);
successIds.push(venue.id);
- results.push({
- venue_id: venue.id,
- venue_name: venue.name,
- events_saved: events.length,
- });
+ results.push({ run_id, venue_id: venue.id, venue_name: venue.name, status: "ok", events_saved: events.length });
} catch (err) {
- results.push({
- venue_id: venue.id,
- venue_name: venue.name,
- events_saved: 0,
- error: err instanceof Error ? err.message : String(err),
- });
+ const error = err instanceof Error ? err.message : String(err);
+ updateScrapeLog(logId, "error", 0, error);
+ results.push({ run_id, venue_id: venue.id, venue_name: venue.name, status: "error", events_saved: 0, error });
}
}
- // Generate Markdown files for all venues that scraped successfully
generateAllVenueMarkdown(successIds);
-
return results;
}
-export async function runScraper(venueId: string): Promise<ScrapeResult> {
+/** Runs a single scraper by venue ID. */
+export async function runScraper(venueId: string, run_id = randomUUID()): Promise<ScrapeResult> {
const scraper = ALL_SCRAPERS.find((s) => s.venue.id === venueId);
if (!scraper) {
- return { venue_id: venueId, venue_name: venueId, events_saved: 0, error: "Scraper not found" };
+ return { run_id, venue_id: venueId, venue_name: venueId, status: "error", events_saved: 0, error: "Scraper not found" };
}
const { venue } = scraper;
upsertVenue(venue.id, venue.name, venue.url, venue.area);
+ const logId = insertScrapeLog(run_id, venue.id, venue.name);
try {
const { from, to } = scrapeWindow();
- const events = (await scraper.scrape()).filter((e) =>
- withinWindow(e, from, to)
- );
+ const events = (await scraper.scrape()).filter((e) => withinWindow(e, from, to));
for (const event of events) {
upsertEvent(event);
}
+ updateScrapeLog(logId, "ok", events.length);
generateVenueMarkdown(venue.id);
- return {
- venue_id: venue.id,
- venue_name: venue.name,
- events_saved: events.length,
- markdown_path: `events/${venue.id}.md`,
- };
+ return { run_id, venue_id: venue.id, venue_name: venue.name, status: "ok", events_saved: events.length };
} catch (err) {
- return {
- venue_id: venue.id,
- venue_name: venue.name,
- events_saved: 0,
- error: err instanceof Error ? err.message : String(err),
- };
+ const error = err instanceof Error ? err.message : String(err);
+ updateScrapeLog(logId, "error", 0, error);
+ return { run_id, venue_id: venue.id, venue_name: venue.name, status: "error", events_saved: 0, error };
}
}
diff --git a/app/routes.ts b/app/routes.ts
index 028da16..c0096e1 100644
--- a/app/routes.ts
+++ b/app/routes.ts
@@ -8,4 +8,5 @@ export default [
]),
route("venues", "routes/venues.tsx"),
route("api/scrape", "routes/api.scrape.ts"),
+ route("api/scrape-status", "routes/api.scrape-status.ts"),
] satisfies RouteConfig;
diff --git a/app/routes/api.scrape-status.ts b/app/routes/api.scrape-status.ts
new file mode 100644
index 0000000..28d08d4
--- /dev/null
+++ b/app/routes/api.scrape-status.ts
@@ -0,0 +1,16 @@
+/**
+ * GET /api/scrape-status?run_id=xxx — 指定 run_id の結果を返す
+ * GET /api/scrape-status — 最新 run の結果を返す
+ */
+import type { Route } from "./+types/api.scrape-status";
+import { getScrapeRunById, getLatestScrapeRun } from "~/lib/db.server";
+
+export async function loader({ request }: Route.LoaderArgs) {
+ const url = new URL(request.url);
+ const run_id = url.searchParams.get("run_id");
+
+ const logs = run_id ? getScrapeRunById(run_id) : getLatestScrapeRun();
+ const running = logs.some((l) => l.status === "running");
+
+ return Response.json({ running, results: logs });
+}
diff --git a/app/routes/api.scrape.ts b/app/routes/api.scrape.ts
index 4071985..f9daa5c 100644
--- a/app/routes/api.scrape.ts
+++ b/app/routes/api.scrape.ts
@@ -1,37 +1,37 @@
/**
- * Resource route: POST /api/scrape
- * Triggers scraping for all venues (or a specific one via ?venue_id=xxx).
- * Returns JSON results and redirects back if called from a form.
+ * Resource route: /api/scrape
+ *
+ * POST (form action) — バックグラウンドでスクレイプ開始、202 を即時返却
+ * GET ?venue_id=xxx — 特定会場のみバックグラウンド開始
+ * GET (パラメータなし) — 全会場をバックグラウンド開始
+ *
+ * ステータス確認は /api/scrape-status?run_id=xxx
*/
import { redirect } from "react-router";
import type { Route } from "./+types/api.scrape";
-import { runAllScrapers, runScraper } from "~/lib/scraper-runner.server";
+import { startAllScrapersAsync, startScraperAsync } from "~/lib/scraper-runner.server";
export async function action({ request }: Route.ActionArgs) {
const formData = await request.formData();
const venueId = formData.get("venue_id");
- const results = venueId
- ? [await runScraper(String(venueId))]
- : await runAllScrapers();
+ const run_id = venueId
+ ? startScraperAsync(String(venueId))
+ : startAllScrapersAsync();
- // If called from a browser form, redirect back
const referer = request.headers.get("Referer");
- if (referer) {
- return redirect(referer);
- }
+ if (referer) return redirect(referer);
- return Response.json({ results });
+ return Response.json({ run_id, status: "started" }, { status: 202 });
}
-// Allow GET for quick testing in the browser
export async function loader({ request }: Route.LoaderArgs) {
const url = new URL(request.url);
const venueId = url.searchParams.get("venue_id");
- const results = venueId
- ? [await runScraper(venueId)]
- : await runAllScrapers();
+ const run_id = venueId
+ ? startScraperAsync(venueId)
+ : startAllScrapersAsync();
- return Response.json({ results });
+ return Response.json({ run_id, status: "started" }, { status: 202 });
}
diff --git a/app/routes/venues.tsx b/app/routes/venues.tsx
index 23b052f..affa72a 100644
--- a/app/routes/venues.tsx
+++ b/app/routes/venues.tsx
@@ -1,17 +1,19 @@
-import { useLoaderData, Link } from "react-router";
+import { useLoaderData, Link, Form } from "react-router";
import type { Route } from "./+types/venues";
-import { getVenues } from "~/lib/db.server";
+import { getVenues, getLastScrapePerVenue, type ScrapeLog } from "~/lib/db.server";
import { getScraperIds } from "~/lib/venue-meta.server";
export async function loader(_: Route.LoaderArgs) {
const venues = getVenues();
const scraperIds = getScraperIds();
- return { venues, scraperIds };
+ const scrapeStatus = getLastScrapePerVenue();
+ return { venues, scraperIds, scrapeStatus };
}
export default function Venues() {
- const { venues, scraperIds: scraperIdList } = useLoaderData<typeof loader>();
+ const { venues, scraperIds: scraperIdList, scrapeStatus } = useLoaderData<typeof loader>();
const scraperIds = new Set(scraperIdList);
+ const statusMap = new Map<string, ScrapeLog>(scrapeStatus.map((s) => [s.venue_id, s]));
return (
<div className="min-h-screen bg-gray-950 text-gray-100">
@@ -26,43 +28,94 @@ export default function Venues() {
</header>
<main className="max-w-4xl mx-auto px-4 py-10">
- <div className="mb-8">
- <h1 className="text-2xl font-bold">会場一覧</h1>
- <p className="mt-1 text-sm text-gray-400">
- 現在 {scraperIdList.length} 会場のスクレイパーが登録されています。
- 新しい会場を追加するには <code className="bg-gray-800 px-1 rounded">app/scrapers/</code> に
- モジュールを追加して <code className="bg-gray-800 px-1 rounded">index.ts</code> に登録してください。
- </p>
+ <div className="mb-8 flex items-start justify-between gap-4 flex-wrap">
+ <div>
+ <h1 className="text-2xl font-bold">会場一覧</h1>
+ <p className="mt-1 text-sm text-gray-400">
+ 現在 {scraperIdList.length} 会場のスクレイパーが登録されています。
+ </p>
+ </div>
+ <Form method="post" action="/api/scrape">
+ <button
+ type="submit"
+ className="rounded-md bg-indigo-600 px-4 py-2 text-sm font-medium hover:bg-indigo-500 transition-colors"
+ >
+ 全会場を更新
+ </button>
+ </Form>
</div>
{venues.length === 0 ? (
- <p className="text-gray-500">まだ会場データがありません。「情報を更新」してください。</p>
+ <p className="text-gray-500">まだ会場データがありません。「全会場を更新」してください。</p>
) : (
- <div className="grid gap-4 sm:grid-cols-2">
- {venues.map((v) => (
- <Link
- key={v.id}
- to={`/events?venue_id=${v.id}`}
- className="flex items-center justify-between rounded-xl bg-gray-800/60 p-4 hover:bg-gray-800 transition-colors border border-gray-700/50"
- >
- <div>
- <p className="font-semibold">{v.name}</p>
- {v.area && <p className="text-sm text-gray-400">{v.area}</p>}
+ <div className="grid gap-3">
+ {venues.map((v) => {
+ const log = statusMap.get(v.id);
+ return (
+ <div
+ key={v.id}
+ className="flex items-center gap-4 rounded-xl bg-gray-800/60 border border-gray-700/40 p-4"
+ >
+ {/* 会場名 + エリア */}
+ <div className="flex-1 min-w-0">
+ <Link
+ to={`/events?venue_id=${v.id}`}
+ className="font-semibold hover:text-indigo-300 transition-colors"
+ >
+ {v.name}
+ </Link>
+ {v.area && <p className="text-xs text-gray-400">{v.area}</p>}
+ </div>
+
+ {/* イベント件数 */}
+ <span className="text-sm text-gray-400 whitespace-nowrap">
+ <span className="text-lg font-bold text-gray-200">{v.event_count ?? 0}</span> 件
+ </span>
+
+ {/* 最終スクレイプ状態 */}
+ {log ? (
+ <ScrapeStatus log={log} />
+ ) : (
+ <span className="text-xs text-gray-600 whitespace-nowrap">未実行</span>
+ )}
+
+ {/* 個別更新ボタン */}
{scraperIds.has(v.id) && (
- <span className="mt-1 inline-block rounded-full bg-emerald-700/40 px-2 py-0.5 text-xs text-emerald-300">
- スクレイパー登録済
- </span>
+ <Form method="post" action="/api/scrape">
+ <input type="hidden" name="venue_id" value={v.id} />
+ <button
+ type="submit"
+ className="rounded bg-gray-700 px-3 py-1 text-xs hover:bg-gray-600 transition-colors whitespace-nowrap"
+ >
+ 更新
+ </button>
+ </Form>
)}
</div>
- <span className="text-2xl font-bold text-gray-500">
- {v.event_count ?? 0}
- <span className="text-sm font-normal ml-1">件</span>
- </span>
- </Link>
- ))}
+ );
+ })}
</div>
)}
</main>
</div>
);
}
+
+function ScrapeStatus({ log }: { log: ScrapeLog }) {
+ if (log.status === "running") {
+ return <span className="text-xs text-yellow-400 whitespace-nowrap">⟳ 実行中...</span>;
+ }
+ if (log.status === "error") {
+ return (
+ <span className="text-xs text-red-400 whitespace-nowrap" title={log.error ?? ""}>
+ ✖ エラー
+ </span>
+ );
+ }
+ const time = log.finished_at?.slice(0, 16).replace("T", " ") ?? "";
+ return (
+ <span className="text-xs text-emerald-400 whitespace-nowrap" title={time}>
+ ✔ {time}
+ </span>
+ );
+}