1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
import { upsertVenue, upsertEvent } from "./db.server";
import { generateVenueMarkdown, generateAllVenueMarkdown } from "./markdown-writer.server";
import { ALL_SCRAPERS } from "~/scrapers/index";
import type { EventInput } from "./db.server";
/** Length of the scrape window in days, counted forward from today (~1 month). */
const SCRAPE_WINDOW_DAYS = 35;

/**
 * Builds the date window used to filter scraped events.
 *
 * The window starts at today's local calendar date and ends
 * SCRAPE_WINDOW_DAYS days later. Both bounds are returned as `YYYY-MM-DD`
 * strings so they can be compared lexicographically against event dates.
 *
 * @returns `from` (today) and `to` (today + SCRAPE_WINDOW_DAYS), both in local time.
 */
function scrapeWindow(): { from: string; to: string } {
  // Format from local calendar fields. `toISOString()` converts to UTC, which
  // moves local midnight onto the previous day in any timezone ahead of UTC.
  const formatLocalDate = (d: Date): string => {
    const year = String(d.getFullYear()).padStart(4, "0");
    const month = String(d.getMonth() + 1).padStart(2, "0");
    const day = String(d.getDate()).padStart(2, "0");
    return `${year}-${month}-${day}`;
  };

  const from = new Date();
  from.setHours(0, 0, 0, 0);

  const to = new Date(from);
  // setDate rolls over month/year boundaries automatically.
  to.setDate(to.getDate() + SCRAPE_WINDOW_DAYS);

  return {
    from: formatLocalDate(from),
    to: formatLocalDate(to),
  };
}
/**
 * Tells whether an event's date lies inside the inclusive range [from, to].
 *
 * The comparison is a plain string comparison of `event.date` against the
 * bounds, so all three values are expected to share one sortable format.
 *
 * @param event Event whose `date` field is checked.
 * @param from Lower bound (inclusive).
 * @param to Upper bound (inclusive).
 * @returns `true` when `from <= event.date <= to`.
 */
function withinWindow(event: EventInput, from: string, to: string): boolean {
  const eventDate = event.date;
  // Guard clause: reject anything that is not at or after the lower bound.
  if (!(eventDate >= from)) {
    return false;
  }
  return eventDate <= to;
}
/**
 * Outcome of running one venue's scraper, as returned by `runScraper` and
 * (one entry per scraper) by `runAllScrapers`.
 */
export interface ScrapeResult {
/** Id of the scraped venue; the requested id when no scraper matched it. */
venue_id: string;
/** Name of the scraped venue; falls back to the requested id when no scraper matched. */
venue_name: string;
/** Number of in-window events handed to `upsertEvent`; reported as 0 whenever `error` is set. */
events_saved: number;
/** Path of the venue's Markdown file; only filled in by `runScraper` on success. */
markdown_path?: string;
/** Failure message; set when the scraper was not found or the run threw. */
error?: string;
}
/**
 * Runs every scraper in ALL_SCRAPERS sequentially and stores the events that
 * fall inside the scrape window.
 *
 * A scraper that throws is reported through the `error` field of its result
 * and does not stop the remaining scrapers. After the loop, Markdown is
 * generated for the venues that completed without throwing.
 *
 * @returns One ScrapeResult per scraper, in ALL_SCRAPERS order.
 */
export async function runAllScrapers(): Promise<ScrapeResult[]> {
  const outcomes: ScrapeResult[] = [];
  const scrapedVenueIds: string[] = [];

  for (const scraper of ALL_SCRAPERS) {
    const venue = scraper.venue;
    // Runs outside the try block, so an exception here propagates to the caller.
    upsertVenue(venue.id, venue.name, venue.url, venue.area);

    try {
      const range = scrapeWindow();
      const scraped = await scraper.scrape();
      const upcoming = scraped.filter((candidate) =>
        withinWindow(candidate, range.from, range.to)
      );
      upcoming.forEach((item) => {
        upsertEvent(item);
      });

      scrapedVenueIds.push(venue.id);
      outcomes.push({
        venue_id: venue.id,
        venue_name: venue.name,
        events_saved: upcoming.length,
      });
    } catch (failure) {
      let message: string;
      if (failure instanceof Error) {
        message = failure.message;
      } else {
        message = String(failure);
      }
      outcomes.push({
        venue_id: venue.id,
        venue_name: venue.name,
        events_saved: 0,
        error: message,
      });
    }
  }

  // Only venues whose scrape and persistence finished cleanly get Markdown.
  generateAllVenueMarkdown(scrapedVenueIds);
  return outcomes;
}
/**
 * Runs the scraper registered for a single venue, stores its in-window
 * events, and regenerates that venue's Markdown file.
 *
 * @param venueId Id matched against `venue.id` of the entries in ALL_SCRAPERS.
 * @returns The result for this venue. `error` is set when no scraper matches
 *   or when scraping, persisting, or Markdown generation throws.
 */
export async function runScraper(venueId: string): Promise<ScrapeResult> {
  const scraper = ALL_SCRAPERS.find((candidate) => candidate.venue.id === venueId);
  if (!scraper) {
    // No venue record exists to name, so the requested id stands in for both fields.
    return {
      venue_id: venueId,
      venue_name: venueId,
      events_saved: 0,
      error: "Scraper not found",
    };
  }

  const venue = scraper.venue;
  // Runs outside the try block, so an exception here propagates to the caller.
  upsertVenue(venue.id, venue.name, venue.url, venue.area);

  try {
    const range = scrapeWindow();
    const scraped = await scraper.scrape();
    const upcoming = scraped.filter((candidate) =>
      withinWindow(candidate, range.from, range.to)
    );
    upcoming.forEach((item) => {
      upsertEvent(item);
    });

    generateVenueMarkdown(venue.id);

    const success: ScrapeResult = {
      venue_id: venue.id,
      venue_name: venue.name,
      events_saved: upcoming.length,
      markdown_path: `events/${venue.id}.md`,
    };
    return success;
  } catch (failure) {
    let message: string;
    if (failure instanceof Error) {
      message = failure.message;
    } else {
      message = String(failure);
    }
    return {
      venue_id: venue.id,
      venue_name: venue.name,
      events_saved: 0,
      error: message,
    };
  }
}
|