1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
/**
* Pitbar 西荻窪 — http://freecalend.com/open/mem25771
*
* freecalend.com は静的 fetch をブロックするため Playwright を使用。
*
* DOM 構造:
* id="cal-25771-{year}-{month}-{day}" ← 日付セル
* テキストは: 日数字 + "M.D(day)" + イベント本文
* id="sitatumari-25771-..." ← 区切り (スキップ)
*
* open / start 時刻は "open HH:MM" / "start HH:MM" テキストから取得。
*/
import type { Page } from "playwright";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";
import { getBrowser } from "~/lib/playwright.server";
/**
 * Static metadata for this venue.
 *
 * `id` is the value written to `venue_id` on every event this module emits.
 * `url` points at the venue's Ameblo blog; it is not the page that gets
 * scraped (the calendar address lives in `CALENDAR_URL`).
 */
export const venue: VenueMeta = {
id: "pitbar-nishiogikubo",
name: "Pitbar 西荻窪",
url: "https://ameblo.jp/pitbar",
area: "西荻窪",
};
// Calendar page loaded by the scraper; also recorded as `source_url` on every event.
const CALENDAR_URL = "http://freecalend.com/open/mem25771";
// Id segment embedded in the calendar's day-cell element ids: cal-<MEMBER_ID>-YYYY-M-D.
const MEMBER_ID = "25771";
// A cell whose first content line starts with "BAR営業" is a BAR open-hours
// entry (not a live event) and is skipped.
const BAR_ONLY_RE = /^BAR営業/;
/**
 * Reads every calendar day cell on the already-loaded page and turns the
 * cells that fall inside the requested window into event records.
 *
 * A cell is skipped when it has no content after its two header lines, or
 * when its first content line is a BAR open-hours entry (`BAR_ONLY_RE`).
 *
 * @param page - Playwright page that has already navigated to the calendar.
 * @param dateFrom - Inclusive lower bound, formatted `YYYY-MM-DD`.
 * @param dateTo - Inclusive upper bound, formatted `YYYY-MM-DD`.
 * @returns One event per accepted cell, in DOM order.
 */
async function extractEvents(page: Page, dateFrom: string, dateTo: string): Promise<EventInput[]> {
  type CalendarCell = { date: string; text: string };

  // Built once per call rather than once per cell.
  const openTimeRe = /open\s+(\d{1,2}:\d{2})/i;
  const startTimeRe = /start\s+(\d{1,2}:\d{2})/i;
  // A single line mentioning adv/door/前売 followed by "yen" or "円".
  const priceLineRe = /((?:adv|door|前売)[^\n]*(?:yen|円)[^\n]*)/i;

  // This callback runs inside the browser, so it may only use its own
  // argument and browser globals; nothing from this module is in scope.
  const cells: CalendarCell[] = await page.evaluate((memberId: string) => {
    const found: { date: string; text: string }[] = [];
    const nodes = Array.from(document.querySelectorAll(`[id^="cal-${memberId}-"]`));
    for (const el of nodes) {
      // id: cal-25771-YYYY-M-D
      const [, , year, month, day] = el.id.split("-");
      if (!year || !month || !day) continue;
      // Reject ids whose date parts are not plain digits so that a malformed
      // id can never produce a bogus date string.
      if (![year, month, day].every((part) => /^\d+$/.test(part))) continue;
      found.push({
        date: `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`,
        // NOTE(review): textContent emits no line break for <br>; the
        // line-based parsing below presumably relies on newlines that are
        // present in the page markup — confirm against the live DOM.
        text: el.textContent?.trim() ?? "",
      });
    }
    return found;
  }, MEMBER_ID);

  const events: EventInput[] = [];
  for (const { date, text } of cells) {
    // Zero-padded YYYY-MM-DD strings sort chronologically, so plain string
    // comparison is enough for the window check.
    if (date < dateFrom || date > dateTo) continue;

    // First non-empty line is the day number, second is "M.D(dayname)" —
    // drop both and keep the event body.
    const contentLines = text
      .split(/\n/)
      .map((line) => line.trim())
      .filter(Boolean)
      .slice(2);

    const title = contentLines[0];
    if (!title || BAR_ONLY_RE.test(title)) continue;

    const body = contentLines.join("\n");

    // Artists are the lines that start with ■, joined with "、".
    const artists = contentLines
      .filter((line) => line.startsWith("■"))
      .map((line) => line.slice(1).trim())
      .join("、");

    events.push({
      venue_id: venue.id,
      title,
      artist: artists || null,
      date,
      open_time: openTimeRe.exec(body)?.[1] ?? null,
      start_time: startTimeRe.exec(body)?.[1] ?? null,
      price: priceLineRe.exec(body)?.[1]?.trim() ?? null,
      source_url: CALENDAR_URL,
    });
  }
  return events;
}
/**
 * Scraper entry for this venue.
 *
 * `scrape()` opens the calendar in a fresh Playwright page, waits a fixed
 * five seconds, and returns the events dated from today through 35 days
 * ahead. The page is closed whether or not extraction succeeds.
 */
export const scraper: Scraper = {
  venue,
  async scrape(): Promise<EventInput[]> {
    const JST_OFFSET_MS = 9 * 60 * 60 * 1000;
    const DAY_MS = 86_400_000;
    const WINDOW_DAYS = 35;

    // `toISOString()` always reports the UTC calendar date. The cell ids
    // presumably carry venue-local (Japan) dates, so shift the instant by the
    // fixed JST offset (+09:00, no DST) before taking the date part. Without
    // the shift, "today" is yesterday between 00:00 and 09:00 JST.
    const toJstDate = (epochMs: number): string =>
      new Date(epochMs + JST_OFFSET_MS).toISOString().slice(0, 10);

    const browser = await getBrowser();
    const page = await browser.newPage();
    try {
      await page.goto(CALENDAR_URL, {
        waitUntil: "domcontentloaded",
        timeout: 20_000,
      });
      // Fixed delay after DOMContentLoaded, presumably to let the calendar's
      // own scripts finish rendering the cells.
      await page.waitForTimeout(5_000);

      const now = Date.now();
      const dateFrom = toJstDate(now);
      const dateTo = toJstDate(now + WINDOW_DAYS * DAY_MS);
      return await extractEvents(page, dateFrom, dateTo);
    } finally {
      await page.close();
    }
  },
};
|