/**
 * Pitbar Nishi-Ogikubo (Pitbar 西荻窪) — http://freecalend.com/open/mem25771
 *
 * freecalend.com blocks static fetches, so Playwright is used.
 *
 * DOM structure:
 *   id="cal-25771-{year}-{month}-{day}"  ← date cell
 *     text is: day number + "M.D(day)" + event body
 *   id="sitatumari-25771-..."            ← separator (skipped)
 *
 * The open / start times are taken from "open HH:MM" / "start HH:MM" text.
 */

import type { Page } from "playwright";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";
import { getBrowser } from "~/lib/playwright.server";

/** Venue metadata; `venue.id` is stamped on every event this scraper emits. */
export const venue: VenueMeta = {
  id: "pitbar-nishiogikubo",
  name: "Pitbar 西荻窪",
  url: "https://ameblo.jp/pitbar",
  area: "西荻窪",
  capacity: 100,
};

const CALENDAR_URL = "http://freecalend.com/open/mem25771";
const MEMBER_ID = "25771";

// Cells whose text matches this are BAR open-hours entries (not live events)
const BAR_ONLY_RE = /^BAR営業/;

// Hoisted so the patterns are compiled once rather than once per calendar cell.
const OPEN_TIME_RE = /open\s+(\d{1,2}:\d{2})/i;
const START_TIME_RE = /start\s+(\d{1,2}:\d{2})/i;
// A single line that starts a price mention (adv / door / 前売) and also
// contains a currency marker (yen / 円); the whole remainder of that line is kept.
const PRICE_RE = /((?:adv|door|前売)[^\n]*(?:yen|円)[^\n]*)/i;

const DAY_MS = 86_400_000;
const LOOKAHEAD_DAYS = 35;
// Japan Standard Time is a fixed UTC+9 offset with no daylight saving.
const JST_OFFSET_MS = 9 * 60 * 60 * 1000;

/** One calendar cell as read from the page: ISO date plus its raw text. */
interface CalendarCell {
  date: string;
  text: string;
}

/**
 * Formats an instant as the "YYYY-MM-DD" calendar date it falls on in Japan.
 *
 * `Date#toISOString` always reports UTC, so the instant is shifted by the JST
 * offset first; otherwise the result is a day behind between 00:00 and 09:00
 * Japan time.
 */
function toJstDateString(instant: Date): string {
  return new Date(instant.getTime() + JST_OFFSET_MS).toISOString().slice(0, 10);
}

/**
 * Turns the text of one calendar cell into an event.
 *
 * @param date - The cell's date as "YYYY-MM-DD".
 * @param text - The cell's trimmed text content.
 * @returns The parsed event, or `null` when the cell has no event body or is a
 *   BAR open-hours entry.
 */
function parseCell(date: string, text: string): EventInput | null {
  // NOTE(review): this relies on the cell's textContent containing newlines
  // between the day number, the "M.D(day)" label and the body. <br> elements do
  // not contribute newlines to textContent — confirm against the live DOM.
  const lines = text
    .split(/\n/)
    .map((line) => line.trim())
    .filter(Boolean);

  // First line is the day number, second is "M.D(dayname)" — skip both.
  const contentLines = lines.slice(2);
  const title = contentLines[0];
  if (!title || BAR_ONLY_RE.test(title)) return null;

  const fullText = contentLines.join("\n");

  // Artists are the lines marked with a leading "■", joined with "、".
  const artists = contentLines
    .filter((line) => line.startsWith("■"))
    .map((line) => line.slice(1).trim())
    .join("、");

  return {
    venue_id: venue.id,
    title,
    artist: artists || null,
    date,
    open_time: OPEN_TIME_RE.exec(fullText)?.[1] ?? null,
    start_time: START_TIME_RE.exec(fullText)?.[1] ?? null,
    price: PRICE_RE.exec(fullText)?.[1]?.trim() ?? null,
    source_url: CALENDAR_URL,
  };
}

/**
 * Reads every date cell on the loaded calendar page and converts the ones
 * inside the requested window into events.
 *
 * @param page - A Playwright page that has already navigated to the calendar.
 * @param dateFrom - Inclusive lower bound, "YYYY-MM-DD".
 * @param dateTo - Inclusive upper bound, "YYYY-MM-DD".
 * @returns The events found, in the order their cells appear in the document.
 */
async function extractEvents(
  page: Page,
  dateFrom: string,
  dateTo: string,
): Promise<EventInput[]> {
  // This callback is serialized and executed inside the browser, so it must not
  // close over anything from this module; the member id is passed as an argument.
  const cells: CalendarCell[] = await page.evaluate((memberId: string) => {
    const prefix = `cal-${memberId}-`;
    const found: { date: string; text: string }[] = [];

    for (const el of Array.from(document.querySelectorAll(`[id^="${prefix}"]`))) {
      // id: cal-25771-YYYY-M-D
      const parts = (el.getAttribute("id") ?? "").split("-");
      const year = parts[2];
      const month = parts[3];
      const day = parts[4];
      if (!year || !month || !day) continue;

      found.push({
        date: `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`,
        text: el.textContent?.trim() ?? "",
      });
    }

    return found;
  }, MEMBER_ID);

  const events: EventInput[] = [];
  for (const { date, text } of cells) {
    // Zero-padded ISO dates order correctly under plain string comparison.
    if (date < dateFrom || date > dateTo) continue;

    const event = parseCell(date, text);
    if (event) events.push(event);
  }
  return events;
}

/** Scraper for the Pitbar calendar hosted on freecalend.com. */
export const scraper: Scraper = {
  venue,

  /**
   * Loads the calendar in a browser page and returns the events dated from
   * today (Japan time) through the next 35 days, both ends inclusive.
   *
   * The page is always closed, including when navigation or extraction throws.
   */
  async scrape(): Promise<EventInput[]> {
    const browser = await getBrowser();
    const page = await browser.newPage();
    try {
      await page.goto(CALENDAR_URL, {
        waitUntil: "domcontentloaded",
        timeout: 20_000,
      });
      // Fixed pause after DOMContentLoaded before reading the cells.
      // NOTE(review): presumably gives the calendar's scripts time to render;
      // a selector-based wait may be more reliable — confirm against the page.
      await page.waitForTimeout(5_000);

      const now = new Date();
      const dateFrom = toJstDateString(now);
      const dateTo = toJstDateString(new Date(now.getTime() + LOOKAHEAD_DAYS * DAY_MS));

      return await extractEvents(page, dateFrom, dateTo);
    } finally {
      await page.close();
    }
  },
};