From 0cd5fb770ca9bd3f304d9556a4b33a4ad4f45e7e Mon Sep 17 00:00:00 2001 From: yyamashita Date: Thu, 7 May 2026 10:16:43 +0900 Subject: Playwright scraping for FLAT/Pitbar; web UI display-only - Install Playwright + Chromium; add shared browser singleton (playwright.server.ts) - Rewrite flat-nishiogikubo scraper: Wix calendar via headless browser, month navigation via date picker, extracts .WPczEB/.ExCBIq selectors - Rewrite pitbar-nishiogikubo scraper: freecalend.com via headless browser, parses cal-{member}-{year}-{month}-{day} cell IDs - scraper-runner: close shared browser after each run with closeBrowser() - Remove all scrape trigger buttons from web UI (events index, venues page); remove /api/scrape and /api/scrape-status from routes.ts Co-Authored-By: Claude Sonnet 4.6 --- app/scrapers/pitbar-nishiogikubo.ts | 169 ++++++++++++++++++++---------------- 1 file changed, 92 insertions(+), 77 deletions(-) (limited to 'app/scrapers/pitbar-nishiogikubo.ts') diff --git a/app/scrapers/pitbar-nishiogikubo.ts b/app/scrapers/pitbar-nishiogikubo.ts index 5c70023..54d25d5 100644 --- a/app/scrapers/pitbar-nishiogikubo.ts +++ b/app/scrapers/pitbar-nishiogikubo.ts @@ -1,18 +1,19 @@ /** - * Pitbar 西荻窪 — http://freecalend.com/open/mem25771_date{YYYYMM} + * Pitbar 西荻窪 — http://freecalend.com/open/mem25771 * - * スケジュールは Ameblo (https://ameblo.jp/pitbar/) 経由で - * freecalend.com に掲載されているが、自動リクエストをブロックしている。 + * freecalend.com は静的 fetch をブロックするため Playwright を使用。 * - * 代替案: - * - User-Agent を設定したヘッドレスブラウザで freecalend を取得 - * - 公式 Instagram / X (@pitbar_nishiogi) の投稿を取得 - * - 手動でイベントを登録する管理画面を用意する + * DOM 構造: + * id="cal-25771-{year}-{month}-{day}" ← 日付セル + * テキストは: 日数字 + "M.D(day)" + イベント本文 + * id="sitatumari-25771-..." ← 区切り (スキップ) * - * 月ごとの URL パターン: http://freecalend.com/open/mem25771_date{YYYYMM} + * open / start 時刻は "open HH:MM" / "start HH:MM" テキストから取得。 */ +import type { Page } from "playwright"; import type { Scraper, VenueMeta } from "./base"; import type { EventInput } from "~/lib/db.server"; +import { getBrowser } from "~/lib/playwright.server"; export const venue: VenueMeta = { id: "pitbar-nishiogikubo", @@ -21,81 +22,95 @@ export const venue: VenueMeta = { area: "西荻窪", }; -const FREECALEND_MEMBER = "25771"; +const CALENDAR_URL = "http://freecalend.com/open/mem25771"; +const MEMBER_ID = "25771"; + +// Cells whose text matches this are BAR open-hours entries (not live events) +const BAR_ONLY_RE = /^BAR営業/; + +async function extractEvents(page: Page, dateFrom: string, dateTo: string): Promise { + const events: EventInput[] = []; + + const cellData = await page.evaluate((memberId: string) => { + const prefix = `cal-${memberId}-`; + const cells = document.querySelectorAll(`[id^="${prefix}"]`); + return Array.from(cells).map((el) => { + const id = el.getAttribute("id") ?? ""; + const parts = id.split("-"); + // id: cal-25771-YYYY-M-D + const year = parts[2]; + const month = parts[3]; + const day = parts[4]; + if (!year || !month || !day) return null; + return { + date: `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`, + text: el.textContent?.trim() ?? "", + }; + }).filter(Boolean); + }, MEMBER_ID); + + for (const cell of cellData as { date: string; text: string }[]) { + const { date, text } = cell; + if (date < dateFrom || date > dateTo) continue; + + // Remove leading "day-number" and "M.D(day)" lines + const lines = text.split(/\n/).map((l) => l.trim()).filter(Boolean); + // First line is the day number, second is "M.D(dayname)" — skip both + const contentLines = lines.slice(2); + if (contentLines.length === 0) continue; + + const title = contentLines[0]; + if (!title || BAR_ONLY_RE.test(title)) continue; + + const fullText = contentLines.join("\n"); + const openMatch = fullText.match(/open\s+(\d{1,2}:\d{2})/i); + const startMatch = fullText.match(/start\s+(\d{1,2}:\d{2})/i); + + // Extract price: look for lines with "yen" or "円" + const priceMatch = fullText.match(/((?:adv|door|前売)[^\n]*(?:yen|円)[^\n]*)/i); + const price = priceMatch?.[1]?.trim() ?? null; + + // Collect artists (lines starting with ■) + const artists = contentLines + .filter((l) => l.startsWith("■")) + .map((l) => l.slice(1).trim()) + .join("、"); + + events.push({ + venue_id: venue.id, + title, + artist: artists || null, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? null, + price, + source_url: CALENDAR_URL, + }); + } + + return events; +} export const scraper: Scraper = { venue, async scrape(): Promise { - const months = upcomingMonths(2); - const events: EventInput[] = []; - - for (const ym of months) { - const url = `http://freecalend.com/open/mem${FREECALEND_MEMBER}_date${ym}`; - const res = await fetch(url, { - headers: { - "User-Agent": - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/124 Safari/537.36", - Referer: "https://ameblo.jp/pitbar/", - }, - redirect: "follow", - }); - if (!res.ok) continue; - - const html = await res.text(); - if (!html.trim()) continue; - - // freecalend は HTML テーブルカレンダー形式 - // 内にイベント名と時刻が入る - const { load } = await import("cheerio"); - const $ = load(html); - - $("td[class*='day_']").each((_, el) => { - const $el = $(el); - const text = $el.text().trim(); - if (!text || /^\d+$/.test(text)) return; // 日付のみのセルはスキップ - - const dayMatch = $el.attr("class")?.match(/day_(\d+)/); - if (!dayMatch) return; - const day = dayMatch[1].padStart(2, "0"); - const date = `${ym.slice(0, 4)}-${ym.slice(4)}-${day}`; - - const lines = text.split(/[\n\r]+/).map((l) => l.trim()).filter(Boolean); - const title = lines[0] ?? text.slice(0, 100); - - const timeMatch = text.match(/(\d{1,2}:\d{2})/g); - const openTime = timeMatch?.[0] ?? null; - const startTime = timeMatch?.[1] ?? null; - - events.push({ - venue_id: venue.id, - title, - date, - open_time: openTime, - start_time: startTime, - source_url: url, - }); + const browser = await getBrowser(); + const page = await browser.newPage(); + + try { + await page.goto(CALENDAR_URL, { + waitUntil: "domcontentloaded", + timeout: 20_000, }); - } + await page.waitForTimeout(5_000); - if (events.length === 0) { - throw new Error( - "Pitbar freecalend からデータを取得できませんでした。" + - "freecalend.com が自動リクエストをブロックしている可能性があります。" - ); - } + const today = new Date(); + const dateFrom = today.toISOString().slice(0, 10); + const dateTo = new Date(today.getTime() + 35 * 86_400_000).toISOString().slice(0, 10); - return events; + return await extractEvents(page, dateFrom, dateTo); + } finally { + await page.close(); + } }, }; - -function upcomingMonths(count: number): string[] { - const months: string[] = []; - const now = new Date(); - for (let i = 0; i < count; i++) { - const d = new Date(now.getFullYear(), now.getMonth() + i, 1); - const y = d.getFullYear(); - const m = String(d.getMonth() + 1).padStart(2, "0"); - months.push(`${y}${m}`); - } - return months; -} -- cgit v1.2.3