diff options
Diffstat (limited to 'app')
| -rw-r--r-- | app/lib/playwright.server.ts | 17 | ||||
| -rw-r--r-- | app/lib/scraper-runner.server.ts | 5 | ||||
| -rw-r--r-- | app/routes.ts | 2 | ||||
| -rw-r--r-- | app/routes/events._index.tsx | 14 | ||||
| -rw-r--r-- | app/routes/venues.tsx | 41 | ||||
| -rw-r--r-- | app/scrapers/flat-nishiogikubo.ts | 142 | ||||
| -rw-r--r-- | app/scrapers/pitbar-nishiogikubo.ts | 169 |
7 files changed, 255 insertions, 135 deletions
diff --git a/app/lib/playwright.server.ts b/app/lib/playwright.server.ts new file mode 100644 index 0000000..aadb5d3 --- /dev/null +++ b/app/lib/playwright.server.ts @@ -0,0 +1,17 @@ +import { chromium, type Browser } from "playwright"; + +let _browser: Browser | null = null; + +export async function getBrowser(): Promise<Browser> { + if (!_browser || !_browser.isConnected()) { + _browser = await chromium.launch({ headless: true }); + } + return _browser; +} + +export async function closeBrowser(): Promise<void> { + if (_browser) { + await _browser.close(); + _browser = null; + } +} diff --git a/app/lib/scraper-runner.server.ts b/app/lib/scraper-runner.server.ts index 87dd16c..012ff95 100644 --- a/app/lib/scraper-runner.server.ts +++ b/app/lib/scraper-runner.server.ts @@ -7,6 +7,7 @@ import { type ScrapeLog, } from "./db.server"; import { generateVenueMarkdown, generateAllVenueMarkdown } from "./markdown-writer.server"; +import { closeBrowser } from "./playwright.server"; import { ALL_SCRAPERS } from "~/scrapers/index"; import type { EventInput } from "./db.server"; @@ -77,6 +78,8 @@ export async function runAllScrapers(run_id = randomUUID()): Promise<ScrapeResul } generateAllVenueMarkdown(successIds); + // Close shared Playwright browser if it was opened by any scraper + await closeBrowser(); return results; } @@ -104,5 +107,7 @@ export async function runScraper(venueId: string, run_id = randomUUID()): Promis const error = err instanceof Error ? err.message : String(err); updateScrapeLog(logId, "error", 0, error); return { run_id, venue_id: venue.id, venue_name: venue.name, status: "error", events_saved: 0, error }; + } finally { + await closeBrowser(); } } diff --git a/app/routes.ts b/app/routes.ts index c0096e1..74fb552 100644 --- a/app/routes.ts +++ b/app/routes.ts @@ -7,6 +7,4 @@ export default [ route(":id", "routes/events.$id.tsx"), ]), route("venues", "routes/venues.tsx"), - route("api/scrape", "routes/api.scrape.ts"), - route("api/scrape-status", "routes/api.scrape-status.ts"), ] satisfies RouteConfig; diff --git a/app/routes/events._index.tsx b/app/routes/events._index.tsx index 3ff441a..1917ace 100644 --- a/app/routes/events._index.tsx +++ b/app/routes/events._index.tsx @@ -1,4 +1,4 @@ -import { useLoaderData, useSearchParams, Form, Link } from "react-router"; +import { useLoaderData, useSearchParams, Link } from "react-router"; import type { Route } from "./+types/events._index"; import { queryEvents, getVenues } from "~/lib/db.server"; import EventCard from "~/components/EventCard"; @@ -49,16 +49,8 @@ export default function EventsIndex() { </header> <main className="max-w-6xl mx-auto px-4 py-8"> - <div className="mb-6 flex items-center justify-between"> + <div className="mb-6"> <h1 className="text-2xl font-bold">イベント一覧</h1> - <Form method="post" action="/api/scrape"> - <button - type="submit" - className="rounded-md bg-indigo-600 px-4 py-2 text-sm font-medium hover:bg-indigo-500 transition-colors" - > - 情報を更新 - </button> - </Form> </div> <FilterBar venues={venues} defaultDateFrom={date_from} defaultDateTo={date_to} /> @@ -66,7 +58,7 @@ export default function EventsIndex() { {events.length === 0 ? ( <div className="mt-16 text-center text-gray-500"> <p className="text-lg">イベントが見つかりません</p> - <p className="mt-2 text-sm">「情報を更新」ボタンでデータを取得してください。</p> + <p className="mt-2 text-sm">スクレイパーを実行してデータを取得してください: <code>npm run scrape</code></p> </div> ) : ( <div className="mt-6 grid gap-4 sm:grid-cols-2 lg:grid-cols-3"> diff --git a/app/routes/venues.tsx b/app/routes/venues.tsx index affa72a..fca90dd 100644 --- a/app/routes/venues.tsx +++ b/app/routes/venues.tsx @@ -1,18 +1,15 @@ -import { useLoaderData, Link, Form } from "react-router"; +import { useLoaderData, Link } from "react-router"; import type { Route } from "./+types/venues"; import { getVenues, getLastScrapePerVenue, type ScrapeLog } from "~/lib/db.server"; -import { getScraperIds } from "~/lib/venue-meta.server"; export async function loader(_: Route.LoaderArgs) { const venues = getVenues(); - const scraperIds = getScraperIds(); const scrapeStatus = getLastScrapePerVenue(); - return { venues, scraperIds, scrapeStatus }; + return { venues, scrapeStatus }; } export default function Venues() { - const { venues, scraperIds: scraperIdList, scrapeStatus } = useLoaderData<typeof loader>(); - const scraperIds = new Set(scraperIdList); + const { venues, scrapeStatus } = useLoaderData<typeof loader>(); const statusMap = new Map<string, ScrapeLog>(scrapeStatus.map((s) => [s.venue_id, s])); return ( @@ -28,21 +25,11 @@ export default function Venues() { </header> <main className="max-w-4xl mx-auto px-4 py-10"> - <div className="mb-8 flex items-start justify-between gap-4 flex-wrap"> - <div> - <h1 className="text-2xl font-bold">会場一覧</h1> - <p className="mt-1 text-sm text-gray-400"> - 現在 {scraperIdList.length} 会場のスクレイパーが登録されています。 - </p> - </div> - <Form method="post" action="/api/scrape"> - <button - type="submit" - className="rounded-md bg-indigo-600 px-4 py-2 text-sm font-medium hover:bg-indigo-500 transition-colors" - > - 全会場を更新 - </button> - </Form> + <div className="mb-8"> + <h1 className="text-2xl font-bold">会場一覧</h1> + <p className="mt-1 text-sm text-gray-400"> + 現在 {venues.length} 会場が登録されています。 + </p> </div> {venues.length === 0 ? ( @@ -79,18 +66,6 @@ export default function Venues() { <span className="text-xs text-gray-600 whitespace-nowrap">未実行</span> )} - {/* 個別更新ボタン */} - {scraperIds.has(v.id) && ( - <Form method="post" action="/api/scrape"> - <input type="hidden" name="venue_id" value={v.id} /> - <button - type="submit" - className="rounded bg-gray-700 px-3 py-1 text-xs hover:bg-gray-600 transition-colors whitespace-nowrap" - > - 更新 - </button> - </Form> - )} </div> ); })} diff --git a/app/scrapers/flat-nishiogikubo.ts b/app/scrapers/flat-nishiogikubo.ts index 03cc70c..da6752f 100644 --- a/app/scrapers/flat-nishiogikubo.ts +++ b/app/scrapers/flat-nishiogikubo.ts @@ -1,17 +1,20 @@ /** * FLAT 西荻窪 — https://www.flat.rinky.info/schedule * - * ⚠️ Wix サイトのためクライアントサイド JS レンダリング。 - * 静的 fetch ではイベントデータを取得できない。 + * Wix イベントカレンダー。JS レンダリングが必要なため Playwright を使用。 * - * 代替案: - * - Playwright/Puppeteer でヘッドレスブラウザを使用 - * - Wix Events API (要サイトオーナーによる API キー発行) + * DOM 構造: + * [data-hook="calendar-cell-<UTC ISO>"] ← 各日付セル + * .WPczEB → 開始時刻 + * .ExCBIq → イベントタイトル + * aria-label が "イベントなし" のセルはスキップ * - * 現在は空配列を返す(エラーにはしない)。 + * 月ナビ: calendar-date-picker-button を開いて datepicker-right-arrow で翌月へ。 */ +import type { Page } from "playwright"; import type { Scraper, VenueMeta } from "./base"; import type { EventInput } from "~/lib/db.server"; +import { getBrowser } from "~/lib/playwright.server"; export const venue: VenueMeta = { id: "flat-nishiogikubo", @@ -20,14 +23,129 @@ export const venue: VenueMeta = { area: "西荻窪", }; +const SCHEDULE_URL = "https://www.flat.rinky.info/schedule"; + +async function extractMonthEvents(page: Page): Promise<EventInput[]> { + const events: EventInput[] = []; + const cells = await page.locator('[data-hook^="calendar-cell-"]').all(); + + for (const cell of cells) { + const ariaLabel = (await cell.getAttribute("aria-label")) ?? ""; + if (ariaLabel.includes("イベントなし")) continue; + + const dataHook = (await cell.getAttribute("data-hook")) ?? ""; + const isoStr = dataHook.replace("calendar-cell-", ""); + if (!isoStr) continue; + + // UTC timestamp → JST date (UTC+9) + const utcMs = new Date(isoStr).getTime(); + if (isNaN(utcMs)) continue; + const jstDate = new Date(utcMs + 9 * 3_600_000).toISOString().slice(0, 10); + + const timeLocs = cell.locator(".WPczEB"); + const titleLocs = cell.locator(".ExCBIq"); + const titleCount = await titleLocs.count(); + const timeCount = await timeLocs.count(); + + for (let i = 0; i < titleCount; i++) { + const title = (await titleLocs.nth(i).innerText()).trim(); + if (!title) continue; + const time = i < timeCount + ? (await timeLocs.nth(i).innerText()).trim() + : null; + + events.push({ + venue_id: venue.id, + title, + date: jstDate, + start_time: time || null, + source_url: SCHEDULE_URL, + }); + } + } + + return events; +} + +async function navigateToMonth(page: Page, targetYYYYMM: string): Promise<void> { + const [targetYear, targetMonth] = targetYYYYMM.split("-").map(Number); + + // Open the date picker + await page.click('[data-hook="calendar-date-picker-button"]'); + await page.waitForTimeout(500); + + // Click next-month arrow until we reach the target month + for (let attempt = 0; attempt < 6; attempt++) { + const monthText = await page.locator('[data-hook="datepicker-month-dropdown-button"]').innerText(); + const yearText = await page.locator('[data-hook="datepicker-year-dropdown-button"]').innerText(); + + const currentYear = parseInt(yearText); + const months: Record<string, number> = { + "1月": 1, "2月": 2, "3月": 3, "4月": 4, "5月": 5, "6月": 6, + "7月": 7, "8月": 8, "9月": 9, "10月": 10, "11月": 11, "12月": 12, + }; + const currentMonth = months[monthText.trim()] ?? 0; + + if (currentYear === targetYear && currentMonth === targetMonth) break; + + const diff = (targetYear * 12 + targetMonth) - (currentYear * 12 + currentMonth); + if (diff > 0) { + await page.click('[data-hook="datepicker-right-arrow"]'); + } else { + await page.click('[data-hook="datepicker-left-arrow"]'); + } + await page.waitForTimeout(300); + } + + // Click any date in the mini-calendar that belongs to the target month + const allDays = await page.locator('[role="dialog"] button, [data-hook="datepicker-right-arrow"] ~ * button').all(); + // Simpler: find a button with aria-label matching target year/month + const targetPrefix = `${targetYear}年${targetMonth}月`; + const dayBtns = await page.locator(`button[aria-label*="${targetPrefix}"]`).all(); + if (dayBtns.length > 0) { + await dayBtns[0].click(); + } else { + // Fallback: press Escape to close picker + await page.keyboard.press("Escape"); + } + await page.waitForTimeout(2000); +} + export const scraper: Scraper = { venue, async scrape(): Promise<EventInput[]> { - // Wix renders events with JavaScript; static fetch returns an empty calendar. - // TODO: Replace with a headless browser implementation (e.g. Playwright). - throw new Error( - "FLAT 西荻窪 は Wix サイトのため JS レンダリングが必要です。" + - "ヘッドレスブラウザ(Playwright 等)への移行が必要です。" - ); + const browser = await getBrowser(); + const page = await browser.newPage(); + + try { + await page.goto(SCHEDULE_URL, { + waitUntil: "domcontentloaded", + timeout: 30_000, + }); + await page.waitForTimeout(5_000); + + const events: EventInput[] = []; + + // Current month events + events.push(...(await extractMonthEvents(page))); + + // Navigate to next month for 35-day window coverage + const now = new Date(); + const nextMonth = new Date(now.getFullYear(), now.getMonth() + 1, 1); + const nextYYYYMM = `${nextMonth.getFullYear()}-${String(nextMonth.getMonth() + 1).padStart(2, "0")}`; + await navigateToMonth(page, nextYYYYMM); + events.push(...(await extractMonthEvents(page))); + + // Deduplicate by date + title + const seen = new Set<string>(); + return events.filter((e) => { + const key = `${e.date}|${e.title}`; + if (seen.has(key)) return false; + seen.add(key); + return true; + }); + } finally { + await page.close(); + } }, }; diff --git a/app/scrapers/pitbar-nishiogikubo.ts b/app/scrapers/pitbar-nishiogikubo.ts index 5c70023..54d25d5 100644 --- a/app/scrapers/pitbar-nishiogikubo.ts +++ b/app/scrapers/pitbar-nishiogikubo.ts @@ -1,18 +1,19 @@ /** - * Pitbar 西荻窪 — http://freecalend.com/open/mem25771_date{YYYYMM} + * Pitbar 西荻窪 — http://freecalend.com/open/mem25771 * - * スケジュールは Ameblo (https://ameblo.jp/pitbar/) 経由で - * freecalend.com に掲載されているが、自動リクエストをブロックしている。 + * freecalend.com は静的 fetch をブロックするため Playwright を使用。 * - * 代替案: - * - User-Agent を設定したヘッドレスブラウザで freecalend を取得 - * - 公式 Instagram / X (@pitbar_nishiogi) の投稿を取得 - * - 手動でイベントを登録する管理画面を用意する + * DOM 構造: + * id="cal-25771-{year}-{month}-{day}" ← 日付セル + * テキストは: 日数字 + "M.D(day)" + イベント本文 + * id="sitatumari-25771-..." ← 区切り (スキップ) * - * 月ごとの URL パターン: http://freecalend.com/open/mem25771_date{YYYYMM} + * open / start 時刻は "open HH:MM" / "start HH:MM" テキストから取得。 */ +import type { Page } from "playwright"; import type { Scraper, VenueMeta } from "./base"; import type { EventInput } from "~/lib/db.server"; +import { getBrowser } from "~/lib/playwright.server"; export const venue: VenueMeta = { id: "pitbar-nishiogikubo", @@ -21,81 +22,95 @@ export const venue: VenueMeta = { area: "西荻窪", }; -const FREECALEND_MEMBER = "25771"; +const CALENDAR_URL = "http://freecalend.com/open/mem25771"; +const MEMBER_ID = "25771"; + +// Cells whose text matches this are BAR open-hours entries (not live events) +const BAR_ONLY_RE = /^BAR営業/; + +async function extractEvents(page: Page, dateFrom: string, dateTo: string): Promise<EventInput[]> { + const events: EventInput[] = []; + + const cellData = await page.evaluate((memberId: string) => { + const prefix = `cal-${memberId}-`; + const cells = document.querySelectorAll(`[id^="${prefix}"]`); + return Array.from(cells).map((el) => { + const id = el.getAttribute("id") ?? ""; + const parts = id.split("-"); + // id: cal-25771-YYYY-M-D + const year = parts[2]; + const month = parts[3]; + const day = parts[4]; + if (!year || !month || !day) return null; + return { + date: `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`, + text: el.textContent?.trim() ?? "", + }; + }).filter(Boolean); + }, MEMBER_ID); + + for (const cell of cellData as { date: string; text: string }[]) { + const { date, text } = cell; + if (date < dateFrom || date > dateTo) continue; + + // Remove leading "day-number" and "M.D(day)" lines + const lines = text.split(/\n/).map((l) => l.trim()).filter(Boolean); + // First line is the day number, second is "M.D(dayname)" — skip both + const contentLines = lines.slice(2); + if (contentLines.length === 0) continue; + + const title = contentLines[0]; + if (!title || BAR_ONLY_RE.test(title)) continue; + + const fullText = contentLines.join("\n"); + const openMatch = fullText.match(/open\s+(\d{1,2}:\d{2})/i); + const startMatch = fullText.match(/start\s+(\d{1,2}:\d{2})/i); + + // Extract price: look for lines with "yen" or "円" + const priceMatch = fullText.match(/((?:adv|door|前売)[^\n]*(?:yen|円)[^\n]*)/i); + const price = priceMatch?.[1]?.trim() ?? null; + + // Collect artists (lines starting with ■) + const artists = contentLines + .filter((l) => l.startsWith("■")) + .map((l) => l.slice(1).trim()) + .join("、"); + + events.push({ + venue_id: venue.id, + title, + artist: artists || null, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? null, + price, + source_url: CALENDAR_URL, + }); + } + + return events; +} export const scraper: Scraper = { venue, async scrape(): Promise<EventInput[]> { - const months = upcomingMonths(2); - const events: EventInput[] = []; - - for (const ym of months) { - const url = `http://freecalend.com/open/mem${FREECALEND_MEMBER}_date${ym}`; - const res = await fetch(url, { - headers: { - "User-Agent": - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/124 Safari/537.36", - Referer: "https://ameblo.jp/pitbar/", - }, - redirect: "follow", - }); - if (!res.ok) continue; - - const html = await res.text(); - if (!html.trim()) continue; - - // freecalend は HTML テーブルカレンダー形式 - // <td class="day_..."> 内にイベント名と時刻が入る - const { load } = await import("cheerio"); - const $ = load(html); - - $("td[class*='day_']").each((_, el) => { - const $el = $(el); - const text = $el.text().trim(); - if (!text || /^\d+$/.test(text)) return; // 日付のみのセルはスキップ - - const dayMatch = $el.attr("class")?.match(/day_(\d+)/); - if (!dayMatch) return; - const day = dayMatch[1].padStart(2, "0"); - const date = `${ym.slice(0, 4)}-${ym.slice(4)}-${day}`; - - const lines = text.split(/[\n\r]+/).map((l) => l.trim()).filter(Boolean); - const title = lines[0] ?? text.slice(0, 100); - - const timeMatch = text.match(/(\d{1,2}:\d{2})/g); - const openTime = timeMatch?.[0] ?? null; - const startTime = timeMatch?.[1] ?? null; - - events.push({ - venue_id: venue.id, - title, - date, - open_time: openTime, - start_time: startTime, - source_url: url, - }); + const browser = await getBrowser(); + const page = await browser.newPage(); + + try { + await page.goto(CALENDAR_URL, { + waitUntil: "domcontentloaded", + timeout: 20_000, }); - } + await page.waitForTimeout(5_000); - if (events.length === 0) { - throw new Error( - "Pitbar freecalend からデータを取得できませんでした。" + - "freecalend.com が自動リクエストをブロックしている可能性があります。" - ); - } + const today = new Date(); + const dateFrom = today.toISOString().slice(0, 10); + const dateTo = new Date(today.getTime() + 35 * 86_400_000).toISOString().slice(0, 10); - return events; + return await extractEvents(page, dateFrom, dateTo); + } finally { + await page.close(); + } }, }; - -function upcomingMonths(count: number): string[] { - const months: string[] = []; - const now = new Date(); - for (let i = 0; i < count; i++) { - const d = new Date(now.getFullYear(), now.getMonth() + i, 1); - const y = d.getFullYear(); - const m = String(d.getMonth() + 1).padStart(2, "0"); - months.push(`${y}${m}`); - } - return months; -} |
