From be55729482296663da8c96723bfd22080e6762c1 Mon Sep 17 00:00:00 2001 From: yyamashita Date: Wed, 6 May 2026 22:07:53 +0900 Subject: Add Tokyo livehouse event aggregator service Full-stack React Router v7 app that scrapes event listings from major Tokyo live venues (Liquid Room, WWW/WWW X, Shibuya O-EAST, Shinjuku LOFT, Club Quattro) and stores them in SQLite for browsing and search. - Modular scraper architecture: add a new venue by dropping a file in app/scrapers/ and registering it in index.ts - Routes: /events (filter by keyword/venue/date), /events/:id, /venues, GET /api/scrape - EventCard shows artist, date/time, venue, ticket URL, and fee - Post-scrape per-venue Markdown files generated to events/ (dev reference) - /add-livehouse Claude Code skill defined in .claude/commands/ Co-Authored-By: Claude Sonnet 4.6 --- app/components/EventCard.tsx | 89 ++++++++++++++++++ app/components/FilterBar.tsx | 85 +++++++++++++++++ app/lib/db.server.ts | 192 ++++++++++++++++++++++++++++++++++++++ app/lib/markdown-writer.server.ts | 80 ++++++++++++++++ app/lib/scraper-runner.server.ts | 77 +++++++++++++++ app/lib/venue-meta.server.ts | 14 +++ app/root.tsx | 5 +- app/routes.ts | 12 ++- app/routes/api.scrape.ts | 37 ++++++++ app/routes/events.$id.tsx | 124 ++++++++++++++++++++++++ app/routes/events._index.tsx | 94 +++++++++++++++++++ app/routes/index.tsx | 5 + app/routes/venues.tsx | 68 ++++++++++++++ app/scrapers/base.ts | 14 +++ app/scrapers/club-quattro.ts | 78 ++++++++++++++++ app/scrapers/index.ts | 20 ++++ app/scrapers/liquid-room.ts | 87 +++++++++++++++++ app/scrapers/shibuya-o.ts | 82 ++++++++++++++++ app/scrapers/shinjuku-loft.ts | 80 ++++++++++++++++ app/scrapers/www-shibuya.ts | 79 ++++++++++++++++ 20 files changed, 1318 insertions(+), 4 deletions(-) create mode 100644 app/components/EventCard.tsx create mode 100644 app/components/FilterBar.tsx create mode 100644 app/lib/db.server.ts create mode 100644 app/lib/markdown-writer.server.ts create mode 100644 
app/lib/scraper-runner.server.ts create mode 100644 app/lib/venue-meta.server.ts create mode 100644 app/routes/api.scrape.ts create mode 100644 app/routes/events.$id.tsx create mode 100644 app/routes/events._index.tsx create mode 100644 app/routes/index.tsx create mode 100644 app/routes/venues.tsx create mode 100644 app/scrapers/base.ts create mode 100644 app/scrapers/club-quattro.ts create mode 100644 app/scrapers/index.ts create mode 100644 app/scrapers/liquid-room.ts create mode 100644 app/scrapers/shibuya-o.ts create mode 100644 app/scrapers/shinjuku-loft.ts create mode 100644 app/scrapers/www-shibuya.ts (limited to 'app') diff --git a/app/components/EventCard.tsx b/app/components/EventCard.tsx new file mode 100644 index 0000000..6651ff9 --- /dev/null +++ b/app/components/EventCard.tsx @@ -0,0 +1,89 @@ +import { Link } from "react-router"; +import type { Event } from "~/lib/db.server"; + +interface Props { + event: Event; +} + +export default function EventCard({ event }: Props) { + const formattedDate = formatDate(event.date); + const timeLabel = buildTimeLabel(event.open_time, event.start_time); + + return ( + + {event.image_url ? ( + {event.title} + ) : ( +
+ 🎸 +
+ )} + +
+ {/* Title */} +

+ {event.title} +

+ + {/* Artist — required */} +

+ {event.artist ?? "出演者未定"} +

+ + {/* Date + time */} +
+ 📅 {formattedDate} + {timeLabel && | {timeLabel}} +
+ + {/* Venue */} +
+ 📍 + + {event.venue_name} + {event.venue_area ? `(${event.venue_area})` : ""} + +
+ + {/* Fee */} + {event.price && ( +

¥ {event.price}

+ )} + + {/* Ticket URL */} + {event.ticket_url && ( + e.stopPropagation()} + className="mt-auto inline-flex items-center gap-1 text-xs text-indigo-400 hover:underline" + > + 🎟 チケット + + )} +
+ + ); +} + +function formatDate(iso: string): string { + const [y, m, d] = iso.split("-"); + const days = ["日", "月", "火", "水", "木", "金", "土"]; + const dayIdx = new Date(`${iso}T00:00:00`).getDay(); + return `${y}/${m}/${d}(${days[dayIdx]})`; +} + +function buildTimeLabel(open: string | null, start: string | null): string { + const parts: string[] = []; + if (open) parts.push(`OPEN ${open}`); + if (start) parts.push(`START ${start}`); + return parts.join(" / "); +} diff --git a/app/components/FilterBar.tsx b/app/components/FilterBar.tsx new file mode 100644 index 0000000..97a3c02 --- /dev/null +++ b/app/components/FilterBar.tsx @@ -0,0 +1,85 @@ +import { Form, useSearchParams } from "react-router"; +import type { Venue } from "~/lib/db.server"; + +interface Props { + venues: Venue[]; +} + +export default function FilterBar({ venues }: Props) { + const [searchParams] = useSearchParams(); + + return ( +
+ {/* Keyword */} +
+ + +
+ + {/* Venue */} +
+ + +
+ + {/* Date from */} +
+ + +
+ + {/* Date to */} +
+ + +
+ + + + {hasFilters(searchParams) && ( + + クリア + + )} +
+ ); +} + +function hasFilters(params: URLSearchParams): boolean { + return ["keyword", "venue_id", "date_from", "date_to"].some((k) => params.get(k)); +} diff --git a/app/lib/db.server.ts b/app/lib/db.server.ts new file mode 100644 index 0000000..0c55991 --- /dev/null +++ b/app/lib/db.server.ts @@ -0,0 +1,192 @@ +import Database from "better-sqlite3"; +import path from "path"; +import { fileURLToPath } from "url"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const DB_PATH = path.join(__dirname, "../../events.db"); + +let _db: Database.Database | null = null; + +function getDb(): Database.Database { + if (!_db) { + _db = new Database(DB_PATH); + _db.pragma("journal_mode = WAL"); + _db.pragma("foreign_keys = ON"); + initSchema(_db); + } + return _db; +} + +function initSchema(db: Database.Database) { + db.exec(` + CREATE TABLE IF NOT EXISTS venues ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + url TEXT NOT NULL, + area TEXT + ); + + CREATE TABLE IF NOT EXISTS events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + venue_id TEXT NOT NULL REFERENCES venues(id), + title TEXT NOT NULL, + artist TEXT, + date TEXT NOT NULL, + start_time TEXT, + open_time TEXT, + ticket_url TEXT, + price TEXT, + image_url TEXT, + description TEXT, + source_url TEXT, + fetched_at TEXT NOT NULL DEFAULT (datetime('now')), + UNIQUE(venue_id, title, date) + ); + + CREATE INDEX IF NOT EXISTS idx_events_date ON events(date); + CREATE INDEX IF NOT EXISTS idx_events_venue_id ON events(venue_id); + `); +} + +export interface Venue { + id: string; + name: string; + url: string; + area: string | null; + event_count?: number; +} + +export interface Event { + id: number; + venue_id: string; + venue_name: string; + venue_area: string | null; + venue_url?: string; + title: string; + artist: string | null; + date: string; + start_time: string | null; + open_time: string | null; + ticket_url: string | null; + price: string | null; + image_url: string | null; + description: string | null; 
+  source_url: string | null;
+  fetched_at: string;
+}
+
+/** Fields a scraper may supply for one event; omitted optional fields are stored as NULL. */
+export interface EventInput {
+  venue_id: string;
+  title: string;
+  artist?: string | null;
+  date: string;
+  start_time?: string | null;
+  open_time?: string | null;
+  ticket_url?: string | null;
+  price?: string | null;
+  image_url?: string | null;
+  description?: string | null;
+  source_url?: string | null;
+}
+
+/** Insert a venue row, or replace the existing row with the same id. */
+export function upsertVenue(
+  id: string,
+  name: string,
+  url: string,
+  area?: string
+) {
+  getDb()
+    .prepare(
+      "INSERT OR REPLACE INTO venues (id, name, url, area) VALUES (?, ?, ?, ?)"
+    )
+    .run(id, name, url, area ?? null);
+}
+
+/**
+ * Insert an event, or refresh the mutable columns of the existing row that
+ * has the same (venue_id, title, date).
+ *
+ * Every optional field is normalised to `null` before binding: the statement
+ * uses named parameters, and better-sqlite3 refuses to run when a named
+ * parameter is absent from the bound object or its value is `undefined`.
+ */
+export function upsertEvent(event: EventInput) {
+  const row = {
+    venue_id: event.venue_id,
+    title: event.title,
+    artist: event.artist ?? null,
+    date: event.date,
+    start_time: event.start_time ?? null,
+    open_time: event.open_time ?? null,
+    ticket_url: event.ticket_url ?? null,
+    price: event.price ?? null,
+    image_url: event.image_url ?? null,
+    description: event.description ?? null,
+    source_url: event.source_url ?? null,
+  };
+
+  getDb()
+    .prepare(
+      `INSERT INTO events
+         (venue_id, title, artist, date, start_time, open_time,
+          ticket_url, price, image_url, description, source_url, fetched_at)
+       VALUES
+         (@venue_id, @title, @artist, @date, @start_time, @open_time,
+          @ticket_url, @price, @image_url, @description, @source_url, datetime('now'))
+       ON CONFLICT(venue_id, title, date) DO UPDATE SET
+         artist = excluded.artist,
+         start_time = excluded.start_time,
+         open_time = excluded.open_time,
+         ticket_url = excluded.ticket_url,
+         price = excluded.price,
+         image_url = excluded.image_url,
+         description = excluded.description,
+         source_url = excluded.source_url,
+         fetched_at = excluded.fetched_at`
+    )
+    .run(row);
+}
+
+export interface QueryEventsParams {
+  date_from?: string;
+  date_to?: string;
+  venue_id?: string;
+  keyword?: string;
+  limit?: number;
+  offset?: number;
+}
+
+export function queryEvents(params: QueryEventsParams = {}): Event[] {
+  const { date_from, date_to, venue_id, keyword, limit = 60, offset = 0 } =
+    params;
+
+  const clauses: string[] = [];
+  const args: unknown[] = [];
+
+  if (date_from) {
+    clauses.push("e.date >= ?");
+    args.push(date_from);
+  }
+  if (date_to) {
+    clauses.push("e.date <= ?");
+    args.push(date_to);
+  }
+  if (venue_id) {
+    clauses.push("e.venue_id = ?");
+    args.push(venue_id);
+  }
+  if (keyword) {
+ clauses.push("(e.title LIKE ? OR e.artist LIKE ?)"); + args.push(`%${keyword}%`, `%${keyword}%`); + } + + const where = clauses.length ? `WHERE ${clauses.join(" AND ")}` : ""; + + return getDb() + .prepare( + `SELECT e.*, v.name AS venue_name, v.area AS venue_area + FROM events e JOIN venues v ON e.venue_id = v.id + ${where} + ORDER BY e.date ASC, e.start_time ASC + LIMIT ? OFFSET ?` + ) + .all(...args, limit, offset) as Event[]; +} + +export function getEvent(id: number): Event | undefined { + return getDb() + .prepare( + `SELECT e.*, v.name AS venue_name, v.url AS venue_url, v.area AS venue_area + FROM events e JOIN venues v ON e.venue_id = v.id + WHERE e.id = ?` + ) + .get(id) as Event | undefined; +} + +export function getVenues(): Venue[] { + return getDb() + .prepare( + `SELECT v.*, COUNT(e.id) AS event_count + FROM venues v LEFT JOIN events e ON v.id = e.venue_id + GROUP BY v.id ORDER BY v.name` + ) + .all() as Venue[]; +} diff --git a/app/lib/markdown-writer.server.ts b/app/lib/markdown-writer.server.ts new file mode 100644 index 0000000..cfef315 --- /dev/null +++ b/app/lib/markdown-writer.server.ts @@ -0,0 +1,80 @@ +/** + * Generates a Markdown summary file per venue after scraping. + * Files are written to events/.md in the project root. + */ +import fs from "fs"; +import path from "path"; +import { fileURLToPath } from "url"; +import { queryEvents } from "./db.server"; +import type { Event } from "./db.server"; + +const ROOT = path.join(path.dirname(fileURLToPath(import.meta.url)), "../../"); +const EVENTS_DIR = path.join(ROOT, "events"); + +export function generateVenueMarkdown(venueId: string): void { + const events = queryEvents({ venue_id: venueId, limit: 200 }); + if (events.length === 0) return; + + fs.mkdirSync(EVENTS_DIR, { recursive: true }); + + const venueName = events[0].venue_name; + const venueArea = events[0].venue_area ?? 
""; + const now = new Date().toISOString().slice(0, 10); + + const lines: string[] = [ + `# ${venueName}(${venueArea})イベント情報`, + ``, + `> 最終更新: ${now} `, + `> データソース: スクレイパー自動取得`, + ``, + `| 日付 | 出演者 | タイトル | 時間 | 料金 | URL |`, + `| ---- | ------ | -------- | ---- | ---- | --- |`, + ]; + + for (const ev of events) { + const date = formatDate(ev.date); + const artist = escape(ev.artist ?? "未定"); + const title = escape(ev.title); + const time = buildTime(ev.open_time, ev.start_time); + const fee = escape(ev.price ?? ""); + const url = ev.ticket_url + ? `[チケット](${ev.ticket_url})` + : ev.source_url + ? `[詳細](${ev.source_url})` + : ""; + + lines.push(`| ${date} | ${artist} | ${title} | ${time} | ${fee} | ${url} |`); + } + + lines.push(``); + lines.push(`---`); + lines.push(`*このファイルは自動生成されます。手動編集は次回更新時に上書きされます。*`); + lines.push(``); + + const filePath = path.join(EVENTS_DIR, `${venueId}.md`); + fs.writeFileSync(filePath, lines.join("\n"), "utf-8"); +} + +export function generateAllVenueMarkdown(venueIds: string[]): void { + for (const id of venueIds) { + generateVenueMarkdown(id); + } +} + +function formatDate(iso: string): string { + const [y, m, d] = iso.split("-"); + const days = ["日", "月", "火", "水", "木", "金", "土"]; + const dayIdx = new Date(`${iso}T00:00:00`).getDay(); + return `${y}/${m}/${d}(${days[dayIdx]})`; +} + +function buildTime(open: string | null, start: string | null): string { + const parts: string[] = []; + if (open) parts.push(`OPEN ${open}`); + if (start) parts.push(`START ${start}`); + return parts.join(" / ") || ""; +} + +function escape(s: string): string { + return s.replace(/\|/g, "\\|").replace(/\n/g, " "); +} diff --git a/app/lib/scraper-runner.server.ts b/app/lib/scraper-runner.server.ts new file mode 100644 index 0000000..070a568 --- /dev/null +++ b/app/lib/scraper-runner.server.ts @@ -0,0 +1,77 @@ +import { upsertVenue, upsertEvent } from "./db.server"; +import { generateVenueMarkdown, generateAllVenueMarkdown } from 
"./markdown-writer.server"; +import { ALL_SCRAPERS } from "~/scrapers/index"; + +export interface ScrapeResult { + venue_id: string; + venue_name: string; + events_saved: number; + markdown_path?: string; + error?: string; +} + +export async function runAllScrapers(): Promise { + const results: ScrapeResult[] = []; + const successIds: string[] = []; + + for (const scraper of ALL_SCRAPERS) { + const { venue } = scraper; + upsertVenue(venue.id, venue.name, venue.url, venue.area); + + try { + const events = await scraper.scrape(); + for (const event of events) { + upsertEvent(event); + } + successIds.push(venue.id); + results.push({ + venue_id: venue.id, + venue_name: venue.name, + events_saved: events.length, + }); + } catch (err) { + results.push({ + venue_id: venue.id, + venue_name: venue.name, + events_saved: 0, + error: err instanceof Error ? err.message : String(err), + }); + } + } + + // Generate Markdown files for all venues that scraped successfully + generateAllVenueMarkdown(successIds); + + return results; +} + +export async function runScraper(venueId: string): Promise { + const scraper = ALL_SCRAPERS.find((s) => s.venue.id === venueId); + if (!scraper) { + return { venue_id: venueId, venue_name: venueId, events_saved: 0, error: "Scraper not found" }; + } + + const { venue } = scraper; + upsertVenue(venue.id, venue.name, venue.url, venue.area); + + try { + const events = await scraper.scrape(); + for (const event of events) { + upsertEvent(event); + } + generateVenueMarkdown(venue.id); + return { + venue_id: venue.id, + venue_name: venue.name, + events_saved: events.length, + markdown_path: `events/${venue.id}.md`, + }; + } catch (err) { + return { + venue_id: venue.id, + venue_name: venue.name, + events_saved: 0, + error: err instanceof Error ? 
err.message : String(err), + }; + } +} diff --git a/app/lib/venue-meta.server.ts b/app/lib/venue-meta.server.ts new file mode 100644 index 0000000..58743c8 --- /dev/null +++ b/app/lib/venue-meta.server.ts @@ -0,0 +1,14 @@ +/** + * Server-only module: exposes venue metadata from all registered scrapers. + * Importing this in a route loader ensures scraper code never reaches the client bundle. + */ +import { ALL_SCRAPERS } from "~/scrapers/index"; +import type { VenueMeta } from "~/scrapers/base"; + +export function getScraperVenues(): VenueMeta[] { + return ALL_SCRAPERS.map((s) => s.venue); +} + +export function getScraperIds(): string[] { + return ALL_SCRAPERS.map((s) => s.venue.id); +} diff --git a/app/root.tsx b/app/root.tsx index 9fc6636..dd55df0 100644 --- a/app/root.tsx +++ b/app/root.tsx @@ -25,14 +25,15 @@ export const links: Route.LinksFunction = () => [ export function Layout({ children }: { children: React.ReactNode }) { return ( - + + 東京ライブハウス - + {children} diff --git a/app/routes.ts b/app/routes.ts index 102b402..028da16 100644 --- a/app/routes.ts +++ b/app/routes.ts @@ -1,3 +1,11 @@ -import { type RouteConfig, index } from "@react-router/dev/routes"; +import { type RouteConfig, index, route, prefix } from "@react-router/dev/routes"; -export default [index("routes/home.tsx")] satisfies RouteConfig; +export default [ + index("routes/index.tsx"), + ...prefix("events", [ + index("routes/events._index.tsx"), + route(":id", "routes/events.$id.tsx"), + ]), + route("venues", "routes/venues.tsx"), + route("api/scrape", "routes/api.scrape.ts"), +] satisfies RouteConfig; diff --git a/app/routes/api.scrape.ts b/app/routes/api.scrape.ts new file mode 100644 index 0000000..4071985 --- /dev/null +++ b/app/routes/api.scrape.ts @@ -0,0 +1,37 @@ +/** + * Resource route: POST /api/scrape + * Triggers scraping for all venues (or a specific one via ?venue_id=xxx). + * Returns JSON results and redirects back if called from a form. 
+ */ +import { redirect } from "react-router"; +import type { Route } from "./+types/api.scrape"; +import { runAllScrapers, runScraper } from "~/lib/scraper-runner.server"; + +export async function action({ request }: Route.ActionArgs) { + const formData = await request.formData(); + const venueId = formData.get("venue_id"); + + const results = venueId + ? [await runScraper(String(venueId))] + : await runAllScrapers(); + + // If called from a browser form, redirect back + const referer = request.headers.get("Referer"); + if (referer) { + return redirect(referer); + } + + return Response.json({ results }); +} + +// Allow GET for quick testing in the browser +export async function loader({ request }: Route.LoaderArgs) { + const url = new URL(request.url); + const venueId = url.searchParams.get("venue_id"); + + const results = venueId + ? [await runScraper(venueId)] + : await runAllScrapers(); + + return Response.json({ results }); +} diff --git a/app/routes/events.$id.tsx b/app/routes/events.$id.tsx new file mode 100644 index 0000000..cecb282 --- /dev/null +++ b/app/routes/events.$id.tsx @@ -0,0 +1,124 @@ +import { useLoaderData, Link } from "react-router"; +import type { Route } from "./+types/events.$id"; +import { getEvent } from "~/lib/db.server"; + +export async function loader({ params }: Route.LoaderArgs) { + const id = parseInt(params.id, 10); + if (isNaN(id)) throw new Response("Not Found", { status: 404 }); + const event = getEvent(id); + if (!event) throw new Response("Not Found", { status: 404 }); + return { event }; +} + +export default function EventDetail() { + const { event } = useLoaderData(); + + return ( +
+
+ + 🎸 東京ライブハウス + + +
+ +
+ + ← イベント一覧に戻る + + +
+ {event.image_url && ( + {event.title} + )} + +
+
+

{event.title}

+ {event.artist && ( +

{event.artist}

+ )} +
+ + {event.venue_name} + +
+ +
+ + {event.open_time && } + {event.start_time && } + {event.price && } + {event.venue_area && } +
+ + {event.description && ( +

+ {event.description} +

+ )} + +
+ {event.ticket_url && ( + + チケット購入 + + )} + {event.source_url && ( + + 詳細ページ + + )} + {event.venue_url && ( + + 会場サイト + + )} +
+ +

+ 最終取得: {event.fetched_at} +

+
+
+
+ ); +} + +function Detail({ label, value }: { label: string; value: string }) { + return ( +
+
{label}
+
{value}
+
+  );
+}
+
+/**
+ * Format a `YYYY-MM-DD` string as `YYYY年MM月DD日(曜)`.
+ *
+ * The weekday is computed from local midnight of that date. A bare
+ * `YYYY-MM-DD` passed to `Date` is interpreted as UTC midnight, which yields
+ * the previous weekday in timezones west of UTC.
+ */
+function formatDate(iso: string): string {
+  const [y, m, d] = iso.split("-");
+  const days = ["日", "月", "火", "水", "木", "金", "土"];
+  const day = days[new Date(`${iso}T00:00:00`).getDay()];
+  return `${y}年${m}月${d}日(${day})`;
+}
diff --git a/app/routes/events._index.tsx b/app/routes/events._index.tsx
new file mode 100644
index 0000000..3883d37
--- /dev/null
+++ b/app/routes/events._index.tsx
@@ -0,0 +1,94 @@
+import { useLoaderData, useSearchParams, Form, Link } from "react-router";
+import type { Route } from "./+types/events._index";
+import { queryEvents, getVenues } from "~/lib/db.server";
+import EventCard from "~/components/EventCard";
+import FilterBar from "~/components/FilterBar";
+
+/**
+ * Read the filter and paging query parameters, then load one page of events
+ * plus the venue list for the filter bar.
+ */
+export async function loader({ request }: Route.LoaderArgs) {
+  const url = new URL(request.url);
+  const date_from = url.searchParams.get("date_from") ?? undefined;
+  const date_to = url.searchParams.get("date_to") ?? undefined;
+  const venue_id = url.searchParams.get("venue_id") ?? undefined;
+  const keyword = url.searchParams.get("keyword") ?? undefined;
+  // A non-numeric ?page= parses to NaN, and Math.max(1, NaN) is still NaN,
+  // which would end up as the SQL OFFSET. Fall back to the first page instead.
+  const requestedPage = Number.parseInt(url.searchParams.get("page") ?? "1", 10);
+  const page = Number.isFinite(requestedPage) && requestedPage > 0 ? requestedPage : 1;
+  const limit = 30;
+  const offset = (page - 1) * limit;
+
+  const events = queryEvents({ date_from, date_to, venue_id, keyword, limit, offset });
+  const venues = getVenues();
+
+  // A full page is taken as the signal that another page may exist.
+  return { events, venues, page, hasMore: events.length === limit };
+}
+
+export default function EventsIndex() {
+  const { events, venues, page, hasMore } = useLoaderData<typeof loader>();
+  const [searchParams] = useSearchParams();
+
+  return (
+
+ + 🎸 東京ライブハウス + + +
+ +
+
+

イベント一覧

+
+ +
+
+ + + + {events.length === 0 ? ( +
+

イベントが見つかりません

+

「情報を更新」ボタンでデータを取得してください。

+
+ ) : ( +
+ {events.map((event) => ( + + ))} +
+ )} + +
+ {page > 1 && ( + + ← 前のページ + + )} + {hasMore && ( + + 次のページ → + + )} +
+
+
+ ); +} + +function buildPageParams(params: URLSearchParams, page: number): string { + const next = new URLSearchParams(params); + next.set("page", String(page)); + return next.toString(); +} diff --git a/app/routes/index.tsx b/app/routes/index.tsx new file mode 100644 index 0000000..1cdb9a4 --- /dev/null +++ b/app/routes/index.tsx @@ -0,0 +1,5 @@ +import { redirect } from "react-router"; + +export function loader() { + return redirect("/events"); +} diff --git a/app/routes/venues.tsx b/app/routes/venues.tsx new file mode 100644 index 0000000..23b052f --- /dev/null +++ b/app/routes/venues.tsx @@ -0,0 +1,68 @@ +import { useLoaderData, Link } from "react-router"; +import type { Route } from "./+types/venues"; +import { getVenues } from "~/lib/db.server"; +import { getScraperIds } from "~/lib/venue-meta.server"; + +export async function loader(_: Route.LoaderArgs) { + const venues = getVenues(); + const scraperIds = getScraperIds(); + return { venues, scraperIds }; +} + +export default function Venues() { + const { venues, scraperIds: scraperIdList } = useLoaderData(); + const scraperIds = new Set(scraperIdList); + + return ( +
+
+ + 🎸 東京ライブハウス + + +
+ +
+
+

会場一覧

+

+ 現在 {scraperIdList.length} 会場のスクレイパーが登録されています。 + 新しい会場を追加するには app/scrapers/ に + モジュールを追加して index.ts に登録してください。 +

+
+ + {venues.length === 0 ? ( +

まだ会場データがありません。「情報を更新」してください。

+ ) : ( +
+ {venues.map((v) => ( + +
+

{v.name}

+ {v.area &&

{v.area}

} + {scraperIds.has(v.id) && ( + + スクレイパー登録済 + + )} +
+ + {v.event_count ?? 0} + + + + ))} +
+ )} +
+
+ ); +} diff --git a/app/scrapers/base.ts b/app/scrapers/base.ts new file mode 100644 index 0000000..512fcbb --- /dev/null +++ b/app/scrapers/base.ts @@ -0,0 +1,14 @@ +import type { EventInput } from "~/lib/db.server"; + +export interface VenueMeta { + id: string; + name: string; + url: string; + area: string; +} + +export interface Scraper { + venue: VenueMeta; + /** Fetch events from the venue's website. Returns event inputs ready for upsert. */ + scrape(): Promise; +} diff --git a/app/scrapers/club-quattro.ts b/app/scrapers/club-quattro.ts new file mode 100644 index 0000000..ae903bc --- /dev/null +++ b/app/scrapers/club-quattro.ts @@ -0,0 +1,78 @@ +/** + * Club Quattro 渋谷 — https://www.club-quattro.com/shibuya/schedule/ + */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "club-quattro", + name: "CLUB QUATTRO", + url: "https://www.club-quattro.com", + area: "渋谷", +}; + +export const scraper: Scraper = { + venue, + async scrape(): Promise { + const res = await fetch("https://www.club-quattro.com/shibuya/schedule/"); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + const html = await res.text(); + const $ = cheerio.load(html); + const events: EventInput[] = []; + + $(".schedule-list__item, .c-event, li.event").each((_, el) => { + const $el = $(el); + + const title = $el.find(".schedule-list__title, .event-name, h3, h2").first().text().trim(); + if (!title) return; + + const rawDate = + $el.find(".schedule-list__date, .event-date, time").first().text().trim() || + $el.find("time").attr("datetime") || + ""; + const date = parseJapaneseDate(rawDate); + if (!date) return; + + const timeText = $el.find(".schedule-list__time, .time-info").first().text(); + const openMatch = timeText.match(/OPEN[:: ]*(\d{2}:\d{2})/i); + const startMatch = timeText.match(/START[:: ]*(\d{2}:\d{2})/i); + + const detailHref = 
$el.find("a[href]").first().attr("href") ?? null;
+
+      events.push({
+        venue_id: venue.id,
+        title,
+        artist: $el.find(".schedule-list__artist, .artist-name").first().text().trim() || null,
+        date,
+        open_time: openMatch?.[1] ?? null,
+        start_time: startMatch?.[1] ?? null,
+        ticket_url:
+          $el.find("a[href*='eplus'], a[href*='pia'], a[href*='ticket']").first().attr("href") ?? null,
+        image_url: $el.find("img").first().attr("src")
+          ? absoluteUrl($el.find("img").first().attr("src")!, venue.url)
+          : null,
+        source_url: detailHref ? absoluteUrl(detailHref, venue.url) : null,
+      });
+    });
+
+    return events;
+  },
+};
+
+/**
+ * Extract a calendar date from schedule text and return it as `YYYY-MM-DD`,
+ * or `null` when no plausible date is present.
+ *
+ * Accepts "2025.06.15", "2025/06/15", "2025-06-15" (the form used by a
+ * `datetime` attribute), "2025年6月15日", and the year-less "06.15" / "6月15日".
+ * Year-less dates are assumed to fall in the current calendar year.
+ */
+function parseJapaneseDate(raw: string): string | null {
+  let year: string;
+  let month: string;
+  let day: string;
+
+  const full = raw.match(/(\d{4})[-./年](\d{1,2})[-./月](\d{1,2})/);
+  if (full) {
+    [, year, month, day] = full;
+  } else {
+    const short = raw.match(/(\d{1,2})[./月](\d{1,2})/);
+    if (!short) return null;
+    year = String(new Date().getFullYear());
+    [, month, day] = short;
+  }
+
+  // Reject digit pairs that cannot be a date (e.g. "18.30" from a time string).
+  const monthNum = Number(month);
+  const dayNum = Number(day);
+  if (monthNum < 1 || monthNum > 12 || dayNum < 1 || dayNum > 31) return null;
+
+  return `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
+}
+
+/**
+ * Resolve `url` against the venue origin `base`.
+ * Absolute http(s) URLs are returned untouched; protocol-relative ("//host/…")
+ * and relative references are resolved with the standard URL parser.
+ */
+function absoluteUrl(url: string, base: string): string {
+  if (/^https?:\/\//i.test(url)) return url;
+  try {
+    return new URL(url, `${base.replace(/\/+$/, "")}/`).toString();
+  } catch {
+    // Unparseable href: keep the raw value rather than failing the whole scrape.
+    return url;
+  }
+}
diff --git a/app/scrapers/index.ts b/app/scrapers/index.ts
new file mode 100644
index 0000000..97d2586
--- /dev/null
+++ b/app/scrapers/index.ts
@@ -0,0 +1,20 @@
+/**
+ * Registry of all venue scrapers.
+ * To add a new venue: create a new file implementing Scraper, then add it here.
+ */ +import type { Scraper } from "./base"; +import { scraper as liquidRoom } from "./liquid-room"; +import { scraper as wwwShibuya } from "./www-shibuya"; +import { scraper as shibuyaO } from "./shibuya-o"; +import { scraper as shinjukuLoft } from "./shinjuku-loft"; +import { scraper as clubQuattro } from "./club-quattro"; + +export const ALL_SCRAPERS: Scraper[] = [ + liquidRoom, + wwwShibuya, + shibuyaO, + shinjukuLoft, + clubQuattro, +]; + +export type { Scraper } from "./base"; diff --git a/app/scrapers/liquid-room.ts b/app/scrapers/liquid-room.ts new file mode 100644 index 0000000..b497759 --- /dev/null +++ b/app/scrapers/liquid-room.ts @@ -0,0 +1,87 @@ +/** + * Liquid Room (恵比寿) — https://www.liquidroom.net/schedule + * + * The schedule page lists events with JSON-LD or HTML data. + * Structure:
contains date, title, etc. + */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "liquid-room", + name: "LIQUID ROOM", + url: "https://www.liquidroom.net", + area: "恵比寿", +}; + +export const scraper: Scraper = { + venue, + async scrape(): Promise { + const res = await fetch("https://www.liquidroom.net/schedule"); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + const html = await res.text(); + const $ = cheerio.load(html); + const events: EventInput[] = []; + + $("article.p-schedule__item, .schedule-list__item, .c-event-item").each( + (_, el) => { + const $el = $(el); + + const title = + $el.find(".p-schedule__title, .event-title, h3, h2").first().text().trim(); + if (!title) return; + + const dateStr = + $el.find(".p-schedule__date, .event-date, time").first().text().trim() || + $el.find("time").attr("datetime") || + ""; + const date = parseJapaneseDate(dateStr); + if (!date) return; + + const artist = + $el.find(".p-schedule__artist, .artist").first().text().trim() || null; + const startTime = + $el.find(".p-schedule__time, .open-time").first().text().trim().match(/\d{2}:\d{2}/)?.[0] ?? null; + const ticketUrl = + $el.find("a[href*='ticket'], a[href*='eplus'], a[href*='pia']").first().attr("href") ?? null; + const imageUrl = + $el.find("img").first().attr("src") ?? null; + const sourceUrl = + $el.find("a").first().attr("href") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist, + date, + start_time: startTime, + ticket_url: ticketUrl, + image_url: imageUrl ? absoluteUrl(imageUrl, venue.url) : null, + source_url: sourceUrl ? 
absoluteUrl(sourceUrl, venue.url) : null,
+        });
+      }
+    );
+
+    return events;
+  },
+};
+
+/**
+ * Extract a calendar date from schedule text and return it as `YYYY-MM-DD`,
+ * or `null` when no plausible date is present.
+ *
+ * Handles "2025.06.15", "2025/06/15", "2025-06-15" (the form used by a
+ * `datetime` attribute), "2025年06月15日", and the year-less "06.15" / "6月15日".
+ * Year-less dates are assumed to fall in the current calendar year.
+ */
+function parseJapaneseDate(raw: string): string | null {
+  let year: string;
+  let month: string;
+  let day: string;
+
+  const full = raw.match(/(\d{4})[-./年](\d{1,2})[-./月](\d{1,2})/);
+  if (full) {
+    [, year, month, day] = full;
+  } else {
+    const short = raw.match(/(\d{1,2})[./月](\d{1,2})/);
+    if (!short) return null;
+    year = String(new Date().getFullYear());
+    [, month, day] = short;
+  }
+
+  // Reject digit pairs that cannot be a date (e.g. "18.30" from a time string).
+  const monthNum = Number(month);
+  const dayNum = Number(day);
+  if (monthNum < 1 || monthNum > 12 || dayNum < 1 || dayNum > 31) return null;
+
+  return `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
+}
+
+/**
+ * Resolve `url` against the venue origin `base`.
+ * Absolute http(s) URLs are returned untouched; protocol-relative ("//host/…")
+ * and relative references are resolved with the standard URL parser.
+ */
+function absoluteUrl(url: string, base: string): string {
+  if (/^https?:\/\//i.test(url)) return url;
+  try {
+    return new URL(url, `${base.replace(/\/+$/, "")}/`).toString();
+  } catch {
+    // Unparseable href: keep the raw value rather than failing the whole scrape.
+    return url;
+  }
+}
diff --git a/app/scrapers/shibuya-o.ts b/app/scrapers/shibuya-o.ts
new file mode 100644
index 0000000..1ad8d8c
--- /dev/null
+++ b/app/scrapers/shibuya-o.ts
@@ -0,0 +1,82 @@
+/**
+ * Shibuya O-East / O-West / O-Crest / O-Nest (渋谷)
+ * https://www.shibuya-o.com/schedule/
+ *
+ * The page uses a unified schedule listing for all O venues.
+ */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "shibuya-o", + name: "渋谷 O-EAST / O-WEST", + url: "https://www.shibuya-o.com", + area: "渋谷", +}; + +export const scraper: Scraper = { + venue, + async scrape(): Promise { + const res = await fetch("https://www.shibuya-o.com/schedule/"); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + const html = await res.text(); + const $ = cheerio.load(html); + const events: EventInput[] = []; + + $(".schedule_list li, .c-schedule__item, .event-item").each((_, el) => { + const $el = $(el); + + const title = $el.find(".schedule_title, .title, h3").first().text().trim(); + if (!title) return; + + const rawDate = + $el.find(".schedule_date, .date, time").first().text().trim() || + $el.find("time").attr("datetime") || + ""; + const date = parseJapaneseDate(rawDate); + if (!date) return; + + const hall = $el.find(".schedule_hall, .hall, .venue-name").first().text().trim() || null; + const timeText = $el.find(".schedule_time, .time").first().text(); + const openMatch = timeText.match(/OPEN[:: ]*(\d{2}:\d{2})/i); + const startMatch = timeText.match(/START[:: ]*(\d{2}:\d{2})/i); + + const detailHref = $el.find("a[href]").first().attr("href") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist: hall, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? null, + ticket_url: + $el.find("a[href*='eplus'], a[href*='lawson'], a[href*='ticket']").first().attr("href") ?? null, + image_url: $el.find("img").first().attr("src") + ? absoluteUrl($el.find("img").first().attr("src")!, venue.url) + : null, + source_url: detailHref ? 
absoluteUrl(detailHref, venue.url) : null,
+      });
+    });
+
+    return events;
+  },
+};
+
+/**
+ * Extract a calendar date from schedule text and return it as `YYYY-MM-DD`,
+ * or `null` when no plausible date is present.
+ *
+ * Accepts "2025.06.15", "2025/06/15", "2025-06-15" (the form used by a
+ * `datetime` attribute), "2025年6月15日", and the year-less "06.15" / "6月15日".
+ * Year-less dates are assumed to fall in the current calendar year.
+ */
+function parseJapaneseDate(raw: string): string | null {
+  let year: string;
+  let month: string;
+  let day: string;
+
+  const full = raw.match(/(\d{4})[-./年](\d{1,2})[-./月](\d{1,2})/);
+  if (full) {
+    [, year, month, day] = full;
+  } else {
+    const short = raw.match(/(\d{1,2})[./月](\d{1,2})/);
+    if (!short) return null;
+    year = String(new Date().getFullYear());
+    [, month, day] = short;
+  }
+
+  // Reject digit pairs that cannot be a date (e.g. "18.30" from a time string).
+  const monthNum = Number(month);
+  const dayNum = Number(day);
+  if (monthNum < 1 || monthNum > 12 || dayNum < 1 || dayNum > 31) return null;
+
+  return `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
+}
+
+/**
+ * Resolve `url` against the venue origin `base`.
+ * Absolute http(s) URLs are returned untouched; protocol-relative ("//host/…")
+ * and relative references are resolved with the standard URL parser.
+ */
+function absoluteUrl(url: string, base: string): string {
+  if (/^https?:\/\//i.test(url)) return url;
+  try {
+    return new URL(url, `${base.replace(/\/+$/, "")}/`).toString();
+  } catch {
+    // Unparseable href: keep the raw value rather than failing the whole scrape.
+    return url;
+  }
+}
diff --git a/app/scrapers/shinjuku-loft.ts b/app/scrapers/shinjuku-loft.ts
new file mode 100644
index 0000000..8a64761
--- /dev/null
+++ b/app/scrapers/shinjuku-loft.ts
@@ -0,0 +1,80 @@
+/**
+ * 新宿 LOFT — https://www.loft-prj.co.jp/schedule/loft
+ *
+ * The schedule page renders events inside `.eventlist` items.
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+export const venue: VenueMeta = {
+  id: "shinjuku-loft",
+  name: "新宿 LOFT",
+  url: "https://www.loft-prj.co.jp",
+  area: "新宿",
+};
+
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    const res = await fetch("https://www.loft-prj.co.jp/schedule/loft");
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const html = await res.text();
+    const $ = cheerio.load(html);
+    const events: EventInput[] = [];
+
+    $(".eventlist__item, .schedule-item, .event_list li").each((_, el) => {
+      const $el = $(el);
+
+      const title = $el.find(".eventlist__title, .event-title, h3, h2").first().text().trim();
+      if (!title) return;
+
+      const rawDate =
+        $el.find(".eventlist__date, .event-date, time").first().text().trim() ||
+        $el.find("time").attr("datetime") ||
+        "";
+      const date = parseJapaneseDate(rawDate);
+      if (!date) return;
+
+      const timeText =
$el.find(".eventlist__time, .time").first().text(); + const openMatch = timeText.match(/OPEN[:: ]*(\d{2}:\d{2})/i); + const startMatch = timeText.match(/START[:: ]*(\d{2}:\d{2})/i); + + const detailHref = $el.find("a[href]").first().attr("href") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist: $el.find(".eventlist__artist, .artist").first().text().trim() || null, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? null, + ticket_url: + $el.find("a[href*='eplus'], a[href*='pia'], a[href*='ticket']").first().attr("href") ?? null, + image_url: $el.find("img").first().attr("src") + ? absoluteUrl($el.find("img").first().attr("src")!, venue.url) + : null, + source_url: detailHref ? absoluteUrl(detailHref, venue.url) : null, + }); + }); + + return events; + }, +}; + +function parseJapaneseDate(raw: string): string | null { + const m = + raw.match(/(\d{4})[./年](\d{1,2})[./月](\d{1,2})/) || + raw.match(/(\d{1,2})[./月](\d{1,2})/); + if (!m) return null; + if (m.length === 4) { + return `${m[1]}-${m[2].padStart(2, "0")}-${m[3].padStart(2, "0")}`; + } + const year = new Date().getFullYear(); + return `${year}-${m[1].padStart(2, "0")}-${m[2].padStart(2, "0")}`; +} + +function absoluteUrl(url: string, base: string): string { + if (url.startsWith("http")) return url; + return url.startsWith("/") ? 
base + url : `${base}/${url}`; +} diff --git a/app/scrapers/www-shibuya.ts b/app/scrapers/www-shibuya.ts new file mode 100644 index 0000000..905fc61 --- /dev/null +++ b/app/scrapers/www-shibuya.ts @@ -0,0 +1,79 @@ +/** + * WWW / WWW X (渋谷) — https://www-shibuya.jp/schedule/ + */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "www-shibuya", + name: "WWW / WWW X", + url: "https://www-shibuya.jp", + area: "渋谷", +}; + +export const scraper: Scraper = { + venue, + async scrape(): Promise { + const res = await fetch("https://www-shibuya.jp/schedule/"); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + const html = await res.text(); + const $ = cheerio.load(html); + const events: EventInput[] = []; + + $(".schedule-list li, .p-schedule-item, article").each((_, el) => { + const $el = $(el); + + const title = $el.find(".schedule-title, .title, h3, h2").first().text().trim(); + if (!title) return; + + const rawDate = + $el.find(".schedule-date, .date, time").first().text().trim() || + $el.find("time").attr("datetime") || + ""; + const date = parseJapaneseDate(rawDate); + if (!date) return; + + const timeText = $el.find(".schedule-time, .time").first().text(); + const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i); + const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i); + + const detailHref = $el.find("a").first().attr("href") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist: $el.find(".artist").first().text().trim() || null, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? null, + ticket_url: + $el.find("a[href*='eplus'], a[href*='pia'], a[href*='ticket']").first().attr("href") ?? null, + image_url: + $el.find("img").first().attr("src") + ? absoluteUrl($el.find("img").first().attr("src")!, venue.url) + : null, + source_url: detailHref ? 
absoluteUrl(detailHref, venue.url) : null, + }); + }); + + return events; + }, +}; + +function parseJapaneseDate(raw: string): string | null { + const m = + raw.match(/(\d{4})[./年](\d{1,2})[./月](\d{1,2})/) || + raw.match(/(\d{1,2})[./月](\d{1,2})/); + if (!m) return null; + if (m.length === 4) { + return `${m[1]}-${m[2].padStart(2, "0")}-${m[3].padStart(2, "0")}`; + } + const year = new Date().getFullYear(); + return `${year}-${m[1].padStart(2, "0")}-${m[2].padStart(2, "0")}`; +} + +function absoluteUrl(url: string, base: string): string { + if (url.startsWith("http")) return url; + return url.startsWith("/") ? base + url : `${base}/${url}`; +} -- cgit v1.2.3