summary | refs | log | tree | commit | diff
path: root/app/scrapers
diff options
context:
space:
mode:
author yyamashita <yyamashita@mosquit.one> 2026-05-06 22:07:53 +0900
committer yyamashita <yyamashita@mosquit.one> 2026-05-06 22:07:53 +0900
commit be55729482296663da8c96723bfd22080e6762c1 (patch)
tree fcd94b1dc5c55f3a80796c90a555863d13fc9a95 /app/scrapers
parent 014b29bc22b1c207a03dd560051ecdd5df63f0b1 (diff)
Add Tokyo livehouse event aggregator service
Full-stack React Router v7 app that scrapes event listings from major Tokyo live venues (Liquid Room, WWW/WWW X, Shibuya O-EAST, Shinjuku LOFT, Club Quattro) and stores them in SQLite for browsing and search.

- Modular scraper architecture: add a new venue by dropping a file in app/scrapers/ and registering it in index.ts
- Routes: /events (filter by keyword/venue/date), /events/:id, /venues, GET /api/scrape
- EventCard shows artist, date/time, venue, ticket URL, and fee
- Post-scrape per-venue Markdown files generated to events/ (dev reference)
- /add-livehouse Claude Code skill defined in .claude/commands/

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app/scrapers')
-rw-r--r-- app/scrapers/base.ts 14
-rw-r--r-- app/scrapers/club-quattro.ts 78
-rw-r--r-- app/scrapers/index.ts 20
-rw-r--r-- app/scrapers/liquid-room.ts 87
-rw-r--r-- app/scrapers/shibuya-o.ts 82
-rw-r--r-- app/scrapers/shinjuku-loft.ts 80
-rw-r--r-- app/scrapers/www-shibuya.ts 79
7 files changed, 440 insertions, 0 deletions
diff --git a/app/scrapers/base.ts b/app/scrapers/base.ts
new file mode 100644
index 0000000..512fcbb
--- /dev/null
+++ b/app/scrapers/base.ts
@@ -0,0 +1,14 @@
+import type { EventInput } from "~/lib/db.server";
+
+/**
+ * Static description of one venue covered by a scraper.
+ */
+export interface VenueMeta {
+ /** Venue identifier; the scrapers in this directory copy it into each event's `venue_id`. */
+ id: string;
+ /** Name of the venue (presumably the label shown to users — confirm against the UI code). */
+ name: string;
+ /** Root URL of the venue's site; the scrapers use it as the base when resolving relative links. */
+ url: string;
+ /** Area label for the venue, e.g. Shibuya or Shinjuku (the scrapers here store it in Japanese). */
+ area: string;
+}
+
+/**
+ * Contract implemented by every venue scraper module in this directory.
+ */
+export interface Scraper {
+ /** Metadata of the venue this scraper covers. */
+ venue: VenueMeta;
+ /** Fetch events from the venue's website. Returns event inputs ready for upsert. */
+ scrape(): Promise<EventInput[]>;
+}
diff --git a/app/scrapers/club-quattro.ts b/app/scrapers/club-quattro.ts
new file mode 100644
index 0000000..ae903bc
--- /dev/null
+++ b/app/scrapers/club-quattro.ts
@@ -0,0 +1,78 @@
+/**
+ * Club Quattro 渋谷 — https://www.club-quattro.com/shibuya/schedule/
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+/**
+ * Metadata for CLUB QUATTRO. `id` is stamped onto every scraped event as
+ * `venue_id`, and `url` is the base used to resolve relative links.
+ */
+export const venue: VenueMeta = {
+ id: "club-quattro",
+ name: "CLUB QUATTRO",
+ url: "https://www.club-quattro.com",
+ area: "渋谷",
+};
+
+/**
+ * Scraper for the CLUB QUATTRO Shibuya schedule page.
+ */
+export const scraper: Scraper = {
+  venue,
+  /**
+   * Downloads the schedule page and converts each listing element into an
+   * `EventInput`. Elements without a title or a parseable date are skipped.
+   *
+   * @returns One entry per listing that yielded both a title and a date.
+   * @throws Error with message `HTTP <status>` when the response is not OK.
+   */
+  async scrape(): Promise<EventInput[]> {
+    const res = await fetch("https://www.club-quattro.com/shibuya/schedule/");
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const $ = cheerio.load(await res.text());
+    const events: EventInput[] = [];
+
+    $(".schedule-list__item, .c-event, li.event").each((_, el) => {
+      const $el = $(el);
+
+      const title = $el.find(".schedule-list__title, .event-name, h3, h2").first().text().trim();
+      if (!title) return;
+
+      // Prefer the visible date text; fall back to the <time datetime="…"> attribute.
+      const rawDate =
+        $el.find(".schedule-list__date, .event-date, time").first().text().trim() ||
+        $el.find("time").attr("datetime") ||
+        "";
+      const date = parseJapaneseDate(rawDate);
+      if (!date) return;
+
+      const timeText = $el.find(".schedule-list__time, .time-info").first().text();
+      const openMatch = timeText.match(/OPEN[:: ]*(\d{2}:\d{2})/i);
+      const startMatch = timeText.match(/START[:: ]*(\d{2}:\d{2})/i);
+
+      // Query each link once so no non-null assertion is needed below.
+      const detailHref = $el.find("a[href]").first().attr("href");
+      const imageSrc = $el.find("img").first().attr("src");
+      const ticketHref = $el
+        .find("a[href*='eplus'], a[href*='pia'], a[href*='ticket']")
+        .first()
+        .attr("href");
+
+      events.push({
+        venue_id: venue.id,
+        title,
+        artist: $el.find(".schedule-list__artist, .artist-name").first().text().trim() || null,
+        date,
+        open_time: openMatch?.[1] ?? null,
+        start_time: startMatch?.[1] ?? null,
+        // Ticket links are resolved like the other URLs so a site-relative href is not stored as-is.
+        ticket_url: ticketHref ? absoluteUrl(ticketHref, venue.url) : null,
+        image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null,
+        source_url: detailHref ? absoluteUrl(detailHref, venue.url) : null,
+      });
+    });
+
+    return events;
+  },
+};
+
+/**
+ * Extracts a calendar date from schedule text and returns it as `YYYY-MM-DD`.
+ *
+ * Accepted forms: `2025.06.15`, `2025/06/15`, `2025年06月15日`, ISO
+ * `2025-06-15` (as found in `<time datetime>`), and year-less `06.15`,
+ * `06/15`, `6月15日`.
+ *
+ * @param raw Text that may contain a date.
+ * @param now Reference instant used to pick the year of a year-less date.
+ * @returns The normalised date, or `null` when no plausible date is found.
+ */
+function parseJapaneseDate(raw: string, now: Date = new Date()): string | null {
+  const pad = (part: string): string => part.padStart(2, "0");
+  // Reject impossible values such as "99.99" instead of emitting a bogus date.
+  const inRange = (month: string, day: string): boolean => {
+    const mo = Number(month);
+    const d = Number(day);
+    return mo >= 1 && mo <= 12 && d >= 1 && d <= 31;
+  };
+
+  // "-" is accepted here so the ISO value of a datetime attribute parses too.
+  const full = raw.match(/(\d{4})[-./年](\d{1,2})[-./月](\d{1,2})/);
+  if (full) {
+    const [, year = "", month = "", day = ""] = full;
+    return inRange(month, day) ? `${year}-${pad(month)}-${pad(day)}` : null;
+  }
+
+  const short = raw.match(/(\d{1,2})[./月](\d{1,2})/);
+  if (!short) return null;
+  const [, month = "", day = ""] = short;
+  if (!inRange(month, day)) return null;
+
+  // Heuristic: a year-less date more than six months behind `now` is taken to
+  // belong to next year (e.g. a January listing scraped in December).
+  const monthsBehind = now.getMonth() + 1 - Number(month);
+  const year = now.getFullYear() + (monthsBehind > 6 ? 1 : 0);
+  return `${year}-${pad(month)}-${pad(day)}`;
+}
+
+/**
+ * Resolves a possibly relative link against a site root.
+ *
+ * @param url  The href/src value taken from the page.
+ * @param base Site root such as `https://example.com`.
+ * @returns `url` unchanged when it is already an http(s) URL; otherwise the
+ *          URL resolved against `base` (handles `/path`, `path`, `../path`
+ *          and protocol-relative `//host/path`).
+ */
+function absoluteUrl(url: string, base: string): string {
+  if (/^https?:\/\//i.test(url)) return url;
+  try {
+    // The trailing slash makes bare relative paths resolve beneath `base`.
+    return new URL(url, base.endsWith("/") ? base : `${base}/`).href;
+  } catch {
+    // `base` was not a valid absolute URL: fall back to plain concatenation.
+    return url.startsWith("/") ? base + url : `${base}/${url}`;
+  }
+}
diff --git a/app/scrapers/index.ts b/app/scrapers/index.ts
new file mode 100644
index 0000000..97d2586
--- /dev/null
+++ b/app/scrapers/index.ts
@@ -0,0 +1,20 @@
+/**
+ * Registry of all venue scrapers.
+ * To add a new venue: create a new file implementing Scraper, then add it here.
+ */
+import type { Scraper } from "./base";
+import { scraper as liquidRoom } from "./liquid-room";
+import { scraper as wwwShibuya } from "./www-shibuya";
+import { scraper as shibuyaO } from "./shibuya-o";
+import { scraper as shinjukuLoft } from "./shinjuku-loft";
+import { scraper as clubQuattro } from "./club-quattro";
+
+/** Every venue scraper imported by this module, collected in a single array. */
+export const ALL_SCRAPERS: Scraper[] = [
+ liquidRoom,
+ wwwShibuya,
+ shibuyaO,
+ shinjukuLoft,
+ clubQuattro,
+];
+
+export type { Scraper } from "./base";
diff --git a/app/scrapers/liquid-room.ts b/app/scrapers/liquid-room.ts
new file mode 100644
index 0000000..b497759
--- /dev/null
+++ b/app/scrapers/liquid-room.ts
@@ -0,0 +1,87 @@
+/**
+ * Liquid Room (恵比寿) — https://www.liquidroom.net/schedule
+ *
+ * The scraper reads the HTML listing of the schedule page only (any JSON-LD on the page is not parsed).
+ * Structure: <article class="p-schedule__item"> contains date, title, etc.
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+/**
+ * Metadata for LIQUID ROOM. `id` is stamped onto every scraped event as
+ * `venue_id`, and `url` is the base used to resolve relative links.
+ */
+export const venue: VenueMeta = {
+ id: "liquid-room",
+ name: "LIQUID ROOM",
+ url: "https://www.liquidroom.net",
+ area: "恵比寿",
+};
+
+/**
+ * Scraper for the LIQUID ROOM schedule page.
+ */
+export const scraper: Scraper = {
+  venue,
+  /**
+   * Downloads the schedule page and converts each listing element into an
+   * `EventInput`. Elements without a title or a parseable date are skipped.
+   *
+   * @returns One entry per listing that yielded both a title and a date.
+   * @throws Error with message `HTTP <status>` when the response is not OK.
+   */
+  async scrape(): Promise<EventInput[]> {
+    const res = await fetch("https://www.liquidroom.net/schedule");
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const $ = cheerio.load(await res.text());
+    const events: EventInput[] = [];
+
+    $("article.p-schedule__item, .schedule-list__item, .c-event-item").each((_, el) => {
+      const $el = $(el);
+
+      const title = $el.find(".p-schedule__title, .event-title, h3, h2").first().text().trim();
+      if (!title) return;
+
+      // Prefer the visible date text; fall back to the <time datetime="…"> attribute.
+      const dateStr =
+        $el.find(".p-schedule__date, .event-date, time").first().text().trim() ||
+        $el.find("time").attr("datetime") ||
+        "";
+      const date = parseJapaneseDate(dateStr);
+      if (!date) return;
+
+      // Honour OPEN/START labels when present, as the sibling scrapers do. Without
+      // a START label the first HH:MM is still used as the start time.
+      const timeText = $el.find(".p-schedule__time, .open-time").first().text().trim();
+      const openMatch = timeText.match(/OPEN[:\s]*(\d{2}:\d{2})/i);
+      const startMatch = timeText.match(/START[:\s]*(\d{2}:\d{2})/i);
+      const firstTime = timeText.match(/\d{2}:\d{2}/)?.[0] ?? null;
+
+      const ticketHref = $el
+        .find("a[href*='ticket'], a[href*='eplus'], a[href*='pia']")
+        .first()
+        .attr("href");
+      const imageSrc = $el.find("img").first().attr("src");
+      // `a[href]` skips anchors without an href, matching the sibling scrapers.
+      const detailHref = $el.find("a[href]").first().attr("href");
+
+      events.push({
+        venue_id: venue.id,
+        title,
+        artist: $el.find(".p-schedule__artist, .artist").first().text().trim() || null,
+        date,
+        open_time: openMatch?.[1] ?? null,
+        start_time: startMatch?.[1] ?? firstTime,
+        // Ticket links are resolved like the other URLs so a site-relative href is not stored as-is.
+        ticket_url: ticketHref ? absoluteUrl(ticketHref, venue.url) : null,
+        image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null,
+        source_url: detailHref ? absoluteUrl(detailHref, venue.url) : null,
+      });
+    });
+
+    return events;
+  },
+};
+
+/**
+ * Extracts a calendar date from schedule text and returns it as `YYYY-MM-DD`.
+ *
+ * Accepted forms: `2025.06.15`, `2025/06/15`, `2025年06月15日`, ISO
+ * `2025-06-15` (as found in `<time datetime>`), and year-less `06.15`,
+ * `06/15`, `6月15日`.
+ *
+ * @param raw Text that may contain a date.
+ * @param now Reference instant used to pick the year of a year-less date.
+ * @returns The normalised date, or `null` when no plausible date is found.
+ */
+function parseJapaneseDate(raw: string, now: Date = new Date()): string | null {
+  const pad = (part: string): string => part.padStart(2, "0");
+  // Reject impossible values such as "99.99" instead of emitting a bogus date.
+  const inRange = (month: string, day: string): boolean => {
+    const mo = Number(month);
+    const d = Number(day);
+    return mo >= 1 && mo <= 12 && d >= 1 && d <= 31;
+  };
+
+  // "-" is accepted here so the ISO value of a datetime attribute parses too.
+  const full = raw.match(/(\d{4})[-./年](\d{1,2})[-./月](\d{1,2})/);
+  if (full) {
+    const [, year = "", month = "", day = ""] = full;
+    return inRange(month, day) ? `${year}-${pad(month)}-${pad(day)}` : null;
+  }
+
+  const short = raw.match(/(\d{1,2})[./月](\d{1,2})/);
+  if (!short) return null;
+  const [, month = "", day = ""] = short;
+  if (!inRange(month, day)) return null;
+
+  // Heuristic: a year-less date more than six months behind `now` is taken to
+  // belong to next year (e.g. a January listing scraped in December).
+  const monthsBehind = now.getMonth() + 1 - Number(month);
+  const year = now.getFullYear() + (monthsBehind > 6 ? 1 : 0);
+  return `${year}-${pad(month)}-${pad(day)}`;
+}
+
+/**
+ * Resolves a possibly relative link against a site root.
+ *
+ * @param url  The href/src value taken from the page.
+ * @param base Site root such as `https://example.com`.
+ * @returns `url` unchanged when it is already an http(s) URL; otherwise the
+ *          URL resolved against `base` (handles `/path`, `path`, `../path`
+ *          and protocol-relative `//host/path`).
+ */
+function absoluteUrl(url: string, base: string): string {
+  if (/^https?:\/\//i.test(url)) return url;
+  try {
+    // The trailing slash makes bare relative paths resolve beneath `base`.
+    return new URL(url, base.endsWith("/") ? base : `${base}/`).href;
+  } catch {
+    // `base` was not a valid absolute URL: fall back to plain concatenation.
+    return url.startsWith("/") ? base + url : `${base}/${url}`;
+  }
+}
diff --git a/app/scrapers/shibuya-o.ts b/app/scrapers/shibuya-o.ts
new file mode 100644
index 0000000..1ad8d8c
--- /dev/null
+++ b/app/scrapers/shibuya-o.ts
@@ -0,0 +1,82 @@
+/**
+ * Shibuya O-East / O-West / O-Crest / O-Nest (渋谷)
+ * https://www.shibuya-o.com/schedule/
+ *
+ * The page uses a unified schedule listing for all O venues.
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+/**
+ * Metadata for the Shibuya O venues. `id` is stamped onto every scraped event
+ * as `venue_id`, and `url` is the base used to resolve relative links.
+ * NOTE(review): `name` lists only O-EAST / O-WEST although the file header also
+ * mentions O-Crest and O-Nest — confirm which halls this entry should represent.
+ */
+export const venue: VenueMeta = {
+ id: "shibuya-o",
+ name: "渋谷 O-EAST / O-WEST",
+ url: "https://www.shibuya-o.com",
+ area: "渋谷",
+};
+
+/**
+ * Scraper for the unified Shibuya O schedule page.
+ */
+export const scraper: Scraper = {
+  venue,
+  /**
+   * Downloads the schedule page and converts each listing element into an
+   * `EventInput`. Elements without a title or a parseable date are skipped.
+   *
+   * @returns One entry per listing that yielded both a title and a date.
+   * @throws Error with message `HTTP <status>` when the response is not OK.
+   */
+  async scrape(): Promise<EventInput[]> {
+    const res = await fetch("https://www.shibuya-o.com/schedule/");
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const $ = cheerio.load(await res.text());
+    const events: EventInput[] = [];
+
+    $(".schedule_list li, .c-schedule__item, .event-item").each((_, el) => {
+      const $el = $(el);
+
+      const title = $el.find(".schedule_title, .title, h3").first().text().trim();
+      if (!title) return;
+
+      // Prefer the visible date text; fall back to the <time datetime="…"> attribute.
+      const rawDate =
+        $el.find(".schedule_date, .date, time").first().text().trim() ||
+        $el.find("time").attr("datetime") ||
+        "";
+      const date = parseJapaneseDate(rawDate);
+      if (!date) return;
+
+      const hall = $el.find(".schedule_hall, .hall, .venue-name").first().text().trim() || null;
+      const timeText = $el.find(".schedule_time, .time").first().text();
+      const openMatch = timeText.match(/OPEN[:: ]*(\d{2}:\d{2})/i);
+      const startMatch = timeText.match(/START[:: ]*(\d{2}:\d{2})/i);
+
+      // Query each link once so no non-null assertion is needed below.
+      const detailHref = $el.find("a[href]").first().attr("href");
+      const imageSrc = $el.find("img").first().attr("src");
+      const ticketHref = $el
+        .find("a[href*='eplus'], a[href*='lawson'], a[href*='ticket']")
+        .first()
+        .attr("href");
+
+      events.push({
+        venue_id: venue.id,
+        title,
+        // NOTE(review): the hall name is stored in `artist` — confirm this is
+        // intentional rather than a missing dedicated field.
+        artist: hall,
+        date,
+        open_time: openMatch?.[1] ?? null,
+        start_time: startMatch?.[1] ?? null,
+        // Ticket links are resolved like the other URLs so a site-relative href is not stored as-is.
+        ticket_url: ticketHref ? absoluteUrl(ticketHref, venue.url) : null,
+        image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null,
+        source_url: detailHref ? absoluteUrl(detailHref, venue.url) : null,
+      });
+    });
+
+    return events;
+  },
+};
+
+/**
+ * Extracts a calendar date from schedule text and returns it as `YYYY-MM-DD`.
+ *
+ * Accepted forms: `2025.06.15`, `2025/06/15`, `2025年06月15日`, ISO
+ * `2025-06-15` (as found in `<time datetime>`), and year-less `06.15`,
+ * `06/15`, `6月15日`.
+ *
+ * @param raw Text that may contain a date.
+ * @param now Reference instant used to pick the year of a year-less date.
+ * @returns The normalised date, or `null` when no plausible date is found.
+ */
+function parseJapaneseDate(raw: string, now: Date = new Date()): string | null {
+  const pad = (part: string): string => part.padStart(2, "0");
+  // Reject impossible values such as "99.99" instead of emitting a bogus date.
+  const inRange = (month: string, day: string): boolean => {
+    const mo = Number(month);
+    const d = Number(day);
+    return mo >= 1 && mo <= 12 && d >= 1 && d <= 31;
+  };
+
+  // "-" is accepted here so the ISO value of a datetime attribute parses too.
+  const full = raw.match(/(\d{4})[-./年](\d{1,2})[-./月](\d{1,2})/);
+  if (full) {
+    const [, year = "", month = "", day = ""] = full;
+    return inRange(month, day) ? `${year}-${pad(month)}-${pad(day)}` : null;
+  }
+
+  const short = raw.match(/(\d{1,2})[./月](\d{1,2})/);
+  if (!short) return null;
+  const [, month = "", day = ""] = short;
+  if (!inRange(month, day)) return null;
+
+  // Heuristic: a year-less date more than six months behind `now` is taken to
+  // belong to next year (e.g. a January listing scraped in December).
+  const monthsBehind = now.getMonth() + 1 - Number(month);
+  const year = now.getFullYear() + (monthsBehind > 6 ? 1 : 0);
+  return `${year}-${pad(month)}-${pad(day)}`;
+}
+
+/**
+ * Resolves a possibly relative link against a site root.
+ *
+ * @param url  The href/src value taken from the page.
+ * @param base Site root such as `https://example.com`.
+ * @returns `url` unchanged when it is already an http(s) URL; otherwise the
+ *          URL resolved against `base` (handles `/path`, `path`, `../path`
+ *          and protocol-relative `//host/path`).
+ */
+function absoluteUrl(url: string, base: string): string {
+  if (/^https?:\/\//i.test(url)) return url;
+  try {
+    // The trailing slash makes bare relative paths resolve beneath `base`.
+    return new URL(url, base.endsWith("/") ? base : `${base}/`).href;
+  } catch {
+    // `base` was not a valid absolute URL: fall back to plain concatenation.
+    return url.startsWith("/") ? base + url : `${base}/${url}`;
+  }
+}
diff --git a/app/scrapers/shinjuku-loft.ts b/app/scrapers/shinjuku-loft.ts
new file mode 100644
index 0000000..8a64761
--- /dev/null
+++ b/app/scrapers/shinjuku-loft.ts
@@ -0,0 +1,80 @@
+/**
+ * 新宿 LOFT — https://www.loft-prj.co.jp/schedule/loft
+ *
+ * The schedule page renders events inside `.eventlist` items.
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+/**
+ * Metadata for Shinjuku LOFT. `id` is stamped onto every scraped event as
+ * `venue_id`, and `url` is the base used to resolve relative links.
+ */
+export const venue: VenueMeta = {
+ id: "shinjuku-loft",
+ name: "新宿 LOFT",
+ url: "https://www.loft-prj.co.jp",
+ area: "新宿",
+};
+
+/**
+ * Scraper for the Shinjuku LOFT schedule page.
+ */
+export const scraper: Scraper = {
+  venue,
+  /**
+   * Downloads the schedule page and converts each listing element into an
+   * `EventInput`. Elements without a title or a parseable date are skipped.
+   *
+   * @returns One entry per listing that yielded both a title and a date.
+   * @throws Error with message `HTTP <status>` when the response is not OK.
+   */
+  async scrape(): Promise<EventInput[]> {
+    const res = await fetch("https://www.loft-prj.co.jp/schedule/loft");
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const $ = cheerio.load(await res.text());
+    const events: EventInput[] = [];
+
+    $(".eventlist__item, .schedule-item, .event_list li").each((_, el) => {
+      const $el = $(el);
+
+      const title = $el.find(".eventlist__title, .event-title, h3, h2").first().text().trim();
+      if (!title) return;
+
+      // Prefer the visible date text; fall back to the <time datetime="…"> attribute.
+      const rawDate =
+        $el.find(".eventlist__date, .event-date, time").first().text().trim() ||
+        $el.find("time").attr("datetime") ||
+        "";
+      const date = parseJapaneseDate(rawDate);
+      if (!date) return;
+
+      const timeText = $el.find(".eventlist__time, .time").first().text();
+      const openMatch = timeText.match(/OPEN[:: ]*(\d{2}:\d{2})/i);
+      const startMatch = timeText.match(/START[:: ]*(\d{2}:\d{2})/i);
+
+      // Query each link once so no non-null assertion is needed below.
+      const detailHref = $el.find("a[href]").first().attr("href");
+      const imageSrc = $el.find("img").first().attr("src");
+      const ticketHref = $el
+        .find("a[href*='eplus'], a[href*='pia'], a[href*='ticket']")
+        .first()
+        .attr("href");
+
+      events.push({
+        venue_id: venue.id,
+        title,
+        artist: $el.find(".eventlist__artist, .artist").first().text().trim() || null,
+        date,
+        open_time: openMatch?.[1] ?? null,
+        start_time: startMatch?.[1] ?? null,
+        // Ticket links are resolved like the other URLs so a site-relative href is not stored as-is.
+        ticket_url: ticketHref ? absoluteUrl(ticketHref, venue.url) : null,
+        image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null,
+        source_url: detailHref ? absoluteUrl(detailHref, venue.url) : null,
+      });
+    });
+
+    return events;
+  },
+};
+
+/**
+ * Extracts a calendar date from schedule text and returns it as `YYYY-MM-DD`.
+ *
+ * Accepted forms: `2025.06.15`, `2025/06/15`, `2025年06月15日`, ISO
+ * `2025-06-15` (as found in `<time datetime>`), and year-less `06.15`,
+ * `06/15`, `6月15日`.
+ *
+ * @param raw Text that may contain a date.
+ * @param now Reference instant used to pick the year of a year-less date.
+ * @returns The normalised date, or `null` when no plausible date is found.
+ */
+function parseJapaneseDate(raw: string, now: Date = new Date()): string | null {
+  const pad = (part: string): string => part.padStart(2, "0");
+  // Reject impossible values such as "99.99" instead of emitting a bogus date.
+  const inRange = (month: string, day: string): boolean => {
+    const mo = Number(month);
+    const d = Number(day);
+    return mo >= 1 && mo <= 12 && d >= 1 && d <= 31;
+  };
+
+  // "-" is accepted here so the ISO value of a datetime attribute parses too.
+  const full = raw.match(/(\d{4})[-./年](\d{1,2})[-./月](\d{1,2})/);
+  if (full) {
+    const [, year = "", month = "", day = ""] = full;
+    return inRange(month, day) ? `${year}-${pad(month)}-${pad(day)}` : null;
+  }
+
+  const short = raw.match(/(\d{1,2})[./月](\d{1,2})/);
+  if (!short) return null;
+  const [, month = "", day = ""] = short;
+  if (!inRange(month, day)) return null;
+
+  // Heuristic: a year-less date more than six months behind `now` is taken to
+  // belong to next year (e.g. a January listing scraped in December).
+  const monthsBehind = now.getMonth() + 1 - Number(month);
+  const year = now.getFullYear() + (monthsBehind > 6 ? 1 : 0);
+  return `${year}-${pad(month)}-${pad(day)}`;
+}
+
+/**
+ * Resolves a possibly relative link against a site root.
+ *
+ * @param url  The href/src value taken from the page.
+ * @param base Site root such as `https://example.com`.
+ * @returns `url` unchanged when it is already an http(s) URL; otherwise the
+ *          URL resolved against `base` (handles `/path`, `path`, `../path`
+ *          and protocol-relative `//host/path`).
+ */
+function absoluteUrl(url: string, base: string): string {
+  if (/^https?:\/\//i.test(url)) return url;
+  try {
+    // The trailing slash makes bare relative paths resolve beneath `base`.
+    return new URL(url, base.endsWith("/") ? base : `${base}/`).href;
+  } catch {
+    // `base` was not a valid absolute URL: fall back to plain concatenation.
+    return url.startsWith("/") ? base + url : `${base}/${url}`;
+  }
+}
diff --git a/app/scrapers/www-shibuya.ts b/app/scrapers/www-shibuya.ts
new file mode 100644
index 0000000..905fc61
--- /dev/null
+++ b/app/scrapers/www-shibuya.ts
@@ -0,0 +1,79 @@
+/**
+ * WWW / WWW X (渋谷) — https://www-shibuya.jp/schedule/
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+/**
+ * Metadata for WWW / WWW X. `id` is stamped onto every scraped event as
+ * `venue_id`, and `url` is the base used to resolve relative links.
+ */
+export const venue: VenueMeta = {
+ id: "www-shibuya",
+ name: "WWW / WWW X",
+ url: "https://www-shibuya.jp",
+ area: "渋谷",
+};
+
+/**
+ * Scraper for the WWW / WWW X schedule page.
+ */
+export const scraper: Scraper = {
+  venue,
+  /**
+   * Downloads the schedule page and converts each listing element into an
+   * `EventInput`. Elements without a title or a parseable date are skipped.
+   *
+   * @returns One entry per listing that yielded both a title and a date.
+   * @throws Error with message `HTTP <status>` when the response is not OK.
+   */
+  async scrape(): Promise<EventInput[]> {
+    const res = await fetch("https://www-shibuya.jp/schedule/");
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const $ = cheerio.load(await res.text());
+    const events: EventInput[] = [];
+
+    $(".schedule-list li, .p-schedule-item, article").each((_, el) => {
+      const $el = $(el);
+
+      const title = $el.find(".schedule-title, .title, h3, h2").first().text().trim();
+      if (!title) return;
+
+      // Prefer the visible date text; fall back to the <time datetime="…"> attribute.
+      const rawDate =
+        $el.find(".schedule-date, .date, time").first().text().trim() ||
+        $el.find("time").attr("datetime") ||
+        "";
+      const date = parseJapaneseDate(rawDate);
+      if (!date) return;
+
+      // Accept an optional colon after the label ("OPEN: 18:00"), as the
+      // sibling scrapers do, in addition to plain whitespace.
+      const timeText = $el.find(".schedule-time, .time").first().text();
+      const openMatch = timeText.match(/OPEN[:\s]*(\d{2}:\d{2})/i);
+      const startMatch = timeText.match(/START[:\s]*(\d{2}:\d{2})/i);
+
+      // `a[href]` skips anchors without an href, matching the sibling scrapers.
+      const detailHref = $el.find("a[href]").first().attr("href");
+      const imageSrc = $el.find("img").first().attr("src");
+      const ticketHref = $el
+        .find("a[href*='eplus'], a[href*='pia'], a[href*='ticket']")
+        .first()
+        .attr("href");
+
+      events.push({
+        venue_id: venue.id,
+        title,
+        artist: $el.find(".artist").first().text().trim() || null,
+        date,
+        open_time: openMatch?.[1] ?? null,
+        start_time: startMatch?.[1] ?? null,
+        // Ticket links are resolved like the other URLs so a site-relative href is not stored as-is.
+        ticket_url: ticketHref ? absoluteUrl(ticketHref, venue.url) : null,
+        image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null,
+        source_url: detailHref ? absoluteUrl(detailHref, venue.url) : null,
+      });
+    });
+
+    return events;
+  },
+};
+
+/**
+ * Extracts a calendar date from schedule text and returns it as `YYYY-MM-DD`.
+ *
+ * Accepted forms: `2025.06.15`, `2025/06/15`, `2025年06月15日`, ISO
+ * `2025-06-15` (as found in `<time datetime>`), and year-less `06.15`,
+ * `06/15`, `6月15日`.
+ *
+ * @param raw Text that may contain a date.
+ * @param now Reference instant used to pick the year of a year-less date.
+ * @returns The normalised date, or `null` when no plausible date is found.
+ */
+function parseJapaneseDate(raw: string, now: Date = new Date()): string | null {
+  const pad = (part: string): string => part.padStart(2, "0");
+  // Reject impossible values such as "99.99" instead of emitting a bogus date.
+  const inRange = (month: string, day: string): boolean => {
+    const mo = Number(month);
+    const d = Number(day);
+    return mo >= 1 && mo <= 12 && d >= 1 && d <= 31;
+  };
+
+  // "-" is accepted here so the ISO value of a datetime attribute parses too.
+  const full = raw.match(/(\d{4})[-./年](\d{1,2})[-./月](\d{1,2})/);
+  if (full) {
+    const [, year = "", month = "", day = ""] = full;
+    return inRange(month, day) ? `${year}-${pad(month)}-${pad(day)}` : null;
+  }
+
+  const short = raw.match(/(\d{1,2})[./月](\d{1,2})/);
+  if (!short) return null;
+  const [, month = "", day = ""] = short;
+  if (!inRange(month, day)) return null;
+
+  // Heuristic: a year-less date more than six months behind `now` is taken to
+  // belong to next year (e.g. a January listing scraped in December).
+  const monthsBehind = now.getMonth() + 1 - Number(month);
+  const year = now.getFullYear() + (monthsBehind > 6 ? 1 : 0);
+  return `${year}-${pad(month)}-${pad(day)}`;
+}
+
+/**
+ * Resolves a possibly relative link against a site root.
+ *
+ * @param url  The href/src value taken from the page.
+ * @param base Site root such as `https://example.com`.
+ * @returns `url` unchanged when it is already an http(s) URL; otherwise the
+ *          URL resolved against `base` (handles `/path`, `path`, `../path`
+ *          and protocol-relative `//host/path`).
+ */
+function absoluteUrl(url: string, base: string): string {
+  if (/^https?:\/\//i.test(url)) return url;
+  try {
+    // The trailing slash makes bare relative paths resolve beneath `base`.
+    return new URL(url, base.endsWith("/") ? base : `${base}/`).href;
+  } catch {
+    // `base` was not a valid absolute URL: fall back to plain concatenation.
+    return url.startsWith("/") ? base + url : `${base}/${url}`;
+  }
+}