diff options
| author | yyamashita <yyamashita@mosquit.one> | 2026-05-06 22:07:53 +0900 |
|---|---|---|
| committer | yyamashita <yyamashita@mosquit.one> | 2026-05-06 22:07:53 +0900 |
| commit | be55729482296663da8c96723bfd22080e6762c1 (patch) | |
| tree | fcd94b1dc5c55f3a80796c90a555863d13fc9a95 /app/scrapers | |
| parent | 014b29bc22b1c207a03dd560051ecdd5df63f0b1 (diff) | |
Add Tokyo livehouse event aggregator service
Full-stack React Router v7 app that scrapes event listings from major
Tokyo live venues (Liquid Room, WWW/WWW X, Shibuya O-EAST, Shinjuku LOFT,
Club Quattro) and stores them in SQLite for browsing and search.
- Modular scraper architecture: add a new venue by dropping a file in
app/scrapers/ and registering it in index.ts
- Routes: /events (filter by keyword/venue/date), /events/:id, /venues,
GET /api/scrape
- EventCard shows artist, date/time, venue, ticket URL, and fee
- After each scrape, per-venue Markdown files are generated in events/ (for development reference)
- /add-livehouse Claude Code skill defined in .claude/commands/
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app/scrapers')
| -rw-r--r-- | app/scrapers/base.ts | 14 | ||||
| -rw-r--r-- | app/scrapers/club-quattro.ts | 78 | ||||
| -rw-r--r-- | app/scrapers/index.ts | 20 | ||||
| -rw-r--r-- | app/scrapers/liquid-room.ts | 87 | ||||
| -rw-r--r-- | app/scrapers/shibuya-o.ts | 82 | ||||
| -rw-r--r-- | app/scrapers/shinjuku-loft.ts | 80 | ||||
| -rw-r--r-- | app/scrapers/www-shibuya.ts | 79 |
7 files changed, 440 insertions, 0 deletions
diff --git a/app/scrapers/base.ts b/app/scrapers/base.ts new file mode 100644 index 0000000..512fcbb --- /dev/null +++ b/app/scrapers/base.ts @@ -0,0 +1,14 @@ +import type { EventInput } from "~/lib/db.server"; + +export interface VenueMeta { + id: string; + name: string; + url: string; + area: string; +} + +export interface Scraper { + venue: VenueMeta; + /** Fetch events from the venue's website. Returns event inputs ready for upsert. */ + scrape(): Promise<EventInput[]>; +} diff --git a/app/scrapers/club-quattro.ts b/app/scrapers/club-quattro.ts new file mode 100644 index 0000000..ae903bc --- /dev/null +++ b/app/scrapers/club-quattro.ts @@ -0,0 +1,78 @@ +/** + * Club Quattro 渋谷 — https://www.club-quattro.com/shibuya/schedule/ + */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "club-quattro", + name: "CLUB QUATTRO", + url: "https://www.club-quattro.com", + area: "渋谷", +}; + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const res = await fetch("https://www.club-quattro.com/shibuya/schedule/"); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + const html = await res.text(); + const $ = cheerio.load(html); + const events: EventInput[] = []; + + $(".schedule-list__item, .c-event, li.event").each((_, el) => { + const $el = $(el); + + const title = $el.find(".schedule-list__title, .event-name, h3, h2").first().text().trim(); + if (!title) return; + + const rawDate = + $el.find(".schedule-list__date, .event-date, time").first().text().trim() || + $el.find("time").attr("datetime") || + ""; + const date = parseJapaneseDate(rawDate); + if (!date) return; + + const timeText = $el.find(".schedule-list__time, .time-info").first().text(); + const openMatch = timeText.match(/OPEN[:: ]*(\d{2}:\d{2})/i); + const startMatch = timeText.match(/START[:: ]*(\d{2}:\d{2})/i); + + const detailHref = 
$el.find("a[href]").first().attr("href") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist: $el.find(".schedule-list__artist, .artist-name").first().text().trim() || null, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? null, + ticket_url: + $el.find("a[href*='eplus'], a[href*='pia'], a[href*='ticket']").first().attr("href") ?? null, + image_url: $el.find("img").first().attr("src") + ? absoluteUrl($el.find("img").first().attr("src")!, venue.url) + : null, + source_url: detailHref ? absoluteUrl(detailHref, venue.url) : null, + }); + }); + + return events; + }, +}; + +function parseJapaneseDate(raw: string): string | null { + const m = + raw.match(/(\d{4})[./年](\d{1,2})[./月](\d{1,2})/) || + raw.match(/(\d{1,2})[./月](\d{1,2})/); + if (!m) return null; + if (m.length === 4) { + return `${m[1]}-${m[2].padStart(2, "0")}-${m[3].padStart(2, "0")}`; + } + const year = new Date().getFullYear(); + return `${year}-${m[1].padStart(2, "0")}-${m[2].padStart(2, "0")}`; +} + +function absoluteUrl(url: string, base: string): string { + if (url.startsWith("http")) return url; + return url.startsWith("/") ? base + url : `${base}/${url}`; +} diff --git a/app/scrapers/index.ts b/app/scrapers/index.ts new file mode 100644 index 0000000..97d2586 --- /dev/null +++ b/app/scrapers/index.ts @@ -0,0 +1,20 @@ +/** + * Registry of all venue scrapers. + * To add a new venue: create a new file implementing Scraper, then add it here. 
+ */ +import type { Scraper } from "./base"; +import { scraper as liquidRoom } from "./liquid-room"; +import { scraper as wwwShibuya } from "./www-shibuya"; +import { scraper as shibuyaO } from "./shibuya-o"; +import { scraper as shinjukuLoft } from "./shinjuku-loft"; +import { scraper as clubQuattro } from "./club-quattro"; + +export const ALL_SCRAPERS: Scraper[] = [ + liquidRoom, + wwwShibuya, + shibuyaO, + shinjukuLoft, + clubQuattro, +]; + +export type { Scraper } from "./base"; diff --git a/app/scrapers/liquid-room.ts b/app/scrapers/liquid-room.ts new file mode 100644 index 0000000..b497759 --- /dev/null +++ b/app/scrapers/liquid-room.ts @@ -0,0 +1,87 @@ +/** + * Liquid Room (恵比寿) — https://www.liquidroom.net/schedule + * + * The schedule page lists events with JSON-LD or HTML data. + * Structure: <div class="p-schedule__item"> contains date, title, etc. + */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "liquid-room", + name: "LIQUID ROOM", + url: "https://www.liquidroom.net", + area: "恵比寿", +}; + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const res = await fetch("https://www.liquidroom.net/schedule"); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + const html = await res.text(); + const $ = cheerio.load(html); + const events: EventInput[] = []; + + $("article.p-schedule__item, .schedule-list__item, .c-event-item").each( + (_, el) => { + const $el = $(el); + + const title = + $el.find(".p-schedule__title, .event-title, h3, h2").first().text().trim(); + if (!title) return; + + const dateStr = + $el.find(".p-schedule__date, .event-date, time").first().text().trim() || + $el.find("time").attr("datetime") || + ""; + const date = parseJapaneseDate(dateStr); + if (!date) return; + + const artist = + $el.find(".p-schedule__artist, .artist").first().text().trim() || null; + const 
startTime = + $el.find(".p-schedule__time, .open-time").first().text().trim().match(/\d{2}:\d{2}/)?.[0] ?? null; + const ticketUrl = + $el.find("a[href*='ticket'], a[href*='eplus'], a[href*='pia']").first().attr("href") ?? null; + const imageUrl = + $el.find("img").first().attr("src") ?? null; + const sourceUrl = + $el.find("a").first().attr("href") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist, + date, + start_time: startTime, + ticket_url: ticketUrl, + image_url: imageUrl ? absoluteUrl(imageUrl, venue.url) : null, + source_url: sourceUrl ? absoluteUrl(sourceUrl, venue.url) : null, + }); + } + ); + + return events; + }, +}; + +function parseJapaneseDate(raw: string): string | null { + // Handles "2025.06.15" "2025/06/15" "2025年06月15日" "06.15" formats + const m = + raw.match(/(\d{4})[./年](\d{1,2})[./月](\d{1,2})/) || + raw.match(/(\d{1,2})[./月](\d{1,2})/); + if (!m) return null; + if (m.length === 4) { + return `${m[1]}-${m[2].padStart(2, "0")}-${m[3].padStart(2, "0")}`; + } + const year = new Date().getFullYear(); + return `${year}-${m[1].padStart(2, "0")}-${m[2].padStart(2, "0")}`; +} + +function absoluteUrl(url: string, base: string): string { + if (url.startsWith("http")) return url; + if (url.startsWith("/")) return base + url; + return base + "/" + url; +} diff --git a/app/scrapers/shibuya-o.ts b/app/scrapers/shibuya-o.ts new file mode 100644 index 0000000..1ad8d8c --- /dev/null +++ b/app/scrapers/shibuya-o.ts @@ -0,0 +1,82 @@ +/** + * Shibuya O-East / O-West / O-Crest / O-Nest (渋谷) + * https://www.shibuya-o.com/schedule/ + * + * The page uses a unified schedule listing for all O venues. 
+ */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "shibuya-o", + name: "渋谷 O-EAST / O-WEST", + url: "https://www.shibuya-o.com", + area: "渋谷", +}; + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const res = await fetch("https://www.shibuya-o.com/schedule/"); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + const html = await res.text(); + const $ = cheerio.load(html); + const events: EventInput[] = []; + + $(".schedule_list li, .c-schedule__item, .event-item").each((_, el) => { + const $el = $(el); + + const title = $el.find(".schedule_title, .title, h3").first().text().trim(); + if (!title) return; + + const rawDate = + $el.find(".schedule_date, .date, time").first().text().trim() || + $el.find("time").attr("datetime") || + ""; + const date = parseJapaneseDate(rawDate); + if (!date) return; + + const hall = $el.find(".schedule_hall, .hall, .venue-name").first().text().trim() || null; + const timeText = $el.find(".schedule_time, .time").first().text(); + const openMatch = timeText.match(/OPEN[:: ]*(\d{2}:\d{2})/i); + const startMatch = timeText.match(/START[:: ]*(\d{2}:\d{2})/i); + + const detailHref = $el.find("a[href]").first().attr("href") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist: hall, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? null, + ticket_url: + $el.find("a[href*='eplus'], a[href*='lawson'], a[href*='ticket']").first().attr("href") ?? null, + image_url: $el.find("img").first().attr("src") + ? absoluteUrl($el.find("img").first().attr("src")!, venue.url) + : null, + source_url: detailHref ? 
absoluteUrl(detailHref, venue.url) : null, + }); + }); + + return events; + }, +}; + +function parseJapaneseDate(raw: string): string | null { + const m = + raw.match(/(\d{4})[./年](\d{1,2})[./月](\d{1,2})/) || + raw.match(/(\d{1,2})[./月](\d{1,2})/); + if (!m) return null; + if (m.length === 4) { + return `${m[1]}-${m[2].padStart(2, "0")}-${m[3].padStart(2, "0")}`; + } + const year = new Date().getFullYear(); + return `${year}-${m[1].padStart(2, "0")}-${m[2].padStart(2, "0")}`; +} + +function absoluteUrl(url: string, base: string): string { + if (url.startsWith("http")) return url; + return url.startsWith("/") ? base + url : `${base}/${url}`; +} diff --git a/app/scrapers/shinjuku-loft.ts b/app/scrapers/shinjuku-loft.ts new file mode 100644 index 0000000..8a64761 --- /dev/null +++ b/app/scrapers/shinjuku-loft.ts @@ -0,0 +1,80 @@ +/** + * 新宿 LOFT — https://www.loft-prj.co.jp/schedule/loft + * + * The schedule page renders events inside `.eventlist` items. + */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "shinjuku-loft", + name: "新宿 LOFT", + url: "https://www.loft-prj.co.jp", + area: "新宿", +}; + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const res = await fetch("https://www.loft-prj.co.jp/schedule/loft"); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + const html = await res.text(); + const $ = cheerio.load(html); + const events: EventInput[] = []; + + $(".eventlist__item, .schedule-item, .event_list li").each((_, el) => { + const $el = $(el); + + const title = $el.find(".eventlist__title, .event-title, h3, h2").first().text().trim(); + if (!title) return; + + const rawDate = + $el.find(".eventlist__date, .event-date, time").first().text().trim() || + $el.find("time").attr("datetime") || + ""; + const date = parseJapaneseDate(rawDate); + if (!date) return; + + const timeText = 
$el.find(".eventlist__time, .time").first().text(); + const openMatch = timeText.match(/OPEN[:: ]*(\d{2}:\d{2})/i); + const startMatch = timeText.match(/START[:: ]*(\d{2}:\d{2})/i); + + const detailHref = $el.find("a[href]").first().attr("href") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist: $el.find(".eventlist__artist, .artist").first().text().trim() || null, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? null, + ticket_url: + $el.find("a[href*='eplus'], a[href*='pia'], a[href*='ticket']").first().attr("href") ?? null, + image_url: $el.find("img").first().attr("src") + ? absoluteUrl($el.find("img").first().attr("src")!, venue.url) + : null, + source_url: detailHref ? absoluteUrl(detailHref, venue.url) : null, + }); + }); + + return events; + }, +}; + +function parseJapaneseDate(raw: string): string | null { + const m = + raw.match(/(\d{4})[./年](\d{1,2})[./月](\d{1,2})/) || + raw.match(/(\d{1,2})[./月](\d{1,2})/); + if (!m) return null; + if (m.length === 4) { + return `${m[1]}-${m[2].padStart(2, "0")}-${m[3].padStart(2, "0")}`; + } + const year = new Date().getFullYear(); + return `${year}-${m[1].padStart(2, "0")}-${m[2].padStart(2, "0")}`; +} + +function absoluteUrl(url: string, base: string): string { + if (url.startsWith("http")) return url; + return url.startsWith("/") ? 
base + url : `${base}/${url}`; +} diff --git a/app/scrapers/www-shibuya.ts b/app/scrapers/www-shibuya.ts new file mode 100644 index 0000000..905fc61 --- /dev/null +++ b/app/scrapers/www-shibuya.ts @@ -0,0 +1,79 @@ +/** + * WWW / WWW X (渋谷) — https://www-shibuya.jp/schedule/ + */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "www-shibuya", + name: "WWW / WWW X", + url: "https://www-shibuya.jp", + area: "渋谷", +}; + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const res = await fetch("https://www-shibuya.jp/schedule/"); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + const html = await res.text(); + const $ = cheerio.load(html); + const events: EventInput[] = []; + + $(".schedule-list li, .p-schedule-item, article").each((_, el) => { + const $el = $(el); + + const title = $el.find(".schedule-title, .title, h3, h2").first().text().trim(); + if (!title) return; + + const rawDate = + $el.find(".schedule-date, .date, time").first().text().trim() || + $el.find("time").attr("datetime") || + ""; + const date = parseJapaneseDate(rawDate); + if (!date) return; + + const timeText = $el.find(".schedule-time, .time").first().text(); + const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i); + const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i); + + const detailHref = $el.find("a").first().attr("href") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist: $el.find(".artist").first().text().trim() || null, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? null, + ticket_url: + $el.find("a[href*='eplus'], a[href*='pia'], a[href*='ticket']").first().attr("href") ?? null, + image_url: + $el.find("img").first().attr("src") + ? absoluteUrl($el.find("img").first().attr("src")!, venue.url) + : null, + source_url: detailHref ? 
absoluteUrl(detailHref, venue.url) : null, + }); + }); + + return events; + }, +}; + +function parseJapaneseDate(raw: string): string | null { + const m = + raw.match(/(\d{4})[./年](\d{1,2})[./月](\d{1,2})/) || + raw.match(/(\d{1,2})[./月](\d{1,2})/); + if (!m) return null; + if (m.length === 4) { + return `${m[1]}-${m[2].padStart(2, "0")}-${m[3].padStart(2, "0")}`; + } + const year = new Date().getFullYear(); + return `${year}-${m[1].padStart(2, "0")}-${m[2].padStart(2, "0")}`; +} + +function absoluteUrl(url: string, base: string): string { + if (url.startsWith("http")) return url; + return url.startsWith("/") ? base + url : `${base}/${url}`; +} |
