// Provenance — recovered from the git patch envelope this file was extracted from:
//
//   commit be55729482296663da8c96723bfd22080e6762c1
//   From: yyamashita
//   Date: Wed, 6 May 2026 22:07:53 +0900
//   Subject: Add Tokyo livehouse event aggregator service
//
//   Full-stack React Router v7 app that scrapes event listings from major Tokyo
//   live venues (Liquid Room, WWW/WWW X, Shibuya O-EAST, Shinjuku LOFT, Club
//   Quattro) and stores them in SQLite for browsing and search.
//
//   - Modular scraper architecture: add a new venue by dropping a file in
//     app/scrapers/ and registering it in index.ts
//   - Routes: /events (filter by keyword/venue/date), /events/:id, /venues,
//     GET /api/scrape
//   - EventCard shows artist, date/time, venue, ticket URL, and fee
//   - Post-scrape per-venue Markdown files generated to events/ (dev reference)
//   - /add-livehouse Claude Code skill defined in .claude/commands/
//
//   Co-Authored-By: Claude Sonnet 4.6
//
// File: app/scrapers/shinjuku-loft.ts (new file in that commit; cgit v1.2.3 patch view)

/**
 * 新宿 LOFT — https://www.loft-prj.co.jp/schedule/loft
 *
 * The schedule page renders events inside `.eventlist` items.
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/** Static metadata for this venue; `id` is stamped on every scraped event as `venue_id`. */
export const venue: VenueMeta = {
  id: "shinjuku-loft",
  name: "新宿 LOFT",
  url: "https://www.loft-prj.co.jp",
  area: "新宿",
};

/** Page that is fetched and parsed; also the fallback base for resolving relative links. */
const SCHEDULE_URL = "https://www.loft-prj.co.jp/schedule/loft";

// Door/start times. Accept ASCII or full-width colons and one- or two-digit hours
// ("OPEN 18:30", "START：9:00"). Hoisted so they are not rebuilt for every item.
const OPEN_TIME_RE = /OPEN[\s:：]*(\d{1,2})[:：](\d{2})/i;
const START_TIME_RE = /START[\s:：]*(\d{1,2})[:：](\d{2})/i;

/** A year-less date that would lie further back than this is assumed to belong to next year. */
const MAX_PAST_DAYS = 180;
const MS_PER_DAY = 24 * 60 * 60 * 1000;

export const scraper: Scraper = {
  venue,

  /**
   * Fetches the schedule page and extracts one `EventInput` per list item.
   *
   * Items without a title or without a parseable date are skipped.
   *
   * @returns The events found on the page, in document order.
   * @throws Error with message `HTTP <status>` when the response is not OK.
   */
  async scrape(): Promise<EventInput[]> {
    const res = await fetch(SCHEDULE_URL);
    if (!res.ok) throw new Error(`HTTP ${res.status}`);

    // Relative hrefs on the page are relative to the page itself (after any
    // redirect), not to the site root.
    const pageUrl = res.url || SCHEDULE_URL;
    const $ = cheerio.load(await res.text());
    const events: EventInput[] = [];

    const toAbsolute = (href: string | undefined): string | null =>
      href ? absoluteUrl(href, pageUrl) : null;

    $(".eventlist__item, .schedule-item, .event_list li").each((_, el) => {
      const $el = $(el);

      const title = $el.find(".eventlist__title, .event-title, h3, h2").first().text().trim();
      if (!title) return;

      // Prefer the visible date text; fall back to the machine-readable
      // `datetime` attribute when the text is missing *or* unparseable.
      const date =
        parseJapaneseDate($el.find(".eventlist__date, .event-date, time").first().text().trim()) ??
        parseJapaneseDate($el.find("time").attr("datetime") ?? "");
      if (!date) return;

      const timeText = $el.find(".eventlist__time, .time").first().text();

      events.push({
        venue_id: venue.id,
        title,
        artist: $el.find(".eventlist__artist, .artist").first().text().trim() || null,
        date,
        open_time: extractTime(timeText, OPEN_TIME_RE),
        start_time: extractTime(timeText, START_TIME_RE),
        ticket_url: toAbsolute(
          $el.find("a[href*='eplus'], a[href*='pia'], a[href*='ticket']").first().attr("href"),
        ),
        image_url: toAbsolute($el.find("img").first().attr("src")),
        source_url: toAbsolute($el.find("a[href]").first().attr("href")),
      });
    });

    return events;
  },
};

/**
 * Pulls a clock time out of `text` using `pattern`, whose two capture groups
 * are the hour and the minute.
 *
 * @returns The time as zero-padded `HH:MM`, or `null` when the pattern does not match.
 */
function extractTime(text: string, pattern: RegExp): string | null {
  const m = text.match(pattern);
  if (!m) return null;
  const [, hours = "", minutes = ""] = m;
  return `${hours.padStart(2, "0")}:${minutes}`;
}

/**
 * Formats a calendar date as `YYYY-MM-DD`.
 *
 * @returns `null` when the combination is not a real calendar day
 *   (month 13, 31 April, 29 February in a non-leap year, ...).
 */
function toIsoDate(year: number, month: number, day: number): string | null {
  // Round-trip through Date: an out-of-range component overflows into a
  // neighbouring month/year and no longer matches what was passed in.
  const probe = new Date(Date.UTC(year, month - 1, day));
  const isRealDay =
    probe.getUTCFullYear() === year &&
    probe.getUTCMonth() === month - 1 &&
    probe.getUTCDate() === day;
  if (!isRealDay) return null;

  const yyyy = String(year).padStart(4, "0");
  const mm = String(month).padStart(2, "0");
  const dd = String(day).padStart(2, "0");
  return `${yyyy}-${mm}-${dd}`;
}

/**
 * Extracts a date from free text and returns it as `YYYY-MM-DD`.
 *
 * Recognised forms:
 * - full dates: `2026年5月6日`, `2026/5/6`, `2026.05.06`, `2026-05-06`
 * - month/day only: `5月6日`, `5/6`, `05.06`
 *
 * Month/day-only input takes the year of `now`; if that would put the date
 * more than `MAX_PAST_DAYS` days in the past (e.g. "1/10" scraped in
 * December), the following year is used instead.
 *
 * @param raw Text that may contain a date.
 * @param now Reference point for inferring a missing year; defaults to the current time.
 * @returns The ISO date, or `null` when no valid date is found.
 */
function parseJapaneseDate(raw: string, now: Date = new Date()): string | null {
  const full = raw.match(/(\d{4})[-./年]\s*(\d{1,2})[-./月]\s*(\d{1,2})/);
  if (full) {
    const [, y = "", m = "", d = ""] = full;
    return toIsoDate(Number(y), Number(m), Number(d));
  }

  // No "-" here: it would also match time ranges such as "18:00-19:00".
  const partial = raw.match(/(\d{1,2})[./月]\s*(\d{1,2})/);
  if (!partial) return null;

  const [, m = "", d = ""] = partial;
  const month = Number(m);
  const day = Number(d);
  const thisYear = now.getFullYear();

  const inThisYear = toIsoDate(thisYear, month, day);
  if (inThisYear) {
    const ageMs = now.getTime() - Date.UTC(thisYear, month - 1, day);
    if (ageMs <= MAX_PAST_DAYS * MS_PER_DAY) return inThisYear;
  }
  // Either far in the past or not a real day this year (29 Feb): try next year,
  // falling back to the current-year reading when next year is not valid.
  return toIsoDate(thisYear + 1, month, day) ?? inThisYear;
}

/**
 * Resolves `url` against `base`.
 *
 * Already-absolute http(s) URLs are returned untouched. Everything else
 * (root-relative, path-relative, protocol-relative `//host/...`) is resolved
 * with the standard `URL` algorithm.
 *
 * @param url  The href/src value as found in the markup.
 * @param base Absolute URL to resolve against.
 * @returns An absolute URL string.
 */
function absoluteUrl(url: string, base: string): string {
  const trimmed = url.trim();
  if (/^https?:\/\//i.test(trimmed)) return trimmed;
  try {
    return new URL(trimmed, base).href;
  } catch {
    // `base` or `url` is not parseable; fall back to plain concatenation
    // rather than dropping the link.
    return trimmed.startsWith("/") ? base + trimmed : `${base}/${trimmed}`;
  }
}