summary | refs | log | tree | commit | diff
path: root/app/scrapers/shibuya-o.ts
diff options
context:
space:
mode:
Diffstat (limited to 'app/scrapers/shibuya-o.ts')
-rw-r--r-- app/scrapers/shibuya-o.ts 153
1 files changed, 93 insertions, 60 deletions
diff --git a/app/scrapers/shibuya-o.ts b/app/scrapers/shibuya-o.ts
index 1ad8d8c..3d6f192 100644
--- a/app/scrapers/shibuya-o.ts
+++ b/app/scrapers/shibuya-o.ts
@@ -1,8 +1,15 @@
/**
- * Shibuya O-East / O-West / O-Crest / O-Nest (渋谷)
- * https://www.shibuya-o.com/schedule/
+ * 渋谷 O-EAST / O-WEST / O-Crest / O-nest — https://shibuya-o.com
*
- * The page uses a unified schedule listing for all O venues.
+ * 各ベニューのスケジュールページを個別に取得して統合する。
+ * DOM 構造 (共通):
+ * <div class="p-scheduled-card">
+ * <a href="https://shibuya-o.com/{venue}/schedule/{slug}/">
+ * <span class="p-scheduled-card__date-item">05 / 01</span>
+ * <span class="p-scheduled-card__date-open">OPEN 18:00 / START 19:00</span>
+ * <span class="p-scheduled-card__title-main">タイトル</span>
+ * <li class="p-scheduled-card__artist-item">アーティスト</li>
+ * 年は nav リンク <a href="/east/schedule/?y=2026&m=6"> から取得。
*/
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
@@ -10,73 +17,99 @@ import type { EventInput } from "~/lib/db.server";
+/** Venue metadata for this scraper; `venue.id` is stamped on every scraped event. */
export const venue: VenueMeta = {
id: "shibuya-o",
- name: "渋谷 O-EAST / O-WEST",
- url: "https://www.shibuya-o.com",
+ name: "渋谷 O-EAST / O-WEST / O-Crest / O-nest",
+ url: "https://shibuya-o.com",
area: "渋谷",
};
-export const scraper: Scraper = {
- venue,
- async scrape(): Promise<EventInput[]> {
- const res = await fetch("https://www.shibuya-o.com/schedule/");
- if (!res.ok) throw new Error(`HTTP ${res.status}`);
- const html = await res.text();
- const $ = cheerio.load(html);
- const events: EventInput[] = [];
+/** Path segments of the per-hall schedule pages; each one is fetched separately. */
+const SUB_VENUES = ["east", "west", "crest", "nest"];
+/** Site origin used to build schedule URLs and to resolve relative detail links. */
+const BASE = "https://shibuya-o.com";
- $(".schedule_list li, .c-schedule__item, .event-item").each((_, el) => {
- const $el = $(el);
+/**
+ * Fetch one sub-venue's schedule page and parse its event cards.
+ *
+ * @param subVenue Path segment of the hall, used as `${BASE}/{subVenue}/schedule/`.
+ * @returns One EventInput per card that has a parseable "MM / DD" date and a
+ *   non-empty title. No cards are returned when the page's current month
+ *   cannot be read from `div.p-schedule__month`.
+ * @throws Error when the HTTP response is not ok.
+ */
+async function scrapeVenue(subVenue: string): Promise<EventInput[]> {
+ const url = `${BASE}/${subVenue}/schedule/`;
+ const res = await fetch(url);
+ if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
+ const $ = cheerio.load(await res.text());
+ const events: EventInput[] = [];
- const title = $el.find(".schedule_title, .title, h3").first().text().trim();
- if (!title) return;
+ // Extract year: try "next" nav link (?y=YYYY&m=MM)
+ // NOTE(review): assumes the last matching link is the "next month" link — confirm against the page.
+ const nextHref = $("a[href*='?y='][href*='&m=']").last().attr("href") ?? "";
+ const nextYearMatch = nextHref.match(/[?&]y=(\d{4})/);
+ const nextMonthMatch = nextHref.match(/[?&]m=(\d{1,2})/);
+ const currentMonthRaw = $("div.p-schedule__month").first().text().trim();
+ const currentMonth = parseInt(currentMonthRaw, 10);
- const rawDate =
- $el.find(".schedule_date, .date, time").first().text().trim() ||
- $el.find("time").attr("datetime") ||
- "";
- const date = parseJapaneseDate(rawDate);
- if (!date) return;
+ // Fall back to the current calendar year when no usable nav link is found.
+ let year = new Date().getFullYear();
+ if (nextYearMatch && nextMonthMatch) {
+ const nextYear = parseInt(nextYearMatch[1], 10);
+ const nextMonth = parseInt(nextMonthMatch[1], 10);
+ // If next month == current month + 1 (normal case), year == nextYear
+ // If current month == 12 and next month == 1, year == nextYear - 1
+ // NOTE(review): any other mismatch (including a NaN currentMonth) also yields
+ // nextYear - 1; with a NaN currentMonth every card is skipped by the guard below.
+ year = nextMonth === currentMonth + 1 ? nextYear : nextYear - 1;
+ }
- const hall = $el.find(".schedule_hall, .hall, .venue-name").first().text().trim() || null;
- const timeText = $el.find(".schedule_time, .time").first().text();
- const openMatch = timeText.match(/OPEN[:: ]*(\d{2}:\d{2})/i);
- const startMatch = timeText.match(/START[:: ]*(\d{2}:\d{2})/i);
+ $("div.p-scheduled-card").each((_, el) => {
+ const $el = $(el);
- const detailHref = $el.find("a[href]").first().attr("href") ?? null;
+ const dateRaw = $el.find("span.p-scheduled-card__date-item").first().text().trim();
+ // "05 / 01" → month=5, day=1
+ const dateMatch = dateRaw.match(/(\d{1,2})\s*\/\s*(\d{1,2})/);
+ if (!dateMatch) return;
+ const month = parseInt(dateMatch[1], 10);
+ const day = parseInt(dateMatch[2], 10);
+ // Skip the card when the page month is unknown (NaN/0) or the card month is 0.
+ if (!currentMonth || !month) return;
+ // Year rollover: a card whose month is earlier than the page month (e.g. January
+ // cards on a December page) is dated in the following year. The reverse case
+ // (December cards on a January page) is not adjusted here.
+ const cardYear = month < currentMonth ? year + 1 : year;
+ const date = `${cardYear}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`;
- events.push({
- venue_id: venue.id,
- title,
- artist: hall,
- date,
- open_time: openMatch?.[1] ?? null,
- start_time: startMatch?.[1] ?? null,
- ticket_url:
- $el.find("a[href*='eplus'], a[href*='lawson'], a[href*='ticket']").first().attr("href") ?? null,
- image_url: $el.find("img").first().attr("src")
- ? absoluteUrl($el.find("img").first().attr("src")!, venue.url)
- : null,
- source_url: detailHref ? absoluteUrl(detailHref, venue.url) : null,
- });
- });
+ const title = $el.find("span.p-scheduled-card__title-main").first().text().trim();
+ if (!title) return;
- return events;
- },
-};
+ // Both times come from the same "OPEN hh:mm / START hh:mm" text; either may be absent.
+ const openText = $el.find("span.p-scheduled-card__date-open").first().text().trim();
+ const openMatch = openText.match(/OPEN\s*(\d{2}:\d{2})/i);
+ const startMatch = openText.match(/START\s*(\d{2}:\d{2})/i);
-function parseJapaneseDate(raw: string): string | null {
- const m =
- raw.match(/(\d{4})[./年](\d{1,2})[./月](\d{1,2})/) ||
- raw.match(/(\d{1,2})[./月](\d{1,2})/);
- if (!m) return null;
- if (m.length === 4) {
- return `${m[1]}-${m[2].padStart(2, "0")}-${m[3].padStart(2, "0")}`;
- }
- const year = new Date().getFullYear();
- return `${year}-${m[1].padStart(2, "0")}-${m[2].padStart(2, "0")}`;
-}
+ // Artist names are joined with "、"; an empty join result becomes null.
+ const artists = $el.find("li.p-scheduled-card__artist-item")
+ .map((_, s) => $(s).text().trim()).get().join("、") || null;
+
+ // Prefer a link wrapping the card, otherwise the first link inside it;
+ // non-absolute hrefs are prefixed with BASE.
+ const detailHref = $el.closest("a[href]").attr("href") ??
+ $el.find("a[href]").first().attr("href") ?? null;
+ const sourceUrl = detailHref
+ ? (detailHref.startsWith("http") ? detailHref : `${BASE}${detailHref}`)
+ : null;
-function absoluteUrl(url: string, base: string): string {
- if (url.startsWith("http")) return url;
- return url.startsWith("/") ? base + url : `${base}/${url}`;
+ // NOTE(review): the image src is stored as-is and is not resolved against BASE —
+ // confirm the site always emits absolute image URLs.
+ const imageUrl = $el.find("figure img").first().attr("src") ?? null;
+
+ events.push({
+ venue_id: venue.id,
+ title,
+ artist: artists,
+ date,
+ open_time: openMatch?.[1] ?? null,
+ start_time: startMatch?.[1] ?? null,
+ image_url: imageUrl,
+ source_url: sourceUrl,
+ });
+ });
+
+ return events;
}
+
+/**
+ * Scraper entry point: fetches every sub-venue schedule concurrently and
+ * merges the results into one list, de-duplicated by date + title.
+ *
+ * A failing sub-venue does not abort the run: its error is logged and the
+ * events of the remaining sub-venues are still returned. When every
+ * sub-venue fails, `scrape` throws so that a total outage is not mistaken
+ * for an empty schedule.
+ */
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    const results = await Promise.allSettled(SUB_VENUES.map((v) => scrapeVenue(v)));
+    const all: EventInput[] = [];
+    const failures: string[] = [];
+    results.forEach((r, i) => {
+      if (r.status === "fulfilled") {
+        all.push(...r.value);
+        return;
+      }
+      // Keep the reason so a failed sub-venue is visible instead of silently dropped.
+      const reason = r.reason instanceof Error ? r.reason.message : String(r.reason);
+      failures.push(`${SUB_VENUES[i]}: ${reason}`);
+    });
+    if (failures.length > 0) {
+      if (failures.length === results.length) {
+        throw new Error(`${venue.id}: all sub-venues failed (${failures.join("; ")})`);
+      }
+      console.warn(`${venue.id}: skipped failed sub-venues (${failures.join("; ")})`);
+    }
+    // Deduplicate by date + title (the first occurrence wins).
+    const seen = new Set<string>();
+    return all.filter((e) => {
+      const key = `${e.date}|${e.title}`;
+      if (seen.has(key)) return false;
+      seen.add(key);
+      return true;
+    });
+  },
+};