/**
 * Shibuya O-EAST / O-WEST / O-Crest / O-nest — https://shibuya-o.com
 *
 * Each sub-venue's schedule page is fetched separately and the results are
 * merged into a single event list.
 *
 * Selectors read from every schedule page (shared by all sub-venues):
 *   div.p-schedule__month                  text parsed as the page's month number
 *   div.p-scheduled-card                   one event card
 *     span.p-scheduled-card__date-item     "05 / 01"
 *     span.p-scheduled-card__date-open     "OPEN 18:00 / START 19:00"
 *     span.p-scheduled-card__title-main    title
 *     li.p-scheduled-card__artist-item     artist (one per item)
 *
 * Cards carry no year; it is derived from a navigation link whose href
 * contains ?y=YYYY&m=MM.
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

export const venue: VenueMeta = {
  id: "shibuya-o",
  name: "渋谷 O-EAST / O-WEST / O-Crest / O-nest",
  url: "https://shibuya-o.com",
  area: "渋谷",
  capacity: 1300,
};

const SUB_VENUES = ["east", "west", "crest", "nest"];
const BASE = "https://shibuya-o.com";

// Hoisted so the patterns are not rebuilt for every card.
const CARD_DATE_RE = /(\d{1,2})\s*\/\s*(\d{1,2})/;
const OPEN_TIME_RE = /OPEN\s*(\d{1,2}):(\d{2})/i;
const START_TIME_RE = /START\s*(\d{1,2}):(\d{2})/i;

/** Left-pads a number to two digits ("5" → "05"). */
function pad2(value: number): string {
  return String(value).padStart(2, "0");
}

/**
 * Derives the year of the page being shown from a navigation link.
 *
 * @param navHref      href expected to contain `y=YYYY` and `m=MM` query parameters
 * @param pageMonth    month number shown by the page
 * @param fallbackYear year to use when the link carries no usable year/month
 * @returns the link's year when the link points at `pageMonth + 1`, otherwise
 *          the link's year minus one (covers a December page linking to January)
 */
function resolvePageYear(navHref: string, pageMonth: number, fallbackYear: number): number {
  const linkYearRaw = navHref.match(/[?&]y=(\d{4})/)?.[1];
  const linkMonthRaw = navHref.match(/[?&]m=(\d{1,2})/)?.[1];
  if (linkYearRaw === undefined || linkMonthRaw === undefined) return fallbackYear;

  const linkYear = parseInt(linkYearRaw, 10);
  const linkMonth = parseInt(linkMonthRaw, 10);
  return linkMonth === pageMonth + 1 ? linkYear : linkYear - 1;
}

/**
 * Converts a card date such as "05 / 01" into an ISO "YYYY-MM-DD" string.
 *
 * A card whose month is earlier than the page month is assigned to the
 * following year (e.g. a January card listed on a December page).
 *
 * @returns the ISO date, or null when the text has no month/day pair or the
 *          pair is outside 1–12 / 1–31
 */
function parseCardDate(dateRaw: string, pageMonth: number, pageYear: number): string | null {
  const match = dateRaw.match(CARD_DATE_RE);
  const monthRaw = match?.[1];
  const dayRaw = match?.[2];
  if (monthRaw === undefined || dayRaw === undefined) return null;

  const month = parseInt(monthRaw, 10);
  const day = parseInt(dayRaw, 10);
  // Reject values that cannot form a calendar date instead of emitting "2025-13-00".
  if (month < 1 || month > 12 || day < 1 || day > 31) return null;

  const year = month < pageMonth ? pageYear + 1 : pageYear;
  return `${year}-${pad2(month)}-${pad2(day)}`;
}

/**
 * Pulls a labelled clock time out of text like "OPEN 18:00 / START 19:00".
 *
 * @param pattern regex with two capture groups: hour (1–2 digits) and minute
 * @returns the time as zero-padded "HH:MM", or null when the label is absent
 */
function extractTime(text: string, pattern: RegExp): string | null {
  const match = text.match(pattern);
  const hour = match?.[1];
  const minute = match?.[2];
  if (hour === undefined || minute === undefined) return null;
  return `${hour.padStart(2, "0")}:${minute}`;
}

/**
 * Fetches one sub-venue's schedule page and converts its cards into events.
 *
 * @param subVenue path segment of the sub-venue ("east", "west", ...)
 * @returns one event per card that has both a parsable date and a title
 * @throws Error when the HTTP response is not OK or the page month cannot be read
 */
async function scrapeVenue(subVenue: string): Promise<EventInput[]> {
  const url = `${BASE}/${subVenue}/schedule/`;
  const res = await fetch(url);
  if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
  const $ = cheerio.load(await res.text());

  // Without the page month no card can be dated, so fail once up front
  // rather than silently skipping every card.
  const pageMonth = parseInt($("div.p-schedule__month").first().text().trim(), 10);
  if (!pageMonth) throw new Error(`Could not read schedule month from ${url}`);

  // The last link carrying ?y=YYYY&m=MM is used as the "next month" link.
  const navHref = $("a[href*='?y='][href*='&m=']").last().attr("href") ?? "";
  const pageYear = resolvePageYear(navHref, pageMonth, new Date().getFullYear());

  const events: EventInput[] = [];

  $("div.p-scheduled-card").each((_, el) => {
    const $card = $(el);

    const dateRaw = $card.find("span.p-scheduled-card__date-item").first().text().trim();
    const date = parseCardDate(dateRaw, pageMonth, pageYear);
    if (date === null) return;

    const title = $card.find("span.p-scheduled-card__title-main").first().text().trim();
    if (!title) return;

    const timeText = $card.find("span.p-scheduled-card__date-open").first().text().trim();

    // Drop blank items so the joined string has no dangling separators.
    const artistNames = $card
      .find("li.p-scheduled-card__artist-item")
      .map((_, item) => $(item).text().trim())
      .get()
      .filter((name) => name !== "");
    const artist = artistNames.join("、") || null;

    // The card may be wrapped by the detail link or may contain it.
    const detailHref =
      $card.closest("a[href]").attr("href") ??
      $card.find("a[href]").first().attr("href") ??
      null;
    let sourceUrl: string | null = null;
    if (detailHref) {
      sourceUrl = detailHref.startsWith("http") ? detailHref : `${BASE}${detailHref}`;
    }

    const imageUrl = $card.find("figure img").first().attr("src") ?? null;

    events.push({
      venue_id: venue.id,
      title,
      artist,
      date,
      open_time: extractTime(timeText, OPEN_TIME_RE),
      start_time: extractTime(timeText, START_TIME_RE),
      image_url: imageUrl,
      source_url: sourceUrl,
    });
  });

  return events;
}

export const scraper: Scraper = {
  venue,

  /**
   * Scrapes every sub-venue in parallel and merges the results.
   *
   * A sub-venue that fails is logged and skipped so the others still
   * contribute. Events sharing the same date and title are collapsed to the
   * first occurrence.
   */
  async scrape(): Promise<EventInput[]> {
    const results = await Promise.allSettled(SUB_VENUES.map((sub) => scrapeVenue(sub)));

    const all: EventInput[] = [];
    results.forEach((result, index) => {
      if (result.status === "fulfilled") {
        all.push(...result.value);
      } else {
        // Best-effort: keep going, but do not hide the failure.
        console.warn(`[${venue.id}] failed to scrape "${SUB_VENUES[index]}":`, result.reason);
      }
    });

    // Deduplicate by date + title.
    const seen = new Set<string>();
    return all.filter((event) => {
      const key = `${event.date}|${event.title}`;
      if (seen.has(key)) return false;
      seen.add(key);
      return true;
    });
  },
};