/**
 * Shibuya O-EAST / O-WEST / O-Crest / O-nest — https://shibuya-o.com
 *
 * Each sub-venue's schedule page is fetched separately and the results are
 * merged into a single event list.
 *
 * DOM structure shared by all sub-venues (as targeted by the selectors below):
 *   div.p-scheduled-card
 *     span.p-scheduled-card__date-item     "05 / 01"
 *     span.p-scheduled-card__date-open     "OPEN 18:00 / START 19:00"
 *     span.p-scheduled-card__title-main    title
 *     li.p-scheduled-card__artist-item     artist (one per item)
 *
 * Cards only carry month/day; the year is derived from the year/month of the
 * schedule page that was requested.
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

export const venue: VenueMeta = {
  id: "shibuya-o",
  name: "渋谷 O-EAST / O-WEST / O-Crest / O-nest",
  url: "https://shibuya-o.com",
  area: "渋谷",
  capacity: 1300,
};

/** URL path segment of each sub-venue's schedule section. */
const SUB_VENUES = ["east", "west", "crest", "nest"] as const;
const BASE = "https://shibuya-o.com";

/** Month offsets (relative to the current month) that get scraped. */
const MONTH_OFFSETS = [0, 1, 2];

// Hoisted so the patterns are compiled once rather than once per card.
const DATE_RE = /(\d{1,2})\s*\/\s*(\d{1,2})/;
const OPEN_TIME_RE = /OPEN\s*(\d{1,2}):(\d{2})/i;
const START_TIME_RE = /START\s*(\d{1,2}):(\d{2})/i;

/**
 * Picks the year for a card that only shows month/day.
 *
 * The year closest to the requested page month wins: a gap of more than six
 * months means the card sits on the other side of a year boundary
 * (December page → January card, or January page → December card).
 * Anything nearer is treated as belonging to the page's own year.
 */
function resolveCardYear(pageYear: number, pageMonth: number, cardMonth: number): number {
  const gap = cardMonth - pageMonth;
  if (gap < -6) return pageYear + 1;
  if (gap > 6) return pageYear - 1;
  return pageYear;
}

/** Extracts a time with `pattern` and normalises it to zero-padded `HH:MM`; `null` if absent. */
function extractTime(text: string, pattern: RegExp): string | null {
  const found = pattern.exec(text);
  if (!found) return null;
  const [, hours = "", minutes = ""] = found;
  return `${hours.padStart(2, "0")}:${minutes}`;
}

/**
 * Resolves a possibly relative / protocol-relative URL against `base`.
 * Returns `null` for missing, blank or unparsable input.
 */
function toAbsoluteUrl(raw: string | undefined, base: string): string | null {
  const trimmed = raw?.trim();
  if (!trimmed) return null;
  try {
    return new URL(trimmed, base).href;
  } catch {
    return null;
  }
}

/**
 * Fetches one sub-venue's schedule page for the given month and converts every
 * event card on it into an `EventInput`.
 *
 * @param subVenue - path segment of the sub-venue (e.g. `"east"`)
 * @param year - year passed as the `y` query parameter
 * @param month - 1-based month passed as the `m` query parameter
 * @returns the events found on the page; cards without a parsable date or a
 *   title are skipped
 * @throws Error when the HTTP response status is not OK
 */
async function scrapeVenueMonth(
  subVenue: string,
  year: number,
  month: number,
): Promise<EventInput[]> {
  const url = `${BASE}/${subVenue}/schedule/?y=${year}&m=${month}`;
  const res = await fetch(url);
  if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);

  const $ = cheerio.load(await res.text());
  const events: EventInput[] = [];

  $("div.p-scheduled-card").each((_, el) => {
    const $el = $(el);

    const dateRaw = $el.find("span.p-scheduled-card__date-item").first().text().trim();
    const dateMatch = DATE_RE.exec(dateRaw);
    if (!dateMatch) return;

    const cardMonth = Number(dateMatch[1]);
    const day = Number(dateMatch[2]);
    // Written as a positive range test so that NaN is rejected as well.
    const monthOk = cardMonth >= 1 && cardMonth <= 12;
    const dayOk = day >= 1 && day <= 31;
    if (!monthOk || !dayOk) return;

    const cardYear = resolveCardYear(year, month, cardMonth);
    const date = `${cardYear}-${String(cardMonth).padStart(2, "0")}-${String(day).padStart(2, "0")}`;

    const title = $el.find("span.p-scheduled-card__title-main").first().text().trim();
    if (!title) return;

    // OPEN and START share a single element, e.g. "OPEN 18:00 / START 19:00".
    const openText = $el.find("span.p-scheduled-card__date-open").first().text().trim();

    // Drop empty list items so the joined string has no stray separators.
    const artistNames = $el
      .find("li.p-scheduled-card__artist-item")
      .map((_, item) => $(item).text().trim())
      .get()
      .filter((name) => name.length > 0);
    const artists = artistNames.join("、") || null;

    // The card may be wrapped by the detail link or contain it.
    const detailHref =
      $el.closest("a[href]").attr("href") ?? $el.find("a[href]").first().attr("href");

    events.push({
      venue_id: venue.id,
      title,
      artist: artists,
      date,
      open_time: extractTime(openText, OPEN_TIME_RE),
      start_time: extractTime(openText, START_TIME_RE),
      // Both URLs are resolved against the page URL so relative paths work.
      image_url: toAbsoluteUrl($el.find("figure img").first().attr("src"), url),
      source_url: toAbsoluteUrl(detailHref, url),
    });
  });

  return events;
}

export const scraper: Scraper = {
  venue,

  /**
   * Scrapes the current month and the two following months for every
   * sub-venue, merges the results and removes duplicates.
   *
   * Pages that fail to load are logged and skipped (best effort), so a single
   * failing page does not discard the events from the others.
   *
   * @returns events de-duplicated by date + title (first occurrence wins)
   */
  async scrape(): Promise<EventInput[]> {
    const now = new Date();
    const months = MONTH_OFFSETS.map((offset) => {
      // Date normalises month overflow (e.g. month index 12 → January of next year).
      const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
      return { year: d.getFullYear(), month: d.getMonth() + 1 };
    });

    const targets = SUB_VENUES.flatMap((sub) =>
      months.map(({ year, month }) => ({ sub, year, month })),
    );
    const settled = await Promise.allSettled(
      targets.map(({ sub, year, month }) => scrapeVenueMonth(sub, year, month)),
    );

    const all: EventInput[] = [];
    settled.forEach((result, index) => {
      if (result.status === "fulfilled") {
        all.push(...result.value);
        return;
      }
      const target = targets[index];
      const label = target
        ? `${target.sub} ${target.year}-${String(target.month).padStart(2, "0")}`
        : `task #${index}`;
      console.warn(`[${venue.id}] schedule fetch failed (${label}):`, result.reason);
    });

    const seen = new Set<string>();
    return all.filter((event) => {
      const key = `${event.date}|${event.title}`;
      if (seen.has(key)) return false;
      seen.add(key);
      return true;
    });
  },
};