summary refs log tree commit diff
path: root/app/scrapers/www-shibuya.ts
diff options
context:
space:
mode:
Diffstat (limited to 'app/scrapers/www-shibuya.ts')
-rw-r--r-- app/scrapers/www-shibuya.ts 70
1 files changed, 32 insertions, 38 deletions
diff --git a/app/scrapers/www-shibuya.ts b/app/scrapers/www-shibuya.ts
index 905fc61..d561332 100644
--- a/app/scrapers/www-shibuya.ts
+++ b/app/scrapers/www-shibuya.ts
@@ -1,6 +1,3 @@
-/**
- * WWW / WWW X (渋谷) — https://www-shibuya.jp/schedule/
- */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";
@@ -21,39 +18,48 @@ export const scraper: Scraper = {
const $ = cheerio.load(html);
const events: EventInput[] = [];
- $(".schedule-list li, .p-schedule-item, article").each((_, el) => {
+ // Month from nav: "202605May" → year=2026, month=5
+ const monthText = $("li.month").first().text().trim();
+ const monthMatch = monthText.match(/(\d{4})(\d{2})/);
+ let year = monthMatch ? parseInt(monthMatch[1]) : new Date().getFullYear();
+ let month = monthMatch ? parseInt(monthMatch[2]) : new Date().getMonth() + 1;
+ let prevDay = 0;
+
+ $("article.column").each((_, el) => {
const $el = $(el);
- const title = $el.find(".schedule-title, .title, h3, h2").first().text().trim();
- if (!title) return;
+ const day = parseInt($el.find(".date .day").text().trim(), 10);
+ if (!day) return;
- const rawDate =
- $el.find(".schedule-date, .date, time").first().text().trim() ||
- $el.find("time").attr("datetime") ||
- "";
- const date = parseJapaneseDate(rawDate);
- if (!date) return;
+ // Detect month rollover when day numbers reset
+ if (prevDay > 0 && day < prevDay) {
+ month++;
+ if (month > 12) {
+ month = 1;
+ year++;
+ }
+ }
+ prevDay = day;
- const timeText = $el.find(".schedule-time, .time").first().text();
- const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i);
- const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i);
+ const date = `${year}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`;
+
+ const title = $el.find("h3").text().trim();
+ if (!title) return;
- const detailHref = $el.find("a").first().attr("href") ?? null;
+ const timeText = $el.find(".openstart").text();
+ const times = timeText.match(/\d{2}:\d{2}/g) ?? [];
+
+ const href = $el.find("a").first().attr("href") ?? null;
events.push({
venue_id: venue.id,
title,
- artist: $el.find(".artist").first().text().trim() || null,
+ artist: null,
date,
- open_time: openMatch?.[1] ?? null,
- start_time: startMatch?.[1] ?? null,
- ticket_url:
- $el.find("a[href*='eplus'], a[href*='pia'], a[href*='ticket']").first().attr("href") ?? null,
- image_url:
- $el.find("img").first().attr("src")
- ? absoluteUrl($el.find("img").first().attr("src")!, venue.url)
- : null,
- source_url: detailHref ? absoluteUrl(detailHref, venue.url) : null,
+ open_time: times[0] ?? null,
+ start_time: times[1] ?? null,
+ image_url: null,
+ source_url: href ? absoluteUrl(href, venue.url) : null,
});
});
@@ -61,18 +67,6 @@ export const scraper: Scraper = {
},
};
-function parseJapaneseDate(raw: string): string | null {
- const m =
- raw.match(/(\d{4})[./年](\d{1,2})[./月](\d{1,2})/) ||
- raw.match(/(\d{1,2})[./月](\d{1,2})/);
- if (!m) return null;
- if (m.length === 4) {
- return `${m[1]}-${m[2].padStart(2, "0")}-${m[3].padStart(2, "0")}`;
- }
- const year = new Date().getFullYear();
- return `${year}-${m[1].padStart(2, "0")}-${m[2].padStart(2, "0")}`;
-}
-
function absoluteUrl(url: string, base: string): string {
if (url.startsWith("http")) return url;
return url.startsWith("/") ? base + url : `${base}/${url}`;