summary | refs | log | tree | commit | diff
path: root/app/scrapers/shibuya-o.ts
diff options
context:
space:
mode:
Diffstat (limited to 'app/scrapers/shibuya-o.ts')
-rw-r--r-- app/scrapers/shibuya-o.ts 45
1 files changed, 19 insertions, 26 deletions
diff --git a/app/scrapers/shibuya-o.ts b/app/scrapers/shibuya-o.ts
index c674cfc..6d394ff 100644
--- a/app/scrapers/shibuya-o.ts
+++ b/app/scrapers/shibuya-o.ts
@@ -26,42 +26,25 @@ export const venue: VenueMeta = {
const SUB_VENUES = ["east", "west", "crest", "nest"];
const BASE = "https://shibuya-o.com";
-async function scrapeVenue(subVenue: string): Promise<EventInput[]> {
- const url = `${BASE}/${subVenue}/schedule/`;
+async function scrapeVenueMonth(subVenue: string, year: number, month: number): Promise<EventInput[]> {
+ const url = `${BASE}/${subVenue}/schedule/?y=${year}&m=${month}`;
const res = await fetch(url);
if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
const $ = cheerio.load(await res.text());
const events: EventInput[] = [];
- // Extract year: try "next" nav link (?y=YYYY&m=MM)
- const nextHref = $("a[href*='?y='][href*='&m=']").last().attr("href") ?? "";
- const nextYearMatch = nextHref.match(/[?&]y=(\d{4})/);
- const nextMonthMatch = nextHref.match(/[?&]m=(\d{1,2})/);
- const currentMonthRaw = $("div.p-schedule__month").first().text().trim();
- const currentMonth = parseInt(currentMonthRaw, 10);
-
- let year = new Date().getFullYear();
- if (nextYearMatch && nextMonthMatch) {
- const nextYear = parseInt(nextYearMatch[1], 10);
- const nextMonth = parseInt(nextMonthMatch[1], 10);
- // If next month == current month + 1 (normal case), year == nextYear
- // If current month == 12 and next month == 1, year == nextYear - 1
- year = nextMonth === currentMonth + 1 ? nextYear : nextYear - 1;
- }
-
$("div.p-scheduled-card").each((_, el) => {
const $el = $(el);
const dateRaw = $el.find("span.p-scheduled-card__date-item").first().text().trim();
- // "05 / 01" → month=5, day=1
const dateMatch = dateRaw.match(/(\d{1,2})\s*\/\s*(\d{1,2})/);
if (!dateMatch) return;
- const month = parseInt(dateMatch[1], 10);
+ const cardMonth = parseInt(dateMatch[1], 10);
const day = parseInt(dateMatch[2], 10);
- if (!currentMonth || !month) return;
- // Handle year rollover (December cards on January page, etc.)
- const cardYear = month < currentMonth ? year + 1 : year;
- const date = `${cardYear}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`;
+ if (!cardMonth) return;
+ // Handle year rollover (e.g. December page showing January events)
+ const cardYear = cardMonth < month ? year + 1 : year;
+ const date = `${cardYear}-${String(cardMonth).padStart(2, "0")}-${String(day).padStart(2, "0")}`;
const title = $el.find("span.p-scheduled-card__title-main").first().text().trim();
if (!title) return;
@@ -99,12 +82,22 @@ async function scrapeVenue(subVenue: string): Promise<EventInput[]> {
export const scraper: Scraper = {
venue,
async scrape(): Promise<EventInput[]> {
- const results = await Promise.allSettled(SUB_VENUES.map(scrapeVenue));
+ const now = new Date();
+ const months = [0, 1, 2].map((offset) => {
+ const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
+ return { year: d.getFullYear(), month: d.getMonth() + 1 };
+ });
+
+ const tasks = SUB_VENUES.flatMap((sub) =>
+ months.map(({ year, month }) => scrapeVenueMonth(sub, year, month))
+ );
+ const results = await Promise.allSettled(tasks);
+
const all: EventInput[] = [];
for (const r of results) {
if (r.status === "fulfilled") all.push(...r.value);
}
- // Deduplicate by date + title
+
const seen = new Set<string>();
return all.filter((e) => {
const key = `${e.date}|${e.title}`;