summary | refs | log | tree | commit | diff
path: root/app/scrapers
diff options
context:
space:
mode:
Diffstat (limited to 'app/scrapers')
-rw-r--r-- app/scrapers/fever-shindaita.ts 117
-rw-r--r-- app/scrapers/index.ts 14
-rw-r--r-- app/scrapers/mod-shibasaki.ts 124
-rw-r--r-- app/scrapers/moon-step-nakano.ts 85
-rw-r--r-- app/scrapers/nine-spices.ts 94
-rw-r--r-- app/scrapers/nishieifuku-jam.ts 75
-rw-r--r-- app/scrapers/shibuya-o.ts 153
-rw-r--r-- app/scrapers/warp-kichijoji.ts 20
8 files changed, 617 insertions, 65 deletions
diff --git a/app/scrapers/fever-shindaita.ts b/app/scrapers/fever-shindaita.ts
new file mode 100644
index 0000000..71c31f6
--- /dev/null
+++ b/app/scrapers/fever-shindaita.ts
@@ -0,0 +1,117 @@
+/**
+ * 新代田 FEVER — https://www.fever-popo.com
+ *
+ * Movable Type CMS。月別 URL: /schedule/YYYY/MM/
+ * DOM 構造:
+ * <div class="entry-asset">
+ * <h2 class="eventtitle">26.05.01 (Fri)&nbsp;タイトル</h2>
+ * <meta property="og:url" content="https://www.fever-popo.com/schedule/.../MMDD.html">
+ * <h3><p>アーティスト1<br/>アーティスト2</p></h3>
+ * <div>OPEN HH:MM / START HH:MM</div>
+ * <div><p>ADV ¥XXXX (+1drink) / DOOR ¥XXXX (+1drink)</p></div>
+ * <img class="scpickup" src="..."> ← フライヤー画像
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+export const venue: VenueMeta = { // Venue metadata exported alongside the scraper.
+ id: "fever-shindaita", // Stored as venue_id on every event this scraper emits.
+ name: "新代田 FEVER",
+ url: "https://www.fever-popo.com", // Base URL; the monthly schedule URLs are built from it.
+ area: "新代田",
+};
+
+/**
+ * Fetches one monthly schedule page and turns each `div.entry-asset` into an event.
+ *
+ * @param yyyymm Month to fetch, formatted as "YYYY-MM".
+ * @returns The events parsed from the page; an empty array when the response is not OK.
+ */
+async function scrapeMonth(yyyymm: string): Promise<EventInput[]> {
+  const [year, month] = yyyymm.split("-");
+  const url = `${venue.url}/schedule/${year}/${month}/`;
+  const res = await fetch(url);
+  // A non-OK page yields no events instead of throwing, so the other month requested
+  // by scrape() is still returned.
+  if (!res.ok) return [];
+  const $ = cheerio.load(await res.text());
+  const events: EventInput[] = [];
+
+  $("div.entry-asset").each((_, el) => {
+    const $el = $(el);
+
+    // Heading: "26.05.01 (Fri) title" -> date parts + title. The text is trimmed first
+    // so that whitespace around the heading cannot defeat the ^ anchor.
+    const h2Text = $el.find("h2.eventtitle").first().text().trim();
+    const head = h2Text.match(/^(\d{2})\.(\d{2})\.(\d{2})\s+\([A-Za-z]+\)\s*(.+)$/);
+    if (!head) return;
+    const title = head[4].trim();
+    if (!title) return;
+    const date = `20${head[1]}-${head[2]}-${head[3]}`;
+
+    // Source URL from the og:url meta inside the entry.
+    const sourceUrl = $el.find("meta[property='og:url']").attr("content") ?? null;
+
+    // Artists: <p> inside the first <h3> of the body, one name per <br>-separated line.
+    // .text() drops tags, so <br> (and each </p>) is turned into "\n" on a clone before
+    // reading the text; otherwise adjacent names would be fused into one string.
+    const $artistP = $el.find("div.asset-body h3").first().find("p").clone();
+    $artistP.find("br").replaceWith("\n");
+    $artistP.each((_, p) => {
+      $(p).append("\n");
+    });
+    const artist =
+      $artistP
+        .text()
+        .split(/\n/)
+        .map((s) => s.trim())
+        .filter(Boolean)
+        .join("、") || null;
+
+    // Time and price are both read from the body's <div>s. For each field a later
+    // matching div overwrites an earlier one.
+    let openTime: string | null = null;
+    let startTime: string | null = null;
+    let price: string | null = null;
+    $el.find("div.asset-body div").each((_, d) => {
+      const text = $(d).text();
+
+      // Time: a div mentioning both "OPEN" and "START".
+      if (/OPEN/i.test(text) && /START/i.test(text)) {
+        const om = text.match(/OPEN\s*(\d{1,2}:\d{2})/i);
+        const sm = text.match(/START\s*(\d{1,2}:\d{2})/i);
+        if (om) openTime = om[1];
+        if (sm) startTime = sm[1];
+      }
+
+      // Price: a div with a yen sign and an ADV/DOOR style label, whitespace-collapsed.
+      // (The former `.split("\n")[0]` ran after all newlines had already been replaced
+      // by spaces, so it never had any effect and has been removed.)
+      if (/[¥¥]/.test(text) && /(ADV|DOOR|前売|当日)/i.test(text)) {
+        price = text.replace(/\s+/g, " ").trim() || null;
+      }
+    });
+
+    // Flyer image.
+    const imageUrl = $el.find("img.scpickup").first().attr("src") ?? null;
+
+    // First link whose href contains one of the known ticket-vendor fragments.
+    const ticketUrl =
+      $el.find("a[href*='eplus'], a[href*='pia'], a[href*='tiget'], a[href*='livepocket'], a[href*='t-dv.com']")
+        .first().attr("href") ?? null;
+
+    events.push({
+      venue_id: venue.id,
+      title,
+      artist,
+      date,
+      open_time: openTime,
+      start_time: startTime,
+      price,
+      ticket_url: ticketUrl,
+      image_url: imageUrl,
+      source_url: sourceUrl,
+    });
+  });
+
+  return events;
+}
+
+/** Scraper for Shindaita FEVER: merges the current month's and the next month's schedule pages. */
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    // Formats a date as the "YYYY-MM" key expected by scrapeMonth().
+    const toMonthKey = (d: Date): string =>
+      `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}`;
+
+    const today = new Date();
+    // Day 1 of month+1; the Date constructor rolls December over into January.
+    const firstOfNextMonth = new Date(today.getFullYear(), today.getMonth() + 1, 1);
+
+    const [current, upcoming] = await Promise.all([
+      scrapeMonth(toMonthKey(today)),
+      scrapeMonth(toMonthKey(firstOfNextMonth)),
+    ]);
+    return current.concat(upcoming);
+  },
+};
diff --git a/app/scrapers/index.ts b/app/scrapers/index.ts
index 81a0eb7..2d7f1d1 100644
--- a/app/scrapers/index.ts
+++ b/app/scrapers/index.ts
@@ -9,9 +9,12 @@ import { scraper as shibuyaO } from "./shibuya-o";
import { scraper as shinjukuLoft } from "./shinjuku-loft";
import { scraper as clubQuattro } from "./club-quattro";
import { scraper as meetsOtsuka } from "./meets-otsuka";
-import { scraper as warpKichijoji } from "./warp-kichijoji";
import { scraper as flatNishiogikubo } from "./flat-nishiogikubo";
-import { scraper as pitbarNishiogikubo } from "./pitbar-nishiogikubo";
+import { scraper as nineSpices } from "./nine-spices";
+import { scraper as nishieifukuJam } from "./nishieifuku-jam";
+import { scraper as feverShindaita } from "./fever-shindaita";
+import { scraper as moonStepNakano } from "./moon-step-nakano";
+import { scraper as modShibasaki } from "./mod-shibasaki";
export const ALL_SCRAPERS: Scraper[] = [
liquidRoom,
@@ -20,9 +23,12 @@ export const ALL_SCRAPERS: Scraper[] = [
shinjukuLoft,
clubQuattro,
meetsOtsuka,
- warpKichijoji,
flatNishiogikubo,
- pitbarNishiogikubo,
+ nineSpices,
+ nishieifukuJam,
+ feverShindaita,
+ moonStepNakano,
+ modShibasaki,
];
export type { Scraper } from "./base";
diff --git a/app/scrapers/mod-shibasaki.ts b/app/scrapers/mod-shibasaki.ts
new file mode 100644
index 0000000..0e2a96b
--- /dev/null
+++ b/app/scrapers/mod-shibasaki.ts
@@ -0,0 +1,124 @@
+/**
+ * shibasaki mod (調布市柴崎) — https://shibasakimod.com/schedule
+ *
+ * Squarespace イベントリスト。静的 HTML として配信される。
+ * 一覧ページに出演者情報はないため、各イベントの詳細ページを並列取得する。
+ * DOM 構造 (一覧):
+ * <article class="eventlist-event eventlist-event--upcoming">
+ * <time class="event-date" datetime="YYYY-MM-DD">
+ * <h1 class="eventlist-title"><a class="eventlist-title-link" href="/schedule/YYYYMMDD">
+ * DOM 構造 (詳細):
+ * <div class="sqs-html-content"><p style="white-space:pre-wrap;">
+ * ...
+ * live:\nアーティスト名\n... (または 出演:)
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+export const venue: VenueMeta = { // Venue metadata exported alongside the scraper.
+ id: "mod-shibasaki", // Stored as venue_id on every event this scraper emits.
+ name: "shibasaki mod",
+ url: "https://shibasakimod.com", // Also prefixed to relative detail-page links.
+ area: "柴崎",
+};
+
+const SCHEDULE_URL = "https://shibasakimod.com/schedule"; // Event list page fetched by scrape().
+
+interface EventStub { // Fields read from a list card before its detail page is fetched for performers.
+ date: string; // "YYYY-MM-DD", taken from the <time datetime> attribute.
+ title: string;
+ startTime: string | null;
+ sourceUrl: string | null; // Detail page URL; null when the card has no title link.
+ imageUrl: string | null;
+}
+
+/** Collects artist names from consecutive lines, stopping at the first time/price/ticket line. */
+function extractArtistLines(section: string): string[] {
+  const artists: string[] = [];
+  for (const raw of section.split(/\n/)) {
+    // Drop "@handle" mentions and a leading bullet before judging the line.
+    const line = raw.replace(/\s*@\S+/g, "").replace(/^[・•]\s*/, "").trim();
+    if (!line) continue;
+    if (/^https?:\/\//i.test(line)) continue; // skip social links between artists
+    if (/\d{1,2}:\d{2}|[¥¥]|yen|ticket|チケット|予約|adv|door/i.test(line)) break;
+    artists.push(line);
+  }
+  return artists;
+}
+
+/**
+ * Fetches an event detail page and extracts the performer list from its body text.
+ *
+ * A "live:" / "出演:" / "act:" label is tried first; when it yields no names, the
+ * text from the first line starting with "・" is used instead.
+ *
+ * @param url URL of the event detail page.
+ * @returns Performer names joined with " / ", or null when nothing was extracted or
+ *          the request or parsing failed. Errors are swallowed so that one bad detail
+ *          page does not reject the Promise.all over all events in scrape().
+ */
+async function fetchArtist(url: string): Promise<string | null> {
+  try {
+    const res = await fetch(url);
+    if (!res.ok) return null;
+    const $ = cheerio.load(await res.text());
+    // Insert newlines at block boundaries before extracting text.
+    $(".sqs-html-content br").replaceWith("\n");
+    $(".sqs-html-content p").each((_, el) => {
+      $(el).append("\n");
+    });
+    const text = $(".sqs-html-content").text();
+
+    // \b keeps the ASCII labels from matching inside longer words such as "contact:"
+    // or "drive:". It is not applied to 出演 because \b only recognises ASCII word
+    // characters and would never match in front of it.
+    const labelled = text.match(/(?:\b(?:live|act)|出演)[::]\s*([\s\S]+)/i);
+    if (labelled) {
+      const artists = extractArtistLines(labelled[1]);
+      if (artists.length > 0) return artists.join(" / ");
+    }
+
+    // Fallback: find the first line starting with ・ and extract from there.
+    const lines = text.split(/\n/);
+    const firstBullet = lines.findIndex((l) => /^・/.test(l.trim()));
+    if (firstBullet >= 0) {
+      const artists = extractArtistLines(lines.slice(firstBullet).join("\n"));
+      if (artists.length > 0) return artists.join(" / ");
+    }
+    return null;
+  } catch {
+    return null;
+  }
+}
+
+/** Scraper for shibasaki mod: reads the upcoming-event list, then looks up performers on each detail page. */
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    const listRes = await fetch(SCHEDULE_URL);
+    if (!listRes.ok) throw new Error(`HTTP ${listRes.status}`);
+    const $ = cheerio.load(await listRes.text());
+    const stubs: EventStub[] = [];
+
+    $("article.eventlist-event--upcoming").each((_, node) => {
+      const $card = $(node);
+
+      // Cards without an ISO date or without a title are skipped.
+      const date = $card.find("time.event-date").first().attr("datetime") ?? "";
+      if (!/^\d{4}-\d{2}-\d{2}$/.test(date)) return;
+
+      const title = $card.find("h1.eventlist-title a, h2.eventlist-title a").first().text().trim();
+      if (!title) return;
+
+      const startTime = $card.find("time.event-time-localized-start").first().text().trim() || null;
+
+      // Relative links are resolved against the venue base URL.
+      let sourceUrl: string | null = null;
+      const href = $card.find("a.eventlist-title-link").first().attr("href") ?? null;
+      if (href) {
+        sourceUrl = href.startsWith("http") ? href : `${venue.url}${href}`;
+      }
+
+      // Prefer the data-src attribute, then fall back to a plain src.
+      const lazySrc = $card.find("img[data-src]").first().attr("data-src");
+      const imageUrl = lazySrc ?? $card.find("img[src]").first().attr("src") ?? null;
+
+      stubs.push({ date, title, startTime, sourceUrl, imageUrl });
+    });
+
+    // One detail-page request per event, all started in parallel.
+    const artists = await Promise.all(
+      stubs.map(async (stub) => (stub.sourceUrl ? fetchArtist(stub.sourceUrl) : null))
+    );
+
+    const events: EventInput[] = [];
+    stubs.forEach((stub, index) => {
+      events.push({
+        venue_id: venue.id,
+        title: stub.title,
+        date: stub.date,
+        open_time: null,
+        start_time: stub.startTime,
+        price: null,
+        image_url: stub.imageUrl,
+        source_url: stub.sourceUrl,
+        artist: artists[index],
+      });
+    });
+    return events;
+  },
+};
diff --git a/app/scrapers/moon-step-nakano.ts b/app/scrapers/moon-step-nakano.ts
new file mode 100644
index 0000000..e67e128
--- /dev/null
+++ b/app/scrapers/moon-step-nakano.ts
@@ -0,0 +1,85 @@
+/**
+ * 中野 MOON STEP — https://nakano-dynamite.com/moonstep
+ *
+ * WordPress + The Events Calendar プラグイン。REST API で取得。
+ * エンドポイント: /moonstep/wp-json/tribe/events/v1/events
+ * description フィールドは HTML 文字列で、<pre> 内に出演情報が含まれる:
+ * 出演:\n<LIVE>\nアーティスト\n...
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+export const venue: VenueMeta = { // Venue metadata exported alongside the scraper.
+ id: "moon-step-nakano", // Stored as venue_id on every event this scraper emits.
+ name: "中野 MOON STEP",
+ url: "https://nakano-dynamite.com/moonstep",
+ area: "中野",
+};
+
+const API_URL = "https://nakano-dynamite.com/moonstep/wp-json/tribe/events/v1/events"; // Events endpoint; scrape() appends per_page and start_date query parameters.
+
+/**
+ * Extracts the performer list from an event's HTML description.
+ *
+ * Takes the text after the first "出演:" label up to the first blank line, removes a
+ * leading category tag such as "<LIVE>", "【DJ】" or "[GUEST]" from each line, and
+ * joins the remaining non-empty lines.
+ *
+ * @param descriptionHtml Raw HTML of the event description.
+ * @returns Names joined with " / ", or null when there is no label or no name.
+ */
+function parseArtists(descriptionHtml: string): string | null {
+  const text = cheerio.load(descriptionHtml).text();
+  const match = text.match(/出演[::]\s*([\s\S]+)/);
+  if (!match) return null;
+  // Stop at the first blank line (double \r\n or \n\n).
+  const section = match[1].split(/\r?\n\r?\n/)[0] ?? "";
+  // The brackets inside both character classes are escaped. Previously the closing
+  // class was written `[>】];]`: the bare "]" ended the class early, so the pattern
+  // demanded a literal ";]" after the tag and never stripped anything.
+  const leadingTag = /^\s*[<【\[&].*?[>】\];]\s*/;
+  const lines = section
+    .split(/\r?\n/)
+    .map((l) => l.replace(leadingTag, "").trim())
+    .filter((l) => l.length > 0);
+  return lines.length > 0 ? lines.join(" / ") : null;
+}
+
+/** Decodes decimal character references and the &amp; / &lt; / &gt; entities in one pass. */
+function decodeTitleEntities(raw: string): string {
+  const named: Record<string, string> = { amp: "&", lt: "<", gt: ">" };
+  // A single pass avoids double decoding ("&amp;lt;" must become the text "&lt;", not "<").
+  return raw.replace(/&(?:#(\d+)|(amp|lt|gt));/g, (whole: string, dec?: string, name?: string) => {
+    if (name !== undefined) return named[name] ?? whole;
+    const codePoint = Number(dec);
+    // fromCodePoint (unlike fromCharCode) handles characters above U+FFFF such as
+    // emoji, but throws beyond U+10FFFF, so out-of-range references are left as-is.
+    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
+  });
+}
+
+/**
+ * Scraper for Nakano MOON STEP, backed by the site's events REST endpoint.
+ *
+ * scrape() requests up to 50 events starting from today's date (taken from
+ * toISOString(), i.e. the UTC calendar day) and maps each API record to an event.
+ * It throws when the API responds with a non-OK status.
+ */
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    const today = new Date().toISOString().slice(0, 10);
+    const url = `${API_URL}?per_page=50&start_date=${today}`;
+    const res = await fetch(url);
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+
+    const data = await res.json() as { events: Record<string, unknown>[] };
+    const events: EventInput[] = [];
+
+    for (const e of data.events ?? []) {
+      // start_date begins with "YYYY-MM-DD"; records without a valid date are skipped.
+      const startDate = (e.start_date as string | undefined) ?? "";
+      const date = startDate.slice(0, 10);
+      if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) continue;
+
+      // Only build "HH:MM" when both parts are present; otherwise the template string
+      // would produce "undefined:undefined".
+      const startDetails = e.start_date_details as Record<string, string> | undefined;
+      const startTime =
+        startDetails?.hour != null && startDetails?.minutes != null
+          ? `${startDetails.hour}:${startDetails.minutes}`
+          : null;
+
+      const rawTitle = (e.title as string | undefined) ?? "";
+      const title = decodeTitleEntities(rawTitle).trim();
+      if (!title) continue;
+
+      const cost = (e.cost as string | undefined)?.trim() || null;
+      const sourceUrl = (e.url as string | undefined) ?? null;
+      // `image` may be false as well as an object or missing.
+      const image = e.image as Record<string, unknown> | undefined | false;
+      const imageUrl = image ? (image.url as string | undefined) ?? null : null;
+
+      const description = (e.description as string | undefined) ?? "";
+      const openMatch = description.match(/OPEN\s*(\d{1,2}:\d{2})/i);
+      const artist = parseArtists(description);
+
+      events.push({
+        venue_id: venue.id,
+        title,
+        date,
+        open_time: openMatch?.[1] ?? null,
+        // "00:00" is treated as "no start time given".
+        start_time: startTime && startTime !== "00:00" ? startTime : null,
+        price: cost,
+        image_url: imageUrl,
+        source_url: sourceUrl,
+        artist,
+      });
+    }
+
+    return events;
+  },
+};
diff --git a/app/scrapers/nine-spices.ts b/app/scrapers/nine-spices.ts
new file mode 100644
index 0000000..f4afa3d
--- /dev/null
+++ b/app/scrapers/nine-spices.ts
@@ -0,0 +1,94 @@
+/**
+ * Nine Spices (新宿) — https://9spices.rinky.info/schedule/
+ *
+ * WordPress ベースの独自テーマ。構造:
+ * <div class="event-cont-par YYYY-MM-DD">
+ * <h3 class="event-title sch"><a href="...">タイトル</a></h3>
+ * <div class="event-leftcol" itemprop="startDate" content="YYYY-MM-DDThh:mm">
+ * <div class="sch-actlist"><span class="actlist-name">アーティスト</span></div>
+ * <div class="sch-time"><div><span>OPEN</span><span>hh:mm</span></div><div><span>START</span>...</div></div>
+ * <div class="sch-price"><div><span>ADV</span><span>¥XXX</span></div></div>
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+export const venue: VenueMeta = { // Venue metadata exported alongside the scraper.
+ id: "nine-spices", // Stored as venue_id on every event this scraper emits.
+ name: "Nine Spices",
+ url: "https://9spices.rinky.info",
+ area: "新宿",
+};
+
+/** Scraper for Nine Spices: parses every event card on the single schedule page. */
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    const response = await fetch("https://9spices.rinky.info/schedule/");
+    if (!response.ok) throw new Error(`HTTP ${response.status}`);
+    const $ = cheerio.load(await response.text());
+    const collected: EventInput[] = [];
+
+    $("div[class*='event-cont-par']").each((_, node) => {
+      const $card = $(node);
+
+      // The ISO date is carried as an extra class name: "event-cont-par 2026-05-01".
+      const isoDate = /(\d{4}-\d{2}-\d{2})/.exec($card.attr("class") ?? "")?.[1];
+      if (!isoDate) return;
+
+      const $anchor = $card.find("h3.event-title a").first();
+      const title = $anchor.text().trim();
+      if (!title) return;
+
+      const performers = $card
+        .find("span.actlist-name")
+        .map((_, span) => $(span).text().trim())
+        .get();
+
+      // Each row is <div><span>LABEL</span><span>value</span></div>; a later row with
+      // the same label overwrites an earlier one.
+      const times: Record<string, string | null> = {};
+      $card.find("div.sch-time div").each((_, row) => {
+        const cells = $(row).find("span");
+        times[cells.eq(0).text().trim().toUpperCase()] = cells.eq(1).text().trim() || null;
+      });
+
+      // Price rows share that layout; rows missing either span text are ignored.
+      const priceRows: string[] = [];
+      $card.find("div.sch-price div").each((_, row) => {
+        const cells = $(row).find("span");
+        const label = cells.eq(0).text().trim();
+        const amount = cells.eq(1).text().trim();
+        if (!label || !amount) return;
+        priceRows.push(`${label} ${amount}`);
+      });
+
+      const ticketLink = $card
+        .find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='tiget'], a[href*='ticket']")
+        .first();
+
+      collected.push({
+        venue_id: venue.id,
+        title,
+        artist: performers.join("、") || null,
+        date: isoDate,
+        open_time: times.OPEN ?? null,
+        start_time: times.START ?? null,
+        price: priceRows.join(" / ") || null,
+        ticket_url: ticketLink.attr("href") ?? null,
+        image_url: $card.find("img.wp-post-image").first().attr("src") ?? null,
+        source_url: $anchor.attr("href") ?? null,
+      });
+    });
+
+    return collected;
+  },
+};
diff --git a/app/scrapers/nishieifuku-jam.ts b/app/scrapers/nishieifuku-jam.ts
new file mode 100644
index 0000000..c93b051
--- /dev/null
+++ b/app/scrapers/nishieifuku-jam.ts
@@ -0,0 +1,75 @@
+/**
+ * 西永福JAM — https://jam.rinky.info/events
+ *
+ * rinky.info プラットフォーム。meets-otsuka と同じ HTML 構造。
+ * <div class="blog-entry event-wrap" event-date="YYYY-MM-DD">
+ * <h2><a href="/events/ID">タイトル</a></h2>
+ * <p class="act"><span>アーティスト</span></p>
+ * <p class="time">OPEN 18:30 / START 19:00</p>
+ * <span class="ticket-price__label">価格</span>
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+export const venue: VenueMeta = { // Venue metadata exported alongside the scraper.
+ id: "nishieifuku-jam", // Stored as venue_id on every event this scraper emits.
+ name: "西永福JAM",
+ url: "https://jam.rinky.info", // Prefixed to the detail-page path of each event.
+ area: "西永福",
+};
+
+/**
+ * Scraper for Nishi-Eifuku JAM: parses every `div.blog-entry.event-wrap` on the
+ * events page. Throws when the page responds with a non-OK status.
+ */
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    const res = await fetch("https://jam.rinky.info/events");
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const $ = cheerio.load(await res.text());
+    const events: EventInput[] = [];
+
+    $("div.blog-entry.event-wrap").each((_, el) => {
+      const $el = $(el);
+
+      // The date comes from the custom event-date attribute; invalid values skip the card.
+      const date = $el.attr("event-date") ?? "";
+      if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return;
+
+      const $link = $el.find("h2 a").first();
+      const title = $link.text().trim();
+      if (!title) return;
+
+      // Only relative paths get the base URL prepended; an already absolute href is
+      // kept as-is instead of becoming "https://jam.rinky.infohttps://...".
+      const detailHref = $link.attr("href") ?? null;
+      const sourceUrl = detailHref
+        ? (detailHref.startsWith("http") ? detailHref : `${venue.url}${detailHref}`)
+        : null;
+
+      const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null;
+
+      // "OPEN 18:30 / START 19:00"; single-digit hours ("9:30") are accepted too.
+      const timeText = $el.find("p.time").first().text();
+      const openMatch = timeText.match(/OPEN\s*(\d{1,2}:\d{2})/i);
+      const startMatch = timeText.match(/START\s*(\d{1,2}:\d{2})/i);
+
+      const price = $el.find("span.ticket-price__label").first().text().trim() || null;
+
+      // The image is set as a CSS background: style="background-image: url(...)".
+      const bgStyle = $el.find("div.image-bg").attr("style") ?? "";
+      const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/);
+      const imageUrl = imgMatch?.[1] ?? null;
+
+      const ticketUrl =
+        $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket'], a[href*='tiget']")
+          .first().attr("href") ?? null;
+
+      events.push({
+        venue_id: venue.id,
+        title,
+        artist,
+        date,
+        open_time: openMatch?.[1] ?? null,
+        start_time: startMatch?.[1] ?? null,
+        price,
+        ticket_url: ticketUrl,
+        image_url: imageUrl,
+        source_url: sourceUrl,
+      });
+    });
+
+    return events;
+  },
+};
diff --git a/app/scrapers/shibuya-o.ts b/app/scrapers/shibuya-o.ts
index 1ad8d8c..3d6f192 100644
--- a/app/scrapers/shibuya-o.ts
+++ b/app/scrapers/shibuya-o.ts
@@ -1,8 +1,15 @@
/**
- * Shibuya O-East / O-West / O-Crest / O-Nest (渋谷)
- * https://www.shibuya-o.com/schedule/
+ * 渋谷 O-EAST / O-WEST / O-Crest / O-nest — https://shibuya-o.com
*
- * The page uses a unified schedule listing for all O venues.
+ * 各ベニューのスケジュールページを個別に取得して統合する。
+ * DOM 構造 (共通):
+ * <div class="p-scheduled-card">
+ * <a href="https://shibuya-o.com/{venue}/schedule/{slug}/">
+ * <span class="p-scheduled-card__date-item">05 / 01</span>
+ * <span class="p-scheduled-card__date-open">OPEN 18:00 / START 19:00</span>
+ * <span class="p-scheduled-card__title-main">タイトル</span>
+ * <li class="p-scheduled-card__artist-item">アーティスト</li>
+ * 年は nav リンク <a href="/east/schedule/?y=2026&m=6"> から取得。
*/
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
@@ -10,73 +17,99 @@ import type { EventInput } from "~/lib/db.server";
export const venue: VenueMeta = { // One venue record for all halls; every scraped event is stored under this id.
id: "shibuya-o",
- name: "渋谷 O-EAST / O-WEST",
- url: "https://www.shibuya-o.com",
+ name: "渋谷 O-EAST / O-WEST / O-Crest / O-nest",
+ url: "https://shibuya-o.com",
area: "渋谷",
};
-export const scraper: Scraper = {
- venue,
- async scrape(): Promise<EventInput[]> {
- const res = await fetch("https://www.shibuya-o.com/schedule/");
- if (!res.ok) throw new Error(`HTTP ${res.status}`);
- const html = await res.text();
- const $ = cheerio.load(html);
- const events: EventInput[] = [];
+const SUB_VENUES = ["east", "west", "crest", "nest"]; // URL path segment of each hall; each has its own schedule page under BASE.
+const BASE = "https://shibuya-o.com"; // Site origin used to build schedule URLs and to absolutize relative detail links.
- $(".schedule_list li, .c-schedule__item, .event-item").each((_, el) => {
- const $el = $(el);
+async function scrapeVenue(subVenue: string): Promise<EventInput[]> { // Scrapes one hall's schedule page into events; throws on a non-OK HTTP response.
+ const url = `${BASE}/${subVenue}/schedule/`;
+ const res = await fetch(url);
+ if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
+ const $ = cheerio.load(await res.text());
+ const events: EventInput[] = [];
- const title = $el.find(".schedule_title, .title, h3").first().text().trim();
- if (!title) return;
+ // Infer the page's year from the last nav link that carries ?y=YYYY&m=MM.
+ const nextHref = $("a[href*='?y='][href*='&m=']").last().attr("href") ?? ""; // NOTE(review): assumes the last matching link points at the following month; confirm against the live markup.
+ const nextYearMatch = nextHref.match(/[?&]y=(\d{4})/);
+ const nextMonthMatch = nextHref.match(/[?&]m=(\d{1,2})/);
+ const currentMonthRaw = $("div.p-schedule__month").first().text().trim();
+ const currentMonth = parseInt(currentMonthRaw, 10); // NaN when the month element is missing or does not start with digits.
- const rawDate =
- $el.find(".schedule_date, .date, time").first().text().trim() ||
- $el.find("time").attr("datetime") ||
- "";
- const date = parseJapaneseDate(rawDate);
- if (!date) return;
+ let year = new Date().getFullYear(); // Fallback when no nav link is found: the current calendar year.
+ if (nextYearMatch && nextMonthMatch) {
+ const nextYear = parseInt(nextYearMatch[1], 10);
+ const nextMonth = parseInt(nextMonthMatch[1], 10);
+ // If next month == current month + 1 (normal case), year == nextYear
+ // If current month == 12 and next month == 1, year == nextYear - 1
+ year = nextMonth === currentMonth + 1 ? nextYear : nextYear - 1;
+ }
- const hall = $el.find(".schedule_hall, .hall, .venue-name").first().text().trim() || null;
- const timeText = $el.find(".schedule_time, .time").first().text();
- const openMatch = timeText.match(/OPEN[:: ]*(\d{2}:\d{2})/i);
- const startMatch = timeText.match(/START[:: ]*(\d{2}:\d{2})/i);
+ $("div.p-scheduled-card").each((_, el) => {
+ const $el = $(el);
- const detailHref = $el.find("a[href]").first().attr("href") ?? null;
+ const dateRaw = $el.find("span.p-scheduled-card__date-item").first().text().trim();
+ // "05 / 01" → month=5, day=1
+ const dateMatch = dateRaw.match(/(\d{1,2})\s*\/\s*(\d{1,2})/);
+ if (!dateMatch) return;
+ const month = parseInt(dateMatch[1], 10);
+ const day = parseInt(dateMatch[2], 10);
+ if (!currentMonth || !month) return; // A NaN page month is falsy, so every card is skipped when the page month could not be read.
+ // Year rollover: a card whose month is lower than the page month (e.g. a January card on the December page) is dated in the following year.
+ const cardYear = month < currentMonth ? year + 1 : year;
+ const date = `${cardYear}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`;
- events.push({
- venue_id: venue.id,
- title,
- artist: hall,
- date,
- open_time: openMatch?.[1] ?? null,
- start_time: startMatch?.[1] ?? null,
- ticket_url:
- $el.find("a[href*='eplus'], a[href*='lawson'], a[href*='ticket']").first().attr("href") ?? null,
- image_url: $el.find("img").first().attr("src")
- ? absoluteUrl($el.find("img").first().attr("src")!, venue.url)
- : null,
- source_url: detailHref ? absoluteUrl(detailHref, venue.url) : null,
- });
- });
+ const title = $el.find("span.p-scheduled-card__title-main").first().text().trim();
+ if (!title) return;
- return events;
- },
-};
+ const openText = $el.find("span.p-scheduled-card__date-open").first().text().trim();
+ const openMatch = openText.match(/OPEN\s*(\d{2}:\d{2})/i);
+ const startMatch = openText.match(/START\s*(\d{2}:\d{2})/i);
-function parseJapaneseDate(raw: string): string | null {
- const m =
- raw.match(/(\d{4})[./年](\d{1,2})[./月](\d{1,2})/) ||
- raw.match(/(\d{1,2})[./月](\d{1,2})/);
- if (!m) return null;
- if (m.length === 4) {
- return `${m[1]}-${m[2].padStart(2, "0")}-${m[3].padStart(2, "0")}`;
- }
- const year = new Date().getFullYear();
- return `${year}-${m[1].padStart(2, "0")}-${m[2].padStart(2, "0")}`;
-}
+ const artists = $el.find("li.p-scheduled-card__artist-item")
+ .map((_, s) => $(s).text().trim()).get().join("、") || null;
+
+ const detailHref = $el.closest("a[href]").attr("href") ?? // Prefer a link wrapping the card, else the first link inside it.
+ $el.find("a[href]").first().attr("href") ?? null;
+ const sourceUrl = detailHref
+ ? (detailHref.startsWith("http") ? detailHref : `${BASE}${detailHref}`)
+ : null;
-function absoluteUrl(url: string, base: string): string {
- if (url.startsWith("http")) return url;
- return url.startsWith("/") ? base + url : `${base}/${url}`;
+ const imageUrl = $el.find("figure img").first().attr("src") ?? null;
+
+ events.push({
+ venue_id: venue.id,
+ title,
+ artist: artists,
+ date,
+ open_time: openMatch?.[1] ?? null,
+ start_time: startMatch?.[1] ?? null,
+ image_url: imageUrl,
+ source_url: sourceUrl,
+ });
+ });
+
+ return events;
}
+
+/**
+ * Scraper for the Shibuya O halls: scrapes every sub-venue page in parallel and
+ * merges the results.
+ *
+ * A failing sub-venue is skipped so the others are still returned, but when every
+ * sub-venue fails scrape() throws instead of reporting an empty schedule.
+ * Events sharing the same date and title are returned once.
+ */
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    const results = await Promise.allSettled(SUB_VENUES.map(scrapeVenue));
+
+    const all: EventInput[] = [];
+    const failures: string[] = [];
+    for (const r of results) {
+      if (r.status === "fulfilled") all.push(...r.value);
+      else failures.push(String(r.reason));
+    }
+    // A total failure (site down, URL scheme changed) must not look like "no events".
+    if (results.length > 0 && failures.length === results.length) {
+      throw new Error(`All sub-venue scrapes failed: ${failures.join("; ")}`);
+    }
+
+    // Deduplicate by date + title.
+    const seen = new Set<string>();
+    return all.filter((e) => {
+      const key = `${e.date}|${e.title}`;
+      if (seen.has(key)) return false;
+      seen.add(key);
+      return true;
+    });
+  },
+};
diff --git a/app/scrapers/warp-kichijoji.ts b/app/scrapers/warp-kichijoji.ts
index 8a828ea..8929fef 100644
--- a/app/scrapers/warp-kichijoji.ts
+++ b/app/scrapers/warp-kichijoji.ts
@@ -76,10 +76,28 @@ export const scraper: Scraper = {
? rawImg.replace(/^https?:\/\/sp-ao\.shortpixel\.ai\/client\/[^/]+\//, "")
: null;
+ // Artists in <div class="w-flyer"> separated by <br>
+ // notes-wrapper and detail-texts are nested inside w-flyer — clone and strip them
+ const $wFlyer = $el.find("div.w-flyer").first().clone();
+ $wFlyer.find("section.notes-wrapper, div.detail-texts").remove();
+ $wFlyer.find("br").replaceWith("\n");
+ const rawArtist = $wFlyer.text();
+ const artistLines: string[] = [];
+ for (const raw of rawArtist.split("\n")) {
+ const l = raw.trim();
+ if (!l) {
+ if (artistLines.length > 0) break; // stop at first blank line after artists
+ continue;
+ }
+ if (/^[■▼◼▶◆]|チケット|ticket|TICKET|予約|http|\d{1,2}:\d{2}|[¥¥]/i.test(l)) break;
+ artistLines.push(l);
+ }
+ const artist = artistLines.length > 0 ? artistLines.join(" / ") : null;
+
events.push({
venue_id: venue.id,
title,
- artist: null,
+ artist,
date,
open_time: isTime(openTime) ? openTime : null,
start_time: isTime(startTime) ? startTime : null,