summary | refs | log | tree | commit | diff
path: root/app/scrapers
diff options
context:
space:
mode:
Diffstat (limited to 'app/scrapers')
-rw-r--r-- app/scrapers/club-quattro.ts 94
-rw-r--r-- app/scrapers/duo-music-exchange.ts 103
-rw-r--r-- app/scrapers/fad-yokohama.ts 31
-rw-r--r-- app/scrapers/fever-shindaita.ts 12
-rw-r--r-- app/scrapers/index.ts 2
-rw-r--r-- app/scrapers/liquid-room.ts 110
-rw-r--r-- app/scrapers/meets-otsuka.ts 116
-rw-r--r-- app/scrapers/nishieifuku-jam.ts 113
-rw-r--r-- app/scrapers/shibuya-o.ts 45
9 files changed, 391 insertions, 235 deletions
diff --git a/app/scrapers/club-quattro.ts b/app/scrapers/club-quattro.ts
index 10b60e9..cbb898e 100644
--- a/app/scrapers/club-quattro.ts
+++ b/app/scrapers/club-quattro.ts
@@ -10,53 +10,71 @@ export const venue: VenueMeta = {
capacity: 750,
};
-export const scraper: Scraper = {
- venue,
- async scrape(): Promise<EventInput[]> {
- const res = await fetch("https://www.club-quattro.com/shibuya/schedule/");
- if (!res.ok) throw new Error(`HTTP ${res.status}`);
- const html = await res.text();
- const $ = cheerio.load(html);
- const events: EventInput[] = [];
+function parseHtml(html: string): EventInput[] {
+ const $ = cheerio.load(html);
+ const events: EventInput[] = [];
- $("li[data-event-date]").each((_, el) => {
- const $el = $(el);
+ $("li[data-event-date]").each((_, el) => {
+ const $el = $(el);
- const date = $el.attr("data-event-date") ?? "";
- if (!date) return;
+ const date = $el.attr("data-event-date") ?? "";
+ if (!date) return;
- const title = $el.find("p.txt-02").text().trim();
- if (!title) return;
+ const title = $el.find("p.txt-02").text().trim();
+ if (!title) return;
- const artist = $el.find("p.txt-01 span").text().trim() || null;
+ const artist = $el.find("p.txt-01 span").text().trim() || null;
- let openTime: string | null = null;
- let startTime: string | null = null;
- $el.find("dl.detail-list .bundle").each((_, bundle) => {
- const label = $(bundle).find("dt").text().trim();
- if (label.includes("開場") || label.includes("開演")) {
- const times = $(bundle).find("dd").text().trim().match(/\d{2}:\d{2}/g) ?? [];
- openTime = times[0] ?? null;
- startTime = times[1] ?? null;
- }
- });
+ let openTime: string | null = null;
+ let startTime: string | null = null;
+ $el.find("dl.detail-list .bundle").each((_, bundle) => {
+ const label = $(bundle).find("dt").text().trim();
+ if (label.includes("開場") || label.includes("開演")) {
+ const times = $(bundle).find("dd").text().trim().match(/\d{2}:\d{2}/g) ?? [];
+ openTime = times[0] ?? null;
+ startTime = times[1] ?? null;
+ }
+ });
- const href = $el.find("a").first().attr("href") ?? null;
- const imageSrc = $el.find(".front img").attr("src") ?? null;
+ const href = $el.find("a").first().attr("href") ?? null;
+ const imageSrc = $el.find(".front img").attr("src") ?? null;
- events.push({
- venue_id: venue.id,
- title,
- artist,
- date,
- open_time: openTime,
- start_time: startTime,
- image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null,
- source_url: href ? absoluteUrl(href, venue.url) : null,
- });
+ events.push({
+ venue_id: venue.id,
+ title,
+ artist,
+ date,
+ open_time: openTime,
+ start_time: startTime,
+ image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null,
+ source_url: href ? absoluteUrl(href, venue.url) : null,
});
+ });
- return events;
+ return events;
+}
+
+export const scraper: Scraper = {
+ venue,
+ async scrape(): Promise<EventInput[]> {
+ const now = new Date();
+ const urls = [0, 1, 2].map((offset) => {
+ const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
+ const ym = `${d.getFullYear()}${String(d.getMonth() + 1).padStart(2, "0")}`;
+ return `https://www.club-quattro.com/shibuya/schedule/?ym=${ym}`;
+ });
+
+ const htmls = await Promise.all(
+ urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : "")))
+ );
+
+ const seen = new Set<string>();
+ return htmls.flatMap(parseHtml).filter((e) => {
+ const key = `${e.date}|${e.title}`;
+ if (seen.has(key)) return false;
+ seen.add(key);
+ return true;
+ });
},
};
diff --git a/app/scrapers/duo-music-exchange.ts b/app/scrapers/duo-music-exchange.ts
new file mode 100644
index 0000000..57814ea
--- /dev/null
+++ b/app/scrapers/duo-music-exchange.ts
@@ -0,0 +1,103 @@
+/**
+ * duo MUSIC EXCHANGE — https://duomusicexchange.com
+ *
+ * 月別HTML: /schedule/YYYY/index_YYYY-MM.html
+ * DOM構造:
+ * <section id="daybox">
+ * <div class="date"><span class="day">01</span></div>
+ * <div class="sche-details">
+ * <span class="artist">アーティスト名</span>
+ * <span class="details-title">イベントタイトル</span>
+ * <dl class="row">
+ * <dt>OPEN/START</dt><dd>18:00 / 19:00</dd>
+ * <dt>ADV./DOOR</dt><dd>¥3,000 / ¥3,500</dd>
+ * <dt>Ticket.</dt><dd><a href="...">...</a></dd>
+ * </dl>
+ * </div>
+ * </section>
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+export const venue: VenueMeta = {
+ id: "duo-music-exchange",
+ name: "duo MUSIC EXCHANGE",
+ url: "https://duomusicexchange.com",
+ area: "渋谷",
+ capacity: 700,
+};
+
+async function scrapeMonth(year: number, month: number): Promise<EventInput[]> {
+ const mm = String(month).padStart(2, "0");
+ const url = `${venue.url}/schedule/${year}/index_${year}-${mm}.html`;
+ const res = await fetch(url);
+ if (!res.ok) return [];
+ const $ = cheerio.load(await res.text());
+ const events: EventInput[] = [];
+
+ $("section#daybox").each((_, el) => {
+ const $el = $(el);
+
+ const dayStr = $el.find(".date .day").first().text().trim();
+ const day = parseInt(dayStr, 10);
+ if (!day) return;
+ const date = `${year}-${mm}-${String(day).padStart(2, "0")}`;
+
+ const artist = $el.find(".sche-details .artist").first().text().trim() || null;
+ const title = $el.find(".sche-details .details-title").first().text().trim();
+ if (!title) return;
+
+ let openTime: string | null = null;
+ let startTime: string | null = null;
+ let price: string | null = null;
+ let ticketUrl: string | null = null;
+
+ $el.find("dl.row dt").each((_, dt) => {
+ const label = $(dt).text().trim();
+ const $dd = $(dt).next("dd");
+ if (/OPEN/i.test(label)) {
+ const times = $dd.text().trim().match(/(\d{1,2}:\d{2})/g) ?? [];
+ openTime = times[0] ?? null;
+ startTime = times[1] ?? null;
+ } else if (/ADV/i.test(label)) {
+ price = $dd.text().trim() || null;
+ } else if (/Ticket/i.test(label)) {
+ ticketUrl = $dd.find("a[href]").first().attr("href") ?? null;
+ }
+ });
+
+ const imgSrc = $el.find("img").first().attr("src") ?? null;
+ const imageUrl = imgSrc
+ ? (imgSrc.startsWith("http") ? imgSrc : `${venue.url}/schedule/${year}/${imgSrc}`)
+ : null;
+
+ events.push({
+ venue_id: venue.id,
+ title,
+ artist,
+ date,
+ open_time: openTime,
+ start_time: startTime,
+ price,
+ ticket_url: ticketUrl,
+ image_url: imageUrl,
+ source_url: url,
+ });
+ });
+
+ return events;
+}
+
+export const scraper: Scraper = {
+ venue,
+ async scrape(): Promise<EventInput[]> {
+ const now = new Date();
+ const months = [0, 1, 2].map((offset) => {
+ const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
+ return { year: d.getFullYear(), month: d.getMonth() + 1 };
+ });
+ const results = await Promise.all(months.map(({ year, month }) => scrapeMonth(year, month)));
+ return results.flat();
+ },
+};
diff --git a/app/scrapers/fad-yokohama.ts b/app/scrapers/fad-yokohama.ts
index a01ea0d..f8f7cbc 100644
--- a/app/scrapers/fad-yokohama.ts
+++ b/app/scrapers/fad-yokohama.ts
@@ -151,27 +151,18 @@ function parsePageEvents(
export const scraper: Scraper = {
venue,
async scrape(): Promise<EventInput[]> {
- const res = await fetch(SCHEDULE_URL);
- if (!res.ok) throw new Error(`HTTP ${res.status}`);
- const html = await res.text();
-
- const { year, month, nextUrl } = getMonthContext(html);
- const events = parsePageEvents(html, year, month, SCHEDULE_URL);
-
- if (nextUrl) {
- const nextRes = await fetch(nextUrl);
- if (nextRes.ok) {
- const nextHtml = await nextRes.text();
- let nextMonth = month + 1;
- let nextYear = year;
- if (nextMonth > 12) {
- nextMonth = 1;
- nextYear++;
- }
- events.push(...parsePageEvents(nextHtml, nextYear, nextMonth, nextUrl));
- }
+ const allEvents: EventInput[] = [];
+ let url: string | null = SCHEDULE_URL;
+
+ for (let page = 0; page < 3 && url; page++) {
+ const res = await fetch(url);
+ if (!res.ok) break;
+ const html = await res.text();
+ const { year, month, nextUrl } = getMonthContext(html);
+ allEvents.push(...parsePageEvents(html, year, month, url));
+ url = nextUrl;
}
- return events;
+ return allEvents;
},
};
diff --git a/app/scrapers/fever-shindaita.ts b/app/scrapers/fever-shindaita.ts
index 62c2e2c..6356343 100644
--- a/app/scrapers/fever-shindaita.ts
+++ b/app/scrapers/fever-shindaita.ts
@@ -108,11 +108,11 @@ export const scraper: Scraper = {
venue,
async scrape(): Promise<EventInput[]> {
const now = new Date();
- const thisMonth = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, "0")}`;
- const next = new Date(now.getFullYear(), now.getMonth() + 1, 1);
- const nextMonth = `${next.getFullYear()}-${String(next.getMonth() + 1).padStart(2, "0")}`;
-
- const [a, b] = await Promise.all([scrapeMonth(thisMonth), scrapeMonth(nextMonth)]);
- return [...a, ...b];
+ const months = [0, 1, 2].map((offset) => {
+ const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
+ return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}`;
+ });
+ const results = await Promise.all(months.map(scrapeMonth));
+ return results.flat();
},
};
diff --git a/app/scrapers/index.ts b/app/scrapers/index.ts
index c38816f..e812626 100644
--- a/app/scrapers/index.ts
+++ b/app/scrapers/index.ts
@@ -20,6 +20,7 @@ import { scraper as warpKichijoji } from "./warp-kichijoji";
import { scraper as pitbarNishiogikubo } from "./pitbar-nishiogikubo";
import { scraper as naveyFloor } from "./navey-floor";
import { scraper as shimokitazawaEra } from "./shimokitazawa-era";
+import { scraper as duoMusicExchange } from "./duo-music-exchange";
export const ALL_SCRAPERS: Scraper[] = [
liquidRoom,
@@ -39,6 +40,7 @@ export const ALL_SCRAPERS: Scraper[] = [
pitbarNishiogikubo,
naveyFloor,
shimokitazawaEra,
+ duoMusicExchange,
];
export type { Scraper } from "./base";
diff --git a/app/scrapers/liquid-room.ts b/app/scrapers/liquid-room.ts
index 1eeade6..a1265c8 100644
--- a/app/scrapers/liquid-room.ts
+++ b/app/scrapers/liquid-room.ts
@@ -10,62 +10,76 @@ export const venue: VenueMeta = {
capacity: 1000,
};
-export const scraper: Scraper = {
- venue,
- async scrape(): Promise<EventInput[]> {
- const res = await fetch("https://www.liquidroom.net/schedule");
- if (!res.ok) throw new Error(`HTTP ${res.status}`);
- const html = await res.text();
- const $ = cheerio.load(html);
- const events: EventInput[] = [];
+function parseHtml(html: string): EventInput[] {
+ const $ = cheerio.load(html);
+ const events: EventInput[] = [];
- $("article").each((_, el) => {
- const $el = $(el);
+ $("article").each((_, el) => {
+ const $el = $(el);
- const href = $el.find("a.s_link").attr("href") ?? "";
- // Date is encoded in the URL: e.g. /schedule/eventname_20260501
- const dateMatch = href.match(/_(\d{4})(\d{2})(\d{2})$/);
- if (!dateMatch) return;
- const date = `${dateMatch[1]}-${dateMatch[2]}-${dateMatch[3]}`;
+ const href = $el.find("a.s_link").attr("href") ?? "";
+ const dateMatch = href.match(/_(\d{4})(\d{2})(\d{2})$/);
+ if (!dateMatch) return;
+ const date = `${dateMatch[1]}-${dateMatch[2]}-${dateMatch[3]}`;
- const h2 = $el.find("h2").first().text().trim();
- if (!h2) return;
+ const h2 = $el.find("h2").first().text().trim();
+ if (!h2) return;
- const subtitle = $el.find("p.subtitle").first().text().trim();
- // h2 is the artist/band name; subtitle (if present) is the event title
- const title = subtitle || h2;
- const artist = subtitle ? h2 : null;
+ const subtitle = $el.find("p.subtitle").first().text().trim();
+ const title = subtitle || h2;
+ const artist = subtitle ? h2 : null;
- const openTime =
- $el
- .find("dl")
- .filter((_, dl) => $(dl).find("dt").text().includes("OPEN"))
- .find("dd")
- .text()
- .trim()
- .match(/\d{2}:\d{2}/)?.[0] ?? null;
+ const openTime =
+ $el
+ .find("dl")
+ .filter((_, dl) => $(dl).find("dt").text().includes("OPEN"))
+ .find("dd")
+ .text()
+ .trim()
+ .match(/\d{2}:\d{2}/)?.[0] ?? null;
- const startTime =
- $el
- .find("dl")
- .filter((_, dl) => $(dl).find("dt").text().includes("START"))
- .find("dd")
- .text()
- .trim()
- .match(/\d{2}:\d{2}/)?.[0] ?? null;
+ const startTime =
+ $el
+ .find("dl")
+ .filter((_, dl) => $(dl).find("dt").text().includes("START"))
+ .find("dd")
+ .text()
+ .trim()
+ .match(/\d{2}:\d{2}/)?.[0] ?? null;
- events.push({
- venue_id: venue.id,
- title,
- artist,
- date,
- open_time: openTime,
- start_time: startTime,
- image_url: $el.find("div.left img").attr("src") ?? null,
- source_url: href,
- });
+ events.push({
+ venue_id: venue.id,
+ title,
+ artist,
+ date,
+ open_time: openTime,
+ start_time: startTime,
+ image_url: $el.find("div.left img").attr("src") ?? null,
+ source_url: href,
});
+ });
- return events;
+ return events;
+}
+
+export const scraper: Scraper = {
+ venue,
+ async scrape(): Promise<EventInput[]> {
+ const now = new Date();
+ const urls = [0, 1, 2].map((offset) => {
+ const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
+ return `https://www.liquidroom.net/schedule/${d.getFullYear()}/${String(d.getMonth() + 1).padStart(2, "0")}`;
+ });
+
+ const htmls = await Promise.all(
+ urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : "")))
+ );
+
+ const seen = new Set<string>();
+ return htmls.flatMap(parseHtml).filter((e) => {
+ if (seen.has(e.source_url ?? e.title)) return false;
+ seen.add(e.source_url ?? e.title);
+ return true;
+ });
},
};
diff --git a/app/scrapers/meets-otsuka.ts b/app/scrapers/meets-otsuka.ts
index 0b56251..0acc925 100644
--- a/app/scrapers/meets-otsuka.ts
+++ b/app/scrapers/meets-otsuka.ts
@@ -21,60 +21,76 @@ export const venue: VenueMeta = {
capacity: 100,
};
+function parseHtml(html: string): EventInput[] {
+ const $ = cheerio.load(html);
+ const events: EventInput[] = [];
+
+ $("div.blog-entry.event-wrap").each((_, el) => {
+ const $el = $(el);
+
+ const date = $el.attr("event-date") ?? "";
+ if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return;
+
+ const $link = $el.find("h2 a").first();
+ const title = $link.text().trim();
+ if (!title) return;
+
+ const detailPath = $link.attr("href") ?? null;
+ const sourceUrl = detailPath ? `${venue.url}${detailPath}` : null;
+
+ const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null;
+
+ const timeText = $el.find("p.time").first().text();
+ const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i);
+ const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i);
+
+ const price = $el.find("span.ticket-price__label").first().text().trim() || null;
+
+ const bgStyle = $el.find("div.image-bg").attr("style") ?? "";
+ const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/);
+ const imageUrl = imgMatch?.[1] ?? null;
+
+ const ticketUrl =
+ $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket']")
+ .first().attr("href") ?? null;
+
+ events.push({
+ venue_id: venue.id,
+ title,
+ artist,
+ date,
+ open_time: openMatch?.[1] ?? null,
+ start_time: startMatch?.[1] ?? null,
+ price,
+ ticket_url: ticketUrl,
+ image_url: imageUrl,
+ source_url: sourceUrl,
+ });
+ });
+
+ return events;
+}
+
export const scraper: Scraper = {
venue,
async scrape(): Promise<EventInput[]> {
- const res = await fetch("https://meets.rinky.info/events");
- if (!res.ok) throw new Error(`HTTP ${res.status}`);
- const $ = cheerio.load(await res.text());
- const events: EventInput[] = [];
-
- $("div.blog-entry.event-wrap").each((_, el) => {
- const $el = $(el);
-
- const date = $el.attr("event-date") ?? "";
- if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return;
-
- const $link = $el.find("h2 a").first();
- const title = $link.text().trim();
- if (!title) return;
-
- const detailPath = $link.attr("href") ?? null;
- const sourceUrl = detailPath
- ? `${venue.url}${detailPath}`
- : null;
-
- const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null;
-
- const timeText = $el.find("p.time").first().text();
- const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i);
- const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i);
-
- const price = $el.find("span.ticket-price__label").first().text().trim() || null;
-
- // background-image: url("...")
- const bgStyle = $el.find("div.image-bg").attr("style") ?? "";
- const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/);
- const imageUrl = imgMatch?.[1] ?? null;
-
- const ticketUrl =
- $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket']")
- .first().attr("href") ?? null;
-
- events.push({
- venue_id: venue.id,
- title,
- artist,
- date,
- open_time: openMatch?.[1] ?? null,
- start_time: startMatch?.[1] ?? null,
- price,
- ticket_url: ticketUrl,
- image_url: imageUrl,
- source_url: sourceUrl,
- });
+ const now = new Date();
+ const urls = [0, 1, 2].map((offset) => {
+ const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
+ const ym = `${d.getFullYear()}/${String(d.getMonth() + 1).padStart(2, "0")}`;
+ return `https://meets.rinky.info/events?date=${encodeURIComponent(ym)}`;
});
- return events;
+ const htmls = await Promise.all(
+ urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : "")))
+ );
+
+ const seen = new Set<string>();
+ return htmls.flatMap(parseHtml).filter((e) => {
+ const key = `${e.date}|${e.title}`;
+ if (seen.has(key)) return false;
+ seen.add(key);
+ return true;
+ });
},
};
diff --git a/app/scrapers/nishieifuku-jam.ts b/app/scrapers/nishieifuku-jam.ts
index 7408e02..094d5fe 100644
--- a/app/scrapers/nishieifuku-jam.ts
+++ b/app/scrapers/nishieifuku-jam.ts
@@ -20,57 +20,76 @@ export const venue: VenueMeta = {
capacity: 250,
};
+function parseHtml(html: string): EventInput[] {
+ const $ = cheerio.load(html);
+ const events: EventInput[] = [];
+
+ $("div.blog-entry.event-wrap").each((_, el) => {
+ const $el = $(el);
+
+ const date = $el.attr("event-date") ?? "";
+ if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return;
+
+ const $link = $el.find("h2 a").first();
+ const title = $link.text().trim();
+ if (!title) return;
+
+ const detailPath = $link.attr("href") ?? null;
+ const sourceUrl = detailPath ? `${venue.url}${detailPath}` : null;
+
+ const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null;
+
+ const timeText = $el.find("p.time").first().text();
+ const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i);
+ const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i);
+
+ const price = $el.find("span.ticket-price__label").first().text().trim() || null;
+
+ const bgStyle = $el.find("div.image-bg").attr("style") ?? "";
+ const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/);
+ const imageUrl = imgMatch?.[1] ?? null;
+
+ const ticketUrl =
+ $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket'], a[href*='tiget']")
+ .first().attr("href") ?? null;
+
+ events.push({
+ venue_id: venue.id,
+ title,
+ artist,
+ date,
+ open_time: openMatch?.[1] ?? null,
+ start_time: startMatch?.[1] ?? null,
+ price,
+ ticket_url: ticketUrl,
+ image_url: imageUrl,
+ source_url: sourceUrl,
+ });
+ });
+
+ return events;
+}
+
export const scraper: Scraper = {
venue,
async scrape(): Promise<EventInput[]> {
- const res = await fetch("https://jam.rinky.info/events");
- if (!res.ok) throw new Error(`HTTP ${res.status}`);
- const $ = cheerio.load(await res.text());
- const events: EventInput[] = [];
-
- $("div.blog-entry.event-wrap").each((_, el) => {
- const $el = $(el);
-
- const date = $el.attr("event-date") ?? "";
- if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return;
-
- const $link = $el.find("h2 a").first();
- const title = $link.text().trim();
- if (!title) return;
-
- const detailPath = $link.attr("href") ?? null;
- const sourceUrl = detailPath ? `${venue.url}${detailPath}` : null;
-
- const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null;
-
- const timeText = $el.find("p.time").first().text();
- const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i);
- const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i);
-
- const price = $el.find("span.ticket-price__label").first().text().trim() || null;
-
- const bgStyle = $el.find("div.image-bg").attr("style") ?? "";
- const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/);
- const imageUrl = imgMatch?.[1] ?? null;
-
- const ticketUrl =
- $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket'], a[href*='tiget']")
- .first().attr("href") ?? null;
-
- events.push({
- venue_id: venue.id,
- title,
- artist,
- date,
- open_time: openMatch?.[1] ?? null,
- start_time: startMatch?.[1] ?? null,
- price,
- ticket_url: ticketUrl,
- image_url: imageUrl,
- source_url: sourceUrl,
- });
+ const now = new Date();
+ const urls = [0, 1, 2].map((offset) => {
+ const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
+ const ym = `${d.getFullYear()}/${String(d.getMonth() + 1).padStart(2, "0")}`;
+ return `https://jam.rinky.info/events?date=${encodeURIComponent(ym)}`;
});
- return events;
+ const htmls = await Promise.all(
+ urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : "")))
+ );
+
+ const seen = new Set<string>();
+ return htmls.flatMap(parseHtml).filter((e) => {
+ const key = `${e.date}|${e.title}`;
+ if (seen.has(key)) return false;
+ seen.add(key);
+ return true;
+ });
},
};
diff --git a/app/scrapers/shibuya-o.ts b/app/scrapers/shibuya-o.ts
index c674cfc..6d394ff 100644
--- a/app/scrapers/shibuya-o.ts
+++ b/app/scrapers/shibuya-o.ts
@@ -26,42 +26,25 @@ export const venue: VenueMeta = {
const SUB_VENUES = ["east", "west", "crest", "nest"];
const BASE = "https://shibuya-o.com";
-async function scrapeVenue(subVenue: string): Promise<EventInput[]> {
- const url = `${BASE}/${subVenue}/schedule/`;
+async function scrapeVenueMonth(subVenue: string, year: number, month: number): Promise<EventInput[]> {
+ const url = `${BASE}/${subVenue}/schedule/?y=${year}&m=${month}`;
const res = await fetch(url);
if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`);
const $ = cheerio.load(await res.text());
const events: EventInput[] = [];
- // Extract year: try "next" nav link (?y=YYYY&m=MM)
- const nextHref = $("a[href*='?y='][href*='&m=']").last().attr("href") ?? "";
- const nextYearMatch = nextHref.match(/[?&]y=(\d{4})/);
- const nextMonthMatch = nextHref.match(/[?&]m=(\d{1,2})/);
- const currentMonthRaw = $("div.p-schedule__month").first().text().trim();
- const currentMonth = parseInt(currentMonthRaw, 10);
-
- let year = new Date().getFullYear();
- if (nextYearMatch && nextMonthMatch) {
- const nextYear = parseInt(nextYearMatch[1], 10);
- const nextMonth = parseInt(nextMonthMatch[1], 10);
- // If next month == current month + 1 (normal case), year == nextYear
- // If current month == 12 and next month == 1, year == nextYear - 1
- year = nextMonth === currentMonth + 1 ? nextYear : nextYear - 1;
- }
-
$("div.p-scheduled-card").each((_, el) => {
const $el = $(el);
const dateRaw = $el.find("span.p-scheduled-card__date-item").first().text().trim();
- // "05 / 01" → month=5, day=1
const dateMatch = dateRaw.match(/(\d{1,2})\s*\/\s*(\d{1,2})/);
if (!dateMatch) return;
- const month = parseInt(dateMatch[1], 10);
+ const cardMonth = parseInt(dateMatch[1], 10);
const day = parseInt(dateMatch[2], 10);
- if (!currentMonth || !month) return;
- // Handle year rollover (December cards on January page, etc.)
- const cardYear = month < currentMonth ? year + 1 : year;
- const date = `${cardYear}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`;
+ if (!cardMonth) return;
+ // Handle year rollover (e.g. December page showing January events)
+ const cardYear = cardMonth < month ? year + 1 : year;
+ const date = `${cardYear}-${String(cardMonth).padStart(2, "0")}-${String(day).padStart(2, "0")}`;
const title = $el.find("span.p-scheduled-card__title-main").first().text().trim();
if (!title) return;
@@ -99,12 +82,22 @@ async function scrapeVenue(subVenue: string): Promise<EventInput[]> {
export const scraper: Scraper = {
venue,
async scrape(): Promise<EventInput[]> {
- const results = await Promise.allSettled(SUB_VENUES.map(scrapeVenue));
+ const now = new Date();
+ const months = [0, 1, 2].map((offset) => {
+ const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
+ return { year: d.getFullYear(), month: d.getMonth() + 1 };
+ });
+
+ const tasks = SUB_VENUES.flatMap((sub) =>
+ months.map(({ year, month }) => scrapeVenueMonth(sub, year, month))
+ );
+ const results = await Promise.allSettled(tasks);
+
const all: EventInput[] = [];
for (const r of results) {
if (r.status === "fulfilled") all.push(...r.value);
}
- // Deduplicate by date + title
+
const seen = new Set<string>();
return all.filter((e) => {
const key = `${e.date}|${e.title}`;