summary | refs | log | tree | commit | diff
path: root/app/scrapers/meets-otsuka.ts
diff options
context:
space:
mode:
Diffstat (limited to 'app/scrapers/meets-otsuka.ts')
-rw-r--r-- app/scrapers/meets-otsuka.ts 116
1 files changed, 66 insertions, 50 deletions
diff --git a/app/scrapers/meets-otsuka.ts b/app/scrapers/meets-otsuka.ts
index 0b56251..0acc925 100644
--- a/app/scrapers/meets-otsuka.ts
+++ b/app/scrapers/meets-otsuka.ts
@@ -21,60 +21,76 @@ export const venue: VenueMeta = {
capacity: 100,
};
+function parseHtml(html: string): EventInput[] {
+ const $ = cheerio.load(html);
+ const events: EventInput[] = [];
+
+ $("div.blog-entry.event-wrap").each((_, el) => {
+ const $el = $(el);
+
+ const date = $el.attr("event-date") ?? "";
+ if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return;
+
+ const $link = $el.find("h2 a").first();
+ const title = $link.text().trim();
+ if (!title) return;
+
+ const detailPath = $link.attr("href") ?? null;
+ const sourceUrl = detailPath ? `${venue.url}${detailPath}` : null;
+
+ const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null;
+
+ const timeText = $el.find("p.time").first().text();
+ const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i);
+ const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i);
+
+ const price = $el.find("span.ticket-price__label").first().text().trim() || null;
+
+ const bgStyle = $el.find("div.image-bg").attr("style") ?? "";
+ const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/);
+ const imageUrl = imgMatch?.[1] ?? null;
+
+ const ticketUrl =
+ $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket']")
+ .first().attr("href") ?? null;
+
+ events.push({
+ venue_id: venue.id,
+ title,
+ artist,
+ date,
+ open_time: openMatch?.[1] ?? null,
+ start_time: startMatch?.[1] ?? null,
+ price,
+ ticket_url: ticketUrl,
+ image_url: imageUrl,
+ source_url: sourceUrl,
+ });
+ });
+
+ return events;
+}
+
export const scraper: Scraper = {
venue,
async scrape(): Promise<EventInput[]> {
- const res = await fetch("https://meets.rinky.info/events");
- if (!res.ok) throw new Error(`HTTP ${res.status}`);
- const $ = cheerio.load(await res.text());
- const events: EventInput[] = [];
-
- $("div.blog-entry.event-wrap").each((_, el) => {
- const $el = $(el);
-
- const date = $el.attr("event-date") ?? "";
- if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return;
-
- const $link = $el.find("h2 a").first();
- const title = $link.text().trim();
- if (!title) return;
-
- const detailPath = $link.attr("href") ?? null;
- const sourceUrl = detailPath
- ? `${venue.url}${detailPath}`
- : null;
-
- const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null;
-
- const timeText = $el.find("p.time").first().text();
- const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i);
- const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i);
-
- const price = $el.find("span.ticket-price__label").first().text().trim() || null;
-
- // background-image: url("...")
- const bgStyle = $el.find("div.image-bg").attr("style") ?? "";
- const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/);
- const imageUrl = imgMatch?.[1] ?? null;
-
- const ticketUrl =
- $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket']")
- .first().attr("href") ?? null;
-
- events.push({
- venue_id: venue.id,
- title,
- artist,
- date,
- open_time: openMatch?.[1] ?? null,
- start_time: startMatch?.[1] ?? null,
- price,
- ticket_url: ticketUrl,
- image_url: imageUrl,
- source_url: sourceUrl,
- });
+ const now = new Date();
+ const urls = [0, 1, 2].map((offset) => {
+ const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
+ const ym = `${d.getFullYear()}/${String(d.getMonth() + 1).padStart(2, "0")}`;
+ return `https://meets.rinky.info/events?date=${encodeURIComponent(ym)}`;
});
- return events;
+ const htmls = await Promise.all(
+ urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : "")))
+ );
+
+ const seen = new Set<string>();
+ return htmls.flatMap(parseHtml).filter((e) => {
+ const key = `${e.date}|${e.title}`;
+ if (seen.has(key)) return false;
+ seen.add(key);
+ return true;
+ });
},
};