summaryrefslogtreecommitdiff
path: root/app/scrapers/club-quattro.ts
diff options
context:
space:
mode:
Diffstat (limited to 'app/scrapers/club-quattro.ts')
-rw-r--r--app/scrapers/club-quattro.ts94
1 files changed, 56 insertions, 38 deletions
diff --git a/app/scrapers/club-quattro.ts b/app/scrapers/club-quattro.ts
index 10b60e9..cbb898e 100644
--- a/app/scrapers/club-quattro.ts
+++ b/app/scrapers/club-quattro.ts
@@ -10,53 +10,71 @@ export const venue: VenueMeta = {
capacity: 750,
};
-export const scraper: Scraper = {
- venue,
- async scrape(): Promise<EventInput[]> {
- const res = await fetch("https://www.club-quattro.com/shibuya/schedule/");
- if (!res.ok) throw new Error(`HTTP ${res.status}`);
- const html = await res.text();
- const $ = cheerio.load(html);
- const events: EventInput[] = [];
+function parseHtml(html: string): EventInput[] {
+ const $ = cheerio.load(html);
+ const events: EventInput[] = [];
- $("li[data-event-date]").each((_, el) => {
- const $el = $(el);
+ $("li[data-event-date]").each((_, el) => {
+ const $el = $(el);
- const date = $el.attr("data-event-date") ?? "";
- if (!date) return;
+ const date = $el.attr("data-event-date") ?? "";
+ if (!date) return;
- const title = $el.find("p.txt-02").text().trim();
- if (!title) return;
+ const title = $el.find("p.txt-02").text().trim();
+ if (!title) return;
- const artist = $el.find("p.txt-01 span").text().trim() || null;
+ const artist = $el.find("p.txt-01 span").text().trim() || null;
- let openTime: string | null = null;
- let startTime: string | null = null;
- $el.find("dl.detail-list .bundle").each((_, bundle) => {
- const label = $(bundle).find("dt").text().trim();
- if (label.includes("開場") || label.includes("開演")) {
- const times = $(bundle).find("dd").text().trim().match(/\d{2}:\d{2}/g) ?? [];
- openTime = times[0] ?? null;
- startTime = times[1] ?? null;
- }
- });
+ let openTime: string | null = null;
+ let startTime: string | null = null;
+ $el.find("dl.detail-list .bundle").each((_, bundle) => {
+ const label = $(bundle).find("dt").text().trim();
+ if (label.includes("開場") || label.includes("開演")) {
+ const times = $(bundle).find("dd").text().trim().match(/\d{2}:\d{2}/g) ?? [];
+ openTime = times[0] ?? null;
+ startTime = times[1] ?? null;
+ }
+ });
- const href = $el.find("a").first().attr("href") ?? null;
- const imageSrc = $el.find(".front img").attr("src") ?? null;
+ const href = $el.find("a").first().attr("href") ?? null;
+ const imageSrc = $el.find(".front img").attr("src") ?? null;
- events.push({
- venue_id: venue.id,
- title,
- artist,
- date,
- open_time: openTime,
- start_time: startTime,
- image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null,
- source_url: href ? absoluteUrl(href, venue.url) : null,
- });
+ events.push({
+ venue_id: venue.id,
+ title,
+ artist,
+ date,
+ open_time: openTime,
+ start_time: startTime,
+ image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null,
+ source_url: href ? absoluteUrl(href, venue.url) : null,
});
+ });
- return events;
+ return events;
+}
+
+export const scraper: Scraper = {
+ venue,
+ async scrape(): Promise<EventInput[]> {
+ const now = new Date();
+ const urls = [0, 1, 2].map((offset) => {
+ const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
+ const ym = `${d.getFullYear()}${String(d.getMonth() + 1).padStart(2, "0")}`;
+ return `https://www.club-quattro.com/shibuya/schedule/?ym=${ym}`;
+ });
+
+ const htmls = await Promise.all(
+ urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : "")))
+ );
+
+ const seen = new Set<string>();
+ return htmls.flatMap(parseHtml).filter((e) => {
+ const key = `${e.date}|${e.title}`;
+ if (seen.has(key)) return false;
+ seen.add(key);
+ return true;
+ });
},
};