diff options
Diffstat (limited to 'app/scrapers/club-quattro.ts')
| -rw-r--r-- | app/scrapers/club-quattro.ts | 94 |
1 files changed, 56 insertions, 38 deletions
diff --git a/app/scrapers/club-quattro.ts b/app/scrapers/club-quattro.ts index 10b60e9..cbb898e 100644 --- a/app/scrapers/club-quattro.ts +++ b/app/scrapers/club-quattro.ts @@ -10,53 +10,71 @@ export const venue: VenueMeta = { capacity: 750, }; -export const scraper: Scraper = { - venue, - async scrape(): Promise<EventInput[]> { - const res = await fetch("https://www.club-quattro.com/shibuya/schedule/"); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - const html = await res.text(); - const $ = cheerio.load(html); - const events: EventInput[] = []; +function parseHtml(html: string): EventInput[] { + const $ = cheerio.load(html); + const events: EventInput[] = []; - $("li[data-event-date]").each((_, el) => { - const $el = $(el); + $("li[data-event-date]").each((_, el) => { + const $el = $(el); - const date = $el.attr("data-event-date") ?? ""; - if (!date) return; + const date = $el.attr("data-event-date") ?? ""; + if (!date) return; - const title = $el.find("p.txt-02").text().trim(); - if (!title) return; + const title = $el.find("p.txt-02").text().trim(); + if (!title) return; - const artist = $el.find("p.txt-01 span").text().trim() || null; + const artist = $el.find("p.txt-01 span").text().trim() || null; - let openTime: string | null = null; - let startTime: string | null = null; - $el.find("dl.detail-list .bundle").each((_, bundle) => { - const label = $(bundle).find("dt").text().trim(); - if (label.includes("開場") || label.includes("開演")) { - const times = $(bundle).find("dd").text().trim().match(/\d{2}:\d{2}/g) ?? []; - openTime = times[0] ?? null; - startTime = times[1] ?? null; - } - }); + let openTime: string | null = null; + let startTime: string | null = null; + $el.find("dl.detail-list .bundle").each((_, bundle) => { + const label = $(bundle).find("dt").text().trim(); + if (label.includes("開場") || label.includes("開演")) { + const times = $(bundle).find("dd").text().trim().match(/\d{2}:\d{2}/g) ?? []; + openTime = times[0] ?? null; + startTime = times[1] ?? null; + } + }); - const href = $el.find("a").first().attr("href") ?? null; - const imageSrc = $el.find(".front img").attr("src") ?? null; + const href = $el.find("a").first().attr("href") ?? null; + const imageSrc = $el.find(".front img").attr("src") ?? null; - events.push({ - venue_id: venue.id, - title, - artist, - date, - open_time: openTime, - start_time: startTime, - image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null, - source_url: href ? absoluteUrl(href, venue.url) : null, - }); + events.push({ + venue_id: venue.id, + title, + artist, + date, + open_time: openTime, + start_time: startTime, + image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null, + source_url: href ? absoluteUrl(href, venue.url) : null, }); + }); - return events; + return events; +} + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const now = new Date(); + const urls = [0, 1, 2].map((offset) => { + const d = new Date(now.getFullYear(), now.getMonth() + offset, 1); + const ym = `${d.getFullYear()}${String(d.getMonth() + 1).padStart(2, "0")}`; + return `https://www.club-quattro.com/shibuya/schedule/?ym=${ym}`; + }); + + const htmls = await Promise.all( + urls.map((url) => fetch(url).then((r) => (r.ok ? r.text() : ""))) + ); + + const seen = new Set<string>(); + return htmls.flatMap(parseHtml).filter((e) => { + const key = `${e.date}|${e.title}`; + if (seen.has(key)) return false; + seen.add(key); + return true; + }); }, }; |
