1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";
/**
 * Static metadata describing the CLUB QUATTRO venue.
 *
 * Within this module, `id` is copied onto every scraped event as `venue_id`,
 * and `url` serves as the base against which relative link and image paths
 * are resolved.
 */
export const venue: VenueMeta = {
id: "club-quattro",
name: "CLUB QUATTRO",
// Site origin, written without a trailing slash.
url: "https://www.club-quattro.com",
// "渋谷" is the district name Shibuya.
area: "渋谷",
// NOTE(review): presumably the audience capacity in persons — confirm against VenueMeta.
capacity: 750,
};
/**
 * Extracts event records from the HTML of one schedule page.
 *
 * Every `li[data-event-date]` element produces at most one event. Items whose
 * date attribute is empty, or whose `p.txt-02` title is empty after trimming,
 * are skipped.
 *
 * @param html - Raw HTML of a schedule page.
 * @returns The events found, in document order. Link and image paths are
 *   resolved against `venue.url`; fields that are absent from the markup are
 *   `null`.
 */
function parseHtml(html: string): EventInput[] {
  const $ = cheerio.load(html);
  const events: EventInput[] = [];

  $("li[data-event-date]").each((_, el) => {
    const $el = $(el);

    const date = $el.attr("data-event-date") ?? "";
    if (!date) return;

    const title = $el.find("p.txt-02").text().trim();
    if (!title) return;

    // An empty artist string is stored as null rather than "".
    const artist = $el.find("p.txt-01 span").text().trim() || null;

    let openTime: string | null = null;
    let startTime: string | null = null;
    $el.find("dl.detail-list .bundle").each((_, bundle) => {
      const label = $(bundle).find("dt").text().trim();
      const hasOpen = label.includes("開場"); // "doors open"
      const hasStart = label.includes("開演"); // "show start"
      if (!hasOpen && !hasStart) return;

      const times = $(bundle).find("dd").text().trim().match(/\d{2}:\d{2}/g) ?? [];
      if (hasOpen && hasStart) {
        // Combined label: the first time is doors-open, the second is show-start.
        openTime = times[0] ?? null;
        startTime = times[1] ?? null;
      } else if (hasOpen) {
        // Label names only one of the two: touch only that field so a
        // separate bundle for the other time is not overwritten.
        openTime = times[0] ?? null;
      } else {
        startTime = times[0] ?? null;
      }
    });

    const href = $el.find("a").first().attr("href") ?? null;
    const imageSrc = $el.find(".front img").attr("src") ?? null;

    events.push({
      venue_id: venue.id,
      title,
      artist,
      date,
      open_time: openTime,
      start_time: startTime,
      image_url: imageSrc ? absoluteUrl(imageSrc, venue.url) : null,
      source_url: href ? absoluteUrl(href, venue.url) : null,
    });
  });

  return events;
}
/**
 * Scraper for the venue's monthly schedule pages.
 *
 * `scrape` requests the schedule page for the current calendar month and for
 * each of the two following months concurrently, parses every page with
 * `parseHtml`, and returns the events with duplicate `date|title` pairs
 * removed (the first occurrence wins).
 *
 * A response with a non-OK status is replaced by an empty string before
 * parsing. A rejected `fetch` is not caught here, so it rejects the promise
 * returned by `scrape`.
 */
export const scraper: Scraper = {
  venue,

  async scrape(): Promise<EventInput[]> {
    const today = new Date();

    const pageUrls: string[] = [];
    for (let offset = 0; offset <= 2; offset++) {
      // Anchoring on day 1 keeps the month arithmetic from overflowing on
      // long-month dates; Date rolls month indexes above 11 into the next year.
      const firstOfMonth = new Date(today.getFullYear(), today.getMonth() + offset, 1);
      const month = String(firstOfMonth.getMonth() + 1).padStart(2, "0");
      const ym = `${firstOfMonth.getFullYear()}${month}`;
      pageUrls.push(`https://www.club-quattro.com/shibuya/schedule/?ym=${ym}`);
    }

    const fetchPage = async (pageUrl: string): Promise<string> => {
      const response = await fetch(pageUrl);
      return response.ok ? response.text() : "";
    };
    const pages = await Promise.all(pageUrls.map((pageUrl) => fetchPage(pageUrl)));

    const seenKeys = new Set<string>();
    const uniqueEvents: EventInput[] = [];
    for (const page of pages) {
      for (const event of parseHtml(page)) {
        const key = `${event.date}|${event.title}`;
        if (seenKeys.has(key)) continue;
        seenKeys.add(key);
        uniqueEvents.push(event);
      }
    }
    return uniqueEvents;
  },
};
/**
 * Resolves a link or image path found in the page against a base origin.
 *
 * - A value that already carries a scheme (`https:`, `http:`, `data:`, ...)
 *   is returned unchanged.
 * - A protocol-relative value (`//host/path`) is given the scheme of `base`,
 *   falling back to `https:` when `base` has none.
 * - Anything else is appended to `base` with exactly one `/` between them,
 *   whether or not `base` ends with a slash or `url` starts with one.
 *
 * @param url - The path or URL to resolve.
 * @param base - The origin to resolve against, e.g. `https://example.com`.
 * @returns The absolute URL as a string.
 */
function absoluteUrl(url: string, base: string): string {
  // URI scheme syntax: a letter followed by letters, digits, "+", "." or "-", then ":".
  const schemePattern = /^[a-z][a-z0-9+.-]*:/i;

  if (schemePattern.test(url)) return url;

  if (url.startsWith("//")) {
    const scheme = schemePattern.exec(base)?.[0] ?? "https:";
    return scheme + url;
  }

  // Strip trailing slashes so the join below never produces "//" in the path.
  const root = base.replace(/\/+$/, "");
  return url.startsWith("/") ? root + url : `${root}/${url}`;
}
|