1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
|
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";
/**
 * Static metadata describing the LIQUID ROOM venue.
 *
 * The `id` is copied into the `venue_id` field of every event built by this
 * module, and the whole object is exposed again through `scraper.venue`.
 */
export const venue: VenueMeta = {
  id: "liquid-room",
  name: "LIQUID ROOM",
  url: "https://www.liquidroom.net",
  area: "恵比寿", // "Ebisu"
  capacity: 1000,
};
/**
 * Extracts events from one schedule page.
 *
 * Every `<article>` element is treated as a candidate event. An article is
 * skipped when:
 *  - the `href` of its `a.s_link` anchor does not end in `_` followed by
 *    exactly eight digits (read as `YYYYMMDD`), or
 *  - its first `<h2>` has no text after trimming.
 *
 * Field mapping for the articles that are kept:
 *  - `date`: `YYYY-MM-DD`, assembled from the eight digits in the href. The
 *    digits are not checked for being a real calendar date.
 *  - `title` / `artist`: when `p.subtitle` has text, the subtitle is the
 *    title and the `<h2>` text is the artist; otherwise the `<h2>` text is
 *    the title and `artist` is `null`.
 *  - `open_time` / `start_time`: first `NN:NN` found under an "OPEN" /
 *    "START" `<dl>`, or `null` when nothing matches.
 *  - `image_url`: `src` of `div.left img`, or `null`.
 *  - `source_url`: the href exactly as it appears in the markup.
 *
 * @param html Raw HTML of a schedule page. Markup without any `<article>`
 *   yields an empty array.
 * @returns The events in document order.
 */
function parseHtml(html: string): EventInput[] {
  const $ = cheerio.load(html);
  const collected: EventInput[] = [];

  for (const node of $("article").toArray()) {
    const article = $(node);

    // The event date only exists as a `_YYYYMMDD` suffix on the detail link.
    const link = article.find("a.s_link").attr("href") ?? "";
    const ymd = /_(\d{4})(\d{2})(\d{2})$/.exec(link);
    if (ymd === null) continue;
    const [, year, month, day] = ymd;

    const heading = article.find("h2").first().text().trim();
    if (heading === "") continue;

    const sub = article.find("p.subtitle").first().text().trim();
    const hasSub = sub !== "";

    // Returns the first two-digit `NN:NN` in the combined <dd> text of every
    // <dl> in this article whose combined <dt> text contains `label`.
    // NOTE(review): if a single <dl> carries both the OPEN and START rows,
    // both lookups see the same <dd> text and return the same (first) time —
    // confirm against the live markup.
    const timeUnder = (label: string): string | null => {
      const ddText = article
        .find("dl")
        .filter((_, dl) => $(dl).find("dt").text().includes(label))
        .find("dd")
        .text()
        .trim();
      const found = ddText.match(/\d{2}:\d{2}/);
      return found === null ? null : found[0];
    };

    collected.push({
      venue_id: venue.id,
      title: hasSub ? sub : heading,
      artist: hasSub ? heading : null,
      date: `${year}-${month}-${day}`,
      open_time: timeUnder("OPEN"),
      start_time: timeUnder("START"),
      image_url: article.find("div.left img").attr("src") ?? null,
      source_url: link,
    });
  }

  return collected;
}
/**
 * Scraper for the LIQUID ROOM schedule.
 *
 * `scrape()` requests three monthly schedule pages: the current month, by the
 * runtime's local clock, and the two months after it. The requests run in
 * parallel. Each page is parsed with `parseHtml` and the results are
 * concatenated in month order.
 *
 * Failure behavior:
 *  - A response whose `ok` flag is false is replaced by an empty string, so
 *    that month simply contributes whatever `parseHtml("")` returns.
 *  - A fetch that rejects is not caught; because the requests are joined with
 *    `Promise.all`, the whole `scrape()` call rejects.
 *
 * De-duplication: events are keyed by `source_url`, falling back to `title`
 * when `source_url` is null or undefined. Only the first event seen for each
 * key is kept, and the original order is preserved.
 */
export const scraper: Scraper = {
  venue,
  async scrape(): Promise<EventInput[]> {
    // Resolves to the page body, or "" for a non-OK HTTP status.
    const loadPage = async (url: string): Promise<string> => {
      const res = await fetch(url);
      return res.ok ? res.text() : "";
    };

    const today = new Date();
    const pending: Promise<string>[] = [];
    for (let ahead = 0; ahead < 3; ahead++) {
      // Day 1 of the target month; Date rolls month overflow into the next year.
      const monthStart = new Date(today.getFullYear(), today.getMonth() + ahead, 1);
      const yyyy = monthStart.getFullYear();
      const mm = String(monthStart.getMonth() + 1).padStart(2, "0");
      pending.push(loadPage(`https://www.liquidroom.net/schedule/${yyyy}/${mm}`));
    }
    const pages = await Promise.all(pending);

    // A Map iterates in insertion order, so keeping only the first event per
    // key preserves the original ordering of the survivors.
    const firstByKey = new Map<string, EventInput>();
    for (const page of pages) {
      for (const event of parseHtml(page)) {
        const key = event.source_url ?? event.title;
        if (!firstByKey.has(key)) {
          firstByKey.set(key, event);
        }
      }
    }
    return Array.from(firstByKey.values());
  },
};
|