1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
/**
* duo MUSIC EXCHANGE — https://duomusicexchange.com
*
* Monthly HTML page: /schedule/YYYY/index_YYYY-MM.html
* DOM structure:
* <section id="daybox">
* <div class="date"><span class="day">01</span></div>
* <div class="sche-details">
* <span class="artist">artist name</span>
* <span class="details-title">event title</span>
* <dl class="row">
* <dt>OPEN/START</dt><dd>18:00 / 19:00</dd>
* <dt>ADV./DOOR</dt><dd>¥3,000 / ¥3,500</dd>
* <dt>Ticket.</dt><dd><a href="...">...</a></dd>
* </dl>
* </div>
* </section>
*/
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";
/**
 * Static metadata for the duo MUSIC EXCHANGE venue.
 *
 * `id` is written to every scraped event as `venue_id`, and `url` is the base
 * from which the monthly schedule page URLs are built.
 */
export const venue: VenueMeta = {
id: "duo-music-exchange",
name: "duo MUSIC EXCHANGE",
url: "https://duomusicexchange.com",
// "渋谷" is Shibuya.
area: "渋谷",
capacity: 700,
};
/**
 * Resolves an `href`/`src` attribute value against the page it was found on.
 *
 * Uses the standard URL parser so that root-relative (`/img/a.jpg`),
 * protocol-relative (`//cdn.example/a.jpg`), dot-segment (`../a.jpg`) and
 * already-absolute values are all handled correctly.
 *
 * @param raw - Attribute value as read from the DOM; may be undefined or blank.
 * @param base - Absolute URL of the page that contained the attribute.
 * @returns The absolute URL, or `null` when the value is missing, blank, or
 *   cannot be parsed as a URL.
 */
function toAbsoluteUrl(raw: string | undefined, base: string): string | null {
  const trimmed = raw?.trim();
  if (!trimmed) return null;
  try {
    return new URL(trimmed, base).href;
  } catch {
    return null;
  }
}

/**
 * Scrapes one monthly schedule page and converts each day box into an event.
 *
 * The page URL is `${venue.url}/schedule/YYYY/index_YYYY-MM.html`. Each
 * `section#daybox` element yields at most one event; boxes without a parsable
 * day number or without a title are skipped.
 *
 * @param year - Four-digit calendar year used in the page URL and event dates.
 * @param month - Calendar month, 1-12; zero-padded to two digits in the URL.
 * @returns The events found on the page, in document order. An empty array is
 *   returned when the HTTP response status is not OK.
 */
async function scrapeMonth(year: number, month: number): Promise<EventInput[]> {
  const mm = String(month).padStart(2, "0");
  const url = `${venue.url}/schedule/${year}/index_${year}-${mm}.html`;

  const res = await fetch(url);
  // A non-OK response produces no events for this month instead of an error.
  if (!res.ok) return [];

  const $ = cheerio.load(await res.text());
  const events: EventInput[] = [];

  $("section#daybox").each((_, el) => {
    const $el = $(el);

    // parseInt yields NaN for non-numeric text; both NaN and 0 are rejected here.
    const dayStr = $el.find(".date .day").first().text().trim();
    const day = parseInt(dayStr, 10);
    if (!day) return;
    const date = `${year}-${mm}-${String(day).padStart(2, "0")}`;

    const artist = $el.find(".sche-details .artist").first().text().trim() || null;
    const title = $el.find(".sche-details .details-title").first().text().trim();
    if (!title) return;

    let openTime: string | null = null;
    let startTime: string | null = null;
    let price: string | null = null;
    let ticketUrl: string | null = null;

    // Each <dt> label decides how its immediately following <dd> is interpreted.
    $el.find("dl.row dt").each((_, dt) => {
      const label = $(dt).text().trim();
      const $dd = $(dt).next("dd");
      if (/OPEN/i.test(label)) {
        // First HH:MM match is the open time, the second (if any) the start time.
        const times = $dd.text().trim().match(/(\d{1,2}:\d{2})/g) ?? [];
        openTime = times[0] ?? null;
        startTime = times[1] ?? null;
      } else if (/ADV/i.test(label)) {
        price = $dd.text().trim() || null;
      } else if (/Ticket/i.test(label)) {
        // Resolve against the page so relative links stay usable; blank hrefs become null.
        ticketUrl = toAbsoluteUrl($dd.find("a[href]").first().attr("href"), url);
      }
    });

    // Resolve against the page URL rather than concatenating strings, so that
    // root-relative, protocol-relative and "../" image paths produce valid URLs.
    const imageUrl = toAbsoluteUrl($el.find("img").first().attr("src"), url);

    events.push({
      venue_id: venue.id,
      title,
      artist,
      date,
      open_time: openTime,
      start_time: startTime,
      price,
      ticket_url: ticketUrl,
      image_url: imageUrl,
      source_url: url,
    });
  });

  return events;
}
/**
 * Scraper entry point for duo MUSIC EXCHANGE.
 *
 * `scrape` fetches the current calendar month and the two months after it
 * concurrently and returns all events as one flat list, ordered by month.
 * If any month's request rejects, the whole call rejects.
 */
export const scraper: Scraper = {
  venue,
  async scrape(): Promise<EventInput[]> {
    const today = new Date();
    const baseYear = today.getFullYear();
    const baseMonthIndex = today.getMonth();

    const pending: Promise<EventInput[]>[] = [];
    for (let ahead = 0; ahead <= 2; ahead += 1) {
      // Date normalizes month indexes past 11 into the following year.
      const firstOfMonth = new Date(baseYear, baseMonthIndex + ahead, 1);
      pending.push(scrapeMonth(firstOfMonth.getFullYear(), firstOfMonth.getMonth() + 1));
    }

    const perMonth = await Promise.all(pending);
    return perMonth.flat();
  },
};
|