summary | refs | log | tree | commit | diff
path: root/app/scrapers/den-atsu.ts
diff options
context:
space:
mode:
Diffstat (limited to 'app/scrapers/den-atsu.ts')
-rw-r--r-- app/scrapers/den-atsu.ts 149
1 files changed, 149 insertions, 0 deletions
diff --git a/app/scrapers/den-atsu.ts b/app/scrapers/den-atsu.ts
new file mode 100644
index 0000000..28980d1
--- /dev/null
+++ b/app/scrapers/den-atsu.ts
@@ -0,0 +1,149 @@
+/**
+ * 東高円寺二万電圧 — https://den-atsu.com
+ *
+ * WordPress カスタムテーマ。月別スケジュールページ構造:
+ * <p class="p1">■YYYY.M/D(day)</p> ← 日付マーカー
+ * <p class="p1"><span style="color:red">タイトル</span></p> ← タイトル (複数行あり)
+ * <p class="p1">アーティスト名</p> ← 出演者 (複数行あり)
+ * <p class="p1">open.HH:MM start.HH:MM\nadv.Nyen door.Nyen\n<a>チケット</a></p>
+ * <p>&nbsp;</p> ← イベント区切り
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+export const venue: VenueMeta = { // Static metadata for this venue; exposed on `scraper` and used to tag scraped events.
+ id: "den-atsu", // written into every scraped event as `venue_id`
+ name: "東高円寺二万電圧",
+ url: "https://den-atsu.com", // site root; the schedule URLs in this file are built separately as string literals
+ area: "東高円寺",
+ capacity: 130, // presumably audience headcount — TODO confirm against VenueMeta
+};
+
+const TICKET_LINK_SELECTOR = // CSS selector for anchors whose href contains one of these substrings; used to pick an event's ticket link
+ 'a[href*="eplus"], a[href*="livepocket"], a[href*="tiget"], a[href*="pia.jp"], a[href*="ticket"]';
+
+/** Date marker that opens an event: ■YYYY.M/D(day). Captures year, month and day. */
+const DATE_MARKER_RE = /^■(\d{4})\.(\d{1,2})\/(\d{1,2})/;
+/** Looser check used while scanning an event body to detect the start of the next event. */
+const NEXT_EVENT_RE = /^■\d{4}/;
+/** Title paragraphs are recognised by a span carrying an inline colour style. */
+const COLORED_SPAN_SELECTOR = "span[style*='color']";
+/** An info paragraph begins with "open." or "adv." (case-insensitive). */
+const INFO_LINE_START_RE = /^(?:open|adv)\./i;
+const OPEN_TIME_RE = /open\.(\d{1,2}:\d{2})/i;
+const START_TIME_RE = /start\.(\d{1,2}:\d{2})/i;
+const ADV_PRICE_RE = /adv\.([\d,]+)yen/i;
+const DOOR_PRICE_RE = /door\.([\d,]+)yen/i;
+
+/** Builds the display price from whichever of the advance / door amounts were found. */
+function formatPrice(adv: string | null, door: string | null): string | null {
+  if (adv && door) return `前売 ¥${adv} / 当日 ¥${door}`;
+  if (adv) return `前売 ¥${adv}`;
+  if (door) return `当日 ¥${door}`;
+  return null;
+}
+
+/**
+ * Parses one monthly schedule page into a list of events.
+ *
+ * The page is read as a flat sequence of `<p>` elements under `div.inner`.
+ * Each event is a date-marker paragraph followed, in order, by title
+ * paragraphs (coloured span), artist paragraphs, and info paragraphs
+ * (open/start times, adv/door prices, ticket link). A blank paragraph or the
+ * next date marker ends the section being scanned.
+ *
+ * @param html  Raw HTML of the schedule page.
+ * @param year  Year of the requested page; only used to build `source_url`.
+ * @param month Month (1-12) of the requested page; only used to build `source_url`.
+ * @returns One entry per date marker that is followed by a non-empty title.
+ *          The event date is taken from the marker text, not from `year`/`month`.
+ */
+function parseHtml(html: string, year: number, month: number): EventInput[] {
+  const $ = cheerio.load(html);
+  const events: EventInput[] = [];
+  const sourceUrl = `https://den-atsu.com/schedule/${year}-${month}-schedule/`;
+
+  // Every <p> inside the content container, in document order.
+  const paras = $("div.inner p").toArray();
+
+  let i = 0;
+  while (i < paras.length) {
+    // Skip forward until a date marker is found.
+    const dateMatch = $(paras[i]).text().trim().match(DATE_MARKER_RE);
+    i++;
+    if (!dateMatch) continue;
+    const date = `${dateMatch[1]}-${dateMatch[2].padStart(2, "0")}-${dateMatch[3].padStart(2, "0")}`;
+
+    // Title: consecutive paragraphs that contain a coloured span.
+    const titleParts: string[] = [];
+    while (i < paras.length) {
+      const $cur = $(paras[i]);
+      const part = $cur.text().trim();
+      // A coloured date marker belongs to the next event, never to this title.
+      if (NEXT_EVENT_RE.test(part)) break;
+      if ($cur.find(COLORED_SPAN_SELECTOR).length === 0) break;
+      if (part) titleParts.push(part);
+      i++;
+    }
+    const title = titleParts.join(" ").trim();
+    // No title: drop this marker and resume scanning from the current paragraph.
+    if (!title) continue;
+
+    // Artists: plain paragraphs up to a blank line, the next marker,
+    // another coloured paragraph, or the start of the info line.
+    const artistParts: string[] = [];
+    while (i < paras.length) {
+      const $cur = $(paras[i]);
+      const t = $cur.text().trim();
+      // trim() also strips the non-breaking space of <p>&nbsp;</p>.
+      // The blank paragraph is consumed; the info scan below still runs.
+      if (!t) { i++; break; }
+      if (NEXT_EVENT_RE.test(t)) break;
+      if ($cur.find(COLORED_SPAN_SELECTOR).length > 0) break;
+      if (INFO_LINE_START_RE.test(t)) break;
+      artistParts.push(t);
+      i++;
+    }
+    const artist = artistParts.join("、").trim() || null;
+
+    // Info: open/start times, adv/door prices, ticket link. These may share a
+    // paragraph or be spread over several, so the prices are accumulated
+    // across paragraphs and formatted once at the end.
+    let openTime: string | null = null;
+    let startTime: string | null = null;
+    let advPrice: string | null = null;
+    let doorPrice: string | null = null;
+    let ticketUrl: string | null = null;
+
+    while (i < paras.length) {
+      const $cur = $(paras[i]);
+      const t = $cur.text().trim();
+      if (!t) { i++; break; }
+      if (NEXT_EVENT_RE.test(t)) break;
+
+      const openMatch = t.match(OPEN_TIME_RE);
+      const startMatch = t.match(START_TIME_RE);
+      const advMatch = t.match(ADV_PRICE_RE);
+      const doorMatch = t.match(DOOR_PRICE_RE);
+
+      if (openMatch) openTime = openMatch[1];
+      if (startMatch) startTime = startMatch[1];
+      if (advMatch) advPrice = advMatch[1];
+      if (doorMatch) doorPrice = doorMatch[1];
+
+      // Keep the first ticket link found for this event.
+      if (!ticketUrl) {
+        ticketUrl = $cur.find(TICKET_LINK_SELECTOR).first().attr("href") ?? null;
+      }
+      i++;
+    }
+
+    events.push({
+      venue_id: venue.id,
+      title,
+      artist,
+      date,
+      open_time: openTime,
+      start_time: startTime,
+      price: formatPrice(advPrice, doorPrice),
+      ticket_url: ticketUrl,
+      image_url: null,
+      source_url: sourceUrl,
+    });
+  }
+
+  return events;
+}
+
+/**
+ * Scraper for this venue.
+ *
+ * `scrape()` requests the schedule pages for the current month and the two
+ * months after it concurrently, parses each page that returns an OK status,
+ * and returns the combined events with duplicates (same date and title)
+ * removed, keeping the first occurrence. A rejected fetch rejects the whole call.
+ */
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    const today = new Date();
+    const baseYear = today.getFullYear();
+    const baseMonth = today.getMonth();
+
+    // Fetch and parse one month's page; a non-OK response contributes no events.
+    const loadMonth = async (year: number, month: number): Promise<EventInput[]> => {
+      const response = await fetch(`https://den-atsu.com/schedule/${year}-${month}-schedule/`);
+      return response.ok ? parseHtml(await response.text(), year, month) : [];
+    };
+
+    // Start all three requests before awaiting any of them.
+    const pending: Array<Promise<EventInput[]>> = [];
+    for (let ahead = 0; ahead <= 2; ahead++) {
+      // Date normalises a month index past December into the following year.
+      const firstOfMonth = new Date(baseYear, baseMonth + ahead, 1);
+      pending.push(loadMonth(firstOfMonth.getFullYear(), firstOfMonth.getMonth() + 1));
+    }
+    const monthly = await Promise.all(pending);
+
+    // A Map keeps insertion order, so the first event seen for a key wins.
+    const unique = new Map<string, EventInput>();
+    for (const event of monthly.flat()) {
+      const key = `${event.date}|${event.title}`;
+      if (!unique.has(key)) unique.set(key, event);
+    }
+    return Array.from(unique.values());
+  },
+};