/**
 * Scraper for Higashi-Koenji Niman Den-atsu (東高円寺二万電圧) — https://den-atsu.com
 *
 * Provenance: recovered from commit 4c39c1b30a206d9c87c8f65581e93ee3c84328ad
 * ("Add 東高円寺二万電圧 (den-atsu) scraper", yyamashita, 2026-05-14), which
 * creates app/scrapers/den-atsu.ts. The same commit registers the scraper in
 * app/scrapers/index.ts:
 *
 *   import { scraper as denAtsu } from "./den-atsu";
 *   // ...and `denAtsu` is appended to ALL_SCRAPERS after `duoMusicExchange`.
 *
 * The site uses a custom WordPress theme with one schedule page per month.
 * The parser walks the paragraphs under `div.inner` and expects, per event:
 *
 *   1. a date-marker paragraph whose text starts with `■YYYY.M/D(day)`
 *   2. one or more title paragraphs (each contains a span with an inline
 *      `color` style)
 *   3. one or more performer paragraphs (no such span)
 *   4. info paragraph(s): `open.HH:MM start.HH:MM`, `adv.Nyen door.Nyen`,
 *      and a ticket link
 *   5. a blank paragraph separating the event from the next one
 *
 * NOTE(review): the literal HTML examples in the original header were lost
 * when this file was extracted; the list above is derived from the parsing
 * code, not from the live markup — confirm against the site.
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

export const venue: VenueMeta = {
  id: "den-atsu",
  name: "東高円寺二万電圧",
  url: "https://den-atsu.com",
  area: "東高円寺",
  capacity: 130,
};

/** Anchors whose href looks like a link to a ticketing service. */
const TICKET_LINK_SELECTOR =
  'a[href*="eplus"], a[href*="livepocket"], a[href*="tiget"], a[href*="pia.jp"], a[href*="ticket"]';

/** A paragraph containing a span with an inline colour style is a title line. */
const TITLE_SPAN_SELECTOR = "span[style*='color']";

// Patterns are hoisted so they are compiled once rather than per paragraph.
// None of them uses the `g` flag, so `exec`/`test` keep no state between calls.
const DATE_MARKER_RE = /^■(\d{4})\.(\d{1,2})\/(\d{1,2})/;
const NEXT_EVENT_RE = /^■\d{4}/;
const INFO_START_RE = /^(?:open|adv)\./i;
const OPEN_TIME_RE = /open\.(\d{1,2}:\d{2})/i;
const START_TIME_RE = /start\.(\d{1,2}:\d{2})/i;
const ADV_PRICE_RE = /adv\.([\d,]+)yen/i;
const DOOR_PRICE_RE = /door\.([\d,]+)yen/i;

/** Builds the URL of the monthly schedule page (month is not zero-padded). */
function scheduleUrl(year: number, month: number): string {
  return `https://den-atsu.com/schedule/${year}-${month}-schedule/`;
}

/**
 * Converts a `■YYYY.M/D…` date marker into `YYYY-MM-DD`.
 *
 * @returns the zero-padded date, or `null` when `text` is not a date marker.
 */
function parseDateMarker(text: string): string | null {
  const match = DATE_MARKER_RE.exec(text);
  if (!match) return null;
  const [, year = "", month = "", day = ""] = match;
  return `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
}

/**
 * Formats the advance/door prices for display.
 *
 * @returns `null` when neither price was found. A door-only listing is
 *   reported as well (the previous implementation dropped it).
 */
function formatPrice(adv: string | null, door: string | null): string | null {
  if (adv && door) return `前売 ¥${adv} / 当日 ¥${door}`;
  if (adv) return `前売 ¥${adv}`;
  if (door) return `当日 ¥${door}`;
  return null;
}

/**
 * Extracts the events from one monthly schedule page.
 *
 * @param html  raw HTML of the schedule page
 * @param year  year of the page; used only to build `source_url`
 * @param month 1-based month of the page; used only to build `source_url`
 * @returns one entry per date marker that is followed by at least one title
 *   paragraph; date markers without a title are skipped.
 */
function parseHtml(html: string, year: number, month: number): EventInput[] {
  const $ = cheerio.load(html);
  const sourceUrl = scheduleUrl(year, month);
  const paras = $("div.inner p").toArray();
  const events: EventInput[] = [];

  let i = 0;
  while (i < paras.length) {
    // Every event starts at a date marker; anything else is skipped.
    const date = parseDateMarker($(paras[i]).text().trim());
    i++;
    if (date === null) continue;

    // Title: consecutive paragraphs that contain a coloured span.
    const titleParts: string[] = [];
    while (i < paras.length) {
      const $cur = $(paras[i]);
      if ($cur.find(TITLE_SPAN_SELECTOR).length === 0) break;
      const part = $cur.text().trim();
      if (part) titleParts.push(part);
      i++;
    }
    const title = titleParts.join(" ").trim();
    if (!title) continue;

    // Performers: paragraphs without a coloured span, up to the info lines.
    const artistParts: string[] = [];
    while (i < paras.length) {
      const $cur = $(paras[i]);
      const t = $cur.text().trim();
      // trim() also strips U+00A0, so a paragraph holding only a
      // non-breaking space is already empty here.
      if (!t) {
        i++; // consume the blank separator
        break;
      }
      if (NEXT_EVENT_RE.test(t)) break;
      if ($cur.find(TITLE_SPAN_SELECTOR).length > 0) break;
      if (INFO_START_RE.test(t)) break;
      artistParts.push(t);
      i++;
    }
    const artist = artistParts.join("、").trim() || null;

    // Info: open/start times, adv/door prices and the ticket link. Values are
    // accumulated across paragraphs until a blank paragraph or the next date
    // marker, so times and prices split over several paragraphs are all seen.
    let openTime: string | null = null;
    let startTime: string | null = null;
    let advPrice: string | null = null;
    let doorPrice: string | null = null;
    let ticketUrl: string | null = null;

    while (i < paras.length) {
      const $cur = $(paras[i]);
      const t = $cur.text().trim();
      if (!t) {
        i++; // consume the blank separator
        break;
      }
      if (NEXT_EVENT_RE.test(t)) break;

      openTime = OPEN_TIME_RE.exec(t)?.[1] ?? openTime;
      startTime = START_TIME_RE.exec(t)?.[1] ?? startTime;
      advPrice = ADV_PRICE_RE.exec(t)?.[1] ?? advPrice;
      doorPrice = DOOR_PRICE_RE.exec(t)?.[1] ?? doorPrice;

      // The first ticket link found wins.
      if (!ticketUrl) {
        ticketUrl = $cur.find(TICKET_LINK_SELECTOR).first().attr("href") ?? null;
      }
      i++;
    }

    events.push({
      venue_id: venue.id,
      title,
      artist,
      date,
      open_time: openTime,
      start_time: startTime,
      price: formatPrice(advPrice, doorPrice),
      ticket_url: ticketUrl,
      image_url: null,
      source_url: sourceUrl,
    });
  }

  return events;
}

export const scraper: Scraper = {
  venue,
  /**
   * Fetches the schedule pages for the current month and the following two,
   * parses them in parallel and returns the events de-duplicated by
   * date + title.
   *
   * A page answering with a non-OK status contributes no events
   * (presumably a month that is not published yet — confirm). A rejected
   * `fetch` (network failure) is not caught and rejects the whole scrape.
   */
  async scrape(): Promise<EventInput[]> {
    const now = new Date();
    const targets = [0, 1, 2].map((offset) => {
      // Day 1 avoids month overflow (e.g. Jan 31 + 1 month).
      const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
      return { year: d.getFullYear(), month: d.getMonth() + 1 };
    });

    const results = await Promise.all(
      targets.map(async ({ year, month }) => {
        const res = await fetch(scheduleUrl(year, month));
        if (!res.ok) return [];
        return parseHtml(await res.text(), year, month);
      })
    );

    const seen = new Set<string>();
    return results.flat().filter((e) => {
      const key = `${e.date}|${e.title}`;
      if (seen.has(key)) return false;
      seen.add(key);
      return true;
    });
  },
};