diff options
Diffstat (limited to 'app/scrapers/den-atsu.ts')
| -rw-r--r-- | app/scrapers/den-atsu.ts | 149 |
1 files changed, 149 insertions, 0 deletions
/**
 * Scraper for the venue 東高円寺二万電圧 — https://den-atsu.com
 *
 * The site uses a custom WordPress theme. A monthly schedule page is laid out as:
 *   <p class="p1">■YYYY.M/D(day)</p>                          ← date marker
 *   <p class="p1"><span style="color:red">title</span></p>    ← title (may span several paragraphs)
 *   <p class="p1">artist name</p>                             ← performers (may span several paragraphs)
 *   <p class="p1">open.HH:MM start.HH:MM\nadv.Nyen door.Nyen\n<a>ticket</a></p>
 *   <p> </p>                                                  ← event separator
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/** Static metadata for the venue handled by this scraper. */
export const venue: VenueMeta = {
  id: "den-atsu",
  name: "東高円寺二万電圧",
  url: "https://den-atsu.com",
  area: "東高円寺",
  capacity: 130,
};

/** Anchors whose href points at one of the recognised ticketing services. */
const TICKET_LINK_SELECTOR =
  'a[href*="eplus"], a[href*="livepocket"], a[href*="tiget"], a[href*="pia.jp"], a[href*="ticket"]';

/** Titles are recognised by an inline-coloured <span> inside the paragraph. */
const COLORED_SPAN_SELECTOR = "span[style*='color']";

// Patterns are hoisted so they are not re-created for every paragraph.
const DATE_MARKER_RE = /^■(\d{4})\.(\d{1,2})\/(\d{1,2})/;
const NEXT_EVENT_RE = /^■\d{4}/;
const INFO_START_RE = /^(?:open|adv)\./i;
const OPEN_RE = /open\.(\d{1,2}:\d{2})/i;
const START_RE = /start\.(\d{1,2}:\d{2})/i;
const ADV_RE = /adv\.([\d,]+)yen/i;
const DOOR_RE = /door\.([\d,]+)yen/i;

/** Builds the URL of the monthly schedule page (month is 1-based, not zero-padded). */
function scheduleUrl(year: number, month: number): string {
  return `https://den-atsu.com/schedule/${year}-${month}-schedule/`;
}

/**
 * Extracts events from the HTML of one monthly schedule page.
 *
 * @param html  Raw HTML of the schedule page.
 * @param year  Year of the page; only used to build `source_url`.
 * @param month 1-based month of the page; only used to build `source_url`.
 * @returns One entry per date marker that is followed by a non-empty title.
 *          The event date itself is taken from the marker text, not from
 *          `year` / `month`.
 */
function parseHtml(html: string, year: number, month: number): EventInput[] {
  const $ = cheerio.load(html);
  const events: EventInput[] = [];
  const sourceUrl = scheduleUrl(year, month);

  // Every <p> under the content section, in document order.
  const paras = $("div.inner p").toArray();

  let i = 0;
  while (i < paras.length) {
    // Date marker: ■YYYY.M/D(day). Anything else is skipped.
    const dateMatch = $(paras[i]).text().trim().match(DATE_MARKER_RE);
    i++;
    if (!dateMatch) continue;
    const date = `${dateMatch[1]}-${dateMatch[2].padStart(2, "0")}-${dateMatch[3].padStart(2, "0")}`;

    // Title: consecutive paragraphs that contain a coloured span.
    const titleParts: string[] = [];
    while (i < paras.length) {
      const $cur = $(paras[i]);
      if ($cur.find(COLORED_SPAN_SELECTOR).length === 0) break;
      const part = $cur.text().trim();
      if (part) titleParts.push(part);
      i++;
    }
    const title = titleParts.join(" ").trim();
    if (!title) continue;

    // Artists: paragraphs without coloured spans, up to the info line,
    // the next date marker, or the blank separator.
    const artistParts: string[] = [];
    let sawSeparator = false;
    while (i < paras.length) {
      const $cur = $(paras[i]);
      const t = $cur.text().trim();
      // trim() strips regular and non-breaking spaces, so a separator
      // paragraph always ends up as the empty string.
      if (!t) {
        i++;
        sawSeparator = true;
        break;
      }
      if (NEXT_EVENT_RE.test(t)) break;
      if ($cur.find(COLORED_SPAN_SELECTOR).length > 0) break;
      if (INFO_START_RE.test(t)) break;
      artistParts.push(t);
      i++;
    }
    const artist = artistParts.join("、").trim() || null;

    // Info: open/start times, adv/door prices, ticket link.
    let openTime: string | null = null;
    let startTime: string | null = null;
    let price: string | null = null;
    let ticketUrl: string | null = null;

    // If the separator was already consumed above, the event is complete;
    // scanning further would attach unrelated paragraphs to this event.
    while (!sawSeparator && i < paras.length) {
      const $cur = $(paras[i]);
      const t = $cur.text().trim();
      if (!t) {
        i++;
        break;
      }
      if (NEXT_EVENT_RE.test(t)) break;

      const openMatch = t.match(OPEN_RE);
      const startMatch = t.match(START_RE);
      const advMatch = t.match(ADV_RE);
      const doorMatch = t.match(DOOR_RE);

      if (openMatch) openTime = openMatch[1];
      if (startMatch) startTime = startMatch[1];

      if (advMatch && doorMatch) {
        price = `前売 ¥${advMatch[1]} / 当日 ¥${doorMatch[1]}`;
      } else if (advMatch) {
        price = `前売 ¥${advMatch[1]}`;
      } else if (doorMatch && price === null) {
        // Door-only pricing: record it, but never overwrite an advance
        // price that an earlier paragraph already supplied.
        price = `当日 ¥${doorMatch[1]}`;
      }

      if (!ticketUrl) {
        ticketUrl = $cur.find(TICKET_LINK_SELECTOR).first().attr("href") ?? null;
      }
      i++;
    }

    events.push({
      venue_id: venue.id,
      title,
      artist,
      date,
      open_time: openTime,
      start_time: startTime,
      price,
      ticket_url: ticketUrl,
      image_url: null,
      source_url: sourceUrl,
    });
  }

  return events;
}

/**
 * Scraper entry point: fetches the schedule pages for the current month and
 * the following two, parses each, and returns the combined events with
 * duplicates (same date and title) removed.
 */
export const scraper: Scraper = {
  venue,
  async scrape(): Promise<EventInput[]> {
    const now = new Date();
    // Date normalises month overflow, so this rolls over into the next year.
    const targets = [0, 1, 2].map((offset) => {
      const d = new Date(now.getFullYear(), now.getMonth() + offset, 1);
      return { year: d.getFullYear(), month: d.getMonth() + 1 };
    });

    const results = await Promise.all(
      targets.map(async ({ year, month }) => {
        const res = await fetch(scheduleUrl(year, month));
        // A non-OK response (e.g. a month not published yet) yields no events.
        if (!res.ok) return [];
        return parseHtml(await res.text(), year, month);
      })
    );

    // Keep the first occurrence of each date/title pair.
    const seen = new Set<string>();
    return results.flat().filter((e) => {
      const key = `${e.date}|${e.title}`;
      if (seen.has(key)) return false;
      seen.add(key);
      return true;
    });
  },
};
