diff options
Diffstat (limited to 'app/scrapers/mod-shibasaki.ts')
| -rw-r--r-- | app/scrapers/mod-shibasaki.ts | 124 |
1 files changed, 124 insertions, 0 deletions
/**
 * shibasaki mod (Shibasaki, Chofu) — https://shibasakimod.com/schedule
 * File: app/scrapers/mod-shibasaki.ts
 *
 * Squarespace event list, served as static HTML.
 * The list page carries no performer information, so the detail page of
 * every event is fetched in parallel.
 * DOM structure (list):
 *   <article class="eventlist-event eventlist-event--upcoming">
 *     <time class="event-date" datetime="YYYY-MM-DD">
 *     <h1 class="eventlist-title"><a class="eventlist-title-link" href="/schedule/YYYYMMDD">
 * DOM structure (detail):
 *   <div class="sqs-html-content"><p style="white-space:pre-wrap;">
 *     ...
 *     live:\n<artist name>\n...   (or "出演:")
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

export const venue: VenueMeta = {
  id: "mod-shibasaki",
  name: "shibasaki mod",
  url: "https://shibasakimod.com",
  area: "柴崎",
};

const SCHEDULE_URL = "https://shibasakimod.com/schedule";

/** `datetime` attribute values accepted as an event date. */
const ISO_DATE = /^\d{4}-\d{2}-\d{2}$/;

/**
 * Label that introduces the line-up on a detail page; group 1 is everything
 * after it. The lookbehind keeps the label from matching inside a longer
 * word such as "contact:" or "olive:".
 */
const LINEUP_LABEL = /(?<![A-Za-z])(?:live|出演|act)[::]\s*([\s\S]+)/i;

/** First line that looks like time / price / ticket info ends the line-up. */
const LINEUP_END = /\d{1,2}:\d{2}|[¥¥]|yen|ticket|チケット|予約|adv|door/i;

/** A line that starts with one of the bullet characters used for artists. */
const BULLET_LINE = /^[・•]/;

/** Data gathered from the list page before the detail pages are fetched. */
interface EventStub {
  date: string;
  title: string;
  startTime: string | null;
  sourceUrl: string | null;
  imageUrl: string | null;
}

/**
 * Collects artist names from the text that follows a line-up label.
 *
 * Each line has `@handle` tokens and a leading bullet removed. Blank lines and
 * bare URLs are skipped; the first line matching {@link LINEUP_END} stops the
 * scan.
 *
 * @param section Newline-separated text starting at the first candidate line.
 * @returns Artist names in document order (possibly empty).
 */
function extractArtists(section: string): string[] {
  const artists: string[] = [];
  for (const raw of section.split("\n")) {
    const line = raw
      .replace(/\s*@\S+/g, "")
      .replace(/^[・•]\s*/, "")
      .trim();
    if (!line) continue;
    if (/^https?:\/\//i.test(line)) continue; // skip social links between artists
    if (LINEUP_END.test(line)) break;
    artists.push(line);
  }
  return artists;
}

/**
 * Derives the " / "-joined artist string from the plain text of a detail page.
 *
 * Tries the labelled section first, then falls back to the first bulleted
 * line. The fallback accepts the same bullet characters that
 * {@link extractArtists} strips.
 *
 * @param text Text content of the detail page with newlines at block boundaries.
 * @returns Joined artist names, or `null` when none could be found.
 */
function parseLineup(text: string): string | null {
  const labelled = LINEUP_LABEL.exec(text)?.[1];
  if (labelled !== undefined) {
    const artists = extractArtists(labelled);
    if (artists.length > 0) return artists.join(" / ");
  }

  // Fallback: find the first bulleted line and extract from there.
  const lines = text.split("\n");
  const firstBullet = lines.findIndex((l) => BULLET_LINE.test(l.trim()));
  if (firstBullet >= 0) {
    const artists = extractArtists(lines.slice(firstBullet).join("\n"));
    if (artists.length > 0) return artists.join(" / ");
  }
  return null;
}

/**
 * Resolves an `href` taken from the list page against the schedule URL.
 *
 * @param href Raw attribute value; may be absolute, root-relative or relative.
 * @returns Absolute URL, or `null` when the value is missing, empty or unparsable.
 */
function resolveUrl(href: string | undefined): string | null {
  if (!href) return null;
  try {
    return new URL(href, SCHEDULE_URL).href;
  } catch {
    return null;
  }
}

/**
 * Fetches one event detail page and extracts its line-up.
 *
 * Best effort: any HTTP or parsing failure yields `null` so that a single bad
 * detail page does not fail the whole scrape.
 *
 * @param url Absolute URL of the event detail page.
 * @returns " / "-joined artist names, or `null` when unavailable.
 */
async function fetchArtist(url: string): Promise<string | null> {
  try {
    const res = await fetch(url);
    if (!res.ok) return null;
    const $ = cheerio.load(await res.text());
    // Insert newlines at block boundaries before extracting text.
    $(".sqs-html-content br").replaceWith("\n");
    $(".sqs-html-content p").each((_, el) => {
      $(el).append("\n");
    });
    return parseLineup($(".sqs-html-content").text());
  } catch (err: unknown) {
    console.warn(`[${venue.id}] could not read line-up from ${url}:`, err);
    return null;
  }
}

export const scraper: Scraper = {
  venue,
  /**
   * Scrapes the upcoming events from the schedule page and enriches each one
   * with the line-up found on its detail page.
   *
   * @returns One entry per upcoming event that has a valid date and a title.
   * @throws Error when the schedule page responds with a non-OK HTTP status.
   */
  async scrape(): Promise<EventInput[]> {
    const res = await fetch(SCHEDULE_URL);
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const $ = cheerio.load(await res.text());
    const stubs: EventStub[] = [];

    $("article.eventlist-event--upcoming").each((_, el) => {
      const $el = $(el);

      const date = $el.find("time.event-date").first().attr("datetime") ?? "";
      if (!ISO_DATE.test(date)) return;

      const title = $el
        .find("h1.eventlist-title a, h2.eventlist-title a")
        .first()
        .text()
        .trim();
      if (!title) return;

      const startTime =
        $el.find("time.event-time-localized-start").first().text().trim() || null;

      const sourceUrl = resolveUrl(
        $el.find("a.eventlist-title-link").first().attr("href"),
      );

      const imageUrl =
        $el.find("img[data-src]").first().attr("data-src") ??
        $el.find("img[src]").first().attr("src") ??
        null;

      stubs.push({ date, title, startTime, sourceUrl, imageUrl });
    });

    // fetchArtist never rejects, so Promise.all cannot short-circuit here.
    const artists = await Promise.all(
      stubs.map((s) => (s.sourceUrl ? fetchArtist(s.sourceUrl) : null)),
    );

    return stubs.map((s, i) => ({
      venue_id: venue.id,
      title: s.title,
      date: s.date,
      open_time: null,
      start_time: s.startTime,
      price: null,
      image_url: s.imageUrl,
      source_url: s.sourceUrl,
      artist: artists[i] ?? null,
    }));
  },
};
