diff options
Diffstat (limited to 'app/scrapers/moon-step-nakano.ts')
| -rw-r--r-- | app/scrapers/moon-step-nakano.ts | 85 |
1 files changed, 85 insertions, 0 deletions
/**
 * Nakano MOON STEP — https://nakano-dynamite.com/moonstep
 *
 * WordPress site using The Events Calendar plugin; events are fetched via its REST API.
 * Endpoint: /moonstep/wp-json/tribe/events/v1/events
 * The `description` field is an HTML string whose <pre> element contains the lineup:
 *   出演:\n<LIVE>\nartist\n...
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

export const venue: VenueMeta = {
  id: "moon-step-nakano",
  name: "中野 MOON STEP",
  url: "https://nakano-dynamite.com/moonstep",
  area: "中野",
};

const API_URL = "https://nakano-dynamite.com/moonstep/wp-json/tribe/events/v1/events";

/** Named character references decoded in event titles. */
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
]);

/**
 * Decodes numeric (decimal and hexadecimal) and a small set of named HTML
 * character references in a single pass.
 *
 * Decoding in one pass matters: sequential `.replace()` calls would turn
 * `&amp;lt;` into `<` instead of the correct `&lt;`. Unknown or out-of-range
 * references are left untouched.
 *
 * @param input Text that may contain HTML character references.
 * @returns The text with recognised references replaced by their characters.
 */
function decodeHtmlEntities(input: string): string {
  return input.replace(
    /&(#\d+|#[xX][0-9a-fA-F]+|[a-zA-Z]+);/g,
    (entity: string, body: string): string => {
      if (body.startsWith("#")) {
        const isHex = body[1] === "x" || body[1] === "X";
        const codePoint = isHex
          ? Number.parseInt(body.slice(2), 16)
          : Number.parseInt(body.slice(1), 10);
        // String.fromCodePoint throws a RangeError above U+10FFFF.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
      }
      return NAMED_ENTITIES.get(body) ?? entity;
    },
  );
}

/** Returns true when `value` is a non-null object that can be indexed by string keys. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

/** Returns `value` when it is a string, otherwise `undefined`. */
function asString(value: unknown): string | undefined {
  return typeof value === "string" ? value : undefined;
}

/**
 * Builds an `H:MM`-style start time from the API's `start_date_details` object.
 *
 * @param details The raw `start_date_details` value from the API payload.
 * @returns `"<hour>:<minutes>"`, or `null` when either part is missing or
 *   not a 1–2 digit value (avoids emitting strings like "undefined:undefined").
 */
function formatStartTime(details: unknown): string | null {
  if (!isRecord(details)) return null;
  const parts = [details.hour, details.minutes].map((part) =>
    typeof part === "number" ? String(part) : asString(part),
  );
  const [hour, minutes] = parts;
  if (hour === undefined || minutes === undefined) return null;
  if (!/^\d{1,2}$/.test(hour) || !/^\d{1,2}$/.test(minutes)) return null;
  return `${hour}:${minutes}`;
}

/**
 * Extracts the performer list from an event description.
 *
 * The description HTML is reduced to text, the section following "出演:" is
 * taken up to the first blank line, and each remaining non-empty line is
 * treated as one artist. A leading bracketed label on a line (e.g. `<LIVE>`,
 * `【LIVE】`, `[DJ]`) is stripped; lines that consist only of such a label
 * are dropped.
 *
 * @param descriptionHtml The event's `description` HTML string.
 * @returns Artists joined with " / ", or `null` when no lineup is found.
 */
function parseArtists(descriptionHtml: string): string | null {
  const text = cheerio.load(descriptionHtml).text();
  const match = text.match(/出演[::]\s*([\s\S]+)/);
  if (!match) return null;
  // Stop at the first blank line (double \r\n or \n\n).
  const section = (match[1] ?? "").split(/\r?\n\r?\n/)[0] ?? "";
  const lines = section
    .split(/\r?\n/)
    // The closing class must escape `]`; unescaped, the class ends early and
    // the pattern demands a literal ";]" so labels are never removed.
    .map((line) => line.replace(/^\s*[<【\[&].*?[>】\];]\s*/, "").trim())
    .filter((line) => line.length > 0);
  return lines.length > 0 ? lines.join(" / ") : null;
}

export const scraper: Scraper = {
  venue,
  /**
   * Fetches upcoming events from the venue's Events Calendar REST endpoint
   * and maps them to `EventInput` rows.
   *
   * Events without a `YYYY-MM-DD` start date or without a title are skipped.
   *
   * @returns The parsed events from the first page (up to 50) of results.
   * @throws Error with message `HTTP <status>` when the response is not OK.
   */
  async scrape(): Promise<EventInput[]> {
    // NOTE(review): this is the current UTC date; presumably the API interprets
    // start_date in the site's local timezone — confirm. Only the first page
    // (per_page=50) is requested.
    const today = new Date().toISOString().slice(0, 10);
    const url = `${API_URL}?per_page=50&start_date=${today}`;
    const res = await fetch(url);
    if (!res.ok) throw new Error(`HTTP ${res.status}`);

    const data: unknown = await res.json();
    const rawEvents: unknown[] =
      isRecord(data) && Array.isArray(data.events) ? data.events : [];
    const events: EventInput[] = [];

    for (const e of rawEvents) {
      if (!isRecord(e)) continue;

      const date = (asString(e.start_date) ?? "").slice(0, 10);
      if (!/^\d{4}-\d{2}-\d{2}$/.test(date)) continue;

      const startTime = formatStartTime(e.start_date_details);

      const title = decodeHtmlEntities(asString(e.title) ?? "").trim();
      if (!title) continue;

      // `||` is deliberate: an empty cost string is stored as null.
      const cost = asString(e.cost)?.trim() || null;
      const sourceUrl = asString(e.url) ?? null;
      // `image` is either an object carrying `url` or a falsy value.
      const imageUrl = isRecord(e.image) ? asString(e.image.url) ?? null : null;

      const description = asString(e.description) ?? "";
      const openMatch = description.match(/OPEN\s*(\d{1,2}:\d{2})/i);
      const artist = parseArtists(description);

      events.push({
        venue_id: venue.id,
        title,
        date,
        open_time: openMatch?.[1] ?? null,
        // A start time of "00:00" is treated as "not set".
        start_time: startTime && startTime !== "00:00" ? startTime : null,
        price: cost,
        image_url: imageUrl,
        source_url: sourceUrl,
        artist,
      });
    }

    return events;
  },
};
