diff options
| author | yyamashita <yyamashita@mosquit.one> | 2026-05-07 19:27:50 +0900 |
|---|---|---|
| committer | yyamashita <yyamashita@mosquit.one> | 2026-05-07 19:27:50 +0900 |
| commit | d5e975b601e70adf901c8e1eb7e61f0388941195 (patch) | |
| tree | f1778ff15b6540b44c354cb76c44aac795448c4a /app/scrapers/moon-step-nakano.ts | |
| parent | bffc2c74408ff7163cea0c0392dfc4b15c620a5f (diff) | |
Add 5 new venue scrapers; extract artist info for WARP, shibuya-o, MOON STEP, mod
New scrapers: Fever 下北沢, Nine Spices 下北沢, 西荻窪 JAM, mod 柴崎, 中野 MOON STEP
Artist extraction added/fixed:
- warp-kichijoji: parse div.w-flyer (clone + remove nested notes-wrapper)
- shibuya-o: rewrite to scrape each sub-venue; artist from li.p-scheduled-card__artist-item
- moon-step-nakano: parse 出演 section from WordPress API description HTML
- mod-shibasaki: fetch individual event pages in parallel; handle live:/出演:/・ bullet formats
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app/scrapers/moon-step-nakano.ts')
| -rw-r--r-- | app/scrapers/moon-step-nakano.ts | 85 |
1 files changed, 85 insertions, 0 deletions
// app/scrapers/moon-step-nakano.ts — new file (mode 100644, blob e67e128)
/**
 * Nakano MOON STEP — https://nakano-dynamite.com/moonstep
 *
 * The site runs WordPress with The Events Calendar plugin; events are fetched
 * through the plugin's REST API.
 * Endpoint: /moonstep/wp-json/tribe/events/v1/events
 * The `description` field is an HTML string whose <pre> element carries the
 * line-up in this shape:
 *   出演:\n<LIVE>\nartist\n...
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

export const venue: VenueMeta = {
  id: "moon-step-nakano",
  name: "中野 MOON STEP",
  url: "https://nakano-dynamite.com/moonstep",
  area: "中野",
};

const API_URL = "https://nakano-dynamite.com/moonstep/wp-json/tribe/events/v1/events";

/** Matches the "出演" marker followed by an ASCII or full-width colon and captures the rest. */
const LINEUP_RE = /出演[:\uFF1A]\s*([\s\S]+)/;

/**
 * Matches a leading label at the start of a line: an opener (`<`, `【`, `[`, `&`)
 * up to the first closer (`>`, `】`, `]`, `;`), e.g. `<LIVE>` or `【DJ】`.
 * The `]` inside the closing class must be escaped; unescaped it terminates the
 * class early and the pattern then demands a literal `;]`, which never occurs.
 */
const LABEL_RE = /^\s*[<【\[&].*?[>】\];]\s*/;

/** Named entities decoded in titles; numeric references are handled separately. */
const NAMED_ENTITIES = new Map<string, string>([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
]);

/** Type guard: true for any non-null object, so its properties can be read as `unknown`. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

/** Returns `value` when it is a string, otherwise `undefined`. */
function asString(value: unknown): string | undefined {
  return typeof value === "string" ? value : undefined;
}

/**
 * Decodes HTML character references in a single pass.
 *
 * A single pass matters: decoding `&amp;` first and `&lt;` afterwards would turn
 * the escaped text `&amp;lt;` into `<` instead of `&lt;`.
 * Unknown names and out-of-range code points are left untouched.
 *
 * @param raw Text that may contain `&name;`, `&#123;` or `&#x1F600;` references.
 * @returns The text with recognised references replaced by their characters.
 */
function decodeEntities(raw: string): string {
  return raw.replace(
    /&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z]+);/g,
    (whole: string, body: string): string => {
      if (!body.startsWith("#")) return NAMED_ENTITIES.get(body) ?? whole;
      const isHex = body[1] === "x" || body[1] === "X";
      const code = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // fromCodePoint (unlike fromCharCode) handles astral code points, but
      // throws RangeError above U+10FFFF, hence the explicit bound.
      return Number.isInteger(code) && code >= 0 && code <= 0x10ffff
        ? String.fromCodePoint(code)
        : whole;
    },
  );
}

/**
 * Extracts the performer list from an event description.
 *
 * The HTML is flattened to text, the section after the "出演:" marker is taken
 * up to the first blank line, leading labels such as `<LIVE>` are stripped from
 * each line, and the remaining non-empty lines are joined with " / ".
 *
 * @param descriptionHtml The event's `description` HTML string.
 * @returns The joined performer names, or `null` when there is no marker or no
 *   non-empty line after it.
 */
function parseArtists(descriptionHtml: string): string | null {
  const text = cheerio.load(descriptionHtml).text();
  const match = LINEUP_RE.exec(text);
  if (!match) return null;

  // Stop at the first blank line (\n\n or \r\n\r\n).
  const [section = ""] = (match[1] ?? "").split(/\r?\n\r?\n/);
  const names = section
    .split(/\r?\n/)
    .map((line) => line.replace(LABEL_RE, "").trim())
    .filter((line) => line.length > 0);

  return names.length > 0 ? names.join(" / ") : null;
}

export const scraper: Scraper = {
  venue,

  /**
   * Fetches upcoming events from the venue's REST endpoint and maps them to
   * `EventInput` rows.
   *
   * Entries without a `YYYY-MM-DD` start date or without a title are skipped.
   *
   * @returns One `EventInput` per usable event in the response.
   * @throws Error with message `HTTP <status>` when the response is not OK.
   */
  async scrape(): Promise<EventInput[]> {
    // NOTE(review): toISOString() yields the UTC calendar date, which can lag the
    // venue's local date — confirm how the API interprets `start_date`.
    const today = new Date().toISOString().slice(0, 10);
    // NOTE(review): only the first page (50 events) is requested — confirm that
    // this is enough, or follow the API's pagination.
    const url = `${API_URL}?per_page=50&start_date=${today}`;
    const res = await fetch(url);
    if (!res.ok) throw new Error(`HTTP ${res.status}`);

    // Narrow the untrusted JSON at runtime instead of asserting its shape.
    const payload: unknown = await res.json();
    const rawEvents: unknown[] =
      isRecord(payload) && Array.isArray(payload.events) ? payload.events : [];

    const events: EventInput[] = [];

    for (const entry of rawEvents) {
      if (!isRecord(entry)) continue;

      const date = (asString(entry.start_date) ?? "").slice(0, 10);
      if (!/^\d{4}-\d{2}-\d{2}$/.test(date)) continue;

      // Build HH:MM only when both parts are present; otherwise the template
      // string would produce "undefined:undefined".
      const details = isRecord(entry.start_date_details)
        ? entry.start_date_details
        : undefined;
      const hour = asString(details?.hour);
      const minutes = asString(details?.minutes);
      const startTime =
        hour !== undefined && minutes !== undefined ? `${hour}:${minutes}` : null;

      const title = decodeEntities(asString(entry.title) ?? "").trim();
      if (!title) continue;

      // `||` is deliberate: an empty cost string is stored as null.
      const cost = asString(entry.cost)?.trim() || null;
      const sourceUrl = asString(entry.url) ?? null;
      // `image` is `false` when the event has no image.
      const imageUrl = isRecord(entry.image)
        ? asString(entry.image.url) ?? null
        : null;

      const description = asString(entry.description) ?? "";
      const openMatch = /OPEN\s*(\d{1,2}:\d{2})/i.exec(description);
      const artist = parseArtists(description);

      events.push({
        venue_id: venue.id,
        title,
        date,
        open_time: openMatch?.[1] ?? null,
        // "00:00" is treated as "no start time given".
        start_time: startTime && startTime !== "00:00" ? startTime : null,
        price: cost,
        image_url: imageUrl,
        source_url: sourceUrl,
        artist,
      });
    }

    return events;
  },
};
