summaryrefslogtreecommitdiff
path: root/app/scrapers/moon-step-nakano.ts
blob: e67e128b01866c14f26b67357d1a909c1ed35415 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
/**
 * 中野 MOON STEP — https://nakano-dynamite.com/moonstep
 *
 * WordPress + The Events Calendar プラグイン。REST API で取得。
 * エンドポイント: /moonstep/wp-json/tribe/events/v1/events
 * description フィールドは HTML 文字列で、<pre> 内に出演情報が含まれる:
 *   出演:\n<LIVE>\nアーティスト\n...
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/**
 * Static metadata describing this venue.
 *
 * `id` doubles as the `venue_id` stamped on every event the scraper in this
 * module emits.
 */
export const venue: VenueMeta = {
  id: "moon-step-nakano",
  name: "中野 MOON STEP",
  url: "https://nakano-dynamite.com/moonstep",
  area: "中野",
};

/**
 * Events collection endpoint (`tribe/events/v1`) under the venue site's
 * `wp-json` REST root. Query parameters are appended by the scraper.
 */
const API_URL = "https://nakano-dynamite.com/moonstep/wp-json/tribe/events/v1/events";

/**
 * Matches one leading section label on a line: optional whitespace, an opening
 * `<`, `【`, `[` or `&`, then the shortest run up to the nearest `>`, `】`, `]`
 * or `;`, plus any whitespace that follows. The `]` inside the closing class
 * must be escaped, otherwise the class ends early and `;]` becomes a required
 * literal suffix.
 */
const SECTION_LABEL = /^\s*[<【\[&].*?[>】\];]\s*/;

/**
 * Extracts the performer list from an event's HTML description.
 *
 * The description is flattened to plain text; the text following the first
 * "出演" marker (with an ASCII or full-width colon) is taken up to the first
 * blank line; each line then has one leading section label removed and is
 * trimmed. Lines that end up empty (for example a line holding only a label
 * such as `<LIVE>`) are dropped.
 *
 * @param descriptionHtml - Raw HTML of the event description.
 * @returns The remaining lines joined with " / ", or `null` when the marker is
 *   absent or no non-empty line follows it.
 */
function parseArtists(descriptionHtml: string): string | null {
  const text = cheerio.load(descriptionHtml).text();

  // \uFF1A is the full-width colon commonly used in Japanese text.
  const section = text.match(/出演[:\uFF1A]\s*([\s\S]+)/)?.[1];
  if (!section) return null;

  // The performer section ends at the first blank line (\n\n or \r\n\r\n).
  const firstParagraph = section.split(/\r?\n\r?\n/)[0] ?? "";

  const names = firstParagraph
    .split(/\r?\n/)
    .map((line) => line.replace(SECTION_LABEL, "").trim())
    .filter((line) => line.length > 0);

  return names.length > 0 ? names.join(" / ") : null;
}

/** Replacement text for the named entities decoded in event titles. */
const TITLE_NAMED_ENTITIES: Record<string, string> = { amp: "&", lt: "<", gt: ">" };

/** Narrows an arbitrary JSON value to a non-null object that can be indexed by key. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

/** Returns `value` when it is a string, otherwise `undefined`. */
function readString(value: unknown): string | undefined {
  return typeof value === "string" ? value : undefined;
}

/**
 * Decodes decimal numeric references (`&#8211;`) and `&amp;` / `&lt;` / `&gt;`
 * in an event title.
 *
 * Decoding happens in a single pass so already-decoded output is never decoded
 * again (`&amp;lt;` yields `&lt;`, not `<`). Numeric references are resolved
 * with `String.fromCodePoint`, so code points above U+FFFF are preserved;
 * references beyond U+10FFFF are left untouched.
 */
function decodeTitleEntities(raw: string): string {
  return raw.replace(
    /&(?:#(\d+)|(amp|lt|gt));/g,
    (whole: string, decimal: string | undefined, named: string | undefined) => {
      if (named !== undefined) return TITLE_NAMED_ENTITIES[named] ?? whole;
      const codePoint = Number(decimal);
      // fromCodePoint throws a RangeError above U+10FFFF, so keep the raw text.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    },
  );
}

/**
 * Scraper for this venue's event listing.
 *
 * `scrape()` requests a single page of up to 50 events whose start date is on
 * or after the current UTC calendar date, and maps each entry to an
 * `EventInput`. Entries without a `YYYY-MM-DD` start date or without a title
 * are skipped. Fields that are missing or not of the expected type become
 * `null` instead of raising.
 *
 * @throws Error with message `HTTP <status>` when the response is not OK.
 */
export const scraper: Scraper = {
  venue,
  async scrape(): Promise<EventInput[]> {
    // toISOString() is UTC, so this is the UTC date, not the local one.
    const today = new Date().toISOString().slice(0, 10);
    const url = `${API_URL}?per_page=50&start_date=${today}`;
    const res = await fetch(url);
    if (!res.ok) throw new Error(`HTTP ${res.status}`);

    // The payload is external JSON: validate its shape rather than asserting it.
    const payload: unknown = await res.json();
    const list: unknown = isRecord(payload) ? payload.events : undefined;
    const rawEvents: unknown[] = Array.isArray(list) ? list : [];

    const events: EventInput[] = [];

    for (const e of rawEvents) {
      if (!isRecord(e)) continue;

      // start_date is expected to begin with YYYY-MM-DD; keep only that part.
      const date = (readString(e.start_date) ?? "").slice(0, 10);
      if (!/^\d{4}-\d{2}-\d{2}$/.test(date)) continue;

      // Build HH:MM only when both parts are present, so a partial
      // start_date_details never yields "undefined:undefined".
      const details: unknown = e.start_date_details;
      const hour = isRecord(details) ? readString(details.hour) : undefined;
      const minutes = isRecord(details) ? readString(details.minutes) : undefined;
      const startTime =
        hour !== undefined && minutes !== undefined ? `${hour}:${minutes}` : null;

      const title = decodeTitleEntities(readString(e.title) ?? "").trim();
      if (!title) continue;

      // `||` is deliberate: an empty or whitespace-only cost is stored as null.
      const cost = readString(e.cost)?.trim() || null;
      const sourceUrl = readString(e.url) ?? null;

      // `image` is treated as absent unless it is an object (the API value may be `false`).
      const image: unknown = e.image;
      const imageUrl = isRecord(image) ? readString(image.url) ?? null : null;

      const description = readString(e.description) ?? "";
      const openMatch = description.match(/OPEN\s*(\d{1,2}:\d{2})/i);
      const artist = parseArtists(description);

      events.push({
        venue_id: venue.id,
        title,
        date,
        open_time: openMatch?.[1] ?? null,
        // "00:00" is stored as "no start time".
        start_time: startTime && startTime !== "00:00" ? startTime : null,
        price: cost,
        image_url: imageUrl,
        source_url: sourceUrl,
        artist,
      });
    }

    return events;
  },
};