summary | refs | log | tree | commit | diff
path: root/app/scrapers/shinjuku-loft.ts
blob: d5602e770ad748726a7909665173bbf583f663ab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/**
 * Static metadata describing the venue handled by this module.
 *
 * The `id` is the value the scraper in this file writes into the `venue_id`
 * field of every event it emits.
 */
export const venue: VenueMeta = {
  id: "shinjuku-loft",
  // Display name, in Japanese ("Shinjuku LOFT").
  name: "新宿 LOFT",
  // Same origin as the schedule page fetched by the scraper in this file.
  url: "https://www.loft-prj.co.jp",
  // Area label, in Japanese ("Shinjuku").
  area: "新宿",
};

/** Schedule listing page; also the fallback base for resolving relative links. */
const SCHEDULE_URL = "https://www.loft-prj.co.jp/schedule/loft";

/** "OPEN 18:00" / "OPEN 9:30" — the hour may have one or two digits. */
const OPEN_TIME_RE = /OPEN\s*(\d{1,2}:\d{2})/i;

/** "START 19:00" / "START 9:30" — the hour may have one or two digits. */
const START_TIME_RE = /START\s*(\d{1,2}:\d{2})/i;

/** Matches strings that already start with a URL scheme (e.g. "https:"). */
const ABSOLUTE_URL_RE = /^[a-z][a-z\d+.-]*:/i;

/** Returns the clock time captured by `pattern` as zero-padded `HH:MM`, or `null` when absent. */
function extractClockTime(text: string, pattern: RegExp): string | null {
  const clock = pattern.exec(text)?.[1];
  // "9:30" -> "09:30"; values that are already five characters are unchanged.
  return clock === undefined ? null : clock.padStart(5, "0");
}

/**
 * Resolves `raw` against `base` when it is relative or protocol-relative.
 * Already-absolute URLs are returned untouched so stored values stay
 * byte-identical; an unparseable value is also returned as-is.
 */
function toAbsoluteUrl(raw: string, base: string): string {
  if (ABSOLUTE_URL_RE.test(raw)) return raw;
  try {
    return new URL(raw, base).href;
  } catch {
    return raw;
  }
}

/**
 * Scraper for the venue's schedule listing page.
 *
 * `scrape()` downloads the listing, walks every event anchor inside
 * `section.block_schedule_list`, and converts each one into an `EventInput`.
 *
 * - Anchors missing a year, month, day, or title are skipped.
 * - Each event URL is emitted at most once. An anchor is only recorded as
 *   seen after it passed validation, so an incomplete anchor cannot hide a
 *   later, complete anchor that points at the same event.
 * - `open_time` / `start_time` are `HH:MM` strings or `null`.
 * - `artist` is the non-empty artist tags joined with " / ", or `null`.
 * - `source_url` and `image_url` are made absolute when the page supplies
 *   relative values.
 *
 * @returns The events found on the page, in document order.
 * @throws Error with message `HTTP <status>` when the response is not OK;
 *   any rejection from `fetch` itself propagates unchanged.
 */
export const scraper: Scraper = {
  venue,
  async scrape(): Promise<EventInput[]> {
    const res = await fetch(SCHEDULE_URL);
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    // Prefer the final (post-redirect) URL as the base for relative links.
    const baseUrl = res.url || SCHEDULE_URL;
    const html = await res.text();
    const $ = cheerio.load(html);
    const events: EventInput[] = [];
    const seen = new Set<string>();

    $("section.block_schedule_list a[href*='/schedule/loft/schedule/']").each(
      (_, el) => {
        const $el = $(el);
        const sourceUrl = toAbsoluteUrl($el.attr("href") ?? "", baseUrl);
        if (seen.has(sourceUrl)) return;

        const year = $el.find("time div.year").text().trim();
        const month = $el.find("time div.month").text().trim();
        const day = $el.find("time div.day").text().trim();
        if (!year || !month || !day) return;
        const date = `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;

        const title = $el.find(".c_title span").text().trim();
        if (!title) return;

        // Mark the URL only now: an anchor that failed the checks above must
        // not suppress a later complete anchor for the same event.
        seen.add(sourceUrl);

        const timeText = $el.find(".open").text().trim();

        const artists = $el
          .find("ul.artist_tag li")
          .map((_, li) => $(li).text().trim())
          .get()
          // Drop the "..." truncation marker and blank tags, which would
          // otherwise leave dangling " / " separators in the joined string.
          .filter((name) => name !== "" && name !== "...");

        const rawImage = $el.find("span.bg").attr("data-bg")?.trim();

        events.push({
          venue_id: venue.id,
          title,
          artist: artists.join(" / ") || null,
          date,
          open_time: extractClockTime(timeText, OPEN_TIME_RE),
          start_time: extractClockTime(timeText, START_TIME_RE),
          // A missing or empty attribute becomes null rather than "".
          image_url: rawImage ? toAbsoluteUrl(rawImage, baseUrl) : null,
          source_url: sourceUrl,
        });
      }
    );

    return events;
  },
};