summaryrefslogtreecommitdiff
path: root/app/scrapers/shimokitazawa-era.ts
blob: 3678a579f91ac488e052a278d5957c47c6984265 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/**
 * Static metadata describing the Shimokitazawa ERA venue.
 *
 * Exported on its own and also attached to the `scraper` object defined in
 * this module, which uses `venue.id` as the `venue_id` of every event it emits.
 */
export const venue: VenueMeta = {
  // Identifier copied into each scraped event as `venue_id`.
  id: "shimokitazawa-era",
  name: "下北沢ERA",
  // Plain http: the scraper in this file notes that the site's TLS certificate is invalid.
  url: "http://s-era.jp",
  area: "下北沢",
  // NOTE(review): presumably the audience capacity in people — confirm against VenueMeta.
  capacity: 200,
};

/**
 * Scraper for the Shimokitazawa ERA schedule page.
 *
 * Downloads `http://s-era.jp/schedule`, visits every `article.schedule-box`
 * element and converts each one that has both a `<time datetime>` value and a
 * non-empty `<h4>` title into an `EventInput`. Entries missing either of those
 * are skipped.
 *
 * @returns The events found on the page, in document order.
 * @throws Error with the message `HTTP <status>` when the response is not ok.
 */
export const scraper: Scraper = {
  venue,
  async scrape(): Promise<EventInput[]> {
    // The site's TLS certificate is invalid, so the page is requested over plain http.
    const response = await fetch("http://s-era.jp/schedule");
    if (!response.ok) throw new Error(`HTTP ${response.status}`);
    const $ = cheerio.load(await response.text());

    // Collapse every run of whitespace into one space and trim both ends.
    const squash = (text: string): string => text.replace(/\s+/g, " ").trim();
    // First two-digit "HH:MM" occurrence in the text, or null when there is none.
    const clockTime = (text: string): string | null =>
      text.match(/\d{2}:\d{2}/)?.[0] ?? null;

    const events: EventInput[] = [];

    for (const node of $("article.schedule-box").toArray()) {
      const $article = $(node);

      // Both a date and a title are required; otherwise the entry is ignored.
      const date = $article.find("time").attr("datetime") ?? null;
      if (!date) continue;

      const title = squash($article.find("h4").text());
      if (!title) continue;

      let openTime: string | null = null;
      let startTime: string | null = null;
      let price: string | null = null;

      // Each label span is paired with the `span.strong` that immediately follows it.
      for (const labelNode of $article.find(".detail-grid span.title").toArray()) {
        const $label = $(labelNode);
        const value = $label.next("span.strong").text().trim();
        switch ($label.text().trim()) {
          case "OPEN":
            openTime = clockTime(value);
            break;
          case "START":
            startTime = clockTime(value);
            break;
          case "ADV":
            price = value || null;
            break;
        }
      }

      // Without an ADV value, fall back to the emphasised text of the free-text paragraph.
      if (price === null) {
        price = squash($article.find("p.freetext span.strong").text()) || null;
      }

      // Artist names sit as direct text in div.w-flyer, so work on a copy with
      // the notes and detail sections stripped out and keep what is left.
      const $flyer = $article.find("div.w-flyer").clone();
      $flyer.find("section.notes-wrapper, div.detail-texts").remove();
      const artist = squash($flyer.text()) || null;

      const ticketUrl = $article.find("p.playguides a").attr("href") ?? null;

      events.push({
        venue_id: venue.id,
        title,
        artist,
        date,
        open_time: openTime,
        start_time: startTime,
        price,
        ticket_url: ticketUrl,
        source_url: "http://s-era.jp/schedule",
      });
    }

    return events;
  },
};