summaryrefslogtreecommitdiff
path: root/app/scrapers/pitbar-nishiogikubo.ts
blob: 255300262e76ea3277ac5fe6ba065bd3c221ee21 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/**
 * Pitbar 西荻窪 — http://freecalend.com/open/mem25771
 *
 * freecalend.com は静的 fetch をブロックするため Playwright を使用。
 *
 * DOM 構造:
 *   id="cal-25771-{year}-{month}-{day}"  ← 日付セル
 *     テキストは: 日数字 + "M.D(day)" + イベント本文
 *   id="sitatumari-25771-..."            ← 区切り (スキップ)
 *
 * open / start 時刻は "open HH:MM" / "start HH:MM" テキストから取得。
 */
import type { Page } from "playwright";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";
import { getBrowser } from "~/lib/playwright.server";

/**
 * Static metadata for the Pitbar 西荻窪 venue.
 *
 * `id` is stamped onto every scraped event as `venue_id`. `url` is the venue's
 * own page and is distinct from the calendar URL that the scraper actually
 * loads.
 */
export const venue: VenueMeta = {
  id: "pitbar-nishiogikubo",
  name: "Pitbar 西荻窪",
  url: "https://ameblo.jp/pitbar",
  area: "西荻窪",
  // NOTE(review): presumably an audience head-count — confirm against VenueMeta.
  capacity: 100,
};

// Public calendar page that is loaded in the browser; also recorded as each
// event's `source_url`.
const CALENDAR_URL = "http://freecalend.com/open/mem25771";
// Member number embedded in the calendar cell ids ("cal-<MEMBER_ID>-YYYY-M-D").
// It is the same number that appears at the end of CALENDAR_URL.
const MEMBER_ID = "25771";

// A cell is skipped when its first content line (the would-be event title)
// starts with "BAR営業": such cells are bar opening-hours entries, not live events.
const BAR_ONLY_RE = /^BAR営業/;

/**
 * Reads the rendered calendar cells from `page` and converts them into events.
 *
 * Each element whose id starts with `cal-<MEMBER_ID>-` is treated as one day
 * cell; the date is rebuilt from the id as a zero-padded `YYYY-MM-DD` string.
 * Cells outside the inclusive `[dateFrom, dateTo]` window, cells with no
 * content after the two header lines, and cells whose title matches
 * `BAR_ONLY_RE` are skipped.
 *
 * @param page     Page that already has the calendar loaded.
 * @param dateFrom Inclusive lower bound, `YYYY-MM-DD`.
 * @param dateTo   Inclusive upper bound, `YYYY-MM-DD`.
 * @returns One event per remaining cell, in document order.
 */
async function extractEvents(page: Page, dateFrom: string, dateTo: string): Promise<EventInput[]> {
  // This callback is executed inside the browser, so it must stay
  // self-contained: only `memberId` crosses the boundary.
  const rawCells = await page.evaluate((memberId: string) => {
    const idPrefix = `cal-${memberId}-`;
    const nodes = document.querySelectorAll(`[id^="${idPrefix}"]`);
    const found: { date: string; text: string }[] = [];

    for (let i = 0; i < nodes.length; i++) {
      const node = nodes[i];
      if (!node) continue;

      // id: cal-25771-YYYY-M-D
      const segments = (node.getAttribute("id") ?? "").split("-");
      const yyyy = segments[2];
      const mm = segments[3];
      const dd = segments[4];
      if (!yyyy || !mm || !dd) continue;

      found.push({
        date: [yyyy, mm.padStart(2, "0"), dd.padStart(2, "0")].join("-"),
        text: node.textContent?.trim() ?? "",
      });
    }

    return found;
  }, MEMBER_ID);

  const results: EventInput[] = [];

  for (const { date, text } of rawCells) {
    // Zero-padded ISO dates order correctly under plain string comparison.
    const withinWindow = dateFrom <= date && date <= dateTo;
    if (!withinWindow) continue;

    // The first non-empty line is the day number and the second is
    // "M.D(dayname)"; everything after those two is the event body.
    // NOTE(review): this relies on the cell's textContent containing newline
    // characters between those parts — confirm against the live markup.
    const body = text
      .split(/\n/)
      .map((line) => line.trim())
      .filter((line) => line !== "")
      .slice(2);

    // An empty body yields an undefined headline and is skipped here as well.
    const [headline] = body;
    if (!headline || BAR_ONLY_RE.test(headline)) continue;

    const joined = body.join("\n");
    const opens = /open\s+(\d{1,2}:\d{2})/i.exec(joined);
    const starts = /start\s+(\d{1,2}:\d{2})/i.exec(joined);

    // Price: the first line that mentions adv/door/前売 followed by yen/円.
    const priceLine = /((?:adv|door|前売)[^\n]*(?:yen|円)[^\n]*)/i.exec(joined);

    // Artists are the lines that start with "■", with the marker stripped.
    const performers: string[] = [];
    for (const line of body) {
      if (line.startsWith("■")) performers.push(line.slice(1).trim());
    }
    const artistList = performers.join("、");

    results.push({
      venue_id: venue.id,
      title: headline,
      artist: artistList === "" ? null : artistList,
      date,
      open_time: opens?.[1] ?? null,
      start_time: starts?.[1] ?? null,
      price: priceLine?.[1]?.trim() ?? null,
      source_url: CALENDAR_URL,
    });
  }

  return results;
}

/**
 * Scraper entry for Pitbar 西荻窪.
 *
 * Loads the freecalend.com calendar in a Playwright page and returns the
 * events dated from today through 35 days ahead.
 */
export const scraper: Scraper = {
  venue,
  /**
   * Opens the calendar page, waits for it to settle, and extracts the events
   * inside the look-ahead window.
   *
   * The page is always closed, even when navigation or extraction throws; the
   * browser returned by `getBrowser()` is not closed here.
   *
   * @returns Events found in the window; rejects if navigation fails or times out.
   */
  async scrape(): Promise<EventInput[]> {
    const browser = await getBrowser();
    const page = await browser.newPage();

    try {
      await page.goto(CALENDAR_URL, {
        waitUntil: "domcontentloaded",
        timeout: 20_000,
      });
      // Fixed pause after DOMContentLoaded — presumably gives the calendar's
      // client-side rendering time to populate the day cells.
      await page.waitForTimeout(5_000);

      // The venue is in Tokyo, so the calendar's cell dates are presumably
      // Japan-local. `toISOString()` reports the UTC date, which lags JST by
      // nine hours and would make "today" resolve to yesterday between 00:00
      // and 08:59 JST. Shift the clock by the JST offset (Japan has no DST, so
      // a fixed offset is exact) before taking the ISO date part.
      const JST_OFFSET_MS = 9 * 60 * 60 * 1_000;
      const DAY_MS = 86_400_000;
      const LOOKAHEAD_DAYS = 35;

      const nowInJst = new Date(Date.now() + JST_OFFSET_MS);
      const dateFrom = nowInJst.toISOString().slice(0, 10);
      const dateTo = new Date(nowInJst.getTime() + LOOKAHEAD_DAYS * DAY_MS)
        .toISOString()
        .slice(0, 10);

      return await extractEvents(page, dateFrom, dateTo);
    } finally {
      await page.close();
    }
  },
};