summary | refs | log | tree | commit | diff
path: root/app
diff options
context:
space:
mode:
author: yyamashita <yyamashita@mosquit.one> 2026-05-06 22:20:00 +0900
committer: yyamashita <yyamashita@mosquit.one> 2026-05-06 22:20:00 +0900
commit: f817604858891edb79e26459dae884b158774db1 (patch)
tree: 7a4cc1cd7f1091d2eece430e5d5de7d02d987669 /app
parent: 079176fae1d513d68b53c57274f3ae2864f352fc (diff)
Add 4 new venue scrapers: Meets 大塚, WARP 吉祥寺, FLAT 西荻窪, Pitbar 西荻窪
meets-otsuka: rinky.info プラットフォーム。div.blog-entry.event-wrap[event-date] から日付・タイトル・出演者・時間・価格・チケットURLを取得。 warp-kichijoji: WordPress カスタムテーマ。<h3>YYYY<br/><span>MM</span></h3> で 年月を取得、article.schedules-box から各イベントをパース。 flat-nishiogikubo: Wix サイトのため JS レンダリング必須。エラーを返す プレースホルダー実装(Playwright 等への移行が必要)。 pitbar-nishiogikubo: freecalend.com (mem25771) から取得を試みるが、 ボット遮断のため現状はエラー。URL パターン・代替策をコメントに記載。 SCRAPE_TARGETS.md に状態列(✅/⚠️)を追加。 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app')
-rw-r--r-- app/scrapers/flat-nishiogikubo.ts | 33
-rw-r--r-- app/scrapers/index.ts | 8
-rw-r--r-- app/scrapers/meets-otsuka.ts | 79
-rw-r--r-- app/scrapers/pitbar-nishiogikubo.ts | 101
-rw-r--r-- app/scrapers/warp-kichijoji.ts | 99
5 files changed, 320 insertions, 0 deletions
diff --git a/app/scrapers/flat-nishiogikubo.ts b/app/scrapers/flat-nishiogikubo.ts
new file mode 100644
index 0000000..03cc70c
--- /dev/null
+++ b/app/scrapers/flat-nishiogikubo.ts
@@ -0,0 +1,33 @@
+/**
+ * FLAT 西荻窪 — https://www.flat.rinky.info/schedule
+ *
+ * ⚠️ Wix サイトのためクライアントサイド JS レンダリング。
+ * 静的 fetch ではイベントデータを取得できない。
+ *
+ * 代替案:
+ * - Playwright/Puppeteer でヘッドレスブラウザを使用
+ * - Wix Events API (要サイトオーナーによる API キー発行)
+ *
+ * 現在はプレースホルダー実装で、scrape() は常に Error を throw する(空配列は返さない)。
+ */
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+/** Static metadata describing the FLAT 西荻窪 venue; referenced by the scraper exported below. */
+export const venue: VenueMeta = {
+ id: "flat-nishiogikubo",
+ name: "FLAT 西荻窪",
+ url: "https://www.flat.rinky.info",
+ area: "西荻窪",
+};
+
+/**
+ * Placeholder scraper for FLAT 西荻窪.
+ *
+ * The schedule is rendered client-side with JavaScript, so a static fetch has
+ * nothing to parse. Until a headless-browser implementation exists, every call
+ * to `scrape()` rejects with an Error explaining why.
+ */
+export const scraper: Scraper = {
+  venue,
+  scrape(): Promise<EventInput[]> {
+    // TODO: Replace with a headless browser implementation (e.g. Playwright).
+    const reason =
+      "FLAT 西荻窪 は Wix サイトのため JS レンダリングが必要です。" +
+      "ヘッドレスブラウザ(Playwright 等)への移行が必要です。";
+    return Promise.reject(new Error(reason));
+  },
+};
diff --git a/app/scrapers/index.ts b/app/scrapers/index.ts
index 97d2586..81a0eb7 100644
--- a/app/scrapers/index.ts
+++ b/app/scrapers/index.ts
@@ -8,6 +8,10 @@ import { scraper as wwwShibuya } from "./www-shibuya";
import { scraper as shibuyaO } from "./shibuya-o";
import { scraper as shinjukuLoft } from "./shinjuku-loft";
import { scraper as clubQuattro } from "./club-quattro";
+import { scraper as meetsOtsuka } from "./meets-otsuka";
+import { scraper as warpKichijoji } from "./warp-kichijoji";
+import { scraper as flatNishiogikubo } from "./flat-nishiogikubo";
+import { scraper as pitbarNishiogikubo } from "./pitbar-nishiogikubo";
export const ALL_SCRAPERS: Scraper[] = [
liquidRoom,
@@ -15,6 +19,10 @@ export const ALL_SCRAPERS: Scraper[] = [
shibuyaO,
shinjukuLoft,
clubQuattro,
+ meetsOtsuka,
+ warpKichijoji,
+ flatNishiogikubo,
+ pitbarNishiogikubo,
];
export type { Scraper } from "./base";
diff --git a/app/scrapers/meets-otsuka.ts b/app/scrapers/meets-otsuka.ts
new file mode 100644
index 0000000..57cf120
--- /dev/null
+++ b/app/scrapers/meets-otsuka.ts
@@ -0,0 +1,79 @@
+/**
+ * Meets 大塚 — https://meets.rinky.info/events
+ *
+ * rinky.info プラットフォーム。イベントは以下の構造:
+ * <div class="blog-entry event-wrap" event-date="YYYY-MM-DD">
+ * <h2><a href="/events/ID">タイトル</a></h2>
+ * <p class="act"><span>アーティスト</span></p>
+ * <p class="time">OPEN 18:30 / START 19:00</p>
+ * <span class="ticket-price__label">価格</span>
+ * <div class="image-bg" style="background-image: url(...)">
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+/**
+ * Static metadata for the Meets 大塚 venue. `id` is stamped onto every scraped
+ * event as `venue_id`, and `url` is the base used to build each event's source URL.
+ */
+export const venue: VenueMeta = {
+ id: "meets-otsuka",
+ name: "Meets 大塚",
+ url: "https://meets.rinky.info",
+ area: "大塚",
+};
+
+/**
+ * Resolves an href taken from the events page against a base URL.
+ * Returns null when the href is missing/empty or cannot be parsed as a URL.
+ */
+function resolveHref(href: string | undefined, base: string): string | null {
+  if (!href) return null;
+  try {
+    return new URL(href, base).toString();
+  } catch {
+    return null;
+  }
+}
+
+/** Zero-pads an "H:MM" capture to "HH:MM"; a missing capture becomes null. */
+function padTime(time: string | undefined): string | null {
+  return time ? time.padStart(5, "0") : null;
+}
+
+/**
+ * Scraper for Meets 大塚.
+ *
+ * Fetches the events listing once and turns every
+ * `div.blog-entry.event-wrap` element into an EventInput.
+ *
+ * @returns One entry per listed event that has a well-formed `event-date`
+ *          attribute and a non-empty title.
+ * @throws Error with message `HTTP <status>` when the listing request is not OK.
+ */
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    const res = await fetch("https://meets.rinky.info/events");
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const $ = cheerio.load(await res.text());
+    const events: EventInput[] = [];
+
+    $("div.blog-entry.event-wrap").each((_, el) => {
+      const $el = $(el);
+
+      // An entry without a YYYY-MM-DD event-date attribute cannot be dated; skip it.
+      const date = $el.attr("event-date") ?? "";
+      if (!/^\d{4}-\d{2}-\d{2}$/.test(date)) return;
+
+      const $link = $el.find("h2 a").first();
+      const title = $link.text().trim();
+      if (!title) return;
+
+      // Resolve instead of concatenating so absolute hrefs and hrefs without a
+      // leading slash still produce a valid URL.
+      const sourceUrl = resolveHref($link.attr("href"), venue.url);
+
+      // Multiple performers are listed as separate <span>s; join them into one string.
+      const artist =
+        $el
+          .find("p.act span")
+          .map((_, s) => $(s).text().trim())
+          .get()
+          .join("、") || null;
+
+      // Accept single-digit hours ("OPEN 9:00") and normalise them to HH:MM.
+      const timeText = $el.find("p.time").first().text();
+      const openMatch = timeText.match(/OPEN\s*(\d{1,2}:\d{2})/i);
+      const startMatch = timeText.match(/START\s*(\d{1,2}:\d{2})/i);
+
+      const price = $el.find("span.ticket-price__label").first().text().trim() || null;
+
+      // The flyer is set as an inline style: background-image: url("...")
+      const bgStyle = $el.find("div.image-bg").attr("style") ?? "";
+      const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/);
+      const imageUrl = imgMatch?.[1] ?? null;
+
+      // First link inside the entry whose href contains one of these substrings.
+      const ticketUrl =
+        $el
+          .find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket']")
+          .first()
+          .attr("href") ?? null;
+
+      events.push({
+        venue_id: venue.id,
+        title,
+        artist,
+        date,
+        open_time: padTime(openMatch?.[1]),
+        start_time: padTime(startMatch?.[1]),
+        price,
+        ticket_url: ticketUrl,
+        image_url: imageUrl,
+        source_url: sourceUrl,
+      });
+    });
+
+    return events;
+  },
+};
diff --git a/app/scrapers/pitbar-nishiogikubo.ts b/app/scrapers/pitbar-nishiogikubo.ts
new file mode 100644
index 0000000..5c70023
--- /dev/null
+++ b/app/scrapers/pitbar-nishiogikubo.ts
@@ -0,0 +1,101 @@
+/**
+ * Pitbar 西荻窪 — http://freecalend.com/open/mem25771_date{YYYYMM}
+ *
+ * スケジュールは Ameblo (https://ameblo.jp/pitbar/) 経由で
+ * freecalend.com に掲載されているが、自動リクエストをブロックしている。
+ *
+ * 代替案:
+ * - User-Agent を設定したヘッドレスブラウザで freecalend を取得
+ * - 公式 Instagram / X (@pitbar_nishiogi) の投稿を取得
+ * - 手動でイベントを登録する管理画面を用意する
+ *
+ * 月ごとの URL パターン: http://freecalend.com/open/mem25771_date{YYYYMM}
+ */
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+/**
+ * Static metadata for the Pitbar 西荻窪 venue. `id` is stamped onto every scraped
+ * event as `venue_id`. Note that `url` is the venue's Ameblo blog, not the
+ * freecalend.com calendar that the scraper actually fetches.
+ */
+export const venue: VenueMeta = {
+ id: "pitbar-nishiogikubo",
+ name: "Pitbar 西荻窪",
+ url: "https://ameblo.jp/pitbar",
+ area: "西荻窪",
+};
+
+// freecalend.com member id, interpolated into the monthly calendar URL (mem<ID>_date<YYYYMM>).
+const FREECALEND_MEMBER = "25771";
+
+/**
+ * Scraper for Pitbar 西荻窪, reading the freecalend.com monthly calendar.
+ *
+ * Fetches the current and the following month, and turns every calendar cell
+ * (`td` whose class contains `day_`) that holds more than a bare number into
+ * an EventInput. A month whose request fails or returns an empty body is
+ * skipped, but the reason is remembered for the final error message.
+ *
+ * @returns Events found across the fetched months.
+ * @throws Error when no event could be extracted from any month; the message
+ *         is suffixed with the per-month failure reasons when there are any.
+ */
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    // Load cheerio once up front instead of re-importing it on every loop iteration.
+    const { load } = await import("cheerio");
+    const events: EventInput[] = [];
+    const failures: string[] = [];
+
+    for (const ym of upcomingMonths(2)) {
+      const url = `http://freecalend.com/open/mem${FREECALEND_MEMBER}_date${ym}`;
+      const res = await fetch(url, {
+        headers: {
+          "User-Agent":
+            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/124 Safari/537.36",
+          Referer: "https://ameblo.jp/pitbar/",
+        },
+        redirect: "follow",
+      });
+
+      // Best effort per month: keep going, but record why this month produced nothing.
+      if (!res.ok) {
+        failures.push(`${ym}: HTTP ${res.status}`);
+        continue;
+      }
+      const html = await res.text();
+      if (!html.trim()) {
+        failures.push(`${ym}: empty response`);
+        continue;
+      }
+
+      // freecalend renders an HTML table calendar; the event name and times
+      // are expected inside <td class="day_...">.
+      const $ = load(html);
+
+      $("td[class*='day_']").each((_, el) => {
+        const $el = $(el);
+        const text = $el.text().trim();
+        if (!text || /^\d+$/.test(text)) return; // skip cells that hold only the day number
+
+        const dayMatch = $el.attr("class")?.match(/day_(\d+)/);
+        if (!dayMatch) return;
+        const day = dayMatch[1].padStart(2, "0");
+        const date = `${ym.slice(0, 4)}-${ym.slice(4)}-${day}`;
+
+        const lines = text
+          .split(/[\n\r]+/)
+          .map((l) => l.trim())
+          .filter(Boolean);
+        // The cell presumably starts with the bare day number, so prefer the
+        // first line that is not purely digits as the title.
+        const title = lines.find((l) => !/^\d+$/.test(l)) ?? lines[0] ?? text.slice(0, 100);
+
+        // First time found is treated as OPEN, the second as START.
+        const timeMatch = text.match(/(\d{1,2}:\d{2})/g);
+        const openTime = timeMatch?.[0] ?? null;
+        const startTime = timeMatch?.[1] ?? null;
+
+        events.push({
+          venue_id: venue.id,
+          title,
+          date,
+          open_time: openTime,
+          start_time: startTime,
+          source_url: url,
+        });
+      });
+    }
+
+    if (events.length === 0) {
+      const detail = failures.length > 0 ? ` (${failures.join(", ")})` : "";
+      throw new Error(
+        "Pitbar freecalend からデータを取得できませんでした。" +
+          "freecalend.com が自動リクエストをブロックしている可能性があります。" +
+          detail
+      );
+    }
+
+    return events;
+  },
+};
+
+/**
+ * Lists consecutive calendar months as "YYYYMM" strings, starting with the
+ * current month according to the local clock.
+ *
+ * @param count Number of months to produce; zero or less yields an empty list.
+ * @returns The month keys in chronological order.
+ */
+function upcomingMonths(count: number): string[] {
+  const today = new Date();
+  const baseYear = today.getFullYear();
+  const baseMonth = today.getMonth();
+  const result: string[] = [];
+
+  let offset = 0;
+  while (offset < count) {
+    // Date normalises month overflow, so baseMonth + offset may exceed 11.
+    const firstOfMonth = new Date(baseYear, baseMonth + offset, 1);
+    const monthPart = String(firstOfMonth.getMonth() + 1).padStart(2, "0");
+    result.push(`${firstOfMonth.getFullYear()}${monthPart}`);
+    offset += 1;
+  }
+
+  return result;
+}
diff --git a/app/scrapers/warp-kichijoji.ts b/app/scrapers/warp-kichijoji.ts
new file mode 100644
index 0000000..8a828ea
--- /dev/null
+++ b/app/scrapers/warp-kichijoji.ts
@@ -0,0 +1,99 @@
+/**
+ * 吉祥寺 WARP — http://warp.rinky.info/schedules
+ *
+ * WordPress カスタムテーマ。年月は:
+ * <h3>2026<br /><span>05</span></h3>
+ *
+ * イベント構造:
+ * <article id="box-DD-ID" class="schedules-box">
+ * <section class="date-box[-sun|-sat]">DD<span class="dayofweek">...</span></section>
+ * <h4>タイトル</h4>
+ * <section class="notes-wrapper">
+ * <p>OPEN / START<br/><span class="strong">HH:MM / HH:MM</span></p>
+ * <p>ADV / DOOR<br/><span class="strong">¥XXXX / ¥XXXX</span></p>
+ * </section>
+ * <section class="flyer"><img data-src="..." /></section>
+ * </article>
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+/** Static metadata for the 吉祥寺 WARP venue; `id` is stamped onto every scraped event as `venue_id`. */
+export const venue: VenueMeta = {
+ id: "warp-kichijoji",
+ name: "吉祥寺 WARP",
+ url: "http://warp.rinky.info",
+ area: "吉祥寺",
+};
+
+/**
+ * Scraper for 吉祥寺 WARP.
+ *
+ * Fetches the schedule page, reads the year and month from the first <h3>,
+ * and turns every `article.schedules-box` into an EventInput.
+ *
+ * @returns One entry per article that has a `box-DD-…` id and a non-empty
+ *          title; an empty list when no year/month heading is found.
+ * @throws Error with message `HTTP <status>` when the schedule request is not OK.
+ */
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    const res = await fetch("http://warp.rinky.info/schedules");
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const $ = cheerio.load(await res.text());
+    const events: EventInput[] = [];
+
+    // Year + month come from <h3>2026<br/><span>05</span></h3>. text() drops the
+    // <br>, so the digits may arrive with or without a separator; \D* covers both.
+    // NOTE(review): only the first <h3> is read, so every article is dated with
+    // that year/month — confirm the page never lists more than one month.
+    const h3Text = $("h3").first().text().trim();
+    const yearMonthMatch = h3Text.match(/(\d{4})\D*(\d{2})/);
+    if (!yearMonthMatch) return events;
+    const year = yearMonthMatch[1];
+    const month = yearMonthMatch[2];
+
+    $("article.schedules-box").each((_, el) => {
+      const $el = $(el);
+
+      // Day from article id: "box-03-23546" → "03"
+      const id = $el.attr("id") ?? "";
+      const dayMatch = id.match(/^box-(\d{2})-/);
+      if (!dayMatch) return;
+      const day = dayMatch[1];
+      const date = `${year}-${month}-${day}`;
+
+      // text() silently drops <br> elements, which would glue the lines of a
+      // multi-line title together. Swap each <br> for a space on a clone (so
+      // the parsed document is left untouched) before reading the text. The
+      // literal "<br>" replacement is kept for titles that carry the tag as
+      // escaped text.
+      const $heading = $el.find("h4").first().clone();
+      $heading.find("br").replaceWith(" ");
+      const title = $heading
+        .text()
+        .replace(/<br\s*\/?>/gi, " ")
+        .replace(/\s+/g, " ")
+        .trim();
+      if (!title) return;
+
+      // First notes-wrapper <p> contains OPEN/START times, e.g. "18:30 / 19:00"
+      const $notes = $el.find("section.notes-wrapper p");
+      const timeStrong = $notes.eq(0).find("span.strong").text().trim();
+      const [openTime, startTime] = timeStrong.split("/").map((s) => s.trim());
+
+      // Second <p> contains ADV/DOOR price, e.g. "¥3,000 / ¥3,500";
+      // an empty value or the "TBA / TBA" placeholder is stored as null.
+      const priceStrong = $notes.eq(1).find("span.strong").text().trim();
+      const price = priceStrong !== "TBA / TBA" && priceStrong ? priceStrong : null;
+
+      // Image: prefer data-src (lazy), fall back to noscript img src
+      const $flyer = $el.find("section.flyer img").first();
+      const rawImg =
+        $flyer.attr("data-src") ??
+        $el.find("section.flyer noscript img").first().attr("src") ??
+        null;
+      // Strip ShortPixel CDN prefix if present
+      const imageUrl = rawImg
+        ? rawImg.replace(/^https?:\/\/sp-ao\.shortpixel\.ai\/client\/[^/]+\//, "")
+        : null;
+
+      events.push({
+        venue_id: venue.id,
+        title,
+        artist: null,
+        date,
+        open_time: isTime(openTime) ? openTime : null,
+        start_time: isTime(startTime) ? startTime : null,
+        price,
+        ticket_url:
+          $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia']").first().attr("href") ??
+          null,
+        image_url: imageUrl,
+        source_url: null,
+      });
+    });
+
+    return events;
+  },
+};
+
+/**
+ * Tells whether a value looks like a two-digit "HH:MM" time.
+ * Surrounding whitespace is ignored; undefined and empty strings are rejected.
+ */
+function isTime(s: string | undefined): boolean {
+  if (!s) return false;
+  return /^\d{2}:\d{2}$/.test(s.trim());
+}