summary | refs | log | tree | commit | diff
path: root/app/scrapers
diff options
context:
space:
mode:
Diffstat (limited to 'app/scrapers')
-rw-r--r-- app/scrapers/buzzfront-yokohama.ts 111
-rw-r--r-- app/scrapers/index.ts 2
2 files changed, 113 insertions, 0 deletions
diff --git a/app/scrapers/buzzfront-yokohama.ts b/app/scrapers/buzzfront-yokohama.ts
new file mode 100644
index 0000000..d206e1d
--- /dev/null
+++ b/app/scrapers/buzzfront-yokohama.ts
@@ -0,0 +1,111 @@
+/**
+ * BuzzFront YOKOHAMA — https://buzzfront.net
+ *
+ * meets-otsuka と同じ omatsuri.tech プラットフォーム。構造:
+ * <div class="blog-entry event-wrap" event-date="YYYY-MM-DD">
+ * <h2><a href="/events/ID">タイトル</a></h2>
+ * <p class="act"><span>アーティスト</span></p>
+ * <p class="time">OPEN 19:00 / START 19:30</p>
+ * <span class="ticket-price__label">前売券(税込)</span>
+ * <span class="ticket-price__amount">3,000円</span>
+ * <div class="image-bg" style="background-image: url(...)">
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+/**
+ * Static metadata describing the BuzzFront YOKOHAMA venue.
+ *
+ * Within this module `id` is written onto every scraped event as `venue_id`,
+ * and `url` serves as the base for event detail-page links. The object is
+ * also exposed to callers through `scraper.venue`.
+ */
+export const venue: VenueMeta = {
+ id: "buzzfront-yokohama",
+ name: "BuzzFront YOKOHAMA",
+ url: "https://buzzfront.net",
+ area: "横浜",
+ capacity: 300,
+};
+
+/** Shape of the `event-date` attribute on each event wrapper: YYYY-MM-DD. */
+const EVENT_DATE_PATTERN = /^\d{4}-\d{2}-\d{2}$/;
+
+/** Door-open time, e.g. "OPEN 19:00" or "OPEN 9:00". */
+const OPEN_TIME_PATTERN = /OPEN\s*(\d{1,2}:\d{2})/i;
+
+/** Show-start time, e.g. "START 19:30" or "START 9:30". */
+const START_TIME_PATTERN = /START\s*(\d{1,2}:\d{2})/i;
+
+/** Captures the URL inside an inline `background-image: url(...)` style. */
+const BACKGROUND_URL_PATTERN = /url\(["']?([^"')]+)["']?\)/;
+
+/**
+ * Returns the time captured by `pattern` in `text`, normalised to HH:MM.
+ *
+ * Single-digit hours ("9:00") are zero-padded so the result always has the
+ * two-digit-hour shape; returns null when the pattern does not match.
+ */
+function extractTime(text: string, pattern: RegExp): string | null {
+  const time = text.match(pattern)?.[1];
+  return time ? time.padStart(5, "0") : null;
+}
+
+/**
+ * Resolves an event link's `href` against the venue base URL.
+ *
+ * Using URL resolution instead of string concatenation keeps absolute hrefs
+ * intact and handles relative paths with or without a leading slash.
+ * Returns null for a missing/empty href or one that cannot be parsed.
+ */
+function resolveSourceUrl(href: string | undefined): string | null {
+  if (!href) return null;
+  try {
+    return new URL(href, venue.url).href;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Parses one BuzzFront event-listing page into event records.
+ *
+ * Each `div.blog-entry.event-wrap` element becomes one event. Entries whose
+ * `event-date` attribute is not YYYY-MM-DD, or whose `h2 a` title is empty,
+ * are skipped. Optional fields that cannot be found are set to null.
+ *
+ * @param html Raw HTML of a listing page; an empty string yields no events.
+ * @returns Events in document order.
+ */
+function parseHtml(html: string): EventInput[] {
+  const $ = cheerio.load(html);
+  const events: EventInput[] = [];
+
+  $("div.blog-entry.event-wrap").each((_, el) => {
+    const $el = $(el);
+
+    const date = $el.attr("event-date") ?? "";
+    if (!EVENT_DATE_PATTERN.test(date)) return;
+
+    const $link = $el.find("h2 a").first();
+    const title = $link.text().trim();
+    if (!title) return;
+
+    // Multiple performers are listed as separate <span>s; join them into one string.
+    const artist =
+      $el
+        .find("p.act span")
+        .map((_, s) => $(s).text().trim())
+        .get()
+        .join("、") || null;
+
+    const timeText = $el.find("p.time").first().text();
+
+    // A price amount is the sibling element right after the label that
+    // contains the given keyword; empty string when there is no such pair.
+    const amountFor = (keyword: string): string =>
+      $el
+        .find("span.ticket-price__label")
+        .filter((_, s) => $(s).text().includes(keyword))
+        .first()
+        .next("span.ticket-price__amount")
+        .text()
+        .trim();
+
+    const advAmount = amountFor("前売"); // advance-sale label
+    const doorAmount = amountFor("当日"); // day-of-show label
+    const price =
+      advAmount && doorAmount
+        ? `前売 ${advAmount} / 当日 ${doorAmount}`
+        : advAmount || doorAmount || null;
+
+    const bgStyle = $el.find("div.image-bg").attr("style") ?? "";
+    const imageUrl = bgStyle.match(BACKGROUND_URL_PATTERN)?.[1] ?? null;
+
+    // First link that looks like it points at a ticketing service.
+    const ticketUrl =
+      $el
+        .find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='tiget'], a[href*='ticket']")
+        .first()
+        .attr("href") ?? null;
+
+    events.push({
+      venue_id: venue.id,
+      title,
+      artist,
+      date,
+      open_time: extractTime(timeText, OPEN_TIME_PATTERN),
+      start_time: extractTime(timeText, START_TIME_PATTERN),
+      price,
+      ticket_url: ticketUrl,
+      image_url: imageUrl,
+      source_url: resolveSourceUrl($link.attr("href")),
+    });
+  });
+
+  return events;
+}
+
+/**
+ * Scraper for BuzzFront YOKOHAMA.
+ *
+ * `scrape()` downloads the listing pages for the current month and the two
+ * months after it in parallel, parses each page, and returns the combined
+ * events with duplicates (same date and title) removed, keeping the first
+ * occurrence. A page that answers with a non-OK status contributes no events;
+ * a rejected fetch rejects the whole call.
+ */
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    const today = new Date();
+
+    // Build one listing URL per month: this month, next month, the month after.
+    const monthUrls: string[] = [];
+    for (let offset = 0; offset <= 2; offset++) {
+      // Day 1 avoids month-length rollover; the Date constructor wraps the year.
+      const firstOfMonth = new Date(today.getFullYear(), today.getMonth() + offset, 1);
+      const month = String(firstOfMonth.getMonth() + 1).padStart(2, "0");
+      const ym = `${firstOfMonth.getFullYear()}/${month}`;
+      monthUrls.push(`https://buzzfront.net/events?date=${encodeURIComponent(ym)}`);
+    }
+
+    const pages = await Promise.all(
+      monthUrls.map(async (url) => {
+        const response = await fetch(url);
+        return response.ok ? response.text() : "";
+      })
+    );
+
+    // Map keeps insertion order, so the first event seen for a key wins.
+    const unique = new Map<string, EventInput>();
+    for (const page of pages) {
+      for (const event of parseHtml(page)) {
+        const key = `${event.date}|${event.title}`;
+        if (!unique.has(key)) unique.set(key, event);
+      }
+    }
+    return Array.from(unique.values());
+  },
+};
diff --git a/app/scrapers/index.ts b/app/scrapers/index.ts
index d765248..fa0df9b 100644
--- a/app/scrapers/index.ts
+++ b/app/scrapers/index.ts
@@ -22,6 +22,7 @@ import { scraper as naveyFloor } from "./navey-floor";
import { scraper as shimokitazawaEra } from "./shimokitazawa-era";
import { scraper as duoMusicExchange } from "./duo-music-exchange";
import { scraper as denAtsu } from "./den-atsu";
+import { scraper as buzzfrontYokohama } from "./buzzfront-yokohama";
export const ALL_SCRAPERS: Scraper[] = [
liquidRoom,
@@ -43,6 +44,7 @@ export const ALL_SCRAPERS: Scraper[] = [
shimokitazawaEra,
duoMusicExchange,
denAtsu,
+ buzzfrontYokohama,
];
export type { Scraper } from "./base";