summary | refs | log | tree | commit | diff
path: root/app/scrapers
diff options
context:
space:
mode:
Diffstat (limited to 'app/scrapers')
-rw-r--r-- app/scrapers/index.ts 4
-rw-r--r-- app/scrapers/navey-floor.ts 78
-rw-r--r-- app/scrapers/shimokitazawa-era.ts 69
3 files changed, 151 insertions, 0 deletions
diff --git a/app/scrapers/index.ts b/app/scrapers/index.ts
index 4ecb2a8..c38816f 100644
--- a/app/scrapers/index.ts
+++ b/app/scrapers/index.ts
@@ -18,6 +18,8 @@ import { scraper as modShibasaki } from "./mod-shibasaki";
import { scraper as fadYokohama } from "./fad-yokohama";
import { scraper as warpKichijoji } from "./warp-kichijoji";
import { scraper as pitbarNishiogikubo } from "./pitbar-nishiogikubo";
+import { scraper as naveyFloor } from "./navey-floor";
+import { scraper as shimokitazawaEra } from "./shimokitazawa-era";
export const ALL_SCRAPERS: Scraper[] = [
liquidRoom,
@@ -35,6 +37,8 @@ export const ALL_SCRAPERS: Scraper[] = [
fadYokohama,
warpKichijoji,
pitbarNishiogikubo,
+ naveyFloor,
+ shimokitazawaEra,
];
export type { Scraper } from "./base";
diff --git a/app/scrapers/navey-floor.ts b/app/scrapers/navey-floor.ts
new file mode 100644
index 0000000..806193e
--- /dev/null
+++ b/app/scrapers/navey-floor.ts
@@ -0,0 +1,78 @@
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+/**
+ * Venue metadata for navey floor.
+ *
+ * `id` is also written to every event produced by this module's scraper as
+ * its `venue_id`. `area` is the district label 赤坂 (Akasaka).
+ */
+export const venue: VenueMeta = {
+ id: "navey-floor",
+ name: "navey floor",
+ url: "https://navey-floor.com",
+ area: "赤坂",
+};
+
+/**
+ * Extracts the first `YYYY/M/D` date found in `text` and returns it as a
+ * `YYYY-MM-DD` string.
+ *
+ * Month and day may be written with one or two digits, so `2024/4/5` and
+ * `2024/04/05` both yield `2024-04-05`. Any surrounding text (for example a
+ * weekday suffix) is ignored.
+ *
+ * @param text - Raw text of the date heading.
+ * @returns The normalized date, or `null` when no date is present or the
+ *   month/day falls outside 1–12 / 1–31.
+ */
+function parseNaveyDate(text: string): string | null {
+  const match = text.match(/(\d{4})\/(\d{1,2})\/(\d{1,2})/);
+  if (!match) return null;
+
+  const [, year = "", month = "", day = ""] = match;
+  const monthNum = Number(month);
+  const dayNum = Number(day);
+  // Reject impossible values instead of emitting a string that only looks like a date.
+  if (monthNum < 1 || monthNum > 12 || dayNum < 1 || dayNum > 31) return null;
+
+  return `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
+}
+
+/**
+ * Scraper for the navey floor event listing.
+ *
+ * Downloads the `/event/` page and converts every `article.hentry` that has
+ * both a parseable date heading and a non-empty title into an `EventInput`.
+ */
+export const scraper: Scraper = {
+  venue,
+  /**
+   * Fetches and parses the listing page.
+   *
+   * @returns One entry per usable event article, in page order.
+   * @throws Error with message `HTTP <status>` when the response is not OK.
+   */
+  async scrape(): Promise<EventInput[]> {
+    const response = await fetch("https://navey-floor.com/event/");
+    if (!response.ok) throw new Error(`HTTP ${response.status}`);
+    const $ = cheerio.load(await response.text());
+
+    const collected: EventInput[] = [];
+
+    for (const article of $("article.hentry").toArray()) {
+      const $article = $(article);
+
+      // Articles whose heading carries no recognisable date are skipped.
+      const date = parseNaveyDate($article.find("h3.event-date__h").text());
+      if (!date) continue;
+
+      // The title link supplies both the event title and its detail-page URL.
+      const $anchor = $article.find("h2.event-title__h > a");
+      const title = $anchor.text().replace(/\s+/g, " ").trim();
+      if (!title) continue;
+      const sourceUrl = $anchor.attr("href") ?? null;
+
+      let openTime: string | null = null;
+      let startTime: string | null = null;
+      let artist: string | null = null;
+      let price: string | null = null;
+
+      for (const row of $article.find("ul.event-ul li").toArray()) {
+        const $row = $(row);
+
+        // Row text with its <span>s removed and the leading ":" stripped; null when nothing is left.
+        const readValue = (): string | null => {
+          const $copy = $row.clone();
+          $copy.find("span").remove();
+          return $copy.text().replace(/^\s*:\s*/, "").replace(/\s+/g, " ").trim() || null;
+        };
+
+        // The first <span> in a row is its label.
+        switch ($row.find("span").first().text().trim()) {
+          case "open/start": {
+            const times = $row.text().match(/(\d{2}:\d{2})\/(\d{2}:\d{2})/);
+            if (times) {
+              openTime = times[1];
+              startTime = times[2];
+            }
+            break;
+          }
+          case "act":
+            artist = readValue();
+            break;
+          case "ticket":
+            price = readValue();
+            break;
+        }
+      }
+
+      const ticketUrl = $article.find("div.attention a").attr("href") ?? null;
+
+      collected.push({
+        venue_id: venue.id,
+        title,
+        artist,
+        date,
+        open_time: openTime,
+        start_time: startTime,
+        price,
+        ticket_url: ticketUrl,
+        source_url: sourceUrl,
+      });
+    }
+
+    return collected;
+  },
+};
diff --git a/app/scrapers/shimokitazawa-era.ts b/app/scrapers/shimokitazawa-era.ts
new file mode 100644
index 0000000..a35f8e2
--- /dev/null
+++ b/app/scrapers/shimokitazawa-era.ts
@@ -0,0 +1,69 @@
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+/**
+ * Venue metadata for 下北沢ERA (Shimokitazawa ERA).
+ *
+ * `id` is also written to every event produced by this module's scraper as
+ * its `venue_id`. `url` uses plain `http`; the scraper in this module fetches
+ * over http as well, noting that the site's TLS certificate is invalid.
+ */
+export const venue: VenueMeta = {
+ id: "shimokitazawa-era",
+ name: "下北沢ERA",
+ url: "http://s-era.jp",
+ area: "下北沢",
+};
+
+/**
+ * Scraper for the 下北沢ERA schedule page.
+ *
+ * Every `article.schedule-box` that has a `<time datetime>` value and a
+ * non-empty `<h4>` title becomes one `EventInput`.
+ */
+export const scraper: Scraper = {
+  venue,
+  /**
+   * Fetches and parses the schedule page.
+   *
+   * Field sources within each article:
+   * - `date`: leading `YYYY-MM-DD` of the `datetime` attribute. If the
+   *   attribute does not start with a full date, the trimmed raw value is
+   *   kept, so such events are still stored.
+   * - `open_time` / `start_time` / `price`: the `span.strong` following the
+   *   `OPEN` / `START` / `ADV` labels in `.detail-grid`.
+   * - `price` falls back to the text of `p.freetext span.strong`.
+   * - `artist`: text of `div.w-flyer` with the notes and detail subtrees removed.
+   * - `source_url`: the schedule page itself.
+   *
+   * @returns One entry per usable schedule article, in page order.
+   * @throws Error with message `HTTP <status>` when the response is not OK.
+   */
+  async scrape(): Promise<EventInput[]> {
+    // s-era.jp has an invalid TLS cert; fetch via http.
+    // Single constant so the fetched URL and the recorded source_url cannot drift apart.
+    const scheduleUrl = "http://s-era.jp/schedule";
+
+    const res = await fetch(scheduleUrl);
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const $ = cheerio.load(await res.text());
+    const events: EventInput[] = [];
+
+    // First HH:MM occurrence in a label's value, or null.
+    const firstClockTime = (text: string): string | null =>
+      text.match(/\d{2}:\d{2}/)?.[0] ?? null;
+
+    for (const el of $("article.schedule-box").toArray()) {
+      const $el = $(el);
+
+      const rawDate = $el.find("time").attr("datetime")?.trim();
+      if (!rawDate) continue;
+      // `datetime` may legally include a time or UTC offset; store only the calendar date.
+      const date = rawDate.match(/^\d{4}-\d{2}-\d{2}/)?.[0] ?? rawDate;
+
+      const title = $el.find("h4").text().replace(/\s+/g, " ").trim();
+      if (!title) continue;
+
+      let openTime: string | null = null;
+      let startTime: string | null = null;
+      let price: string | null = null;
+
+      for (const span of $el.find(".detail-grid span.title").toArray()) {
+        const $label = $(span);
+        const label = $label.text().trim();
+        const value = $label.next("span.strong").text().trim();
+        if (label === "OPEN") openTime = firstClockTime(value);
+        else if (label === "START") startTime = firstClockTime(value);
+        else if (label === "ADV") price = value || null;
+      }
+      if (!price) {
+        price =
+          $el.find("p.freetext span.strong").text().replace(/\s+/g, " ").trim() || null;
+      }
+
+      // Artist names sit as direct text in div.w-flyer, before notes-wrapper/detail-texts.
+      const $wflyer = $el.find("div.w-flyer").clone();
+      $wflyer.find("section.notes-wrapper, div.detail-texts").remove();
+      const artist = $wflyer.text().replace(/\s+/g, " ").trim() || null;
+
+      const ticketUrl = $el.find("p.playguides a").attr("href") ?? null;
+
+      events.push({
+        venue_id: venue.id,
+        title,
+        artist,
+        date,
+        open_time: openTime,
+        start_time: startTime,
+        price,
+        ticket_url: ticketUrl,
+        source_url: scheduleUrl,
+      });
+    }
+
+    return events;
+  },
+};