/**
 * Scraper for the navey floor (赤坂 / Akasaka) event listing.
 *
 * Recovered from the patch "Add navey-floor and shimokitazawa-era scrapers"
 * (commit d116d4c, file app/scrapers/navey-floor.ts). Per that commit message
 * the site is a WordPress install with a custom event CMS; the listing page is
 * fetched once and parsed with Cheerio.
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/** Static metadata for this venue; `id` is written to every event as `venue_id`. */
export const venue: VenueMeta = {
  id: "navey-floor",
  name: "navey floor",
  url: "https://navey-floor.com",
  area: "赤坂",
};

/** Listing page whose `article.hentry` elements are parsed into events. */
const EVENT_LIST_URL = "https://navey-floor.com/event/";

/** `YYYY/M/D` or `YYYY/MM/DD` anywhere in the text. */
const DATE_PATTERN = /(\d{4})\/(\d{1,2})\/(\d{1,2})/;

/** `H:MM/H:MM` or `HH:MM / HH:MM` (optional spaces around the slash). */
const OPEN_START_PATTERN = /(\d{1,2}:\d{2})\s*\/\s*(\d{1,2}:\d{2})/;

/** Collapses every whitespace run to a single space and trims both ends. */
function collapseWhitespace(text: string): string {
  return text.replace(/\s+/g, " ").trim();
}

/**
 * Extracts the first slash-separated date from `text` as an ISO `YYYY-MM-DD` string.
 *
 * @param text Raw text of the date heading.
 * @returns The zero-padded ISO date, or `null` when no date is present.
 */
function parseNaveyDate(text: string): string | null {
  const m = DATE_PATTERN.exec(text.trim());
  if (!m) return null;
  // All three groups always participate in a match; the defaults only keep the
  // code type-safe when `noUncheckedIndexedAccess` is enabled.
  const [, year = "", month = "", day = ""] = m;
  return `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
}

/**
 * Extracts the door-open and start times from an "open/start" list item.
 *
 * @param text Full text of the list item (label included; it contains no digits).
 * @returns Both times as zero-padded `HH:MM`, or `null` when the pair is absent.
 */
function parseOpenStart(
  text: string,
): { openTime: string; startTime: string } | null {
  const m = OPEN_START_PATTERN.exec(text);
  if (!m) return null;
  const [, open = "", start = ""] = m;
  // "9:00" -> "09:00"; already-padded values are left untouched.
  return { openTime: open.padStart(5, "0"), startTime: start.padStart(5, "0") };
}

/**
 * Normalises the value part of a "label : value" list item.
 *
 * @param text Item text with the label `<span>` already removed.
 * @returns The value without its leading colon and with whitespace collapsed,
 *   or `null` when nothing is left.
 */
function cleanValue(text: string): string | null {
  return collapseWhitespace(text.replace(/^\s*:\s*/, "")) || null;
}

/**
 * Turns an `href` attribute into a storable URL.
 *
 * @param href Attribute value, or `undefined` when the anchor/attribute is missing.
 * @returns `null` for a missing or blank href; an href that already has a scheme
 *   is returned as written; anything else is resolved against the listing page.
 */
function toAbsoluteUrl(href: string | undefined): string | null {
  const trimmed = href?.trim();
  if (!trimmed) return null;
  // Already absolute: keep the site's exact spelling rather than re-serialising it.
  if (/^[a-z][a-z0-9+.-]*:/i.test(trimmed)) return trimmed;
  try {
    return new URL(trimmed, EVENT_LIST_URL).toString();
  } catch {
    // Unparseable reference: fall back to the raw value instead of losing it.
    return trimmed;
  }
}

export const scraper: Scraper = {
  venue,
  /**
   * Fetches the event listing page and converts each `article.hentry` into an
   * event record. Articles without a parseable date or without a title are
   * skipped.
   *
   * @returns One `EventInput` per usable article, in document order.
   * @throws Error `HTTP <status>` when the response is not OK; failures raised
   *   by `fetch` itself are not caught here and propagate to the caller.
   */
  async scrape(): Promise<EventInput[]> {
    const res = await fetch(EVENT_LIST_URL);
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const $ = cheerio.load(await res.text());
    const events: EventInput[] = [];

    for (const el of $("article.hentry").toArray()) {
      const $el = $(el);

      const date = parseNaveyDate($el.find("h3.event-date__h").text());
      if (!date) continue;

      const $titleLink = $el.find("h2.event-title__h > a");
      const title = collapseWhitespace($titleLink.text());
      if (!title) continue;

      let openTime: string | null = null;
      let startTime: string | null = null;
      let artist: string | null = null;
      let price: string | null = null;

      // Each detail row is `<li><span>label</span> : value</li>`; the first
      // span's text decides which field the row feeds.
      for (const li of $el.find("ul.event-ul li").toArray()) {
        const $li = $(li);
        const label = $li.find("span").first().text().trim();

        if (label === "open/start") {
          const times = parseOpenStart($li.text());
          if (times) {
            openTime = times.openTime;
            startTime = times.startTime;
          }
        } else if (label === "act" || label === "ticket") {
          // Work on a clone so removing the label span leaves the parsed
          // document untouched.
          const $value = $li.clone();
          $value.find("span").remove();
          const value = cleanValue($value.text());
          if (label === "act") {
            artist = value;
          } else {
            price = value;
          }
        }
      }

      events.push({
        venue_id: venue.id,
        title,
        artist,
        date,
        open_time: openTime,
        start_time: startTime,
        price,
        ticket_url: toAbsoluteUrl($el.find("div.attention a").attr("href")),
        source_url: toAbsoluteUrl($titleLink.attr("href")),
      });
    }

    return events;
  },
};