/**
 * Scraper for the 下北沢ERA (Shimokitazawa ERA) schedule page.
 *
 * The page is fetched over plain HTTP on purpose: s-era.jp serves an invalid
 * TLS certificate, so an HTTPS request would fail.
 */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";

/** Static metadata identifying the venue this scraper belongs to. */
export const venue: VenueMeta = {
  id: "shimokitazawa-era",
  name: "下北沢ERA",
  url: "http://s-era.jp",
  area: "下北沢",
};

/** Schedule listing; used both as the fetch target and as each event's source URL. */
const SCHEDULE_URL = "http://s-era.jp/schedule";

/** First `HH:MM` occurrence inside an OPEN / START value. */
const TIME_PATTERN = /\d{2}:\d{2}/;

/** Matches a leading URL scheme such as `https:` or `mailto:`. */
const URL_SCHEME_PATTERN = /^[a-z][a-z0-9+.-]*:/i;

/** Collapses every run of whitespace to a single space and trims both ends. */
function collapseWhitespace(text: string): string {
  return text.replace(/\s+/g, " ").trim();
}

/** Returns the first `HH:MM` substring of `value`, or `null` when there is none. */
function extractTime(value: string | undefined): string | null {
  return value?.match(TIME_PATTERN)?.[0] ?? null;
}

/**
 * Normalizes a ticket link taken from an `href` attribute.
 *
 * @param href Raw attribute value; `undefined` when no link element was found.
 * @returns `null` for a missing or blank link, the link unchanged when it
 *   already carries a scheme, otherwise the link resolved against the
 *   schedule page URL.
 */
function resolveTicketUrl(href: string | undefined): string | null {
  const link = href?.trim();
  if (!link) return null;
  if (URL_SCHEME_PATTERN.test(link)) return link;
  try {
    return new URL(link, SCHEDULE_URL).href;
  } catch {
    // Not resolvable as a URL; keep the raw value rather than dropping it.
    return link;
  }
}

export const scraper: Scraper = {
  venue,

  /**
   * Downloads the schedule page and converts every `article.schedule-box`
   * into an event record. Entries without a `datetime` attribute on their
   * `<time>` element, or without a non-empty `<h4>` title, are skipped.
   *
   * @returns The events found on the page, in document order.
   * @throws Error with message `HTTP <status>` when the response is not OK.
   */
  async scrape(): Promise<EventInput[]> {
    const res = await fetch(SCHEDULE_URL);
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const $ = cheerio.load(await res.text());
    const events: EventInput[] = [];

    $("article.schedule-box").each((_, el) => {
      const $el = $(el);

      const date = $el.find("time").attr("datetime");
      if (!date) return;

      const title = collapseWhitespace($el.find("h4").text());
      if (!title) return;

      // Each label span ("OPEN", "START", "ADV", ...) is followed by a
      // span.strong holding its value. When a label repeats, the last one wins.
      const details = new Map<string, string>();
      $el.find(".detail-grid span.title").each((_i, span) => {
        const $label = $(span);
        details.set(
          $label.text().trim(),
          $label.next("span.strong").text().trim(),
        );
      });

      // `||` is deliberate: an empty ADV value counts as missing, in which
      // case the free-text block is used as the price instead.
      const price =
        details.get("ADV") ||
        collapseWhitespace($el.find("p.freetext span.strong").text()) ||
        null;

      // Artist names sit as direct text in div.w-flyer, before
      // notes-wrapper/detail-texts; strip those from a clone so the parsed
      // document itself is left untouched.
      const $flyer = $el.find("div.w-flyer").clone();
      $flyer.find("section.notes-wrapper, div.detail-texts").remove();
      const artist = collapseWhitespace($flyer.text()) || null;

      events.push({
        venue_id: venue.id,
        title,
        artist,
        date,
        open_time: extractTime(details.get("OPEN")),
        start_time: extractTime(details.get("START")),
        price,
        ticket_url: resolveTicketUrl($el.find("p.playguides a").attr("href")),
        source_url: SCHEDULE_URL,
      });
    });

    return events;
  },
};