diff options
Diffstat (limited to 'app')
| -rw-r--r-- | app/scrapers/index.ts | 4 | ||||
| -rw-r--r-- | app/scrapers/navey-floor.ts | 78 | ||||
| -rw-r--r-- | app/scrapers/shimokitazawa-era.ts | 69 |
3 files changed, 151 insertions, 0 deletions
diff --git a/app/scrapers/index.ts b/app/scrapers/index.ts
index 4ecb2a8..c38816f 100644
--- a/app/scrapers/index.ts
+++ b/app/scrapers/index.ts
@@ -18,6 +18,8 @@ import { scraper as modShibasaki } from "./mod-shibasaki";
 import { scraper as fadYokohama } from "./fad-yokohama";
 import { scraper as warpKichijoji } from "./warp-kichijoji";
 import { scraper as pitbarNishiogikubo } from "./pitbar-nishiogikubo";
+import { scraper as naveyFloor } from "./navey-floor";
+import { scraper as shimokitazawaEra } from "./shimokitazawa-era";
 
 export const ALL_SCRAPERS: Scraper[] = [
   liquidRoom,
@@ -35,6 +37,8 @@ export const ALL_SCRAPERS: Scraper[] = [
   fadYokohama,
   warpKichijoji,
   pitbarNishiogikubo,
+  naveyFloor,
+  shimokitazawaEra,
 ];
 
 export type { Scraper } from "./base";
diff --git a/app/scrapers/navey-floor.ts b/app/scrapers/navey-floor.ts
new file mode 100644
index 0000000..806193e
--- /dev/null
+++ b/app/scrapers/navey-floor.ts
@@ -0,0 +1,78 @@
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+export const venue: VenueMeta = {
+  id: "navey-floor",
+  name: "navey floor",
+  url: "https://navey-floor.com",
+  area: "赤坂",
+};
+
+function parseNaveyDate(text: string): string | null {
+  const m = text.trim().match(/(\d{4})\/(\d{2})\/(\d{2})/);
+  if (!m) return null;
+  return `${m[1]}-${m[2]}-${m[3]}`;
+}
+
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    const res = await fetch("https://navey-floor.com/event/");
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const html = await res.text();
+    const $ = cheerio.load(html);
+    const events: EventInput[] = [];
+
+    $("article.hentry").each((_, el) => {
+      const $el = $(el);
+
+      const date = parseNaveyDate($el.find("h3.event-date__h").text());
+      if (!date) return;
+
+      const $titleLink = $el.find("h2.event-title__h > a");
+      const title = $titleLink.text().replace(/\s+/g, " ").trim();
+      const sourceUrl = $titleLink.attr("href") ?? null;
+      if (!title) return;
+
+      let openTime: string | null = null;
+      let startTime: string | null = null;
+      let artist: string | null = null;
+      let price: string | null = null;
+
+      $el.find("ul.event-ul li").each((_, li) => {
+        const $li = $(li);
+        const label = $li.find("span").first().text().trim();
+
+        if (label === "open/start") {
+          const m = $li.text().match(/(\d{2}:\d{2})\/(\d{2}:\d{2})/);
+          if (m) { openTime = m[1]; startTime = m[2]; }
+        } else if (label === "act") {
+          const $clone = $li.clone();
+          $clone.find("span").remove();
+          artist = $clone.text().replace(/^\s*:\s*/, "").replace(/\s+/g, " ").trim() || null;
+        } else if (label === "ticket") {
+          const $clone = $li.clone();
+          $clone.find("span").remove();
+          price = $clone.text().replace(/^\s*:\s*/, "").replace(/\s+/g, " ").trim() || null;
+        }
+      });
+
+      const ticketUrl = $el.find("div.attention a").attr("href") ?? null;
+
+      events.push({
+        venue_id: venue.id,
+        title,
+        artist,
+        date,
+        open_time: openTime,
+        start_time: startTime,
+        price,
+        ticket_url: ticketUrl,
+        source_url: sourceUrl,
+      });
+    });
+
+    return events;
+  },
+};
diff --git a/app/scrapers/shimokitazawa-era.ts b/app/scrapers/shimokitazawa-era.ts
new file mode 100644
index 0000000..a35f8e2
--- /dev/null
+++ b/app/scrapers/shimokitazawa-era.ts
@@ -0,0 +1,69 @@
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+export const venue: VenueMeta = {
+  id: "shimokitazawa-era",
+  name: "下北沢ERA",
+  url: "http://s-era.jp",
+  area: "下北沢",
+};
+
+export const scraper: Scraper = {
+  venue,
+  async scrape(): Promise<EventInput[]> {
+    // s-era.jp has an invalid TLS cert; fetch via http
+    const res = await fetch("http://s-era.jp/schedule");
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const html = await res.text();
+    const $ = cheerio.load(html);
+    const events: EventInput[] = [];
+
+    $("article.schedule-box").each((_, el) => {
+      const $el = $(el);
+
+      const date = $el.find("time").attr("datetime") ?? null;
+      if (!date) return;
+
+      const title = $el.find("h4").text().replace(/\s+/g, " ").trim();
+      if (!title) return;
+
+      let openTime: string | null = null;
+      let startTime: string | null = null;
+      let price: string | null = null;
+
+      $el.find(".detail-grid span.title").each((_, span) => {
+        const label = $(span).text().trim();
+        const value = $(span).next("span.strong").text().trim();
+        if (label === "OPEN") openTime = value.match(/\d{2}:\d{2}/)?.[0] ?? null;
+        else if (label === "START") startTime = value.match(/\d{2}:\d{2}/)?.[0] ?? null;
+        else if (label === "ADV") price = value || null;
+      });
+      if (!price) {
+        price =
+          $el.find("p.freetext span.strong").text().replace(/\s+/g, " ").trim() || null;
+      }
+
+      // artist names sit as direct text in div.w-flyer, before notes-wrapper/detail-texts
+      const $wflyer = $el.find("div.w-flyer").clone();
+      $wflyer.find("section.notes-wrapper, div.detail-texts").remove();
+      const artist = $wflyer.text().replace(/\s+/g, " ").trim() || null;
+
+      const ticketUrl = $el.find("p.playguides a").attr("href") ?? null;
+
+      events.push({
+        venue_id: venue.id,
+        title,
+        artist,
+        date,
+        open_time: openTime,
+        start_time: startTime,
+        price,
+        ticket_url: ticketUrl,
+        source_url: "http://s-era.jp/schedule",
+      });
+    });
+
+    return events;
+  },
+};
|
