From f817604858891edb79e26459dae884b158774db1 Mon Sep 17 00:00:00 2001 From: yyamashita Date: Wed, 6 May 2026 22:20:00 +0900 Subject: =?UTF-8?q?Add=204=20new=20venue=20scrapers:=20Meets=20=E5=A4=A7?= =?UTF-8?q?=E5=A1=9A,=20WARP=20=E5=90=89=E7=A5=A5=E5=AF=BA,=20FLAT=20?= =?UTF-8?q?=E8=A5=BF=E8=8D=BB=E7=AA=AA,=20Pitbar=20=E8=A5=BF=E8=8D=BB?= =?UTF-8?q?=E7=AA=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit meets-otsuka: rinky.info プラットフォーム。div.blog-entry.event-wrap[event-date] から日付・タイトル・出演者・時間・価格・チケットURLを取得。 warp-kichijoji: WordPress カスタムテーマ。

<h3>YYYY<br>MM</h3>

で 年月を取得、article.schedules-box から各イベントをパース。 flat-nishiogikubo: Wix サイトのため JS レンダリング必須。エラーを返す プレースホルダー実装(Playwright 等への移行が必要)。 pitbar-nishiogikubo: freecalend.com (mem25771) から取得を試みるが、 ボット遮断のため現状はエラー。URL パターン・代替策をコメントに記載。 SCRAPE_TARGETS.md に状態列(✅/⚠️)を追加。 Co-Authored-By: Claude Sonnet 4.6 --- app/scrapers/warp-kichijoji.ts | 99 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 app/scrapers/warp-kichijoji.ts (limited to 'app/scrapers/warp-kichijoji.ts') diff --git a/app/scrapers/warp-kichijoji.ts b/app/scrapers/warp-kichijoji.ts new file mode 100644 index 0000000..8a828ea --- /dev/null +++ b/app/scrapers/warp-kichijoji.ts @@ -0,0 +1,99 @@ +/** + * 吉祥寺 WARP — http://warp.rinky.info/schedules + * + * WordPress カスタムテーマ。年月は: + *

<h3>2026<br>05</h3>

+ * + * イベント構造: + *
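<article class="schedules-box" id="box-DD-NNNNN">
+ *   <div>DD...</div>
+ *   <h4>タイトル</h4>
+ *   <section class="notes-wrapper">
+ *     <p>OPEN / START <span class="strong">HH:MM / HH:MM</span></p>
+ *     <p>ADV / DOOR <span class="strong">¥XXXX / ¥XXXX</span></p>
+ *   </section>
+ * </article>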
+ */
+import * as cheerio from "cheerio";
+import type { Scraper, VenueMeta } from "./base";
+import type { EventInput } from "~/lib/db.server";
+
+/** Venue metadata; `venue.id` is written to every scraped event as `venue_id`. */
+export const venue: VenueMeta = {
+  id: "warp-kichijoji",
+  name: "吉祥寺 WARP",
+  url: "http://warp.rinky.info",
+  area: "吉祥寺",
+};
+
+/** Page that is fetched and parsed for events. */
+const SCHEDULE_URL = "http://warp.rinky.info/schedules";
+
+/** Strict zero-padded HH:MM; hoisted so it is not rebuilt for every event. */
+const TIME_PATTERN = /^\d{2}:\d{2}$/;
+
+export const scraper: Scraper = {
+  venue,
+  /**
+   * Fetches the schedule page and turns every `article.schedules-box`
+   * element into an event record.
+   *
+   * @returns The parsed events. The array is empty when the first `<h3>` on
+   *   the page does not contain a four-digit year followed by a two-digit month.
+   * @throws Error with message `HTTP <status>` when the response is not OK.
+   */
+  async scrape(): Promise<EventInput[]> {
+    const res = await fetch(SCHEDULE_URL);
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const $ = cheerio.load(await res.text());
+    const events: EventInput[] = [];
+
+    // Extract year + month from the first <h3>, e.g. <h3>2026<br>05</h3>
+    const yearMonthMatch = $("h3")
+      .first()
+      .text()
+      .trim()
+      .match(/(\d{4})\D*(\d{2})/);
+    if (!yearMonthMatch) return events;
+    const [, year, month] = yearMonthMatch;
+
+    $("article.schedules-box").each((_, el) => {
+      const $el = $(el);
+
+      // Day from article id: "box-03-23546" → "03"
+      const dayMatch = ($el.attr("id") ?? "").match(/^box-(\d{2})-/);
+      if (!dayMatch) return;
+      const date = `${year}-${month}-${dayMatch[1]}`;
+
+      // .text() concatenates text nodes only, so a <br> inside the title would
+      // glue the two halves together. Swap each <br> for a space on a clone
+      // (the parsed document itself is left untouched), then collapse runs of
+      // whitespace.
+      const $title = $el.find("h4").first().clone();
+      $title.find("br").replaceWith(" ");
+      const title = $title.text().replace(/\s+/g, " ").trim();
+      if (!title) return;
+
+      // First notes-wrapper <p> contains OPEN/START times, e.g. "18:30 / 19:00"
+      const $notes = $el.find("section.notes-wrapper p");
+      const [openTime, startTime] = $notes
+        .eq(0)
+        .find("span.strong")
+        .text()
+        .split("/")
+        .map((s) => s.trim());
+
+      // Second <p> contains ADV/DOOR price, e.g. "¥3,000 / ¥3,500".
+      // An empty string or the "TBA / TBA" placeholder is stored as null.
+      const priceStrong = $notes.eq(1).find("span.strong").text().trim();
+      const price = priceStrong && priceStrong !== "TBA / TBA" ? priceStrong : null;
+
+      // Image: prefer data-src (lazy), fall back to noscript img src
+      const $flyer = $el.find("section.flyer img").first();
+      const rawImg =
+        $flyer.attr("data-src") ??
+        $el.find("section.flyer noscript img").first().attr("src") ??
+        null;
+      // Strip ShortPixel CDN prefix if present
+      const imageUrl = rawImg
+        ? rawImg.replace(/^https?:\/\/sp-ao\.shortpixel\.ai\/client\/[^/]+\//, "")
+        : null;
+
+      const ticketUrl =
+        $el
+          .find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia']")
+          .first()
+          .attr("href") ?? null;
+
+      events.push({
+        venue_id: venue.id,
+        title,
+        artist: null,
+        date,
+        open_time: timeOrNull(openTime),
+        start_time: timeOrNull(startTime),
+        price,
+        ticket_url: ticketUrl,
+        image_url: imageUrl,
+        source_url: null,
+      });
+    });
+
+    return events;
+  },
+};
+
+/**
+ * Reports whether `s` is a zero-padded `HH:MM` string, ignoring surrounding
+ * whitespace. `undefined` and the empty string are rejected.
+ */
+function isTime(s: string | undefined): boolean {
+  return !!s && TIME_PATTERN.test(s.trim());
+}
+
+/** Returns `s` unchanged when it passes `isTime`, otherwise `null`. */
+function timeOrNull(s: string | undefined): string | null {
+  return s !== undefined && isTime(s) ? s : null;
+}
-- cgit v1.2.3