diff options
| author | yyamashita <yyamashita@mosquit.one> | 2026-05-07 19:27:50 +0900 |
|---|---|---|
| committer | yyamashita <yyamashita@mosquit.one> | 2026-05-07 19:27:50 +0900 |
| commit | d5e975b601e70adf901c8e1eb7e61f0388941195 (patch) | |
| tree | f1778ff15b6540b44c354cb76c44aac795448c4a /app/scrapers | |
| parent | bffc2c74408ff7163cea0c0392dfc4b15c620a5f (diff) | |
Add 5 new venue scrapers; extract artist info for WARP, shibuya-o, MOON STEP, mod
New scrapers: 新代田 FEVER, Nine Spices (新宿), 西永福JAM, shibasaki mod (柴崎), 中野 MOON STEP
Artist extraction added/fixed:
- warp-kichijoji: parse div.w-flyer (clone it, then strip the nested section.notes-wrapper and div.detail-texts before reading artist lines)
- shibuya-o: rewrite to scrape each sub-venue; artist from li.p-scheduled-card__artist-item
- moon-step-nakano: parse 出演 section from WordPress API description HTML
- mod-shibasaki: fetch individual event pages in parallel; handle live:/出演:/act: headers, with a fallback to ・ bullet lists
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'app/scrapers')
| -rw-r--r-- | app/scrapers/fever-shindaita.ts | 117 | ||||
| -rw-r--r-- | app/scrapers/index.ts | 14 | ||||
| -rw-r--r-- | app/scrapers/mod-shibasaki.ts | 124 | ||||
| -rw-r--r-- | app/scrapers/moon-step-nakano.ts | 85 | ||||
| -rw-r--r-- | app/scrapers/nine-spices.ts | 94 | ||||
| -rw-r--r-- | app/scrapers/nishieifuku-jam.ts | 75 | ||||
| -rw-r--r-- | app/scrapers/shibuya-o.ts | 153 | ||||
| -rw-r--r-- | app/scrapers/warp-kichijoji.ts | 20 |
8 files changed, 617 insertions, 65 deletions
diff --git a/app/scrapers/fever-shindaita.ts b/app/scrapers/fever-shindaita.ts new file mode 100644 index 0000000..71c31f6 --- /dev/null +++ b/app/scrapers/fever-shindaita.ts @@ -0,0 +1,117 @@ +/** + * 新代田 FEVER — https://www.fever-popo.com + * + * Movable Type CMS。月別 URL: /schedule/YYYY/MM/ + * DOM 構造: + * <div class="entry-asset"> + * <h2 class="eventtitle">26.05.01 (Fri) タイトル</h2> + * <meta property="og:url" content="https://www.fever-popo.com/schedule/.../MMDD.html"> + * <h3><p>アーティスト1<br/>アーティスト2</p></h3> + * <div>OPEN HH:MM / START HH:MM</div> + * <div><p>ADV ¥XXXX (+1drink) / DOOR ¥XXXX (+1drink)</p></div> + * <img class="scpickup" src="..."> ← フライヤー画像 + */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "fever-shindaita", + name: "新代田 FEVER", + url: "https://www.fever-popo.com", + area: "新代田", +}; + +async function scrapeMonth(yyyymm: string): Promise<EventInput[]> { + const [year, month] = yyyymm.split("-"); + const url = `${venue.url}/schedule/${year}/${month}/`; + const res = await fetch(url); + if (!res.ok) return []; + const $ = cheerio.load(await res.text()); + const events: EventInput[] = []; + + $("div.entry-asset").each((_, el) => { + const $el = $(el); + + // Title: "26.05.01 (Fri) タイトル" + const h2Text = $el.find("h2.eventtitle").first().text(); + const titleMatch = h2Text.match(/^\d{2}\.\d{2}\.\d{2}\s+\([A-Za-z]+\)\s*(.+)$/); + if (!titleMatch) return; + const title = titleMatch[1].trim(); + if (!title) return; + + // Date from title prefix: "26.05.01" + const dateMatch = h2Text.match(/^(\d{2})\.(\d{2})\.(\d{2})/); + if (!dateMatch) return; + const date = `20${dateMatch[1]}-${dateMatch[2]}-${dateMatch[3]}`; + + // Source URL from og:url meta inside the entry + const sourceUrl = $el.find("meta[property='og:url']").attr("content") ?? 
null; + + // Artists: first <h3><p> in body + const $h3 = $el.find("div.asset-body h3").first(); + const artist = $h3.find("p").text() + .split(/\n|<br\s*\/?>/i) + .map((s) => s.replace(/<[^>]+>/g, "").trim()) + .filter(Boolean) + .join("、") || null; + + // Time: div containing "OPEN" / "START" + let openTime: string | null = null; + let startTime: string | null = null; + $el.find("div.asset-body div").each((_, d) => { + const text = $(d).text(); + if (/OPEN/i.test(text) && /START/i.test(text)) { + const om = text.match(/OPEN\s*(\d{1,2}:\d{2})/i); + const sm = text.match(/START\s*(\d{1,2}:\d{2})/i); + if (om) openTime = om[1]; + if (sm) startTime = sm[1]; + } + }); + + // Price: div after the time div + let price: string | null = null; + $el.find("div.asset-body div").each((_, d) => { + const text = $(d).text().trim(); + if (/[¥¥]/.test(text) && /(ADV|DOOR|前売|当日)/i.test(text)) { + price = text.replace(/\s+/g, " ").split("\n")[0].trim() || null; + } + }); + + // Image + const imageUrl = $el.find("img.scpickup").first().attr("src") ?? null; + + // Ticket URL + const ticketUrl = + $el.find("a[href*='eplus'], a[href*='pia'], a[href*='tiget'], a[href*='livepocket'], a[href*='t-dv.com']") + .first().attr("href") ?? 
null; + + events.push({ + venue_id: venue.id, + title, + artist, + date, + open_time: openTime, + start_time: startTime, + price, + ticket_url: ticketUrl, + image_url: imageUrl, + source_url: sourceUrl, + }); + }); + + return events; +} + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const now = new Date(); + const thisMonth = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, "0")}`; + const next = new Date(now.getFullYear(), now.getMonth() + 1, 1); + const nextMonth = `${next.getFullYear()}-${String(next.getMonth() + 1).padStart(2, "0")}`; + + const [a, b] = await Promise.all([scrapeMonth(thisMonth), scrapeMonth(nextMonth)]); + return [...a, ...b]; + }, +}; diff --git a/app/scrapers/index.ts b/app/scrapers/index.ts index 81a0eb7..2d7f1d1 100644 --- a/app/scrapers/index.ts +++ b/app/scrapers/index.ts @@ -9,9 +9,12 @@ import { scraper as shibuyaO } from "./shibuya-o"; import { scraper as shinjukuLoft } from "./shinjuku-loft"; import { scraper as clubQuattro } from "./club-quattro"; import { scraper as meetsOtsuka } from "./meets-otsuka"; -import { scraper as warpKichijoji } from "./warp-kichijoji"; import { scraper as flatNishiogikubo } from "./flat-nishiogikubo"; -import { scraper as pitbarNishiogikubo } from "./pitbar-nishiogikubo"; +import { scraper as nineSpices } from "./nine-spices"; +import { scraper as nishieifukuJam } from "./nishieifuku-jam"; +import { scraper as feverShindaita } from "./fever-shindaita"; +import { scraper as moonStepNakano } from "./moon-step-nakano"; +import { scraper as modShibasaki } from "./mod-shibasaki"; export const ALL_SCRAPERS: Scraper[] = [ liquidRoom, @@ -20,9 +23,12 @@ export const ALL_SCRAPERS: Scraper[] = [ shinjukuLoft, clubQuattro, meetsOtsuka, - warpKichijoji, flatNishiogikubo, - pitbarNishiogikubo, + nineSpices, + nishieifukuJam, + feverShindaita, + moonStepNakano, + modShibasaki, ]; export type { Scraper } from "./base"; diff --git a/app/scrapers/mod-shibasaki.ts 
b/app/scrapers/mod-shibasaki.ts new file mode 100644 index 0000000..0e2a96b --- /dev/null +++ b/app/scrapers/mod-shibasaki.ts @@ -0,0 +1,124 @@ +/** + * shibasaki mod (調布市柴崎) — https://shibasakimod.com/schedule + * + * Squarespace イベントリスト。静的 HTML として配信される。 + * 一覧ページに出演者情報はないため、各イベントの詳細ページを並列取得する。 + * DOM 構造 (一覧): + * <article class="eventlist-event eventlist-event--upcoming"> + * <time class="event-date" datetime="YYYY-MM-DD"> + * <h1 class="eventlist-title"><a class="eventlist-title-link" href="/schedule/YYYYMMDD"> + * DOM 構造 (詳細): + * <div class="sqs-html-content"><p style="white-space:pre-wrap;"> + * ... + * live:\nアーティスト名\n... (または 出演:) + */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "mod-shibasaki", + name: "shibasaki mod", + url: "https://shibasakimod.com", + area: "柴崎", +}; + +const SCHEDULE_URL = "https://shibasakimod.com/schedule"; + +interface EventStub { + date: string; + title: string; + startTime: string | null; + sourceUrl: string | null; + imageUrl: string | null; +} + +async function fetchArtist(url: string): Promise<string | null> { + try { + const res = await fetch(url); + if (!res.ok) return null; + const $ = cheerio.load(await res.text()); + // Insert newlines at block boundaries before extracting text + $(".sqs-html-content br").replaceWith("\n"); + $(".sqs-html-content p").each((_, el) => { + $(el).append("\n"); + }); + const text = $(".sqs-html-content").text(); + const extractArtists = (section: string): string[] => { + const artists: string[] = []; + for (const raw of section.split(/\n/)) { + const l = raw.replace(/\s*@\S+/g, "").replace(/^[・•]\s*/, "").trim(); + if (!l) continue; + if (/^https?:\/\//i.test(l)) continue; // skip social links between artists + if (/\d{1,2}:\d{2}|[¥¥]|yen|ticket|チケット|予約|adv|door/i.test(l)) break; + artists.push(l); + } + return artists; + }; + + const match = 
text.match(/(?:live|出演|act)[::]\s*([\s\S]+)/i); + if (match) { + const artists = extractArtists(match[1]); + if (artists.length > 0) return artists.join(" / "); + } + // Fallback: find the first line starting with ・ and extract from there + const lines = text.split(/\n/); + const firstBullet = lines.findIndex((l) => /^・/.test(l.trim())); + if (firstBullet >= 0) { + const artists = extractArtists(lines.slice(firstBullet).join("\n")); + if (artists.length > 0) return artists.join(" / "); + } + return null; + } catch { + return null; + } +} + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const res = await fetch(SCHEDULE_URL); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + const $ = cheerio.load(await res.text()); + const stubs: EventStub[] = []; + + $("article.eventlist-event--upcoming").each((_, el) => { + const $el = $(el); + + const date = $el.find("time.event-date").first().attr("datetime") ?? ""; + if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return; + + const title = $el.find("h1.eventlist-title a, h2.eventlist-title a").first().text().trim(); + if (!title) return; + + const startTime = $el.find("time.event-time-localized-start").first().text().trim() || null; + + const relHref = $el.find("a.eventlist-title-link").first().attr("href") ?? null; + const sourceUrl = relHref + ? (relHref.startsWith("http") ? relHref : `${venue.url}${relHref}`) + : null; + + const imageUrl = + $el.find("img[data-src]").first().attr("data-src") ?? + $el.find("img[src]").first().attr("src") ?? null; + + stubs.push({ date, title, startTime, sourceUrl, imageUrl }); + }); + + const artists = await Promise.all( + stubs.map((s) => (s.sourceUrl ? 
fetchArtist(s.sourceUrl) : Promise.resolve(null))) + ); + + return stubs.map((s, i) => ({ + venue_id: venue.id, + title: s.title, + date: s.date, + open_time: null, + start_time: s.startTime, + price: null, + image_url: s.imageUrl, + source_url: s.sourceUrl, + artist: artists[i], + })); + }, +}; diff --git a/app/scrapers/moon-step-nakano.ts b/app/scrapers/moon-step-nakano.ts new file mode 100644 index 0000000..e67e128 --- /dev/null +++ b/app/scrapers/moon-step-nakano.ts @@ -0,0 +1,85 @@ +/** + * 中野 MOON STEP — https://nakano-dynamite.com/moonstep + * + * WordPress + The Events Calendar プラグイン。REST API で取得。 + * エンドポイント: /moonstep/wp-json/tribe/events/v1/events + * description フィールドは HTML 文字列で、<pre> 内に出演情報が含まれる: + * 出演:\n<LIVE>\nアーティスト\n... + */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "moon-step-nakano", + name: "中野 MOON STEP", + url: "https://nakano-dynamite.com/moonstep", + area: "中野", +}; + +const API_URL = "https://nakano-dynamite.com/moonstep/wp-json/tribe/events/v1/events"; + +function parseArtists(descriptionHtml: string): string | null { + const text = cheerio.load(descriptionHtml).text(); + const match = text.match(/出演[::]\s*([\s\S]+)/); + if (!match) return null; + // Stop at first blank line (double \r\n or \n\n) + const section = match[1].split(/\r?\n\r?\n/)[0] ?? ""; + const lines = section + .split(/\r?\n/) + .map((l) => l.replace(/^\s*[<【[&].*?[>】];]\s*/, "").trim()) + .filter((l) => l.length > 0); + return lines.length > 0 ? 
lines.join(" / ") : null; +} + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const today = new Date().toISOString().slice(0, 10); + const url = `${API_URL}?per_page=50&start_date=${today}`; + const res = await fetch(url); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + + const data = await res.json() as { events: Record<string, unknown>[] }; + const events: EventInput[] = []; + + for (const e of data.events ?? []) { + const startDate = (e.start_date as string | undefined) ?? ""; + const date = startDate.slice(0, 10); + if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) continue; + + const startDetails = e.start_date_details as Record<string, string> | undefined; + const startTime = startDetails + ? `${startDetails.hour}:${startDetails.minutes}` + : null; + + const rawTitle = (e.title as string | undefined) ?? ""; + const title = rawTitle.replace(/&#(\d+);/g, (_, n) => String.fromCharCode(Number(n))) + .replace(/&/g, "&").replace(/</g, "<").replace(/>/g, ">").trim(); + if (!title) continue; + + const cost = (e.cost as string | undefined)?.trim() || null; + const sourceUrl = (e.url as string | undefined) ?? null; + const image = e.image as Record<string, unknown> | undefined | false; + const imageUrl = image ? (image.url as string | undefined) ?? null : null; + + const description = (e.description as string | undefined) ?? ""; + const openMatch = description.match(/OPEN\s*(\d{1,2}:\d{2})/i); + const artist = parseArtists(description); + + events.push({ + venue_id: venue.id, + title, + date, + open_time: openMatch?.[1] ?? null, + start_time: startTime && startTime !== "00:00" ? 
startTime : null, + price: cost, + image_url: imageUrl, + source_url: sourceUrl, + artist, + }); + } + + return events; + }, +}; diff --git a/app/scrapers/nine-spices.ts b/app/scrapers/nine-spices.ts new file mode 100644 index 0000000..f4afa3d --- /dev/null +++ b/app/scrapers/nine-spices.ts @@ -0,0 +1,94 @@ +/** + * Nine Spices (新宿) — https://9spices.rinky.info/schedule/ + * + * WordPress ベースの独自テーマ。構造: + * <div class="event-cont-par YYYY-MM-DD"> + * <h3 class="event-title sch"><a href="...">タイトル</a></h3> + * <div class="event-leftcol" itemprop="startDate" content="YYYY-MM-DDThh:mm"> + * <div class="sch-actlist"><span class="actlist-name">アーティスト</span></div> + * <div class="sch-time"><div><span>OPEN</span><span>hh:mm</span></div><div><span>START</span>...</div></div> + * <div class="sch-price"><div><span>ADV</span><span>¥XXX</span></div></div> + */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "nine-spices", + name: "Nine Spices", + url: "https://9spices.rinky.info", + area: "新宿", +}; + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const res = await fetch("https://9spices.rinky.info/schedule/"); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + const $ = cheerio.load(await res.text()); + const events: EventInput[] = []; + + $("div[class*='event-cont-par']").each((_, el) => { + const $el = $(el); + + // class="event-cont-par 2026-05-01" → extract date + const classAttr = $el.attr("class") ?? ""; + const dateMatch = classAttr.match(/(\d{4}-\d{2}-\d{2})/); + if (!dateMatch) return; + const date = dateMatch[1]; + + const $titleLink = $el.find("h3.event-title a").first(); + const title = $titleLink.text().trim(); + if (!title) return; + + const sourceUrl = $titleLink.attr("href") ?? 
null; + + const artist = $el.find("span.actlist-name") + .map((_, s) => $(s).text().trim()) + .get() + .join("、") || null; + + // <div class="sch-time"><div><span>OPEN</span><span>18:30</span></div>... + let openTime: string | null = null; + let startTime: string | null = null; + $el.find("div.sch-time div").each((_, row) => { + const spans = $(row).find("span"); + const label = spans.eq(0).text().trim().toUpperCase(); + const value = spans.eq(1).text().trim(); + if (label === "OPEN") openTime = value || null; + if (label === "START") startTime = value || null; + }); + + // <div class="sch-price"><div><span>ADV</span><span>¥2,500</span></div>... + const priceParts: string[] = []; + $el.find("div.sch-price div").each((_, row) => { + const spans = $(row).find("span"); + const label = spans.eq(0).text().trim(); + const value = spans.eq(1).text().trim(); + if (label && value) priceParts.push(`${label} ${value}`); + }); + const price = priceParts.length ? priceParts.join(" / ") : null; + + const imageUrl = $el.find("img.wp-post-image").first().attr("src") ?? null; + + const ticketUrl = + $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='tiget'], a[href*='ticket']") + .first().attr("href") ?? 
null; + + events.push({ + venue_id: venue.id, + title, + artist, + date, + open_time: openTime, + start_time: startTime, + price, + ticket_url: ticketUrl, + image_url: imageUrl, + source_url: sourceUrl, + }); + }); + + return events; + }, +}; diff --git a/app/scrapers/nishieifuku-jam.ts b/app/scrapers/nishieifuku-jam.ts new file mode 100644 index 0000000..c93b051 --- /dev/null +++ b/app/scrapers/nishieifuku-jam.ts @@ -0,0 +1,75 @@ +/** + * 西永福JAM — https://jam.rinky.info/events + * + * rinky.info プラットフォーム。meets-otsuka と同じ HTML 構造。 + * <div class="blog-entry event-wrap" event-date="YYYY-MM-DD"> + * <h2><a href="/events/ID">タイトル</a></h2> + * <p class="act"><span>アーティスト</span></p> + * <p class="time">OPEN 18:30 / START 19:00</p> + * <span class="ticket-price__label">価格</span> + */ +import * as cheerio from "cheerio"; +import type { Scraper, VenueMeta } from "./base"; +import type { EventInput } from "~/lib/db.server"; + +export const venue: VenueMeta = { + id: "nishieifuku-jam", + name: "西永福JAM", + url: "https://jam.rinky.info", + area: "西永福", +}; + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const res = await fetch("https://jam.rinky.info/events"); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + const $ = cheerio.load(await res.text()); + const events: EventInput[] = []; + + $("div.blog-entry.event-wrap").each((_, el) => { + const $el = $(el); + + const date = $el.attr("event-date") ?? ""; + if (!date.match(/^\d{4}-\d{2}-\d{2}$/)) return; + + const $link = $el.find("h2 a").first(); + const title = $link.text().trim(); + if (!title) return; + + const detailPath = $link.attr("href") ?? null; + const sourceUrl = detailPath ? 
`${venue.url}${detailPath}` : null; + + const artist = $el.find("p.act span").map((_, s) => $(s).text().trim()).get().join("、") || null; + + const timeText = $el.find("p.time").first().text(); + const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i); + const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i); + + const price = $el.find("span.ticket-price__label").first().text().trim() || null; + + const bgStyle = $el.find("div.image-bg").attr("style") ?? ""; + const imgMatch = bgStyle.match(/url\(["']?([^"')]+)["']?\)/); + const imageUrl = imgMatch?.[1] ?? null; + + const ticketUrl = + $el.find("a[href*='livepocket'], a[href*='eplus'], a[href*='pia'], a[href*='ticket'], a[href*='tiget']") + .first().attr("href") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? null, + price, + ticket_url: ticketUrl, + image_url: imageUrl, + source_url: sourceUrl, + }); + }); + + return events; + }, +}; diff --git a/app/scrapers/shibuya-o.ts b/app/scrapers/shibuya-o.ts index 1ad8d8c..3d6f192 100644 --- a/app/scrapers/shibuya-o.ts +++ b/app/scrapers/shibuya-o.ts @@ -1,8 +1,15 @@ /** - * Shibuya O-East / O-West / O-Crest / O-Nest (渋谷) - * https://www.shibuya-o.com/schedule/ + * 渋谷 O-EAST / O-WEST / O-Crest / O-nest — https://shibuya-o.com * - * The page uses a unified schedule listing for all O venues. 
+ * 各ベニューのスケジュールページを個別に取得して統合する。 + * DOM 構造 (共通): + * <div class="p-scheduled-card"> + * <a href="https://shibuya-o.com/{venue}/schedule/{slug}/"> + * <span class="p-scheduled-card__date-item">05 / 01</span> + * <span class="p-scheduled-card__date-open">OPEN 18:00 / START 19:00</span> + * <span class="p-scheduled-card__title-main">タイトル</span> + * <li class="p-scheduled-card__artist-item">アーティスト</li> + * 年は nav リンク <a href="/east/schedule/?y=2026&m=6"> から取得。 */ import * as cheerio from "cheerio"; import type { Scraper, VenueMeta } from "./base"; @@ -10,73 +17,99 @@ import type { EventInput } from "~/lib/db.server"; export const venue: VenueMeta = { id: "shibuya-o", - name: "渋谷 O-EAST / O-WEST", - url: "https://www.shibuya-o.com", + name: "渋谷 O-EAST / O-WEST / O-Crest / O-nest", + url: "https://shibuya-o.com", area: "渋谷", }; -export const scraper: Scraper = { - venue, - async scrape(): Promise<EventInput[]> { - const res = await fetch("https://www.shibuya-o.com/schedule/"); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - const html = await res.text(); - const $ = cheerio.load(html); - const events: EventInput[] = []; +const SUB_VENUES = ["east", "west", "crest", "nest"]; +const BASE = "https://shibuya-o.com"; - $(".schedule_list li, .c-schedule__item, .event-item").each((_, el) => { - const $el = $(el); +async function scrapeVenue(subVenue: string): Promise<EventInput[]> { + const url = `${BASE}/${subVenue}/schedule/`; + const res = await fetch(url); + if (!res.ok) throw new Error(`HTTP ${res.status} for ${url}`); + const $ = cheerio.load(await res.text()); + const events: EventInput[] = []; - const title = $el.find(".schedule_title, .title, h3").first().text().trim(); - if (!title) return; + // Extract year: try "next" nav link (?y=YYYY&m=MM) + const nextHref = $("a[href*='?y='][href*='&m=']").last().attr("href") ?? 
""; + const nextYearMatch = nextHref.match(/[?&]y=(\d{4})/); + const nextMonthMatch = nextHref.match(/[?&]m=(\d{1,2})/); + const currentMonthRaw = $("div.p-schedule__month").first().text().trim(); + const currentMonth = parseInt(currentMonthRaw, 10); - const rawDate = - $el.find(".schedule_date, .date, time").first().text().trim() || - $el.find("time").attr("datetime") || - ""; - const date = parseJapaneseDate(rawDate); - if (!date) return; + let year = new Date().getFullYear(); + if (nextYearMatch && nextMonthMatch) { + const nextYear = parseInt(nextYearMatch[1], 10); + const nextMonth = parseInt(nextMonthMatch[1], 10); + // If next month == current month + 1 (normal case), year == nextYear + // If current month == 12 and next month == 1, year == nextYear - 1 + year = nextMonth === currentMonth + 1 ? nextYear : nextYear - 1; + } - const hall = $el.find(".schedule_hall, .hall, .venue-name").first().text().trim() || null; - const timeText = $el.find(".schedule_time, .time").first().text(); - const openMatch = timeText.match(/OPEN[:: ]*(\d{2}:\d{2})/i); - const startMatch = timeText.match(/START[:: ]*(\d{2}:\d{2})/i); + $("div.p-scheduled-card").each((_, el) => { + const $el = $(el); - const detailHref = $el.find("a[href]").first().attr("href") ?? null; + const dateRaw = $el.find("span.p-scheduled-card__date-item").first().text().trim(); + // "05 / 01" → month=5, day=1 + const dateMatch = dateRaw.match(/(\d{1,2})\s*\/\s*(\d{1,2})/); + if (!dateMatch) return; + const month = parseInt(dateMatch[1], 10); + const day = parseInt(dateMatch[2], 10); + if (!currentMonth || !month) return; + // Handle year rollover (December cards on January page, etc.) + const cardYear = month < currentMonth ? year + 1 : year; + const date = `${cardYear}-${String(month).padStart(2, "0")}-${String(day).padStart(2, "0")}`; - events.push({ - venue_id: venue.id, - title, - artist: hall, - date, - open_time: openMatch?.[1] ?? null, - start_time: startMatch?.[1] ?? 
null, - ticket_url: - $el.find("a[href*='eplus'], a[href*='lawson'], a[href*='ticket']").first().attr("href") ?? null, - image_url: $el.find("img").first().attr("src") - ? absoluteUrl($el.find("img").first().attr("src")!, venue.url) - : null, - source_url: detailHref ? absoluteUrl(detailHref, venue.url) : null, - }); - }); + const title = $el.find("span.p-scheduled-card__title-main").first().text().trim(); + if (!title) return; - return events; - }, -}; + const openText = $el.find("span.p-scheduled-card__date-open").first().text().trim(); + const openMatch = openText.match(/OPEN\s*(\d{2}:\d{2})/i); + const startMatch = openText.match(/START\s*(\d{2}:\d{2})/i); -function parseJapaneseDate(raw: string): string | null { - const m = - raw.match(/(\d{4})[./年](\d{1,2})[./月](\d{1,2})/) || - raw.match(/(\d{1,2})[./月](\d{1,2})/); - if (!m) return null; - if (m.length === 4) { - return `${m[1]}-${m[2].padStart(2, "0")}-${m[3].padStart(2, "0")}`; - } - const year = new Date().getFullYear(); - return `${year}-${m[1].padStart(2, "0")}-${m[2].padStart(2, "0")}`; -} + const artists = $el.find("li.p-scheduled-card__artist-item") + .map((_, s) => $(s).text().trim()).get().join("、") || null; + + const detailHref = $el.closest("a[href]").attr("href") ?? + $el.find("a[href]").first().attr("href") ?? null; + const sourceUrl = detailHref + ? (detailHref.startsWith("http") ? detailHref : `${BASE}${detailHref}`) + : null; -function absoluteUrl(url: string, base: string): string { - if (url.startsWith("http")) return url; - return url.startsWith("/") ? base + url : `${base}/${url}`; + const imageUrl = $el.find("figure img").first().attr("src") ?? null; + + events.push({ + venue_id: venue.id, + title, + artist: artists, + date, + open_time: openMatch?.[1] ?? null, + start_time: startMatch?.[1] ?? 
null, + image_url: imageUrl, + source_url: sourceUrl, + }); + }); + + return events; } + +export const scraper: Scraper = { + venue, + async scrape(): Promise<EventInput[]> { + const results = await Promise.allSettled(SUB_VENUES.map(scrapeVenue)); + const all: EventInput[] = []; + for (const r of results) { + if (r.status === "fulfilled") all.push(...r.value); + } + // Deduplicate by date + title + const seen = new Set<string>(); + return all.filter((e) => { + const key = `${e.date}|${e.title}`; + if (seen.has(key)) return false; + seen.add(key); + return true; + }); + }, +}; diff --git a/app/scrapers/warp-kichijoji.ts b/app/scrapers/warp-kichijoji.ts index 8a828ea..8929fef 100644 --- a/app/scrapers/warp-kichijoji.ts +++ b/app/scrapers/warp-kichijoji.ts @@ -76,10 +76,28 @@ export const scraper: Scraper = { ? rawImg.replace(/^https?:\/\/sp-ao\.shortpixel\.ai\/client\/[^/]+\//, "") : null; + // Artists in <div class="w-flyer"> separated by <br> + // notes-wrapper and detail-texts are nested inside w-flyer — clone and strip them + const $wFlyer = $el.find("div.w-flyer").first().clone(); + $wFlyer.find("section.notes-wrapper, div.detail-texts").remove(); + $wFlyer.find("br").replaceWith("\n"); + const rawArtist = $wFlyer.text(); + const artistLines: string[] = []; + for (const raw of rawArtist.split("\n")) { + const l = raw.trim(); + if (!l) { + if (artistLines.length > 0) break; // stop at first blank line after artists + continue; + } + if (/^[■▼◼▶◆]|チケット|ticket|TICKET|予約|http|\d{1,2}:\d{2}|[¥¥]/i.test(l)) break; + artistLines.push(l); + } + const artist = artistLines.length > 0 ? artistLines.join(" / ") : null; + events.push({ venue_id: venue.id, title, - artist: null, + artist, date, open_time: isTime(openTime) ? openTime : null, start_time: isTime(startTime) ? startTime : null, |
