From d5e975b601e70adf901c8e1eb7e61f0388941195 Mon Sep 17 00:00:00 2001 From: yyamashita Date: Thu, 7 May 2026 19:27:50 +0900 Subject: Add 5 new venue scrapers; extract artist info for WARP, shibuya-o, MOON STEP, mod MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New scrapers: Fever 下北沢, Nine Spices 下北沢, 西荻窪 JAM, mod 柴崎, 中野 MOON STEP Artist extraction added/fixed: - warp-kichijoji: parse div.w-flyer (clone + remove nested notes-wrapper) - shibuya-o: rewrite to scrape each sub-venue; artist from li.p-scheduled-card__artist-item - moon-step-nakano: parse 出演 section from WordPress API description HTML - mod-shibasaki: fetch individual event pages in parallel; handle live:/出演:/・ bullet formats Co-Authored-By: Claude Sonnet 4.6 --- app/scrapers/warp-kichijoji.ts | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'app/scrapers/warp-kichijoji.ts') diff --git a/app/scrapers/warp-kichijoji.ts b/app/scrapers/warp-kichijoji.ts index 8a828ea..8929fef 100644 --- a/app/scrapers/warp-kichijoji.ts +++ b/app/scrapers/warp-kichijoji.ts @@ -76,10 +76,28 @@ export const scraper: Scraper = { ? rawImg.replace(/^https?:\/\/sp-ao\.shortpixel\.ai\/client\/[^/]+\//, "") : null; + // Artists in
<div class="w-flyer"> separated by <br>
+ // notes-wrapper and detail-texts are nested inside w-flyer — clone and strip them + const $wFlyer = $el.find("div.w-flyer").first().clone(); + $wFlyer.find("section.notes-wrapper, div.detail-texts").remove(); + $wFlyer.find("br").replaceWith("\n"); + const rawArtist = $wFlyer.text(); + const artistLines: string[] = []; + for (const raw of rawArtist.split("\n")) { + const l = raw.trim(); + if (!l) { + if (artistLines.length > 0) break; // stop at first blank line after artists + continue; + } + if (/^[■▼◼▶◆]|チケット|ticket|TICKET|予約|http|\d{1,2}:\d{2}|[¥¥]/i.test(l)) break; + artistLines.push(l); + } + const artist = artistLines.length > 0 ? artistLines.join(" / ") : null; + events.push({ venue_id: venue.id, title, - artist: null, + artist, date, open_time: isTime(openTime) ? openTime : null, start_time: isTime(startTime) ? startTime : null, -- cgit v1.2.3