summary | refs | log | tree | commit | diff
path: root/app/scrapers/shinjuku-loft.ts
diff options
context:
space:
mode:
Diffstat (limited to 'app/scrapers/shinjuku-loft.ts')
-rw-r--r-- app/scrapers/shinjuku-loft.ts 86
1 files changed, 35 insertions, 51 deletions
diff --git a/app/scrapers/shinjuku-loft.ts b/app/scrapers/shinjuku-loft.ts
index 8a64761..d5602e7 100644
--- a/app/scrapers/shinjuku-loft.ts
+++ b/app/scrapers/shinjuku-loft.ts
@@ -1,8 +1,3 @@
-/**
- * 新宿 LOFT — https://www.loft-prj.co.jp/schedule/loft
- *
- * The schedule page renders events inside `.eventlist` items.
- */
import * as cheerio from "cheerio";
import type { Scraper, VenueMeta } from "./base";
import type { EventInput } from "~/lib/db.server";
@@ -22,59 +17,48 @@ export const scraper: Scraper = {
const html = await res.text();
const $ = cheerio.load(html);
const events: EventInput[] = [];
+ const seen = new Set<string>();
- $(".eventlist__item, .schedule-item, .event_list li").each((_, el) => {
- const $el = $(el);
+ $("section.block_schedule_list a[href*='/schedule/loft/schedule/']").each(
+ (_, el) => {
+ const $el = $(el);
+ const href = $el.attr("href") ?? "";
+ if (seen.has(href)) return;
+ seen.add(href);
- const title = $el.find(".eventlist__title, .event-title, h3, h2").first().text().trim();
- if (!title) return;
+ const year = $el.find("time div.year").text().trim();
+ const month = $el.find("time div.month").text().trim();
+ const day = $el.find("time div.day").text().trim();
+ if (!year || !month || !day) return;
+ const date = `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
- const rawDate =
- $el.find(".eventlist__date, .event-date, time").first().text().trim() ||
- $el.find("time").attr("datetime") ||
- "";
- const date = parseJapaneseDate(rawDate);
- if (!date) return;
+ const title = $el.find(".c_title span").text().trim();
+ if (!title) return;
- const timeText = $el.find(".eventlist__time, .time").first().text();
- const openMatch = timeText.match(/OPEN[:: ]*(\d{2}:\d{2})/i);
- const startMatch = timeText.match(/START[:: ]*(\d{2}:\d{2})/i);
+ const timeText = $el.find(".open").text().trim();
+ const openMatch = timeText.match(/OPEN\s*(\d{2}:\d{2})/i);
+ const startMatch = timeText.match(/START\s*(\d{2}:\d{2})/i);
- const detailHref = $el.find("a[href]").first().attr("href") ?? null;
+ const artists = $el
+ .find("ul.artist_tag li")
+ .map((_, li) => $(li).text().trim())
+ .get()
+ .filter((a) => a !== "...");
+ const artist = artists.join(" / ") || null;
- events.push({
- venue_id: venue.id,
- title,
- artist: $el.find(".eventlist__artist, .artist").first().text().trim() || null,
- date,
- open_time: openMatch?.[1] ?? null,
- start_time: startMatch?.[1] ?? null,
- ticket_url:
- $el.find("a[href*='eplus'], a[href*='pia'], a[href*='ticket']").first().attr("href") ?? null,
- image_url: $el.find("img").first().attr("src")
- ? absoluteUrl($el.find("img").first().attr("src")!, venue.url)
- : null,
- source_url: detailHref ? absoluteUrl(detailHref, venue.url) : null,
- });
- });
+ events.push({
+ venue_id: venue.id,
+ title,
+ artist,
+ date,
+ open_time: openMatch?.[1] ?? null,
+ start_time: startMatch?.[1] ?? null,
+ image_url: $el.find("span.bg").attr("data-bg") ?? null,
+ source_url: href,
+ });
+ }
+ );
return events;
},
};
-
-function parseJapaneseDate(raw: string): string | null {
- const m =
- raw.match(/(\d{4})[./年](\d{1,2})[./月](\d{1,2})/) ||
- raw.match(/(\d{1,2})[./月](\d{1,2})/);
- if (!m) return null;
- if (m.length === 4) {
- return `${m[1]}-${m[2].padStart(2, "0")}-${m[3].padStart(2, "0")}`;
- }
- const year = new Date().getFullYear();
- return `${year}-${m[1].padStart(2, "0")}-${m[2].padStart(2, "0")}`;
-}
-
-function absoluteUrl(url: string, base: string): string {
- if (url.startsWith("http")) return url;
- return url.startsWith("/") ? base + url : `${base}/${url}`;
-}