about summary refs log tree commit diff stats
path: root/sega
diff options
context:
space:
mode:
author Pinapelz <yukais@pinapelz.com> 2025-04-15 00:24:35 -0700
committer Pinapelz <yukais@pinapelz.com> 2025-04-15 00:24:35 -0700
commit 91f4a6ba665ff92a759758bec5ae13528da6a3c1 (patch)
tree adf00e9c076500c930fcfa7e46ce3675156a2eb4 /sega
parent edf5d77f1c44660e2e49c69635193df3ed715325 (diff)
add parser for CHUNITHM INTL
Diffstat (limited to 'sega')
-rw-r--r-- sega/chuni_intl.py 59
1 files changed, 59 insertions, 0 deletions
diff --git a/sega/chuni_intl.py b/sega/chuni_intl.py
new file mode 100644
index 0000000..1421773
--- /dev/null
+++ b/sega/chuni_intl.py
@@ -0,0 +1,59 @@
+from bs4 import BeautifulSoup
+from datetime import datetime, timezone, timedelta
+from urllib.parse import urljoin
+import re
+from enum import Enum
+
class ParserVersion(Enum):
    """Identifies which page-layout parser `make_chuni_intl_parser` returns."""

    # The only layout handled so far.
    ALPHA = 1
+
def make_chuni_intl_parser(identifier: str, parser: ParserVersion):
    """Build a news-list parser bound to ``identifier``.

    Args:
        identifier: Value stored under the ``"identifier"`` key of every
            entry the returned parser produces.
        parser: Selects which page-layout parser is returned.

    Returns:
        A callable that takes the page HTML as a string and returns a list
        of news-entry dicts.

    Raises:
        ValueError: If ``parser`` is not a supported ``ParserVersion``.
    """
    base_url = "https://info-chunithm.sega.com/"
    # Invariant across items, so built once instead of once per list entry.
    date_pattern = re.compile(r"(\d{4})\.(\d{1,2})\.(\d{1,2})")
    jst = timezone(timedelta(hours=9))

    def alpha_parser(html: str):
        """
        Extract one entry dict per ``li.news--list__item`` element.

        Items lacking a link, a date node, a title node, or a date of the
        form ``YYYY.M.D`` that names a real calendar day are skipped.
        Dates are interpreted as midnight at UTC+9.

        Confirmed on:
        LUMINOUS PLUS
        """
        soup = BeautifulSoup(html, "html.parser")
        results = []

        for item in soup.select("li.news--list__item"):
            a_tag = item.select_one("a.news--list__post")
            date_tag = item.select_one("div.news--date")
            title_tag = item.select_one("p.news--title")
            # select_one() yields None when nothing matches; skip incomplete
            # items instead of failing on `.text` / `["href"]`.
            if a_tag is None or date_tag is None or title_tag is None:
                continue

            href = a_tag.get("href")
            if href is None:
                continue

            date_match = date_pattern.match(date_tag.text.strip())
            if not date_match:
                continue
            year, month, day = map(int, date_match.groups())
            try:
                dt = datetime(year, month, day, tzinfo=jst)
            except ValueError:
                # Digits matched the pattern but are not a valid date
                # (e.g. month 13); one bad item must not abort the parse.
                continue

            url = urljoin(base_url, href)
            headline = title_tag.text.strip()

            img_tag = item.select_one("div.news--thumbnail img")
            img_src = img_tag.get("src") if img_tag is not None else None
            # A missing or empty src means there is no usable thumbnail.
            image_url = urljoin(base_url, img_src) if img_src else None

            results.append({
                "date": dt.strftime("%Y-%m-%d"),
                "identifier": identifier,
                "type": None,
                "timestamp": int(dt.timestamp()),
                "headline": headline,
                # The list page exposes no body text; the headline is reused.
                "content": headline,
                "url": url,
                "images": [{
                    "image": image_url,
                    "link": url,
                }] if image_url else [],
            })

        return results

    if parser == ParserVersion.ALPHA:
        return alpha_parser
    # Fail loudly rather than returning None, which would only surface later
    # as "'NoneType' object is not callable" at the call site.
    raise ValueError(f"Unsupported parser version: {parser!r}")
+
# Ready-made parser for the LUMINOUS PLUS news site, using the ALPHA layout.
parse_chuni_intl_luminous_plus_news_site = make_chuni_intl_parser(
    "CHUNITHM_INTL_LUMINOUS_PLUS",
    ParserVersion.ALPHA,
)
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage