diff options
| author | Pinapelz <yukais@pinapelz.com> | 2025-04-15 00:24:35 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2025-04-15 00:24:35 -0700 |
| commit | 91f4a6ba665ff92a759758bec5ae13528da6a3c1 (patch) | |
| tree | adf00e9c076500c930fcfa7e46ce3675156a2eb4 /sega | |
| parent | edf5d77f1c44660e2e49c69635193df3ed715325 (diff) | |
add parser for CHUNITHM INTL
Diffstat (limited to 'sega')
| -rw-r--r-- | sega/chuni_intl.py | 59 |
1 files changed, 59 insertions, 0 deletions
diff --git a/sega/chuni_intl.py b/sega/chuni_intl.py new file mode 100644 index 0000000..1421773 --- /dev/null +++ b/sega/chuni_intl.py @@ -0,0 +1,59 @@ +from bs4 import BeautifulSoup +from datetime import datetime, timezone, timedelta +from urllib.parse import urljoin +import re +from enum import Enum + +class ParserVersion(Enum): + ALPHA=1 + +def make_chuni_intl_parser(identifier: str, parser: ParserVersion): + def alpha_parser(html: str): + """ + Confirmed on: + LUMINOUS PLUS + """ + soup = BeautifulSoup(html, "html.parser") + base_url = "https://info-chunithm.sega.com/" + items = soup.select("li.news--list__item") + results = [] + + for item in items: + a_tag = item.select_one("a.news--list__post") + if not a_tag: + continue + + url = urljoin(base_url, a_tag["href"]) + date_text = item.select_one("div.news--date").text.strip() + headline = item.select_one("p.news--title").text.strip() + img_tag = item.select_one("div.news--thumbnail img") + image_url = urljoin(base_url, img_tag["src"]) if img_tag else None + + date_match = re.match(r"(\d{4})\.(\d{1,2})\.(\d{1,2})", date_text) + if not date_match: + continue + year, month, day = map(int, date_match.groups()) + jst = timezone(timedelta(hours=9)) + dt = datetime(year, month, day, tzinfo=jst) + timestamp = int(dt.timestamp()) + + results.append({ + "date": dt.strftime("%Y-%m-%d"), + "identifier": identifier, + "type": None, + "timestamp": timestamp, + "headline": headline, + "content": headline, + "url": url, + "images": [{ + "image": image_url, + "link": url + }] if image_url else [] + }) + + return results + + if parser == ParserVersion.ALPHA + return alpha_parser + +parse_chuni_intl_luminous_plus_news_site = make_chuni_intl_parser("CHUNITHM_INTL_LUMINOUS_PLUS", ParserVersion.ALPHA) |
