diff options
| author | Pinapelz <yukais@pinapelz.com> | 2025-04-15 00:25:29 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2025-04-15 00:25:29 -0700 |
| commit | 5bf27feebd8087932de138bda1a4605acc95bef4 (patch) | |
| tree | 677dc1cafffaf7b9416e2307d2ba07a442662062 /sega/chuni_jp.py | |
| parent | 91f4a6ba665ff92a759758bec5ae13528da6a3c1 (diff) | |
Refactor Sega game parsers to follow a function-factory design,
since there is unlikely to be much change between each game.
Diffstat (limited to 'sega/chuni_jp.py')
| -rw-r--r-- | sega/chuni_jp.py | 111 |
1 files changed, 63 insertions, 48 deletions
diff --git a/sega/chuni_jp.py b/sega/chuni_jp.py
index bdbe800..981fb8f 100644
--- a/sega/chuni_jp.py
+++ b/sega/chuni_jp.py
@@ -2,54 +2,69 @@ from bs4 import BeautifulSoup
 from datetime import datetime, timezone, timedelta
 from urllib.parse import urljoin
 import re
+from enum import Enum
 
-def parse_chuni_jp_verse_news_site(html: str):
-    soup = BeautifulSoup(html, "html.parser")
-    news_entries = []
-    news_wrapper = soup.find("div", class_="newsMainWrapper-left")
-    if not news_wrapper:
-        return news_entries
-    for a_tag in news_wrapper.find_all("a", href=True):
-        if not a_tag.find("div", class_="chuniCommonBox-inner"):
-            continue
-        news_dict = {}
-        news_url = a_tag.get("href")
-        news_dict["url"] = news_url
-        date_container = a_tag.find("div", class_="chuniCommonBox-inner-title")
-        date_str = None
-        if date_container:
-            title_span = date_container.find("span", class_="title")
-            if title_span:
-                text = title_span.get_text(strip=True)
-                date_match = re.search(r"(\d{4}\.\d{2}\.\d{2})", text)
-                if date_match:
-                    date_str = date_match.group(1)
-        news_dict["date"] = date_str
-        news_dict["type"] = None
-        timestamp = None
-        if date_str:
-            try:
-                dt = datetime.strptime(date_str, "%Y.%m.%d")
-                dt = dt.replace(tzinfo=timezone(timedelta(hours=9)))
-                timestamp = int(dt.timestamp())
-            except Exception:
-                timestamp = None
-        news_dict["timestamp"] = timestamp
-        main_content = a_tag.find("div", class_="chuniCommonBox-inner-main")
-        headline = None
-        content_text = ""
-        if main_content:
-            content_text = main_content.get_text(separator=" ", strip=True)
-        news_dict["content"] = content_text
-        images = {"image": None, "link": None}
-        if main_content:
-            img_tag = main_content.find("img")
-            if img_tag:
-                images["image"] = img_tag.get("src")
-                images["link"] = news_url
-        news_dict["images"] = [images]
-        news_dict["identifier"] = "CHUNITHM_JP_VERSE"
+class ParserVersion(Enum):
+    ALPHA=1
+
+def make_chuni_jp_parser(identifier: str, parser: ParserVersion):
+    def alpha_parser(html: str):
+        """
+        Confirmed on:
+        VERSE
+        """
+        soup = BeautifulSoup(html, "html.parser")
+        news_entries = []
+        news_wrapper = soup.find("div", class_="newsMainWrapper-left")
+        if not news_wrapper:
+            return news_entries
+        for a_tag in news_wrapper.find_all("a", href=True):
+            if not a_tag.find("div", class_="chuniCommonBox-inner"):
+                continue
+            news_dict = {}
+            news_url = a_tag.get("href")
+            news_dict["url"] = news_url
+
+            date_container = a_tag.find("div", class_="chuniCommonBox-inner-title")
+            date_str = None
+            if date_container:
+                title_span = date_container.find("span", class_="title")
+                if title_span:
+                    text = title_span.get_text(strip=True)
+                    date_match = re.search(r"(\d{4}\.\d{2}\.\d{2})", text)
+                    if date_match:
+                        date_str = date_match.group(1)
+            news_dict["date"] = date_str
+            news_dict["type"] = None
+            timestamp = None
+            if date_str:
+                try:
+                    dt = datetime.strptime(date_str, "%Y.%m.%d")
+                    dt = dt.replace(tzinfo=timezone(timedelta(hours=9)))
+                    timestamp = int(dt.timestamp())
+                except Exception:
+                    timestamp = None
+            news_dict["timestamp"] = timestamp
+
+            main_content = a_tag.find("div", class_="chuniCommonBox-inner-main")
+            content_text = ""
+            if main_content:
+                content_text = main_content.get_text(separator=" ", strip=True)
+            news_dict["content"] = content_text
 
-        news_entries.append(news_dict)
+            images = {"image": None, "link": None}
+            if main_content:
+                img_tag = main_content.find("img")
+                if img_tag:
+                    images["image"] = img_tag.get("src")
+                    images["link"] = news_url
+            news_dict["images"] = [images]
+            news_dict["identifier"] = identifier
+
+            news_entries.append(news_dict)
+
+        return news_entries
+    if parser == ParserVersion.ALPHA:
+        return alpha_parser
 
-    return news_entries
+parse_chuni_jp_verse_news_site = make_chuni_jp_parser("CHUNITHM_JP_VERSE", ParserVersion.ALPHA)
