aboutsummaryrefslogtreecommitdiffstats
path: root/sega/chuni_jp.py
diff options
context:
space:
mode:
authorPinapelz <yukais@pinapelz.com>2025-04-15 00:25:29 -0700
committerPinapelz <yukais@pinapelz.com>2025-04-15 00:25:29 -0700
commit5bf27feebd8087932de138bda1a4605acc95bef4 (patch)
tree677dc1cafffaf7b9416e2307d2ba07a442662062 /sega/chuni_jp.py
parent91f4a6ba665ff92a759758bec5ae13528da6a3c1 (diff)
refactor sega games to follow function factory design
unlikely to be much change between each game
Diffstat (limited to 'sega/chuni_jp.py')
-rw-r--r--sega/chuni_jp.py111
1 files changed, 63 insertions, 48 deletions
diff --git a/sega/chuni_jp.py b/sega/chuni_jp.py
index bdbe800..981fb8f 100644
--- a/sega/chuni_jp.py
+++ b/sega/chuni_jp.py
@@ -2,54 +2,69 @@ from bs4 import BeautifulSoup
from datetime import datetime, timezone, timedelta
from urllib.parse import urljoin
import re
+from enum import Enum
-def parse_chuni_jp_verse_news_site(html: str):
- soup = BeautifulSoup(html, "html.parser")
- news_entries = []
- news_wrapper = soup.find("div", class_="newsMainWrapper-left")
- if not news_wrapper:
- return news_entries
- for a_tag in news_wrapper.find_all("a", href=True):
- if not a_tag.find("div", class_="chuniCommonBox-inner"):
- continue
- news_dict = {}
- news_url = a_tag.get("href")
- news_dict["url"] = news_url
- date_container = a_tag.find("div", class_="chuniCommonBox-inner-title")
- date_str = None
- if date_container:
- title_span = date_container.find("span", class_="title")
- if title_span:
- text = title_span.get_text(strip=True)
- date_match = re.search(r"(\d{4}\.\d{2}\.\d{2})", text)
- if date_match:
- date_str = date_match.group(1)
- news_dict["date"] = date_str
- news_dict["type"] = None
- timestamp = None
- if date_str:
- try:
- dt = datetime.strptime(date_str, "%Y.%m.%d")
- dt = dt.replace(tzinfo=timezone(timedelta(hours=9)))
- timestamp = int(dt.timestamp())
- except Exception:
- timestamp = None
- news_dict["timestamp"] = timestamp
- main_content = a_tag.find("div", class_="chuniCommonBox-inner-main")
- headline = None
- content_text = ""
- if main_content:
- content_text = main_content.get_text(separator=" ", strip=True)
- news_dict["content"] = content_text
- images = {"image": None, "link": None}
- if main_content:
- img_tag = main_content.find("img")
- if img_tag:
- images["image"] = img_tag.get("src")
- images["link"] = news_url
- news_dict["images"] = [images]
- news_dict["identifier"] = "CHUNITHM_JP_VERSE"
+class ParserVersion(Enum):
+ ALPHA=1
+
+def make_chuni_jp_parser(identifier: str, parser: ParserVersion):
+ def alpha_parser(html: str):
+ """
+ Confirmed on:
+ VERSE
+ """
+ soup = BeautifulSoup(html, "html.parser")
+ news_entries = []
+ news_wrapper = soup.find("div", class_="newsMainWrapper-left")
+ if not news_wrapper:
+ return news_entries
+ for a_tag in news_wrapper.find_all("a", href=True):
+ if not a_tag.find("div", class_="chuniCommonBox-inner"):
+ continue
+ news_dict = {}
+ news_url = a_tag.get("href")
+ news_dict["url"] = news_url
+
+ date_container = a_tag.find("div", class_="chuniCommonBox-inner-title")
+ date_str = None
+ if date_container:
+ title_span = date_container.find("span", class_="title")
+ if title_span:
+ text = title_span.get_text(strip=True)
+ date_match = re.search(r"(\d{4}\.\d{2}\.\d{2})", text)
+ if date_match:
+ date_str = date_match.group(1)
+ news_dict["date"] = date_str
+ news_dict["type"] = None
+ timestamp = None
+ if date_str:
+ try:
+ dt = datetime.strptime(date_str, "%Y.%m.%d")
+ dt = dt.replace(tzinfo=timezone(timedelta(hours=9)))
+ timestamp = int(dt.timestamp())
+ except Exception:
+ timestamp = None
+ news_dict["timestamp"] = timestamp
+
+ main_content = a_tag.find("div", class_="chuniCommonBox-inner-main")
+ content_text = ""
+ if main_content:
+ content_text = main_content.get_text(separator=" ", strip=True)
+ news_dict["content"] = content_text
- news_entries.append(news_dict)
+ images = {"image": None, "link": None}
+ if main_content:
+ img_tag = main_content.find("img")
+ if img_tag:
+ images["image"] = img_tag.get("src")
+ images["link"] = news_url
+ news_dict["images"] = [images]
+ news_dict["identifier"] = identifier
+
+ news_entries.append(news_dict)
+
+ return news_entries
+ if parser == ParserVersion.ALPHA:
+ return alpha_parser
- return news_entries
+parse_chuni_jp_verse_news_site = make_chuni_jp_parser("CHUNITHM_JP_VERSE", ParserVersion.ALPHA)
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage