Refactor Sega game parsers to follow a function-factory design

The parsing logic is unlikely to change much from one game to the next.
author: Pinapelz <yukais@pinapelz.com> 2025-04-15 00:25:29 -0700
committer: Pinapelz <yukais@pinapelz.com> 2025-04-15 00:25:29 -0700
commit: 5bf27feebd8087932de138bda1a4605acc95bef4 (patch)
tree: 677dc1cafffaf7b9416e2307d2ba07a442662062 /sega/chuni_jp.py
parent: 91f4a6ba665ff92a759758bec5ae13528da6a3c1 (diff)
1 files changed, 63 insertions, 48 deletions
diff --git a/sega/chuni_jp.py b/sega/chuni_jp.py
index bdbe800..981fb8f 100644
--- a/sega/chuni_jp.py
+++ b/sega/chuni_jp.py
@@ -2,54 +2,69 @@ from bs4 import BeautifulSoup
 from datetime import datetime, timezone, timedelta
 from urllib.parse import urljoin
 import re
+from enum import Enum
 
-def parse_chuni_jp_verse_news_site(html: str):
-    soup = BeautifulSoup(html, "html.parser")
-    news_entries = []
-    news_wrapper = soup.find("div", class_="newsMainWrapper-left")
-    if not news_wrapper:
-        return news_entries
-    for a_tag in news_wrapper.find_all("a", href=True):
-        if not a_tag.find("div", class_="chuniCommonBox-inner"):
-            continue
-        news_dict = {}
-        news_url = a_tag.get("href")
-        news_dict["url"] = news_url
-        date_container = a_tag.find("div", class_="chuniCommonBox-inner-title")
-        date_str = None
-        if date_container:
-            title_span = date_container.find("span", class_="title")
-            if title_span:
-                text = title_span.get_text(strip=True)
-                date_match = re.search(r"(\d{4}\.\d{2}\.\d{2})", text)
-                if date_match:
-                    date_str = date_match.group(1)
-        news_dict["date"] = date_str
-        news_dict["type"] = None
-        timestamp = None
-        if date_str:
-            try:
-                dt = datetime.strptime(date_str, "%Y.%m.%d")
-                dt = dt.replace(tzinfo=timezone(timedelta(hours=9)))
-                timestamp = int(dt.timestamp())
-            except Exception:
-                timestamp = None
-        news_dict["timestamp"] = timestamp
-        main_content = a_tag.find("div", class_="chuniCommonBox-inner-main")
-        headline = None
-        content_text = ""
-        if main_content:
-            content_text = main_content.get_text(separator=" ", strip=True)
-        news_dict["content"] = content_text
-        images = {"image": None, "link": None}
-        if main_content:
-            img_tag = main_content.find("img")
-            if img_tag:
-                images["image"] = img_tag.get("src")
-                images["link"] = news_url
-        news_dict["images"] = [images]
-        news_dict["identifier"] = "CHUNITHM_JP_VERSE"
+class ParserVersion(Enum):
+    ALPHA=1
+
+def make_chuni_jp_parser(identifier: str, parser: ParserVersion):
+    def alpha_parser(html: str):
+        """
+        Confirmed on:
+        VERSE
+        """
+        soup = BeautifulSoup(html, "html.parser")
+        news_entries = []
+        news_wrapper = soup.find("div", class_="newsMainWrapper-left")
+        if not news_wrapper:
+            return news_entries
+        for a_tag in news_wrapper.find_all("a", href=True):
+            if not a_tag.find("div", class_="chuniCommonBox-inner"):
+                continue
+            news_dict = {}
+            news_url = a_tag.get("href")
+            news_dict["url"] = news_url
+
+            date_container = a_tag.find("div", class_="chuniCommonBox-inner-title")
+            date_str = None
+            if date_container:
+                title_span = date_container.find("span", class_="title")
+                if title_span:
+                    text = title_span.get_text(strip=True)
+                    date_match = re.search(r"(\d{4}\.\d{2}\.\d{2})", text)
+                    if date_match:
+                        date_str = date_match.group(1)
+            news_dict["date"] = date_str
+            news_dict["type"] = None
+            timestamp = None
+            if date_str:
+                try:
+                    dt = datetime.strptime(date_str, "%Y.%m.%d")
+                    dt = dt.replace(tzinfo=timezone(timedelta(hours=9)))
+                    timestamp = int(dt.timestamp())
+                except Exception:
+                    timestamp = None
+            news_dict["timestamp"] = timestamp
+
+            main_content = a_tag.find("div", class_="chuniCommonBox-inner-main")
+            content_text = ""
+            if main_content:
+                content_text = main_content.get_text(separator=" ", strip=True)
+            news_dict["content"] = content_text
 
-        news_entries.append(news_dict)
+            images = {"image": None, "link": None}
+            if main_content:
+                img_tag = main_content.find("img")
+                if img_tag:
+                    images["image"] = img_tag.get("src")
+                    images["link"] = news_url
+            news_dict["images"] = [images]
+            news_dict["identifier"] = identifier
+
+            news_entries.append(news_dict)
+
+        return news_entries
+    if parser == ParserVersion.ALPHA:
+        return alpha_parser
 
-    return news_entries
+parse_chuni_jp_verse_news_site = make_chuni_jp_parser("CHUNITHM_JP_VERSE", ParserVersion.ALPHA)
author	Pinapelz <yukais@pinapelz.com>	2025-04-15 00:25:29 -0700
committer	Pinapelz <yukais@pinapelz.com>	2025-04-15 00:25:29 -0700
commit	5bf27feebd8087932de138bda1a4605acc95bef4 (patch)
tree	677dc1cafffaf7b9416e2307d2ba07a442662062 /sega/chuni_jp.py
parent	91f4a6ba665ff92a759758bec5ae13528da6a3c1 (diff)