aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPinapelz <yukais@pinapelz.com>2025-04-15 00:25:29 -0700
committerPinapelz <yukais@pinapelz.com>2025-04-15 00:25:29 -0700
commit5bf27feebd8087932de138bda1a4605acc95bef4 (patch)
tree677dc1cafffaf7b9416e2307d2ba07a442662062
parent91f4a6ba665ff92a759758bec5ae13528da6a3c1 (diff)
Refactor the Sega game parsers to follow a function-factory design,
since there is unlikely to be much change between each game.
-rw-r--r--sega/chuni_jp.py111
-rw-r--r--sega/maimaidx_intl.py69
-rw-r--r--sega/maimaidx_jp.py85
-rw-r--r--sega/ongeki_jp.py84
4 files changed, 199 insertions, 150 deletions
diff --git a/sega/chuni_jp.py b/sega/chuni_jp.py
index bdbe800..981fb8f 100644
--- a/sega/chuni_jp.py
+++ b/sega/chuni_jp.py
@@ -2,54 +2,69 @@ from bs4 import BeautifulSoup
from datetime import datetime, timezone, timedelta
from urllib.parse import urljoin
import re
+from enum import Enum
-def parse_chuni_jp_verse_news_site(html: str):
- soup = BeautifulSoup(html, "html.parser")
- news_entries = []
- news_wrapper = soup.find("div", class_="newsMainWrapper-left")
- if not news_wrapper:
- return news_entries
- for a_tag in news_wrapper.find_all("a", href=True):
- if not a_tag.find("div", class_="chuniCommonBox-inner"):
- continue
- news_dict = {}
- news_url = a_tag.get("href")
- news_dict["url"] = news_url
- date_container = a_tag.find("div", class_="chuniCommonBox-inner-title")
- date_str = None
- if date_container:
- title_span = date_container.find("span", class_="title")
- if title_span:
- text = title_span.get_text(strip=True)
- date_match = re.search(r"(\d{4}\.\d{2}\.\d{2})", text)
- if date_match:
- date_str = date_match.group(1)
- news_dict["date"] = date_str
- news_dict["type"] = None
- timestamp = None
- if date_str:
- try:
- dt = datetime.strptime(date_str, "%Y.%m.%d")
- dt = dt.replace(tzinfo=timezone(timedelta(hours=9)))
- timestamp = int(dt.timestamp())
- except Exception:
- timestamp = None
- news_dict["timestamp"] = timestamp
- main_content = a_tag.find("div", class_="chuniCommonBox-inner-main")
- headline = None
- content_text = ""
- if main_content:
- content_text = main_content.get_text(separator=" ", strip=True)
- news_dict["content"] = content_text
- images = {"image": None, "link": None}
- if main_content:
- img_tag = main_content.find("img")
- if img_tag:
- images["image"] = img_tag.get("src")
- images["link"] = news_url
- news_dict["images"] = [images]
- news_dict["identifier"] = "CHUNITHM_JP_VERSE"
+class ParserVersion(Enum):
+ ALPHA=1
+
+def make_chuni_jp_parser(identifier: str, parser: ParserVersion):
+ def alpha_parser(html: str):
+ """
+ Confirmed on:
+ VERSE
+ """
+ soup = BeautifulSoup(html, "html.parser")
+ news_entries = []
+ news_wrapper = soup.find("div", class_="newsMainWrapper-left")
+ if not news_wrapper:
+ return news_entries
+ for a_tag in news_wrapper.find_all("a", href=True):
+ if not a_tag.find("div", class_="chuniCommonBox-inner"):
+ continue
+ news_dict = {}
+ news_url = a_tag.get("href")
+ news_dict["url"] = news_url
+
+ date_container = a_tag.find("div", class_="chuniCommonBox-inner-title")
+ date_str = None
+ if date_container:
+ title_span = date_container.find("span", class_="title")
+ if title_span:
+ text = title_span.get_text(strip=True)
+ date_match = re.search(r"(\d{4}\.\d{2}\.\d{2})", text)
+ if date_match:
+ date_str = date_match.group(1)
+ news_dict["date"] = date_str
+ news_dict["type"] = None
+ timestamp = None
+ if date_str:
+ try:
+ dt = datetime.strptime(date_str, "%Y.%m.%d")
+ dt = dt.replace(tzinfo=timezone(timedelta(hours=9)))
+ timestamp = int(dt.timestamp())
+ except Exception:
+ timestamp = None
+ news_dict["timestamp"] = timestamp
+
+ main_content = a_tag.find("div", class_="chuniCommonBox-inner-main")
+ content_text = ""
+ if main_content:
+ content_text = main_content.get_text(separator=" ", strip=True)
+ news_dict["content"] = content_text
- news_entries.append(news_dict)
+ images = {"image": None, "link": None}
+ if main_content:
+ img_tag = main_content.find("img")
+ if img_tag:
+ images["image"] = img_tag.get("src")
+ images["link"] = news_url
+ news_dict["images"] = [images]
+ news_dict["identifier"] = identifier
+
+ news_entries.append(news_dict)
+
+ return news_entries
+ if parser == ParserVersion.ALPHA:
+ return alpha_parser
- return news_entries
+parse_chuni_jp_verse_news_site = make_chuni_jp_parser("CHUNITHM_JP_VERSE", ParserVersion.ALPHA)
diff --git a/sega/maimaidx_intl.py b/sega/maimaidx_intl.py
index 57e7cfb..1671d9f 100644
--- a/sega/maimaidx_intl.py
+++ b/sega/maimaidx_intl.py
@@ -1,36 +1,47 @@
from bs4 import BeautifulSoup
from datetime import datetime, timezone, timedelta
-import time
+from enum import Enum
-def parse_maimaidx_intl_prism_news_site(html: str):
- soup = BeautifulSoup(html, "html.parser")
- items = soup.select(".dl--pop__item")
+class ParserVersion(Enum):
+ ALPHA=1
- entries = []
- for item in items:
- date_text = item.select_one(".dl--pop__head").text.strip().replace(" UP", "")
- dt = datetime.strptime(date_text, "%Y.%m.%d").replace(tzinfo=timezone(timedelta(hours=9)))
- timestamp = int(dt.timestamp())
+def make_maimaidx_intl_parser(identifier: str, parser: ParserVersion):
+ def alpha_parser(html: str):
+ """
+ Confirmed on:
+ PRISM
+ """
+ soup = BeautifulSoup(html, "html.parser")
+ items = soup.select(".dl--pop__item")
- img_tag = item.select_one("a.dl--pop__thumb img")
- image_url = img_tag["srcset"] if img_tag else None
- full_image_url = image_url.replace("../", "https://maimai.sega.com/") if image_url else None
+ entries = []
+ for item in items:
+ date_text = item.select_one(".dl--pop__head").text.strip().replace(" UP", "")
+ dt = datetime.strptime(date_text, "%Y.%m.%d").replace(tzinfo=timezone(timedelta(hours=9)))
+ timestamp = int(dt.timestamp())
- entry = {
- "date": date_text,
- "identifier": "MAIMAIDX_INTL_PRISM",
- "type": None,
- "timestamp": timestamp,
- "headline": None,
- "content": f"New maimai DX International News / maimai DX International の新しいお知らせ\n\n{full_image_url}",
- "url": None,
- "images": [
- {
- "image": full_image_url,
- "link": None
- }
- ]
- }
+ img_tag = item.select_one("a.dl--pop__thumb img")
+ image_url = img_tag["srcset"] if img_tag else None
+ full_image_url = image_url.replace("../", "https://maimai.sega.com/") if image_url else None
- entries.append(entry)
- return entries
+ entry = {
+ "date": date_text,
+ "identifier": identifier,
+ "type": None,
+ "timestamp": timestamp,
+ "headline": None,
+ "content": f"New maimai DX International News / maimai DX International の新しいお知らせ\n\n{full_image_url}",
+ "url": None,
+ "images": [
+ {
+ "image": full_image_url,
+ "link": None
+ }
+ ]
+ }
+ entries.append(entry)
+ return entries
+ if parser == ParserVersion.ALPHA:
+ return alpha_parser
+
+parse_maimaidx_intl_prism_news_site = make_maimaidx_intl_parser("MAIMAIDX_INTL_PRISM", ParserVersion.ALPHA)
diff --git a/sega/maimaidx_jp.py b/sega/maimaidx_jp.py
index 90530f0..720a618 100644
--- a/sega/maimaidx_jp.py
+++ b/sega/maimaidx_jp.py
@@ -1,46 +1,59 @@
from bs4 import BeautifulSoup
from datetime import datetime, timezone, timedelta
from urllib.parse import urljoin
-import re
+from enum import Enum
-def parse_maimaidx_jp_prism_plus_news_site(html: str):
- soup = BeautifulSoup(html, "html.parser")
- base_url = "https://info-maimai.sega.jp/"
- news_items = []
+class ParserVersion(Enum):
+ ALPHA=1
- news_boxes = soup.select(".maiPager-content .newsBox")
- for box in news_boxes:
- a_tag = box.select_one("a")
- url = urljoin(base_url, a_tag["href"]) if a_tag and a_tag.get("href") else None
+def make_maimaidx_jpn_parser(identifier: str, parser: ParserVersion):
+ def alpha_parser(html: str):
+ """
+ Confirmed on:
+ PRISM PLUS
+ """
+ soup = BeautifulSoup(html, "html.parser")
+ base_url = "https://info-maimai.sega.jp/"
+ news_items = []
- img_tag = box.select_one("img")
- image_url = urljoin(base_url, img_tag["src"]) if img_tag else None
+ news_boxes = soup.select(".maiPager-content .newsBox")
+ for box in news_boxes:
+ a_tag = box.select_one("a")
+ url = urljoin(base_url, a_tag["href"]) if a_tag and a_tag.get("href") else None
- date_tag = box.select_one(".newsDate")
- raw_date = date_tag.get_text(strip=True) if date_tag else None
+ img_tag = box.select_one("img")
+ image_url = urljoin(base_url, img_tag["src"]) if img_tag else None
- jst = timezone(timedelta(hours=9))
- try:
- dt = datetime.strptime(raw_date.split(" ")[0], "%Y.%m.%d").replace(tzinfo=jst)
- timestamp = int(dt.timestamp())
- except:
- dt = None
- timestamp = 0
+ date_tag = box.select_one(".newsDate")
+ raw_date = date_tag.get_text(strip=True) if date_tag else None
- content_tag = box.select_one(".newsLink")
- content = content_tag.get_text(strip=True) if content_tag else None
- news_items.append({
- "date": raw_date,
- "identifier": "MAIMAIDX_JPN_PRISM_PLUS",
- "type": None,
- "timestamp": timestamp,
- "headline": None,
- "content": content,
- "url": url,
- "images": [{
- "image": image_url,
- "link": url
- }] if image_url else []
- })
+ jst = timezone(timedelta(hours=9))
+ try:
+ dt = datetime.strptime(raw_date.split(" ")[0], "%Y.%m.%d").replace(tzinfo=jst)
+ timestamp = int(dt.timestamp())
+ except:
+ dt = None
+ timestamp = 0
- return news_items
+ content_tag = box.select_one(".newsLink")
+ content = content_tag.get_text(strip=True) if content_tag else None
+
+ news_items.append({
+ "date": raw_date,
+ "identifier": identifier,
+ "type": None,
+ "timestamp": timestamp,
+ "headline": None,
+ "content": content,
+ "url": url,
+ "images": [{
+ "image": image_url,
+ "link": url
+ }] if image_url else []
+ })
+
+ return news_items
+ if parser == ParserVersion.ALPHA:
+ return alpha_parser
+
+parse_maimaidx_jp_prism_plus_news_site = make_maimaidx_jpn_parser("MAIMAIDX_JPN_PRISM_PLUS", ParserVersion.ALPHA)
diff --git a/sega/ongeki_jp.py b/sega/ongeki_jp.py
index 587f358..a2a05fb 100644
--- a/sega/ongeki_jp.py
+++ b/sega/ongeki_jp.py
@@ -1,48 +1,58 @@
from bs4 import BeautifulSoup
from datetime import datetime
import time
+from enum import Enum
-def parse_ongeki_refresh_news_site(html: str):
- soup = BeautifulSoup(html, "html.parser")
- items = []
+class ParserVersion(Enum):
+ ALPHA=1
- for li in soup.select("li.p-news__listChild"):
- a_tag = li.select_one("a.p-news__listLink")
- url = a_tag["href"] if a_tag else None
+def make_ongeki_parser(identifier: str, parser: ParserVersion):
+ def alpha_parser(html: str):
+ soup = BeautifulSoup(html, "html.parser")
+ items = []
- img_tag = li.select_one(".p-news__listThumb img")
- image_url = img_tag["src"] if img_tag else None
- image_alt = img_tag["alt"] if img_tag else ""
- image_link = url if image_url else None
+ for li in soup.select("li.p-news__listChild"):
+ a_tag = li.select_one("a.p-news__listLink")
+ url = a_tag["href"] if a_tag else None
- date_type_text = li.select_one(".p-news__listTextUpper")
- date_text = date_type_text.text.strip().split("/")[0].strip() if date_type_text else None
- type_text = date_type_text.text.strip().split("/")[-1].strip() if "/" in date_type_text.text else None
+ img_tag = li.select_one(".p-news__listThumb img")
+ image_url = img_tag["src"] if img_tag else None
+ image_alt = img_tag["alt"] if img_tag else ""
+ image_link = url if image_url else None
- headline_tag = li.select_one(".p-news__listTextUnder")
- headline = headline_tag.text.strip() if headline_tag else None
+ date_type_text = li.select_one(".p-news__listTextUpper")
+ date_text = date_type_text.text.strip().split("/")[0].strip() if date_type_text else None
+ type_text = date_type_text.text.strip().split("/")[-1].strip() if "/" in date_type_text.text else None
- timestamp = None
- if date_text:
- try:
- dt = datetime.strptime(date_text, "%Y.%m.%d %a")
- timestamp = int(time.mktime(dt.timetuple()))
- except:
- timestamp = None
- entry = {
- "date": date_text,
- "identifier": "ONGEKI_JPN_REFRESH",
- "type": type_text if type_text not in ["GAME", "CARDMAKER"] else None,
- "timestamp": timestamp,
- "headline": None,
- "content": image_alt,
- "url": url,
- "images": [{
- "image": image_url,
- "link": image_link
- }] if image_url else []
- }
+ headline_tag = li.select_one(".p-news__listTextUnder")
+ headline = headline_tag.text.strip() if headline_tag else None
- items.append(entry)
+ timestamp = None
+ if date_text:
+ try:
+ dt = datetime.strptime(date_text, "%Y.%m.%d %a")
+ timestamp = int(time.mktime(dt.timetuple()))
+ except:
+ timestamp = None
- return items
+ entry = {
+ "date": date_text,
+ "identifier": identifier,
+ "type": type_text if type_text not in ["GAME", "CARDMAKER"] else None,
+ "timestamp": timestamp,
+ "headline": None,
+ "content": image_alt,
+ "url": url,
+ "images": [{
+ "image": image_url,
+ "link": image_link
+ }] if image_url else []
+ }
+
+ items.append(entry)
+
+ return items
+ if parser == ParserVersion.ALPHA:
+ return alpha_parser
+
+parse_ongeki_refresh_news_site = make_ongeki_parser("ONGEKI_JPN_REFRESH", ParserVersion.ALPHA)
Send patches to the email address below.
yukais@pinapelz.com
Include [PATCH repo_name] in the subject line.
pinapelz.com
homepage