aboutsummaryrefslogtreecommitdiffstats
path: root/sega/maimaidx_jp.py
diff options
context:
space:
mode:
authorPinapelz <yukais@pinapelz.com>2026-03-12 13:56:30 -0700
committerPinapelz <yukais@pinapelz.com>2026-03-12 13:56:50 -0700
commitcaa3cf245186ab0f6fb33e63a7dd838d834da12e (patch)
treebc5742a134ecabf0b9d35cc12b1d6f67defd5da7 /sega/maimaidx_jp.py
parent5658441ab9b703c95a48e654d41e45cc3a55ffd3 (diff)
refactor: move to common NewsSource interface
Clean up imports by defining initializer modules and a decorator; remove legacy scrapers; remove the single factory for Sega games (the sites don't change that much).
Diffstat (limited to 'sega/maimaidx_jp.py')
-rw-r--r--sega/maimaidx_jp.py89
1 files changed, 41 insertions, 48 deletions
diff --git a/sega/maimaidx_jp.py b/sega/maimaidx_jp.py
index 1314325..2b61c9a 100644
--- a/sega/maimaidx_jp.py
+++ b/sega/maimaidx_jp.py
@@ -1,60 +1,53 @@
from bs4 import BeautifulSoup
from datetime import datetime, timezone, timedelta
from urllib.parse import urljoin
-from enum import Enum
-class ParserVersion(Enum):
- ALPHA=1
-def make_maimaidx_jpn_parser(identifier: str, parser: ParserVersion):
- def alpha_parser(html: str):
- """
- Confirmed on:
- PRISM PLUS
- """
- soup = BeautifulSoup(html, "html.parser")
- base_url = "https://info-maimai.sega.jp/"
- news_items = []
+def parse_maimaidx_jp_news_site(html: str):
+ """
+ Confirmed on:
+ PRISM PLUS
+ """
+ identifier = "MAIMAIDX_JP"
+ soup = BeautifulSoup(html, "html.parser")
+ base_url = "https://info-maimai.sega.jp/"
+ news_items = []
- news_boxes = soup.select(".maiPager-content .newsBox")
- for box in news_boxes:
- a_tag = box.select_one("a")
- url = urljoin(base_url, a_tag["href"]) if a_tag and a_tag.get("href") else None
+ news_boxes = soup.select(".maiPager-content .newsBox")
+ for box in news_boxes:
+ a_tag = box.select_one("a")
+ url = urljoin(base_url, a_tag["href"]) if a_tag and a_tag.get("href") else None
- img_tag = box.select_one("img")
- image_url = urljoin(base_url, img_tag["src"]) if img_tag else None
+ img_tag = box.select_one("img")
+ image_url = urljoin(base_url, img_tag["src"]) if img_tag else None
- date_tag = box.select_one(".newsDate")
- raw_date = date_tag.get_text(strip=True) if date_tag else None
+ date_tag = box.select_one(".newsDate")
+ raw_date = date_tag.get_text(strip=True) if date_tag else None
- jst = timezone(timedelta(hours=9))
- try:
- dt = datetime.strptime(raw_date.split(" ")[0], "%Y.%m.%d").replace(tzinfo=jst)
- timestamp = int(dt.timestamp())
- except:
- dt = None
- timestamp = 0
+ jst = timezone(timedelta(hours=9))
+ try:
+ dt = datetime.strptime(raw_date.split(" ")[0], "%Y.%m.%d").replace(tzinfo=jst)
+ timestamp = int(dt.timestamp())
+ except Exception:
+ dt = None
+ timestamp = 0
- content_tag = box.select_one(".newsLink")
- content = content_tag.get_text(strip=True) if content_tag else None
+ content_tag = box.select_one(".newsLink")
+ content = content_tag.get_text(strip=True) if content_tag else None
- news_items.append({
- "date": raw_date,
- "identifier": identifier,
- "type": None,
- "timestamp": timestamp,
- "headline": None,
- "content": content,
- "url": url,
- 'is_ai_summary': False,
- "images": [{
- "image": image_url,
- "link": url
- }] if image_url else []
- })
+ news_items.append({
+ "date": raw_date,
+ "identifier": identifier,
+ "type": None,
+ "timestamp": timestamp,
+ "headline": None,
+ "content": content,
+ "url": url,
+ "is_ai_summary": False,
+ "images": [{
+ "image": image_url,
+ "link": url
+ }] if image_url else []
+ })
- return news_items
- if parser == ParserVersion.ALPHA:
- return alpha_parser
-
-parse_maimaidx_jp_news_site = make_maimaidx_jpn_parser("MAIMAIDX_JP", ParserVersion.ALPHA)
+ return news_items \ No newline at end of file
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage