refactor: move to common NewsSource interface

Clean up imports by defining initializer modules and a decorator; remove legacy scrapers; remove the single factory for Sega games (the sites don't change that much).
author: Pinapelz <yukais@pinapelz.com> 2026-03-12 13:56:30 -0700
committer: Pinapelz <yukais@pinapelz.com> 2026-03-12 13:56:50 -0700
commit: caa3cf245186ab0f6fb33e63a7dd838d834da12e (patch)
tree: bc5742a134ecabf0b9d35cc12b1d6f67defd5da7 /bemani
parent: 5658441ab9b703c95a48e654d41e45cc3a55ffd3 (diff)
3 files changed, 7 insertions, 131 deletions
diff --git a/bemani/__init__.py b/bemani/__init__.py
new file mode 100644
index 0000000..f16ed0a
--- /dev/null
+++ b/bemani/__init__.py
@@ -0,0 +1,7 @@
+from bemani.sdvx import parse_exceed_gear_news_site
+from bemani.polaris_chord import parse_polaris_chord_news_site
+
+__all__ = [
+    "parse_exceed_gear_news_site",
+    "parse_polaris_chord_news_site",
+]
diff --git a/bemani/ddr.py b/bemani/ddr.py
deleted file mode 100644
index b5ae93c..0000000
--- a/bemani/ddr.py
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
-Currently unused as e-eamusement app feed is favored. Here for archival purposes
-"""
-from bs4 import BeautifulSoup
-from datetime import datetime
-from urllib.parse import urljoin
-import time
-
-def parse_ddr_world_news_site(html: str):
-    base_url = "https://p.eagate.573.jp"
-    soup = BeautifulSoup(html, 'html.parser')
-    news_entries = []
-
-    for div in soup.select("div#info > div.news_one"):
-        style = div.get('style', '')
-        if 'none' in style:
-            continue
-
-        title_tag = div.select_one("div.news_title > div.title")
-        date_tag  = div.select_one("div.news_title > div.date")
-        headline  = title_tag.get_text(strip=True) if title_tag else None
-        date_str  = date_tag.get_text(strip=True)  if date_tag  else None
-
-        try:
-            dt = datetime.strptime(date_str, "%Y/%m/%d")
-            date_iso  = dt.strftime("%Y-%m-%d")
-            timestamp = int(time.mktime(dt.timetuple()))
-        except Exception:
-            date_iso, timestamp = None, None
-        paras = [p.get_text(strip=True, separator="\n\n")
-                 for p in div.find_all("p", recursive=False)]
-        if not paras:
-            for child in div.find_all(recursive=False):
-                cls = child.get("class", [])
-                if "news_title" in cls or "img_news_center" in cls:
-                    continue
-                if child.name == "div":
-                    paras.append(child.get_text(strip=True, separator="\n\n"))
-
-        content = "\n\n\n".join(paras) if paras else None
-        if content:
-            content = f"\n{content}\n"
-
-        images = []
-        for img in div.select("div.img_news_center img"):
-            raw_src = img.get("data-src") or img.get("src")
-            if raw_src:
-                full_url = urljoin(base_url, raw_src)
-                images.append({"image": full_url, "link": None})
-
-        news_entries.append({
-            "date":       date_iso,
-            "identifier": "DDR",
-            "type":       None,
-            "timestamp":  timestamp,
-            "headline":   headline,
-            "content":    content,
-            "url":        None,
-            "images":     images,
-            'is_ai_summary': False
-        })
-
-    return news_entries
diff --git a/bemani/iidx.py b/bemani/iidx.py
deleted file mode 100644
index de7f34c..0000000
--- a/bemani/iidx.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from bs4 import BeautifulSoup
-from datetime import datetime
-from urllib.parse import urljoin
-import re
-
-KEY_TERMS_TL = [
-    ("クプロ", "QPro")
-]
-
-# Legacy code. e-amuse feed provides better data
-def parse_pinky_crush_news_site(html: str):
-    base_url = "https://p.eagate.573.jp"
-    type_map = {
-        "i_01": "NEWSONG",
-        "i_02": "RANKING",
-        "i_03": "EVENT",
-        "i_04": "SHOP",
-        "i_05": "OTHER"
-    }
-    soup = BeautifulSoup(html, "html.parser")
-    news_items = []
-
-    for li in soup.select("#info-news > li"):
-        date_elem = li.select_one(".news-main > li:nth-of-type(1)")
-        headline_elem = li.select_one(".news-main > li:nth-of-type(2)")
-        content_elem = li.select_one(".news-main > li:nth-of-type(3)")
-        type_class = li.get("class", [None])[0]
-        if not (date_elem and content_elem):
-            continue
-        date_str = date_elem.text.strip()
-        try:
-            dt = datetime.strptime(date_str, "%Y/%m/%d")
-            timestamp = int(dt.timestamp())
-        except ValueError:
-            timestamp = None
-
-        headline = headline_elem.a.text.strip() if headline_elem.a else headline_elem.text.strip()
-
-        for a in content_elem.select("a[href]"):
-            href = urljoin(base_url, a["href"])
-            text = a.get_text(strip=True)
-            a.replace_with(f"[{text}]({href})")
-
-        for br in content_elem.find_all("br"):
-            br.replace_with("\n")
-
-        content = content_elem.get_text().strip()
-
-        content = content.replace(
-            "                              e-amusement ベーシックコース                          ",
-            " e-amusement ベーシックコース "
-        )
-        content = content.replace("※", "\n※")
-        content = re.sub(r"\n[ \t]+", "\n", content)
-        content = re.sub(r'\s*/\s*', '/', content)
-        news_items.append({
-            "date": date_str,
-            "identifier": "IIDX",
-            "type": type_map[type_class],
-            "timestamp": timestamp,
-            "headline": headline,
-            "content": content,
-            "url": None,
-            "images": [],
-            'is_ai_summary': False
-        })
-
-    return news_items
author	Pinapelz <yukais@pinapelz.com>	2026-03-12 13:56:30 -0700
committer	Pinapelz <yukais@pinapelz.com>	2026-03-12 13:56:50 -0700
commit	caa3cf245186ab0f6fb33e63a7dd838d834da12e (patch)
tree	bc5742a134ecabf0b9d35cc12b1d6f67defd5da7 /bemani
parent	5658441ab9b703c95a48e654d41e45cc3a55ffd3 (diff)