aboutsummaryrefslogtreecommitdiffstats
path: root/bemani
diff options
context:
space:
mode:
authorPinapelz <yukais@pinapelz.com>2026-03-12 13:56:30 -0700
committerPinapelz <yukais@pinapelz.com>2026-03-12 13:56:50 -0700
commitcaa3cf245186ab0f6fb33e63a7dd838d834da12e (patch)
treebc5742a134ecabf0b9d35cc12b1d6f67defd5da7 /bemani
parent5658441ab9b703c95a48e654d41e45cc3a55ffd3 (diff)
refactor: move to common NewsSource interface
Clean up imports by defining initializer modules and a decorator; remove legacy scrapers; remove the single factory for Sega games (the sites don't change that much).
Diffstat (limited to 'bemani')
-rw-r--r--bemani/__init__.py7
-rw-r--r--bemani/ddr.py63
-rw-r--r--bemani/iidx.py68
3 files changed, 7 insertions, 131 deletions
diff --git a/bemani/__init__.py b/bemani/__init__.py
new file mode 100644
index 0000000..f16ed0a
--- /dev/null
+++ b/bemani/__init__.py
@@ -0,0 +1,7 @@
+from bemani.sdvx import parse_exceed_gear_news_site
+from bemani.polaris_chord import parse_polaris_chord_news_site
+
+__all__ = [
+ "parse_exceed_gear_news_site",
+ "parse_polaris_chord_news_site",
+]
diff --git a/bemani/ddr.py b/bemani/ddr.py
deleted file mode 100644
index b5ae93c..0000000
--- a/bemani/ddr.py
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
-Currently unused as e-eamusement app feed is favored. Here for archival purposes
-"""
-from bs4 import BeautifulSoup
-from datetime import datetime
-from urllib.parse import urljoin
-import time
-
-def parse_ddr_world_news_site(html: str):
- base_url = "https://p.eagate.573.jp"
- soup = BeautifulSoup(html, 'html.parser')
- news_entries = []
-
- for div in soup.select("div#info > div.news_one"):
- style = div.get('style', '')
- if 'none' in style:
- continue
-
- title_tag = div.select_one("div.news_title > div.title")
- date_tag = div.select_one("div.news_title > div.date")
- headline = title_tag.get_text(strip=True) if title_tag else None
- date_str = date_tag.get_text(strip=True) if date_tag else None
-
- try:
- dt = datetime.strptime(date_str, "%Y/%m/%d")
- date_iso = dt.strftime("%Y-%m-%d")
- timestamp = int(time.mktime(dt.timetuple()))
- except Exception:
- date_iso, timestamp = None, None
- paras = [p.get_text(strip=True, separator="\n\n")
- for p in div.find_all("p", recursive=False)]
- if not paras:
- for child in div.find_all(recursive=False):
- cls = child.get("class", [])
- if "news_title" in cls or "img_news_center" in cls:
- continue
- if child.name == "div":
- paras.append(child.get_text(strip=True, separator="\n\n"))
-
- content = "\n\n\n".join(paras) if paras else None
- if content:
- content = f"\n{content}\n"
-
- images = []
- for img in div.select("div.img_news_center img"):
- raw_src = img.get("data-src") or img.get("src")
- if raw_src:
- full_url = urljoin(base_url, raw_src)
- images.append({"image": full_url, "link": None})
-
- news_entries.append({
- "date": date_iso,
- "identifier": "DDR",
- "type": None,
- "timestamp": timestamp,
- "headline": headline,
- "content": content,
- "url": None,
- "images": images,
- 'is_ai_summary': False
- })
-
- return news_entries
diff --git a/bemani/iidx.py b/bemani/iidx.py
deleted file mode 100644
index de7f34c..0000000
--- a/bemani/iidx.py
+++ /dev/null
@@ -1,68 +0,0 @@
-from bs4 import BeautifulSoup
-from datetime import datetime
-from urllib.parse import urljoin
-import re
-
-KEY_TERMS_TL = [
- ("クプロ", "QPro")
-]
-
-# Legacy code. e-amuse feed provides better data
-def parse_pinky_crush_news_site(html: str):
- base_url = "https://p.eagate.573.jp"
- type_map = {
- "i_01": "NEWSONG",
- "i_02": "RANKING",
- "i_03": "EVENT",
- "i_04": "SHOP",
- "i_05": "OTHER"
- }
- soup = BeautifulSoup(html, "html.parser")
- news_items = []
-
- for li in soup.select("#info-news > li"):
- date_elem = li.select_one(".news-main > li:nth-of-type(1)")
- headline_elem = li.select_one(".news-main > li:nth-of-type(2)")
- content_elem = li.select_one(".news-main > li:nth-of-type(3)")
- type_class = li.get("class", [None])[0]
- if not (date_elem and content_elem):
- continue
- date_str = date_elem.text.strip()
- try:
- dt = datetime.strptime(date_str, "%Y/%m/%d")
- timestamp = int(dt.timestamp())
- except ValueError:
- timestamp = None
-
- headline = headline_elem.a.text.strip() if headline_elem.a else headline_elem.text.strip()
-
- for a in content_elem.select("a[href]"):
- href = urljoin(base_url, a["href"])
- text = a.get_text(strip=True)
- a.replace_with(f"[{text}]({href})")
-
- for br in content_elem.find_all("br"):
- br.replace_with("\n")
-
- content = content_elem.get_text().strip()
-
- content = content.replace(
- " e-amusement ベーシックコース ",
- " e-amusement ベーシックコース "
- )
- content = content.replace("※", "\n※")
- content = re.sub(r"\n[ \t]+", "\n", content)
- content = re.sub(r'\s*/\s*', '/', content)
- news_items.append({
- "date": date_str,
- "identifier": "IIDX",
- "type": type_map[type_class],
- "timestamp": timestamp,
- "headline": headline,
- "content": content,
- "url": None,
- "images": [],
- 'is_ai_summary': False
- })
-
- return news_items
Send patches to the email address below:
yukais@pinapelz.com
Include the subject prefix [PATCH repo_name].
pinapelz.com
homepage