From caa3cf245186ab0f6fb33e63a7dd838d834da12e Mon Sep 17 00:00:00 2001 From: Pinapelz Date: Thu, 12 Mar 2026 13:56:30 -0700 Subject: refactor: move to common NewsSource interface cleanup imports by defining initaliazers modules and decorator remove legacy scrapers remove single factory for sega games (sites don't change that much) --- bemani/ddr.py | 63 ----------------------------------------------------------- 1 file changed, 63 deletions(-) delete mode 100644 bemani/ddr.py (limited to 'bemani/ddr.py') diff --git a/bemani/ddr.py b/bemani/ddr.py deleted file mode 100644 index b5ae93c..0000000 --- a/bemani/ddr.py +++ /dev/null @@ -1,63 +0,0 @@ -""" -Currently unused as e-eamusement app feed is favored. Here for archival purposes -""" -from bs4 import BeautifulSoup -from datetime import datetime -from urllib.parse import urljoin -import time - -def parse_ddr_world_news_site(html: str): - base_url = "https://p.eagate.573.jp" - soup = BeautifulSoup(html, 'html.parser') - news_entries = [] - - for div in soup.select("div#info > div.news_one"): - style = div.get('style', '') - if 'none' in style: - continue - - title_tag = div.select_one("div.news_title > div.title") - date_tag = div.select_one("div.news_title > div.date") - headline = title_tag.get_text(strip=True) if title_tag else None - date_str = date_tag.get_text(strip=True) if date_tag else None - - try: - dt = datetime.strptime(date_str, "%Y/%m/%d") - date_iso = dt.strftime("%Y-%m-%d") - timestamp = int(time.mktime(dt.timetuple())) - except Exception: - date_iso, timestamp = None, None - paras = [p.get_text(strip=True, separator="\n\n") - for p in div.find_all("p", recursive=False)] - if not paras: - for child in div.find_all(recursive=False): - cls = child.get("class", []) - if "news_title" in cls or "img_news_center" in cls: - continue - if child.name == "div": - paras.append(child.get_text(strip=True, separator="\n\n")) - - content = "\n\n\n".join(paras) if paras else None - if content: - content = f"\n{content}\n" - - images = [] - for img in div.select("div.img_news_center img"): - raw_src = img.get("data-src") or img.get("src") - if raw_src: - full_url = urljoin(base_url, raw_src) - images.append({"image": full_url, "link": None}) - - news_entries.append({ - "date": date_iso, - "identifier": "DDR", - "type": None, - "timestamp": timestamp, - "headline": headline, - "content": content, - "url": None, - "images": images, - 'is_ai_summary': False - }) - - return news_entries -- cgit v1.2.3