diff options
| author | Pinapelz <yukais@pinapelz.com> | 2026-03-12 13:56:30 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2026-03-12 13:56:50 -0700 |
| commit | caa3cf245186ab0f6fb33e63a7dd838d834da12e (patch) | |
| tree | bc5742a134ecabf0b9d35cc12b1d6f67defd5da7 /bemani/ddr.py | |
| parent | 5658441ab9b703c95a48e654d41e45cc3a55ffd3 (diff) | |
refactor: move to common NewsSource interface
clean up imports by defining initializer modules and a decorator
remove legacy scrapers
remove single factory for sega games (sites don't change that much)
Diffstat (limited to 'bemani/ddr.py')
| -rw-r--r-- | bemani/ddr.py | 63 |
1 files changed, 0 insertions, 63 deletions
diff --git a/bemani/ddr.py b/bemani/ddr.py
deleted file mode 100644
index b5ae93c..0000000
--- a/bemani/ddr.py
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
-Currently unused as e-eamusement app feed is favored. Here for archival purposes
-"""
-from bs4 import BeautifulSoup
-from datetime import datetime
-from urllib.parse import urljoin
-import time
-
-def parse_ddr_world_news_site(html: str):
-    base_url = "https://p.eagate.573.jp"
-    soup = BeautifulSoup(html, 'html.parser')
-    news_entries = []
-
-    for div in soup.select("div#info > div.news_one"):
-        style = div.get('style', '')
-        if 'none' in style:
-            continue
-
-        title_tag = div.select_one("div.news_title > div.title")
-        date_tag = div.select_one("div.news_title > div.date")
-        headline = title_tag.get_text(strip=True) if title_tag else None
-        date_str = date_tag.get_text(strip=True) if date_tag else None
-
-        try:
-            dt = datetime.strptime(date_str, "%Y/%m/%d")
-            date_iso = dt.strftime("%Y-%m-%d")
-            timestamp = int(time.mktime(dt.timetuple()))
-        except Exception:
-            date_iso, timestamp = None, None
-        paras = [p.get_text(strip=True, separator="\n\n")
-                 for p in div.find_all("p", recursive=False)]
-        if not paras:
-            for child in div.find_all(recursive=False):
-                cls = child.get("class", [])
-                if "news_title" in cls or "img_news_center" in cls:
-                    continue
-                if child.name == "div":
-                    paras.append(child.get_text(strip=True, separator="\n\n"))
-
-        content = "\n\n\n".join(paras) if paras else None
-        if content:
-            content = f"\n{content}\n"
-
-        images = []
-        for img in div.select("div.img_news_center img"):
-            raw_src = img.get("data-src") or img.get("src")
-            if raw_src:
-                full_url = urljoin(base_url, raw_src)
-                images.append({"image": full_url, "link": None})
-
-        news_entries.append({
-            "date": date_iso,
-            "identifier": "DDR",
-            "type": None,
-            "timestamp": timestamp,
-            "headline": headline,
-            "content": content,
-            "url": None,
-            "images": images,
-            'is_ai_summary': False
-        })
-
-    return news_entries
