diff options
| author | Pinapelz <yukais@pinapelz.com> | 2026-03-12 13:56:30 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2026-03-12 13:56:50 -0700 |
| commit | caa3cf245186ab0f6fb33e63a7dd838d834da12e (patch) | |
| tree | bc5742a134ecabf0b9d35cc12b1d6f67defd5da7 /bemani/iidx.py | |
| parent | 5658441ab9b703c95a48e654d41e45cc3a55ffd3 (diff) | |
refactor: move to common NewsSource interface
clean up imports by defining initializer modules and a decorator
remove legacy scrapers
remove single factory for sega games (sites don't change that much)
Diffstat (limited to 'bemani/iidx.py')
| -rw-r--r-- | bemani/iidx.py | 68 |
1 files changed, 0 insertions, 68 deletions
diff --git a/bemani/iidx.py b/bemani/iidx.py deleted file mode 100644 index de7f34c..0000000 --- a/bemani/iidx.py +++ /dev/null @@ -1,68 +0,0 @@ -from bs4 import BeautifulSoup -from datetime import datetime -from urllib.parse import urljoin -import re - -KEY_TERMS_TL = [ - ("クプロ", "QPro") -] - -# Legacy code. e-amuse feed provides better data -def parse_pinky_crush_news_site(html: str): - base_url = "https://p.eagate.573.jp" - type_map = { - "i_01": "NEWSONG", - "i_02": "RANKING", - "i_03": "EVENT", - "i_04": "SHOP", - "i_05": "OTHER" - } - soup = BeautifulSoup(html, "html.parser") - news_items = [] - - for li in soup.select("#info-news > li"): - date_elem = li.select_one(".news-main > li:nth-of-type(1)") - headline_elem = li.select_one(".news-main > li:nth-of-type(2)") - content_elem = li.select_one(".news-main > li:nth-of-type(3)") - type_class = li.get("class", [None])[0] - if not (date_elem and content_elem): - continue - date_str = date_elem.text.strip() - try: - dt = datetime.strptime(date_str, "%Y/%m/%d") - timestamp = int(dt.timestamp()) - except ValueError: - timestamp = None - - headline = headline_elem.a.text.strip() if headline_elem.a else headline_elem.text.strip() - - for a in content_elem.select("a[href]"): - href = urljoin(base_url, a["href"]) - text = a.get_text(strip=True) - a.replace_with(f"[{text}]({href})") - - for br in content_elem.find_all("br"): - br.replace_with("\n") - - content = content_elem.get_text().strip() - - content = content.replace( - " e-amusement ベーシックコース ", - " e-amusement ベーシックコース " - ) - content = content.replace("※", "\n※") - content = re.sub(r"\n[ \t]+", "\n", content) - content = re.sub(r'\s*/\s*', '/', content) - news_items.append({ - "date": date_str, - "identifier": "IIDX", - "type": type_map[type_class], - "timestamp": timestamp, - "headline": headline, - "content": content, - "url": None, - "images": [], - 'is_ai_summary': False - }) - - return news_items |
