diff options
| author | Pinapelz <yukais@pinapelz.com> | 2025-04-16 23:55:00 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2025-04-16 23:55:00 -0700 |
| commit | a5b15484423f9c9c9518a7be49845f018a8ff46f (patch) | |
| tree | 4d9254e50f62d2cfc5c5269950d08b0cb9b7dab4 /bemani | |
| parent | f140219f082e658f65a10d9ebfa070f5d649771d (diff) | |
feat: add support for DDR World
Diffstat (limited to 'bemani')
| -rw-r--r-- | bemani/ddr.py | 56 | ||||
| -rw-r--r-- | bemani/iidx.py | 3 | ||||
| -rw-r--r-- | bemani/sdvx.py | 3 |
3 files changed, 60 insertions, 2 deletions
diff --git a/bemani/ddr.py b/bemani/ddr.py
new file mode 100644
index 0000000..947728c
--- /dev/null
+++ b/bemani/ddr.py
@@ -0,0 +1,56 @@
+from bs4 import BeautifulSoup
+from datetime import datetime
+from urllib.parse import urljoin
+import time
+import re
+
+def parse_ddr_world_news_site(html: str):
+    base_url = "https://p.eagate.573.jp"
+    soup = BeautifulSoup(html, 'html.parser')
+    news_entries = []
+
+    for div in soup.select("div#info > div.news_one"):
+        if 'none' in div.get('style', ''):
+            continue
+        title_tag = div.select_one("div.news_title > div.title")
+        date_tag = div.select_one("div.news_title > div.date")
+        headline = title_tag.get_text(strip=True) if title_tag else None
+        date_str = date_tag.get_text(strip=True) if date_tag else None
+
+        try:
+            dt = datetime.strptime(date_str, "%Y/%m/%d")
+            date_iso = dt.strftime("%Y-%m-%d")
+            timestamp = int(time.mktime(dt.timetuple()))
+        except Exception:
+            date_iso, timestamp = None, None
+
+        paras = [p.get_text(strip=True, separator="\n")
+                 for p in div.find_all("p", recursive=False)]
+        if not paras:
+            for child in div.find_all(recursive=False):
+                cls = child.get("class", [])
+                if "news_title" in cls or "img_news_center" in cls:
+                    continue
+                if child.name == "div":
+                    paras.append(child.get_text(strip=True, separator="\n"))
+        content = "\n\n".join(paras) if paras else None
+
+        # image (use data-src if present)
+        img = div.select_one("div.img_news_center img")
+        raw_src = img.get("data-src") or img.get("src") if img else None
+        image_url = urljoin(base_url, raw_src) if raw_src else None
+
+        news_entries.append({
+            "date": date_iso,
+            "identifier": "DDR_WORLD",
+            "type": None,
+            "timestamp": timestamp,
+            "headline": headline,
+            "content": content,
+            "url": base_url,
+            "images": {
+                "image": image_url,
+                "link": None
+            }
+        })
+    return news_entries
diff --git a/bemani/iidx.py b/bemani/iidx.py
index c13e05f..cc69fe1 100644
--- a/bemani/iidx.py
+++ b/bemani/iidx.py
@@ -7,7 +7,8 @@ KEY_TERMS_TL = [
     ("クプロ", "QPro")
 ]
 
-def parse_pinky_crush_news_site(html: str, base_url):
+def parse_pinky_crush_news_site(html: str):
+    base_url = "https://p.eagate.573.jp"
     type_map = {
         "i_01": "NEWSONG",
         "i_02": "RANKING",
diff --git a/bemani/sdvx.py b/bemani/sdvx.py
index a87fe44..c77f198 100644
--- a/bemani/sdvx.py
+++ b/bemani/sdvx.py
@@ -2,7 +2,8 @@ from bs4 import BeautifulSoup
 from datetime import datetime
 from urllib.parse import urljoin
 
-def parse_exceed_gear_news_site(html: str, base_url: str):
+def parse_exceed_gear_news_site(html: str):
+    base_url = "https://p.eagate.573.jp"
     soup = BeautifulSoup(html, 'html.parser')
     news_list = soup.select('.tab ul.news li')
 
