def _nabla_item_content(li, date_str, headline_text):
    """Return the body text of one news ``<li>``, one fragment per line."""
    parts = []
    for node in li.contents:
        tag_name = getattr(node, 'name', None)

        if tag_name is None:
            # Text node. Beautiful Soup strings also expose ``.name`` (as
            # None), so ``hasattr(node, 'name')`` cannot tell them apart
            # from tags; test the value instead.
            text = node.get_text(strip=True)
            # Drop text that merely repeats the date or the headline.
            if text and text not in (date_str, headline_text):
                parts.append(text)
        elif tag_name == 'strong':
            # Date and headline are reported in their own fields.
            continue
        elif tag_name == 'br':
            parts.append('\n')
        elif tag_name == 'a' and 'link-text' in node.get('class', []):
            parts.append(node.text.strip())
        elif tag_name != 'img':
            # Images are collected separately; any other tag contributes
            # its text.
            parts.append(node.get_text(strip=True))

    return '\n'.join(part for part in parts if part).strip()


def _nabla_item_images(li, base_url):
    """Return the de-duplicated ``{'image', 'link'}`` dicts for one ``<li>``."""
    images = []
    for img in li.select('img'):
        # Prefer the lazy-load attribute when present.
        src = img.get('data-original') or img.get('src')
        if not src:
            continue
        if isinstance(src, str):
            if src.startswith('data:'):
                # Inline data URIs are skipped.
                continue
            src = urljoin(base_url, src)

        href = None
        anchor = img.find_parent('a')
        if anchor is not None:
            href_val = anchor.get('href')
            if href_val and isinstance(href_val, str):
                href = urljoin(base_url, href_val)

        image_entry = {'image': src, 'link': href}
        if image_entry not in images:
            images.append(image_entry)
    return images


def parse_nabla_news_site(html: str) -> list:
    """Parse the SOUND VOLTEX NABLA news page into a list of news entries.

    Every ``<li>`` under ``#news-inner ul.news`` that contains at least one
    ``<strong>`` becomes one entry. The first ``<strong>`` is read as the
    date (``YYYY.MM.DD``) and the second, if present, as the headline.

    Args:
        html: Raw HTML of the news page.

    Returns:
        A list of dicts with the keys ``date``, ``identifier``, ``type``,
        ``timestamp``, ``headline``, ``content``, ``url``, ``images`` and
        ``is_ai_summary``. ``timestamp`` is ``None`` when the date text does
        not match ``YYYY.MM.DD``, so callers that sort or compare on it must
        handle ``None``.
    """
    base_url = "https://p.eagate.573.jp"
    soup = BeautifulSoup(html, 'html.parser')

    entries = []
    for li in soup.select('#news-inner ul.news li'):
        strong_tags = li.select('strong')
        if not strong_tags:
            continue

        date_str = strong_tags[0].text.strip()
        try:
            # NOTE(review): the parsed datetime is naive, so timestamp()
            # interprets it in the host's local timezone -- confirm that is
            # acceptable for dates published by the site.
            timestamp = int(datetime.strptime(date_str, "%Y.%m.%d").timestamp())
        except ValueError:
            timestamp = None

        headline_text = None
        if len(strong_tags) > 1:
            headline_text = strong_tags[1].text.strip()

        # Flatten purely presentational wrappers so their text becomes
        # direct children of the <li> before the content walk.
        for tag in li.select('font, b, u, span'):
            tag.unwrap()

        entries.append({
            'date': date_str,
            'identifier': 'SOUND_VOLTEX',
            'type': None,
            'timestamp': timestamp,
            'headline': headline_text,
            'content': _nabla_item_content(li, date_str, headline_text),
            "url": None,
            'images': _nabla_item_images(li, base_url),
            'is_ai_summary': False
        })

    return entries
translate.add_translate_text_to_en(news_posts, overrides=[("ボルテ", "SDVX")]) + elif news_url == constants.SOUND_VOLTEX_NABLA_NEWS_SITE: + site_data = download_site_as_html(news_url) + news_posts = sorted(sound_voltex.parse_nabla_news_site(site_data), key=lambda x: x['timestamp'], reverse=True) + news_posts = translate.add_translate_text_to_en(news_posts, overrides=[("ボルテ", "SDVX")]) + elif news_url == constants.IIDX_PINKY_CRUSH_NEWS_SITE: site_data = download_site_as_html(news_url) news_posts = sorted(iidx.parse_pinky_crush_news_site(site_data), key=lambda x: x['timestamp'], reverse=True) -- cgit v1.2.3