From 5e31d2870f07085e4c837a17572a3e9eedb6df26 Mon Sep 17 00:00:00 2001 From: Pinapelz Date: Sun, 22 Mar 2026 23:53:34 -0700 Subject: Rename SDVX parser to parse_nabla_news_site Replace parse_exceed_gear_news_site with parse_nabla_news_site and update imports and callers in __init__.py and news_feed.py --- bemani/__init__.py | 4 +- bemani/sdvx.py | 48 -------------- community/__init__.py | 4 +- community/wacca_plus.py | 125 +++++++++++++++++++++++++++++++++++++ community/wacca_plus/__init__.py | 0 community/wacca_plus/wacca_plus.py | 125 ------------------------------------- news_feed.py | 6 +- 7 files changed, 132 insertions(+), 180 deletions(-) create mode 100644 community/wacca_plus.py delete mode 100644 community/wacca_plus/__init__.py delete mode 100644 community/wacca_plus/wacca_plus.py diff --git a/bemani/__init__.py b/bemani/__init__.py index f16ed0a..dc4762a 100644 --- a/bemani/__init__.py +++ b/bemani/__init__.py @@ -1,7 +1,7 @@ -from bemani.sdvx import parse_exceed_gear_news_site +from bemani.sdvx import parse_nabla_news_site from bemani.polaris_chord import parse_polaris_chord_news_site __all__ = [ - "parse_exceed_gear_news_site", + "parse_nabla_news_site", "parse_polaris_chord_news_site", ] diff --git a/bemani/sdvx.py b/bemani/sdvx.py index 5a7d25c..5d7f72e 100644 --- a/bemani/sdvx.py +++ b/bemani/sdvx.py @@ -2,54 +2,6 @@ from bs4 import BeautifulSoup from datetime import datetime from urllib.parse import urljoin -def parse_exceed_gear_news_site(html: str): - base_url = "https://p.eagate.573.jp" - soup = BeautifulSoup(html, 'html.parser') - news_list = soup.select('.tab ul.news li') - - entries = [] - for li in news_list: - date = li.select_one('strong') - pre = li.select_one('pre') - - if not date or not pre: - continue - date_str = date.text.strip() - try: - dt = datetime.strptime(date_str, "%Y.%m.%d") - timestamp = int(dt.timestamp()) - except ValueError: - timestamp = None - headline = li.select_one('p.notice') - headline_text = headline.text.strip() if headline else None - for tag in pre.select('font, b, u, span'): - tag.unwrap() - content = pre.get_text(separator='\n', strip=True) - images = [] - for img in pre.select('img'): - src = img.get('data-original') or img.get('src') - if not src or src.startswith('data:'): - continue - src = urljoin(base_url, src) - parent = img.find_parent('a') - href = urljoin(base_url, parent['href']) if parent and parent.has_attr('href') else None - if {'image': src, 'link': href} not in images: - images.append({'image': src, 'link': href}) - - entries.append({ - 'date': date_str, - 'identifier': 'SOUND_VOLTEX', - 'type': None, - 'timestamp': timestamp, - 'headline': headline_text, - 'content': content, - "url": None, - 'images': images, - 'is_ai_summary': False - }) - - return entries - def parse_nabla_news_site(html: str): base_url = "https://p.eagate.573.jp" soup = BeautifulSoup(html, 'html.parser') diff --git a/community/__init__.py b/community/__init__.py index 835b7e6..e05fade 100644 --- a/community/__init__.py +++ b/community/__init__.py @@ -1,7 +1,7 @@ from community.disc import fetch_messages from community.museca_plus import parse_museca_plus_news_site from community.rbdx import get_carousel_posts -from community.wacca_plus.wacca_plus import parse_announcement_messages, check_is_generation_possible +from community.wacca_plus import parse_announcement_messages, check_is_generation_possible __all__ = [ "fetch_messages", @@ -9,4 +9,4 @@ __all__ = [ "get_carousel_posts", "parse_announcement_messages", "check_is_generation_possible", -] \ No newline at end of file +] diff --git a/community/wacca_plus.py b/community/wacca_plus.py new file mode 100644 index 0000000..c15bbf7 --- /dev/null +++ b/community/wacca_plus.py @@ -0,0 +1,125 @@ +from datetime import datetime +from dotenv import load_dotenv +from common import create_database_connection +from catboxpy.catbox import CatboxClient +import os +import time +import openai +import json + +from summarizer import generate_headline_and_content_from_images + +load_dotenv() + +def check_is_generation_possible(): + return os.getenv("OPENAI_API_KEY") is not None and os.getenv("DISCORD_AUTHORIZATION") is not None + + +def check_is_announcement_image(img_url: str): + openai.api_key = os.getenv("OPENAI_API_KEY") + + tools = [ + { + "type": "function", + "function": { + "name": "classify_wacca_plus_image", + "description": "Classify if an image is WACCA PLUS announcement, update, or information", + "parameters": { + "type": "object", + "properties": { + "is_wacca_plus_related": { + "type": "boolean", + "description": "Is this image related to WACCA PLUS?", + }, + "category": { + "type": "string", + "enum": ["announcement", "update", "info", "null"], + "description": "Category of image if related; otherwise null.", + }, + }, + "required": ["is_wacca_plus_related", "category"], + }, + } + } + ] + + response = openai.chat.completions.create( + model="gpt-4o", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "Does this image contain official update, event, or announcement information for the game WACCA PLUS? Ignore unrelated content like gameplay screenshots, score posts, or arcade cabinet photos. Classify accordingly."}, + {"type": "image_url", "image_url": {"url": img_url}}, + ], + } + ], + tools=tools, + tool_choice={"type": "function", "function": {"name": "classify_wacca_plus_image"}}, + ) + + tool_args = response.choices[0].message.tool_calls[0].function.arguments + parsed_result = json.loads(tool_args) + return parsed_result["is_wacca_plus_related"], parsed_result["category"] + + +def _upload_image_to_catbox(image_url: str): + client = CatboxClient() + file_url = client.upload(image_url) + if not file_url or file_url == "": + return image_url + return file_url + +def parse_announcement_messages(message_json: dict): + news_posts = [] + database = create_database_connection() + for message in message_json: + type = None + message_content = message.get("content", "") + if len(message["attachments"]) == 0: + continue + image_attachments = [] + for attachment in message["attachments"]: + if "image" in attachment["content_type"]: + image_attachments.append(attachment) + + if len(image_attachments) == 0: + continue + + filtered_images = [] + image_urls = [] # save the images before they get encoded + for image in image_attachments: + image_urls.append(image["url"]) + entry = database.get_wac_entry(image["id"]) + if entry: + is_related = entry[0] + type = entry[1] + else: + is_related, type = check_is_announcement_image(image["url"]) + database.add_new_wac_entry(key=image["id"], is_news=is_related, post_type=type) + + if not is_related: + continue + filtered_images.append({"image": _upload_image_to_catbox(image["url"]), "url": None}) + + if len(filtered_images) == 0: + continue + + date = message["timestamp"].split("T")[0] + date_obj = datetime.strptime(date, "%Y-%m-%d") + unix_time = int(time.mktime(date_obj.timetuple())) + headline, content = generate_headline_and_content_from_images(image_urls, "WACCA PLUS", message_content) + + news_posts.append({ + "date": date, + "identifier": "WACCA_PLUS", + "type": type.upper(), + "timestamp": unix_time, + "content": content, + "headline": headline, + "url": None, + "images": filtered_images, + 'is_ai_summary': True + }) + database.close() + return news_posts diff --git a/community/wacca_plus/__init__.py b/community/wacca_plus/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/community/wacca_plus/wacca_plus.py b/community/wacca_plus/wacca_plus.py deleted file mode 100644 index c15bbf7..0000000 --- a/community/wacca_plus/wacca_plus.py +++ /dev/null @@ -1,125 +0,0 @@ -from datetime import datetime -from dotenv import load_dotenv -from common import create_database_connection -from catboxpy.catbox import CatboxClient -import os -import time -import openai -import json - -from summarizer import generate_headline_and_content_from_images - -load_dotenv() - -def check_is_generation_possible(): - return os.getenv("OPENAI_API_KEY") is not None and os.getenv("DISCORD_AUTHORIZATION") is not None - - -def check_is_announcement_image(img_url: str): - openai.api_key = os.getenv("OPENAI_API_KEY") - - tools = [ - { - "type": "function", - "function": { - "name": "classify_wacca_plus_image", - "description": "Classify if an image is WACCA PLUS announcement, update, or information", - "parameters": { - "type": "object", - "properties": { - "is_wacca_plus_related": { - "type": "boolean", - "description": "Is this image related to WACCA PLUS?", - }, - "category": { - "type": "string", - "enum": ["announcement", "update", "info", "null"], - "description": "Category of image if related; otherwise null.", - }, - }, - "required": ["is_wacca_plus_related", "category"], - }, - } - } - ] - - response = openai.chat.completions.create( - model="gpt-4o", - messages=[ - { - "role": "user", - "content": [ - {"type": "text", "text": "Does this image contain official update, event, or announcement information for the game WACCA PLUS? Ignore unrelated content like gameplay screenshots, score posts, or arcade cabinet photos. Classify accordingly."}, - {"type": "image_url", "image_url": {"url": img_url}}, - ], - } - ], - tools=tools, - tool_choice={"type": "function", "function": {"name": "classify_wacca_plus_image"}}, - ) - - tool_args = response.choices[0].message.tool_calls[0].function.arguments - parsed_result = json.loads(tool_args) - return parsed_result["is_wacca_plus_related"], parsed_result["category"] - - -def _upload_image_to_catbox(image_url: str): - client = CatboxClient() - file_url = client.upload(image_url) - if not file_url or file_url == "": - return image_url - return file_url - -def parse_announcement_messages(message_json: dict): - news_posts = [] - database = create_database_connection() - for message in message_json: - type = None - message_content = message.get("content", "") - if len(message["attachments"]) == 0: - continue - image_attachments = [] - for attachment in message["attachments"]: - if "image" in attachment["content_type"]: - image_attachments.append(attachment) - - if len(image_attachments) == 0: - continue - - filtered_images = [] - image_urls = [] # save the images before they get encoded - for image in image_attachments: - image_urls.append(image["url"]) - entry = database.get_wac_entry(image["id"]) - if entry: - is_related = entry[0] - type = entry[1] - else: - is_related, type = check_is_announcement_image(image["url"]) - database.add_new_wac_entry(key=image["id"], is_news=is_related, post_type=type) - - if not is_related: - continue - filtered_images.append({"image": _upload_image_to_catbox(image["url"]), "url": None}) - - if len(filtered_images) == 0: - continue - - date = message["timestamp"].split("T")[0] - date_obj = datetime.strptime(date, "%Y-%m-%d") - unix_time = int(time.mktime(date_obj.timetuple())) - headline, content = generate_headline_and_content_from_images(image_urls, "WACCA PLUS", message_content) - - news_posts.append({ - "date": date, - "identifier": "WACCA_PLUS", - "type": type.upper(), - "timestamp": unix_time, - "content": content, - "headline": headline, - "url": None, - "images": filtered_images, - 'is_ai_summary': True - }) - database.close() - return news_posts diff --git a/news_feed.py b/news_feed.py index 1bcb240..3dd1d61 100644 --- a/news_feed.py +++ b/news_feed.py @@ -51,9 +51,9 @@ def _attach_llm_summaries(news_posts: list, game_name: str): @registry.register(constants.SOUND_VOLTEX_NABLA_NEWS_SITE) class SoundVoltexSource(NewsSource): def fetch(self, version=None) -> list[dict]: - from bemani.sdvx import parse_exceed_gear_news_site + from bemani.sdvx import parse_nabla_news_site site_data = download_site_as_html(constants.SOUND_VOLTEX_NABLA_NEWS_SITE) - news_posts = sorted(parse_exceed_gear_news_site(site_data), key=lambda x: x['timestamp'], reverse=True) + news_posts = sorted(parse_nabla_news_site(site_data), key=lambda x: x['timestamp'], reverse=True) return translate.add_translate_text_to_en(news_posts, overrides=[("ボルテ", "SDVX")]) # Can't find a Polaris feed on EAM app so this is here instead @@ -300,7 +300,7 @@ class WanganMaxiSource(NewsSource): @registry.register(constants.WACCA_PLUS_MAGIC_STRING) class WaccaPlusSource(NewsSource): def fetch(self, version=None) -> list[dict]: - from community.wacca_plus.wacca_plus import parse_announcement_messages, check_is_generation_possible + from community.wacca_plus import parse_announcement_messages, check_is_generation_possible from community.disc import fetch_messages if not check_is_generation_possible(): return [] -- cgit v1.2.3