diff options
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | community/disc.py | 28 | ||||
| -rw-r--r-- | community/wacca_plus/wacca_plus.py | 132 | ||||
| -rw-r--r-- | constants.py | 2 | ||||
| -rw-r--r-- | generate.py | 6 | ||||
| -rw-r--r-- | news_feed.py | 8 |
6 files changed, 176 insertions, 1 deletions
@@ -172,3 +172,4 @@ cython_debug/ .pypirc news tl_cache.json +wac_result_cache.json diff --git a/community/disc.py b/community/disc.py new file mode 100644 index 0000000..8cc5aeb --- /dev/null +++ b/community/disc.py @@ -0,0 +1,28 @@ +import os +import requests +from dotenv import load_dotenv + +load_dotenv() + +def fetch_messages(channel_id: str): + url = f"https://discord.com/api/v9/channels/{channel_id}/messages?limit=50" + headers = { + "accept-encoding": "gzip, deflate, br, zstd", + "accept-language": "en-GB", + "authorization": os.getenv("DISCORD_AUTHORIZATION"), # Replace with your real token + "priority": "u=1, i", + "sec-ch-ua": '"Not:A-Brand";v="24", "Chromium";v="134"', + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": '"Linux"', + "sec-fetch-dest": "empty", + "sec-fetch-mode": "cors", + "sec-fetch-site": "same-origin", + "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36", + "x-debug-options": "bugReporterEnabled", + "x-discord-locale": "en-US", + "x-discord-timezone": "America/Vancouver", + } + response = requests.get(url, headers=headers) + return response.json() + if response.ok: + print(f"Failed to fetch: {response.status_code} - {response.text}") diff --git a/community/wacca_plus/wacca_plus.py b/community/wacca_plus/wacca_plus.py new file mode 100644 index 0000000..023a7ae --- /dev/null +++ b/community/wacca_plus/wacca_plus.py @@ -0,0 +1,132 @@ +import os +from datetime import datetime +import time +import requests +import openai +import json +from dotenv import load_dotenv +import base64 + +load_dotenv() + +def check_is_generation_possible(): + return os.getenv("OPENAI_API_KEY") is not None and os.getenv("DISCORD_AUTHORIZATION") is not None + + +def check_is_announcement_image(img_url: str): + openai.api_key = os.getenv("OPENAI_API_KEY") + + tools = [ + { + "type": "function", + "function": { + "name": "classify_wacca_plus_image", + "description": "Classify if an image is WACCA PLUS announcement, update, or information", + "parameters": { + "type": "object", + "properties": { + "is_wacca_plus_related": { + "type": "boolean", + "description": "Is this image related to WACCA PLUS?", + }, + "category": { + "type": "string", + "enum": ["announcement", "update", "info", "null"], + "description": "Category of image if related; otherwise null.", + }, + }, + "required": ["is_wacca_plus_related", "category"], + }, + } + } + ] + + response = openai.chat.completions.create( + model="gpt-4o", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "Does this image contain official update, event, or announcement information for the game WACCA PLUS? Ignore unrelated content like gameplay screenshots, score posts, or arcade cabinet photos. Classify accordingly."}, + {"type": "image_url", "image_url": {"url": img_url}}, + ], + } + ], + tools=tools, + tool_choice={"type": "function", "function": {"name": "classify_wacca_plus_image"}}, + ) + + tool_args = response.choices[0].message.tool_calls[0].function.arguments + parsed_result = json.loads(tool_args) + return parsed_result["is_wacca_plus_related"], parsed_result["category"] + +def _load_cache(): + cache_file = "wac_result_cache.json" + if not os.path.exists(cache_file): + with open(cache_file, "w") as file: + json.dump({}, file) + with open(cache_file, "r") as file: + return json.load(file) + +def _save_cache(cache: dict): + cache_file = "wac_result_cache.json" + with open(cache_file, "w") as file: + json.dump(cache, file) + +def _convert_image_to_base64(img_url: str): + response = requests.get(img_url) + if response.status_code == 200: + img_data = response.content + img_base64 = base64.b64encode(img_data).decode('utf-8') + mime_type = response.headers['Content-Type'] + return f"data:{mime_type};base64,{img_base64}" + else: + raise Exception(f"Failed to fetch image from URL: {img_url}, status code: {response.status_code}") + +def parse_announcement_messages(message_json: dict): + news_posts = [] + cache = _load_cache() + for message in message_json: + type = None + if len(message["attachments"]) == 0: + continue + image_attachments = [] + for attachment in message["attachments"]: + if "image" in attachment["content_type"]: + image_attachments.append(attachment) + + if len(image_attachments) == 0: + continue + + filtered_images = [] + for image in image_attachments: + if image["id"] in cache: + is_related = cache[image["id"]][0] + type = cache[image["id"]][1] + else: + is_related, type = check_is_announcement_image(image["url"]) + cache[image["id"]] = [is_related, type] + if not is_related: + continue + filtered_images.append({"image": _convert_image_to_base64(image["url"]), "url": None}) + + if len(filtered_images) == 0: + continue + + date = message["timestamp"].split("T")[0] + date_obj = datetime.strptime(date, "%Y-%m-%d") + unix_time = int(time.mktime(date_obj.timetuple())) + + news_posts.append({ + "date": date, + "identifier": "WACCA_PLUS", + "type": type, + "timestamp": unix_time, + "content": "NEW INFORMATION FROM WACCA+ / WACCA+ の最新情報", + "headline": None, + "url": None, + "images": filtered_images + }) + + _save_cache(cache) + return news_posts diff --git a/constants.py b/constants.py index 81967d5..361c72d 100644 --- a/constants.py +++ b/constants.py @@ -28,6 +28,8 @@ TAIKO_BLOG_SITE="https://taiko-ch.net/blog/" ADD_EN_TRANSLATION=True # Only takes effect if an API key is provided in .env CHUNI_RECURSIVE_IMAGE=True # Scrape the individual post pages and get all images there +WACCA_PLUS_MAGIC_STRING="1206017527864369262" + class CHUNITHM_VERSION(Enum): LUMINOUS_PLUS = 1 VERSE = 2 diff --git a/generate.py b/generate.py index 0e8070c..fbdbf5c 100644 --- a/generate.py +++ b/generate.py @@ -109,12 +109,14 @@ def generate_music_diver_news_file(): def generate_taiko_news_file(): return generate_news_file("taiko_news", constants.TAIKO_BLOG_SITE) +def generate_wacca_plus_news_file(): + return generate_news_file("wacca_plus_news", constants.WACCA_PLUS_MAGIC_STRING) + if __name__ == "__main__": log_output("JOB START", "TASK") if not os.path.exists(OUTPUT_DIR): log_output(f"{OUTPUT_DIR} was not found. Creating this directory...") os.makedirs(OUTPUT_DIR) - iidx_news_data = generate_iidx_news_file(eamuse_feed=True) sdvx_news_data = generate_sdvx_news_file() ddr_news_data = generate_ddr_news_file(eamuse_feed=True) @@ -129,6 +131,8 @@ if __name__ == "__main__": chunithm_intl_news_data = generate_chunithm_intl_news_file() music_diver_news_data = generate_music_diver_news_file() taiko_news_data = generate_taiko_news_file() + generate_wacca_plus_news_file() + news = create_merged_feed( iidx_news_data, diff --git a/news_feed.py b/news_feed.py index c9f5131..a5a1ade 100644 --- a/news_feed.py +++ b/news_feed.py @@ -30,6 +30,8 @@ import sega.maimaidx_intl as maimaidx_intl import sega.ongeki_jp as ongeki_jp import taito.music_diver as music_diver import bandai_namco.taiko as taiko +import community.disc as disc +import community.wacca_plus.wacca_plus as wac_plus import constants import translate @@ -135,6 +137,12 @@ def get_news(news_url: str, version=None) -> list: site_data = download_site_as_html(news_url) news_posts = sorted(taiko.parse_taiko_blog_site(site_data), key=lambda x: x['timestamp'], reverse=True) + elif news_url == constants.WACCA_PLUS_MAGIC_STRING: + if not wac_plus.check_is_generation_possible(): + news_posts = [] + else: + messages = disc.fetch_messages(constants.WACCA_PLUS_MAGIC_STRING) + news_posts = sorted(wac_plus.parse_announcement_messages(messages), key=lambda x: x['timestamp'], reverse=True) else: news_posts = [] return news_posts |
