about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 1
-rw-r--r-- community/wacca_plus/wacca_plus.py 13
-rw-r--r-- generate.py 1
-rw-r--r-- news_feed.py 16
-rw-r--r-- requirements.txt bin 836 -> 1384 bytes
-rw-r--r-- site/src/components/NewsFeed.tsx 7
-rw-r--r-- summarizer.py 100
7 files changed, 133 insertions, 5 deletions
diff --git a/.gitignore b/.gitignore
index 5524799..9d7d2ab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -173,3 +173,4 @@ cython_debug/
news
tl_cache.json
wac_result_cache.json
+summarization_cache.json
\ No newline at end of file
diff --git a/community/wacca_plus/wacca_plus.py b/community/wacca_plus/wacca_plus.py
index d5e0aa4..fae3dd0 100644
--- a/community/wacca_plus/wacca_plus.py
+++ b/community/wacca_plus/wacca_plus.py
@@ -6,6 +6,10 @@ import openai
import json
from dotenv import load_dotenv
import base64
+import sys
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")))
+
+from summarizer import generate_headline_and_content_from_images
load_dotenv()
@@ -99,7 +103,9 @@ def parse_announcement_messages(message_json: dict):
continue
filtered_images = []
+ image_urls = [] # save the images before they get encoded
for image in image_attachments:
+ image_urls.append(image["url"])
if image["id"] in cache:
is_related = cache[image["id"]][0]
type = cache[image["id"]][1]
@@ -116,17 +122,18 @@ def parse_announcement_messages(message_json: dict):
date = message["timestamp"].split("T")[0]
date_obj = datetime.strptime(date, "%Y-%m-%d")
unix_time = int(time.mktime(date_obj.timetuple()))
+ headline, content = generate_headline_and_content_from_images(image_urls, "WACCA PLUS")
news_posts.append({
"date": date,
"identifier": "WACCA_PLUS",
"type": type.upper(),
"timestamp": unix_time,
- "content": "NEW INFORMATION FROM WACCA PLUS / WACCA PLUS の最新情報",
- "headline": None,
+ "content": content,
+ "headline": headline,
"url": None,
"images": filtered_images,
- 'is_ai_summary': False
+ 'is_ai_summary': True
})
_save_cache(cache)
diff --git a/generate.py b/generate.py
index afb96a8..a3edbd3 100644
--- a/generate.py
+++ b/generate.py
@@ -3,7 +3,6 @@ Generates news JSON files
Generally you're expected to update the game versions manually
as for most games you only ever want the latest version (supported) of the game
"""
-from ast import Constant
import news_feed as feed
import constants
import json
diff --git a/news_feed.py b/news_feed.py
index 18e9dbd..d78c78c 100644
--- a/news_feed.py
+++ b/news_feed.py
@@ -14,7 +14,8 @@ Generic format for a news entry. All keys are considered to be nullable
'link': If there's an associated href. Else None
}
- ]
+ ],
+ 'is_ai_summary': boolean
}
"""
@@ -36,6 +37,17 @@ import community.museca_plus as mus_plus
import community.rbdx as rbdx
import constants
import translate
+import summarizer
+
+def _attach_llm_summaries(news_posts: list, game_name: str):
+ for post in news_posts:
+ image_urls = [img["image"] for img in post.get("images", []) if "image" in img]
+ if image_urls:
+ headline, content = summarizer.generate_headline_and_content_from_images(image_urls, game_name)
+ post["headline"] = headline
+ post["content"] = content
+ post["is_ai_summary"] = True
+
def get_news(news_url: str, version=None) -> list:
if news_url == constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE:
@@ -124,6 +136,7 @@ def get_news(news_url: str, version=None) -> list:
scraper.close()
if version == constants.MAIMAIDX_VERSION.PRISM:
news_posts = sorted(maimaidx_intl.parse_maimaidx_intl_prism_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
+ _attach_llm_summaries(news_posts, "maimai DX International")
elif news_url == constants.ONGEKI_JP_NEWS_SITE:
site_data = download_site_as_html(news_url)
@@ -154,6 +167,7 @@ def get_news(news_url: str, version=None) -> list:
elif news_url == constants.RB_DELUXE_PLUS_NEWS:
site_data = download_site_as_html(news_url)
news_posts = rbdx.get_carousel_posts(site_data)
+ _attach_llm_summaries(news_posts, "REFLEC BEAT PLUS DELUXE")
else:
news_posts = []
diff --git a/requirements.txt b/requirements.txt
index c5df683..7cbd3b8 100644
--- a/requirements.txt
+++ b/requirements.txt
Binary files differ
diff --git a/site/src/components/NewsFeed.tsx b/site/src/components/NewsFeed.tsx
index 0151975..3e6f6b9 100644
--- a/site/src/components/NewsFeed.tsx
+++ b/site/src/components/NewsFeed.tsx
@@ -16,6 +16,7 @@ export interface NewsData {
}>;
en_headline: string | null;
en_content: string | null;
+ is_ai_summary: boolean | null;
}
interface NewsFeedProps {
@@ -102,6 +103,12 @@ export const NewsFeed: React.FC<NewsFeedProps> = ({ newsItems }) => {
</button>
)}
</div>
+ {/* AI Disclaimer */}
+ {news.is_ai_summary && (
+ <div className={`${isMoe ? "bg-pink-200 text-pink-800" : "bg-gray-800 text-white"} px-3 py-1 text-xs text-center`}>
+ The information above is summarized by AI / 上記の情報はAIによって生成されました。
+ </div>
+ )}
{/* Images */}
{news.images.length > 0 && (
diff --git a/summarizer.py b/summarizer.py
new file mode 100644
index 0000000..d3d66f1
--- /dev/null
+++ b/summarizer.py
@@ -0,0 +1,100 @@
+from dotenv import load_dotenv
+import openai
+import json
+import hashlib
+import os
+
+load_dotenv()
+
+
+def summarization_is_possible() -> bool:
+ return os.getenv("OPENAI_API_KEY")
+
+
+def _load_cache():
+ cache_file = "summarization_cache.json"
+ if not os.path.exists(cache_file):
+ with open(cache_file, "w") as file:
+ json.dump({}, file)
+ with open(cache_file, "r") as file:
+ return json.load(file)
+
+
+def _save_cache(cache: dict):
+ cache_file = "summarization_cache.json"
+ with open(cache_file, "w") as file:
+ json.dump(cache, file)
+
+
+def _make_cache_key(game: str, img_urls: list[str]) -> str:
+ normalized_game = game.strip().lower()
+ img_data = json.dumps(sorted(img_urls), separators=(",", ":"))
+ hash_digest = hashlib.sha256(img_data.encode()).hexdigest()[:12]
+ return f"{normalized_game}_{hash_digest}"
+
+
+def generate_headline_and_content_from_images(img_urls: list[str], game: str):
+ """
+ Uses LLM to generate the headline and content when none provided by source, based on one or more images.
+ """
+ cache = _load_cache()
+ cache_key = _make_cache_key(game, img_urls)
+ if cache_key in cache:
+ cached = cache[cache_key]
+ return cached["headline"], cached["content"]
+ tools = [
+ {
+ "type": "function",
+ "function": {
+ "name": "generate_update_text",
+ "description": "Generates a concise English headline and short description for a rhythm game update image.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "headline": {
+ "type": "string",
+ "description": "A short English headline summarizing the game update.",
+ },
+ "content": {
+ "type": "string",
+ "description": "A brief English description of the new content shown in the image(s).",
+ },
+ },
+ "required": ["headline", "content"],
+ },
+ },
+ }
+ ]
+
+ messages = [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": (
+ f"Given one or more update-related images for the arcade game {game}, return a short, professional English headline and a brief, stern and concise description summarizing the content. No need to repeat game name"
+ ),
+ },
+ *[{"type": "image_url", "image_url": {"url": url}} for url in img_urls],
+ ],
+ }
+ ]
+
+ response = openai.chat.completions.create(
+ model="gpt-4o",
+ messages=messages,
+ tools=tools,
+ tool_choice={
+ "type": "function",
+ "function": {"name": "generate_update_text"},
+ },
+ )
+
+ tool_result = response.choices[0].message.tool_calls[0].function.arguments
+ parsed_result = json.loads(tool_result)
+ headline = parsed_result["headline"]
+ content = parsed_result["content"]
+ cache[cache_key] = {"headline": headline, "content": content}
+ _save_cache(cache)
+ return headline, content
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage