7 files changed, 133 insertions, 5 deletions
diff --git a/.gitignore b/.gitignore
index 5524799..9d7d2ab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -173,3 +173,4 @@ cython_debug/
 news
 tl_cache.json
 wac_result_cache.json
+summarization_cache.json
+\ No newline at end of file
diff --git a/community/wacca_plus/wacca_plus.py b/community/wacca_plus/wacca_plus.py
index d5e0aa4..fae3dd0 100644
--- a/community/wacca_plus/wacca_plus.py
+++ b/community/wacca_plus/wacca_plus.py
@@ -6,6 +6,10 @@ import openai
 import json
 from dotenv import load_dotenv
 import base64
+import sys
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")))
+
+from summarizer import generate_headline_and_content_from_images
 
 load_dotenv()
 
@@ -99,7 +103,9 @@ def parse_announcement_messages(message_json: dict):
             continue
 
         filtered_images = []
+        image_urls = [] # save the images before they get encoded
         for image in image_attachments:
+            image_urls.append(image["url"])
             if image["id"] in cache:
                 is_related = cache[image["id"]][0]
                 type = cache[image["id"]][1]
@@ -116,17 +122,18 @@ def parse_announcement_messages(message_json: dict):
         date = message["timestamp"].split("T")[0]
         date_obj = datetime.strptime(date, "%Y-%m-%d")
         unix_time = int(time.mktime(date_obj.timetuple()))
+        headline, content = generate_headline_and_content_from_images(image_urls, "WACCA PLUS")
 
         news_posts.append({
             "date": date,
             "identifier": "WACCA_PLUS",
             "type": type.upper(),
             "timestamp": unix_time,
-            "content": "NEW INFORMATION FROM WACCA PLUS / WACCA PLUS の最新情報",
-            "headline": None,
+            "content": content,
+            "headline": headline,
             "url": None,
             "images": filtered_images,
-            'is_ai_summary': False
+            'is_ai_summary': True
         })
 
     _save_cache(cache)
diff --git a/generate.py b/generate.py
index afb96a8..a3edbd3 100644
--- a/generate.py
+++ b/generate.py
@@ -3,7 +3,6 @@ Generates news JSON files
 Generally you're expected to update the game versions manually
 as for most games you only ever want the latest version (supported) of the game
 """
-from ast import Constant
 import news_feed as feed
 import constants
 import json
diff --git a/news_feed.py b/news_feed.py
index 18e9dbd..d78c78c 100644
--- a/news_feed.py
+++ b/news_feed.py
@@ -14,7 +14,8 @@ Generic format for a news entry. All keys are considered to be nullable
         'link': If there's an associated href. Else None
         }
 
-    ]
+    ],
+    'is_ai_summary': boolean
 }
 """
 
@@ -36,6 +37,17 @@ import community.museca_plus as mus_plus
 import community.rbdx as rbdx
 import constants
 import translate
+import summarizer
+
+def _attach_llm_summaries(news_posts: list, game_name: str):
+    """Replace headline/content of every post that has image URLs with an LLM summary, in place."""
+    for post in news_posts:
+        # News-entry keys are nullable: treat a null "images" list or a null "image" URL as absent.
+        image_urls = [img["image"] for img in (post.get("images") or []) if img.get("image")]
+        if image_urls:
+            headline, content = summarizer.generate_headline_and_content_from_images(image_urls, game_name)
+            post.update(headline=headline, content=content, is_ai_summary=True)
+
 
 def get_news(news_url: str, version=None) -> list:
     if news_url == constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE:
@@ -124,6 +136,7 @@ def get_news(news_url: str, version=None) -> list:
         scraper.close()
         if version == constants.MAIMAIDX_VERSION.PRISM:
             news_posts = sorted(maimaidx_intl.parse_maimaidx_intl_prism_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
+            _attach_llm_summaries(news_posts, "maimai DX International")
 
     elif news_url == constants.ONGEKI_JP_NEWS_SITE:
         site_data = download_site_as_html(news_url)
@@ -154,6 +167,7 @@ def get_news(news_url: str, version=None) -> list:
     elif news_url == constants.RB_DELUXE_PLUS_NEWS:
         site_data = download_site_as_html(news_url)
         news_posts = rbdx.get_carousel_posts(site_data)
+        _attach_llm_summaries(news_posts, "REFLEC BEAT PLUS DELUXE")
 
     else:
         news_posts = []
diff --git a/requirements.txt b/requirements.txt
index c5df683..7cbd3b8 100644
--- a/requirements.txt
+++ b/requirements.txt
diff --git a/site/src/components/NewsFeed.tsx b/site/src/components/NewsFeed.tsx
index 0151975..3e6f6b9 100644
--- a/site/src/components/NewsFeed.tsx
+++ b/site/src/components/NewsFeed.tsx
@@ -16,6 +16,7 @@ export interface NewsData {
   }>;
   en_headline: string | null;
   en_content: string | null;
+  is_ai_summary: boolean | null;
 }
 
 interface NewsFeedProps {
@@ -102,6 +103,12 @@ export const NewsFeed: React.FC<NewsFeedProps> = ({ newsItems }) => {
                 </button>
               )}
             </div>
+            {/* AI Disclaimer */}
+            {news.is_ai_summary && (
+              <div className={`${isMoe ? "bg-pink-200 text-pink-800" : "bg-gray-800 text-white"} px-3 py-1 text-xs text-center`}>
+              The information above is summarized by AI / 上記の情報はAIによって生成されました。
+              </div>
+            )}
 
             {/* Images */}
             {news.images.length > 0 && (
diff --git a/summarizer.py b/summarizer.py
new file mode 100644
index 0000000..d3d66f1
--- /dev/null
+++ b/summarizer.py
@@ -0,0 +1,100 @@
+from dotenv import load_dotenv
+import openai
+import json
+import hashlib
+import os
+
+load_dotenv()
+
+
+def summarization_is_possible() -> bool:
+    return bool(os.getenv("OPENAI_API_KEY"))  # True only when OPENAI_API_KEY is set and non-empty
+
+
+def _load_cache():
+    """Return summarization_cache.json parsed as JSON, first creating it as {} when it is missing."""
+    path = "summarization_cache.json"
+    if not os.path.exists(path):
+        _save_cache({})
+    with open(path, "r") as handle:
+        return json.load(handle)
+
+
+def _save_cache(cache: dict):
+    """Overwrite summarization_cache.json in the working directory with `cache` as JSON."""
+    with open("summarization_cache.json", "w") as sink:
+        json.dump(cache, sink)
+
+
+def _make_cache_key(game: str, img_urls: list[str]) -> str:
+    """Build "<stripped, lowercased game>_<first 12 hex chars of SHA-256 of the sorted URL list>"."""
+    payload = json.dumps(sorted(img_urls), separators=(",", ":")).encode()
+    fingerprint = hashlib.sha256(payload).hexdigest()[:12]
+    return "_".join((game.strip().lower(), fingerprint))
+
+
+def generate_headline_and_content_from_images(img_urls: list[str], game: str):
+    """
+    Return (headline, content) for images of `game`: reuse the cache entry keyed by `game` and `img_urls` when present, otherwise ask the LLM (forced `generate_update_text` tool call) and cache the result.
+    """
+    cache = _load_cache()
+    cache_key = _make_cache_key(game, img_urls)
+    if cache_key in cache:  # cache hit: return the stored pair without calling the API
+        cached = cache[cache_key]
+        return cached["headline"], cached["content"]
+    tools = [  # schema of the single tool the model must fill in (headline + content strings)
+        {
+            "type": "function",
+            "function": {
+                "name": "generate_update_text",
+                "description": "Generates a concise English headline and short description for a rhythm game update image.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "headline": {
+                            "type": "string",
+                            "description": "A short English headline summarizing the game update.",
+                        },
+                        "content": {
+                            "type": "string",
+                            "description": "A brief English description of the new content shown in the image(s).",
+                        },
+                    },
+                    "required": ["headline", "content"],
+                },
+            },
+        }
+    ]
+
+    messages = [  # one user message: the text instruction followed by every image
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": (
+                        f"Given one or more update-related images for the arcade game {game}, return a short, professional English headline and a brief, stern and concise description summarizing the content. No need to repeat game name"
+                    ),
+                },
+                *[{"type": "image_url", "image_url": {"url": url}} for url in img_urls],  # one part per URL, passed as given
+            ],
+        }
+    ]
+
+    response = openai.chat.completions.create(
+        model="gpt-4o",
+        messages=messages,
+        tools=tools,
+        tool_choice={  # force the reply to come back as a generate_update_text call
+            "type": "function",
+            "function": {"name": "generate_update_text"},
+        },
+    )
+
+    tool_result = response.choices[0].message.tool_calls[0].function.arguments  # JSON text of the first tool call
+    parsed_result = json.loads(tool_result)
+    headline = parsed_result["headline"]
+    content = parsed_result["content"]
+    cache[cache_key] = {"headline": headline, "content": content}  # stored so this game + image set is not requested again
+    _save_cache(cache)
+    return headline, content