add AI-generated headline and content when the source does not provide any

author: Pinapelz <yukais@pinapelz.com> 2025-04-25 01:07:44 -0700
committer: Pinapelz <yukais@pinapelz.com> 2025-04-25 01:07:44 -0700
commit: 71767853b0f65352b7418cc9a14f17acc5dbd9b8 (patch)
tree: 4cc0e35d6a33790eafc9b12d06eae31aa6f2022a /news_feed.py
parent: 4b0998b4afa48f9e2968b1bb76ef42bc6a94e1b6 (diff)
1 files changed, 15 insertions, 1 deletions
diff --git a/news_feed.py b/news_feed.py
index 18e9dbd..d78c78c 100644
--- a/news_feed.py
+++ b/news_feed.py
@@ -14,7 +14,8 @@ Generic format for a news entry. All keys are considered to be nullable
         'link': If there's an associated href. Else None
         }
 
-    ]
+    ],
+    'is_ai_summary': boolean
 }
 """
 
@@ -36,6 +37,17 @@ import community.museca_plus as mus_plus
 import community.rbdx as rbdx
 import constants
 import translate
+import summarizer
+
+def _attach_llm_summaries(news_posts: list, game_name: str):
+    for post in news_posts:
+        image_urls = [img["image"] for img in post.get("images", []) if "image" in img]
+        if image_urls:
+            headline, content = summarizer.generate_headline_and_content_from_images(image_urls, game_name)
+            post["headline"] = headline
+            post["content"] = content
+            post["is_ai_summary"] = True
+
 
 def get_news(news_url: str, version=None) -> list:
     if news_url == constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE:
@@ -124,6 +136,7 @@ def get_news(news_url: str, version=None) -> list:
         scraper.close()
         if version == constants.MAIMAIDX_VERSION.PRISM:
             news_posts = sorted(maimaidx_intl.parse_maimaidx_intl_prism_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
+            _attach_llm_summaries(news_posts, "maimai DX International")
 
     elif news_url == constants.ONGEKI_JP_NEWS_SITE:
         site_data = download_site_as_html(news_url)
@@ -154,6 +167,7 @@ def get_news(news_url: str, version=None) -> list:
     elif news_url == constants.RB_DELUXE_PLUS_NEWS:
         site_data = download_site_as_html(news_url)
         news_posts = rbdx.get_carousel_posts(site_data)
+        _attach_llm_summaries(news_posts, "REFLEC BEAT PLUS DELUXE")
 
     else:
         news_posts = []
author	Pinapelz <yukais@pinapelz.com>	2025-04-25 01:07:44 -0700
committer	Pinapelz <yukais@pinapelz.com>	2025-04-25 01:07:44 -0700
commit	71767853b0f65352b7418cc9a14f17acc5dbd9b8 (patch)
tree	4cc0e35d6a33790eafc9b12d06eae31aa6f2022a /news_feed.py
parent	4b0998b4afa48f9e2968b1bb76ef42bc6a94e1b6 (diff)