diff options
| author | Pinapelz <yukais@pinapelz.com> | 2025-04-25 01:07:44 -0700 |
|---|---|---|
| committer | Pinapelz <yukais@pinapelz.com> | 2025-04-25 01:07:44 -0700 |
| commit | 71767853b0f65352b7418cc9a14f17acc5dbd9b8 (patch) | |
| tree | 4cc0e35d6a33790eafc9b12d06eae31aa6f2022a /news_feed.py | |
| parent | 4b0998b4afa48f9e2968b1bb76ef42bc6a94e1b6 (diff) | |
Add AI-generated headline and content when the source does not provide any
Diffstat (limited to 'news_feed.py')
| -rw-r--r-- | news_feed.py | 16 |
1 files changed, 15 insertions, 1 deletions
diff --git a/news_feed.py b/news_feed.py index 18e9dbd..d78c78c 100644 --- a/news_feed.py +++ b/news_feed.py @@ -14,7 +14,8 @@ Generic format for a news entry. All keys are considered to be nullable 'link': If there's an associated href. Else None } - ] + ], + 'is_ai_summary': boolean } """ @@ -36,6 +37,17 @@ import community.museca_plus as mus_plus import community.rbdx as rbdx import constants import translate +import summarizer + +def _attach_llm_summaries(news_posts: list, game_name: str): + for post in news_posts: + image_urls = [img["image"] for img in post.get("images", []) if "image" in img] + if image_urls: + headline, content = summarizer.generate_headline_and_content_from_images(image_urls, game_name) + post["headline"] = headline + post["content"] = content + post["is_ai_summary"] = True + def get_news(news_url: str, version=None) -> list: if news_url == constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE: @@ -124,6 +136,7 @@ def get_news(news_url: str, version=None) -> list: scraper.close() if version == constants.MAIMAIDX_VERSION.PRISM: news_posts = sorted(maimaidx_intl.parse_maimaidx_intl_prism_news_site(site_data), key=lambda x: x['timestamp'], reverse=True) + _attach_llm_summaries(news_posts, "maimai DX International") elif news_url == constants.ONGEKI_JP_NEWS_SITE: site_data = download_site_as_html(news_url) @@ -154,6 +167,7 @@ def get_news(news_url: str, version=None) -> list: elif news_url == constants.RB_DELUXE_PLUS_NEWS: site_data = download_site_as_html(news_url) news_posts = rbdx.get_carousel_posts(site_data) + _attach_llm_summaries(news_posts, "REFLEC BEAT PLUS DELUXE") else: news_posts = [] |
