1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
|
"""
Generic format for a news entry. All keys are considered to be nullable
{
'date': JST date of news post,
'identifier': unique identifier for the game (usually some deriv. of the title),
'type': Type of post if available, otherwise if not provided it will be None (aka Generic news)
'timestamp': Unixtime of date above,
'headline': Headline,
'content': All text content of news,
'url': URL to full post if available,
'images': [
{
'image': URL to image,
'link': If there's an associated href. Else None
}
],
'is_ai_summary': boolean
}
"""
from site_scraper import SiteScraper, download_site_as_html
import konami.eamuse_app as eamuse_app
import bemani.sdvx as sound_voltex
import bemani.iidx as iidx
import bemani.ddr as ddr
import sega.chuni_jp as chunithm_jp
import bemani.polaris_chord as polaris_chord
import sega.chuni_intl as chuni_intl
import sega.maimaidx_jp as maimaidx_jp
import sega.maimaidx_intl as maimaidx_intl
import sega.ongeki_jp as ongeki_jp
import sega.idac as idac
import taito.music_diver as music_diver
import taito.street_fighter as street_fighter
import bandai_namco.taiko as taiko
import bandai_namco.wmmt as wmmt
import community.disc as disc
import community.wacca_plus.wacca_plus as wac_plus
import community.museca_plus as mus_plus
import community.rbdx as rbdx
import constants
import translate
import summarizer
from datetime import datetime
def _attach_llm_summaries(news_posts: list, game_name: str):
for post in news_posts:
image_urls = [img["image"] for img in post.get("images", []) if "image" in img]
if image_urls:
headline, content = summarizer.generate_headline_and_content_from_images(image_urls, game_name)
if headline is None and content is None:
datetime_str = datetime.now().strftime("%H:%M:%S")
post["headline"] = f"{game_name} Update"
post["content"] = f"573-UPDATES has found a news post for {game_name} at {datetime_str}, please refer to the image for more details!"
post["is_ai_summary"] = False
post["headline"] = headline
post["content"] = content
post["is_ai_summary"] = True
def get_news(news_url: str, version=None) -> list:
if news_url == constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE:
site_data = download_site_as_html(news_url)
news_posts = sorted(sound_voltex.parse_exceed_gear_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts, overrides=[("ボルテ", "SDVX")])
elif news_url == constants.IIDX_PINKY_CRUSH_NEWS_SITE:
site_data = download_site_as_html(news_url)
news_posts = sorted(iidx.parse_pinky_crush_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts, iidx.KEY_TERMS_TL)
elif news_url == constants.POLARIS_CHORD_NEWS_SITE:
site_data = download_site_as_html(news_url)
news_posts = sorted(polaris_chord.parse_polaris_chord_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts, iidx.KEY_TERMS_TL)
elif news_url == constants.EAMUSE_APP_API_ROUTE:
site_data = download_site_as_html(news_url+"/?uuid_to="+version+"&format=json")
match version:
case constants.IIDX_EAMUSE_APP_ID:
news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "IIDX_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts, iidx.KEY_TERMS_TL)
case constants.DDR_EAMUSE_APP_ID:
news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "DDR_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
case constants.SDVX_EAMUSE_APP_ID:
news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "SOUND_VOLTEX_EAMUSEMENT", constants.EAMUSE_POST_SITE ), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
case constants.JUBEAT_EAMUSE_APP_ID:
news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "JUBEAT_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
case constants.POPN_MUSIC_EAMUSE_APP_ID:
news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "POPN_MUSIC_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
case constants.GITADORA_EAMUSE_APP_ID:
news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "GITADORA_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
case constants.NOSTALGIA_EAMUSE_APP_ID:
news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "NOSTALGIA_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
case constants.DANCE_RUSH_APP_ID:
news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "DANCE_RUSH_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
case constants.DANCE_AROUND_APP_ID:
news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "DANCE_AROUND_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
case _:
raise ValueError("Cannot find provided e-amuse app gameId", version)
elif news_url == constants.DDR_WORLD_NEWS_SITE:
scraper = SiteScraper(headless=True)
site_data = scraper.get_page_source(news_url)
scraper.close()
news_posts = sorted(ddr.parse_ddr_world_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
elif news_url == constants.CHUNITHM_JP_NEWS_SITE:
site_data = download_site_as_html(news_url)
if version == [ constants.CHUNITHM_VERSION.VERSE, constants.CHUNITHM_VERSION.X_VERSE ]:
news_posts = sorted(chunithm_jp.parse_chuni_jp_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
if constants.CHUNI_RECURSIVE_IMAGE:
for i in range(len(news_posts)):
if not news_posts[i]["url"]:
continue
post_site_data = download_site_as_html(news_posts[i]["url"])
post_images = chunithm_jp.parse_chuni_jp_post_images(post_site_data)
news_posts[i]["images"].extend([image for image in post_images if not any(existing_image['image'] == image['image'] for existing_image in news_posts[i]["images"])])
elif news_url == constants.CHUNITHM_INTL_NEWS_SITE:
site_data = download_site_as_html(news_url)
news_posts = sorted(chuni_intl.parse_chuni_intl_api_route(site_data, "CHUNITHM_INTL", constants.CHUNITHM_INTL_RECENT_NEWS_LIMIT), key=lambda x: x['timestamp'], reverse=True)
print(news_posts)
exit()
if constants.CHUNI_RECURSIVE_IMAGE:
for i in range(len(news_posts)):
if not news_posts[i]["url"]:
continue
post_site_data = download_site_as_html(news_posts[i]["url"])
post_images = chuni_intl.parse_chuni_intl_post_images(post_site_data)
news_posts[i]["images"].extend([image for image in post_images if not any(existing_image['image'] == image['image'] for existing_image in news_posts[i]["images"])])
elif news_url == constants.MAIMAIDX_JP_NEWS_SITE:
site_data = download_site_as_html(news_url)
if version in [ constants.MAIMAIDX_VERSION.PRISM_PLUS, constants.MAIMAIDX_VERSION.CIRCLE ]:
news_posts = sorted(maimaidx_jp.parse_maimaidx_jp_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
elif news_url == constants.MAIMAIDX_INTL_NEWS_SITE:
site_data = download_site_as_html(news_url)
news_posts = sorted(maimaidx_intl.parse_maimaidx_intl_api_route(site_data, "MAIMAIDX_INTL", constants.MAIMAIDX_INTL_RECENT_NEWS_LIMIT), key=lambda x: x['timestamp'], reverse=True)
_attach_llm_summaries(news_posts, "maimai DX International")
elif news_url == constants.ONGEKI_JP_NEWS_SITE:
site_data = download_site_as_html(news_url)
if version == constants.ONGEKI_VERSION.REFRESH:
news_posts = sorted(ongeki_jp.parse_ongeki_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
elif news_url == constants.IDAC_NEWS_SITE:
site_data = download_site_as_html(news_url)
news_posts = sorted(idac.parse_idac_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
for news in news_posts:
promo_image_url = idac.get_promo_image(download_site_as_html(news["url"]))
news["images"] = [{'image': promo_image_url, 'link': None}]
news_posts = translate.add_translate_text_to_en(news_posts)
elif news_url == constants.MUSIC_DIVER_NEWS:
api_data = download_site_as_html(news_url)
news_posts = sorted(music_diver.parse_music_diver_news_json(api_data), key=lambda x: x['timestamp'], reverse=True)
elif news_url == constants.STREET_FIGHTER_NEWS_SITE:
site_data = download_site_as_html(news_url)
news_posts = sorted(street_fighter.parse_sf_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
elif news_url == constants.TAIKO_BLOG_SITE:
site_data = download_site_as_html(news_url)
news_posts = sorted(taiko.parse_taiko_blog_site(site_data), key=lambda x: x['timestamp'], reverse=True)
news_posts = translate.add_translate_text_to_en(news_posts)
elif news_url == constants.WANGAN_MAXI_GENERIC:
news_posts = []
na_site_data = download_site_as_html(constants.WANGAN_MAXI_NA_NEWS_SITE, response_encoding="utf-8")
prelim_na_news_data = wmmt.get_wmmt_na_news_post_links(na_site_data)
for data in prelim_na_news_data:
post_site_data = download_site_as_html(data["url"])
news = wmmt.parse_wmmt_na_news(post_site_data, data)
if news is not None:
news_posts.append(news)
asia_oce_site_data = download_site_as_html(constants.WANGAN_MAXI_ASIA_OCE_NEWS_SITE, response_encoding="utf-8")
prelim_asia_oce_news_data = wmmt.get_wmmt_asia_oce_news_post_links(asia_oce_site_data)
for data in prelim_asia_oce_news_data:
post_site_data = download_site_as_html(data["url"])
news = wmmt.parse_wmmt_asia_oce_news(post_site_data, data)
if news is not None:
news_posts.append(news)
jp_site_data = download_site_as_html(constants.WANGAN_MAXI_JP_NEWS_SITE, response_encoding="utf-8")
prelim_jp_news_data = wmmt.get_wmmt_jp_news_post_links(jp_site_data)
jp_news = []
for data in prelim_jp_news_data:
post_site_data = download_site_as_html(data["url"], response_encoding="utf-8")
news = wmmt.parse_wmmt_jp_news(post_site_data, data)
if news is not None:
jp_news.append(news)
jp_news = translate.add_translate_text_to_en(jp_news)
news_posts.extend(jp_news)
news_posts = sorted(news_posts, key=lambda x: x['timestamp'], reverse=True)
return news_posts
elif news_url == constants.WACCA_PLUS_MAGIC_STRING:
if not wac_plus.check_is_generation_possible():
news_posts = []
else:
messages = disc.fetch_messages(constants.WACCA_PLUS_MAGIC_STRING)
news_posts = sorted(wac_plus.parse_announcement_messages(messages), key=lambda x: x['timestamp'], reverse=True)
elif news_url == constants.MUSECA_PLUS_NEWS_SITE:
site_data = download_site_as_html(news_url)
news_posts = sorted(mus_plus.parse_museca_plus_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
elif news_url == constants.RB_DELUXE_PLUS_NEWS:
site_data = download_site_as_html(news_url)
news_posts = rbdx.get_carousel_posts(site_data)
_attach_llm_summaries(news_posts, "REFLEC BEAT PLUS DELUXE")
else:
news_posts = []
return news_posts
|