1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
|
"""
Generates the news JSON files.
Game versions are generally expected to be updated manually,
since for most games only the latest (supported) version is of interest.
"""
import news_feed as feed
import constants
import json
import hashlib
import os
from functools import lru_cache
from dotenv import load_dotenv
from datetime import datetime, timedelta
from common import create_database_connection
from feed import build_rss_from_news_feed
from notifications import check_can_send_notifs, broadcast_to_topic
# Load variables from a local .env file into the process environment
# (python-dotenv). OPENAI_API_KEY, for example, is read from os.environ below.
load_dotenv()
# Directory that the per-game JSON files and RSS XML files are written to.
OUTPUT_DIR = "news"
# Whether news should be archived to the local save database.
# NOTE(review): in the visible code this flag is only referenced from
# commented-out code - confirm whether archiving is meant to be active.
ARCHIVE_NEWS = True
# When True, generate_news_file also builds an RSS feed next to each JSON file.
GENERATE_RSS_FEEDS = True
# When False, attempt_broadcast_notifications does nothing.
SEND_NOTIFICATIONS = True
@lru_cache(maxsize=1000)
def _sha256_hex(text):
    """Return the hex SHA-256 digest of ``text`` encoded as UTF-8 (memoized)."""
    return hashlib.sha256(text.encode("utf-8")).hexdigest()


def compute_json_hash(data):
    """
    Return a SHA-256 hex digest identifying ``data``.

    Dicts are serialized with ``json.dumps(..., sort_keys=True)`` so that key
    order does not influence the hash; any other value is hashed through
    ``str(data)``.

    The cache sits behind the serialization step on purpose: ``lru_cache``
    requires hashable arguments, so decorating this function directly made
    every dict (or list) argument fail with ``TypeError`` before the body ran.
    """
    if isinstance(data, dict):
        data_str = json.dumps(data, sort_keys=True)
    else:
        data_str = str(data)
    return _sha256_hex(data_str)
def save_news_to_db(news_feed: list):
    """
    Archive every entry of ``news_feed`` in the local save database.

    Each entry is stored under its ``archive_hash`` when it carries a
    non-empty one; otherwise a hash of the entry's sorted-key JSON form is
    used as the key.

    The database connection is closed even when storing an entry fails.
    """
    log_output("Writing news to local save database. This is purely for archival reasons")
    database = create_database_connection()
    try:
        for entry in news_feed:
            key = entry.get('archive_hash') or compute_json_hash(json.dumps(entry, sort_keys=True))
            database.add_news_entry(key, entry)
    finally:
        # Release the connection regardless of how the loop ended.
        database.close()
def create_merged_feed(*news_lists, limit=constants.DAYS_LIMIT):
    """
    Combine several news feeds into a single list, newest first.

    Falsy feeds (``None``, empty containers) are skipped. An item is kept only
    when its ``timestamp`` (read with ``datetime.fromtimestamp``) is not older
    than ``limit`` days before the current time.
    """
    oldest_allowed = datetime.now() - timedelta(days=limit)

    merged = []
    for feed_items in news_lists:
        if not feed_items:
            continue
        for post in feed_items:
            if datetime.fromtimestamp(post["timestamp"]) < oldest_allowed:
                continue
            merged.append(post)

    # Stable sort: posts with equal timestamps keep their feed order.
    merged.sort(key=lambda post: post["timestamp"], reverse=True)
    return merged
def attach_news_meta_data(news_data: list):
    """
    Wrap a list of news posts in the envelope stored in the news data files.

    The envelope holds the current time, as whole seconds since the epoch,
    under ``fetch_time`` and the posts themselves under ``news_posts``.
    """
    fetched_at = int(datetime.now().timestamp())
    envelope = dict(fetch_time=fetched_at, news_posts=news_data)
    return envelope
def _notification_body(entry: dict, max_length: int = 50) -> str:
    """Pick the notification text for an entry and cut it to ``max_length`` characters."""
    # Preference order: English headline, English content, headline, content.
    body_text = (
        entry.get("en_headline")
        or entry.get("en_content")
        or entry.get("headline")
        or entry.get("content")
        or ""
    )
    if len(body_text) > max_length:
        body_text = body_text[:max_length] + "..."
    return body_text


def _notification_image(entry: dict, fallback: str) -> str:
    """Return the entry's first image URL, or ``fallback`` when it has no images."""
    images = entry.get("images")
    if images:
        return images[0]["image"]
    return fallback


def attempt_broadcast_notifications(news_data: list, title: str, topic: str, image: str="http://arcade.moekyun.me/liris.webp", limit=constants.DAYS_LIMIT):
    """
    Broadcast a push notification for every recent, not yet archived news entry.

    Nothing happens when ``news_data`` is empty or SEND_NOTIFICATIONS is off.
    Entries older than ``limit`` days, and entries whose id is already known
    to the database, are skipped.

    Args:
        news_data: News entries (dicts with at least a ``timestamp``).
        title: Notification title.
        topic: Topic the notification is broadcast to.
        image: Image used for entries that carry no images of their own.
        limit: Maximum age, in days, of an entry that may be broadcast.
    """
    if not news_data or not SEND_NOTIFICATIONS:
        return

    # The original tested the imported object itself, which is always truthy
    # for a function, so the warning below could never fire.
    # NOTE(review): assumes check_can_send_notifs takes no arguments - confirm.
    can_send = check_can_send_notifs() if callable(check_can_send_notifs) else check_can_send_notifs
    if not can_send:
        print("[WARNING] Skipping notifications as env vars are not properly configured. See template")
        return

    database = create_database_connection()
    try:
        cutoff = datetime.now() - timedelta(days=limit)
        for entry in news_data:
            if datetime.fromtimestamp(entry["timestamp"]) < cutoff:
                continue

            # The id is the hash of the entry without its archive_hash field.
            cleaned_entry = {k: v for k, v in entry.items() if k != "archive_hash"}
            news_id = compute_json_hash(json.dumps(cleaned_entry, sort_keys=True))
            if database.check_news_id_exists(news_id):
                continue

            print("[Notifications] Broadcasting Unique News with id: " + news_id)
            # Resolve the image per entry so one entry's image never carries
            # over to a later entry that has none.
            broadcast_to_topic(
                topic,
                title,
                _notification_body(entry),
                _notification_image(entry, image),
            )
    finally:
        database.close()
def log_output(message: str, type: str = "DEBUG"):
    """
    Print ``message`` as one log line, prefixed with the current local time
    and the ``type`` tag, both in square brackets.
    """
    stamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    line = "[{}] [{}]: {}".format(stamp, type, message)
    print(line)
def generate_news_file(filename, url, version=None, formatted_name: "str | None" = None):
    """
    Fetch one game's news, save it as JSON and optionally build its RSS feed.

    On a successful fetch every item gets an ``archive_hash`` and the result
    is written to ``{OUTPUT_DIR}/{filename}.json``. When the fetch fails or
    yields nothing, the posts from a previously written file are used instead.

    Args:
        filename: Base name (no extension) of the JSON/XML output files.
        url: Source passed to ``feed.get_news``.
        version: Optional second argument for ``feed.get_news``; omitted when falsy.
        formatted_name: Display name for the RSS title/description;
            defaults to ``filename``.

    Returns:
        The news posts. When nothing usable is available this is whatever the
        failed fetch left behind (``None`` or an empty result).
    """
    log_output(f"Fetching {filename.upper()} News Data", "NEWS")
    news_data = None
    try:
        news_data = feed.get_news(url, version) if version else feed.get_news(url)
        log_output("Computing and Attaching Archived IDs")
        for item in news_data:
            hash_value = compute_json_hash(json.dumps(item, sort_keys=True))
            item['archive_hash'] = hash_value
    except Exception as e:
        # Best effort: one failing source must not stop the whole job.
        print(e)
        print("[ERROR] Wasn't able to fetch news. Skipping...")

    path = f"{OUTPUT_DIR}/{filename}.json"
    if news_data:
        log_output(f"Success. Got {filename.upper()} News Data. Saving to file...", "NEWS")
        with open(path, "w", encoding="utf-8") as f:
            json.dump(attach_news_meta_data(news_data), f, indent=2, ensure_ascii=False)
    elif os.path.exists(path):
        log_output(
            f"Failed. Couldn't fetch {filename.upper()} data. Using previously scraped data",
            "NEWS",
        )
        try:
            with open(path, "r", encoding="utf-8") as json_file:
                news_data = json.load(json_file)["news_posts"]
        except (OSError, ValueError, KeyError, TypeError) as e:
            # A truncated or malformed file from an earlier run is treated
            # like a missing one instead of aborting the whole job.
            # json.JSONDecodeError is a ValueError.
            log_output(
                f"Previously scraped {filename.upper()} data could not be read ({e}). Skipping...",
                "NEWS",
            )
            return news_data
    else:
        log_output(f"Failed. Couldn't fetch {filename.upper()} data. Skipping...", "NEWS")
        return news_data

    if GENERATE_RSS_FEEDS:
        rss_file_path = f"{OUTPUT_DIR}/{filename}.xml"
        log_output(f"Generating RSS Feed: {rss_file_path}")
        if not formatted_name:
            formatted_name = filename
        title = f"{formatted_name} News RSS Feed"
        description = f"The latest information for {formatted_name} from official sources"
        build_rss_from_news_feed(title, description, path, rss_file_path)
    return news_data
# For e-amusement games you can pull either from a game-specific implementation of the scraper or from the generic
# feed provided by the e-amusement app. The information returned by the two sources differs.
def generate_iidx_news_file():
    """Generate the beatmania IIDX news file and broadcast its posts to the "iidx" topic."""
    iidx_posts = generate_news_file(
        filename="iidx_news",
        url=constants.EAMUSE_APP_API_ROUTE,
        version=constants.IIDX_EAMUSE_APP_ID,
    )
    attempt_broadcast_notifications(
        news_data=iidx_posts,
        title="New information for beatmania IIDX",
        topic="iidx",
    )
    return iidx_posts
def generate_sdvx_news_file():
    """Generate the SOUND VOLTEX news file and broadcast its posts to the "sdvx" topic."""
    sdvx_posts = generate_news_file(
        filename="sdvx_news",
        url=constants.SOUND_VOLTEX_NABLA_NEWS_SITE,
    )
    attempt_broadcast_notifications(
        news_data=sdvx_posts,
        title="New Information for SOUND VOLTEX",
        topic="sdvx",
    )
    return sdvx_posts
def generate_ddr_news_file():
    """Generate the DanceDanceRevolution news file and broadcast its posts to the "ddr" topic."""
    ddr_posts = generate_news_file(
        filename="ddr_news",
        url=constants.EAMUSE_APP_FEED,
        version=constants.DDR_EAMUSE_APP_ID,
    )
    attempt_broadcast_notifications(
        news_data=ddr_posts,
        title="New information for DanceDanceRevolution",
        topic="ddr",
    )
    return ddr_posts
def generate_polaris_chord_news_file():
    """Generate the POLARIS CHORD news file and broadcast its posts to the "polaris_chord" topic."""
    polaris_posts = generate_news_file(
        filename="polaris_chord_news",
        url=constants.POLARIS_CHORD_NEWS_SITE,
    )
    attempt_broadcast_notifications(
        news_data=polaris_posts,
        title="New information for POLARIS CHORD",
        topic="polaris_chord",
    )
    return polaris_posts
def generate_dance_around_news_file():
    """Generate the DANCE aROUND news file and broadcast its posts to the "dance_around" topic."""
    dance_around_posts = generate_news_file(
        filename="dance_around_news",
        url=constants.EAMUSE_APP_FEED,
        version=constants.DANCE_AROUND_APP_ID,
    )
    attempt_broadcast_notifications(
        news_data=dance_around_posts,
        title="New information for DANCE aROUND",
        topic="dance_around",
    )
    return dance_around_posts
def generate_dance_rush_news_file():
    """Generate the DANCERUSH STARDOM news file and broadcast its posts to the "dance_rush" topic."""
    dance_rush_posts = generate_news_file(
        filename="dance_rush_news",
        url=constants.EAMUSE_APP_FEED,
        version=constants.DANCE_RUSH_APP_ID,
    )
    attempt_broadcast_notifications(
        news_data=dance_rush_posts,
        title="New information for DANCERUSH STARDOM",
        topic="dance_rush",
    )
    return dance_rush_posts
def generate_popn_music_news_file():
    """Generate the pop'n music news file and broadcast its posts to the "popn_music" topic."""
    popn_posts = generate_news_file(
        filename="popn_music_news",
        url=constants.EAMUSE_APP_FEED,
        version=constants.POPN_MUSIC_EAMUSE_APP_ID,
    )
    attempt_broadcast_notifications(
        news_data=popn_posts,
        title="New information for pop'n music",
        topic="popn_music",
    )
    return popn_posts
def generate_jubeat_news_file():
    """Generate the jubeat news file and broadcast its posts to the "jubeat" topic."""
    jubeat_posts = generate_news_file(
        filename="jubeat_news",
        url=constants.EAMUSE_APP_FEED,
        version=constants.JUBEAT_EAMUSE_APP_ID,
    )
    attempt_broadcast_notifications(
        news_data=jubeat_posts,
        title="New information for jubeat",
        topic="jubeat",
    )
    return jubeat_posts
def generate_nostalgia_news_file():
    """Generate the NOSTALGIA news file and broadcast its posts to the "nostalgia" topic."""
    nostalgia_posts = generate_news_file(
        filename="nostalgia_news",
        url=constants.EAMUSE_APP_FEED,
        version=constants.NOSTALGIA_EAMUSE_APP_ID,
    )
    attempt_broadcast_notifications(
        news_data=nostalgia_posts,
        title="New information for NOSTALGIA",
        topic="nostalgia",
    )
    return nostalgia_posts
def generate_gitadora_news_file():
    """Generate the GITADORA news file and broadcast its posts to the "gitadora" topic."""
    gitadora_posts = generate_news_file(
        filename="gitadora_news",
        url=constants.EAMUSE_APP_FEED,
        version=constants.GITADORA_EAMUSE_APP_ID,
    )
    attempt_broadcast_notifications(
        news_data=gitadora_posts,
        title="New information for GITADORA",
        topic="gitadora",
    )
    return gitadora_posts
def generate_chunithm_jp_news_file():
    """Generate the CHUNITHM (Japan ver.) news file and broadcast its posts to the "chunithm_jp" topic."""
    chunithm_jp_posts = generate_news_file(
        filename="chunithm_jp_news",
        url=constants.CHUNITHM_JP_NEWS_SITE,
        version=constants.CHUNITHM_VERSION.X_VERSE_X,
    )
    attempt_broadcast_notifications(
        news_data=chunithm_jp_posts,
        title="New information for CHUNITHM (Japan ver.)",
        topic="chunithm_jp",
    )
    return chunithm_jp_posts
def generate_maimaidx_jp_news_file():
    """Generate the maimai DX (Japan ver.) news file and broadcast its posts to the "maimaidx_jp" topic."""
    maimaidx_jp_posts = generate_news_file(
        filename="maimaidx_jp_news",
        url=constants.MAIMAIDX_JP_NEWS_SITE,
        version=constants.MAIMAIDX_VERSION.CIRCLE_PLUS,
    )
    attempt_broadcast_notifications(
        news_data=maimaidx_jp_posts,
        title="New information for maimai DX (Japan ver.)",
        topic="maimaidx_jp",
    )
    return maimaidx_jp_posts
def generate_ongeki_jp_news_file():
    """Generate the ONGEKI news file and broadcast its posts to the "ongeki_jp" topic."""
    ongeki_posts = generate_news_file(
        filename="ongeki_jp_news",
        url=constants.ONGEKI_JP_NEWS_SITE,
        version=constants.ONGEKI_VERSION.REFRESH,
    )
    attempt_broadcast_notifications(
        news_data=ongeki_posts,
        title="New information for ONGEKI/オンゲキ",
        topic="ongeki_jp",
    )
    return ongeki_posts
def generate_maimaidx_intl_news_file():
    """Generate the maimai DX (International ver.) news file and broadcast its posts to the "maimaidx_intl" topic."""
    maimaidx_intl_posts = generate_news_file(
        filename="maimaidx_intl_news",
        url=constants.MAIMAIDX_INTL_NEWS_SITE,
    )
    attempt_broadcast_notifications(
        news_data=maimaidx_intl_posts,
        title="New information for maimai DX (International ver.)",
        topic="maimaidx_intl",
    )
    return maimaidx_intl_posts
def generate_chunithm_intl_news_file():
    """Generate the CHUNITHM (International ver.) news file and broadcast its posts to the "chunithm_intl" topic."""
    chunithm_intl_posts = generate_news_file(
        filename="chunithm_intl_news",
        url=constants.CHUNITHM_INTL_NEWS_SITE,
    )
    attempt_broadcast_notifications(
        news_data=chunithm_intl_posts,
        title="New information for CHUNITHM (International ver.)",
        topic="chunithm_intl",
    )
    return chunithm_intl_posts
def generate_idac_news_file():
    """Generate the Initial D THE ARCADE news file and broadcast its posts to the "idac" topic."""
    idac_posts = generate_news_file(
        filename="idac_news",
        url=constants.IDAC_NEWS_SITE,
    )
    attempt_broadcast_notifications(
        news_data=idac_posts,
        title="New information for Initial D THE ARCADE",
        topic="idac",
    )
    return idac_posts
def generate_music_diver_news_file():
    """Generate the MUSIC DIVER news file and broadcast its posts to the "music_diver" topic."""
    music_diver_posts = generate_news_file(
        filename="music_diver_news",
        url=constants.MUSIC_DIVER_NEWS,
    )
    attempt_broadcast_notifications(
        news_data=music_diver_posts,
        title="New information for MUSIC DIVER",
        topic="music_diver",
    )
    return music_diver_posts
def generate_street_fighter_news_file():
    """Generate the Street Fighter news file and broadcast its posts to the "street_fighter" topic."""
    street_fighter_posts = generate_news_file(
        filename="street_fighter_news",
        url=constants.STREET_FIGHTER_NEWS_SITE,
        version=constants.STREET_FIGHTER_VERSION.SIX,
    )
    attempt_broadcast_notifications(
        news_data=street_fighter_posts,
        title="New information for Street Fighter",
        topic="street_fighter",
    )
    return street_fighter_posts
def generate_taiko_news_file():
    """Generate the Taiko news file and broadcast its posts to the "taiko" topic."""
    taiko_posts = generate_news_file(
        filename="taiko_news",
        url=constants.TAIKO_BLOG_SITE,
    )
    attempt_broadcast_notifications(
        news_data=taiko_posts,
        title="New information for Taiko/太鼓の達人",
        topic="taiko",
    )
    return taiko_posts
def generate_wmmt_news_file():
    """Generate the Wangan Midnight Maximum Tune news file and broadcast its posts to the "wmmt" topic."""
    wmmt_posts = generate_news_file(
        filename="wmmt_news",
        url=constants.WANGAN_MAXI_GENERIC,
    )
    attempt_broadcast_notifications(
        news_data=wmmt_posts,
        title="New information for Wangan Midnight Maximum Tune",
        topic="wmmt",
    )
    return wmmt_posts
def generate_wacca_plus_news_file():
    """
    Generate the WACCA PLUS news file and broadcast its posts to the "wacca_plus" topic.

    Generation is skipped, with a warning, when the OPENAI_API_KEY environment
    variable is unset or empty. In that case an empty dict is returned.
    NOTE(review): the skip path returns a dict while the normal path returns
    whatever generate_news_file returns - confirm callers only rely on falsiness.
    """
    summarization_key = os.environ.get("OPENAI_API_KEY")
    if not summarization_key:
        print("[WARNING] Skipping WACCA PLUS generation, not possible without summarization key")
        return {}

    wacca_posts = generate_news_file(
        filename="wacca_plus_news",
        url=constants.WACCA_PLUS_MAGIC_STRING,
    )
    attempt_broadcast_notifications(
        news_data=wacca_posts,
        title="New information for WACCA",
        topic="wacca_plus",
    )
    return wacca_posts
def generate_museca_plus_news_file():
    """Generate the MUSECA PLUS news file and broadcast its posts to the "museca_plus" topic."""
    museca_posts = generate_news_file(
        filename="museca_plus_news",
        url=constants.MUSECA_PLUS_NEWS_SITE,
    )
    attempt_broadcast_notifications(
        news_data=museca_posts,
        title="New information for MUSECA",
        topic="museca_plus",
    )
    return museca_posts
def generate_rbdx_plus_news_file():
    """
    Generate the "rb_deluxe_plus_news" news file and return its posts.

    Unlike the other per-game generators here, no notification is broadcast.
    """
    return generate_news_file(
        filename="rb_deluxe_plus_news",
        url=constants.RB_DELUXE_PLUS_NEWS,
    )
if __name__ == "__main__":
    log_output("JOB START", "TASK")

    # The per-game generators write into OUTPUT_DIR, so it has to exist first.
    output_dir_missing = not os.path.exists(OUTPUT_DIR)
    if output_dir_missing:
        log_output(f"{OUTPUT_DIR} was not found. Creating this directory...")
        os.makedirs(OUTPUT_DIR)

    # Fetch every game, one after another, in this fixed order.
    sdvx_news_data = generate_sdvx_news_file()
    polaris_news_data = generate_polaris_chord_news_file()
    iidx_news_data = generate_iidx_news_file()
    ddr_news_data = generate_ddr_news_file()
    dance_rush_news_data = generate_dance_rush_news_file()
    dance_around_news_data = generate_dance_around_news_file()
    gitadora_news_data = generate_gitadora_news_file()
    popn_music_news_data = generate_popn_music_news_file()
    jubeat_news_data = generate_jubeat_news_file()
    nostalgia_news_data = generate_nostalgia_news_file()
    chunithm_jp_news_data = generate_chunithm_jp_news_file()
    maimaidx_jp_news_data = generate_maimaidx_jp_news_file()
    ongeki_jp_news_data = generate_ongeki_jp_news_file()
    idac_news_data = generate_idac_news_file()
    maimaidx_intl_news_data = generate_maimaidx_intl_news_file()
    chunithm_intl_news_data = generate_chunithm_intl_news_file()
    music_diver_news_data = generate_music_diver_news_file()
    street_fighter_news_data = generate_street_fighter_news_file()
    taiko_news_data = generate_taiko_news_file()
    wacca_plus_news = generate_wacca_plus_news_file()
    museca_plus_news = generate_museca_plus_news_file()
    # Generated for its own files only; its result is not part of the merged feed.
    generate_rbdx_plus_news_file()
    wmmt_news = generate_wmmt_news_file()

    # Feeds in the order they are handed to the merge.
    feeds_to_merge = (
        iidx_news_data,
        sdvx_news_data,
        ddr_news_data,
        gitadora_news_data,
        popn_music_news_data,
        jubeat_news_data,
        nostalgia_news_data,
        chunithm_jp_news_data,
        maimaidx_jp_news_data,
        ongeki_jp_news_data,
        idac_news_data,
        maimaidx_intl_news_data,
        chunithm_intl_news_data,
        music_diver_news_data,
        street_fighter_news_data,
        taiko_news_data,
        wacca_plus_news,
        museca_plus_news,
        polaris_news_data,
        dance_rush_news_data,
        dance_around_news_data,
        wmmt_news,
    )
    news = create_merged_feed(*feeds_to_merge)

    # NOTE(review): everything below is disabled in the source, so the merged
    # feed above is currently built but never archived or written out.
    # log_output("Creating merged news.json file for all news that are within " + str(constants.DAYS_LIMIT) + " days old")
    # log_output("Computing and Attaching Archived IDs for merged feed")
    # for item in news:
    #     if 'archive_hash' not in item:
    #         hash_value = compute_json_hash(json.dumps(item, sort_keys=True))
    #         item['archive_hash'] = hash_value
    # if ARCHIVE_NEWS:
    #     save_news_to_db(news)
    # with open(OUTPUT_DIR+'/news.json', 'w') as json_file:
    #     json.dump(attach_news_meta_data(news), json_file)
    # log_output("JOB DONE", "TASK")
|