about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Pinapelz <yukais@pinapelz.com> 2025-04-13 18:36:11 -0700
committer Pinapelz <yukais@pinapelz.com> 2025-04-13 18:36:11 -0700
commit fed975d2b2f8c1763f268c7e668c1d4b0cfabd92 (patch)
tree cc9b1a9758d64704f228bdcd291d0f29053eb2ec
parent 18a76b5bd09df3b1f34208bed4663939ca8f3f18 (diff)
feat: merged news feed
-rw-r--r-- bemani/iidx.py 1
-rw-r--r-- bemani/sdvx.py 1
-rw-r--r-- generate.py 47
-rw-r--r-- news_feed.py 12
-rw-r--r-- scrape.py 26
-rw-r--r-- sega/chuni_jp.py 1
6 files changed, 53 insertions, 35 deletions
diff --git a/bemani/iidx.py b/bemani/iidx.py
index 978ecbd..0d97e91 100644
--- a/bemani/iidx.py
+++ b/bemani/iidx.py
@@ -50,6 +50,7 @@ def parse_pinky_crush_news_site(html: str, base_url):
content = re.sub(r'\s*/\s*', '/', content)
news_items.append({
"date": date_str,
+ "identifier": "IIDX_PINKY_CRUSH",
"type": type_map[type_class],
"timestamp": timestamp,
"headline": headline,
diff --git a/bemani/sdvx.py b/bemani/sdvx.py
index 50772e8..83d0d7c 100644
--- a/bemani/sdvx.py
+++ b/bemani/sdvx.py
@@ -36,6 +36,7 @@ def parse_exceed_gear_news_site(html: str, base_url: str):
entries.append({
'date': date_str,
+ 'identifier': 'SOUND_VOLTEX_EXCEED_GEAR',
'type': None,
'timestamp': timestamp,
'headline': headline_text,
diff --git a/generate.py b/generate.py
new file mode 100644
index 0000000..209e924
--- /dev/null
+++ b/generate.py
@@ -0,0 +1,47 @@
+"""
+Generates news JSON files
+"""
+import news_feed as feed
+import constants
+import json
+import os
+
+from datetime import datetime, timedelta
+
+
+OUTPUT_DIR = "news"
+
+def create_merged_feed(*news_lists):
+ merged_feed = []
+ for news_list in news_lists:
+ merged_feed.extend(news_list)
+ cutoff_date = datetime.now() - timedelta(days=constants.DAYS_LIMIT)
+ filtered_feed = [news for news in merged_feed if datetime.fromtimestamp(news['timestamp']) >= cutoff_date]
+ sorted_feed = sorted(filtered_feed, key=lambda x: x['timestamp'], reverse=True)
+ return sorted_feed
+
+def attach_news_meta_data(news_data: list):
+ return {
+ "fetch_time": int(datetime.now().timestamp()),
+ "news_posts": news_data
+ }
+
+if __name__ == "__main__":
+ if not os.path.exists(OUTPUT_DIR):
+ os.makedirs(OUTPUT_DIR)
+
+ iidx_news_data = feed.get_news(constants.IIDX_PINKY_CRUSH_NEWS_SITE)
+ with open(OUTPUT_DIR+'/iidx_news.json', 'w') as json_file:
+ json.dump(attach_news_meta_data(iidx_news_data), json_file)
+
+ sdvx_news_data = feed.get_news(constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE)
+ with open(OUTPUT_DIR+'/sdvx_news.json', 'w') as json_file:
+ json.dump(attach_news_meta_data(sdvx_news_data), json_file)
+
+ chunithm_jp_news_data = feed.get_news(constants.CHUNITHM_NEWS_SITE, constants.CHUNITHM_VERSION.VERSE)
+ with open(OUTPUT_DIR+'/chunithm_jp_news.json', 'w') as json_file:
+ json.dump(attach_news_meta_data(chunithm_jp_news_data), json_file)
+
+ news = create_merged_feed(iidx_news_data, sdvx_news_data, chunithm_jp_news_data)
+ with open(OUTPUT_DIR+'/news.json', 'w') as json_file:
+ json.dump(attach_news_meta_data(news), json_file)
diff --git a/news_feed.py b/news_feed.py
index 5737cea..1a04e6c 100644
--- a/news_feed.py
+++ b/news_feed.py
@@ -1,7 +1,8 @@
"""
Generic format for a news entry. All keys are considered to be nullable
{
- 'date': JST date of news post
+ 'date': JST date of news post,
+ 'identifier': unique identifier for the game (usually some deriv. of the title),
'type': Type of post if available, otherwise if not provided it will be None (aka Generic news)
'timestamp': Unixtime of date above,
'headline': Headline,
@@ -16,7 +17,6 @@ Generic format for a news entry. All keys are considered to be nullable
"""
from email.utils import parsedate_to_datetime
-from datetime import datetime
from site_scraper import SiteScraper
import bemani.sdvx as sound_voltex
import bemani.iidx as iidx
@@ -25,7 +25,6 @@ import constants
def get_news(news_url: str, version=None) -> list:
scraper = SiteScraper(headless=True)
- news_json = {}
site_data = scraper.get_page_source(news_url)
if news_url == constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE:
news_posts = sorted(sound_voltex.parse_exceed_gear_news_site(site_data, constants.EAMUSEMENT_BASE_URL), key=lambda x: x['timestamp'], reverse=True)
@@ -37,9 +36,4 @@ def get_news(news_url: str, version=None) -> list:
else:
news_posts = []
scraper.close()
- news_json = {
- "fetch_date": int(datetime.now().timestamp()),
- "posts": news_posts
-
- }
- return news_json
+ return news_posts
diff --git a/scrape.py b/scrape.py
deleted file mode 100644
index 8d1f467..0000000
--- a/scrape.py
+++ /dev/null
@@ -1,26 +0,0 @@
-"""
-Generates news JSON files
-"""
-import news_feed as feed
-import constants
-import json
-import os
-
-
-OUTPUT_DIR = "news"
-
-if __name__ == "__main__":
- if not os.path.exists(OUTPUT_DIR):
- os.makedirs(OUTPUT_DIR)
-
- iidx_news_data = feed.get_news(constants.IIDX_PINKY_CRUSH_NEWS_SITE)
- with open(OUTPUT_DIR+'/iidx_news.json', 'w') as json_file:
- json.dump(iidx_news_data, json_file)
-
- sdvx_news_data = feed.get_news(constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE)
- with open(OUTPUT_DIR+'/sdvx_news.json', 'w') as json_file:
- json.dump(sdvx_news_data, json_file)
-
- chunithm_jp_news_data = feed.get_news(constants.CHUNITHM_NEWS_SITE, constants.CHUNITHM_VERSION.VERSE)
- with open(OUTPUT_DIR+'/chunithm_jp_news.json', 'w') as json_file:
- json.dump(chunithm_jp_news_data, json_file)
diff --git a/sega/chuni_jp.py b/sega/chuni_jp.py
index df727d4..a45872d 100644
--- a/sega/chuni_jp.py
+++ b/sega/chuni_jp.py
@@ -54,6 +54,7 @@ def parse_chuni_jp_verse_news_site(html: str):
images["image"] = img_tag.get("src")
images["link"] = news_url
news_dict["images"] = images
+ news_dict["identifier"] = "CHUNITHM_JP_VERSE"
news_entries.append(news_dict)
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage