about | summary | refs | log | tree | commit | diff | stats
path: root/generate.py
diff options
context:
space:
mode:
Diffstat (limited to 'generate.py')
-rw-r--r-- generate.py 43
1 file changed, 27 insertions, 16 deletions
diff --git a/generate.py b/generate.py
index 194b909..afb96a8 100644
--- a/generate.py
+++ b/generate.py
@@ -7,6 +7,7 @@ from ast import Constant
import news_feed as feed
import constants
import json
+import hashlib
import os
from datetime import datetime, timedelta
@@ -14,18 +15,24 @@ from datetime import datetime, timedelta
OUTPUT_DIR = "news"
+def compute_json_hash(data):
+ return hashlib.sha256(json.dumps(data, sort_keys=True).encode('utf-8')).hexdigest()
+
+
def create_merged_feed(*news_lists, limit=constants.DAYS_LIMIT):
"""
- Merge multiple news feeds into a singular one
- limit = maximum number of days old to be included in the merged feed
+ Merge multiple news feeds into one list, sorted newest first.
+ Only includes news newer than `limit` days.
"""
- merged_feed = []
- for news_list in news_lists:
- merged_feed.extend(news_list)
- cutoff_date = datetime.now() - timedelta(days=limit)
- filtered_feed = [news for news in merged_feed if datetime.fromtimestamp(news['timestamp']) >= cutoff_date]
- sorted_feed = sorted(filtered_feed, key=lambda x: x['timestamp'], reverse=True)
- return sorted_feed
+ cutoff = datetime.now() - timedelta(days=limit)
+ recent_items = (
+ item
+ for news_list in news_lists
+ for item in news_list
+ if datetime.fromtimestamp(item['timestamp']) >= cutoff
+ )
+ return sorted(recent_items, key=lambda x: x['timestamp'], reverse=True)
+
def attach_news_meta_data(news_data: list):
"""
@@ -37,6 +44,7 @@ def attach_news_meta_data(news_data: list):
"news_posts": news_data
}
+
def log_output(message: str, type: str="DEBUG"):
"""
Prints a log line output with a timestamp
@@ -44,21 +52,24 @@ def log_output(message: str, type: str="DEBUG"):
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"[{timestamp}] [{type}]: {message}")
+
def generate_news_file(filename, url, version=None):
log_output(f"Fetching {filename.upper()} News Data", "NEWS")
news_data = feed.get_news(url, version) if version else feed.get_news(url)
- if len(news_data) != 0:
+ path = f"{OUTPUT_DIR}/{filename}.json"
+ if news_data:
log_output(f"Success. Got {filename.upper()} News Data. Saving to file...", "NEWS")
- with open(f"{OUTPUT_DIR}/{filename}.json", 'w') as json_file:
- json.dump(attach_news_meta_data(news_data), json_file)
- elif os.path.exists(f"{OUTPUT_DIR}/{filename}.json"):
- print(f"Failed. Couldn't fetch {filename.upper()} data. Using previously scraped data", "NEWS")
- with open(f"{OUTPUT_DIR}/{filename}.json", 'r') as json_file:
+ with open(path, 'w') as f:
+ json.dump(attach_news_meta_data(news_data), f, indent=2)
+ elif os.path.exists(path):
+ log_output(f"Failed. Couldn't fetch {filename.upper()} data. Using previously scraped data", "NEWS")
+ with open(path, 'r') as json_file:
news_data = json.load(json_file)['news_posts']
else:
- print(f"Failed. Couldn't fetch {filename.upper()} data. Skipping...", "NEWS")
+ log_output(f"Failed. Couldn't fetch {filename.upper()} data. Skipping...", "NEWS")
return news_data
+
# For e-amusement games you can choose to pull from a specific implementation of the scraper or the generic feed provided
# by the e-amusement app. Information is different
def generate_iidx_news_file(eamuse_feed: bool=False):
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage