diff options
| -rw-r--r-- | fileutil.py | 14 | ||||
| -rw-r--r-- | nijitrack.py | 73 | ||||
| -rw-r--r-- | routes.py | 25 |
3 files changed, 81 insertions, 31 deletions
diff --git a/fileutil.py b/fileutil.py
index baf62f0..bccc36d 100644
--- a/fileutil.py
+++ b/fileutil.py
@@ -2,6 +2,8 @@ import os.path
 import urllib.request
 import json
 import configparser
+import requests
+import hashlib
 
 
 def _read_file(path: str, lines=True) -> list:
@@ -90,3 +92,15 @@ def load_json_file(json_file_path: str) -> dict:
         return json.load(file)
 
 
+def compare_file_with_url(file_path, url):
+    try:
+        with open(file_path, "rb") as f:
+            local_hash = hashlib.md5(f.read()).hexdigest()
+        response = requests.get(url)
+        response.raise_for_status()
+        remote_hash = hashlib.md5(response.content).hexdigest()
+        print(f"HASH RESULT: {file_path} | {url} -> {local_hash == remote_hash}")
+        return local_hash == remote_hash
+    except Exception as e:
+        print(f"Error comparing {file_path} and {url}: {e}")
+        return False
diff --git a/nijitrack.py b/nijitrack.py
index 4b2faea..32f9d0b 100644
--- a/nijitrack.py
+++ b/nijitrack.py
@@ -173,17 +173,76 @@ def uploadFileToBucketR2(filepath: str) -> bool:
     )
     try:
         with open(filepath, "rb") as f:
-            s3.Bucket(bucket_name).upload_fileobj(f, filepath)
+            s3.Bucket(bucket_name).upload_fileobj(
+                Fileobj=f,
+                Key=filepath,
+                ExtraArgs={"ContentType": "application/json"}
+            )
+            print("Successfully uploaded", filepath, "to R2")
         return True
     except Exception as e:
         print("An error occurred while attempting to upload to R2")
         print(e)
         return False
 
-def generate_api_routes(bucket_type: BucketType):
-    import app
-    api_subscribers = app.api_subscribers()
-    print(api_subscribers)
+def generate_api_routes(check_exists, bucket_type: BucketType, server):
+    print(check_exists)
+    remote_api = os.environ.get("API_URL")
+    def write_file(filename: str, data):
+        import json
+        with open(filename, 'w', encoding='utf-8') as file:
+            json.dump(data, file, ensure_ascii=False, indent=2, default=str)
+
+    def handle_upload(filename: str):
+        if check_exists and fs.compare_file_with_url(filename, remote_api + "/" + filename):
+            print("Skipping upload. Data on remote matches for " + filename)
+            os.remove(filename)
+            return
+        if bucket_type == BucketType.B2:
+            uploadFileToBucketB2(filename)
+        elif bucket_type == BucketType.R2:
+            uploadFileToBucketR2(filename)
+        else:
+            print("Invalid bucket type specified.")
+            os.remove(filename)
+            return False
+        os.remove(filename)
+
+    import routes
+    query = f"SELECT name from {DATA_SETTING['TABLE_LIVE']}"
+    channel_names = [name[0] for name in server.execute_query(query)]
+
+    api_subscribers = routes.get_subscribers_data()
+    write_file("subscribers.json", api_subscribers)
+    handle_upload("subscribers.json")
+
+    api_groups = routes.get_group_mappings()
+    write_file("groups.json", api_groups)
+    handle_upload("groups.json")
+
+    for name in channel_names:
+        name_space_removed = name.replace(" ", "")
+
+        api_subscribers_timeseries = routes.get_channel_timeseries(name)
+        write_file(f"subscribers_{name_space_removed}.json", api_subscribers_timeseries)
+        handle_upload(f"subscribers_{name_space_removed}.json")
+
+        api_subscribers_7d = routes.get_channel_7d(name)
+        write_file(f"subscribers_{name_space_removed}_7d.json", api_subscribers_7d)
+        handle_upload(f"subscribers_{name_space_removed}_7d.json")
+
+        milestones = routes.get_channel_milestones(name)
+        write_file(f"milestones_{name_space_removed}.json", milestones)
+        handle_upload(f"milestones_{name_space_removed}.json")
+
+        diffs = routes.get_channel_diffs(name)
+        write_file(f"diffs_{name_space_removed}.json", diffs)
+        handle_upload(f"diffs_{name_space_removed}.json")
+
+        info = routes.get_channel_info(name)
+        write_file(f"info_{name_space_removed}.json", info)
+        handle_upload(f"info_{name_space_removed}.json")
+
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="NijiTrack - A Subscriber Tracker")
@@ -193,6 +252,7 @@ if __name__ == "__main__":
     parser.add_argument('--uploadGraph', action='store_true', help="Upload graph html to Backblaze B2")
     parser.add_argument('--uploadRoutes', action='store_true', help="Pre-generate every API route and upload it")
     parser.add_argument('--ff', action='store_true', help="Force a full refresh of all data (override daily refresh)")
+    parser.add_argument('--checkExists', action='store_true', help="If json data alrady exists on remote and its the same, skip upload")
     args = parser.parse_args()
     server = create_database_connection()
     initialize_database(server)
@@ -228,4 +288,5 @@ if __name__ == "__main__":
     if args.uploadRoutes:
         if upstream_bucket is None:
             print("Tried to upload routes but no remote source has been specified. Skipping....")
-        generate_api_routes(upstream_bucket)
+        generate_api_routes(args.checkExists, upstream_bucket, server)
+        print("DONE! Uploading Static API Routes")
diff --git a/routes.py b/routes.py
--- a/routes.py
+++ b/routes.py
@@ -29,31 +29,6 @@ def get_subscribers_data():
     } for row in data]
     return {"timestamp": datetime.datetime.now(), "channel_data": channel_data_list}
 
-def get_twitch_data():
-    server = create_database_connection()
-    query = '''
-    SELECT sd.*, h.*, ts.follower_count
-    FROM subscriber_data sd
-    INNER JOIN "24h_historical" h ON sd.channel_id = h.channel_id
-    LEFT JOIN twitch_stats ts ON sd.channel_id = ts.channel_id
-    ORDER BY sd.subscriber_count DESC
-    '''
-    data = server.execute_query(query)
-    channel_data_list = []
-    for row in data:
-        youtube_subs = row[4]
-        twitch_followers = row[-1] if row[-1] is not None else 0
-        total_followers = youtube_subs + twitch_followers
-        channel_data_list.append({
-            "channel_name": row[3],
-            "profile_pic": row[2],
-            "subscribers": youtube_subs,
-            "sub_org": row[5],
-            "twitch_followers": twitch_followers,
-            "total_sum": total_followers,
-        })
-    return {"timestamp": datetime.datetime.now(), "channel_data": channel_data_list}
-
 def get_channel_timeseries(channel_name):
     server = create_database_connection()
     query = "SELECT * FROM subscriber_data_historical WHERE name = %s AND timestamp > %s ORDER BY TO_CHAR(timestamp, 'YYYY-MM-DD')"
