From 70237b5a5d82e8425eb5870a975bde497a6def08 Mon Sep 17 00:00:00 2001 From: Pinapelz Date: Tue, 20 Jun 2023 02:14:34 -0700 Subject: Refactored entire codebase - Replaced most code with HTML templates - Fixed some janky SQL queries - Replaced config.py with ini and json --- .gitignore | 163 +++++++++++++++++++- config.ini | 31 ++++ config.json | 7 - config.py | 54 ------- data/exclude_channel.txt | 28 ++++ data/last_refresh.txt | 2 +- decorators.py | 16 ++ fileutil.py | 81 +++++----- html_builders/builder.py | 109 ++++++++++++++ html_builders/elements.py | 91 +++++++++++ html_builders/graphs.py | 32 ++++ html_builders/tables.py | 72 +++++++++ html_gen/html_gen.py | 375 ---------------------------------------------- main.py | 168 --------------------- nijitrack.py | 93 ++++++++++++ sql/graph_builder.py | 128 ---------------- sql/sql_handler.py | 76 ++++++---- sql/table_builder.py | 252 ------------------------------- sql_table_config.json | 7 + webapi/holodex.py | 66 ++++---- 20 files changed, 775 insertions(+), 1076 deletions(-) create mode 100644 config.ini delete mode 100644 config.json delete mode 100644 config.py create mode 100644 decorators.py create mode 100644 html_builders/builder.py create mode 100644 html_builders/elements.py create mode 100644 html_builders/graphs.py create mode 100644 html_builders/tables.py delete mode 100644 html_gen/html_gen.py delete mode 100644 main.py create mode 100644 nijitrack.py delete mode 100644 sql/graph_builder.py delete mode 100644 sql/table_builder.py create mode 100644 sql_table_config.json diff --git a/.gitignore b/.gitignore index 79dfcc8..2dcdbd2 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,165 @@ config.json .venv .github main.py -.idea \ No newline at end of file +.idea + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..17217c1 --- /dev/null +++ b/config.ini @@ -0,0 +1,31 @@ +[SQL] +; Database connection details +host = localhost +user = root +password = +database = Nijitrack_Dev + +[TABLES] +; Names of the tables that should be created in the database + +; Current subscriber data +live = subscriber_data +historical = subscriber_data_historical +daily = 24h_historical + +[API] +holodex = HOLODEXAPI + +[PATH] +root_html = stats + +[WEBSITE] +; Website details +homepage = https://nijitracker.com +title = Nijitracker +description = A site that tracks the historical subscriber data for Nijisanji affiliated livers +icon = https://raw.githubusercontent.com/pinapelz/NijiTrack/master/assets/icon.png +timezone = PST +footer_message = This is a demo of Nijitrack, a way to track historical subscriber data for any set of channels on YouTube.
This webpage is not affiliated with ANYCOLOR or any of the channels listed here in any way
Date Started: 2023-03-26 + + diff --git a/config.json b/config.json deleted file mode 100644 index 63c1ac9..0000000 --- a/config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "address": "127.0.0.1", - "user": "root", - "password": "", - "youtube_api_key": "key", - "holodex_api_key": "key" -} diff --git a/config.py b/config.py deleted file mode 100644 index 3b2404c..0000000 --- a/config.py +++ /dev/null @@ -1,54 +0,0 @@ -""" -This file contains all the constants used in the main.py file -""" - -""" -0 = Holodex -1 = YouTube API -""" -MODE = 1 - -# General database configurations -DB_NAME = "pinapelz$nijitrack" -TABLE_NAME = "subscriber_data" -TABLE_COLUMNS = "id INT PRIMARY KEY AUTO_INCREMENT, channel_id VARCHAR(255), profile_pic VARCHAR(255), name VARCHAR(255), subscriber_count INT, timestamp DATETIME" -HISTORICAL_TABLE_NAME = "subscriber_data_historical" -DAY_DIFF_TABLE_NAME = "24h_historical" # Stores the difference in subscriber count for each channel in the past 24 hours -DIFF_COLUMNS = "id INT PRIMARY KEY AUTO_INCREMENT, channel_id VARCHAR(255), sub_diff INT" # Stores the difference in subscriber count for each channel in the past 24 hours -table_columns = "channel_id, profile_pic, name, subscriber_count, timestamp" -diff_columns = "channel_id, sub_diff" - -HEADER_TITLE = "Nijitracker" # Shows at the top of the page - -# Message at the bottom of the webpage -FOOTER_MESSAGE = """ -This is a demo of Nijitrack, a way to track historical subscriber data for any set of channels on YouTube.
-This webpage is not affiliated with ANYCOLOR or any of the channels listed here in any way
-Date Started: 2023-03-26 -""" - -# (Optional) for things like Discord embeds or Twitter cards -META_DATA_TITLE = "Nijitracker" -META_DATA_DESCRIPTION = "A site that tracks the historical subscriber data for Nijisanji affiliated livers (demo)" -META_PROFILE_PIC = "https://raw.githubusercontent.com/pinapelz/NijiTrack/master/assets/icon.png" - - -COLOR_THEME = "2a4b71" -ROOT_URL = "https://nijitracker.com" -TIMEZONE = "PST" -TIMEZONE_STR = "Pacific Standard Time" - -ROOT_STORAGE_PATH = "" - -# Skip the options below if you are not using Holodex -# Organization names must match the names on Holodex (case sensitive) -HOLODEX_ORG = "Nijisanji,Hololive" # Can be a comma separated list of organizations -ORG_MEMBER_COUNT = 300 -UPDATE_LOCAL_RECORDS = True # If true, will update the local records of the channels.txt and exclude_channels.txt file - -# URL for pulling the channels.txt and exclude_channels.txt file from a URL (should be raw txt form) -# Provide link to the directory/base url that contains the channels.txt and exclude_channels.txt files -EXTERNAL_DATA_URL = "https://raw.githubusercontent.com/pinapelz/NijiTrack/pettan-track-data/data/" # You'll need to manually call this -REFRESH_DAILY = False # If true, will refresh the data from the external data url every day - - diff --git a/data/exclude_channel.txt b/data/exclude_channel.txt index 0756eee..d0d5b11 100644 --- a/data/exclude_channel.txt +++ b/data/exclude_channel.txt @@ -26,6 +26,34 @@ UCKQi12nOGZsJ5nOuCTHErmA UCSUZugdxy9Wcrkp781cFt1w UCTi_rzf5QIkXjhJjkbcAdTg UCpfjQCCavrO-rnKaAaIF9dg +UCX7YkU9nEeaoZbkVLVajcMg +UCwi4P78SVunSYAGrvC9aKcw +UCNRh9kkByBTYLo0IJupnAug +C4Jyg9gFStHO8r5n4ya7XCQ +UCStzBFfjwFYb0qSYHnzFZvw +UCrhf6HYKnV6bxyaB6ooJzbw +UCfki3lMEF6SGBFiFfo9kvUA +UCtHFXfrn52juTqGBN4WbMVw +UCbLgcjfsUaCUgJh9SVit8kw +UC_D2DNy-KUNQJ_NGMppgmyg +UCxWcO9CLti4uouUIS5IIF-Q +UC4Jyg9gFStHO8r5n4ya7XCQ +UCz6vnIbgiqFT9xUcD6Bp65Q +UC-JSeFfovhNsEhftt1WHMvg +UC0PwyIlUefx1LGWjFI0QjMg +UCRzHROJUp7Wg900p_fXtJtQ +UC2NtHPaDUA5htYsjVrO8Gng +UCLSzgV37Dt24T8p3-TNiSLg +UC8vZcu6W-EJ6UYEy_C31c-A +UCh00mQw8BCrTbchYWGz4JTQ +UCWwwXXd_RPzj3LAyEfBslsg +UCINqoksO3CQPCt8a-mRe-Ew +UCF1JdALrXgub24weQpqDy9Q +UCvQIBipkXlXXVdGga9sn8dw +UCKQi12nOGZsJ5nOuCTHErmA +UCSUZugdxy9Wcrkp781cFt1w +UCTi_rzf5QIkXjhJjkbcAdTg +UCpfjQCCavrO-rnKaAaIF9dg UC0lik9pHju6ONgkBh7N5wHw UC1ZV7KBscK0EMoJKFu1DnDg UC69URn8iP4u8D_zUp-IJ1sg diff --git a/data/last_refresh.txt b/data/last_refresh.txt index 9552d2a..92719ce 100644 --- a/data/last_refresh.txt +++ b/data/last_refresh.txt @@ -1 +1 @@ -2023-04-06 \ No newline at end of file +2023-06-20 \ No newline at end of file diff --git a/decorators.py b/decorators.py new file mode 100644 index 0000000..bc0b420 --- /dev/null +++ b/decorators.py @@ -0,0 +1,16 @@ +import time + + +def log(message: str): + def decorator(func): + def wrapper(*args, **kwargs): + print("TASK: " + message) + start = time.time() + result = func(*args, **kwargs) + end = time.time() + print(f"COMPLETE: {message} {round(end - start, 3)} seconds") + return result + + return wrapper + + return decorator diff --git a/fileutil.py b/fileutil.py index 6cc1351..b8f6015 100644 --- a/fileutil.py +++ b/fileutil.py @@ -2,6 +2,7 @@ import os.path import urllib.request import json import time +import configparser def _read_file(path: str, lines=True) -> list: @@ -12,29 +13,11 @@ def _read_file(path: str, lines=True) -> list: return file.read().splitlines() -def get_login_data(path="config.json"): - # gets login data from config.json - data = json.loads(_read_file(os.path.join(path), lines=False)) - try: - return data["address"], data["user"], data["password"] - except KeyError: - raise Exception("Login data not found") - - -def get_api_key(api: str, path="config.json"): - # gets api key from config.json - data = json.loads(_read_file(os.path.join(path), lines=False)) - try: - return data[api] - except KeyError: - raise Exception(f"API key for {api} not found") - - def get_excluded_channels(): # gets excluded channels from exclude_channel.txt - if not os.path.exists(os.path.join("data","exclude_channel.txt")): - open(os.path.join("data","exclude_channel.txt"), "w").close() - excluded_channels = _read_file(os.path.join("data","exclude_channel.txt")) + if not os.path.exists(os.path.join("data", "exclude_channel.txt")): + open(os.path.join("data", "exclude_channel.txt"), "w").close() + excluded_channels = _read_file(os.path.join("data", "exclude_channel.txt")) return excluded_channels @@ -49,10 +32,11 @@ def save_local_channels(data: list, path: str = "data"): open(path, "w").close() with open(path, "w", encoding="utf-8") as file: for channel in data: - if channel['id'] in excluded_channels: + if channel["id"] in excluded_channels: continue file.write(f"{channel['id']},{channel['english_name']}\n") + def get_local_channels(path: str = "data"): """ Get the channel names and ids locally for when the API is down @@ -61,28 +45,57 @@ def get_local_channels(path: str = "data"): if not os.path.exists(path): raise Exception("Local channel data not found") with open(path, "r", encoding="utf-8") as file: - rows = file.read().splitlines() + rows = file.read().splitlines() return [tuple(row.split(",")) for row in rows] + def check_diff_refresh(): - if not os.path.exists(os.path.join("data","last_refresh.txt")): - with open(os.path.join("data","last_refresh.txt"), "w", encoding="utf-8") as file: + if not os.path.exists(os.path.join("data", "last_refresh.txt")): + with open( + os.path.join("data", "last_refresh.txt"), "w", encoding="utf-8" + ) as file: file.write(time.strftime("%Y-%m-%d")) return True - with open(os.path.join("data","last_refresh.txt"), "r", encoding="utf-8") as file: + with open(os.path.join("data", "last_refresh.txt"), "r", encoding="utf-8") as file: last_refresh = file.read() if last_refresh != time.strftime("%Y-%m-%d"): - with open(os.path.join("data","last_refresh.txt"), "w", encoding="utf-8") as file: + with open( + os.path.join("data", "last_refresh.txt"), "w", encoding="utf-8" + ) as file: file.write(time.strftime("%Y-%m-%d")) return True + def update_data_files(url: str) -> None: # Updates the local txt channel data stored in data folder - if not os.path.exists(os.path.join("data","channels.txt")): - open(os.path.join("data","channels.txt"), "w").close() - urllib.request.urlretrieve(url+"channels.txt", os.path.join("data","channels.txt")) + if not os.path.exists(os.path.join("data", "channels.txt")): + open(os.path.join("data", "channels.txt"), "w").close() + urllib.request.urlretrieve( + url + "channels.txt", os.path.join("data", "channels.txt") + ) # downloaded txt file from url and write to channels.txt - - if not os.path.exists(os.path.join("data","exclude_channel.txt")): - open(os.path.join("data","exclude_channel.txt"), "w").close() - urllib.request.urlretrieve(url+"exclude_channel.txt", os.path.join("data","exclude_channel.txt")) + + if not os.path.exists(os.path.join("data", "exclude_channel.txt")): + open(os.path.join("data", "exclude_channel.txt"), "w").close() + urllib.request.urlretrieve( + url + "exclude_channel.txt", os.path.join("data", "exclude_channel.txt") + ) + + +def load_config(ini_filepath: str) -> dict: + config_object = configparser.ConfigParser() + file = open(ini_filepath, "r") + config_object.read_file(file) + output_dict = {} + sections = config_object.sections() + for section in sections: + output_dict[section] = {} + for key in config_object[section]: + output_dict[section][key] = config_object[section][key] + return output_dict + +def load_json_file(json_file_path: str) -> dict: + with open(json_file_path, "r", encoding="utf-8") as file: + return json.load(file) + + diff --git a/html_builders/builder.py b/html_builders/builder.py new file mode 100644 index 0000000..dea2e8c --- /dev/null +++ b/html_builders/builder.py @@ -0,0 +1,109 @@ +from jinja2 import Environment, FileSystemLoader +import html_builders.elements as elements +import html_builders.graphs as graphs +import html_builders.tables as tables +import os +import time + +MENU_ITEMS = [ + ("Nijitracker", "https://www.nijitracker.com"), + ("Pettantracker", "https://nijitracker.com/pettantrack") +] + + +def build_ranking_page(server, CONFIG: dict, exclude_channels: list = []): + page_path = os.path.join(CONFIG["PATH"]["root_html"], "index.html") + if not os.path.exists(page_path): + os.makedirs(os.path.dirname(page_path), exist_ok=True) + file_loader = FileSystemLoader("templates") + env = Environment(loader=file_loader) + template = env.get_template("ranking.html") + + input_dict = { + "meta_image_url": CONFIG["WEBSITE"]["icon"], + "meta_description": CONFIG["WEBSITE"]["description"], + "meta_title": CONFIG["WEBSITE"]["title"], + "title_banner": elements.build_title_banner( + CONFIG["WEBSITE"]["title"], + MENU_ITEMS + ), + "ranking_graph": graphs.plot_subscriber_count_over_time(server, CONFIG["TABLES"]["historical"], exclude_channels=exclude_channels), + "divider": "Last Updated: " + time.strftime('%Y-%m-%d %H:%M:%S') + " " + CONFIG["WEBSITE"]["timezone"], + "ranking_table": tables.generate_html_table(server, CONFIG["TABLES"]["live"], CONFIG["TABLES"]["daily"]), + "footer": CONFIG["WEBSITE"]["footer_message"] + } + output = template.render(input_dict) + with open(page_path, "w", encoding="utf-8") as f: + f.write(output) + +def build_individual_page(server, CONFIG: dict, channel_data: str): + def transform_sql_string(string: str) -> str: + return string.encode("ascii", "ignore").decode().replace("'", "''") + channel_id = channel_data["id"] + desc = channel_data["description"] + pfp = channel_data["photo"] + sub_count = channel_data["subscriber_count"] + channel_name = channel_data["english_name"] + if channel_name is None: + channel_name = channel_data["name"] + channel_name = transform_sql_string(channel_name) + sub_count_str = "{:,.0f}".format(int(sub_count)) + page_path = os.path.join(CONFIG["PATH"]["root_html"], channel_name + ".html") + file_loader = FileSystemLoader("templates") + env = Environment(loader=file_loader) + template = env.get_template("individual.html") + input_dict = { + "meta_image_url": CONFIG["WEBSITE"]["icon"], + "meta_description": CONFIG["WEBSITE"]["description"], + "meta_title": channel_name + " - " + CONFIG["WEBSITE"]["title"], + "homepage_url": CONFIG["WEBSITE"]["homepage"], + "sub_text": sub_count_str + " Subscribers", + "name": channel_name, + "profile_pic": pfp, + "description": desc, + "channel_id": channel_id, + "projection_card": elements.build_projection_card(server, CONFIG["TABLES"]["historical"], int(sub_count), channel_name, timezone = CONFIG["WEBSITE"]["timezone"]), + "subscriber_trend": graphs.plot_subscriber_count_over_time(server, CONFIG["TABLES"]["historical"], gtitle = "Subscriber Count Over Time for " + channel_name, overrideQuery = f"SELECT name, subscriber_count, timestamp, channel_id FROM {CONFIG['TABLES']['historical']} WHERE channel_id = '{channel_id}' ORDER by timestamp DESC", markers = "lines+markers"), + "divider": "Recent Subscriber Data:", + "weekly_trend": graphs.plot_subscriber_count_over_time(server, CONFIG["TABLES"]["historical"], gtitle = "Weekly Subscriber Count Over Time for " + channel_name, overrideQuery = f"SELECT name, subscriber_count, timestamp, channel_id FROM {CONFIG['TABLES']['historical']} WHERE channel_id = '{channel_id}' AND timestamp >= DATE_SUB(NOW(), INTERVAL 7 DAY) ORDER by timestamp DESC", markers = "lines+markers"), + "full_table_url": CONFIG["WEBSITE"]["homepage"] + "/tables/"+channel_name + } + output = template.render(input_dict) + with open(page_path, "w", encoding="utf-8") as f: + f.write(output) + +def build_table_page(server, CONFIG: dict, channel_data: str): + def transform_sql_string(string: str) -> str: + return string.encode("ascii", "ignore").decode().replace("'", "''") + channel_id = channel_data["id"] + desc = channel_data["description"] + pfp = channel_data["photo"] + sub_count = channel_data["subscriber_count"] + channel_name = channel_data["english_name"] + if channel_name is None: + channel_name = channel_data["name"] + channel_name = transform_sql_string(channel_name) + + file_loader = FileSystemLoader("templates") + env = Environment(loader=file_loader) + template = env.get_template("full_table.html") + + input_dict = { + "meta_image_url": CONFIG["WEBSITE"]["icon"], + "meta_description": CONFIG["WEBSITE"]["description"], + "meta_title": channel_name + " - " + CONFIG["WEBSITE"]["title"], + "homepage_url": CONFIG["WEBSITE"]["homepage"], + "sub_text": channel_name, + "full_table": tables.generate_individual_table(server, CONFIG["TABLES"]["historical"], channel_name), + "name": channel_name, + "profile_pic": pfp, + "description": desc, + "channel_id": channel_id, + } + output = template.render(input_dict) + page_path = os.path.join("tables", channel_name + ".html") + with open(page_path, "w", encoding="utf-8") as f: + f.write(output) + + + \ No newline at end of file diff --git a/html_builders/elements.py b/html_builders/elements.py new file mode 100644 index 0000000..41c8525 --- /dev/null +++ b/html_builders/elements.py @@ -0,0 +1,91 @@ +import math +import pandas as pd +import warnings +from datetime import datetime, timedelta + +def build_title_banner(text: str, menu_items): + banner_html = f""" + + " + return banner_html + +def build_projection_card(server, table_name, curr_subscribers, channel_name, + timezone = "Pacific Standard Time"): + def get_next_milestone(subscriber_count): + num_digits = len(str(subscriber_count)) + if num_digits <= 4: + milestone_interval = 10000 + else: + milestone_interval = 10 ** (num_digits - 1) + next_milestone = math.ceil( + subscriber_count / milestone_interval) * milestone_interval + + return next_milestone + + warnings.filterwarnings('ignore') # Ignore pandas warning regarding pyodbc + def create_milestone_card(time_until_milestone, next_milestone, not_enough_data = False, + declining = False): + now = datetime.now() + milestone_date = ( + now + timedelta(seconds = time_until_milestone)).strftime('%Y-%m-%d') + relative_time = now + timedelta(seconds = time_until_milestone) - now + next_milestone_str = "{:,}".format(next_milestone) + if relative_time.days > 1: + relative_time_str = f"In {relative_time.days} days" + elif relative_time.days == 1: + relative_time_str = f"In {relative_time.days} day" + elif relative_time.days < 0: + relative_time_str = f"{-relative_time.days} days ago" + elif not_enough_data: + relative_time_str = "Not enough data" + elif declining: + relative_time_str = "Declining" + else: + relative_time_str = "Today" + card = f""" +
+
Next Milestone
+
{next_milestone_str}
+
+
Estimated Date:
+
{milestone_date}
+
+
+
{relative_time_str}
+
{timezone}
+
+
+
+ """ + return card + query = f"SELECT name, subscriber_count, timestamp FROM {table_name} WHERE timestamp >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY) AND name = \"{channel_name}\" ORDER BY timestamp DESC" + df = pd.read_sql_query(query, server.get_connection()) + df = df.sort_values(by = 'timestamp') + # get the rate of change from first data point to last data point + first_data = df.iloc[0] + last_data = df.iloc[-1] + delta_sub_count = last_data['subscriber_count'] - first_data['subscriber_count'] + delta_time = (last_data['timestamp'] - first_data['timestamp']).total_seconds() + + # Calculate the average rate of change of subscriber_count over time + avg_rate_of_change = delta_sub_count / delta_time + next_milestone = get_next_milestone(curr_subscribers) + if avg_rate_of_change == 0 or math.isnan(avg_rate_of_change) or math.isinf(avg_rate_of_change): + return create_milestone_card(0, next_milestone, not_enough_data = True) + if avg_rate_of_change < 0: + return create_milestone_card(0, next_milestone, declining = True) + time_to_next_milestone = ( + next_milestone - curr_subscribers) / avg_rate_of_change + return create_milestone_card(time_to_next_milestone, next_milestone) + + + diff --git a/html_builders/graphs.py b/html_builders/graphs.py new file mode 100644 index 0000000..2770880 --- /dev/null +++ b/html_builders/graphs.py @@ -0,0 +1,32 @@ +import plotly.graph_objs as go +import pandas as pd +import warnings +import math +from datetime import datetime, timedelta +import numpy as np +import mysql.connector +from mysql.connector import errorcode + +def plot_subscriber_count_over_time(server, table_name, gtitle = "Subscriber Count Over Time for Nijisanji Members", + overrideQuery = None, markers = "lines", exclude_channels = []): + warnings.filterwarnings('ignore') # Ignore pandas warning regarding pyodbc + query = f"SELECT name, subscriber_count, timestamp, channel_id FROM {table_name} ORDER by timestamp DESC" if overrideQuery is None else overrideQuery + df = pd.read_sql_query(query, server.get_connection()) + groups = df.groupby("name") + fig = go.Figure() + config = dict({'responsive': True, 'displaylogo': False, 'modeBarButtonsToAdd': ['pan2d', 'zoomIn2d', 'zoomOut2d']}) + for channel, group in groups: + if len(exclude_channels) != 0 and group['channel_id'].iloc[0] in exclude_channels: + continue + fig.add_trace(go.Scattergl( + x = group["timestamp"], y = group["subscriber_count"], name = channel, mode = markers, + showlegend = True)) + fig.update_layout( + title = {'text': gtitle, 'x': 0.5, 'xanchor': 'center', + 'yanchor': 'top', 'font': {'family': 'Arial', 'size': 30}}, + xaxis_title = "Timestamp", + yaxis_title = "Subscribers", + legend = dict(font = dict(size = 16), title = dict(text = "Channels")), + height = 950, + ) + return fig.to_html(config = config) \ No newline at end of file diff --git a/html_builders/tables.py b/html_builders/tables.py new file mode 100644 index 0000000..87d2b4a --- /dev/null +++ b/html_builders/tables.py @@ -0,0 +1,72 @@ +def generate_html_table(server, table, diff_table, headers=["Rank", "Liver", "Subscriber", "Difference (24hr)"], root_url="https://www.nijitracker.com"): + def get_daily_difference_subs(sub_count_str: str): + diff_cursor = server.get_connection().cursor() + diff_cursor.execute(query) + diff_data = diff_cursor.fetchall() + old_sub_count = int(diff_data[0][0]) + current_sub_count = int(sub_count_str) + if old_sub_count > current_sub_count: + return f"-{old_sub_count - current_sub_count}" + else: + return f"+{current_sub_count - old_sub_count}" + cursor = server.get_connection().cursor() + query = f"SELECT id, channel_id, name, subscriber_count, timestamp, profile_pic FROM {table} ORDER by subscriber_count DESC" + cursor.execute(query) + data = cursor.fetchall() + header = "" + for h in headers: + header += f"{h}" + header += "" + table_html = "" + table_html += header + rank = 1 + for row in data: + table_row = "" + table_row += f"" + rank += 1 + for i, col in enumerate(row): + if cursor.description[i][0] == "name": + channel_url = f"{root_url}/stats/{row[2]}" + profile_pic_url = row[5] + table_row += f"" + elif cursor.description[i][0] == "subscriber_count": + formatted_sub_count = "{:,.0f}".format(int(col)) + table_row += f"" + elif cursor.description[i][0] == "timestamp": + query = f"SELECT sub_diff FROM {diff_table} WHERE channel_id = '{row[1]}'" + try: + table_row += f"" + except IndexError: + raise Exception("Are you trying to use a new set of channels?\nPlease delete last_refresh.txt in data folder first!") + elif cursor.description[i][0] not in ["id", "channel_id", "profile_pic"]: + table_row += f"" + table_row += "" + table_html += table_row + table_html += "
{rank}{col}{formatted_sub_count}{get_daily_difference_subs(row[3])}{col}
" + cursor.close() + return table_html + +def generate_individual_table(server, table_name, channel_name, param="LIMIT 7"): + cursor = server.get_connection().cursor() + query = f"SELECT subscriber_count, timestamp FROM {table_name} WHERE name=\"{channel_name}\" GROUP BY DATE(timestamp) ORDER by timestamp DESC " + param + cursor.execute(query) + data = cursor.fetchall() + table = "" + header = "" + header_cols = ["Subscribers", "Timestamp"] + for col in header_cols: + header += f"" + header += "" + table += header + for row in data: + table += "" + for i, col in enumerate(row): + if cursor.description[i][0] == "subscriber_count": + formatted_sub_count = "{:,.0f}".format(int(col)) + table += f"" + else: + table += f"" + table += "" + table += "
{col}
{formatted_sub_count}{col}
" + cursor.close() + return table \ No newline at end of file diff --git a/html_gen/html_gen.py b/html_gen/html_gen.py deleted file mode 100644 index 10042d8..0000000 --- a/html_gen/html_gen.py +++ /dev/null @@ -1,375 +0,0 @@ - -def generate_html_divider(label_text): - divider = '
' - divider += '
' - divider += f'{label_text}' - divider += '
' - return divider - - -def generate_title_banner(title_text, color_theme="2a4b71"): - banner_html = f""" - - - - """ - return banner_html - - - - -def generate_meta_data(title: str, description: str, image_url: str): - return f""" - - - - - {title} - - - - - - - - - - - - - - - - - - - - - - - - """ - -def build_footer_info(info: str): - footer = f""" -
-
- Information -
-
-

Data for this site is taken every hour. Due to limitation with YouTube's API only certain increments in subscriber counts will be reflected based on each liver's channel size

-

Each liver has their own page to track their own individual achievements and subscriber count. The full table of all recorded daily data for each channel is also viewable

-
-
- A Note on Graduating Livers -
-
-

If a liver is determined to be graduating, retiring, or terminated, Nijitracker will stop tracking their subscriber count, and their page and entry will be removed

-

Historical data is still retrievable through the Nijitracker API api.nijitracker.com -

-
-
-

{info}

-
-
- - License - -
-
- """ - return footer - -def side_swipe_header(text: str, color_theme="007ACC", url: str="https://nijitracker.com"): - banner_html = f""" - - - - - """ - return banner_html - - - -def generate_info_card(name, youtube_channel_id, profile_pic, description): - # Build the HTML string - html = f""" -
- -
-
- {name} -
-
-

{name}

-

{description}

- YouTube Channel -
-
- """ - # Return the HTML string - return html - - -def generate_subscriber_info_card(subscriber_count): - formatted_sub_count = "{:,.0f}".format(int(subscriber_count)) - html_template = f''' -
-
-

{formatted_sub_count}

-

Subscribers

-
-
- ''' - return html_template - -def generate_full_table_button(url): - button_html = """ - -
- - """ - button_html += f"""View Full Table""" - return button_html - -def generate_doctype_footer(): - return """ - - - """ \ No newline at end of file diff --git a/main.py b/main.py deleted file mode 100644 index 702eb9f..0000000 --- a/main.py +++ /dev/null @@ -1,168 +0,0 @@ -import os -import sys -import time - -import sql.graph_builder as gb -import sql.table_builder as tb -import fileutil as fs -import html_gen.html_gen as ht -from sql.sql_handler import SQLHandler -from webapi.holodex import HolodexAPI -from webapi.youtube import YouTubeAPI -from config import * - -def update_database(server: SQLHandler, data): - server.clear_table(TABLE_NAME) - server.reset_auto_increment(TABLE_NAME) - excluded_channels = fs.get_excluded_channels() - refresh_diff_table = fs.check_diff_refresh() - for channel in data: - channel_id = channel['id'] - if channel_id in excluded_channels: - continue - pfp = channel['photo'] - name = channel['english_name'] - sub_count = channel['subscriber_count'] - - if name is None: # if the channel doesn't have an english name, use the japanese name - name = channel['name'] - name = name.encode("ascii", "ignore").decode() - - data_row = f"'{channel_id}', '{pfp}','{name}', {sub_count}, '{time.strftime('%Y-%m-%d %H:%M:%S')}'" - server.create_table(DAY_DIFF_TABLE_NAME, DIFF_COLUMNS) - - # Difference should only be calculated every 24 hours - # If the channel is new then calculate now, else then make sure 24 hours has passed since last reading - if refresh_diff_table or not server.check_row_exists(DAY_DIFF_TABLE_NAME, "channel_id", channel_id): - if REFRESH_DAILY: - generate_channel_files() - if not server.check_row_exists(DAY_DIFF_TABLE_NAME, "channel_id", channel_id): - server.insert_data(DAY_DIFF_TABLE_NAME, - diff_columns, f"'{channel_id}', {sub_count}") - else: - server.update_row(DAY_DIFF_TABLE_NAME, "channel_id", channel_id, "sub_diff", sub_count) - server.insert_data(HISTORICAL_TABLE_NAME, table_columns, data_row) - - # make updates to the main table - server.insert_data(TABLE_NAME, table_columns, data_row) - name_table = "channel_"+channel_id.lower()+"_subscriber_data" - name_table = name_table.replace("-", "$") - server.create_table(name_table, TABLE_COLUMNS) - server.insert_data(name_table, table_columns, data_row) - # server.drop_table(name_table) - -def generate_individual_stats(server: SQLHandler, data): - for channel in data: - channel_id = channel['id'] - if channel_id in fs.get_excluded_channels(): - continue - name = channel['english_name'] - desc = channel['description'] - pfp = channel['photo'] - sub_count = channel['subscriber_count'] - sub_count_str = "{:,.0f}".format(int(sub_count)) - if name is None: - name = channel['name'] - name = name.encode("ascii", "ignore").decode() - print("GENERATING PAGE FOR "+name+"...") - - # Calculate table key name stored in the database - table_key = "channel_"+channel_id.lower()+"_subscriber_data" - table_key = table_key.replace("-", "$") - - # Generate missing directories - if not os.path.exists("stats"): - os.mkdir("stats") - if not os.path.exists("tables"): - os.mkdir("tables") - - with open(ROOT_STORAGE_PATH+"stats/"+name+".html", "w", encoding="utf-8") as file: - range_query = f"SELECT name, subscriber_count, timestamp FROM {table_key} WHERE timestamp >= DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) ORDER BY timestamp DESC" - group_query = f"SELECT name, subscriber_count, timestamp FROM {table_key} GROUP BY DATE(timestamp) ORDER BY timestamp DESC" - file.write(ht.generate_meta_data(name + " - "+ META_DATA_TITLE, META_DATA_DESCRIPTION, META_PROFILE_PIC)+"\n"+ - ht.side_swipe_header(sub_count_str + " Subscribers", COLOR_THEME, ROOT_URL)+"\n"+ - ht.generate_info_card(name, channel_id, pfp, desc)+ - gb.generate_projection(server.host_name, server.username, server.password, DB_NAME, table_key, int(sub_count), TIMEZONE_STR)+ - gb.plot_subscriber_count_over_time(server.host_name, server.username, server.password, DB_NAME, table_name=table_key, gtitle=name+" Subscriber Trend", overrideQuery=group_query) + - ht.generate_html_divider("Recent Subscriber Data:") + - gb.plot_subscriber_count_over_time(server.host_name, server.username, server.password, DB_NAME, table_name=table_key, gtitle=name+" Last 7 Days Trend", overrideQuery=range_query, markers="lines+markers") + - tb.generate_individual_table(server.host_name, server.username, server.password, DB_NAME, table_key)+ - ht.generate_full_table_button(ROOT_URL+"/tables/"+name) + - ht.generate_doctype_footer() - ) - with open(ROOT_STORAGE_PATH+"tables/"+name+".html", "w", encoding="utf-8") as file: - file.write(ht.generate_meta_data(name + " - "+ META_DATA_TITLE, META_DATA_DESCRIPTION, META_PROFILE_PIC)+"\n"+ - ht.side_swipe_header(name, COLOR_THEME, ROOT_URL)+"\n"+ - ht.generate_info_card(name, channel_id, pfp, desc)+ - tb.generate_individual_table(server.host_name, server.username, server.password, DB_NAME, table_key, param="") + - ht.generate_doctype_footer()) - - -def main(mode=0): - address, user, password = fs.get_login_data() - server = SQLHandler(address, user, password, DB_NAME) - server.create_table(TABLE_NAME, TABLE_COLUMNS) - server.create_table(HISTORICAL_TABLE_NAME, TABLE_COLUMNS) - server.create_table(DAY_DIFF_TABLE_NAME, DIFF_COLUMNS) - excluded_channels = [] - data = [] - if mode == 0: - holodex_organizations = HOLODEX_ORG.split(",") - print("Running Holodex API Data Collection\n"+"Found "+str(len(holodex_organizations))+" organizations") - try: - for org in holodex_organizations: - hldex = HolodexAPI(fs.get_api_key("holodex_api_key"), member_count = ORG_MEMBER_COUNT, organization = org) - data += hldex.get_data_all_channels() - excluded_channels += hldex.get_exclude_channels() - except: - main(mode=1) - - elif mode == 1: - print("Running YouTube API Data Collection") - ytapi = YouTubeAPI(fs.get_api_key("youtube_api_key")) - data = ytapi.get_data_all_channels(fs.get_local_channels()) - - # Updating DB and generating HTML data - update_database(server, data) - excluded_channels += fs.get_excluded_channels() - with open(ROOT_STORAGE_PATH+"index.html", "w", encoding="utf-8") as file: - file.write(ht.generate_meta_data(META_DATA_TITLE, META_DATA_DESCRIPTION, META_PROFILE_PIC) + - ht.generate_title_banner(HEADER_TITLE, COLOR_THEME) + - gb.plot_subscriber_count_over_time(address, user, password, DB_NAME, HISTORICAL_TABLE_NAME, gtitle="Subscriber Count Over Time", exclude_channels=excluded_channels) + - "\n"+ht.generate_html_divider("Last Updated: "+time.strftime('%Y-%m-%d %H:%M:%S')+" "+ TIMEZONE) + - tb.sql_to_html_table(address, user, password, DB_NAME, TABLE_NAME, root_url=ROOT_URL) + - ht.build_footer_info(FOOTER_MESSAGE) + - ht.generate_doctype_footer()) - - # Generating individual pages - generate_individual_stats(server, data) - -def generate_channel_files(): - """ - Generates the channels.txt and exclude_channels.txt files based on Holodex listings - """ - if not UPDATE_LOCAL_RECORDS: - return - print("Running Channel Files Update") - active_channels = [] - exclude_channels = [] - if not os.path.exists("data"): - os.mkdir("data") - for org in HOLODEX_ORG.split(","): - hldex = HolodexAPI(fs.get_api_key("holodex_api_key"), member_count = ORG_MEMBER_COUNT, - organization = org) - hldex.get_data_all_channels() - active_channels += hldex.get_active_channels() - exclude_channels += hldex.get_exclude_channels() - with open("data/channels.txt", "w", encoding="utf-8") as file: - file.write("\n".join(active_channels)) - with open("data/exclude_channel.txt", "w", encoding="utf-8") as file: - file.write("\n".join(exclude_channels)) - print("Success! Channel Files Updated!") - -if __name__ == "__main2__": - MODE = 0 - if len(sys.argv) > 1: - MODE = int(sys.argv[1]) - ROOT_STORAGE_PATH = sys.argv[2] - main(MODE) diff --git a/nijitrack.py b/nijitrack.py new file mode 100644 index 0000000..3e7393e --- /dev/null +++ b/nijitrack.py @@ -0,0 +1,93 @@ +import os +import sys +import time + +import fileutil as fs +from sql.sql_handler import SQLHandler +from webapi.holodex import HolodexAPI +from webapi.youtube import YouTubeAPI +from decorators import * +import html_builders.builder as builder + + + + +CONFIG = fs.load_config("config.ini") +DATA_SETTING = fs.load_json_file("sql_table_config.json") + + +@log("Initializing Database") +def initialize_database(server: SQLHandler): + server.create_table(name = CONFIG["TABLES"]["live"], column = DATA_SETTING["LIVE_COLUMNS"]) + server.create_table(name = CONFIG["TABLES"]["historical"], column = DATA_SETTING["LIVE_COLUMNS"]) + server.create_table(name = CONFIG["TABLES"]["daily"], column = DATA_SETTING["DAILY_COLUMNS"]) + + +@log("Inserting Live Data into Database") +def record_subscriber_data(data: list): + def transform_sql_string(string: str) -> str: + return string.encode("ascii", "ignore").decode().replace("'", "''") + def record_diff_data(data_tuple: tuple): + refresh_daily = fs.check_diff_refresh() + if not server.check_row_exists(CONFIG["TABLES"]["daily"], "channel_id", channel_id): + # data_tuple = (channel_id, pfp, channel_name, sub_count, time.strftime('%Y-%m-%d %H:%M:%S')) + server.insert_row(CONFIG["TABLES"]["daily"], DATA_SETTING["DAILY_HEADER"], (data_tuple[0], data_tuple[3])) + server.insert_row(name = CONFIG["TABLES"]["historical"], column = DATA_SETTING["LIVE_HEADER"], data=data_tuple) + return + elif refresh_daily: + server.update_row(CONFIG["TABLES"]["daily"], "sub_count", sub_count, "channel_id", channel_id) + server.insert_row(name = CONFIG["TABLES"]["historical"], column = DATA_SETTING["LIVE_HEADER"], data=data_tuple) + + exclude_channels = fs.get_excluded_channels() + for channel in data: + channel_id = channel["id"] + if channel_id in exclude_channels: + continue + pfp = channel["photo"] + sub_count = channel["subscriber_count"] + channel_name = channel["english_name"] + if channel_name is None: + channel_name = channel["name"] + channel_name = transform_sql_string(channel_name) + data_tuple = (channel_id, pfp, channel_name, sub_count, time.strftime('%Y-%m-%d %H:%M:%S')) + server.insert_row(name = CONFIG["TABLES"]["live"], column = DATA_SETTING["LIVE_HEADER"], data=data_tuple) + record_diff_data(data_tuple) + +@log("Generating Indvidual Channel Pages") +def generate_individual_pages(server: SQLHandler, data: list): + if not os.path.exists("stats"): + os.mkdir("stats") + if not os.path.exists("tables"): + os.mkdir("tables") + for channel in data: + # builder.build_individual_page(server, CONFIG, channel) + builder.build_table_page(server, CONFIG, channel) + quit() + + + +@log("Running Holodex Generation") +def holodex_generation(server: SQLHandler): + holodex_organizations = DATA_SETTING["HOLODEX_ORGS"].split(",") + server.clear_table(CONFIG["TABLES"]["live"]) + server.reset_auto_increment(CONFIG["TABLES"]["live"]) + holodex = HolodexAPI(CONFIG["API"]["holodex"]) + for organization in holodex_organizations: + holodex.set_organization(organization) + record_subscriber_data(holodex.get_subscriber_data()) + return holodex.get_generated_channel_data() + + + + +if __name__ == "__main__": + server = SQLHandler(CONFIG["SQL"]["host"], CONFIG["SQL"]["user"], CONFIG["SQL"]["password"], CONFIG["SQL"]["database"]) + # initialize_database(server) + channel_data = holodex_generation(server) + generate_individual_pages(server, channel_data) + # builder.build_ranking_page(server, CONFIG) + + + + + diff --git a/sql/graph_builder.py b/sql/graph_builder.py deleted file mode 100644 index df108b3..0000000 --- a/sql/graph_builder.py +++ /dev/null @@ -1,128 +0,0 @@ -import plotly.graph_objs as go -import pandas as pd -import mysql.connector -from mysql.connector import errorcode -import warnings -import math -from datetime import datetime, timedelta -import numpy as np - - -def plot_subscriber_count_over_time(host, username, password, database_name, table_name, - gtitle = "Subscriber Count Over Time for Nijisanji Members", - overrideQuery = None, markers = "lines", exclude_channels = []): - warnings.filterwarnings('ignore') # Ignore pandas warning regarding pyodbc - try: - cnx = mysql.connector.connect(user = username, password = password, - host = host, database = database_name) - except mysql.connector.Error as err: - if err.errno == errorcode.ER_ACCESS_DENIED_ERROR: - print("Something is wrong with your user name or password") - elif err.errno == errorcode.ER_BAD_DB_ERROR: - print("Database does not exist") - else: - print(err) - query = f"SELECT name, subscriber_count, timestamp, channel_id FROM {table_name} ORDER by timestamp DESC" if overrideQuery is None else overrideQuery - df = pd.read_sql_query(query, cnx) - groups = df.groupby("name") - fig = go.Figure() - config = dict({'responsive': True, 'displaylogo': False}) - for channel, group in groups: - if len(exclude_channels) != 0 and group['channel_id'].iloc[0] in exclude_channels: - continue - fig.add_trace(go.Scattergl( - x = group["timestamp"], y = group["subscriber_count"], name = channel, mode = markers, - showlegend = True)) - - fig.update_layout( - title = {'text': gtitle, 'x': 0.5, 'xanchor': 'center', - 'yanchor': 'top', 'font': {'family': 'Arial', 'size': 30}}, - xaxis_title = "Timestamp", - yaxis_title = "Subscribers", - legend = dict(font = dict(size = 16), title = dict(text = "Channels")), - height = 950 - ) - return fig.to_html(config = config) - - -def generate_projection(host, username, password, database_name, table_name, curr_subscribers, - timezone = "Pacific Standard Time"): - def get_next_milestone(subscriber_count): - num_digits = len(str(subscriber_count)) - if num_digits <= 4: - milestone_interval = 10000 - else: - milestone_interval = 10 ** (num_digits - 1) - next_milestone = math.ceil( - subscriber_count / milestone_interval) * milestone_interval - - return next_milestone - - warnings.filterwarnings('ignore') # Ignore pandas warning regarding pyodbc - try: - cnx = mysql.connector.connect(user = username, password = password, - host = host, database = database_name) - except mysql.connector.Error as err: - if err.errno == errorcode.ER_ACCESS_DENIED_ERROR: - print("Something is wrong with your user name or password") - elif err.errno == errorcode.ER_BAD_DB_ERROR: - print("Database does not exist") - else: - print(err) - - def create_milestone_card(time_until_milestone, next_milestone, not_enough_data = False, - declining = False): - now = datetime.now() - milestone_date = ( - now + timedelta(seconds = time_until_milestone)).strftime('%Y-%m-%d') - relative_time = now + timedelta(seconds = time_until_milestone) - now - next_milestone_str = "{:,}".format(next_milestone) - if relative_time.days > 1: - relative_time_str = f"In {relative_time.days} days" - elif relative_time.days == 1: - relative_time_str = f"In {relative_time.days} day" - elif relative_time.days < 0: - relative_time_str = f"{-relative_time.days} days ago" - elif not_enough_data: - relative_time_str = "Not enough data" - elif declining: - relative_time_str = "Declining" - else: - relative_time_str = "Today" - card = f""" -
-
Next Milestone
-
{next_milestone_str}
-
-
Estimated Date:
-
{milestone_date}
-
-
-
{relative_time_str}
-
{timezone}
-
-
-
- """ - - return card - - query = f"SELECT name, subscriber_count, timestamp FROM {table_name} WHERE timestamp >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY) ORDER BY timestamp DESC" - df = pd.read_sql_query(query, cnx) - df = df.sort_values(by = 'timestamp') - # get the rate of change from first data point to last data point - first_data = df.iloc[0] - last_data = df.iloc[-1] - delta_sub_count = last_data['subscriber_count'] - first_data['subscriber_count'] - delta_time = (last_data['timestamp'] - first_data['timestamp']).total_seconds() - - # Calculate the average rate of change of subscriber_count over time - avg_rate_of_change = delta_sub_count / delta_time - next_milestone = get_next_milestone(curr_subscribers) - if avg_rate_of_change == 0 or math.isnan(avg_rate_of_change) or math.isinf(avg_rate_of_change): - return create_milestone_card(0, next_milestone, not_enough_data = True) - if avg_rate_of_change < 0: - return create_milestone_card(0, next_milestone, declining = True) - time_to_next_milestone = ( - next_milestone - curr_subscribers) / avg_rate_of_change - return create_milestone_card(time_to_next_milestone, next_milestone) diff --git a/sql/sql_handler.py b/sql/sql_handler.py index d349f64..a02db52 100644 --- a/sql/sql_handler.py +++ b/sql/sql_handler.py @@ -12,19 +12,18 @@ class SQLHandler: host_name, user_name, user_password) self._load_database(database_name) - def _create_server_connection(self, host_name: str, user_name: str, user_password: str, exclude=None) -> mysql.connector: + def _create_server_connection(self, host_name: str, user_name: str, user_password: str) -> mysql.connector: connection = None try: - connection = mysql.connector.connect( - host=host_name, - user=user_name, - passwd=user_password - ) + connection = mysql.connector.connect(host=host_name, user=user_name, passwd=user_password) print("MySQL Database connection successful") except Error as err: print(f"Error: '{err}'") return connection + def get_connection(self): + return self.connection + def _create_database(self, cursor: str, database_name: str): try: cursor.execute( @@ -48,75 +47,77 @@ class SQLHandler: print(err) exit(1) - def create_table(self, table_name: str, table_columns: str): + def create_table(self, name: str, column: str): cursor = self.connection.cursor() try: - cursor.execute(f"CREATE TABLE {table_name} ({table_columns})") - print(f"Table {table_name} created successfully") + cursor.execute(f"CREATE TABLE {name} ({column})") + print(f"Table {name} created successfully") except Error as err: print(err) - def insert_data(self, table_name: str, table_columns: str, data: str): + def insert_row(self, name: str, column: str, data: tuple): cursor = self.connection.cursor() try: - cursor.execute( - f"INSERT INTO {table_name} ({table_columns}) VALUES ({data})") + placeholders = ', '.join(['%s'] * len(data)) + query = f"INSERT INTO {name} ({column}) VALUES ({placeholders})" + cursor.execute(query, data) self.connection.commit() - print("Data inserted successfully") + print("Data Inserted:", data) except Error as err: print("Error inserting data") print(err) - def clear_table(self, table_name: str): + + def clear_table(self, name: str): cursor = self.connection.cursor() try: - cursor.execute(f"DELETE FROM {table_name}") + cursor.execute(f"DELETE FROM {name}") self.connection.commit() print("Table cleared successfully") except Error as err: print("Error clearing table") print(err) - def reset_auto_increment(self, table_name: str): + def reset_auto_increment(self, name: str): cursor = self.connection.cursor() try: - cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1") + cursor.execute(f"ALTER TABLE {name} AUTO_INCREMENT = 1") self.connection.commit() print("Table reset successfully") except Error as err: print("Error resetting table") print(err) - def copy_rows_to_new_table(self, table_name: str, new_table_name: str, table_columns: str): + def copy_rows_to_new_table(self, name: str, new_name: str, column: str): cursor = self.connection.cursor() try: cursor.execute( - f"INSERT INTO {new_table_name} ({table_columns}) SELECT {table_columns} FROM {table_name}") + f"INSERT INTO {new_name} ({column}) SELECT {column} FROM {name}") cursor.execute( - f"ALTER TABLE {new_table_name} MODIFY COLUMN id INT AUTO_INCREMENT") + f"ALTER TABLE {new_name} MODIFY COLUMN id INT AUTO_INCREMENT") self.connection.commit() print("Rows copied successfully") except Error as err: print("Error copying rows") print(err) - def drop_table(self, table_name: str): + def drop_table(self, name: str): cursor = self.connection.cursor() try: - cursor.execute(f"DROP TABLE {table_name}") + cursor.execute(f"DROP TABLE {name}") self.connection.commit() print("Table dropped successfully") except Error as err: print("Error dropping table") print(err) - def check_row_exists(self, table_name: str, column_name: str, value: str): + def check_row_exists(self, name: str, column_name: str, value: str): """ Checks if a row exists in a table """ cursor = self.connection.cursor() try: - cursor.execute(f"SELECT * FROM {table_name} WHERE {column_name} = '{value}'") + cursor.execute(f"SELECT * FROM {name} WHERE {column_name} = '{value}'") result = cursor.fetchone() if result: return True @@ -126,15 +127,36 @@ class SQLHandler: print("Error checking row") print(err) - def update_row(self, table_name: str, column_name: str, search_val: str, replace_col:str, new_value: str): + def update_row(self, name: str, column_name: str, search_val: str, replace_col:str, new_value: str): """ Updates a row in a table """ cursor = self.connection.cursor() try: - cursor.execute(f"UPDATE {table_name} SET {replace_col} = '{new_value}' WHERE {column_name} = '{search_val}'") + cursor.execute(f"UPDATE {name} SET {replace_col} = '{new_value}' WHERE {column_name} = '{search_val}'") self.connection.commit() print("Row updated successfully") except Error as err: print("Error updating row") - print(err) \ No newline at end of file + print(err) + + def execute_query(self, query: str): + cursor = self.connection.cursor() + try: + cursor.execute(query) + result = cursor.fetchall() + return result + except Error as err: + print("Error executing query") + print(err) + + def get_query_result(self, query: str): + cursor = self.connection.cursor() + try: + cursor.execute(query) + result = cursor.fetchall() + return result + except Error as err: + print("Error executing query") + print(err) + \ No newline at end of file diff --git a/sql/table_builder.py b/sql/table_builder.py deleted file mode 100644 index e5fbb57..0000000 --- a/sql/table_builder.py +++ /dev/null @@ -1,252 +0,0 @@ -import mysql.connector -from mysql.connector import errorcode - -def sql_to_html_table(host, username, password, database_name, table_name, - diff_table="24h_historical", headers=["Rank", "Liver", "Subscribers", "Difference (24hr)"], root_url="https://nijitracker.com"): - try: - cnx = mysql.connector.connect(user=username, password=password, - host=host, database=database_name) - except mysql.connector.Error as err: - if err.errno == errorcode.ER_ACCESS_DENIED_ERROR: - print("Something is wrong with your user name or password") - elif err.errno == errorcode.ER_BAD_DB_ERROR: - print("Database does not exist") - else: - print(err) - - cursor = cnx.cursor() - query = f"SELECT id, channel_id, name, subscriber_count, timestamp, profile_pic FROM {table_name} ORDER by subscriber_count DESC" - cursor.execute(query) - data = cursor.fetchall() - table = "" - header = "" - header_cols = headers - for col in header_cols: - header += f"" - header += "" - table += header - rank = 1 - for row in data: - table += "" - table += f"" - rank += 1 - for i, col in enumerate(row): - if cursor.description[i][0] == "name": - channel_url = f"{root_url}/stats/{row[2]}" - profile_pic_url = row[5] - table += f"" - elif cursor.description[i][0] == "subscriber_count": - formatted_sub_count = "{:,.0f}".format(int(col)) - table += f"" - elif cursor.description[i][0] == "timestamp": - query = f"SELECT sub_diff FROM {diff_table} WHERE channel_id = '{row[1]}'" - try: - diff_cursor = cnx.cursor() - diff_cursor.execute(query) - diff_data = diff_cursor.fetchall() - old_sub_count = int(diff_data[0][0]) - current_sub_count = int(row[3]) - if old_sub_count > current_sub_count: - difference = f"-{old_sub_count - current_sub_count}" - else: - difference = f"+{current_sub_count - old_sub_count}" - table += f"" - except IndexError: - raise Exception("Are you trying to use a new set of channels?\nPlease delete last_refresh.txt in data folder first!") - elif cursor.description[i][0] not in ["id", "channel_id", "profile_pic"]: - table += f"" - - - table += "" - table += "
{col}
{rank}{col}{formatted_sub_count}{difference}{col}
" - style = "" - style += "" - - - cursor.close() - cnx.close() - return table + style - - -def generate_individual_table(host, username, password, database_name, table_name, param="LIMIT 7"): - try: - cnx = mysql.connector.connect(user=username, password=password, - host=host, database=database_name) - except mysql.connector.Error as err: - if err.errno == errorcode.ER_ACCESS_DENIED_ERROR: - print("Something is wrong with your user name or password") - elif err.errno == errorcode.ER_BAD_DB_ERROR: - print("Database does not exist") - else: - print(err) - - cursor = cnx.cursor() - query = f"SELECT subscriber_count, timestamp FROM {table_name} GROUP BY DATE(timestamp) ORDER by timestamp DESC " + param - - cursor.execute(query) - data = cursor.fetchall() - table = "" - header = "" - header_cols = ["Subscribers", "Timestamp"] - for col in header_cols: - header += f"" - header += "" - table += header - for row in data: - table += "" - for i, col in enumerate(row): - if cursor.description[i][0] == "subscriber_count": - formatted_sub_count = "{:,.0f}".format(int(col)) - table += f"" - else: - table += f"" - table += "" - table += "
{col}
{formatted_sub_count}{col}
" - style = "" - style += "" - - cursor.close() - cnx.close() - return table + style diff --git a/sql_table_config.json b/sql_table_config.json new file mode 100644 index 0000000..7aea426 --- /dev/null +++ b/sql_table_config.json @@ -0,0 +1,7 @@ +{ + "LIVE_COLUMNS": "id INT PRIMARY KEY AUTO_INCREMENT, channel_id VARCHAR(255), profile_pic VARCHAR(255), name VARCHAR(255), subscriber_count INT, timestamp DATETIME", + "LIVE_HEADER": "channel_id, profile_pic, name, subscriber_count, timestamp", + "DAILY_COLUMNS": "id INT PRIMARY KEY AUTO_INCREMENT, channel_id VARCHAR(255), sub_diff INT", + "DAILY_HEADER": "channel_id, sub_diff", + "HOLODEX_ORGS": "Phase%20Connect" +} diff --git a/webapi/holodex.py b/webapi/holodex.py index 6a5dc93..5f81892 100644 --- a/webapi/holodex.py +++ b/webapi/holodex.py @@ -1,4 +1,5 @@ from webapi.web_api import WebAPI +from typing import Iterable class HolodexAPI(WebAPI): @@ -6,62 +7,69 @@ class HolodexAPI(WebAPI): Class for interacting with the Holodex API """ - def __init__(self, api_key: str = None, member_count: int = 300, - organization: str = "Nijisanji"): - super().__init__(api_key = api_key, base_url = "https://holodex.net/api/v2/") + def __init__(self,api_key: str = None,member_count: int = 300,organization: str = "Nijisanji"): + super().__init__(api_key=api_key, base_url="https://holodex.net/api/v2/") self.member_count = member_count self.organization = organization self._inactive_channels = [] - self._active_channels = [] + self._channel_data = [] - def get_data_all_channels(self) -> list: + def get_subscriber_data(self) -> Iterable: """ Gets data for all channels in a particular organization """ members = self.member_count data = [] - filtered_data = [] + active_channels = [] offset = 0 while members > 0: data += self._download_url( - f"channels?type=vtuber&offset={offset}&limit=100&org={self.organization}") + f"channels?type=vtuber&offset={offset}&limit=100&org={self.organization}" + ) members -= 100 offset += 100 for channel in data: - print(channel['name']) - if channel['inactive'] is False: - try: - self._active_channels.append(channel['id'] + "," + channel['english_name']) - except (KeyError, TypeError, ValueError): - self._active_channels.append(channel['id'] + "," + channel['name']) - channel['description'] = self.get_channel_description(channel['id']) - filtered_data.append(channel) - else: - self._inactive_channels.append(channel['id']) - return filtered_data - - def get_exclude_channels(self) -> list: - """ - Gets the list of excluded channels - """ - return self._inactive_channels + print("DEBUG: ", channel["id"]) + try: + channel["description"] = self.get_channel_description(channel["id"]) + if channel["inactive"]: + self._inactive_channels.append(channel["id"]) + continue + active_channels.append(channel) + except (KeyError, TypeError, ValueError): + print("DEBUG:","An error occured with parsing ", channel["id"], channel["name"]) + continue + self._channel_data = active_channels + return active_channels def get_view_count(self, channel_id: str) -> int: """ Gets the view count for a particular channel """ data = self._download_url(f"channels/{channel_id}") - return data['view_count'] + return data["view_count"] def get_channel_description(self, channel_id: str) -> str: """ Gets the description for a particular channel """ data = self._download_url(f"channels/{channel_id}") - return data['description'] + return data["description"] + + def set_organization(self, organization: str): + """ + Sets the organization for the API + """ + self.organization = organization + + def get_inactive_channels(self) -> list: + """ + Gets the list of inactive channels + """ + return self._inactive_channels - def get_active_channels(self) -> list: + def get_generated_channel_data(self) -> list: """ - Gets the list of active channels + Gets the list of channel data """ - yield from self._active_channels + return self._channel_data -- cgit v1.2.3