diff options
| author | Pinapelz <donaldshan1@outlook.com> | 2023-06-20 02:15:12 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-06-20 02:15:12 -0700 |
| commit | 5b25b6e64140e02c244456973e9305fe43c355c6 (patch) | |
| tree | 6930d7ae771fb188fc6a3a0f7f1f81bad5bf84f2 | |
| parent | c929c11f9006db67e10ddd7fa599124a6edeadeb (diff) | |
| parent | 70237b5a5d82e8425eb5870a975bde497a6def08 (diff) | |
Merge pull request #2 from pinapelz/develop-rewrite
Refactored entire codebase
| -rw-r--r-- | .gitignore | 163 | ||||
| -rw-r--r-- | config.ini | 31 | ||||
| -rw-r--r-- | config.json | 7 | ||||
| -rw-r--r-- | config.py | 54 | ||||
| -rw-r--r-- | data/exclude_channel.txt | 28 | ||||
| -rw-r--r-- | data/last_refresh.txt | 2 | ||||
| -rw-r--r-- | decorators.py | 16 | ||||
| -rw-r--r-- | fileutil.py | 81 | ||||
| -rw-r--r-- | html_builders/builder.py | 109 | ||||
| -rw-r--r-- | html_builders/elements.py (renamed from sql/graph_builder.py) | 77 | ||||
| -rw-r--r-- | html_builders/graphs.py | 32 | ||||
| -rw-r--r-- | html_builders/tables.py | 72 | ||||
| -rw-r--r-- | html_gen/html_gen.py | 375 | ||||
| -rw-r--r-- | main.py | 168 | ||||
| -rw-r--r-- | nijitrack.py | 93 | ||||
| -rw-r--r-- | sql/sql_handler.py | 76 | ||||
| -rw-r--r-- | sql/table_builder.py | 252 | ||||
| -rw-r--r-- | sql_table_config.json | 7 | ||||
| -rw-r--r-- | webapi/holodex.py | 66 |
19 files changed, 704 insertions, 1005 deletions
@@ -3,4 +3,165 @@ config.json .venv .github main.py -.idea
\ No newline at end of file +.idea + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..17217c1 --- /dev/null +++ b/config.ini @@ -0,0 +1,31 @@ +[SQL] +; Database connection details +host = localhost +user = root +password = +database = Nijitrack_Dev + +[TABLES] +; Names of the tables that should be created in the database + +; Current subscriber data +live = subscriber_data +historical = subscriber_data_historical +daily = 24h_historical + +[API] +holodex = HOLODEXAPI + +[PATH] +root_html = stats + +[WEBSITE] +; Website details +homepage = https://nijitracker.com +title = Nijitracker +description = A site that tracks the historical subscriber data for Nijisanji affiliated livers +icon = https://raw.githubusercontent.com/pinapelz/NijiTrack/master/assets/icon.png +timezone = PST +footer_message = This is a demo of Nijitrack, a way to track historical subscriber data for any set of channels on YouTube.<br>This webpage is not affiliated with ANYCOLOR or any of the channels listed here in any way<br>Date Started: 2023-03-26 + + diff --git a/config.json b/config.json deleted file mode 100644 index 63c1ac9..0000000 --- a/config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "address": "127.0.0.1", - "user": "root", - "password": "", - "youtube_api_key": "key", - "holodex_api_key": "key" -} diff --git a/config.py b/config.py deleted file mode 100644 index 3b2404c..0000000 --- a/config.py +++ /dev/null @@ -1,54 +0,0 @@ -""" -This file contains all the constants used in the main.py file -""" - -""" -0 = Holodex -1 = YouTube API -""" -MODE = 1 - -# General database configurations -DB_NAME = "pinapelz$nijitrack" -TABLE_NAME = "subscriber_data" -TABLE_COLUMNS = "id INT PRIMARY KEY AUTO_INCREMENT, channel_id VARCHAR(255), profile_pic VARCHAR(255), name VARCHAR(255), subscriber_count INT, timestamp DATETIME" -HISTORICAL_TABLE_NAME = "subscriber_data_historical" -DAY_DIFF_TABLE_NAME = "24h_historical" # Stores the difference in subscriber count for each channel in the past 24 hours -DIFF_COLUMNS = "id INT PRIMARY KEY AUTO_INCREMENT, channel_id VARCHAR(255), sub_diff INT" # Stores the difference in subscriber count for each channel in the past 24 hours -table_columns = "channel_id, profile_pic, name, subscriber_count, timestamp" -diff_columns = "channel_id, sub_diff" - -HEADER_TITLE = "Nijitracker" # Shows at the top of the page - -# Message at the bottom of the webpage -FOOTER_MESSAGE = """ -This is a demo of Nijitrack, a way to track historical subscriber data for any set of channels on YouTube.<br> -This webpage is not affiliated with ANYCOLOR or any of the channels listed here in any way<br> -Date Started: 2023-03-26 -""" - -# (Optional) for things like Discord embeds or Twitter cards -META_DATA_TITLE = "Nijitracker" -META_DATA_DESCRIPTION = "A site that tracks the historical subscriber data for Nijisanji affiliated livers (demo)" -META_PROFILE_PIC = "https://raw.githubusercontent.com/pinapelz/NijiTrack/master/assets/icon.png" - - -COLOR_THEME = "2a4b71" -ROOT_URL = "https://nijitracker.com" -TIMEZONE = "PST" -TIMEZONE_STR = "Pacific Standard Time" - -ROOT_STORAGE_PATH = "" - -# Skip the options below if you are not using Holodex -# Organization names must match the names on Holodex (case sensitive) -HOLODEX_ORG = "Nijisanji,Hololive" # Can be a comma separated list of organizations -ORG_MEMBER_COUNT = 300 -UPDATE_LOCAL_RECORDS = True # If true, will update the local records of the channels.txt and exclude_channels.txt file - -# URL for pulling the channels.txt and exclude_channels.txt file from a URL (should be raw txt form) -# Provide link to the directory/base url that contains the channels.txt and exclude_channels.txt files -EXTERNAL_DATA_URL = "https://raw.githubusercontent.com/pinapelz/NijiTrack/pettan-track-data/data/" # You'll need to manually call this -REFRESH_DAILY = False # If true, will refresh the data from the external data url every day - - diff --git a/data/exclude_channel.txt b/data/exclude_channel.txt index 0756eee..d0d5b11 100644 --- a/data/exclude_channel.txt +++ b/data/exclude_channel.txt @@ -26,6 +26,34 @@ UCKQi12nOGZsJ5nOuCTHErmA UCSUZugdxy9Wcrkp781cFt1w UCTi_rzf5QIkXjhJjkbcAdTg UCpfjQCCavrO-rnKaAaIF9dg +UCX7YkU9nEeaoZbkVLVajcMg +UCwi4P78SVunSYAGrvC9aKcw +UCNRh9kkByBTYLo0IJupnAug +C4Jyg9gFStHO8r5n4ya7XCQ +UCStzBFfjwFYb0qSYHnzFZvw +UCrhf6HYKnV6bxyaB6ooJzbw +UCfki3lMEF6SGBFiFfo9kvUA +UCtHFXfrn52juTqGBN4WbMVw +UCbLgcjfsUaCUgJh9SVit8kw +UC_D2DNy-KUNQJ_NGMppgmyg +UCxWcO9CLti4uouUIS5IIF-Q +UC4Jyg9gFStHO8r5n4ya7XCQ +UCz6vnIbgiqFT9xUcD6Bp65Q +UC-JSeFfovhNsEhftt1WHMvg +UC0PwyIlUefx1LGWjFI0QjMg +UCRzHROJUp7Wg900p_fXtJtQ +UC2NtHPaDUA5htYsjVrO8Gng +UCLSzgV37Dt24T8p3-TNiSLg +UC8vZcu6W-EJ6UYEy_C31c-A +UCh00mQw8BCrTbchYWGz4JTQ +UCWwwXXd_RPzj3LAyEfBslsg +UCINqoksO3CQPCt8a-mRe-Ew +UCF1JdALrXgub24weQpqDy9Q +UCvQIBipkXlXXVdGga9sn8dw +UCKQi12nOGZsJ5nOuCTHErmA +UCSUZugdxy9Wcrkp781cFt1w +UCTi_rzf5QIkXjhJjkbcAdTg +UCpfjQCCavrO-rnKaAaIF9dg UC0lik9pHju6ONgkBh7N5wHw UC1ZV7KBscK0EMoJKFu1DnDg UC69URn8iP4u8D_zUp-IJ1sg diff --git a/data/last_refresh.txt b/data/last_refresh.txt index 9552d2a..92719ce 100644 --- a/data/last_refresh.txt +++ b/data/last_refresh.txt @@ -1 +1 @@ -2023-04-06
\ No newline at end of file +2023-06-20
\ No newline at end of file diff --git a/decorators.py b/decorators.py new file mode 100644 index 0000000..bc0b420 --- /dev/null +++ b/decorators.py @@ -0,0 +1,16 @@ +import time + + +def log(message: str): + def decorator(func): + def wrapper(*args, **kwargs): + print("TASK: " + message) + start = time.time() + result = func(*args, **kwargs) + end = time.time() + print(f"COMPLETE: {message} {round(end - start, 3)} seconds") + return result + + return wrapper + + return decorator diff --git a/fileutil.py b/fileutil.py index 6cc1351..b8f6015 100644 --- a/fileutil.py +++ b/fileutil.py @@ -2,6 +2,7 @@ import os.path import urllib.request import json import time +import configparser def _read_file(path: str, lines=True) -> list: @@ -12,29 +13,11 @@ def _read_file(path: str, lines=True) -> list: return file.read().splitlines() -def get_login_data(path="config.json"): - # gets login data from config.json - data = json.loads(_read_file(os.path.join(path), lines=False)) - try: - return data["address"], data["user"], data["password"] - except KeyError: - raise Exception("Login data not found") - - -def get_api_key(api: str, path="config.json"): - # gets api key from config.json - data = json.loads(_read_file(os.path.join(path), lines=False)) - try: - return data[api] - except KeyError: - raise Exception(f"API key for {api} not found") - - def get_excluded_channels(): # gets excluded channels from exclude_channel.txt - if not os.path.exists(os.path.join("data","exclude_channel.txt")): - open(os.path.join("data","exclude_channel.txt"), "w").close() - excluded_channels = _read_file(os.path.join("data","exclude_channel.txt")) + if not os.path.exists(os.path.join("data", "exclude_channel.txt")): + open(os.path.join("data", "exclude_channel.txt"), "w").close() + excluded_channels = _read_file(os.path.join("data", "exclude_channel.txt")) return excluded_channels @@ -49,10 +32,11 @@ def save_local_channels(data: list, path: str = "data"): open(path, "w").close() with open(path, "w", encoding="utf-8") as file: for channel in data: - if channel['id'] in excluded_channels: + if channel["id"] in excluded_channels: continue file.write(f"{channel['id']},{channel['english_name']}\n") + def get_local_channels(path: str = "data"): """ Get the channel names and ids locally for when the API is down @@ -61,28 +45,57 @@ def get_local_channels(path: str = "data"): if not os.path.exists(path): raise Exception("Local channel data not found") with open(path, "r", encoding="utf-8") as file: - rows = file.read().splitlines() + rows = file.read().splitlines() return [tuple(row.split(",")) for row in rows] + def check_diff_refresh(): - if not os.path.exists(os.path.join("data","last_refresh.txt")): - with open(os.path.join("data","last_refresh.txt"), "w", encoding="utf-8") as file: + if not os.path.exists(os.path.join("data", "last_refresh.txt")): + with open( + os.path.join("data", "last_refresh.txt"), "w", encoding="utf-8" + ) as file: file.write(time.strftime("%Y-%m-%d")) return True - with open(os.path.join("data","last_refresh.txt"), "r", encoding="utf-8") as file: + with open(os.path.join("data", "last_refresh.txt"), "r", encoding="utf-8") as file: last_refresh = file.read() if last_refresh != time.strftime("%Y-%m-%d"): - with open(os.path.join("data","last_refresh.txt"), "w", encoding="utf-8") as file: + with open( + os.path.join("data", "last_refresh.txt"), "w", encoding="utf-8" + ) as file: file.write(time.strftime("%Y-%m-%d")) return True + def update_data_files(url: str) -> None: # Updates the local txt channel data stored in data folder - if not os.path.exists(os.path.join("data","channels.txt")): - open(os.path.join("data","channels.txt"), "w").close() - urllib.request.urlretrieve(url+"channels.txt", os.path.join("data","channels.txt")) + if not os.path.exists(os.path.join("data", "channels.txt")): + open(os.path.join("data", "channels.txt"), "w").close() + urllib.request.urlretrieve( + url + "channels.txt", os.path.join("data", "channels.txt") + ) # downloaded txt file from url and write to channels.txt - - if not os.path.exists(os.path.join("data","exclude_channel.txt")): - open(os.path.join("data","exclude_channel.txt"), "w").close() - urllib.request.urlretrieve(url+"exclude_channel.txt", os.path.join("data","exclude_channel.txt")) + + if not os.path.exists(os.path.join("data", "exclude_channel.txt")): + open(os.path.join("data", "exclude_channel.txt"), "w").close() + urllib.request.urlretrieve( + url + "exclude_channel.txt", os.path.join("data", "exclude_channel.txt") + ) + + +def load_config(ini_filepath: str) -> dict: + config_object = configparser.ConfigParser() + file = open(ini_filepath, "r") + config_object.read_file(file) + output_dict = {} + sections = config_object.sections() + for section in sections: + output_dict[section] = {} + for key in config_object[section]: + output_dict[section][key] = config_object[section][key] + return output_dict + +def load_json_file(json_file_path: str) -> dict: + with open(json_file_path, "r", encoding="utf-8") as file: + return json.load(file) + + diff --git a/html_builders/builder.py b/html_builders/builder.py new file mode 100644 index 0000000..dea2e8c --- /dev/null +++ b/html_builders/builder.py @@ -0,0 +1,109 @@ +from jinja2 import Environment, FileSystemLoader +import html_builders.elements as elements +import html_builders.graphs as graphs +import html_builders.tables as tables +import os +import time + +MENU_ITEMS = [ + ("Nijitracker", "https://www.nijitracker.com"), + ("Pettantracker", "https://nijitracker.com/pettantrack") +] + + +def build_ranking_page(server, CONFIG: dict, exclude_channels: list = []): + page_path = os.path.join(CONFIG["PATH"]["root_html"], "index.html") + if not os.path.exists(page_path): + os.makedirs(os.path.dirname(page_path), exist_ok=True) + file_loader = FileSystemLoader("templates") + env = Environment(loader=file_loader) + template = env.get_template("ranking.html") + + input_dict = { + "meta_image_url": CONFIG["WEBSITE"]["icon"], + "meta_description": CONFIG["WEBSITE"]["description"], + "meta_title": CONFIG["WEBSITE"]["title"], + "title_banner": elements.build_title_banner( + CONFIG["WEBSITE"]["title"], + MENU_ITEMS + ), + "ranking_graph": graphs.plot_subscriber_count_over_time(server, CONFIG["TABLES"]["historical"], exclude_channels=exclude_channels), + "divider": "Last Updated: " + time.strftime('%Y-%m-%d %H:%M:%S') + " " + CONFIG["WEBSITE"]["timezone"], + "ranking_table": tables.generate_html_table(server, CONFIG["TABLES"]["live"], CONFIG["TABLES"]["daily"]), + "footer": CONFIG["WEBSITE"]["footer_message"] + } + output = template.render(input_dict) + with open(page_path, "w", encoding="utf-8") as f: + f.write(output) + +def build_individual_page(server, CONFIG: dict, channel_data: str): + def transform_sql_string(string: str) -> str: + return string.encode("ascii", "ignore").decode().replace("'", "''") + channel_id = channel_data["id"] + desc = channel_data["description"] + pfp = channel_data["photo"] + sub_count = channel_data["subscriber_count"] + channel_name = channel_data["english_name"] + if channel_name is None: + channel_name = channel_data["name"] + channel_name = transform_sql_string(channel_name) + sub_count_str = "{:,.0f}".format(int(sub_count)) + page_path = os.path.join(CONFIG["PATH"]["root_html"], channel_name + ".html") + file_loader = FileSystemLoader("templates") + env = Environment(loader=file_loader) + template = env.get_template("individual.html") + input_dict = { + "meta_image_url": CONFIG["WEBSITE"]["icon"], + "meta_description": CONFIG["WEBSITE"]["description"], + "meta_title": channel_name + " - " + CONFIG["WEBSITE"]["title"], + "homepage_url": CONFIG["WEBSITE"]["homepage"], + "sub_text": sub_count_str + " Subscribers", + "name": channel_name, + "profile_pic": pfp, + "description": desc, + "channel_id": channel_id, + "projection_card": elements.build_projection_card(server, CONFIG["TABLES"]["historical"], int(sub_count), channel_name, timezone = CONFIG["WEBSITE"]["timezone"]), + "subscriber_trend": graphs.plot_subscriber_count_over_time(server, CONFIG["TABLES"]["historical"], gtitle = "Subscriber Count Over Time for " + channel_name, overrideQuery = f"SELECT name, subscriber_count, timestamp, channel_id FROM {CONFIG['TABLES']['historical']} WHERE channel_id = '{channel_id}' ORDER by timestamp DESC", markers = "lines+markers"), + "divider": "Recent Subscriber Data:", + "weekly_trend": graphs.plot_subscriber_count_over_time(server, CONFIG["TABLES"]["historical"], gtitle = "Weekly Subscriber Count Over Time for " + channel_name, overrideQuery = f"SELECT name, subscriber_count, timestamp, channel_id FROM {CONFIG['TABLES']['historical']} WHERE channel_id = '{channel_id}' AND timestamp >= DATE_SUB(NOW(), INTERVAL 7 DAY) ORDER by timestamp DESC", markers = "lines+markers"), + "full_table_url": CONFIG["WEBSITE"]["homepage"] + "/tables/"+channel_name + } + output = template.render(input_dict) + with open(page_path, "w", encoding="utf-8") as f: + f.write(output) + +def build_table_page(server, CONFIG: dict, channel_data: str): + def transform_sql_string(string: str) -> str: + return string.encode("ascii", "ignore").decode().replace("'", "''") + channel_id = channel_data["id"] + desc = channel_data["description"] + pfp = channel_data["photo"] + sub_count = channel_data["subscriber_count"] + channel_name = channel_data["english_name"] + if channel_name is None: + channel_name = channel_data["name"] + channel_name = transform_sql_string(channel_name) + + file_loader = FileSystemLoader("templates") + env = Environment(loader=file_loader) + template = env.get_template("full_table.html") + + input_dict = { + "meta_image_url": CONFIG["WEBSITE"]["icon"], + "meta_description": CONFIG["WEBSITE"]["description"], + "meta_title": channel_name + " - " + CONFIG["WEBSITE"]["title"], + "homepage_url": CONFIG["WEBSITE"]["homepage"], + "sub_text": channel_name, + "full_table": tables.generate_individual_table(server, CONFIG["TABLES"]["historical"], channel_name), + "name": channel_name, + "profile_pic": pfp, + "description": desc, + "channel_id": channel_id, + } + output = template.render(input_dict) + page_path = os.path.join("tables", channel_name + ".html") + with open(page_path, "w", encoding="utf-8") as f: + f.write(output) + + +
\ No newline at end of file diff --git a/sql/graph_builder.py b/html_builders/elements.py index df108b3..41c8525 100644 --- a/sql/graph_builder.py +++ b/html_builders/elements.py @@ -1,51 +1,24 @@ -import plotly.graph_objs as go +import math import pandas as pd -import mysql.connector -from mysql.connector import errorcode import warnings -import math from datetime import datetime, timedelta -import numpy as np +def build_title_banner(text: str, menu_items): + banner_html = f""" + <div class="banner-container"> + <div class="banner-text">{text}</div> + </div> + <div class="menu-bar"> + """ + for item in menu_items: + banner_html += f""" + <a href="{item[1]}">{item[0]}</a> -def plot_subscriber_count_over_time(host, username, password, database_name, table_name, - gtitle = "Subscriber Count Over Time for Nijisanji Members", - overrideQuery = None, markers = "lines", exclude_channels = []): - warnings.filterwarnings('ignore') # Ignore pandas warning regarding pyodbc - try: - cnx = mysql.connector.connect(user = username, password = password, - host = host, database = database_name) - except mysql.connector.Error as err: - if err.errno == errorcode.ER_ACCESS_DENIED_ERROR: - print("Something is wrong with your user name or password") - elif err.errno == errorcode.ER_BAD_DB_ERROR: - print("Database does not exist") - else: - print(err) - query = f"SELECT name, subscriber_count, timestamp, channel_id FROM {table_name} ORDER by timestamp DESC" if overrideQuery is None else overrideQuery - df = pd.read_sql_query(query, cnx) - groups = df.groupby("name") - fig = go.Figure() - config = dict({'responsive': True, 'displaylogo': False}) - for channel, group in groups: - if len(exclude_channels) != 0 and group['channel_id'].iloc[0] in exclude_channels: - continue - fig.add_trace(go.Scattergl( - x = group["timestamp"], y = group["subscriber_count"], name = channel, mode = markers, - showlegend = True)) - - fig.update_layout( - title = {'text': gtitle, 'x': 0.5, 'xanchor': 'center', - 'yanchor': 'top', 'font': {'family': 'Arial', 'size': 30}}, - xaxis_title = "Timestamp", - yaxis_title = "Subscribers", - legend = dict(font = dict(size = 16), title = dict(text = "Channels")), - height = 950 - ) - return fig.to_html(config = config) - + """ + banner_html += "</div>" + return banner_html -def generate_projection(host, username, password, database_name, table_name, curr_subscribers, +def build_projection_card(server, table_name, curr_subscribers, channel_name, timezone = "Pacific Standard Time"): def get_next_milestone(subscriber_count): num_digits = len(str(subscriber_count)) @@ -59,17 +32,6 @@ def generate_projection(host, username, password, database_name, table_name, cur return next_milestone warnings.filterwarnings('ignore') # Ignore pandas warning regarding pyodbc - try: - cnx = mysql.connector.connect(user = username, password = password, - host = host, database = database_name) - except mysql.connector.Error as err: - if err.errno == errorcode.ER_ACCESS_DENIED_ERROR: - print("Something is wrong with your user name or password") - elif err.errno == errorcode.ER_BAD_DB_ERROR: - print("Database does not exist") - else: - print(err) - def create_milestone_card(time_until_milestone, next_milestone, not_enough_data = False, declining = False): now = datetime.now() @@ -104,11 +66,9 @@ def generate_projection(host, username, password, database_name, table_name, cur </div> <br> """ - return card - - query = f"SELECT name, subscriber_count, timestamp FROM {table_name} WHERE timestamp >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY) ORDER BY timestamp DESC" - df = pd.read_sql_query(query, cnx) + query = f"SELECT name, subscriber_count, timestamp FROM {table_name} WHERE timestamp >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY) AND name = \"{channel_name}\" ORDER BY timestamp DESC" + df = pd.read_sql_query(query, server.get_connection()) df = df.sort_values(by = 'timestamp') # get the rate of change from first data point to last data point first_data = df.iloc[0] @@ -126,3 +86,6 @@ def generate_projection(host, username, password, database_name, table_name, cur time_to_next_milestone = ( next_milestone - curr_subscribers) / avg_rate_of_change return create_milestone_card(time_to_next_milestone, next_milestone) + + + diff --git a/html_builders/graphs.py b/html_builders/graphs.py new file mode 100644 index 0000000..2770880 --- /dev/null +++ b/html_builders/graphs.py @@ -0,0 +1,32 @@ +import plotly.graph_objs as go +import pandas as pd +import warnings +import math +from datetime import datetime, timedelta +import numpy as np +import mysql.connector +from mysql.connector import errorcode + +def plot_subscriber_count_over_time(server, table_name, gtitle = "Subscriber Count Over Time for Nijisanji Members", + overrideQuery = None, markers = "lines", exclude_channels = []): + warnings.filterwarnings('ignore') # Ignore pandas warning regarding pyodbc + query = f"SELECT name, subscriber_count, timestamp, channel_id FROM {table_name} ORDER by timestamp DESC" if overrideQuery is None else overrideQuery + df = pd.read_sql_query(query, server.get_connection()) + groups = df.groupby("name") + fig = go.Figure() + config = dict({'responsive': True, 'displaylogo': False, 'modeBarButtonsToAdd': ['pan2d', 'zoomIn2d', 'zoomOut2d']}) + for channel, group in groups: + if len(exclude_channels) != 0 and group['channel_id'].iloc[0] in exclude_channels: + continue + fig.add_trace(go.Scattergl( + x = group["timestamp"], y = group["subscriber_count"], name = channel, mode = markers, + showlegend = True)) + fig.update_layout( + title = {'text': gtitle, 'x': 0.5, 'xanchor': 'center', + 'yanchor': 'top', 'font': {'family': 'Arial', 'size': 30}}, + xaxis_title = "Timestamp", + yaxis_title = "Subscribers", + legend = dict(font = dict(size = 16), title = dict(text = "Channels")), + height = 950, + ) + return fig.to_html(config = config)
\ No newline at end of file diff --git a/html_builders/tables.py b/html_builders/tables.py new file mode 100644 index 0000000..87d2b4a --- /dev/null +++ b/html_builders/tables.py @@ -0,0 +1,72 @@ +def generate_html_table(server, table, diff_table, headers=["Rank", "Liver", "Subscriber", "Difference (24hr)"], root_url="https://www.nijitracker.com"): + def get_daily_difference_subs(sub_count_str: str): + diff_cursor = server.get_connection().cursor() + diff_cursor.execute(query) + diff_data = diff_cursor.fetchall() + old_sub_count = int(diff_data[0][0]) + current_sub_count = int(sub_count_str) + if old_sub_count > current_sub_count: + return f"-{old_sub_count - current_sub_count}" + else: + return f"+{current_sub_count - old_sub_count}" + cursor = server.get_connection().cursor() + query = f"SELECT id, channel_id, name, subscriber_count, timestamp, profile_pic FROM {table} ORDER by subscriber_count DESC" + cursor.execute(query) + data = cursor.fetchall() + header = "<tr>" + for h in headers: + header += f"<th>{h}</th>" + header += "</tr>" + table_html = "<table>" + table_html += header + rank = 1 + for row in data: + table_row = "<tr>" + table_row += f"<td>{rank}</td>" + rank += 1 + for i, col in enumerate(row): + if cursor.description[i][0] == "name": + channel_url = f"{root_url}/stats/{row[2]}" + profile_pic_url = row[5] + table_row += f"<td><a href='{channel_url}'><img src='{profile_pic_url}' height='50px' width='50px'>{col}</a></td>" + elif cursor.description[i][0] == "subscriber_count": + formatted_sub_count = "{:,.0f}".format(int(col)) + table_row += f"<td>{formatted_sub_count}</td>" + elif cursor.description[i][0] == "timestamp": + query = f"SELECT sub_diff FROM {diff_table} WHERE channel_id = '{row[1]}'" + try: + table_row += f"<td>{get_daily_difference_subs(row[3])}</td>" + except IndexError: + raise Exception("Are you trying to use a new set of channels?\nPlease delete last_refresh.txt in data folder first!") + elif cursor.description[i][0] not in ["id", "channel_id", "profile_pic"]: + table_row += f"<td>{col}</td>" + table_row += "</tr>" + table_html += table_row + table_html += "</table>" + cursor.close() + return table_html + +def generate_individual_table(server, table_name, channel_name, param="LIMIT 7"): + cursor = server.get_connection().cursor() + query = f"SELECT subscriber_count, timestamp FROM {table_name} WHERE name=\"{channel_name}\" GROUP BY DATE(timestamp) ORDER by timestamp DESC " + param + cursor.execute(query) + data = cursor.fetchall() + table = "<table>" + header = "<tr>" + header_cols = ["Subscribers", "Timestamp"] + for col in header_cols: + header += f"<th>{col}</th>" + header += "</tr>" + table += header + for row in data: + table += "<tr>" + for i, col in enumerate(row): + if cursor.description[i][0] == "subscriber_count": + formatted_sub_count = "{:,.0f}".format(int(col)) + table += f"<td>{formatted_sub_count}</td>" + else: + table += f"<td>{col}</td>" + table += "</tr>" + table += "</table>" + cursor.close() + return table
\ No newline at end of file diff --git a/html_gen/html_gen.py b/html_gen/html_gen.py deleted file mode 100644 index 10042d8..0000000 --- a/html_gen/html_gen.py +++ /dev/null @@ -1,375 +0,0 @@ - -def generate_html_divider(label_text): - divider = '<div style="text-align:center;">' - divider += '<hr style="height: 3px; border: none; background-color: #f2f2f2; margin: 25px 0;" />' - divider += f'<span style="display:inline-block; position:relative; top:-1.5em; padding: 0 20px; background-color:#fff; font-size: 1.2em; font-weight:bold; color: #555; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); border-radius: 5px; font-family: Arial, sans-serif; letter-spacing: 2px;">{label_text}</span>' - divider += '</div>' - return divider - - -def generate_title_banner(title_text, color_theme="2a4b71"): - banner_html = f""" - <div class="banner-container"> - <div class="banner-text">{title_text}</div> - </div> - <div class="menu-bar"> - <a href="https://nijitracker.com/">Nijitracker</a> - <a href="https://nijitracker.com/pettantracker">PettanTracker</a> - </div> - <style> - .banner-container {{ - background-color: #{color_theme}; - color: white; - text-align: center; - font-size: 6em; - font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; - font-weight: bold; - padding: 0.2em 0; - margin-bottom: 0.5em; - margin: 0; - position: relative; - word-wrap: break-word; - }} - .banner-text {{ - animation-name: slideInDown; - animation-duration: 1s; - animation-timing-function: ease; - animation-delay: 0s; - animation-fill-mode: both; - }} - .menu-bar {{ - background-color: #f2f2f2; - overflow: hidden; - position: relative; - font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; - font-size: 1.5em; - font-weight: bold; - text-align: center; - }} - .menu-bar a {{ - display: inline-block; - color: #{color_theme}; - text-align: center; - padding: 14px 16px; - text-decoration: none; - transition: 0.3s; - }} - .menu-bar a:hover {{ - background-color: #{color_theme}; - color: white; - }} - @keyframes slideInDown {{ - 0% {{ - transform: translateY(-100%); - opacity: 0; - }} - 100% {{ - transform: translateY(0); - opacity: 1; - }} - }} - @media (max-width: 768px) {{ - .banner-container {{ - font-size: 3em; - padding: 0.2em 0.5em; - }} - .menu-bar a {{ - font-size: 1em; - }} - }} - @media (max-width: 480px) {{ - .banner-container {{ - font-size: 2em; - padding: 0.2em 0.5em; - }} - .menu-bar a {{ - font-size: 0.8em; - }} - }} - </style> - """ - return banner_html - - - - -def generate_meta_data(title: str, description: str, image_url: str): - return f"""<!DOCTYPE html> - <html lang="en"> - <head> - <meta charset="UTF-8"/> - <head> - <title>{title}</title> - <link rel="icon" type="image/x-icon" href="{image_url}"> - <meta name="description" content="{description}"> - <meta name="viewport" content="width=device-width, initial-scale=1.0"> - - <!-- Google / Search Engine Tags --> - <meta itemprop="name" content="{title}"> - <meta itemprop="description" content="{description}"> - <meta itemprop="image" content="{image_url}"> - - <!-- Facebook Meta Tags --> - <meta property="og:url" content="http://nijitracker.com"> - <meta property="og:type" content="website"> - <meta property="og:title" content="{title}"> - <meta property="og:description" content="{description}"> - <meta property="og:image" content="{image_url}"> - - <!-- Twitter Meta Tags --> - <meta name="twitter:card" content="summary_large_image"> - <meta name="twitter:title" content="{title}"> - <meta name="twitter:description" content="{description}"> - <meta name="twitter:image" content="{image_url}"> - </head> - <body> - """ - -def build_footer_info(info: str): - footer = f""" - <div style='background-color: #f5f5f5; padding: 20px; font-size: 16px; font-family: "Open Sans", sans-serif;'> - <div style='text-align: center;'> - <span style='font-weight: bold;'>Information</span> - </div> - <div style='margin-top: 20px; 'text-align': center'> - <p style='text-align: center;'>Data for this site is taken every hour. Due to limitation with YouTube's API only certain increments in subscriber counts will be reflected based on each liver's channel size</p> - <p style='text-align: center;'>Each liver has their own page to track their own individual achievements and subscriber count. The full table of all recorded daily data for each channel is also viewable</p> - </div> - <div style='text-align: center;'> - <span style='font-weight: bold;'>A Note on Graduating Livers</span> - </div> - <div style='margin-top: 20px; 'text-align': center'> - <p style='text-align: center;'>If a liver is determined to be graduating, retiring, or terminated, Nijitracker will stop tracking their subscriber count, and their page and entry will be removed</p> - <p style='text-align: center;'>Historical data is still retrievable through the Nijitracker API <a href="http://api.nijitracker.com">api.nijitracker.com</a> - </p> - </div> - <div style='margin-top: 20px; 'text-align': center'> - <p style='text-align: center;'>{info}</p> - </div> - <div style='text-align: center; margin-top: 20px;'> - <a href='https://github.com/pinapelz/NijiTrack' target='_blank'> - <img src='https://img.shields.io/github/license/pinapelz/NijiTrack?color=%23994CC3&style=flat-square' alt='License'/> - </a> - </div> - </div> - """ - return footer - -def side_swipe_header(text: str, color_theme="007ACC", url: str="https://nijitracker.com"): - banner_html = f""" - <a href="{url}" target="_self" rel="noopener noreferrer"> - <div class="banner-container"> - <div class="banner-text">{text}</div> - </div> - </a> - <style> - .banner-container {{ - background-color: #{color_theme}; - color: white; - text-align: center; - font-size: 6em; - font-family: 'Montserrat', sans-serif; - font-weight: bold; - padding: 0.2em 0; - margin-bottom: 0.5em; - margin: 0; - position: relative; - }} - - .banner-text {{ - position: relative; - animation-name: slideInRight; - animation-duration: 1s; - animation-timing-function: ease; - animation-delay: 0s; - animation-fill-mode: both; - }} - - @keyframes slideInRight {{ - 0% {{ - transform: translateX(-100%); - opacity: 0; - }} - 100% {{ - transform: translateX(0); - opacity: 1; - }} - }} - - /* Responsive design */ - @media (max-width: 768px) {{ - .banner-container {{ - font-size: 4em; - }} - }} - - @media (max-width: 576px) {{ - .banner-container {{ - font-size: 3em; - }} - }} - </style> - """ - return banner_html - - - -def generate_info_card(name, youtube_channel_id, profile_pic, description): - # Build the HTML string - html = f""" - <br> - <style> - .info-card {{ - display: flex; - flex-direction: row; - align-items: center; - margin-bottom: 20px; - box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); - border-radius: 10px; - overflow: hidden; - }} - - .img-wrapper {{ - width: 200px; - height: 200px; - overflow: hidden; - }} - - .img-wrapper img {{ - width: 100%; - height: auto; - }} - - .text-wrapper {{ - flex: 1; - padding: 20px; - }} - - h2 {{ - font-size: 24px; - margin-bottom: 10px; - font-family: Arial, sans-serif; - }} - - p {{ - font-size: 16px; - margin-bottom: 20px; - font-family: Arial, sans-serif; - }} - a{{ - font-family: Arial, sans-serif; - }} - - .button {{ - display: inline-block; - padding: 10px 20px; - border-radius: 20px; - background-color: #0077b5; - color: #fff; - text-decoration: none; - transition: background-color 0.2s ease; - }} - - .button:hover {{ - background-color: #005ea3; - }} - - @media only screen and (max-width: 768px) {{ - .info-card {{ - flex-direction: column; - align-items: flex-start; - }} - .img-wrapper {{ - width: 100%; - height: 250px; - margin-bottom: 20px; - }} - .text-wrapper {{ - width: 100%; - white-space: pre-line; - }} - }} - </style> - <div class="info-card"> - <div class="img-wrapper"> - <img src="{profile_pic}" alt="{name}"> - </div> - <div class="text-wrapper"> - <h2>{name}</h2> - <p>{description}</p> - <a class="button" href="https://www.youtube.com/channel/{youtube_channel_id}">YouTube Channel</a> - </div> - </div> - """ - # Return the HTML string - return html - - -def generate_subscriber_info_card(subscriber_count): - formatted_sub_count = "{:,.0f}".format(int(subscriber_count)) - html_template = f''' - <div style="display: flex; justify-content: center; align-items: center; height: 100vh; background-color: #F7FAFC;"> - <div style="background-color: #ffffff; border-radius: 10px; padding: 20px; box-shadow: 0 0 10px rgba(0, 0, 0, 0.2);"> - <h2 style="font-family: 'Helvetica Neue', sans-serif; font-size: 72px; font-weight: bold; margin-bottom: 10px; text-align: center;">{formatted_sub_count}</h2> - <p style="font-family: 'Helvetica Neue', sans-serif; font-size: 24px; margin-bottom: 0; text-align: center;">Subscribers</p> - </div> - </div> - ''' - return html_template - -def generate_full_table_button(url): - button_html = """ - <style> - .btn { - display: inline-block; - font-weight: 400; - text-align: center; - white-space: nowrap; - vertical-align: middle; - user-select: none; - border: 1px solid transparent; - padding: .375rem .75rem; - font-size: 1rem; - line-height: 1.5; - border-radius: .25rem; - transition: color .15s ease-in-out, - background-color .15s ease-in-out, - border-color .15s ease-in-out, - box-shadow .15s ease-in-out; - color: #fff; - background-color: #007bff; - border-color: #007bff; - margin: 0 auto; - display: block; - max-width: 200px; - } - - .btn:hover { - color: #fff; - background-color: #0069d9; - border-color: #0062cc; - } - - .btn:focus, .btn.focus { - outline: 0; - box-shadow: 0 0 0 .2rem rgba(0, 123, 255, .5); - } - - .btn-lg { - font-size: 1.25rem; - line-height: 1.5; - padding: .3rem .75rem; - border-radius: .3rem; - } - </style> - <br> - - """ - button_html += f"""<a href="{url}" class="btn btn-primary btn-lg" role="button">View Full Table</a>""" - return button_html - -def generate_doctype_footer(): - return """ - </body> - </html> - """
\ No newline at end of file diff --git a/main.py b/main.py deleted file mode 100644 index 702eb9f..0000000 --- a/main.py +++ /dev/null @@ -1,168 +0,0 @@ -import os -import sys -import time - -import sql.graph_builder as gb -import sql.table_builder as tb -import fileutil as fs -import html_gen.html_gen as ht -from sql.sql_handler import SQLHandler -from webapi.holodex import HolodexAPI -from webapi.youtube import YouTubeAPI -from config import * - -def update_database(server: SQLHandler, data): - server.clear_table(TABLE_NAME) - server.reset_auto_increment(TABLE_NAME) - excluded_channels = fs.get_excluded_channels() - refresh_diff_table = fs.check_diff_refresh() - for channel in data: - channel_id = channel['id'] - if channel_id in excluded_channels: - continue - pfp = channel['photo'] - name = channel['english_name'] - sub_count = channel['subscriber_count'] - - if name is None: # if the channel doesn't have an english name, use the japanese name - name = channel['name'] - name = name.encode("ascii", "ignore").decode() - - data_row = f"'{channel_id}', '{pfp}','{name}', {sub_count}, '{time.strftime('%Y-%m-%d %H:%M:%S')}'" - server.create_table(DAY_DIFF_TABLE_NAME, DIFF_COLUMNS) - - # Difference should only be calculated every 24 hours - # If the channel is new then calculate now, else then make sure 24 hours has passed since last reading - if refresh_diff_table or not server.check_row_exists(DAY_DIFF_TABLE_NAME, "channel_id", channel_id): - if REFRESH_DAILY: - generate_channel_files() - if not server.check_row_exists(DAY_DIFF_TABLE_NAME, "channel_id", channel_id): - server.insert_data(DAY_DIFF_TABLE_NAME, - diff_columns, f"'{channel_id}', {sub_count}") - else: - server.update_row(DAY_DIFF_TABLE_NAME, "channel_id", channel_id, "sub_diff", sub_count) - server.insert_data(HISTORICAL_TABLE_NAME, table_columns, data_row) - - # make updates to the main table - server.insert_data(TABLE_NAME, table_columns, data_row) - name_table = "channel_"+channel_id.lower()+"_subscriber_data" - name_table = name_table.replace("-", "$") - server.create_table(name_table, TABLE_COLUMNS) - server.insert_data(name_table, table_columns, data_row) - # server.drop_table(name_table) - -def generate_individual_stats(server: SQLHandler, data): - for channel in data: - channel_id = channel['id'] - if channel_id in fs.get_excluded_channels(): - continue - name = channel['english_name'] - desc = channel['description'] - pfp = channel['photo'] - sub_count = channel['subscriber_count'] - sub_count_str = "{:,.0f}".format(int(sub_count)) - if name is None: - name = channel['name'] - name = name.encode("ascii", "ignore").decode() - print("GENERATING PAGE FOR "+name+"...") - - # Calculate table key name stored in the database - table_key = "channel_"+channel_id.lower()+"_subscriber_data" - table_key = table_key.replace("-", "$") - - # Generate missing directories - if not os.path.exists("stats"): - os.mkdir("stats") - if not os.path.exists("tables"): - os.mkdir("tables") - - with open(ROOT_STORAGE_PATH+"stats/"+name+".html", "w", encoding="utf-8") as file: - range_query = f"SELECT name, subscriber_count, timestamp FROM {table_key} WHERE timestamp >= DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) ORDER BY timestamp DESC" - group_query = f"SELECT name, subscriber_count, timestamp FROM {table_key} GROUP BY DATE(timestamp) ORDER BY timestamp DESC" - file.write(ht.generate_meta_data(name + " - "+ META_DATA_TITLE, META_DATA_DESCRIPTION, META_PROFILE_PIC)+"\n"+ - ht.side_swipe_header(sub_count_str + " Subscribers", COLOR_THEME, ROOT_URL)+"\n"+ - ht.generate_info_card(name, channel_id, pfp, desc)+ - gb.generate_projection(server.host_name, server.username, server.password, DB_NAME, table_key, int(sub_count), TIMEZONE_STR)+ - gb.plot_subscriber_count_over_time(server.host_name, server.username, server.password, DB_NAME, table_name=table_key, gtitle=name+" Subscriber Trend", overrideQuery=group_query) + - ht.generate_html_divider("Recent Subscriber Data:") + - gb.plot_subscriber_count_over_time(server.host_name, server.username, server.password, DB_NAME, table_name=table_key, gtitle=name+" Last 7 Days Trend", overrideQuery=range_query, markers="lines+markers") + - tb.generate_individual_table(server.host_name, server.username, server.password, DB_NAME, table_key)+ - ht.generate_full_table_button(ROOT_URL+"/tables/"+name) + - ht.generate_doctype_footer() - ) - with open(ROOT_STORAGE_PATH+"tables/"+name+".html", "w", encoding="utf-8") as file: - file.write(ht.generate_meta_data(name + " - "+ META_DATA_TITLE, META_DATA_DESCRIPTION, META_PROFILE_PIC)+"\n"+ - ht.side_swipe_header(name, COLOR_THEME, ROOT_URL)+"\n"+ - ht.generate_info_card(name, channel_id, pfp, desc)+ - tb.generate_individual_table(server.host_name, server.username, server.password, DB_NAME, table_key, param="") + - ht.generate_doctype_footer()) - - -def main(mode=0): - address, user, password = fs.get_login_data() - server = SQLHandler(address, user, password, DB_NAME) - server.create_table(TABLE_NAME, TABLE_COLUMNS) - server.create_table(HISTORICAL_TABLE_NAME, TABLE_COLUMNS) - server.create_table(DAY_DIFF_TABLE_NAME, DIFF_COLUMNS) - excluded_channels = [] - data = [] - if mode == 0: - holodex_organizations = HOLODEX_ORG.split(",") - print("Running Holodex API Data Collection\n"+"Found "+str(len(holodex_organizations))+" organizations") - try: - for org in holodex_organizations: - hldex = HolodexAPI(fs.get_api_key("holodex_api_key"), member_count = ORG_MEMBER_COUNT, organization = org) - data += hldex.get_data_all_channels() - excluded_channels += hldex.get_exclude_channels() - except: - main(mode=1) - - elif mode == 1: - print("Running YouTube API Data Collection") - ytapi = YouTubeAPI(fs.get_api_key("youtube_api_key")) - data = ytapi.get_data_all_channels(fs.get_local_channels()) - - # Updating DB and generating HTML data - update_database(server, data) - excluded_channels += fs.get_excluded_channels() - with open(ROOT_STORAGE_PATH+"index.html", "w", encoding="utf-8") as file: - file.write(ht.generate_meta_data(META_DATA_TITLE, META_DATA_DESCRIPTION, META_PROFILE_PIC) + - ht.generate_title_banner(HEADER_TITLE, COLOR_THEME) + - gb.plot_subscriber_count_over_time(address, user, password, DB_NAME, HISTORICAL_TABLE_NAME, gtitle="Subscriber Count Over Time", exclude_channels=excluded_channels) + - "\n"+ht.generate_html_divider("Last Updated: "+time.strftime('%Y-%m-%d %H:%M:%S')+" "+ TIMEZONE) + - tb.sql_to_html_table(address, user, password, DB_NAME, TABLE_NAME, root_url=ROOT_URL) + - ht.build_footer_info(FOOTER_MESSAGE) + - ht.generate_doctype_footer()) - - # Generating individual pages - generate_individual_stats(server, data) - -def generate_channel_files(): - """ - Generates the channels.txt and exclude_channels.txt files based on Holodex listings - """ - if not UPDATE_LOCAL_RECORDS: - return - print("Running Channel Files Update") - active_channels = [] - exclude_channels = [] - if not os.path.exists("data"): - os.mkdir("data") - for org in HOLODEX_ORG.split(","): - hldex = HolodexAPI(fs.get_api_key("holodex_api_key"), member_count = ORG_MEMBER_COUNT, - organization = org) - hldex.get_data_all_channels() - active_channels += hldex.get_active_channels() - exclude_channels += hldex.get_exclude_channels() - with open("data/channels.txt", "w", encoding="utf-8") as file: - file.write("\n".join(active_channels)) - with open("data/exclude_channel.txt", "w", encoding="utf-8") as file: - file.write("\n".join(exclude_channels)) - print("Success! Channel Files Updated!") - -if __name__ == "__main2__": - MODE = 0 - if len(sys.argv) > 1: - MODE = int(sys.argv[1]) - ROOT_STORAGE_PATH = sys.argv[2] - main(MODE) diff --git a/nijitrack.py b/nijitrack.py new file mode 100644 index 0000000..3e7393e --- /dev/null +++ b/nijitrack.py @@ -0,0 +1,93 @@ +import os +import sys +import time + +import fileutil as fs +from sql.sql_handler import SQLHandler +from webapi.holodex import HolodexAPI +from webapi.youtube import YouTubeAPI +from decorators import * +import html_builders.builder as builder + + + + +CONFIG = fs.load_config("config.ini") +DATA_SETTING = fs.load_json_file("sql_table_config.json") + + +@log("Initializing Database") +def initialize_database(server: SQLHandler): + server.create_table(name = CONFIG["TABLES"]["live"], column = DATA_SETTING["LIVE_COLUMNS"]) + server.create_table(name = CONFIG["TABLES"]["historical"], column = DATA_SETTING["LIVE_COLUMNS"]) + server.create_table(name = CONFIG["TABLES"]["daily"], column = DATA_SETTING["DAILY_COLUMNS"]) + + +@log("Inserting Live Data into Database") +def record_subscriber_data(data: list): + def transform_sql_string(string: str) -> str: + return string.encode("ascii", "ignore").decode().replace("'", "''") + def record_diff_data(data_tuple: tuple): + refresh_daily = fs.check_diff_refresh() + if not server.check_row_exists(CONFIG["TABLES"]["daily"], "channel_id", channel_id): + # data_tuple = (channel_id, pfp, channel_name, sub_count, time.strftime('%Y-%m-%d %H:%M:%S')) + server.insert_row(CONFIG["TABLES"]["daily"], DATA_SETTING["DAILY_HEADER"], (data_tuple[0], data_tuple[3])) + server.insert_row(name = CONFIG["TABLES"]["historical"], column = DATA_SETTING["LIVE_HEADER"], data=data_tuple) + return + elif refresh_daily: + server.update_row(CONFIG["TABLES"]["daily"], "sub_count", sub_count, "channel_id", channel_id) + server.insert_row(name = CONFIG["TABLES"]["historical"], column = DATA_SETTING["LIVE_HEADER"], data=data_tuple) + + exclude_channels = fs.get_excluded_channels() + for channel in data: + channel_id = channel["id"] + if channel_id in exclude_channels: + continue + pfp = channel["photo"] + sub_count = channel["subscriber_count"] + channel_name = channel["english_name"] + if channel_name is None: + channel_name = channel["name"] + channel_name = transform_sql_string(channel_name) + data_tuple = (channel_id, pfp, channel_name, sub_count, time.strftime('%Y-%m-%d %H:%M:%S')) + server.insert_row(name = CONFIG["TABLES"]["live"], column = DATA_SETTING["LIVE_HEADER"], data=data_tuple) + record_diff_data(data_tuple) + +@log("Generating Indvidual Channel Pages") +def generate_individual_pages(server: SQLHandler, data: list): + if not os.path.exists("stats"): + os.mkdir("stats") + if not os.path.exists("tables"): + os.mkdir("tables") + for channel in data: + # builder.build_individual_page(server, CONFIG, channel) + builder.build_table_page(server, CONFIG, channel) + quit() + + + +@log("Running Holodex Generation") +def holodex_generation(server: SQLHandler): + holodex_organizations = DATA_SETTING["HOLODEX_ORGS"].split(",") + server.clear_table(CONFIG["TABLES"]["live"]) + server.reset_auto_increment(CONFIG["TABLES"]["live"]) + holodex = HolodexAPI(CONFIG["API"]["holodex"]) + for organization in holodex_organizations: + holodex.set_organization(organization) + record_subscriber_data(holodex.get_subscriber_data()) + return holodex.get_generated_channel_data() + + + + +if __name__ == "__main__": + server = SQLHandler(CONFIG["SQL"]["host"], CONFIG["SQL"]["user"], CONFIG["SQL"]["password"], CONFIG["SQL"]["database"]) + # initialize_database(server) + channel_data = holodex_generation(server) + generate_individual_pages(server, channel_data) + # builder.build_ranking_page(server, CONFIG) + + + + + diff --git a/sql/sql_handler.py b/sql/sql_handler.py index d349f64..a02db52 100644 --- a/sql/sql_handler.py +++ b/sql/sql_handler.py @@ -12,19 +12,18 @@ class SQLHandler: host_name, user_name, user_password) self._load_database(database_name) - def _create_server_connection(self, host_name: str, user_name: str, user_password: str, exclude=None) -> mysql.connector: + def _create_server_connection(self, host_name: str, user_name: str, user_password: str) -> mysql.connector: connection = None try: - connection = mysql.connector.connect( - host=host_name, - user=user_name, - passwd=user_password - ) + connection = mysql.connector.connect(host=host_name, user=user_name, passwd=user_password) print("MySQL Database connection successful") except Error as err: print(f"Error: '{err}'") return connection + def get_connection(self): + return self.connection + def _create_database(self, cursor: str, database_name: str): try: cursor.execute( @@ -48,75 +47,77 @@ class SQLHandler: print(err) exit(1) - def create_table(self, table_name: str, table_columns: str): + def create_table(self, name: str, column: str): cursor = self.connection.cursor() try: - cursor.execute(f"CREATE TABLE {table_name} ({table_columns})") - print(f"Table {table_name} created successfully") + cursor.execute(f"CREATE TABLE {name} ({column})") + print(f"Table {name} created successfully") except Error as err: print(err) - def insert_data(self, table_name: str, table_columns: str, data: str): + def insert_row(self, name: str, column: str, data: tuple): cursor = self.connection.cursor() try: - cursor.execute( - f"INSERT INTO {table_name} ({table_columns}) VALUES ({data})") + placeholders = ', '.join(['%s'] * len(data)) + query = f"INSERT INTO {name} ({column}) VALUES ({placeholders})" + cursor.execute(query, data) self.connection.commit() - print("Data inserted successfully") + print("Data Inserted:", data) except Error as err: print("Error inserting data") print(err) - def clear_table(self, table_name: str): + + def clear_table(self, name: str): cursor = self.connection.cursor() try: - cursor.execute(f"DELETE FROM {table_name}") + cursor.execute(f"DELETE FROM {name}") self.connection.commit() print("Table cleared successfully") except Error as err: print("Error clearing table") print(err) - def reset_auto_increment(self, table_name: str): + def reset_auto_increment(self, name: str): cursor = self.connection.cursor() try: - cursor.execute(f"ALTER TABLE {table_name} AUTO_INCREMENT = 1") + cursor.execute(f"ALTER TABLE {name} AUTO_INCREMENT = 1") self.connection.commit() print("Table reset successfully") except Error as err: print("Error resetting table") print(err) - def copy_rows_to_new_table(self, table_name: str, new_table_name: str, table_columns: str): + def copy_rows_to_new_table(self, name: str, new_name: str, column: str): cursor = self.connection.cursor() try: cursor.execute( - f"INSERT INTO {new_table_name} ({table_columns}) SELECT {table_columns} FROM {table_name}") + f"INSERT INTO {new_name} ({column}) SELECT {column} FROM {name}") cursor.execute( - f"ALTER TABLE {new_table_name} MODIFY COLUMN id INT AUTO_INCREMENT") + f"ALTER TABLE {new_name} MODIFY COLUMN id INT AUTO_INCREMENT") self.connection.commit() print("Rows copied successfully") except Error as err: print("Error copying rows") print(err) - def drop_table(self, table_name: str): + def drop_table(self, name: str): cursor = self.connection.cursor() try: - cursor.execute(f"DROP TABLE {table_name}") + cursor.execute(f"DROP TABLE {name}") self.connection.commit() print("Table dropped successfully") except Error as err: print("Error dropping table") print(err) - def check_row_exists(self, table_name: str, column_name: str, value: str): + def check_row_exists(self, name: str, column_name: str, value: str): """ Checks if a row exists in a table """ cursor = self.connection.cursor() try: - cursor.execute(f"SELECT * FROM {table_name} WHERE {column_name} = '{value}'") + cursor.execute(f"SELECT * FROM {name} WHERE {column_name} = '{value}'") result = cursor.fetchone() if result: return True @@ -126,15 +127,36 @@ class SQLHandler: print("Error checking row") print(err) - def update_row(self, table_name: str, column_name: str, search_val: str, replace_col:str, new_value: str): + def update_row(self, name: str, column_name: str, search_val: str, replace_col:str, new_value: str): """ Updates a row in a table """ cursor = self.connection.cursor() try: - cursor.execute(f"UPDATE {table_name} SET {replace_col} = '{new_value}' WHERE {column_name} = '{search_val}'") + cursor.execute(f"UPDATE {name} SET {replace_col} = '{new_value}' WHERE {column_name} = '{search_val}'") self.connection.commit() print("Row updated successfully") except Error as err: print("Error updating row") - print(err)
\ No newline at end of file + print(err) + + def execute_query(self, query: str): + cursor = self.connection.cursor() + try: + cursor.execute(query) + result = cursor.fetchall() + return result + except Error as err: + print("Error executing query") + print(err) + + def get_query_result(self, query: str): + cursor = self.connection.cursor() + try: + cursor.execute(query) + result = cursor.fetchall() + return result + except Error as err: + print("Error executing query") + print(err) +
\ No newline at end of file diff --git a/sql/table_builder.py b/sql/table_builder.py deleted file mode 100644 index e5fbb57..0000000 --- a/sql/table_builder.py +++ /dev/null @@ -1,252 +0,0 @@ -import mysql.connector -from mysql.connector import errorcode - -def sql_to_html_table(host, username, password, database_name, table_name, - diff_table="24h_historical", headers=["Rank", "Liver", "Subscribers", "Difference (24hr)"], root_url="https://nijitracker.com"): - try: - cnx = mysql.connector.connect(user=username, password=password, - host=host, database=database_name) - except mysql.connector.Error as err: - if err.errno == errorcode.ER_ACCESS_DENIED_ERROR: - print("Something is wrong with your user name or password") - elif err.errno == errorcode.ER_BAD_DB_ERROR: - print("Database does not exist") - else: - print(err) - - cursor = cnx.cursor() - query = f"SELECT id, channel_id, name, subscriber_count, timestamp, profile_pic FROM {table_name} ORDER by subscriber_count DESC" - cursor.execute(query) - data = cursor.fetchall() - table = "<table>" - header = "<tr>" - header_cols = headers - for col in header_cols: - header += f"<th>{col}</th>" - header += "</tr>" - table += header - rank = 1 - for row in data: - table += "<tr>" - table += f"<td>{rank}</td>" - rank += 1 - for i, col in enumerate(row): - if cursor.description[i][0] == "name": - channel_url = f"{root_url}/stats/{row[2]}" - profile_pic_url = row[5] - table += f"<td><a href='{channel_url}'><img src='{profile_pic_url}' height='50px' width='50px'>{col}</a></td>" - elif cursor.description[i][0] == "subscriber_count": - formatted_sub_count = "{:,.0f}".format(int(col)) - table += f"<td>{formatted_sub_count}</td>" - elif cursor.description[i][0] == "timestamp": - query = f"SELECT sub_diff FROM {diff_table} WHERE channel_id = '{row[1]}'" - try: - diff_cursor = cnx.cursor() - diff_cursor.execute(query) - diff_data = diff_cursor.fetchall() - old_sub_count = int(diff_data[0][0]) - current_sub_count = int(row[3]) - if old_sub_count > current_sub_count: - difference = f"-{old_sub_count - current_sub_count}" - else: - difference = f"+{current_sub_count - old_sub_count}" - table += f"<td>{difference}</td>" - except IndexError: - raise Exception("Are you trying to use a new set of channels?\nPlease delete last_refresh.txt in data folder first!") - elif cursor.description[i][0] not in ["id", "channel_id", "profile_pic"]: - table += f"<td>{col}</td>" - - - table += "</tr>" - table += "</table>" - style = "<style>\ - table {\ - font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;\ - font-size: 16px;\ - border-collapse: separate;\ - border-spacing: 0;\ - width: 100%;\ - max-width: 1570px;\ - margin: 0 auto;\ - background-color: #fff;\ - border-radius: 5px;\ - overflow: hidden;\ - box-shadow: 0px 0px 20px rgba(0, 0, 0, 0.1);\ - }\ - th, td {\ - text-align: left;\ - padding: 12px 15px;\ - font-size: 18px;\ - border-bottom: 1px solid #ddd;\ - }\ - th {\ - background-color: #2a4b71;\ - color: #fff;\ - font-weight: bold;\ - text-transform: uppercase;\ - letter-spacing: 0.03em;\ - }\ - td:nth-child(3), td:nth-child(4) {\ - text-align: left;\ - padding: 12px 15px;\ - font-size: 18px;\ - border-bottom: 1px solid #ddd;\ - }\ - tbody tr:nth-child(even) {\ - background-color: #f2f2f2;\ - }\ - tbody tr:hover {\ - background-color: #ddd;\ - }\ - a {\ - color: #3c8dbc;\ - text-decoration: none;\ - }\ - </style>" - style += "<style>\ - @media screen and (max-width: 1024px) {\ - table {\ - font-size: 14px;\ - }\ - th, td {\ - padding: 8px 10px;\ - font-size: 16px;\ - }\ - }\ - @media screen and (max-width: 768px) {\ - th, td {\ - padding: 5px 8px;\ - font-size: 14px;\ - }\ - }\ - @media screen and (max-width: 600px) {\ - th, td {\ - padding: 3px 5px;\ - font-size: 10px;\ - }\ - }\ - @media screen and (max-width: 400px) {\ - th, td {\ - padding: 2px 4px;\ - font-size: 8px;\ - }\ - }\ - </style>" - - - cursor.close() - cnx.close() - return table + style - - -def generate_individual_table(host, username, password, database_name, table_name, param="LIMIT 7"): - try: - cnx = mysql.connector.connect(user=username, password=password, - host=host, database=database_name) - except mysql.connector.Error as err: - if err.errno == errorcode.ER_ACCESS_DENIED_ERROR: - print("Something is wrong with your user name or password") - elif err.errno == errorcode.ER_BAD_DB_ERROR: - print("Database does not exist") - else: - print(err) - - cursor = cnx.cursor() - query = f"SELECT subscriber_count, timestamp FROM {table_name} GROUP BY DATE(timestamp) ORDER by timestamp DESC " + param - - cursor.execute(query) - data = cursor.fetchall() - table = "<table>" - header = "<tr>" - header_cols = ["Subscribers", "Timestamp"] - for col in header_cols: - header += f"<th>{col}</th>" - header += "</tr>" - table += header - for row in data: - table += "<tr>" - for i, col in enumerate(row): - if cursor.description[i][0] == "subscriber_count": - formatted_sub_count = "{:,.0f}".format(int(col)) - table += f"<td>{formatted_sub_count}</td>" - else: - table += f"<td>{col}</td>" - table += "</tr>" - table += "</table>" - style = "<style>\ - table {\ - font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;\ - font-size: 16px;\ - border-collapse: separate;\ - border-spacing: 0;\ - width: 100%;\ - max-width: 1570px;\ - margin: 0 auto;\ - background-color: #fff;\ - border-radius: 5px;\ - overflow: hidden;\ - box-shadow: 0px 0px 20px rgba(0, 0, 0, 0.1);\ - }\ - th, td {\ - text-align: left;\ - padding: 12px 15px;\ - font-size: 18px;\ - border-bottom: 1px solid #ddd;\ - }\ - th {\ - background-color: #2a4b71;\ - color: #fff;\ - font-weight: bold;\ - text-transform: uppercase;\ - letter-spacing: 0.03em;\ - }\ - td:nth-child(3), td:nth-child(4) {\ - text-align: left;\ - padding: 12px 15px;\ - font-size: 18px;\ - border-bottom: 1px solid #ddd;\ - }\ - tbody tr:nth-child(even) {\ - background-color: #f2f2f2;\ - }\ - tbody tr:hover {\ - background-color: #ddd;\ - }\ - a {\ - color: #3c8dbc;\ - text-decoration: none;\ - }\ - </style>" - style += "<style>\ - @media screen and (max-width: 1024px) {\ - table {\ - font-size: 14px;\ - }\ - th, td {\ - padding: 8px 10px;\ - font-size: 16px;\ - }\ - }\ - @media screen and (max-width: 768px) {\ - th, td {\ - padding: 5px 8px;\ - font-size: 14px;\ - }\ - }\ - @media screen and (max-width: 600px) {\ - th, td {\ - padding: 3px 5px;\ - font-size: 12px;\ - }\ - }\ - @media screen and (max-width: 400px) {\ - th, td {\ - padding: 2px 4px;\ - font-size: 10px;\ - }\ - }\ - </style>" - - cursor.close() - cnx.close() - return table + style diff --git a/sql_table_config.json b/sql_table_config.json new file mode 100644 index 0000000..7aea426 --- /dev/null +++ b/sql_table_config.json @@ -0,0 +1,7 @@ +{ + "LIVE_COLUMNS": "id INT PRIMARY KEY AUTO_INCREMENT, channel_id VARCHAR(255), profile_pic VARCHAR(255), name VARCHAR(255), subscriber_count INT, timestamp DATETIME", + "LIVE_HEADER": "channel_id, profile_pic, name, subscriber_count, timestamp", + "DAILY_COLUMNS": "id INT PRIMARY KEY AUTO_INCREMENT, channel_id VARCHAR(255), sub_diff INT", + "DAILY_HEADER": "channel_id, sub_diff", + "HOLODEX_ORGS": "Phase%20Connect" +} diff --git a/webapi/holodex.py b/webapi/holodex.py index 6a5dc93..5f81892 100644 --- a/webapi/holodex.py +++ b/webapi/holodex.py @@ -1,4 +1,5 @@ from webapi.web_api import WebAPI +from typing import Iterable class HolodexAPI(WebAPI): @@ -6,62 +7,69 @@ class HolodexAPI(WebAPI): Class for interacting with the Holodex API """ - def __init__(self, api_key: str = None, member_count: int = 300, - organization: str = "Nijisanji"): - super().__init__(api_key = api_key, base_url = "https://holodex.net/api/v2/") + def __init__(self,api_key: str = None,member_count: int = 300,organization: str = "Nijisanji"): + super().__init__(api_key=api_key, base_url="https://holodex.net/api/v2/") self.member_count = member_count self.organization = organization self._inactive_channels = [] - self._active_channels = [] + self._channel_data = [] - def get_data_all_channels(self) -> list: + def get_subscriber_data(self) -> Iterable: """ Gets data for all channels in a particular organization """ members = self.member_count data = [] - filtered_data = [] + active_channels = [] offset = 0 while members > 0: data += self._download_url( - f"channels?type=vtuber&offset={offset}&limit=100&org={self.organization}") + f"channels?type=vtuber&offset={offset}&limit=100&org={self.organization}" + ) members -= 100 offset += 100 for channel in data: - print(channel['name']) - if channel['inactive'] is False: - try: - self._active_channels.append(channel['id'] + "," + channel['english_name']) - except (KeyError, TypeError, ValueError): - self._active_channels.append(channel['id'] + "," + channel['name']) - channel['description'] = self.get_channel_description(channel['id']) - filtered_data.append(channel) - else: - self._inactive_channels.append(channel['id']) - return filtered_data - - def get_exclude_channels(self) -> list: - """ - Gets the list of excluded channels - """ - return self._inactive_channels + print("DEBUG: ", channel["id"]) + try: + channel["description"] = self.get_channel_description(channel["id"]) + if channel["inactive"]: + self._inactive_channels.append(channel["id"]) + continue + active_channels.append(channel) + except (KeyError, TypeError, ValueError): + print("DEBUG:","An error occured with parsing ", channel["id"], channel["name"]) + continue + self._channel_data = active_channels + return active_channels def get_view_count(self, channel_id: str) -> int: """ Gets the view count for a particular channel """ data = self._download_url(f"channels/{channel_id}") - return data['view_count'] + return data["view_count"] def get_channel_description(self, channel_id: str) -> str: """ Gets the description for a particular channel """ data = self._download_url(f"channels/{channel_id}") - return data['description'] + return data["description"] + + def set_organization(self, organization: str): + """ + Sets the organization for the API + """ + self.organization = organization + + def get_inactive_channels(self) -> list: + """ + Gets the list of inactive channels + """ + return self._inactive_channels - def get_active_channels(self) -> list: + def get_generated_channel_data(self) -> list: """ - Gets the list of active channels + Gets the list of channel data """ - yield from self._active_channels + return self._channel_data |
