diff options
| -rw-r--r-- | nijitrack.py | 14 | ||||
| -rw-r--r-- | sql_table_config.json | 23 | ||||
| -rw-r--r-- | twitch.py | 79 |
3 files changed, 106 insertions, 10 deletions
diff --git a/nijitrack.py b/nijitrack.py index fd2c76b..b9706f3 100644 --- a/nijitrack.py +++ b/nijitrack.py @@ -4,6 +4,7 @@ from datetime import datetime import dotenv import pytz +import twitch from b2sdk.v2 import * from logger import * from sql.pg_handler import PostgresHandler @@ -35,6 +36,7 @@ def initialize_database(server: PostgresHandler): server.create_table(name = DATA_SETTING["TABLE_LIVE"], column = DATA_SETTING["LIVE_COLUMNS"]) server.create_table(name = DATA_SETTING["TABLE_HISTORICAL"], column = DATA_SETTING["HISTORICAL_COLUMNS"]) server.create_table(name = DATA_SETTING["TABLE_DAILY"], column = DATA_SETTING["DAILY_COLUMNS"]) + server.create_table(name = DATA_SETTING["TABLE_TWITCH"], column = DATA_SETTING["TWITCH_COLUMNS"]) @track_task_time("Inserting Live Data into Database") @@ -98,6 +100,18 @@ def record_subscriber_data(data: list, force_refresh: bool = False): data_tuple = (channel_id, pfp, channel_name, sub_count, sub_org, video_count, view_count, formatted_time) historical_data_tuple = (channel_id, pfp, channel_name, sub_count, formatted_time) server.insert_row(table_name = DATA_SETTING["TABLE_LIVE"], column = DATA_SETTING["LIVE_HEADER"], data=data_tuple) + twitch_name = twitch.youtube_to_twitch_map.get(channel_id, None) + if twitch_name: + follower_count = twitch.get_followers_total(twitch_name) + if follower_count is None: + print("[TWITCH] Failed to get follower count attempting to scrape", twitch_name) + follower_count = twitch.get_total_follower_count_scrape(twitch_name) + if follower_count: + print(f"[TWITCH] Got follower count for {channel_name} -> {follower_count}") + server.delete_row(table_name = DATA_SETTING["TABLE_TWITCH"], column = "channel_id", value = channel_id) + server.insert_row(table_name = DATA_SETTING["TABLE_TWITCH"], column = DATA_SETTING["TWITCH_HEADER"], data=(channel_id, follower_count)) + else: + print(f"[TWITCH] Failed to get follower count for {channel_name}. Likely too low") update_data_records(historical_data_tuple, should_update_historical_data) diff --git a/sql_table_config.json b/sql_table_config.json index a4be574..45dc6e7 100644 --- a/sql_table_config.json +++ b/sql_table_config.json @@ -1,12 +1,15 @@ { - "TABLE_LIVE": "subscriber_data", - "TABLE_HISTORICAL": "subscriber_data_historical", - "TABLE_DAILY": "24h_historical", - "LIVE_COLUMNS": "id SERIAL PRIMARY KEY, channel_id VARCHAR(255), profile_pic VARCHAR(255), name VARCHAR(255), subscriber_count INT, suborg VARCHAR(255), video_count INT, view_count INT, timestamp TIMESTAMP", - "LIVE_HEADER": "channel_id, profile_pic, name, subscriber_count, suborg, video_count, view_count, timestamp", - "DAILY_COLUMNS": "id SERIAL PRIMARY KEY, channel_id VARCHAR(255), sub_diff INT", - "DAILY_HEADER": "channel_id, sub_diff", - "HISTORICAL_COLUMNS": "id SERIAL PRIMARY KEY, channel_id VARCHAR(255), profile_pic VARCHAR(255), name VARCHAR(255), subscriber_count INT, timestamp TIMESTAMP", - "HISTORICAL_HEADER": "channel_id, profile_pic, name, subscriber_count, timestamp", - "HOLODEX_ORGS": "Phase%20Connect" + "TABLE_LIVE": "subscriber_data", + "TABLE_HISTORICAL": "subscriber_data_historical", + "TABLE_DAILY": "24h_historical", + "TABLE_TWITCH": "twitch_stats", + "LIVE_COLUMNS": "id SERIAL PRIMARY KEY, channel_id VARCHAR(255), profile_pic VARCHAR(255), name VARCHAR(255), subscriber_count INT, suborg VARCHAR(255), video_count INT, view_count INT, timestamp TIMESTAMP", + "LIVE_HEADER": "channel_id, profile_pic, name, subscriber_count, suborg, video_count, view_count, timestamp", + "DAILY_COLUMNS": "id SERIAL PRIMARY KEY, channel_id VARCHAR(255), sub_diff INT", + "DAILY_HEADER": "channel_id, sub_diff", + "HISTORICAL_COLUMNS": "id SERIAL PRIMARY KEY, channel_id VARCHAR(255), profile_pic VARCHAR(255), name VARCHAR(255), subscriber_count INT, timestamp TIMESTAMP", + "HISTORICAL_HEADER": "channel_id, profile_pic, name, subscriber_count, timestamp", + "TWITCH_COLUMNS": "channel_id VARCHAR(255) PRIMARY KEY, follower_count INT", + "TWITCH_HEADER": "channel_id, follower_count", + "HOLODEX_ORGS": "Phase%20Connect" } diff --git a/twitch.py b/twitch.py new file mode 100644 index 0000000..9ede9bb --- /dev/null +++ b/twitch.py @@ -0,0 +1,79 @@ +import requests +from bs4 import BeautifulSoup +import time + + +youtube_to_twitch_map = { + "UC1cExET9xoWSO9iSnRsW_1Q": "michiru_shisui", + "UC3K7pmiHsNSx1y0tdx2bbCw": "tenma", + "UCB7sSUNwh_dXE7ZL3DsGDpw": "utatanenasa", + "UCg7sW-h1PUowdiR5K4HlBew": "asheliarinkou", + "UCJ46YTYBQVXsfsp8-HryoUA": "pippa", + "UCN5bD1YYapThOeadG7YkBOA": "iorihakushika", + "UC0w_dvkIwnXzMak6gfeioRQ": "emberamane", + "UC98iRMvRqUxRD6GP4NtRskw": "dizzydokuro", + "UCkb-r702uhx4-6Lrmetp-Ow": "jellyhoshiumi", + "UCx_zwZuGIS4jxO07kFk8G6Q": "kanekolumi", + "UC3aEtHpGzCFvoSn_wRWzgZQ": "EepySleepy", + "UCG5vZgELi3on_pksaLrIoxw": "marimari_en", + "UCrGTSXWMiAWoPlowSuMVZ4Q": "clioaite", + "UC-hMwvRuMsQrfgu0DPKLV2A": "remilianephys", + "UCJ4O6PWA47f6XbCgrLQNqEQ": "himemiyarie", + "UCnJNNk45O1QYS2oMRYFKSyw": "amanogawashiina", + "UCVo_KgPNsDKxHwzib7uarCw": "komachipanko", + "UCejbicoRnQjCOdAPAv5JPwg": "muumuyu", + "UCoAQsc-DQ0MjfTp059otQAw": "RunieRuse", + "UC-tBLCGTheczDn5mYNoNWTQ": "eimiisami", + "UCXDytlJU6RL8D68VrPZGyIA": "HikanariHina", + "UCGXwv2zYOxeWiNNyPiLCBCQ": "kokoromomemory", + "UCnNLZWjl4GvVF4s8zBT9_kA": "ayaseyuu_", + "UCRlZaszk84YXjRtdPCxqjvw": "kaminariclara", + "UCtWH0tVAcUcSm4v96H5cAqQ": "grampico" +} + +def get_followers_total(channel_name): + url = f"https://twitchtracker.com/api/channels/summary/{channel_name}" + response = requests.get(url) + if response.status_code == 200: + data = response.json() + return int(data.get("followers_total")) if "followers_total" in data else None + else: + return None + + +def get_total_follower_count_scrape(username: str) -> int: + url = f"https://twitchtracker.com/{username}" + headers = { + "User-Agent": ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/124.0.0.0 Safari/537.36" + ) + } + + response = requests.get(url, headers=headers, timeout=10) + response.raise_for_status() + html = response.text + + soup = BeautifulSoup(html, 'html.parser') + for li in soup.find_all("li", class_="list-group-item"): + label = li.find("div", style=lambda val: val and "font-size:12px" in val) + if label and label.text.strip().lower() == "followers": + number_span = li.find("span", class_="to-number") + if number_span and number_span.text: + try: + print("[TWITCH_SCRAPE] Forced cooldown 5 seconds") + time.sleep(5) + return int(number_span.text.replace(",", "").strip()) + except ValueError: + continue + follower_blocks = soup.find_all(string=lambda text: text and "Followers" in text) + for text in follower_blocks: + parent = text.find_parent() + if parent: + number = parent.find_next("span", class_="to-number") + if number and number.text.replace(",", "").strip().isdigit(): + print("[TWITCH_SCRAPE] Forced cooldown 5 seconds") + time.sleep(5) + return int(number.text.replace(",", "").strip()) + return None |
