summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Pinapelz <yukais@pinapelz.com> 2025-05-05 22:51:05 -0700
committer: Pinapelz <yukais@pinapelz.com> 2025-05-05 22:55:35 -0700
commit: ba43fe649d929ef596cdbc81c333193facf2f695 (patch)
tree: 9856d6edb1a3458dc9340ebb62a4479f602ac665
parent: e6f407d85a2acd610f9c336a3d60cdd084aeb02c (diff)
phase_tracker_only: twitch follower data collection
-rw-r--r-- nijitrack.py 14
-rw-r--r-- sql_table_config.json 23
-rw-r--r-- twitch.py 79
3 files changed, 106 insertions, 10 deletions
diff --git a/nijitrack.py b/nijitrack.py
index fd2c76b..b9706f3 100644
--- a/nijitrack.py
+++ b/nijitrack.py
@@ -4,6 +4,7 @@ from datetime import datetime
import dotenv
import pytz
+import twitch
from b2sdk.v2 import *
from logger import *
from sql.pg_handler import PostgresHandler
@@ -35,6 +36,7 @@ def initialize_database(server: PostgresHandler):
server.create_table(name = DATA_SETTING["TABLE_LIVE"], column = DATA_SETTING["LIVE_COLUMNS"])
server.create_table(name = DATA_SETTING["TABLE_HISTORICAL"], column = DATA_SETTING["HISTORICAL_COLUMNS"])
server.create_table(name = DATA_SETTING["TABLE_DAILY"], column = DATA_SETTING["DAILY_COLUMNS"])
+ server.create_table(name = DATA_SETTING["TABLE_TWITCH"], column = DATA_SETTING["TWITCH_COLUMNS"])
@track_task_time("Inserting Live Data into Database")
@@ -98,6 +100,18 @@ def record_subscriber_data(data: list, force_refresh: bool = False):
data_tuple = (channel_id, pfp, channel_name, sub_count, sub_org, video_count, view_count, formatted_time)
historical_data_tuple = (channel_id, pfp, channel_name, sub_count, formatted_time)
server.insert_row(table_name = DATA_SETTING["TABLE_LIVE"], column = DATA_SETTING["LIVE_HEADER"], data=data_tuple)
+ twitch_name = twitch.youtube_to_twitch_map.get(channel_id, None)
+ if twitch_name:
+ follower_count = twitch.get_followers_total(twitch_name)
+ if follower_count is None:
+ print("[TWITCH] Failed to get follower count attempting to scrape", twitch_name)
+ follower_count = twitch.get_total_follower_count_scrape(twitch_name)
+ if follower_count:
+ print(f"[TWITCH] Got follower count for {channel_name} -> {follower_count}")
+ server.delete_row(table_name = DATA_SETTING["TABLE_TWITCH"], column = "channel_id", value = channel_id)
+ server.insert_row(table_name = DATA_SETTING["TABLE_TWITCH"], column = DATA_SETTING["TWITCH_HEADER"], data=(channel_id, follower_count))
+ else:
+ print(f"[TWITCH] Failed to get follower count for {channel_name}. Likely too low")
update_data_records(historical_data_tuple, should_update_historical_data)
diff --git a/sql_table_config.json b/sql_table_config.json
index a4be574..45dc6e7 100644
--- a/sql_table_config.json
+++ b/sql_table_config.json
@@ -1,12 +1,15 @@
{
- "TABLE_LIVE": "subscriber_data",
- "TABLE_HISTORICAL": "subscriber_data_historical",
- "TABLE_DAILY": "24h_historical",
- "LIVE_COLUMNS": "id SERIAL PRIMARY KEY, channel_id VARCHAR(255), profile_pic VARCHAR(255), name VARCHAR(255), subscriber_count INT, suborg VARCHAR(255), video_count INT, view_count INT, timestamp TIMESTAMP",
- "LIVE_HEADER": "channel_id, profile_pic, name, subscriber_count, suborg, video_count, view_count, timestamp",
- "DAILY_COLUMNS": "id SERIAL PRIMARY KEY, channel_id VARCHAR(255), sub_diff INT",
- "DAILY_HEADER": "channel_id, sub_diff",
- "HISTORICAL_COLUMNS": "id SERIAL PRIMARY KEY, channel_id VARCHAR(255), profile_pic VARCHAR(255), name VARCHAR(255), subscriber_count INT, timestamp TIMESTAMP",
- "HISTORICAL_HEADER": "channel_id, profile_pic, name, subscriber_count, timestamp",
- "HOLODEX_ORGS": "Phase%20Connect"
+ "TABLE_LIVE": "subscriber_data",
+ "TABLE_HISTORICAL": "subscriber_data_historical",
+ "TABLE_DAILY": "24h_historical",
+ "TABLE_TWITCH": "twitch_stats",
+ "LIVE_COLUMNS": "id SERIAL PRIMARY KEY, channel_id VARCHAR(255), profile_pic VARCHAR(255), name VARCHAR(255), subscriber_count INT, suborg VARCHAR(255), video_count INT, view_count INT, timestamp TIMESTAMP",
+ "LIVE_HEADER": "channel_id, profile_pic, name, subscriber_count, suborg, video_count, view_count, timestamp",
+ "DAILY_COLUMNS": "id SERIAL PRIMARY KEY, channel_id VARCHAR(255), sub_diff INT",
+ "DAILY_HEADER": "channel_id, sub_diff",
+ "HISTORICAL_COLUMNS": "id SERIAL PRIMARY KEY, channel_id VARCHAR(255), profile_pic VARCHAR(255), name VARCHAR(255), subscriber_count INT, timestamp TIMESTAMP",
+ "HISTORICAL_HEADER": "channel_id, profile_pic, name, subscriber_count, timestamp",
+ "TWITCH_COLUMNS": "channel_id VARCHAR(255) PRIMARY KEY, follower_count INT",
+ "TWITCH_HEADER": "channel_id, follower_count",
+ "HOLODEX_ORGS": "Phase%20Connect"
}
diff --git a/twitch.py b/twitch.py
new file mode 100644
index 0000000..9ede9bb
--- /dev/null
+++ b/twitch.py
@@ -0,0 +1,79 @@
+import requests
+from bs4 import BeautifulSoup
+import time
+
+
+# Maps a YouTube channel ID (key) to the Twitch channel name (value) that is
+# interpolated into twitchtracker.com URLs by the lookup functions below.
+# Channels without an entry here get no Twitch follower lookup.
+youtube_to_twitch_map = {
+ "UC1cExET9xoWSO9iSnRsW_1Q": "michiru_shisui",
+ "UC3K7pmiHsNSx1y0tdx2bbCw": "tenma",
+ "UCB7sSUNwh_dXE7ZL3DsGDpw": "utatanenasa",
+ "UCg7sW-h1PUowdiR5K4HlBew": "asheliarinkou",
+ "UCJ46YTYBQVXsfsp8-HryoUA": "pippa",
+ "UCN5bD1YYapThOeadG7YkBOA": "iorihakushika",
+ "UC0w_dvkIwnXzMak6gfeioRQ": "emberamane",
+ "UC98iRMvRqUxRD6GP4NtRskw": "dizzydokuro",
+ "UCkb-r702uhx4-6Lrmetp-Ow": "jellyhoshiumi",
+ "UCx_zwZuGIS4jxO07kFk8G6Q": "kanekolumi",
+ "UC3aEtHpGzCFvoSn_wRWzgZQ": "EepySleepy",
+ "UCG5vZgELi3on_pksaLrIoxw": "marimari_en",
+ "UCrGTSXWMiAWoPlowSuMVZ4Q": "clioaite",
+ "UC-hMwvRuMsQrfgu0DPKLV2A": "remilianephys",
+ "UCJ4O6PWA47f6XbCgrLQNqEQ": "himemiyarie",
+ "UCnJNNk45O1QYS2oMRYFKSyw": "amanogawashiina",
+ "UCVo_KgPNsDKxHwzib7uarCw": "komachipanko",
+ "UCejbicoRnQjCOdAPAv5JPwg": "muumuyu",
+ "UCoAQsc-DQ0MjfTp059otQAw": "RunieRuse",
+ "UC-tBLCGTheczDn5mYNoNWTQ": "eimiisami",
+ "UCXDytlJU6RL8D68VrPZGyIA": "HikanariHina",
+ "UCGXwv2zYOxeWiNNyPiLCBCQ": "kokoromomemory",
+ "UCnNLZWjl4GvVF4s8zBT9_kA": "ayaseyuu_",
+ "UCRlZaszk84YXjRtdPCxqjvw": "kaminariclara",
+ "UCtWH0tVAcUcSm4v96H5cAqQ": "grampico"
+}
+
+def get_followers_total(channel_name):
+    """Return the follower total reported by the twitchtracker summary API.
+
+    Args:
+        channel_name: Twitch channel name to look up.
+
+    Returns:
+        The follower count as an int, or None when the request fails, the
+        response is not HTTP 200, the body is not a JSON object, or the
+        "followers_total" field is missing or not convertible to an int.
+        None is the signal a caller can use to fall back to another source.
+    """
+    url = f"https://twitchtracker.com/api/channels/summary/{channel_name}"
+    try:
+        # Bounded wait so one unresponsive request cannot stall the caller.
+        response = requests.get(url, timeout=10)
+    except requests.RequestException:
+        return None
+    if response.status_code != 200:
+        return None
+    try:
+        data = response.json()
+    except ValueError:
+        # Body was not valid JSON (e.g. an HTML error page served with 200).
+        return None
+    if not isinstance(data, dict):
+        return None
+    try:
+        return int(data["followers_total"])
+    except (KeyError, TypeError, ValueError):
+        # Field absent, null, or not numeric.
+        return None
+
+
+def _parse_follower_number(raw):
+    """Return text such as "12,345" as an int, or None if it is not a plain number."""
+    cleaned = raw.replace(",", "").strip()
+    if not cleaned.isdigit():
+        return None
+    try:
+        return int(cleaned)
+    except ValueError:
+        # str.isdigit() accepts characters (e.g. superscript digits) that int() rejects.
+        return None
+
+
+def _find_follower_count(soup):
+    """Search a parsed channel page for the follower total; return an int or None."""
+    # First attempt: a "list-group-item" <li> whose small-font label reads
+    # "followers", with the value in a "to-number" span inside the same item.
+    for li in soup.find_all("li", class_="list-group-item"):
+        label = li.find("div", style=lambda val: val and "font-size:12px" in val)
+        if label and label.text.strip().lower() == "followers":
+            number_span = li.find("span", class_="to-number")
+            if number_span and number_span.text:
+                count = _parse_follower_number(number_span.text)
+                if count is not None:
+                    return count
+    # Fallback: the next "to-number" span after any text containing "Followers".
+    for text in soup.find_all(string=lambda text: text and "Followers" in text):
+        parent = text.find_parent()
+        if parent:
+            number = parent.find_next("span", class_="to-number")
+            if number:
+                count = _parse_follower_number(number.text)
+                if count is not None:
+                    return count
+    return None
+
+
+def get_total_follower_count_scrape(username: str, cooldown: float = 5) -> "int | None":
+    """Scrape the follower total from a channel's twitchtracker.com page.
+
+    Args:
+        username: Twitch channel name; used as the path of the page URL.
+        cooldown: Seconds to sleep after a successful scrape before returning.
+            Defaults to 5; a value of 0 or less skips the pause.
+
+    Returns:
+        The follower count as an int, or None when the page cannot be fetched
+        (network error, timeout, HTTP error status) or no follower number is
+        found in the markup.
+    """
+    url = f"https://twitchtracker.com/{username}"
+    # Browser-like User-Agent. NOTE(review): presumably needed because the site
+    # rejects the default client User-Agent; confirm.
+    headers = {
+        "User-Agent": (
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+            "AppleWebKit/537.36 (KHTML, like Gecko) "
+            "Chrome/124.0.0.0 Safari/537.36"
+        )
+    }
+
+    try:
+        response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+    except requests.RequestException as err:
+        # Report and return None so one failing channel does not abort the
+        # caller's whole collection run.
+        print(f"[TWITCH_SCRAPE] Request for {username} failed: {err}")
+        return None
+
+    count = _find_follower_count(BeautifulSoup(response.text, "html.parser"))
+    if count is not None and cooldown > 0:
+        # Pause only after a value was actually parsed, once per call.
+        print(f"[TWITCH_SCRAPE] Forced cooldown {cooldown} seconds")
+        time.sleep(cooldown)
+    return count
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage