aboutsummaryrefslogtreecommitdiffstats
path: root/vod_parser.py
blob: 6e02b61c52f2dec8da8dc0abc3dcebc6eda8a200 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import json
import os
import subprocess
from collections import defaultdict
from datetime import datetime, timezone

# Keys looked up under a chat item; the first one present that carries a
# "timestampUsec" field supplies the message time for VodParser.parse_chat.
LIVE_CHAT_RENDERERS = ["liveChatViewerEngagementMessageRenderer", "liveChatTextMessageRenderer"]

class VodParser:
    """Find bursts of chat activity in a downloaded live-chat replay file.

    The replay is read from ``<video_id>.live_chat.json``, one JSON document
    per line. ``parse_chat`` measures the message rate over windows longer
    than ``cooldown`` seconds and reports every window whose rate exceeds the
    threshold to ``data_writer``.
    """

    def __init__(self, video_id: str, data_writer, theshold: int = 1, cooldown: float = 5, keywords: list = None, start_unix_time: int = 0):
        """Store the parser configuration.

        Args:
            video_id: Video identifier; also the stem of the chat file name.
            data_writer: Any object with a ``write(str)`` method; one line is
                written per notable moment.
            theshold: Message rate (messages per second) a window must exceed
                to be reported. The misspelt name is kept so that existing
                keyword callers keep working.
            cooldown: Minimum window length in seconds before the rate is
                evaluated.
            keywords: Stored on the instance; no method of this class reads
                it. ``None`` (the default) gives each instance its own empty
                list instead of one list shared by every instance.
            start_unix_time: Unix time subtracted from a message timestamp to
                obtain the offset that is reported.
        """
        self.video_id = video_id
        self._chat_file = f"{self.video_id}.live_chat.json"
        self._threshold = theshold
        self._cooldown = cooldown
        self._keywords = [] if keywords is None else keywords
        self._start_unix_time = start_unix_time
        self.data_writer = data_writer

    def download_live_chat(self):
        """Fetch the live-chat replay for ``video_id`` with ``yt-dlp``.

        Any previous chat file is removed first. The command is passed as an
        argument list (no shell), so the video id is never interpreted by a
        shell, and it is placed after ``--`` so an id that starts with ``-``
        is not mistaken for an option. The exit status is not checked.

        Raises:
            FileNotFoundError: If the ``yt-dlp`` executable cannot be found.
        """
        try:
            os.remove(self._chat_file)
        except FileNotFoundError:
            pass
        subprocess.run(
            ["yt-dlp", "--skip-download", "-o", "%(id)s", "--write-sub", "--", self.video_id],
            check=False,
        )
        # yt-dlp is expected to write "<id>.live_chat.json" next to the
        # current working directory given the "-o %(id)s" template.
        self._chat_file = f"{self.video_id}.live_chat.json"

    def clean_up(self):
        """Delete the chat file; do nothing if it is already gone."""
        try:
            os.remove(self._chat_file)
        except FileNotFoundError:
            pass

    @staticmethod
    def _message_timestamp(chat_data):
        """Return the message time in Unix seconds, or None if the line has none."""
        try:
            item = chat_data["replayChatItemAction"]["actions"][0]["addChatItemAction"]["item"]
        except (KeyError, IndexError, TypeError):
            return None
        for renderer in LIVE_CHAT_RENDERERS:
            try:
                return int(item[renderer]["timestampUsec"]) / 1000000
            except (KeyError, TypeError, ValueError):
                continue
        return None

    def parse_chat(self):
        """Scan the chat file and report windows with a high message rate.

        Messages are counted from the start of the current window. Once a
        message arrives more than ``cooldown`` seconds after the window
        start, the rate (messages / elapsed seconds) is computed; if it
        exceeds the threshold, the moment is printed and a line is written to
        ``data_writer``. The window then restarts at that message. A message
        older than the window start moves the window start back to it.

        Blank lines and lines without a recognised renderer timestamp are
        skipped. The reported time is formatted with ``%H:%M:%S``, so offsets
        of 24 hours or more wrap around.

        Raises:
            FileNotFoundError: If the chat file does not exist.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        window_count = 0
        window_start = None
        # The replay contains arbitrary chat text, so do not depend on the
        # platform's default encoding.
        with open(self._chat_file, "r", encoding="utf-8") as ndjson_file:
            for line in ndjson_file:
                if not line.strip():
                    continue
                unix_timestamp = self._message_timestamp(json.loads(line))
                if unix_timestamp is None:
                    continue

                if window_start is None or unix_timestamp < window_start:
                    window_start = unix_timestamp
                    window_count += 1
                    continue

                elapsed = unix_timestamp - window_start
                if elapsed > self._cooldown:
                    message_rate = window_count / elapsed
                    if message_rate > self._threshold:
                        print(f"Message rate exceeded threshold: {message_rate} messages per second")
                        seconds_since_start_of_stream = unix_timestamp - self._start_unix_time
                        youtube_timestamp = datetime.fromtimestamp(
                            seconds_since_start_of_stream, tz=timezone.utc
                        ).strftime('%H:%M:%S')
                        print(f"Timestamp: {youtube_timestamp}")
                        self.data_writer.write(f"Notable moment found at {youtube_timestamp}\n")

                    # The current message opens the next window.
                    window_count = 0
                    window_start = unix_timestamp
                window_count += 1


if __name__ == "__main__":
    # Manual run: parse an already-downloaded chat file for a sample video.
    import sys

    # VodParser requires a data_writer argument; omitting it raised a
    # TypeError before any parsing happened. Notable moments go to stdout.
    parser = VodParser("RsYA5heZSk8", sys.stdout)
    parser.parse_chat()
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage