aboutsummaryrefslogtreecommitdiffstats
path: root/playlist_generator/generate_daily.py
blob: b810ba94cef803eced53552e385bcb8fa164a83d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import os
import boto3
import json
import requests
import random
from dotenv import load_dotenv
from datetime import datetime, timezone
import yt_dlp
import time

# Pull settings from a local .env file (python-dotenv) into the process
# environment before the constants below are read.
load_dotenv()

# Cloudflare R2 settings consumed by upload_to_r2(). os.getenv returns None
# for any variable that is not set.
ACCOUNT_ID = os.getenv("R2_ACCOUNT_ID")
ACCESS_KEY = os.getenv("R2_ACCESS_KEY")
SECRET_KEY = os.getenv("R2_SECRET_KEY")
BUCKET = os.getenv("R2_BUCKET")
# Base URL of the API; fetch_daily() appends "/today" to it.
API_URL = os.getenv("API_URL")
# Salt used to derive the per-date XOR key. VITE_HEARDLE_SALT wins when it is
# set to a non-empty value; otherwise OBFUSCATION_KEY is used (`or` also skips
# an empty string, not just an unset variable).
HEARDLE_SALT = (
    os.getenv("VITE_HEARDLE_SALT")
    or os.getenv("OBFUSCATION_KEY")
)


def xor_buffer(data: bytes, key: bytes) -> bytes:
    """Return *data* XOR-ed byte-by-byte with *key*, repeating the key as needed.

    Applying the function twice with the same key restores the input. An
    empty *key* combined with non-empty *data* raises ZeroDivisionError.
    """
    key_len = len(key)
    mixed = bytearray(len(data))
    for pos, byte in enumerate(data):
        # Wrap around the key so it covers inputs longer than itself.
        mixed[pos] = byte ^ key[pos % key_len]
    return bytes(mixed)

def get_obfuscation_key(date: str) -> bytes:
    """Build the XOR key for *date*: the configured salt followed by the date.

    Args:
        date: Date string to append to the salt.

    Returns:
        The UTF-8 encoding of ``HEARDLE_SALT + date``.

    Raises:
        ValueError: If HEARDLE_SALT is unset or empty.
    """
    if HEARDLE_SALT:
        salted = HEARDLE_SALT + date
        return salted.encode("utf-8")
    raise ValueError(
        "Missing HEARDLE salt. Set VITE_HEARDLE_SALT (preferred) or OBFUSCATION_KEY."
    )

def delete_file(file_path):
    """Remove *file_path* if it is present.

    Args:
        file_path: Path of the file to delete.

    Returns:
        True if a file was removed, False if there was nothing at that path.

    Other OSError subclasses (e.g. the path is a directory, or permission is
    denied) still propagate to the caller.
    """
    # Attempt the removal directly instead of checking existence first: a
    # separate exists() check can race with another process deleting the file.
    try:
        os.remove(file_path)
    except FileNotFoundError:
        return False
    return True

def decode_data(hex_data: str, date: str):
    """Decode an obfuscated hex payload into the JSON value it carries.

    The hex string is converted to bytes, XOR-ed with the key derived from
    *date*, decoded as UTF-8 and parsed as JSON.

    Args:
        hex_data: Hex-encoded obfuscated payload.
        date: Date string used to derive the XOR key.

    Returns:
        The parsed JSON value.
    """
    raw = bytes.fromhex(hex_data)
    plaintext = xor_buffer(raw, get_obfuscation_key(date)).decode("utf-8")
    return json.loads(plaintext)


def fetch_daily() -> dict:
    """Fetch today's payload from ``{API_URL}/today``.

    Returns:
        The decoded JSON object, guaranteed to contain "date" and "data" keys.

    Raises:
        ValueError: If API_URL is not configured, or the response is not a
            JSON object containing both "date" and "data".
        requests.HTTPError: If the server answers with an error status.
    """
    if not API_URL:
        raise ValueError("Missing API_URL in environment.")

    response = requests.get(f"{API_URL}/today", timeout=15)
    response.raise_for_status()
    payload = response.json()
    # Check the type first: `in` on a JSON null raises TypeError, and on a
    # string or list it would test substrings/elements rather than keys.
    if (
        not isinstance(payload, dict)
        or "date" not in payload
        or "data" not in payload
    ):
        raise ValueError(f"Unexpected /today response shape: {payload}")

    return payload


def download_random_segment_mp3(youtube_id: str, output_file="today.mp3") -> str:
    """Download a random 17-second audio clip of a YouTube video as MP3.

    The video metadata is queried first to learn its duration, a random start
    offset is chosen so the clip fits inside the video, and yt-dlp then
    downloads only that range and converts it to MP3 at quality "192".

    Args:
        youtube_id: The video id placed in the ``watch?v=`` URL.
        output_file: Path to return; its ".mp3" is swapped for yt-dlp's
            ``%(ext)s`` placeholder to build the output template.

    Returns:
        *output_file*, unchanged.
    """
    clip_seconds = 17
    url = f"https://www.youtube.com/watch?v={youtube_id}"
    with yt_dlp.YoutubeDL({"quiet": True}) as ydl:
        info = ydl.extract_info(url, download=False)

    # The "duration" key may exist with a None value, which a .get() default
    # does not cover, and may be a float, which random.randint rejects.
    # Fall back to 60 seconds when it is unknown and truncate to whole seconds.
    raw_duration = info.get("duration")
    duration = 60 if raw_duration is None else int(raw_duration)
    if duration <= clip_seconds:
        start = 0
    else:
        start = random.randint(0, duration - clip_seconds)

    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": output_file.replace(".mp3", ".%(ext)s"),
        "quiet": True,
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "mp3",
            "preferredquality": "192",
        }],
        # yt-dlp calls this with (info_dict, ydl); both are ignored because
        # the range was already computed above.
        "download_ranges": lambda _info, _ydl: [
            {"start_time": start, "end_time": start + clip_seconds}
        ],
        "force_keyframes_at_cuts": True,
        "overwrites": True,
        "nopart": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    return output_file

def upload_to_r2(file_path: str, object_key: str):
    """Upload a local file to the configured Cloudflare R2 bucket.

    Args:
        file_path: Path of the local file to upload.
        object_key: Key to store the object under in BUCKET.

    Raises:
        ValueError: If any of the R2_* environment settings is missing.
    """
    # Fail fast with a clear message, as fetch_daily/get_obfuscation_key do,
    # instead of building an endpoint like "https://None.r2...".
    required = (
        ("R2_ACCOUNT_ID", ACCOUNT_ID),
        ("R2_ACCESS_KEY", ACCESS_KEY),
        ("R2_SECRET_KEY", SECRET_KEY),
        ("R2_BUCKET", BUCKET),
    )
    missing = [name for name, value in required if not value]
    if missing:
        raise ValueError(
            f"Missing R2 configuration in environment: {', '.join(missing)}"
        )

    s3 = boto3.client(
        "s3",
        endpoint_url=f"https://{ACCOUNT_ID}.r2.cloudflarestorage.com",
        aws_access_key_id=ACCESS_KEY,
        aws_secret_access_key=SECRET_KEY,
        region_name="auto",
    )
    s3.upload_file(file_path, BUCKET, object_key)


def write_json(file_path, data):
    """Serialize *data* as 4-space-indented UTF-8 JSON into *file_path*.

    The file is created or overwritten; non-ASCII characters are written
    as-is rather than escaped.
    """
    dump_options = {"indent": 4, "ensure_ascii": False}
    with open(file_path, mode="w", encoding="utf-8") as handle:
        json.dump(data, handle, **dump_options)

def read_json(file_path, default=None):
    """Load JSON from *file_path*, resetting the file to *default* when needed.

    If the file does not exist, or its contents are not valid JSON, the file
    is (re)written with *default* and *default* is returned.

    Args:
        file_path: Path of the JSON file.
        default: Fallback value; an empty dict when None.

    Returns:
        The parsed file contents, or the fallback value.
    """
    fallback = {} if default is None else default

    def _store_fallback():
        # Persist the fallback so the next read finds a valid file.
        with open(file_path, "w", encoding="utf-8") as sink:
            json.dump(fallback, sink, indent=4, ensure_ascii=False)
        return fallback

    if os.path.exists(file_path):
        try:
            with open(file_path, "r", encoding="utf-8") as source:
                return json.load(source)
        except json.JSONDecodeError:
            return _store_fallback()
    return _store_fallback()

def main():
    """Wait for a new daily payload, then publish its audio clip to R2.

    Steps:
        1. Poll the API every 5 seconds until its payload differs from the
           one recorded in save.json by the previous run.
        2. Decode the payload and download a random clip of its video.
        3. Upload the clip as ``kheardle/{date}.mp3`` and delete the local copy.
        4. Record the payload in save.json so the next run can detect change.

    The polling loop has no upper bound; it runs until the server changes
    its response.
    """
    daily_data = fetch_daily()
    # save.json is only rewritten at the very end of a run, so it is read
    # once here rather than on every polling iteration.
    previous = read_json("save.json")

    attempt = 0
    # Only refetch while the payload is stale, so the payload processed below
    # is exactly the one that was compared against the saved copy.
    while daily_data == previous:
        attempt += 1
        print(f"Server still returning old data, waiting... {attempt} ")
        time.sleep(5)
        daily_data = fetch_daily()

    date = daily_data["date"]
    data = decode_data(daily_data["data"], date)
    print(data)

    clip_path = download_random_segment_mp3(data["youtubeId"])
    try:
        upload_to_r2(clip_path, f"kheardle/{date}.mp3")
    finally:
        # Remove the temporary clip even if the upload fails.
        delete_file(clip_path)

    # Written last so a failed run is retried against the same payload.
    write_json("save.json", daily_data)

# Run the daily generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage