{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "A100",
      "machine_shape": "hm"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Auto Generate Subtitled MKV from Video\n",
        "\n",
        "Extracts the audio track with ffmpeg, isolates the vocals with Demucs, generates subtitles with Whisper, and muxes them into an MKV together with the original video and audio streams."
      ],
      "metadata": {
        "id": "SXzFlDnXer6P"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Install Dependencies\n",
        "!apt -y install ffmpeg\n",
        "%pip install -q demucs openai-whisper yt-dlp deep-translator"
      ],
      "metadata": {
        "id": "Oo_kRyXD_rHm"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "You can either translate directly with Whisper, or transcribe with Whisper and then translate the subtitles with Google Translate. Results may vary.\n",
        "\n",
        "Set `YT_URL` in the next cell to download a video from YouTube. Alternatively, upload your own file and set `VIDEO_FILE` to its name (the default is `video.webm`).\n",
        "\n",
        "Run the configuration and helper cells first, then run **one** of the two translation cells."
      ],
      "metadata": {
        "id": "gM5-zcxxLKjY"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#@markdown Leave `VIDEO_FILE` as `video.webm` if you are using YouTube; otherwise change it to the name of your uploaded video file.\n",
        "import shutil\n",
        "import subprocess\n",
        "from pathlib import Path\n",
        "\n",
        "YT_URL = \"\" #@param {type:\"string\"}\n",
        "VIDEO_FILE = \"video.webm\" #@param {type:\"string\"}\n",
        "\n",
        "\n",
        "def download_video(url: str, output_name: str) -> None:\n",
        "    \"\"\"Download `url` to `output_name` with yt-dlp, unless that file already exists.\n",
        "\n",
        "    Raises:\n",
        "        FileNotFoundError: the file is missing and `url` is empty.\n",
        "        subprocess.CalledProcessError: yt-dlp exited with a non-zero status.\n",
        "    \"\"\"\n",
        "    if Path(output_name).exists():\n",
        "        print(f\"[!] File {output_name} already exists\")\n",
        "        return\n",
        "\n",
        "    # Without a URL there is nothing to fetch; fail here with a clear message\n",
        "    # instead of letting yt-dlp fail on an empty argument.\n",
        "    if not url.strip():\n",
        "        raise FileNotFoundError(\n",
        "            f\"{output_name} does not exist and YT_URL is empty; \"\n",
        "            \"upload the video or set YT_URL\"\n",
        "        )\n",
        "\n",
        "    print(f\"[!] Downloading video from {url}...\")\n",
        "\n",
        "    cmd = [\n",
        "        \"yt-dlp\",\n",
        "        # Prefer WebM streams, falling back to the best available format.\n",
        "        \"-f\", \"bestvideo[ext=webm]+bestaudio[ext=webm]/best[ext=webm]/best\",\n",
        "        \"-o\", output_name,\n",
        "        url,\n",
        "    ]\n",
        "\n",
        "    subprocess.run(cmd, check=True)\n",
        "\n",
        "    print(f\"[+] Downloaded to {output_name}\")"
      ],
      "metadata": {
        "cellView": "form",
        "id": "w2pAP5yWRDwB"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Pipeline Helpers (shared by both translation modes)\n",
        "DEMUX_AUDIO_FORMAT = \"wav\"\n",
        "DEMUCS_MODEL = \"htdemucs\"\n",
        "WORKDIR = Path(\"/content/temp\")\n",
        "OUTPUT_DIR = Path(\"/content\")\n",
        "\n",
        "# ISO 639-1 -> ISO 639-2 codes used to tag the subtitle track.\n",
        "ISO_639_2_CODES = {\n",
        "    \"ar\": \"ara\", \"de\": \"ger\", \"en\": \"eng\", \"es\": \"spa\", \"fr\": \"fre\",\n",
        "    \"hi\": \"hin\", \"id\": \"ind\", \"it\": \"ita\", \"ja\": \"jpn\", \"ko\": \"kor\",\n",
        "    \"nl\": \"dut\", \"pl\": \"pol\", \"pt\": \"por\", \"ru\": \"rus\", \"th\": \"tha\",\n",
        "    \"tr\": \"tur\", \"uk\": \"ukr\", \"vi\": \"vie\", \"zh\": \"chi\",\n",
        "}\n",
        "\n",
        "\n",
        "def subtitle_language_tag(lang: str) -> str:\n",
        "    \"\"\"Map a language code such as `en` or `zh-CN` to a three-letter track tag.\n",
        "\n",
        "    Three-letter codes are passed through; anything unrecognized becomes `und`.\n",
        "    \"\"\"\n",
        "    base = lang.strip().lower().split(\"-\")[0]\n",
        "    if base in ISO_639_2_CODES:\n",
        "        return ISO_639_2_CODES[base]\n",
        "    if len(base) == 3 and base.isalpha():\n",
        "        return base\n",
        "    return \"und\"\n",
        "\n",
        "\n",
        "def extract_audio(video_path: Path, audio_path: Path) -> None:\n",
        "    \"\"\"Extract the audio of `video_path` to `audio_path` as 44.1 kHz stereo 16-bit PCM.\n",
        "\n",
        "    Raises:\n",
        "        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.\n",
        "    \"\"\"\n",
        "    cmd = [\n",
        "        \"ffmpeg\",\n",
        "        \"-y\",\n",
        "        \"-i\", str(video_path),\n",
        "        \"-vn\",\n",
        "        \"-acodec\", \"pcm_s16le\",\n",
        "        \"-ar\", \"44100\",\n",
        "        \"-ac\", \"2\",\n",
        "        str(audio_path),\n",
        "    ]\n",
        "    subprocess.run(cmd, check=True)\n",
        "    print(f\"[+] Extracted audio to {audio_path}\")\n",
        "\n",
        "\n",
        "def run_demucs(audio_path: Path, out_dir: Path, device: str = \"cuda\") -> Path:\n",
        "    \"\"\"Separate the vocals of `audio_path` with Demucs and return the vocals file path.\n",
        "\n",
        "    Raises:\n",
        "        subprocess.CalledProcessError: demucs exited with a non-zero status.\n",
        "        FileNotFoundError: demucs finished but the vocals file is missing.\n",
        "    \"\"\"\n",
        "    cmd = [\n",
        "        \"demucs\",\n",
        "        # Pin the model so the output directory name below is predictable.\n",
        "        \"-n\", DEMUCS_MODEL,\n",
        "        \"--two-stems\", \"vocals\",\n",
        "        \"--device\", device,\n",
        "        \"-o\", str(out_dir),\n",
        "        str(audio_path),\n",
        "    ]\n",
        "    subprocess.run(cmd, check=True)\n",
        "    vocals_path = out_dir / DEMUCS_MODEL / audio_path.stem / \"vocals.wav\"\n",
        "    if not vocals_path.exists():\n",
        "        raise FileNotFoundError(f\"Demucs did not produce {vocals_path}\")\n",
        "    print(f\"[+] Demucs vocals saved at {vocals_path}\")\n",
        "    return vocals_path\n",
        "\n",
        "\n",
        "def isolate_vocals(video_path: Path, workdir: Path, device: str = \"cuda\") -> Path:\n",
        "    \"\"\"Extract the audio of `video_path` into `workdir` and return the Demucs vocals path.\"\"\"\n",
        "    workdir.mkdir(exist_ok=True, parents=True)\n",
        "    audio_path = workdir / f\"{video_path.stem}.{DEMUX_AUDIO_FORMAT}\"\n",
        "    demucs_out = workdir / \"demucs_out\"\n",
        "    demucs_out.mkdir(exist_ok=True)\n",
        "    extract_audio(video_path, audio_path)\n",
        "    return run_demucs(audio_path, demucs_out, device=device)\n",
        "\n",
        "\n",
        "def run_whisper(\n",
        "    audio_path: Path,\n",
        "    out_dir: Path,\n",
        "    task: str = \"transcribe\",\n",
        "    model: str = \"large\",\n",
        "    device: str = \"cuda\",\n",
        "    language: str = \"\",\n",
        "    prompt: str = \"\",\n",
        ") -> Path:\n",
        "    \"\"\"Run the Whisper CLI on `audio_path` and return the path of the generated SRT.\n",
        "\n",
        "    Args:\n",
        "        task: `transcribe` (keep the spoken language) or `translate` (to English).\n",
        "        language: language spoken in the audio; empty lets Whisper detect it.\n",
        "        prompt: optional initial prompt, carried over to every decoding window.\n",
        "\n",
        "    Raises:\n",
        "        subprocess.CalledProcessError: whisper exited with a non-zero status.\n",
        "    \"\"\"\n",
        "    cmd = [\n",
        "        \"whisper\",\n",
        "        str(audio_path),\n",
        "        \"--model\", model,\n",
        "        \"--device\", device,\n",
        "        \"--output_format\", \"srt\",\n",
        "        \"--output_dir\", str(out_dir),\n",
        "        \"--task\", task,\n",
        "        \"--verbose\", \"True\",\n",
        "    ]\n",
        "    if language:\n",
        "        cmd.extend([\"--language\", language])\n",
        "    if prompt:\n",
        "        cmd.extend([\n",
        "            \"--initial_prompt\", prompt,\n",
        "            \"--carry_initial_prompt\", \"True\",\n",
        "        ])\n",
        "    subprocess.run(cmd, check=True)\n",
        "    srt_file = out_dir / f\"{audio_path.stem}.srt\"\n",
        "    print(f\"[+] Whisper generated SRT at {srt_file}\")\n",
        "    return srt_file\n",
        "\n",
        "\n",
        "def mux_video_with_subtitle(\n",
        "    video_path: Path,\n",
        "    srt_path: Path,\n",
        "    output_path: Path,\n",
        "    language: str = \"eng\",\n",
        ") -> None:\n",
        "    \"\"\"Copy the video/audio streams of `video_path` plus `srt_path` into `output_path`.\n",
        "\n",
        "    `language` is written as the language tag of the subtitle track.\n",
        "\n",
        "    Raises:\n",
        "        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.\n",
        "    \"\"\"\n",
        "    cmd = [\n",
        "        \"ffmpeg\",\n",
        "        \"-y\",\n",
        "        \"-i\", str(video_path),\n",
        "        \"-i\", str(srt_path),\n",
        "        \"-c\", \"copy\",\n",
        "        \"-c:s\", \"srt\",\n",
        "        \"-map\", \"0:v\",\n",
        "        # The trailing `?` keeps the mux working for videos without audio.\n",
        "        \"-map\", \"0:a?\",\n",
        "        \"-map\", \"1\",\n",
        "        \"-metadata:s:s:0\", f\"language={language}\",\n",
        "        str(output_path),\n",
        "    ]\n",
        "    subprocess.run(cmd, check=True)\n",
        "    print(f\"[+] Final MKV saved at {output_path}\")"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Direct Translation (Whisper -> Target Language)\n",
        "TARGET_LANG = \"en\" #@param {type:\"string\"}\n",
        "WHISPER_MODEL = \"large\" #@param {type:\"string\"}\n",
        "DEVICE = \"cuda\" #@param [\"cuda\", \"cpu\"] {type:\"string\"}\n",
        "CLEANUP_TEMP_FILES = True #@param {type:\"boolean\"}\n",
        "PROMPT = \"\" #@param {type:\"string\"}\n",
        "\n",
        "\n",
        "def main_direct():\n",
        "    \"\"\"Download the video if needed, subtitle it with Whisper alone, and mux the MKV.\"\"\"\n",
        "    video_path = Path(VIDEO_FILE)\n",
        "\n",
        "    # 1. Download\n",
        "    download_video(YT_URL, VIDEO_FILE)\n",
        "\n",
        "    # `auto` keeps the spoken language (transcribe); anything else translates.\n",
        "    if TARGET_LANG == \"auto\":\n",
        "        task, spoken_language, track_tag = \"transcribe\", \"\", \"und\"\n",
        "    else:\n",
        "        task, track_tag = \"translate\", \"eng\"\n",
        "        spoken_language = \"\" if TARGET_LANG == \"en\" else TARGET_LANG\n",
        "        if spoken_language:\n",
        "            # NOTE(review): Whisper's --language is the language spoken in the\n",
        "            # audio and its translate task only targets English, so a\n",
        "            # non-English TARGET_LANG acts as a source-language hint here.\n",
        "            print(\n",
        "                \"[!] Whisper's translate task always outputs English; \"\n",
        "                f\"'{TARGET_LANG}' is passed to Whisper as the spoken language\"\n",
        "            )\n",
        "\n",
        "    srt_out = WORKDIR / \"srt_out\"\n",
        "    final_mkv = OUTPUT_DIR / f\"{video_path.stem}_final.mkv\"\n",
        "\n",
        "    # 2. Process\n",
        "    vocals_path = isolate_vocals(video_path, WORKDIR, device=DEVICE)\n",
        "    srt_out.mkdir(exist_ok=True, parents=True)\n",
        "    print(\"[!] Generating subtitles (Whisper)...\")\n",
        "    srt_file = run_whisper(\n",
        "        vocals_path,\n",
        "        srt_out,\n",
        "        task=task,\n",
        "        model=WHISPER_MODEL,\n",
        "        device=DEVICE,\n",
        "        language=spoken_language,\n",
        "        prompt=PROMPT,\n",
        "    )\n",
        "    mux_video_with_subtitle(video_path, srt_file, final_mkv, language=track_tag)\n",
        "\n",
        "    print(\"[+] All done!\")\n",
        "    if CLEANUP_TEMP_FILES:\n",
        "        shutil.rmtree(WORKDIR, ignore_errors=True)\n",
        "        print(\"[+] Temp files deleted\")\n",
        "\n",
        "\n",
        "main_direct()"
      ],
      "metadata": {
        "id": "3WceASw_Hl8u"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "IadifqcV-6PP",
        "cellView": "form"
      },
      "outputs": [],
      "source": [
        "#@title Cascading Translation (Whisper -> SRT -> Google Translate -> Target Lang)\n",
        "from deep_translator import GoogleTranslator\n",
        "\n",
        "TARGET_LANG = \"en\" #@param {type:\"string\"}\n",
        "WHISPER_MODEL = \"large\" #@param {type:\"string\"}\n",
        "DEVICE = \"cuda\" #@param [\"cuda\", \"cpu\"] {type:\"string\"}\n",
        "CLEANUP_TEMP_FILES = True #@param {type:\"boolean\"}\n",
        "\n",
        "\n",
        "def translate_srt(input_srt: Path, output_srt: Path, target_lang=\"en\"):\n",
        "    \"\"\"Translate the text lines of `input_srt` with Google Translate into `output_srt`.\n",
        "\n",
        "    Cue numbers, timestamp lines and blank lines are copied unchanged. A line\n",
        "    whose translation fails or comes back empty keeps its original text, so\n",
        "    the cue structure of the file is preserved.\n",
        "\n",
        "    Returns:\n",
        "        `output_srt`.\n",
        "    \"\"\"\n",
        "    translator = GoogleTranslator(source=\"auto\", target=target_lang)\n",
        "\n",
        "    with open(input_srt, \"r\", encoding=\"utf-8\") as f:\n",
        "        lines = f.readlines()\n",
        "\n",
        "    translated_lines = []\n",
        "    failed = 0\n",
        "\n",
        "    for line in lines:\n",
        "        stripped = line.strip()\n",
        "\n",
        "        if stripped.isdigit() or \"-->\" in stripped or stripped == \"\":\n",
        "            translated_lines.append(line)\n",
        "            continue\n",
        "\n",
        "        try:\n",
        "            translated = translator.translate(stripped)\n",
        "        except Exception as exc:\n",
        "            # Best effort: keep the untranslated line, but report the failure.\n",
        "            failed += 1\n",
        "            print(f\"[!] Could not translate {stripped!r}: {exc}\")\n",
        "            translated = None\n",
        "\n",
        "        # An empty result would write a blank line and split the cue in two.\n",
        "        if translated and translated.strip():\n",
        "            translated_lines.append(f\"{translated}\\n\")\n",
        "        else:\n",
        "            translated_lines.append(line)\n",
        "\n",
        "    with open(output_srt, \"w\", encoding=\"utf-8\") as f:\n",
        "        f.writelines(translated_lines)\n",
        "\n",
        "    if failed:\n",
        "        print(f\"[!] {failed} subtitle line(s) were left untranslated\")\n",
        "    print(f\"[+] Translated subtitles saved at {output_srt}\")\n",
        "\n",
        "    return output_srt\n",
        "\n",
        "\n",
        "def main_cascading():\n",
        "    \"\"\"Download the video if needed, transcribe it, translate the SRT, and mux the MKV.\"\"\"\n",
        "    video_path = Path(VIDEO_FILE)\n",
        "\n",
        "    download_video(YT_URL, VIDEO_FILE)\n",
        "\n",
        "    srt_out = WORKDIR / \"srt_out\"\n",
        "    translated_srt = srt_out / \"translated.srt\"\n",
        "    final_mkv = OUTPUT_DIR / f\"{video_path.stem}_final.mkv\"\n",
        "\n",
        "    vocals_path = isolate_vocals(video_path, WORKDIR, device=DEVICE)\n",
        "    srt_out.mkdir(exist_ok=True, parents=True)\n",
        "\n",
        "    print(\"[!] Generating subtitles (Whisper)...\")\n",
        "    srt_file = run_whisper(\n",
        "        vocals_path,\n",
        "        srt_out,\n",
        "        task=\"transcribe\",\n",
        "        model=WHISPER_MODEL,\n",
        "        device=DEVICE,\n",
        "    )\n",
        "\n",
        "    print(\"[!] Translating subtitles...\")\n",
        "    translate_srt(srt_file, translated_srt, TARGET_LANG)\n",
        "\n",
        "    mux_video_with_subtitle(\n",
        "        video_path,\n",
        "        translated_srt,\n",
        "        final_mkv,\n",
        "        language=subtitle_language_tag(TARGET_LANG),\n",
        "    )\n",
        "\n",
        "    print(\"[+] All done!\")\n",
        "    if CLEANUP_TEMP_FILES:\n",
        "        shutil.rmtree(WORKDIR, ignore_errors=True)\n",
        "        print(\"[+] Temp files deleted\")\n",
        "\n",
        "\n",
        "main_cascading()"
      ]
    }
  ]
}