diff options
| author | Pinapelz <donaldshan1@outlook.com> | 2026-03-11 22:19:58 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-03-11 22:19:58 -0700 |
| commit | 1e56db585cf705126deee39d956215c05f89f2ba (patch) | |
| tree | 6946e2fa0f01e50125f63a3e69c1203fa298313a | |
| parent | e6f39efe00f6d35e6485e19f1a37d74236cddf9d (diff) | |
add Autogen_Subtitles_MKV.ipynb
| -rw-r--r-- | Autogen_Subtitles_MKV.ipynb | 386 |
1 files changed, 386 insertions, 0 deletions
diff --git a/Autogen_Subtitles_MKV.ipynb b/Autogen_Subtitles_MKV.ipynb new file mode 100644 index 0000000..f1740b2 --- /dev/null +++ b/Autogen_Subtitles_MKV.ipynb @@ -0,0 +1,386 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "A100", + "machine_shape": "hm" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Auto Generate Subtitled MKV from Video" + ], + "metadata": { + "id": "SXzFlDnXer6P" + } + }, + { + "cell_type": "code", + "source": [ + "#@title Install Dependencies\n", + "!apt -y install ffmpeg\n", + "!pip install -q demucs openai-whisper yt-dlp" + ], + "metadata": { + "id": "Oo_kRyXD_rHm" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "You can either directly translate via Whisper or use Google Translate in between. Results may vary.\n", + "\n", + "The code below downloads a video from YouTube; however, you may also upload a file. Name it `video.webm`." + ], + "metadata": { + "id": "gM5-zcxxLKjY" + } + }, + { + "cell_type": "code", + "source": [ + "#@markdown Leave `VIDEO_FILE` as `video.webm` if you are using YouTube; otherwise, change it to your video file name\n", + "import subprocess\n", + "from pathlib import Path\n", + "import shutil\n", + "\n", + "YT_URL = \"\" #@param {type:\"string\"}\n", + "VIDEO_FILE = \"video.webm\" #@param {type:\"string\"}\n", + "\n", + "def download_video(url: str, output_name: str):\n", + " if Path(output_name).exists():\n", + " print(f\"[!] File {output_name} already exists\")\n", + " return\n", + "\n", + " print(f\"[!] 
Downloading video from {url}...\")\n", + "\n", + " cmd = [\n", + " \"yt-dlp\",\n", + " \"-f\", \"bestvideo[ext=webm]+bestaudio[ext=webm]/best[ext=webm]/best\",\n", + " \"-o\", output_name,\n", + " url\n", + " ]\n", + "\n", + " subprocess.run(cmd, check=True)\n", + "\n", + " print(f\"[+] Downloaded to {output_name}\")\n", + "\n" + ], + "metadata": { + "cellView": "form", + "id": "w2pAP5yWRDwB" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#@title Direct Translation (Whisper -> Target Language)\n", + "TARGET_LANG = \"en\" #@param {type:\"string\"}\n", + "DEMUX_AUDIO_FORMAT = \"wav\"\n", + "WHISPER_MODEL = \"large\" #@param {type:\"string\"}\n", + "DEVICE = \"cuda\" #@param [\"cuda\", \"cpu\"] {type:\"string\"}\n", + "CLEANUP_TEMP_FILES = True #@param {type:\"boolean\"}\n", + "WORKDIR = Path(\"/content/temp\")\n", + "PROMPT = \"\" #@param {type:\"string\"}\n", + "\n", + "def extract_audio(video_path: Path, audio_path: Path):\n", + " cmd = [\n", + " \"ffmpeg\",\n", + " \"-y\",\n", + " \"-i\", str(video_path),\n", + " \"-vn\",\n", + " \"-acodec\", \"pcm_s16le\",\n", + " \"-ar\", \"44100\",\n", + " \"-ac\", \"2\",\n", + " str(audio_path)\n", + " ]\n", + " subprocess.run(cmd, check=True)\n", + " print(f\"[+] Extracted audio to {audio_path}\")\n", + "\n", + "def run_demucs(audio_path: Path, out_dir: Path):\n", + " cmd = [\n", + " \"demucs\",\n", + " \"--two-stems\", \"vocals\",\n", + " \"--device\", DEVICE,\n", + " \"-o\", str(out_dir),\n", + " str(audio_path)\n", + " ]\n", + " subprocess.run(cmd, check=True)\n", + " filename = audio_path.stem\n", + " vocals_path = out_dir / \"htdemucs\" / filename / \"vocals.wav\"\n", + " print(f\"[+] Demucs vocals saved at {vocals_path}\")\n", + " return vocals_path\n", + "\n", + "def run_whisper(audio_path: Path, out_dir: Path, target_lang=\"en\"):\n", + " cmd = [\n", + " \"whisper\",\n", + " str(audio_path),\n", + " \"--model\", WHISPER_MODEL,\n", + " \"--output_format\", \"srt\",\n", 
+ " \"--output_dir\", str(out_dir),\n", + " \"--task\", \"translate\" if target_lang != \"auto\" else \"transcribe\",\n", + " ]\n", + " if PROMPT:\n", + " cmd.extend([\n", + " \"--initial_prompt\", PROMPT,\n", + " \"--carry_initial_prompt\", \"True\"\n", + " ])\n", + " if target_lang not in [\"auto\", \"en\"]:\n", + " cmd.extend([\"--language\", target_lang])\n", + " subprocess.run(cmd, check=True)\n", + " srt_file = out_dir / f\"{audio_path.stem}.srt\"\n", + " print(f\"[+] Whisper generated SRT at {srt_file}\")\n", + " return srt_file\n", + "\n", + "def mux_video_with_subtitle(video_path: Path, srt_path: Path, output_path: Path):\n", + " cmd = [\n", + " \"ffmpeg\",\n", + " \"-y\",\n", + " \"-i\", str(video_path),\n", + " \"-i\", str(srt_path),\n", + " \"-c\", \"copy\",\n", + " \"-c:s\", \"srt\",\n", + " \"-map\", \"0:v\",\n", + " \"-map\", \"0:a?\",\n", + " \"-map\", \"1\",\n", + " \"-metadata:s:s:0\", \"language=eng\",\n", + " str(output_path)\n", + " ]\n", + " subprocess.run(cmd, check=True)\n", + " print(f\"[+] Final MKV saved at {output_path}\")\n", + "\n", + "def main():\n", + " workdir = WORKDIR\n", + " workdir.mkdir(exist_ok=True, parents=True)\n", + "\n", + " video_path = Path(VIDEO_FILE)\n", + "\n", + " # 1. Download\n", + " download_video(YT_URL, VIDEO_FILE)\n", + "\n", + " base_name = video_path.stem\n", + " audio_path = workdir / f\"{base_name}.{DEMUX_AUDIO_FORMAT}\"\n", + " demucs_out = workdir / \"demucs_out\"\n", + " demucs_out.mkdir(exist_ok=True)\n", + " srt_out = workdir / \"srt_out\"\n", + " srt_out.mkdir(exist_ok=True)\n", + " final_mkv = Path(\"/content\") / f\"{base_name}_final.mkv\"\n", + "\n", + " # 2. Process\n", + " extract_audio(video_path, audio_path)\n", + " vocals_path = run_demucs(audio_path, demucs_out)\n", + " print(\"[!] 
Generating subtitles (Whisper)...\")\n", + " srt_file = run_whisper(vocals_path, srt_out, target_lang=TARGET_LANG)\n", + " mux_video_with_subtitle(video_path, srt_file, final_mkv)\n", + "\n", + " print(\"[+] All done!\")\n", + " if CLEANUP_TEMP_FILES:\n", + " shutil.rmtree(workdir, ignore_errors=True)\n", + " print(\"[+] Temp files deleted\")\n", + "\n", + "main()" + ], + "metadata": { + "id": "3WceASw_Hl8u" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IadifqcV-6PP", + "cellView": "form" + }, + "outputs": [], + "source": [ + "#@title Cascading Translation (Whisper -> SRT -> Google Translate -> Target Lang)\n", + "!pip install deep-translator\n", + "from deep_translator import GoogleTranslator\n", + "\n", + "\n", + "TARGET_LANG = \"en\" #@param {type:\"string\"}\n", + "DEMUX_AUDIO_FORMAT = \"wav\"\n", + "WHISPER_MODEL = \"large\" #@param {type:\"string\"}\n", + "DEVICE = \"cuda\" #@param [\"cuda\", \"cpu\"] {type:\"string\"}\n", + "WORKDIR = Path(\"/content/temp\")\n", + "\n", + "\n", + "def extract_audio(video_path: Path, audio_path: Path):\n", + "\n", + " cmd = [\n", + " \"ffmpeg\",\n", + " \"-y\",\n", + " \"-i\", str(video_path),\n", + " \"-vn\",\n", + " \"-acodec\", \"pcm_s16le\",\n", + " \"-ar\", \"44100\",\n", + " \"-ac\", \"2\",\n", + " str(audio_path)\n", + " ]\n", + "\n", + " subprocess.run(cmd, check=True)\n", + "\n", + " print(f\"[+] Extracted audio to {audio_path}\")\n", + "\n", + "\n", + "def run_demucs(audio_path: Path, out_dir: Path):\n", + "\n", + " cmd = [\n", + " \"demucs\",\n", + " \"--two-stems\", \"vocals\",\n", + " \"--device\", DEVICE,\n", + " \"-o\", str(out_dir),\n", + " str(audio_path)\n", + " ]\n", + "\n", + " subprocess.run(cmd, check=True)\n", + "\n", + " filename = audio_path.stem\n", + " vocals_path = out_dir / \"htdemucs\" / filename / \"vocals.wav\"\n", + "\n", + " print(f\"[+] Demucs vocals saved at {vocals_path}\")\n", + "\n", + " return 
vocals_path\n", + "\n", + "\n", + "def run_whisper(audio_path: Path, out_dir: Path):\n", + "\n", + " cmd = [\n", + " \"whisper\",\n", + " str(audio_path),\n", + " \"--model\", WHISPER_MODEL,\n", + " \"--output_format\", \"srt\",\n", + " \"--output_dir\", str(out_dir),\n", + " \"--task\", \"transcribe\",\n", + " \"--verbose\", \"True\"\n", + " ]\n", + "\n", + " subprocess.run(cmd, check=True)\n", + "\n", + " srt_file = out_dir / f\"{audio_path.stem}.srt\"\n", + "\n", + " print(f\"[+] Whisper generated SRT at {srt_file}\")\n", + "\n", + " return srt_file\n", + "\n", + "\n", + "def translate_srt(input_srt: Path, output_srt: Path, target_lang=\"en\"):\n", + "\n", + " translator = GoogleTranslator(source=\"auto\", target=target_lang)\n", + "\n", + " with open(input_srt, \"r\", encoding=\"utf-8\") as f:\n", + " lines = f.readlines()\n", + "\n", + " translated_lines = []\n", + "\n", + " for line in lines:\n", + "\n", + " stripped = line.strip()\n", + "\n", + " if stripped.isdigit() or \"-->\" in stripped or stripped == \"\":\n", + " translated_lines.append(line)\n", + " continue\n", + "\n", + " try:\n", + " translated = translator.translate(stripped)\n", + " translated_lines.append(translated + \"\\n\")\n", + "\n", + " except Exception:\n", + " translated_lines.append(line)\n", + "\n", + " with open(output_srt, \"w\", encoding=\"utf-8\") as f:\n", + " f.writelines(translated_lines)\n", + "\n", + " print(f\"[+] Translated subtitles saved at {output_srt}\")\n", + "\n", + " return output_srt\n", + "\n", + "\n", + "def mux_video_with_subtitle(video_path: Path, srt_path: Path, output_path: Path):\n", + "\n", + " cmd = [\n", + " \"ffmpeg\",\n", + " \"-y\",\n", + " \"-i\", str(video_path),\n", + " \"-i\", str(srt_path),\n", + " \"-c\", \"copy\",\n", + " \"-c:s\", \"srt\",\n", + " \"-map\", \"0:v\",\n", + " \"-map\", \"0:a?\",\n", + " \"-map\", \"1\",\n", + " \"-metadata:s:s:0\", \"language=eng\",\n", + " str(output_path)\n", + " ]\n", + "\n", + " subprocess.run(cmd, 
check=True)\n", + "\n", + " print(f\"[+] Final MKV saved at {output_path}\")\n", + "\n", + "\n", + "def main():\n", + "\n", + " workdir = WORKDIR\n", + " workdir.mkdir(exist_ok=True, parents=True)\n", + "\n", + " video_path = Path(VIDEO_FILE)\n", + "\n", + " download_video(YT_URL, VIDEO_FILE)\n", + "\n", + " base_name = video_path.stem\n", + "\n", + " audio_path = workdir / f\"{base_name}.{DEMUX_AUDIO_FORMAT}\"\n", + "\n", + " demucs_out = workdir / \"demucs_out\"\n", + " demucs_out.mkdir(exist_ok=True)\n", + "\n", + " srt_out = workdir / \"srt_out\"\n", + " srt_out.mkdir(exist_ok=True)\n", + "\n", + " final_mkv = Path(\"/content\") / f\"{base_name}_final.mkv\"\n", + "\n", + " extract_audio(video_path, audio_path)\n", + "\n", + " vocals_path = run_demucs(audio_path, demucs_out)\n", + "\n", + " print(\"[!] Generating subtitles (Whisper)...\")\n", + "\n", + " srt_file = run_whisper(vocals_path, srt_out)\n", + "\n", + " translated_srt = srt_out / \"translated.srt\"\n", + "\n", + " print(\"[!] Translating subtitles...\")\n", + "\n", + " translate_srt(srt_file, translated_srt, TARGET_LANG)\n", + "\n", + " mux_video_with_subtitle(video_path, translated_srt, final_mkv)\n", + "\n", + " print(\"[+] All done!\")\n", + "\n", + " shutil.rmtree(workdir, ignore_errors=True)\n", + "\n", + " print(\"[+] Temp files deleted\")\n", + "\n", + "\n", + "main()" + ] + } + ] +}
\ No newline at end of file |
