add Autogen_Subtitles_MKV.ipynb

author: Pinapelz <donaldshan1@outlook.com> 2026-03-11 22:19:58 -0700
committer: GitHub <noreply@github.com> 2026-03-11 22:19:58 -0700
commit: 1e56db585cf705126deee39d956215c05f89f2ba (patch)
tree: 6946e2fa0f01e50125f63a3e69c1203fa298313a
parent: e6f39efe00f6d35e6485e19f1a37d74236cddf9d (diff)
1 files changed, 386 insertions, 0 deletions
diff --git a/Autogen_Subtitles_MKV.ipynb b/Autogen_Subtitles_MKV.ipynb
new file mode 100644
index 0000000..f1740b2
--- /dev/null
+++ b/Autogen_Subtitles_MKV.ipynb
@@ -0,0 +1,386 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "A100",
+      "machine_shape": "hm"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Auto Generate Subtitled MKV from Video"
+      ],
+      "metadata": {
+        "id": "SXzFlDnXer6P"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#@title Install Dependencies\n",
+        "!apt -y install ffmpeg\n",
+        "!pip install -q demucs openai-whisper yt-dlp"
+      ],
+      "metadata": {
+        "id": "Oo_kRyXD_rHm"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "You can either translate directly via Whisper or use Google Translate as an intermediate step. Results may vary.\n",
+        "\n",
+        "The code below downloads a video from YouTube. Alternatively, you may upload your own file: name it `video.webm`, or set `VIDEO_FILE` to its name."
+      ],
+      "metadata": {
+        "id": "gM5-zcxxLKjY"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#@markdown Leave `VIDEO_FILE` as `video.webm` if you are using YouTube; otherwise change it to your video file name.\n",
+        "import subprocess\n",
+        "from pathlib import Path\n",
+        "import shutil\n",
+        "\n",
+        "YT_URL = \"\" #@param {type:\"string\"}\n",
+        "VIDEO_FILE = \"video.webm\" #@param {type:\"string\"}\n",
+        "\n",
+        "def download_video(url: str, output_name: str):\n",
+        "    \"\"\"Download a video with yt-dlp unless the output file already exists.\n",
+        "\n",
+        "    Args:\n",
+        "        url: Address handed to yt-dlp; only needed when ``output_name``\n",
+        "            is not already on disk.\n",
+        "        output_name: Path checked for an existing file and passed to\n",
+        "            yt-dlp's ``-o`` option.\n",
+        "\n",
+        "    Raises:\n",
+        "        ValueError: If the file is missing and ``url`` is empty.\n",
+        "        subprocess.CalledProcessError: If yt-dlp exits with a non-zero status.\n",
+        "    \"\"\"\n",
+        "    if Path(output_name).exists():\n",
+        "        print(f\"[!] File {output_name} already exists\")\n",
+        "        return\n",
+        "\n",
+        "    # Without this guard a blank form field would be handed to yt-dlp as an\n",
+        "    # empty argument; fail early with a message that says what to do.\n",
+        "    if not url or not url.strip():\n",
+        "        raise ValueError(\n",
+        "            f\"{output_name} not found and no URL given: upload the video or set YT_URL\"\n",
+        "        )\n",
+        "\n",
+        "    print(f\"[!] Downloading video from {url}...\")\n",
+        "\n",
+        "    # Format preference: separate WebM video+audio, then a single WebM file,\n",
+        "    # then whatever yt-dlp considers best.\n",
+        "    cmd = [\n",
+        "        \"yt-dlp\",\n",
+        "        \"-f\", \"bestvideo[ext=webm]+bestaudio[ext=webm]/best[ext=webm]/best\",\n",
+        "        \"-o\", output_name,\n",
+        "        url\n",
+        "    ]\n",
+        "\n",
+        "    subprocess.run(cmd, check=True)\n",
+        "\n",
+        "    print(f\"[+] Downloaded to {output_name}\")\n",
+        "\n"
+      ],
+      "metadata": {
+        "cellView": "form",
+        "id": "w2pAP5yWRDwB"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#@title Direct Translation (Whisper -> Target Language)\n",
+        "TARGET_LANG = \"en\" #@param {type:\"string\"}\n",
+        "DEMUX_AUDIO_FORMAT = \"wav\"\n",
+        "WHISPER_MODEL = \"large\" #@param {type:\"string\"}\n",
+        "DEVICE = \"cuda\" #@param [\"cuda\", \"cpu\"] {type:\"string\"}\n",
+        "CLEANUP_TEMP_FILES = True #@param {type:\"boolean\"}\n",
+        "WORKDIR = Path(\"/content/temp\")\n",
+        "PROMPT = \"\" #@param {type:\"string\"}\n",
+        "\n",
+        "def extract_audio(video_path: Path, audio_path: Path):\n",
+        "    \"\"\"Write the audio of ``video_path`` to ``audio_path`` using ffmpeg.\n",
+        "\n",
+        "    The audio is encoded as ``pcm_s16le`` at 44100 Hz with two channels, and\n",
+        "    an existing file at ``audio_path`` is overwritten (``-y``).\n",
+        "\n",
+        "    Raises:\n",
+        "        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.\n",
+        "    \"\"\"\n",
+        "    source, target = str(video_path), str(audio_path)\n",
+        "    # -vn disables the video stream so only audio is written.\n",
+        "    input_args = [\"-y\", \"-i\", source, \"-vn\"]\n",
+        "    audio_args = [\"-acodec\", \"pcm_s16le\", \"-ar\", \"44100\", \"-ac\", \"2\"]\n",
+        "    subprocess.run([\"ffmpeg\", *input_args, *audio_args, target], check=True)\n",
+        "    print(f\"[+] Extracted audio to {audio_path}\")\n",
+        "\n",
+        "def run_demucs(audio_path: Path, out_dir: Path):\n",
+        "    \"\"\"Separate the vocal stem of ``audio_path`` with the Demucs CLI.\n",
+        "\n",
+        "    Args:\n",
+        "        audio_path: Audio file to separate.\n",
+        "        out_dir: Directory passed to Demucs as ``-o``.\n",
+        "\n",
+        "    Returns:\n",
+        "        Path of the ``vocals.wav`` file for this track.\n",
+        "\n",
+        "    Raises:\n",
+        "        subprocess.CalledProcessError: If demucs exits with a non-zero status.\n",
+        "        FileNotFoundError: If the expected vocals file is missing afterwards.\n",
+        "    \"\"\"\n",
+        "    # Pin the model explicitly: the output folder below is named after it, so\n",
+        "    # the path must not depend on whatever Demucs' default model happens to be.\n",
+        "    model_name = \"htdemucs\"\n",
+        "    cmd = [\n",
+        "        \"demucs\",\n",
+        "        \"-n\", model_name,\n",
+        "        \"--two-stems\", \"vocals\",\n",
+        "        \"--device\", DEVICE,\n",
+        "        \"-o\", str(out_dir),\n",
+        "        str(audio_path)\n",
+        "    ]\n",
+        "    subprocess.run(cmd, check=True)\n",
+        "    vocals_path = out_dir / model_name / audio_path.stem / \"vocals.wav\"\n",
+        "    # Fail here with a clear message rather than later inside Whisper.\n",
+        "    if not vocals_path.exists():\n",
+        "        raise FileNotFoundError(f\"Demucs did not produce {vocals_path}\")\n",
+        "    print(f\"[+] Demucs vocals saved at {vocals_path}\")\n",
+        "    return vocals_path\n",
+        "\n",
+        "def run_whisper(audio_path: Path, out_dir: Path, target_lang=\"en\"):\n",
+        "    \"\"\"Run the Whisper CLI on ``audio_path`` and return the SRT path it writes.\n",
+        "\n",
+        "    Args:\n",
+        "        audio_path: Audio file to process.\n",
+        "        out_dir: Directory passed as ``--output_dir``.\n",
+        "        target_lang: ``\"auto\"`` selects the ``transcribe`` task; any other\n",
+        "            value selects ``translate``. Values other than ``\"auto\"`` and\n",
+        "            ``\"en\"`` are additionally passed as ``--language``.\n",
+        "\n",
+        "    Returns:\n",
+        "        ``out_dir`` joined with the audio file's stem plus ``.srt``.\n",
+        "\n",
+        "    Raises:\n",
+        "        subprocess.CalledProcessError: If whisper exits with a non-zero status.\n",
+        "    \"\"\"\n",
+        "    cmd = [\n",
+        "        \"whisper\",\n",
+        "        str(audio_path),\n",
+        "        \"--model\", WHISPER_MODEL,\n",
+        "        # Apply the DEVICE form field to Whisper as well; it used to reach\n",
+        "        # Demucs only, so choosing \"cpu\" had no effect on this step.\n",
+        "        \"--device\", DEVICE,\n",
+        "        \"--output_format\", \"srt\",\n",
+        "        \"--output_dir\", str(out_dir),\n",
+        "        \"--task\", \"translate\" if target_lang != \"auto\" else \"transcribe\",\n",
+        "    ]\n",
+        "    if PROMPT:\n",
+        "        cmd.extend([\n",
+        "            \"--initial_prompt\", PROMPT,\n",
+        "            \"--carry_initial_prompt\", \"True\"\n",
+        "        ])\n",
+        "    # NOTE(review): Whisper documents --language as the language spoken in the\n",
+        "    # audio and the translate task as X -> English; confirm that passing the\n",
+        "    # target language here is intended.\n",
+        "    if target_lang not in [\"auto\", \"en\"]:\n",
+        "        cmd.extend([\"--language\", target_lang])\n",
+        "    subprocess.run(cmd, check=True)\n",
+        "    srt_file = out_dir / f\"{audio_path.stem}.srt\"\n",
+        "    print(f\"[+] Whisper generated SRT at {srt_file}\")\n",
+        "    return srt_file\n",
+        "\n",
+        "def mux_video_with_subtitle(video_path: Path, srt_path: Path, output_path: Path, language: str = \"eng\"):\n",
+        "    \"\"\"Combine a video and an SRT file into one output file with ffmpeg.\n",
+        "\n",
+        "    Args:\n",
+        "        video_path: Source video (first ffmpeg input).\n",
+        "        srt_path: Subtitle file (second ffmpeg input).\n",
+        "        output_path: File to write; overwritten if present (``-y``).\n",
+        "        language: Value written to the subtitle stream's ``language``\n",
+        "            metadata tag. Defaults to ``\"eng\"``, the value that used to be\n",
+        "            hard-coded.\n",
+        "\n",
+        "    Raises:\n",
+        "        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.\n",
+        "    \"\"\"\n",
+        "    cmd = [\n",
+        "        \"ffmpeg\",\n",
+        "        \"-y\",\n",
+        "        \"-i\", str(video_path),\n",
+        "        \"-i\", str(srt_path),\n",
+        "        # Copy all streams as-is, except subtitles which use the srt codec.\n",
+        "        \"-c\", \"copy\",\n",
+        "        \"-c:s\", \"srt\",\n",
+        "        \"-map\", \"0:v\",\n",
+        "        # Trailing '?' makes the audio mapping optional.\n",
+        "        \"-map\", \"0:a?\",\n",
+        "        \"-map\", \"1\",\n",
+        "        \"-metadata:s:s:0\", f\"language={language}\",\n",
+        "        str(output_path)\n",
+        "    ]\n",
+        "    subprocess.run(cmd, check=True)\n",
+        "    print(f\"[+] Final MKV saved at {output_path}\")\n",
+        "\n",
+        "def main():\n",
+        "    \"\"\"Run the direct pipeline: download, extract audio, isolate vocals, subtitle, mux.\n",
+        "\n",
+        "    Reads the form constants of this cell plus ``YT_URL``, ``VIDEO_FILE`` and\n",
+        "    ``download_video`` from the download cell, so that cell must run first.\n",
+        "    The result is written to ``/content/<video stem>_final.mkv``.\n",
+        "    \"\"\"\n",
+        "    WORKDIR.mkdir(parents=True, exist_ok=True)\n",
+        "    source_video = Path(VIDEO_FILE)\n",
+        "\n",
+        "    # 1. Download\n",
+        "    download_video(YT_URL, VIDEO_FILE)\n",
+        "\n",
+        "    stem = source_video.stem\n",
+        "    wav_path = WORKDIR / f\"{stem}.{DEMUX_AUDIO_FORMAT}\"\n",
+        "    stems_dir = WORKDIR / \"demucs_out\"\n",
+        "    subs_dir = WORKDIR / \"srt_out\"\n",
+        "    for folder in (stems_dir, subs_dir):\n",
+        "        folder.mkdir(exist_ok=True)\n",
+        "    output_mkv = Path(\"/content\") / f\"{stem}_final.mkv\"\n",
+        "\n",
+        "    # 2. Process\n",
+        "    extract_audio(source_video, wav_path)\n",
+        "    vocals = run_demucs(wav_path, stems_dir)\n",
+        "    print(\"[!] Generating subtitles (Whisper)...\")\n",
+        "    subtitles = run_whisper(vocals, subs_dir, target_lang=TARGET_LANG)\n",
+        "    mux_video_with_subtitle(source_video, subtitles, output_mkv)\n",
+        "\n",
+        "    print(\"[+] All done!\")\n",
+        "    # Intermediate files are only removed when the form option asks for it.\n",
+        "    if not CLEANUP_TEMP_FILES:\n",
+        "        return\n",
+        "    shutil.rmtree(WORKDIR, ignore_errors=True)\n",
+        "    print(\"[+] Temp files deleted\")\n",
+        "\n",
+        "main()"
+      ],
+      "metadata": {
+        "id": "3WceASw_Hl8u"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "IadifqcV-6PP",
+        "cellView": "form"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Cascading Translation (Whisper -> SRT -> Google Translate -> Target Lang)\n",
+        "!pip install deep-translator\n",
+        "from deep_translator import GoogleTranslator\n",
+        "\n",
+        "\n",
+        "TARGET_LANG = \"en\" #@param {type:\"string\"}\n",
+        "DEMUX_AUDIO_FORMAT = \"wav\"\n",
+        "WHISPER_MODEL = \"large\" #@param {type:\"string\"}\n",
+        "DEVICE = \"cuda\" #@param [\"cuda\", \"cpu\"] {type:\"string\"}\n",
+        "WORKDIR = Path(\"/content/temp\")\n",
+        "\n",
+        "\n",
+        "def extract_audio(video_path: Path, audio_path: Path):\n",
+        "    \"\"\"Extract the audio of ``video_path`` into ``audio_path`` with ffmpeg.\n",
+        "\n",
+        "    Output is ``pcm_s16le`` audio at 44100 Hz with two channels; ``-y`` lets\n",
+        "    ffmpeg overwrite an existing ``audio_path``.\n",
+        "\n",
+        "    Raises:\n",
+        "        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.\n",
+        "    \"\"\"\n",
+        "    encoding_options = {\"-acodec\": \"pcm_s16le\", \"-ar\": \"44100\", \"-ac\": \"2\"}\n",
+        "\n",
+        "    # -vn disables the video stream so only audio is written.\n",
+        "    ffmpeg_call = [\"ffmpeg\", \"-y\", \"-i\", str(video_path), \"-vn\"]\n",
+        "    for flag, value in encoding_options.items():\n",
+        "        ffmpeg_call += [flag, value]\n",
+        "    ffmpeg_call.append(str(audio_path))\n",
+        "\n",
+        "    subprocess.run(ffmpeg_call, check=True)\n",
+        "\n",
+        "    print(f\"[+] Extracted audio to {audio_path}\")\n",
+        "\n",
+        "\n",
+        "def run_demucs(audio_path: Path, out_dir: Path):\n",
+        "    \"\"\"Separate the vocal stem of ``audio_path`` with the Demucs CLI.\n",
+        "\n",
+        "    Args:\n",
+        "        audio_path: Audio file to separate.\n",
+        "        out_dir: Directory passed to Demucs as ``-o``.\n",
+        "\n",
+        "    Returns:\n",
+        "        Path of the ``vocals.wav`` file for this track.\n",
+        "\n",
+        "    Raises:\n",
+        "        subprocess.CalledProcessError: If demucs exits with a non-zero status.\n",
+        "        FileNotFoundError: If the expected vocals file is missing afterwards.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    # Pin the model explicitly: the output folder below is named after it, so\n",
+        "    # the path must not depend on whatever Demucs' default model happens to be.\n",
+        "    model_name = \"htdemucs\"\n",
+        "\n",
+        "    cmd = [\n",
+        "        \"demucs\",\n",
+        "        \"-n\", model_name,\n",
+        "        \"--two-stems\", \"vocals\",\n",
+        "        \"--device\", DEVICE,\n",
+        "        \"-o\", str(out_dir),\n",
+        "        str(audio_path)\n",
+        "    ]\n",
+        "\n",
+        "    subprocess.run(cmd, check=True)\n",
+        "\n",
+        "    vocals_path = out_dir / model_name / audio_path.stem / \"vocals.wav\"\n",
+        "\n",
+        "    # Fail here with a clear message rather than later inside Whisper.\n",
+        "    if not vocals_path.exists():\n",
+        "        raise FileNotFoundError(f\"Demucs did not produce {vocals_path}\")\n",
+        "\n",
+        "    print(f\"[+] Demucs vocals saved at {vocals_path}\")\n",
+        "\n",
+        "    return vocals_path\n",
+        "\n",
+        "\n",
+        "def run_whisper(audio_path: Path, out_dir: Path):\n",
+        "    \"\"\"Transcribe ``audio_path`` with the Whisper CLI and return the SRT path.\n",
+        "\n",
+        "    Args:\n",
+        "        audio_path: Audio file to transcribe.\n",
+        "        out_dir: Directory passed as ``--output_dir``.\n",
+        "\n",
+        "    Returns:\n",
+        "        ``out_dir`` joined with the audio file's stem plus ``.srt``.\n",
+        "\n",
+        "    Raises:\n",
+        "        subprocess.CalledProcessError: If whisper exits with a non-zero status.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    cmd = [\n",
+        "        \"whisper\",\n",
+        "        str(audio_path),\n",
+        "        \"--model\", WHISPER_MODEL,\n",
+        "        # Apply the DEVICE form field to Whisper as well; it used to reach\n",
+        "        # Demucs only, so choosing \"cpu\" had no effect on this step.\n",
+        "        \"--device\", DEVICE,\n",
+        "        \"--output_format\", \"srt\",\n",
+        "        \"--output_dir\", str(out_dir),\n",
+        "        \"--task\", \"transcribe\",\n",
+        "        \"--verbose\", \"True\"\n",
+        "    ]\n",
+        "\n",
+        "    subprocess.run(cmd, check=True)\n",
+        "\n",
+        "    srt_file = out_dir / f\"{audio_path.stem}.srt\"\n",
+        "\n",
+        "    print(f\"[+] Whisper generated SRT at {srt_file}\")\n",
+        "\n",
+        "    return srt_file\n",
+        "\n",
+        "\n",
+        "def translate_srt(input_srt: Path, output_srt: Path, target_lang=\"en\"):\n",
+        "    \"\"\"Translate the text lines of an SRT file with Google Translate.\n",
+        "\n",
+        "    Lines that are purely digits, contain ``-->`` or are blank are copied\n",
+        "    unchanged; every other line is translated on its own. A line whose\n",
+        "    translation raises or comes back empty is kept in its original form, and\n",
+        "    the number of such lines is reported at the end.\n",
+        "\n",
+        "    Args:\n",
+        "        input_srt: SRT file to read (UTF-8).\n",
+        "        output_srt: SRT file to write (UTF-8).\n",
+        "        target_lang: Language passed to ``GoogleTranslator`` as ``target``.\n",
+        "\n",
+        "    Returns:\n",
+        "        ``output_srt``.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    translator = GoogleTranslator(source=\"auto\", target=target_lang)\n",
+        "\n",
+        "    with open(input_srt, \"r\", encoding=\"utf-8\") as f:\n",
+        "        lines = f.readlines()\n",
+        "\n",
+        "    translated_lines = []\n",
+        "    untranslated = 0\n",
+        "    first_error = None\n",
+        "\n",
+        "    for line in lines:\n",
+        "\n",
+        "        stripped = line.strip()\n",
+        "\n",
+        "        if stripped.isdigit() or \"-->\" in stripped or stripped == \"\":\n",
+        "            translated_lines.append(line)\n",
+        "            continue\n",
+        "\n",
+        "        try:\n",
+        "            translated = translator.translate(stripped)\n",
+        "        except Exception as exc:\n",
+        "            # Best effort: one failed request must not abort the whole file,\n",
+        "            # but remember the cause so it can be reported below.\n",
+        "            if first_error is None:\n",
+        "                first_error = exc\n",
+        "            translated = None\n",
+        "\n",
+        "        # An empty result would write a blank line, which ends the cue early\n",
+        "        # in SRT, so fall back to the original text instead.\n",
+        "        if isinstance(translated, str) and translated.strip():\n",
+        "            translated_lines.append(translated + \"\\n\")\n",
+        "        else:\n",
+        "            untranslated += 1\n",
+        "            translated_lines.append(line)\n",
+        "\n",
+        "    with open(output_srt, \"w\", encoding=\"utf-8\") as f:\n",
+        "        f.writelines(translated_lines)\n",
+        "\n",
+        "    if untranslated:\n",
+        "        detail = f\" (first error: {first_error})\" if first_error is not None else \"\"\n",
+        "        print(f\"[!] {untranslated} line(s) could not be translated and were kept as-is{detail}\")\n",
+        "\n",
+        "    print(f\"[+] Translated subtitles saved at {output_srt}\")\n",
+        "\n",
+        "    return output_srt\n",
+        "\n",
+        "\n",
+        "def mux_video_with_subtitle(video_path: Path, srt_path: Path, output_path: Path, language: str = \"eng\"):\n",
+        "    \"\"\"Combine a video and an SRT file into one output file with ffmpeg.\n",
+        "\n",
+        "    Args:\n",
+        "        video_path: Source video (first ffmpeg input).\n",
+        "        srt_path: Subtitle file (second ffmpeg input).\n",
+        "        output_path: File to write; overwritten if present (``-y``).\n",
+        "        language: Value written to the subtitle stream's ``language``\n",
+        "            metadata tag. Defaults to ``\"eng\"``, the value that used to be\n",
+        "            hard-coded; pass another code when the subtitles are not English.\n",
+        "\n",
+        "    Raises:\n",
+        "        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    cmd = [\n",
+        "        \"ffmpeg\",\n",
+        "        \"-y\",\n",
+        "        \"-i\", str(video_path),\n",
+        "        \"-i\", str(srt_path),\n",
+        "        # Copy all streams as-is, except subtitles which use the srt codec.\n",
+        "        \"-c\", \"copy\",\n",
+        "        \"-c:s\", \"srt\",\n",
+        "        \"-map\", \"0:v\",\n",
+        "        # Trailing '?' makes the audio mapping optional.\n",
+        "        \"-map\", \"0:a?\",\n",
+        "        \"-map\", \"1\",\n",
+        "        \"-metadata:s:s:0\", f\"language={language}\",\n",
+        "        str(output_path)\n",
+        "    ]\n",
+        "\n",
+        "    subprocess.run(cmd, check=True)\n",
+        "\n",
+        "    print(f\"[+] Final MKV saved at {output_path}\")\n",
+        "\n",
+        "\n",
+        "def main():\n",
+        "    \"\"\"Run the cascading pipeline: transcribe with Whisper, then translate the SRT.\n",
+        "\n",
+        "    Reads the form constants of this cell plus ``YT_URL``, ``VIDEO_FILE`` and\n",
+        "    ``download_video`` from the download cell, so that cell must run first.\n",
+        "    The result is written to ``/content/<video stem>_final.mkv`` and the\n",
+        "    working directory is always removed at the end.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    WORKDIR.mkdir(parents=True, exist_ok=True)\n",
+        "    source_video = Path(VIDEO_FILE)\n",
+        "\n",
+        "    download_video(YT_URL, VIDEO_FILE)\n",
+        "\n",
+        "    stem = source_video.stem\n",
+        "    wav_path = WORKDIR / f\"{stem}.{DEMUX_AUDIO_FORMAT}\"\n",
+        "    stems_dir = WORKDIR / \"demucs_out\"\n",
+        "    subs_dir = WORKDIR / \"srt_out\"\n",
+        "    for folder in (stems_dir, subs_dir):\n",
+        "        folder.mkdir(exist_ok=True)\n",
+        "    output_mkv = Path(\"/content\") / f\"{stem}_final.mkv\"\n",
+        "\n",
+        "    extract_audio(source_video, wav_path)\n",
+        "    vocals = run_demucs(wav_path, stems_dir)\n",
+        "\n",
+        "    print(\"[!] Generating subtitles (Whisper)...\")\n",
+        "    original_srt = run_whisper(vocals, subs_dir)\n",
+        "\n",
+        "    # The translated copy is what gets muxed into the final file.\n",
+        "    translated_srt = subs_dir / \"translated.srt\"\n",
+        "    print(\"[!] Translating subtitles...\")\n",
+        "    translate_srt(original_srt, translated_srt, TARGET_LANG)\n",
+        "\n",
+        "    mux_video_with_subtitle(source_video, translated_srt, output_mkv)\n",
+        "    print(\"[+] All done!\")\n",
+        "\n",
+        "    shutil.rmtree(WORKDIR, ignore_errors=True)\n",
+        "    print(\"[+] Temp files deleted\")\n",
+        "\n",
+        "\n",
+        "main()"
+      ]
+    }
+  ]
+}
+\ No newline at end of file
author	Pinapelz <donaldshan1@outlook.com>	2026-03-11 22:19:58 -0700
committer	GitHub <noreply@github.com>	2026-03-11 22:19:58 -0700
commit	1e56db585cf705126deee39d956215c05f89f2ba (patch)
tree	6946e2fa0f01e50125f63a3e69c1203fa298313a
parent	e6f39efe00f6d35e6485e19f1a37d74236cddf9d (diff)