aboutsummaryrefslogtreecommitdiffstats
path: root/Autogen_Subtitles_MKV.ipynb
diff options
context:
space:
mode:
authorPinapelz <donaldshan1@outlook.com>2026-03-11 22:19:58 -0700
committerGitHub <noreply@github.com>2026-03-11 22:19:58 -0700
commit1e56db585cf705126deee39d956215c05f89f2ba (patch)
tree6946e2fa0f01e50125f63a3e69c1203fa298313a /Autogen_Subtitles_MKV.ipynb
parente6f39efe00f6d35e6485e19f1a37d74236cddf9d (diff)
add Autogen_Subtitles_MKV.ipynb
Diffstat (limited to 'Autogen_Subtitles_MKV.ipynb')
-rw-r--r--Autogen_Subtitles_MKV.ipynb386
1 files changed, 386 insertions, 0 deletions
diff --git a/Autogen_Subtitles_MKV.ipynb b/Autogen_Subtitles_MKV.ipynb
new file mode 100644
index 0000000..f1740b2
--- /dev/null
+++ b/Autogen_Subtitles_MKV.ipynb
@@ -0,0 +1,386 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "gpuType": "A100",
+ "machine_shape": "hm"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Auto Generate Subtitled MKV from Video"
+ ],
+ "metadata": {
+ "id": "SXzFlDnXer6P"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Install Dependencies\n",
+ "!apt -y install ffmpeg\n",
+ "!pip install -q demucs openai-whisper yt-dlp"
+ ],
+ "metadata": {
+ "id": "Oo_kRyXD_rHm"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "You can either translate directly with Whisper, or transcribe with Whisper and then run the subtitles through Google Translate. Results may vary.\n",
+ "\n",
+ "The code below downloads a video from YouTube. Alternatively, upload your own file and name it `video.webm`."
+ ],
+ "metadata": {
+ "id": "gM5-zcxxLKjY"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@markdown Leave `VIDEO_FILE` as `video.webm` if you are downloading from YouTube; otherwise, change it to your video's file name.\n",
+ "import subprocess\n",
+ "from pathlib import Path\n",
+ "import shutil\n",
+ "\n",
+ "YT_URL = \"\" #@param {type:\"string\"}\n",
+ "VIDEO_FILE = \"video.webm\" #@param {type:\"string\"}\n",
+ "\n",
+ "def download_video(url: str, output_name: str):\n",
+ "    \"\"\"Download `url` with yt-dlp and save it as `output_name`.\n",
+ "\n",
+ "    Nothing is downloaded when `output_name` already exists, so an uploaded\n",
+ "    file with that name is used as-is.\n",
+ "\n",
+ "    Raises:\n",
+ "        ValueError: `output_name` is missing and `url` is empty.\n",
+ "        subprocess.CalledProcessError: yt-dlp exited with an error.\n",
+ "    \"\"\"\n",
+ "    if Path(output_name).exists():\n",
+ "        print(f\"[!] File {output_name} already exists\")\n",
+ "        return\n",
+ "\n",
+ "    # Without this guard a blank YT_URL reaches yt-dlp as an empty argument\n",
+ "    # and the real cause (no upload, no URL) is buried in its error output.\n",
+ "    if not url or not url.strip():\n",
+ "        raise ValueError(\n",
+ "            f\"{output_name} does not exist and no URL was given: \"\n",
+ "            \"set YT_URL or upload the video first\"\n",
+ "        )\n",
+ "\n",
+ "    print(f\"[!] Downloading video from {url}...\")\n",
+ "\n",
+ "    cmd = [\n",
+ "        \"yt-dlp\",\n",
+ "        \"-f\", \"bestvideo[ext=webm]+bestaudio[ext=webm]/best[ext=webm]/best\",\n",
+ "        \"-o\", output_name,\n",
+ "        \"--\",  # everything after this is a URL, even if it starts with '-'\n",
+ "        url.strip(),\n",
+ "    ]\n",
+ "\n",
+ "    subprocess.run(cmd, check=True)\n",
+ "\n",
+ "    print(f\"[+] Downloaded to {output_name}\")\n",
+ "\n"
+ ],
+ "metadata": {
+ "cellView": "form",
+ "id": "w2pAP5yWRDwB"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#@title Direct Translation (Whisper -> Target Language)\n",
+ "TARGET_LANG = \"en\" #@param {type:\"string\"}\n",
+ "DEMUX_AUDIO_FORMAT = \"wav\"\n",
+ "WHISPER_MODEL = \"large\" #@param {type:\"string\"}\n",
+ "DEVICE = \"cuda\" #@param [\"cuda\", \"cpu\"] {type:\"string\"}\n",
+ "CLEANUP_TEMP_FILES = True #@param {type:\"boolean\"}\n",
+ "WORKDIR = Path(\"/content/temp\")\n",
+ "PROMPT = \"\" #@param {type:\"string\"}\n",
+ "\n",
+ "def extract_audio(video_path: Path, audio_path: Path):\n",
+ "    \"\"\"Write the audio track of `video_path` to `audio_path` with ffmpeg.\n",
+ "\n",
+ "    The output is 16-bit PCM at 44100 Hz with two channels; an existing\n",
+ "    file is overwritten (`-y`). Raises subprocess.CalledProcessError when\n",
+ "    ffmpeg exits with an error.\n",
+ "    \"\"\"\n",
+ "    source, destination = str(video_path), str(audio_path)\n",
+ "    pcm_options = [\"-acodec\", \"pcm_s16le\", \"-ar\", \"44100\", \"-ac\", \"2\"]\n",
+ "    ffmpeg_args = [\"ffmpeg\", \"-y\", \"-i\", source, \"-vn\", *pcm_options, destination]\n",
+ "    subprocess.run(ffmpeg_args, check=True)\n",
+ "    print(f\"[+] Extracted audio to {audio_path}\")\n",
+ "\n",
+ "def run_demucs(audio_path: Path, out_dir: Path):\n",
+ "    \"\"\"Separate the vocals of `audio_path` with Demucs.\n",
+ "\n",
+ "    Runs the `demucs` CLI in two-stem (vocals) mode on DEVICE, writing\n",
+ "    under `out_dir`, and returns the path of the resulting `vocals.wav`.\n",
+ "\n",
+ "    Raises:\n",
+ "        subprocess.CalledProcessError: demucs exited with an error.\n",
+ "        FileNotFoundError: demucs finished but the vocals file is missing.\n",
+ "    \"\"\"\n",
+ "    # The output folder is named after the model, so pin the model instead\n",
+ "    # of relying on whatever the installed Demucs version uses by default.\n",
+ "    model_name = \"htdemucs\"\n",
+ "    cmd = [\n",
+ "        \"demucs\",\n",
+ "        \"-n\", model_name,\n",
+ "        \"--two-stems\", \"vocals\",\n",
+ "        \"--device\", DEVICE,\n",
+ "        \"-o\", str(out_dir),\n",
+ "        str(audio_path),\n",
+ "    ]\n",
+ "    subprocess.run(cmd, check=True)\n",
+ "    vocals_path = out_dir / model_name / audio_path.stem / \"vocals.wav\"\n",
+ "    # Fail here rather than later inside Whisper with a less obvious error.\n",
+ "    if not vocals_path.is_file():\n",
+ "        raise FileNotFoundError(f\"Demucs did not produce {vocals_path}\")\n",
+ "    print(f\"[+] Demucs vocals saved at {vocals_path}\")\n",
+ "    return vocals_path\n",
+ "\n",
+ "def run_whisper(audio_path: Path, out_dir: Path, target_lang=\"en\"):\n",
+ "    \"\"\"Run the Whisper CLI on `audio_path` and return the SRT it writes.\n",
+ "\n",
+ "    `target_lang` of \"auto\" selects the `transcribe` task; any other value\n",
+ "    selects `translate`. Values other than \"auto\" and \"en\" are also passed\n",
+ "    as `--language`. When PROMPT is set it is used as the initial prompt.\n",
+ "\n",
+ "    NOTE(review): Whisper documents `--language` as the language spoken in\n",
+ "    the audio and `translate` as translation into English - confirm that\n",
+ "    passing the target language there is intended.\n",
+ "\n",
+ "    Raises subprocess.CalledProcessError when whisper exits with an error.\n",
+ "    \"\"\"\n",
+ "    cmd = [\n",
+ "        \"whisper\",\n",
+ "        str(audio_path),\n",
+ "        \"--model\", WHISPER_MODEL,\n",
+ "        # Honour the DEVICE form field; previously only Demucs used it and\n",
+ "        # Whisper silently picked its own default device.\n",
+ "        \"--device\", DEVICE,\n",
+ "        \"--output_format\", \"srt\",\n",
+ "        \"--output_dir\", str(out_dir),\n",
+ "        \"--task\", \"translate\" if target_lang != \"auto\" else \"transcribe\",\n",
+ "    ]\n",
+ "    if PROMPT:\n",
+ "        cmd.extend([\n",
+ "            \"--initial_prompt\", PROMPT,\n",
+ "            \"--carry_initial_prompt\", \"True\",\n",
+ "        ])\n",
+ "    if target_lang not in (\"auto\", \"en\"):\n",
+ "        cmd.extend([\"--language\", target_lang])\n",
+ "    subprocess.run(cmd, check=True)\n",
+ "    # Whisper names its output after the input file's stem.\n",
+ "    srt_file = out_dir / f\"{audio_path.stem}.srt\"\n",
+ "    print(f\"[+] Whisper generated SRT at {srt_file}\")\n",
+ "    return srt_file\n",
+ "\n",
+ "def mux_video_with_subtitle(video_path: Path, srt_path: Path, output_path: Path, language: str = \"eng\"):\n",
+ "    \"\"\"Mux `video_path` and the subtitles in `srt_path` into `output_path`.\n",
+ "\n",
+ "    Video and (if present) audio streams are copied without re-encoding;\n",
+ "    the subtitle is stored as an SRT stream.\n",
+ "\n",
+ "    Args:\n",
+ "        language: value written to the subtitle stream's `language` tag.\n",
+ "            Defaults to \"eng\" as before; pass the matching code when the\n",
+ "            subtitles are not English (e.g. after transcribing with\n",
+ "            TARGET_LANG set to \"auto\").\n",
+ "\n",
+ "    Raises subprocess.CalledProcessError when ffmpeg exits with an error.\n",
+ "    \"\"\"\n",
+ "    cmd = [\n",
+ "        \"ffmpeg\",\n",
+ "        \"-y\",\n",
+ "        \"-i\", str(video_path),\n",
+ "        \"-i\", str(srt_path),\n",
+ "        \"-c\", \"copy\",\n",
+ "        \"-c:s\", \"srt\",\n",
+ "        \"-map\", \"0:v\",\n",
+ "        \"-map\", \"0:a?\",  # '?' keeps videos without an audio track working\n",
+ "        \"-map\", \"1\",\n",
+ "        \"-metadata:s:s:0\", f\"language={language}\",\n",
+ "        str(output_path),\n",
+ "    ]\n",
+ "    subprocess.run(cmd, check=True)\n",
+ "    print(f\"[+] Final MKV saved at {output_path}\")\n",
+ "\n",
+ "def main():\n",
+ "    \"\"\"Run the direct pipeline: download, extract, separate, subtitle, mux.\n",
+ "\n",
+ "    Intermediate files live under WORKDIR; the result is written to\n",
+ "    /content/<video stem>_final.mkv. WORKDIR is removed afterwards when\n",
+ "    CLEANUP_TEMP_FILES is set.\n",
+ "    \"\"\"\n",
+ "    WORKDIR.mkdir(parents=True, exist_ok=True)\n",
+ "\n",
+ "    source_video = Path(VIDEO_FILE)\n",
+ "\n",
+ "    # Step 1: fetch the video (skipped when the file is already there)\n",
+ "    download_video(YT_URL, VIDEO_FILE)\n",
+ "\n",
+ "    stem = source_video.stem\n",
+ "    wav_path = WORKDIR / f\"{stem}.{DEMUX_AUDIO_FORMAT}\"\n",
+ "    separated_dir = WORKDIR / \"demucs_out\"\n",
+ "    subtitle_dir = WORKDIR / \"srt_out\"\n",
+ "    for folder in (separated_dir, subtitle_dir):\n",
+ "        folder.mkdir(exist_ok=True)\n",
+ "    output_mkv = Path(\"/content\") / f\"{stem}_final.mkv\"\n",
+ "\n",
+ "    # Step 2: audio -> vocals -> subtitles -> MKV\n",
+ "    extract_audio(source_video, wav_path)\n",
+ "    vocals = run_demucs(wav_path, separated_dir)\n",
+ "    print(\"[!] Generating subtitles (Whisper)...\")\n",
+ "    subtitles = run_whisper(vocals, subtitle_dir, target_lang=TARGET_LANG)\n",
+ "    mux_video_with_subtitle(source_video, subtitles, output_mkv)\n",
+ "\n",
+ "    print(\"[+] All done!\")\n",
+ "    if not CLEANUP_TEMP_FILES:\n",
+ "        return\n",
+ "    shutil.rmtree(WORKDIR, ignore_errors=True)\n",
+ "    print(\"[+] Temp files deleted\")\n",
+ "\n",
+ "main()"
+ ],
+ "metadata": {
+ "id": "3WceASw_Hl8u"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "IadifqcV-6PP",
+ "cellView": "form"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Cascading Translation (Whisper -> SRT -> Google Translate -> Target Lang)\n",
+ "!pip install deep-translator\n",
+ "from deep_translator import GoogleTranslator\n",
+ "\n",
+ "\n",
+ "TARGET_LANG = \"en\" #@param {type:\"string\"}\n",
+ "DEMUX_AUDIO_FORMAT = \"wav\"\n",
+ "WHISPER_MODEL = \"large\" #@param {type:\"string\"}\n",
+ "DEVICE = \"cuda\" #@param [\"cuda\", \"cpu\"] {type:\"string\"}\n",
+ "WORKDIR = Path(\"/content/temp\")\n",
+ "\n",
+ "\n",
+ "def extract_audio(video_path: Path, audio_path: Path):\n",
+ "    \"\"\"Extract the audio of `video_path` into `audio_path` using ffmpeg.\n",
+ "\n",
+ "    Produces 16-bit PCM, 44100 Hz, two channels, overwriting any existing\n",
+ "    file. Raises subprocess.CalledProcessError when ffmpeg fails.\n",
+ "    \"\"\"\n",
+ "    ffmpeg_args = [\"ffmpeg\", \"-y\", \"-i\", str(video_path), \"-vn\"]\n",
+ "    ffmpeg_args += [\"-acodec\", \"pcm_s16le\", \"-ar\", \"44100\", \"-ac\", \"2\"]\n",
+ "    ffmpeg_args.append(str(audio_path))\n",
+ "\n",
+ "    subprocess.run(ffmpeg_args, check=True)\n",
+ "\n",
+ "    print(f\"[+] Extracted audio to {audio_path}\")\n",
+ "\n",
+ "\n",
+ "def run_demucs(audio_path: Path, out_dir: Path):\n",
+ "    \"\"\"Separate the vocals of `audio_path` with Demucs.\n",
+ "\n",
+ "    Runs the `demucs` CLI in two-stem (vocals) mode on DEVICE, writing\n",
+ "    under `out_dir`, and returns the path of the resulting `vocals.wav`.\n",
+ "\n",
+ "    Raises:\n",
+ "        subprocess.CalledProcessError: demucs exited with an error.\n",
+ "        FileNotFoundError: demucs finished but the vocals file is missing.\n",
+ "    \"\"\"\n",
+ "    # The output folder is named after the model, so pin the model instead\n",
+ "    # of relying on whatever the installed Demucs version uses by default.\n",
+ "    model_name = \"htdemucs\"\n",
+ "\n",
+ "    cmd = [\n",
+ "        \"demucs\",\n",
+ "        \"-n\", model_name,\n",
+ "        \"--two-stems\", \"vocals\",\n",
+ "        \"--device\", DEVICE,\n",
+ "        \"-o\", str(out_dir),\n",
+ "        str(audio_path),\n",
+ "    ]\n",
+ "\n",
+ "    subprocess.run(cmd, check=True)\n",
+ "\n",
+ "    vocals_path = out_dir / model_name / audio_path.stem / \"vocals.wav\"\n",
+ "\n",
+ "    # Fail here rather than later inside Whisper with a less obvious error.\n",
+ "    if not vocals_path.is_file():\n",
+ "        raise FileNotFoundError(f\"Demucs did not produce {vocals_path}\")\n",
+ "\n",
+ "    print(f\"[+] Demucs vocals saved at {vocals_path}\")\n",
+ "\n",
+ "    return vocals_path\n",
+ "\n",
+ "\n",
+ "def run_whisper(audio_path: Path, out_dir: Path):\n",
+ "    \"\"\"Transcribe `audio_path` with the Whisper CLI and return the SRT path.\n",
+ "\n",
+ "    The audio is transcribed (not translated); translation happens later\n",
+ "    in `translate_srt`. Raises subprocess.CalledProcessError when whisper\n",
+ "    exits with an error.\n",
+ "    \"\"\"\n",
+ "    cmd = [\n",
+ "        \"whisper\",\n",
+ "        str(audio_path),\n",
+ "        \"--model\", WHISPER_MODEL,\n",
+ "        # Honour the DEVICE form field; previously only Demucs used it and\n",
+ "        # Whisper silently picked its own default device.\n",
+ "        \"--device\", DEVICE,\n",
+ "        \"--output_format\", \"srt\",\n",
+ "        \"--output_dir\", str(out_dir),\n",
+ "        \"--task\", \"transcribe\",\n",
+ "        \"--verbose\", \"True\",\n",
+ "    ]\n",
+ "\n",
+ "    subprocess.run(cmd, check=True)\n",
+ "\n",
+ "    # Whisper names its output after the input file's stem.\n",
+ "    srt_file = out_dir / f\"{audio_path.stem}.srt\"\n",
+ "\n",
+ "    print(f\"[+] Whisper generated SRT at {srt_file}\")\n",
+ "\n",
+ "    return srt_file\n",
+ "\n",
+ "\n",
+ "def translate_srt(input_srt: Path, output_srt: Path, target_lang=\"en\"):\n",
+ "    \"\"\"Translate the text lines of an SRT file with Google Translate.\n",
+ "\n",
+ "    Numeric lines, timestamp lines (containing \"-->\") and blank lines are\n",
+ "    copied unchanged; every other line is translated on its own.\n",
+ "    Translation is best-effort: a line that fails to translate, or comes\n",
+ "    back empty, is kept as-is and reported instead of aborting the run.\n",
+ "\n",
+ "    Returns `output_srt`.\n",
+ "    \"\"\"\n",
+ "    translator = GoogleTranslator(source=\"auto\", target=target_lang)\n",
+ "\n",
+ "    with open(input_srt, \"r\", encoding=\"utf-8\") as f:\n",
+ "        lines = f.readlines()\n",
+ "\n",
+ "    translated_lines = []\n",
+ "    # Subtitles repeat short lines a lot; reuse earlier results instead of\n",
+ "    # issuing another request for the same text.\n",
+ "    cache = {}\n",
+ "    failed = 0\n",
+ "\n",
+ "    for line in lines:\n",
+ "        stripped = line.strip()\n",
+ "\n",
+ "        if stripped.isdigit() or \"-->\" in stripped or stripped == \"\":\n",
+ "            translated_lines.append(line)\n",
+ "            continue\n",
+ "\n",
+ "        translated = cache.get(stripped)\n",
+ "        if translated is None:\n",
+ "            try:\n",
+ "                translated = translator.translate(stripped)\n",
+ "            except Exception as exc:\n",
+ "                # Deliberately broad: one failed request must not lose the\n",
+ "                # whole subtitle file, but it should be visible.\n",
+ "                print(f\"[!] Could not translate {stripped!r}: {exc}\")\n",
+ "\n",
+ "        if translated:\n",
+ "            cache[stripped] = translated\n",
+ "            translated_lines.append(translated + \"\\n\")\n",
+ "        else:\n",
+ "            # An empty result would end the cue early in the SRT, so keep\n",
+ "            # the untranslated text.\n",
+ "            failed += 1\n",
+ "            translated_lines.append(line)\n",
+ "\n",
+ "    with open(output_srt, \"w\", encoding=\"utf-8\") as f:\n",
+ "        f.writelines(translated_lines)\n",
+ "\n",
+ "    if failed:\n",
+ "        print(f\"[!] {failed} subtitle line(s) were left untranslated\")\n",
+ "    print(f\"[+] Translated subtitles saved at {output_srt}\")\n",
+ "\n",
+ "    return output_srt\n",
+ "\n",
+ "\n",
+ "def mux_video_with_subtitle(video_path: Path, srt_path: Path, output_path: Path, language: str = \"eng\"):\n",
+ "    \"\"\"Mux `video_path` and the subtitles in `srt_path` into `output_path`.\n",
+ "\n",
+ "    Video and (if present) audio streams are copied without re-encoding;\n",
+ "    the subtitle is stored as an SRT stream.\n",
+ "\n",
+ "    Args:\n",
+ "        language: value written to the subtitle stream's `language` tag.\n",
+ "            Defaults to \"eng\" as before; pass the matching code when the\n",
+ "            subtitles were translated into a language other than English.\n",
+ "\n",
+ "    Raises subprocess.CalledProcessError when ffmpeg exits with an error.\n",
+ "    \"\"\"\n",
+ "    cmd = [\n",
+ "        \"ffmpeg\",\n",
+ "        \"-y\",\n",
+ "        \"-i\", str(video_path),\n",
+ "        \"-i\", str(srt_path),\n",
+ "        \"-c\", \"copy\",\n",
+ "        \"-c:s\", \"srt\",\n",
+ "        \"-map\", \"0:v\",\n",
+ "        \"-map\", \"0:a?\",  # '?' keeps videos without an audio track working\n",
+ "        \"-map\", \"1\",\n",
+ "        \"-metadata:s:s:0\", f\"language={language}\",\n",
+ "        str(output_path),\n",
+ "    ]\n",
+ "\n",
+ "    subprocess.run(cmd, check=True)\n",
+ "\n",
+ "    print(f\"[+] Final MKV saved at {output_path}\")\n",
+ "\n",
+ "\n",
+ "def main():\n",
+ "    \"\"\"Run the cascading pipeline: transcribe, then translate the SRT.\n",
+ "\n",
+ "    Steps: download, extract audio, isolate vocals, transcribe with\n",
+ "    Whisper, translate the subtitles to TARGET_LANG, mux into\n",
+ "    /content/<video stem>_final.mkv. WORKDIR is always removed at the end.\n",
+ "    \"\"\"\n",
+ "    WORKDIR.mkdir(parents=True, exist_ok=True)\n",
+ "\n",
+ "    source_video = Path(VIDEO_FILE)\n",
+ "    download_video(YT_URL, VIDEO_FILE)\n",
+ "\n",
+ "    stem = source_video.stem\n",
+ "    wav_path = WORKDIR / f\"{stem}.{DEMUX_AUDIO_FORMAT}\"\n",
+ "\n",
+ "    separated_dir = WORKDIR / \"demucs_out\"\n",
+ "    subtitle_dir = WORKDIR / \"srt_out\"\n",
+ "    for folder in (separated_dir, subtitle_dir):\n",
+ "        folder.mkdir(exist_ok=True)\n",
+ "\n",
+ "    output_mkv = Path(\"/content\") / f\"{stem}_final.mkv\"\n",
+ "\n",
+ "    extract_audio(source_video, wav_path)\n",
+ "    vocals = run_demucs(wav_path, separated_dir)\n",
+ "\n",
+ "    print(\"[!] Generating subtitles (Whisper)...\")\n",
+ "    original_srt = run_whisper(vocals, subtitle_dir)\n",
+ "\n",
+ "    translated_srt = subtitle_dir / \"translated.srt\"\n",
+ "    print(\"[!] Translating subtitles...\")\n",
+ "    translate_srt(original_srt, translated_srt, TARGET_LANG)\n",
+ "\n",
+ "    mux_video_with_subtitle(source_video, translated_srt, output_mkv)\n",
+ "    print(\"[+] All done!\")\n",
+ "\n",
+ "    shutil.rmtree(WORKDIR, ignore_errors=True)\n",
+ "    print(\"[+] Temp files deleted\")\n",
+ "\n",
+ "\n",
+ "main()"
+ ]
+ }
+ ]
+} \ No newline at end of file
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage