Add MP4 upload support

This commit is contained in:
2026-05-22 20:36:33 +01:00
parent 665ea41c65
commit a25a60f217
6 changed files with 185 additions and 29 deletions

View File

@@ -78,6 +78,8 @@ Open `http://127.0.0.1:7860` and submit a YouTube URL. Jobs run through the same
The OpenAI-compatible translation endpoint, API key, and model can be changed in the UI under **OpenAI-Compatible Settings**. Click **Save Settings** to persist them to `.cache/web_settings.json` for future web jobs. Unsaved values in the fields are still used for the next job you start. The OpenAI-compatible translation endpoint, API key, and model can be changed in the UI under **OpenAI-Compatible Settings**. Click **Save Settings** to persist them to `.cache/web_settings.json` for future web jobs. Unsaved values in the fields are still used for the next job you start.
You can also upload a local `.mp4` instead of entering a YouTube URL. Uploaded videos are staged under `.cache/uploads` and processed with the same transcription, translation, dubbing, and render pipeline.
### Docker ### Docker
Build and run the Gradio UI in a container: Build and run the Gradio UI in a container:
@@ -120,11 +122,18 @@ Authentication options for restricted videos still work as before:
.venv\Scripts\python.exe main.py "https://youtube.com/watch?v=VIDEO_ID" --lang de --cookies cookies.txt .venv\Scripts\python.exe main.py "https://youtube.com/watch?v=VIDEO_ID" --lang de --cookies cookies.txt
``` ```
Process a local MP4:
```powershell
.venv\Scripts\python.exe main.py --input-file "C:\path\to\video.mp4" --lang es
```
## CLI Options ## CLI Options
| Option | Description | | Option | Description |
| --- | --- | | --- | --- |
| `url` | YouTube video URL to process | | `url` | YouTube video URL to process |
| `--input-file` | Local MP4 file to process instead of a YouTube URL |
| `--lang`, `-l` | Target language code | | `--lang`, `-l` | Target language code |
| `--browser`, `-b` | Browser name for cookie extraction | | `--browser`, `-b` | Browser name for cookie extraction |
| `--cookies`, `-c` | Path to exported cookies file | | `--cookies`, `-c` | Path to exported cookies file |

40
main.py
View File

@@ -7,6 +7,7 @@ import argparse
import asyncio import asyncio
import shutil import shutil
import time import time
from pathlib import Path
from src.audio_separation import DEFAULT_MIX_MODE from src.audio_separation import DEFAULT_MIX_MODE
from src.core_utils import ConfigurationError from src.core_utils import ConfigurationError
@@ -28,7 +29,11 @@ Examples:
""", """,
) )
parser.add_argument("url", help="YouTube video URL to subtitle") parser.add_argument("url", nargs="?", help="YouTube video URL to subtitle")
parser.add_argument(
"--input-file",
help="Path to a local MP4 file to dub instead of downloading from YouTube.",
)
parser.add_argument( parser.add_argument(
"--lang", "--lang",
"-l", "-l",
@@ -148,6 +153,24 @@ def _build_translation_config(args: argparse.Namespace) -> TranslationConfig:
) )
def _validate_source_args(args: argparse.Namespace) -> None:
    """Require exactly one media source on the command line.

    The positional YouTube URL and ``--input-file`` are mutually exclusive,
    and one of the two must be supplied. Empty values count as "not given".

    Raises:
        SystemExit: If neither source, or both sources, were provided.
    """
    has_url = bool(args.url)
    has_local_file = bool(args.input_file)
    if has_url != has_local_file:
        # Exactly one of the two is set: nothing to complain about.
        return
    raise SystemExit("Provide either a YouTube URL or --input-file, but not both.")
def _prepare_local_video(input_file: str, media_module, cache_dir: Path) -> tuple[Path, Path]:
    """Validate a local MP4 and extract its audio for the shared pipeline.

    Args:
        input_file: User-supplied path to the video. ``~`` is expanded and the
            path is resolved to an absolute location before validation.
        media_module: Object exposing
            ``extract_audio_from_video(video_path, audio_path)``.
        cache_dir: Directory in which the extracted WAV is placed, named
            ``<video stem>_uploaded.wav``.

    Returns:
        A ``(video_path, audio_path)`` tuple, where ``audio_path`` is whatever
        ``media_module.extract_audio_from_video`` returns.

    Raises:
        FileNotFoundError: If the path does not point at a regular file.
        ValueError: If the file does not have an ``.mp4`` suffix
            (compared case-insensitively).
    """
    video_path = Path(input_file).expanduser().resolve()
    # is_file() instead of exists(): a directory that happens to be named
    # "something.mp4" must be rejected here, not later inside FFmpeg.
    if not video_path.is_file():
        raise FileNotFoundError(f"Input file not found: {video_path}")
    if video_path.suffix.lower() != ".mp4":
        raise ValueError("Only MP4 input files are supported.")

    audio_path = cache_dir / f"{video_path.stem}_uploaded.wav"
    return video_path, media_module.extract_audio_from_video(video_path, audio_path)
def _get_source_language_hint() -> str: def _get_source_language_hint() -> str:
"""Read an optional source language override from the environment.""" """Read an optional source language override from the environment."""
import os import os
@@ -190,6 +213,7 @@ def main() -> None:
"""Run the full YouTube Auto Dub pipeline.""" """Run the full YouTube Auto Dub pipeline."""
parser = build_parser() parser = build_parser()
args = parser.parse_args() args = parser.parse_args()
_validate_source_args(args)
import src.engines import src.engines
import src.media import src.media
@@ -233,11 +257,21 @@ def main() -> None:
) )
print(f"\n{'=' * 60}") print(f"\n{'=' * 60}")
print("STEP 1: DOWNLOADING CONTENT") print("STEP 1: PREPARING CONTENT")
print(f"{'=' * 60}") print(f"{'=' * 60}")
print(f"[*] Target URL: {args.url}")
print(f"[*] Target Language: {args.lang.upper()}") print(f"[*] Target Language: {args.lang.upper()}")
if args.input_file:
print(f"[*] Source MP4: {args.input_file}")
try:
video_path, audio_path = _prepare_local_video(args.input_file, src.media, src.engines.CACHE_DIR)
print(f"[+] Local video ready: {video_path}")
print(f"[+] Audio extracted: {audio_path}")
except Exception as exc:
print(f"\n[!] LOCAL INPUT FAILED: {exc}")
return
else:
print(f"[*] Target URL: {args.url}")
try: try:
video_path = src.youtube.downloadVideo( video_path = src.youtube.downloadVideo(
args.url, args.url,

View File

@@ -22,6 +22,29 @@ from src.engines import SAMPLE_RATE
FINAL_MIX_CHANNELS = 2 FINAL_MIX_CHANNELS = 2
def extract_audio_from_video(video_path: Path, output_path: Path) -> Path:
    """Write the audio track of a local video to a WAV file via FFmpeg.

    FFmpeg is invoked with the video stream disabled and the audio encoded as
    single-channel 16-bit PCM at ``SAMPLE_RATE``. An existing file at
    ``output_path`` is overwritten, and its parent directory is created when
    missing.

    Args:
        video_path: Source video to read.
        output_path: Destination WAV file.

    Returns:
        ``output_path``, once the file exists and is at least 1 KiB.

    Raises:
        FileNotFoundError: If ``video_path`` does not exist.
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
        RuntimeError: If the output is missing or smaller than 1 KiB afterwards.
    """
    if not video_path.exists():
        raise FileNotFoundError(f"Source video is missing: {video_path}")

    output_path.parent.mkdir(parents=True, exist_ok=True)

    source_args = ['-i', str(video_path)]
    # -vn drops video; the rest selects 16-bit PCM, the sample rate, and mono.
    audio_args = ['-vn', '-acodec', 'pcm_s16le', '-ar', str(SAMPLE_RATE), '-ac', '1']
    ffmpeg_call = ['ffmpeg', '-y', '-v', 'error', *source_args, *audio_args, str(output_path)]
    subprocess.run(ffmpeg_call, check=True)

    # A tiny or absent file means FFmpeg produced nothing usable even though
    # it reported success.
    usable = output_path.exists() and output_path.stat().st_size >= 1024
    if not usable:
        raise RuntimeError(f"Audio extraction did not create a usable WAV file: {output_path}")
    return output_path
def _build_subtitle_filter(subtitle_path: Path) -> str: def _build_subtitle_filter(subtitle_path: Path) -> str:
"""Build a Windows-safe FFmpeg subtitles filter expression.""" """Build a Windows-safe FFmpeg subtitles filter expression."""
escaped_path = str(subtitle_path.resolve()).replace("\\", "/").replace(":", "\\:") escaped_path = str(subtitle_path.resolve()).replace("\\", "/").replace(":", "\\:")

View File

@@ -4,7 +4,7 @@ from __future__ import annotations
from src.audio_separation import DEFAULT_MIX_MODE from src.audio_separation import DEFAULT_MIX_MODE
from main import _build_translation_config, build_parser from main import _build_translation_config, _validate_source_args, build_parser
def test_parser_accepts_lmstudio_flags(): def test_parser_accepts_lmstudio_flags():
@@ -69,3 +69,37 @@ def test_parser_defaults_to_instrumental_only_mix_mode():
args = parser.parse_args(["https://youtube.com/watch?v=demo"]) args = parser.parse_args(["https://youtube.com/watch?v=demo"])
assert args.mix_mode == DEFAULT_MIX_MODE assert args.mix_mode == DEFAULT_MIX_MODE
def test_parser_accepts_local_input_file_without_url():
    """``--input-file`` on its own parses and leaves the positional URL unset."""
    argv = ["--input-file", "demo.mp4", "--lang", "fr"]

    parsed = build_parser().parse_args(argv)

    assert parsed.url is None
    assert parsed.input_file == "demo.mp4"
    assert parsed.lang == "fr"
def test_validate_source_args_rejects_missing_source():
    """Validation exits when neither a URL nor ``--input-file`` is given."""
    parsed = build_parser().parse_args([])

    failure = None
    try:
        _validate_source_args(parsed)
    except SystemExit as exc:
        failure = exc

    if failure is None:
        raise AssertionError("Expected SystemExit for missing source")
    assert "Provide either" in str(failure)
def test_validate_source_args_rejects_two_sources():
    """Validation exits when a URL and ``--input-file`` are both given."""
    argv = ["https://youtube.com/watch?v=demo", "--input-file", "demo.mp4"]
    parsed = build_parser().parse_args(argv)

    failure = None
    try:
        _validate_source_args(parsed)
    except SystemExit as exc:
        failure = exc

    if failure is None:
        raise AssertionError("Expected SystemExit for two sources")
    assert "not both" in str(failure)

View File

@@ -39,6 +39,20 @@ def test_build_pipeline_command_accepts_optional_settings():
assert "--gpu" in command assert "--gpu" in command
def test_build_pipeline_command_accepts_uploaded_mp4():
    """A form with only an uploaded file yields ``--input-file`` and no URL."""
    form = {"input_file": "C:\\videos\\demo.mp4", "lang": "de"}

    command = build_pipeline_command(form)

    assert "https://youtube.com/watch?v=demo" not in command
    assert "--input-file" in command
    file_flag_at = command.index("--input-file")
    lang_flag_at = command.index("--lang")
    assert command[file_flag_at + 1] == "C:\\videos\\demo.mp4"
    assert command[lang_flag_at + 1] == "de"
def test_create_app_builds_gradio_blocks(): def test_create_app_builds_gradio_blocks():
app = create_app() app = create_app()

View File

@@ -8,6 +8,7 @@ from datetime import datetime, timezone
import json import json
from pathlib import Path from pathlib import Path
import os import os
import shutil
import subprocess import subprocess
import sys import sys
import threading import threading
@@ -28,6 +29,7 @@ from src.translation import (
BASE_DIR = Path(__file__).resolve().parent BASE_DIR = Path(__file__).resolve().parent
LOG_DIR = BASE_DIR / "logs" / "gradio" LOG_DIR = BASE_DIR / "logs" / "gradio"
SETTINGS_FILE = BASE_DIR / ".cache" / "web_settings.json" SETTINGS_FILE = BASE_DIR / ".cache" / "web_settings.json"
UPLOAD_DIR = BASE_DIR / ".cache" / "uploads"
@dataclass @dataclass
@@ -107,12 +109,15 @@ def build_pipeline_command(form: dict[str, str | bool]) -> list[str]:
command = [ command = [
sys.executable, sys.executable,
str(BASE_DIR / "main.py"), str(BASE_DIR / "main.py"),
args.url,
"--lang", "--lang",
args.lang, args.lang,
"--mix-mode", "--mix-mode",
args.mix_mode, args.mix_mode,
] ]
if args.url:
command.insert(2, args.url)
if args.input_file:
command.extend(["--input-file", args.input_file])
if args.translation_backend: if args.translation_backend:
command.extend(["--translation-backend", args.translation_backend]) command.extend(["--translation-backend", args.translation_backend])
@@ -135,10 +140,15 @@ def build_pipeline_command(form: dict[str, str | bool]) -> list[str]:
def _form_to_cli_args(form: dict[str, str | bool]) -> list[str]: def _form_to_cli_args(form: dict[str, str | bool]) -> list[str]:
url = (form.get("url") or "").strip() url = (form.get("url") or "").strip()
if not url: input_file = (form.get("input_file") or "").strip()
raise ValueError("A YouTube URL is required.") if not url and not input_file:
raise ValueError("A YouTube URL or uploaded MP4 is required.")
if url and input_file:
raise ValueError("Use either a YouTube URL or uploaded MP4, not both.")
cli_args = [url] cli_args = [url] if url else []
if input_file:
cli_args.extend(["--input-file", input_file])
field_flags = { field_flags = {
"lang": "--lang", "lang": "--lang",
"browser": "--browser", "browser": "--browser",
@@ -168,6 +178,24 @@ def _form_to_cli_args(form: dict[str, str | bool]) -> list[str]:
return cli_args return cli_args
def _stage_uploaded_mp4(uploaded_file: str | None) -> str:
if not uploaded_file:
return ""
source_path = Path(uploaded_file)
if source_path.suffix.lower() != ".mp4":
raise ValueError("Only MP4 uploads are supported.")
if not source_path.exists():
raise FileNotFoundError(f"Uploaded file not found: {source_path}")
safe_stem = "".join(char if char.isalnum() or char in {"-", "_"} else "_" for char in source_path.stem)
staged_name = f"{uuid.uuid4().hex[:12]}_{safe_stem or 'upload'}.mp4"
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
staged_path = UPLOAD_DIR / staged_name
shutil.copy2(source_path, staged_path)
return str(staged_path)
def _format_job_status(job: DubJob | None) -> str: def _format_job_status(job: DubJob | None) -> str:
if job is None: if job is None:
return "Ready" return "Ready"
@@ -234,6 +262,7 @@ def _output_choices() -> list[str]:
def _start_job( def _start_job(
url: str, url: str,
uploaded_mp4: str | None,
lang: str, lang: str,
whisper_model: str, whisper_model: str,
mix_mode: str, mix_mode: str,
@@ -248,8 +277,15 @@ def _start_job(
base_url = (lmstudio_base_url or "").strip() or saved_settings["base_url"] base_url = (lmstudio_base_url or "").strip() or saved_settings["base_url"]
api_key = (lmstudio_api_key or "").strip() or saved_settings["api_key"] api_key = (lmstudio_api_key or "").strip() or saved_settings["api_key"]
model = (lmstudio_model or "").strip() or saved_settings["model"] model = (lmstudio_model or "").strip() or saved_settings["model"]
try:
input_file = _stage_uploaded_mp4(uploaded_mp4)
except (OSError, ValueError) as exc:
message = str(exc) or "Invalid uploaded MP4."
return "", message, message, gr.update(choices=_output_choices())
form = { form = {
"url": url, "url": url,
"input_file": input_file,
"lang": lang, "lang": lang,
"whisper_model": whisper_model, "whisper_model": whisper_model,
"mix_mode": mix_mode, "mix_mode": mix_mode,
@@ -323,6 +359,11 @@ def create_app() -> gr.Blocks:
with gr.Row(): with gr.Row():
with gr.Column(scale=5): with gr.Column(scale=5):
url = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...") url = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...")
uploaded_mp4 = gr.File(
label="Upload MP4",
file_types=[".mp4"],
type="filepath",
)
with gr.Row(): with gr.Row():
lang = gr.Textbox(label="Target Language", value="es", max_lines=1) lang = gr.Textbox(label="Target Language", value="es", max_lines=1)
whisper_model = gr.Dropdown( whisper_model = gr.Dropdown(
@@ -386,6 +427,7 @@ def create_app() -> gr.Blocks:
inputs = [ inputs = [
url, url,
uploaded_mp4,
lang, lang,
whisper_model, whisper_model,
mix_mode, mix_mode,