fix(audio): remove vocal bleed with instrumental-only mix

This commit is contained in:
2026-03-30 18:44:48 +01:00
parent 348369c69a
commit 3c9b3c8090
6 changed files with 534 additions and 40 deletions

44
main.py
View File

@@ -8,6 +8,7 @@ import asyncio
import shutil
import time
from src.audio_separation import DEFAULT_MIX_MODE
from src.core_utils import ConfigurationError
from src.translation import TranslationConfig
@@ -54,6 +55,15 @@ Examples:
"-wm",
help="Whisper model to use (tiny, base, small, medium, large-v3). Default: auto-select based on VRAM",
)
parser.add_argument(
"--mix-mode",
default=DEFAULT_MIX_MODE,
choices=[DEFAULT_MIX_MODE, "original-audio", "dub-only"],
help=(
"Final audio bed for the dubbed output. "
"Default 'instrumental-only' uses a no-vocals bed when separation succeeds."
),
)
parser.add_argument(
"--translation-backend",
default="lmstudio",
@@ -309,7 +319,36 @@ def main() -> None:
print(f"[+] Subtitles generated: {subtitle_path}")
print(f"\n{'=' * 60}")
print("STEP 7: FINAL VIDEO RENDERING")
print("STEP 7: AUDIO BED PREPARATION")
print(f"{'=' * 60}")
effective_mix_mode = args.mix_mode
background_audio_path = None
background_volume = 0.10
if effective_mix_mode == DEFAULT_MIX_MODE:
print(f"[*] Preparing default {DEFAULT_MIX_MODE} mix bed...")
separation_result = engine.separate_audio(audio_path, src.engines.TEMP_DIR)
if separation_result.warning:
print(f"[!] WARNING: {separation_result.warning}")
if separation_result.instrumental_path:
background_audio_path = separation_result.instrumental_path
background_volume = separation_result.recommended_bg_volume
print(f"[+] Background stem ready: {background_audio_path}")
if separation_result.vocals_path:
print(f"[*] Original vocal stem isolated at: {separation_result.vocals_path}")
else:
effective_mix_mode = "dub-only"
print("[!] WARNING: Could not build a safe instrumental bed. Falling back to dub-only output.")
elif effective_mix_mode == "original-audio":
background_audio_path = audio_path
print("[!] WARNING: original-audio mix keeps the source speech bed and may reintroduce bleed.")
else:
print("[*] Dub-only mix selected. No source audio bed will be used.")
print(f"\n{'=' * 60}")
print("STEP 8: FINAL VIDEO RENDERING")
print(f"{'=' * 60}")
try:
@@ -328,6 +367,9 @@ def main() -> None:
concat_manifest_path,
final_output,
subtitle_path=subtitle_path,
background_audio_path=background_audio_path,
mix_mode=effective_mix_mode,
background_volume=background_volume,
)
if final_output.exists():