"""Focused tests for vocal-bleed reduction in the final dub mix.""" from __future__ import annotations import math import shutil import subprocess from pathlib import Path import numpy as np import pytest import soundfile as sf from src.audio_separation import AudioSeparator, DEFAULT_MIX_MODE from src import media FFMPEG_READY = shutil.which("ffmpeg") is not None and shutil.which("ffprobe") is not None def _sine_wave(frequency: float, duration: float, sample_rate: int, amplitude: float) -> np.ndarray: t = np.linspace(0.0, duration, int(sample_rate * duration), endpoint=False) return (amplitude * np.sin(2.0 * math.pi * frequency * t)).astype(np.float32) def _run(cmd: list[str]) -> None: subprocess.run(cmd, check=True, capture_output=True, text=True, timeout=None) def _extract_peak(signal: np.ndarray, sample_rate: int, frequency: float) -> float: window = np.hanning(len(signal)) spectrum = np.fft.rfft(signal * window) freqs = np.fft.rfftfreq(len(signal), d=1.0 / sample_rate) index = int(np.argmin(np.abs(freqs - frequency))) return float(np.abs(spectrum[index])) @pytest.mark.skipif(not FFMPEG_READY, reason="FFmpeg is required for audio pipeline tests") def test_default_mix_prefers_instrumental_bed_and_keeps_dub_prominent(tmp_path: Path): sample_rate = 24_000 duration = 2.0 centered_voice = _sine_wave(440.0, duration, sample_rate, amplitude=0.35) ambience_left = _sine_wave(660.0, duration, sample_rate, amplitude=0.18) ambience_right = -ambience_left original_stereo = np.column_stack( [centered_voice + ambience_left, centered_voice + ambience_right] ) original_audio = tmp_path / "original.wav" sf.write(original_audio, original_stereo, sample_rate) dub_audio = tmp_path / "dub.wav" sf.write(dub_audio, _sine_wave(1000.0, duration, sample_rate, amplitude=0.30), sample_rate) manifest_path = tmp_path / "dub_manifest.txt" manifest_path.write_text(f"file '{dub_audio.resolve().as_posix()}'\n", encoding="utf-8") video_path = tmp_path / "video.mp4" _run( [ "ffmpeg", "-y", "-v", "error", "-f", "lavfi", "-i", f"color=c=black:s=320x240:d={duration}", "-i", str(original_audio), "-c:v", "libx264", "-pix_fmt", "yuv420p", "-c:a", "aac", "-shortest", str(video_path), ] ) separator = AudioSeparator() separation = separator.separate_audio(original_audio, tmp_path) assert separation.instrumental_path is not None assert separation.instrumental_path.exists() instrumental_audio, instrumental_rate = sf.read(separation.instrumental_path, always_2d=True) original_audio_data, _ = sf.read(original_audio, always_2d=True) centered_before = _extract_peak(original_audio_data[:, 0], sample_rate, 440.0) centered_after = _extract_peak(instrumental_audio[:, 0], instrumental_rate, 440.0) ambience_after = _extract_peak(instrumental_audio[:, 0], instrumental_rate, 660.0) assert centered_after < centered_before * 0.15 assert ambience_after > 0.01 output_video = tmp_path / "dubbed.mp4" media.render_video( video_path=video_path, concat_file=manifest_path, output_path=output_video, background_audio_path=separation.instrumental_path, mix_mode=DEFAULT_MIX_MODE, background_volume=separation.recommended_bg_volume, ) mixed_audio = tmp_path / "mixed.wav" _run( [ "ffmpeg", "-y", "-v", "error", "-i", str(output_video), "-vn", "-c:a", "pcm_s16le", str(mixed_audio), ] ) rendered_audio, rendered_rate = sf.read(mixed_audio, always_2d=True) rendered_channel = rendered_audio[:, 0] dub_peak = _extract_peak(rendered_channel, rendered_rate, 1000.0) residual_original_peak = _extract_peak(rendered_channel, rendered_rate, 440.0) ambience_peak = _extract_peak(rendered_channel, rendered_rate, 660.0) assert dub_peak > residual_original_peak * 4 assert ambience_peak > residual_original_peak @pytest.mark.skipif(not FFMPEG_READY, reason="FFmpeg is required for audio pipeline tests") def test_separator_warns_and_returns_no_bed_for_mono_input(tmp_path: Path): mono_audio = tmp_path / "mono.wav" sf.write(mono_audio, _sine_wave(440.0, 1.0, 24_000, amplitude=0.30), 24_000) result = AudioSeparator().separate_audio(mono_audio, tmp_path) assert result.instrumental_path is None assert result.warning is not None assert "mono" in result.warning.lower()