From 803f532ff3aecb577af1746d68a2ae707a612e8b Mon Sep 17 00:00:00 2001
From: oimwiodev
Date: Sun, 24 May 2026 16:44:17 +0100
Subject: [PATCH] Retry transient TTS synthesis failures

Retry Edge TTS synthesis with linear backoff before giving up. The attempt
count and base backoff come from TTS_MAX_RETRIES and
TTS_RETRY_BACKOFF_SECONDS. A non-positive TTS_MAX_RETRIES still makes one
attempt, and voice lookup failures are raised as TTSError without retrying.

---
 src/engines.py          |  62 ++++++++++++++++++++++++++---------
 tests/test_tts_retry.py | 103 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 149 insertions(+), 16 deletions(-)
 create mode 100644 tests/test_tts_retry.py

diff --git a/src/engines.py b/src/engines.py
index aa17712..4e5caef 100644
--- a/src/engines.py
+++ b/src/engines.py
@@ -53,6 +53,9 @@ for directory_path in [CACHE_DIR, OUTPUT_DIR, TEMP_DIR]:
 # Audio processing settings
 SAMPLE_RATE = 24000
 AUDIO_CHANNELS = 1
+# TTS retry policy, overridable through environment variables.
+DEFAULT_TTS_MAX_RETRIES = int(os.getenv("TTS_MAX_RETRIES", "4"))
+DEFAULT_TTS_RETRY_BACKOFF_SECONDS = float(os.getenv("TTS_RETRY_BACKOFF_SECONDS", "2.0"))
 
 def _select_optimal_whisper_model(device: str = "cpu") -> str:
     """Select optimal Whisper model based on available VRAM and device.
@@ -487,22 +490,49 @@ class Engine(PipelineComponent):
     ) -> None:
         if not text.strip(): raise ValueError("Text empty")
         out_path.parent.mkdir(parents=True, exist_ok=True)
-
-        try:
-            lang_cfg = self._getLangConfig(target_lang)
-            voice_pool = self.config_manager.getVoicePool(target_lang, gender)
-            voice = voice_pool[0] if voice_pool else DEFAULT_VOICE
-
-            communicate = edge_tts.Communicate(text, voice=voice, rate=rate)
-            await communicate.save(str(out_path))
-
-            if not out_path.exists() or out_path.stat().st_size < 1024:
-                raise RuntimeError("TTS file invalid")
-
-        except Exception as e:
-            if out_path.exists(): out_path.unlink(missing_ok=True)
-            _handleError(e, "TTS synthesis")
-            raise TTSError(f"TTS failed: {e}") from e
+
+        # Clamp so a zero or negative TTS_MAX_RETRIES still makes one attempt
+        # rather than failing without ever calling the TTS service.
+        max_attempts = max(1, DEFAULT_TTS_MAX_RETRIES)
+
+        # The voice does not change between attempts, so resolve it once.
+        # Lookup failures are not retried; they surface as TTSError, as they
+        # did before retries were introduced.
+        try:
+            lang_cfg = self._getLangConfig(target_lang)
+            voice_pool = self.config_manager.getVoicePool(target_lang, gender)
+            voice = voice_pool[0] if voice_pool else DEFAULT_VOICE
+        except Exception as exc:
+            _handleError(exc, "TTS synthesis")
+            raise TTSError(f"TTS failed: {exc}") from exc
+
+        last_error: Optional[Exception] = None
+        for attempt in range(1, max_attempts + 1):
+            try:
+                communicate = edge_tts.Communicate(text, voice=voice, rate=rate)
+                await communicate.save(str(out_path))
+
+                if not out_path.exists() or out_path.stat().st_size < 1024:
+                    raise RuntimeError("TTS file invalid")
+
+                return
+            except Exception as exc:
+                last_error = exc
+                # Never leave a partial or undersized file behind.
+                out_path.unlink(missing_ok=True)
+
+                if attempt < max_attempts:
+                    # Linear backoff: the wait grows with each failed attempt.
+                    wait_time = DEFAULT_TTS_RETRY_BACKOFF_SECONDS * attempt
+                    print(
+                        f"[!] TTS synthesis failed "
+                        f"(attempt {attempt}/{max_attempts}): {exc}. "
+                        f"Retrying in {wait_time:.1f}s..."
+                    )
+                    await asyncio.sleep(wait_time)
+
+        _handleError(last_error or RuntimeError("unknown TTS failure"), "TTS synthesis")
+        raise TTSError(f"TTS failed after {max_attempts} attempts: {last_error}") from last_error
 
 
 def smartChunk(segments: List[Dict]) -> List[Dict]:
diff --git a/tests/test_tts_retry.py b/tests/test_tts_retry.py
new file mode 100644
index 0000000..250e485
--- /dev/null
+++ b/tests/test_tts_retry.py
@@ -0,0 +1,103 @@
+"""Tests for transient Edge TTS retry behavior."""
+
+from __future__ import annotations
+
+import asyncio
+
+import pytest
+
+from src.engines import Engine, TTSError
+from src.translation import TranslationConfig
+
+
+def _make_engine() -> Engine:
+    """Build a CPU engine with placeholder translation settings."""
+    return Engine(
+        "cpu",
+        translation_config=TranslationConfig(
+            base_url="http://127.0.0.1:1234/v1",
+            api_key="test-key",
+            model="test-model",
+        ),
+    )
+
+
+async def _no_sleep(_seconds):
+    """Replace asyncio.sleep so retry backoff does not slow the tests."""
+    return None
+
+
+def test_synthesize_retries_transient_edge_tts_failure(tmp_path, monkeypatch):
+    calls = {"count": 0}
+
+    class FakeCommunicate:
+        def __init__(self, text, voice, rate):
+            self.text = text
+            self.voice = voice
+            self.rate = rate
+
+        async def save(self, out_path):
+            calls["count"] += 1
+            if calls["count"] == 1:
+                raise RuntimeError("transient 503")
+            with open(out_path, "wb") as audio_file:
+                audio_file.write(b"0" * 2048)
+
+    monkeypatch.setattr("src.engines.edge_tts.Communicate", FakeCommunicate)
+    monkeypatch.setattr("src.engines.asyncio.sleep", _no_sleep)
+    monkeypatch.setattr("src.engines.DEFAULT_TTS_MAX_RETRIES", 2)
+    out_path = tmp_path / "tts.mp3"
+
+    asyncio.run(_make_engine().synthesize("Bonjour", "fr", out_path))
+
+    assert calls["count"] == 2
+    assert out_path.exists()
+    assert out_path.stat().st_size == 2048
+
+
+def test_synthesize_raises_tts_error_after_exhausting_retries(tmp_path, monkeypatch):
+    calls = {"count": 0}
+
+    class FailingCommunicate:
+        def __init__(self, text, voice, rate):
+            pass
+
+        async def save(self, out_path):
+            calls["count"] += 1
+            # Leave a partial file behind to check that it is cleaned up.
+            with open(out_path, "wb") as audio_file:
+                audio_file.write(b"0" * 16)
+            raise RuntimeError("persistent 503")
+
+    monkeypatch.setattr("src.engines.edge_tts.Communicate", FailingCommunicate)
+    monkeypatch.setattr("src.engines.asyncio.sleep", _no_sleep)
+    monkeypatch.setattr("src.engines.DEFAULT_TTS_MAX_RETRIES", 3)
+    out_path = tmp_path / "tts.mp3"
+
+    with pytest.raises(TTSError):
+        asyncio.run(_make_engine().synthesize("Bonjour", "fr", out_path))
+
+    assert calls["count"] == 3
+    assert not out_path.exists()
+
+
+def test_synthesize_attempts_once_for_non_positive_max_retries(tmp_path, monkeypatch):
+    calls = {"count": 0}
+
+    class FakeCommunicate:
+        def __init__(self, text, voice, rate):
+            pass
+
+        async def save(self, out_path):
+            calls["count"] += 1
+            with open(out_path, "wb") as audio_file:
+                audio_file.write(b"0" * 2048)
+
+    monkeypatch.setattr("src.engines.edge_tts.Communicate", FakeCommunicate)
+    monkeypatch.setattr("src.engines.DEFAULT_TTS_MAX_RETRIES", 0)
+    out_path = tmp_path / "tts.mp3"
+
+    asyncio.run(_make_engine().synthesize("Bonjour", "fr", out_path))
+
+    assert calls["count"] == 1
+    assert out_path.exists()