Retry transient TTS synthesis failures

This commit is contained in:
2026-05-24 16:44:17 +01:00
parent f1e72f27e2
commit 803f532ff3
2 changed files with 77 additions and 12 deletions

View File

@@ -53,6 +53,8 @@ for directory_path in [CACHE_DIR, OUTPUT_DIR, TEMP_DIR]:
# Audio processing settings # Audio processing settings
SAMPLE_RATE = 24000 SAMPLE_RATE = 24000
AUDIO_CHANNELS = 1 AUDIO_CHANNELS = 1
# Total number of TTS synthesis attempts made before giving up (the retry loop
# counts attempts from 1 up to and including this value). Read once at import
# time from the TTS_MAX_RETRIES environment variable; defaults to 4.
# NOTE(review): int() raises ValueError at import if the variable is not an integer.
DEFAULT_TTS_MAX_RETRIES = int(os.getenv("TTS_MAX_RETRIES", "4"))
# Base delay in seconds between attempts; the wait after failed attempt N is
# N times this value. Read once at import time from the
# TTS_RETRY_BACKOFF_SECONDS environment variable; defaults to 2.0.
DEFAULT_TTS_RETRY_BACKOFF_SECONDS = float(os.getenv("TTS_RETRY_BACKOFF_SECONDS", "2.0"))
def _select_optimal_whisper_model(device: str = "cpu") -> str: def _select_optimal_whisper_model(device: str = "cpu") -> str:
"""Select optimal Whisper model based on available VRAM and device. """Select optimal Whisper model based on available VRAM and device.
@@ -487,22 +489,37 @@ class Engine(PipelineComponent):
) -> None: ) -> None:
if not text.strip(): raise ValueError("Text empty") if not text.strip(): raise ValueError("Text empty")
out_path.parent.mkdir(parents=True, exist_ok=True) out_path.parent.mkdir(parents=True, exist_ok=True)
try: last_error: Optional[Exception] = None
for attempt in range(1, DEFAULT_TTS_MAX_RETRIES + 1):
lang_cfg = self._getLangConfig(target_lang) lang_cfg = self._getLangConfig(target_lang)
voice_pool = self.config_manager.getVoicePool(target_lang, gender) voice_pool = self.config_manager.getVoicePool(target_lang, gender)
voice = voice_pool[0] if voice_pool else DEFAULT_VOICE voice = voice_pool[0] if voice_pool else DEFAULT_VOICE
communicate = edge_tts.Communicate(text, voice=voice, rate=rate) try:
await communicate.save(str(out_path)) communicate = edge_tts.Communicate(text, voice=voice, rate=rate)
await communicate.save(str(out_path))
if not out_path.exists() or out_path.stat().st_size < 1024:
raise RuntimeError("TTS file invalid") if not out_path.exists() or out_path.stat().st_size < 1024:
raise RuntimeError("TTS file invalid")
except Exception as e:
if out_path.exists(): out_path.unlink(missing_ok=True) return
_handleError(e, "TTS synthesis") except Exception as exc:
raise TTSError(f"TTS failed: {e}") from e last_error = exc
if out_path.exists():
out_path.unlink(missing_ok=True)
if attempt < DEFAULT_TTS_MAX_RETRIES:
wait_time = DEFAULT_TTS_RETRY_BACKOFF_SECONDS * attempt
print(
f"[!] TTS synthesis failed "
f"(attempt {attempt}/{DEFAULT_TTS_MAX_RETRIES}): {exc}. "
f"Retrying in {wait_time:.1f}s..."
)
await asyncio.sleep(wait_time)
_handleError(last_error or RuntimeError("unknown TTS failure"), "TTS synthesis")
raise TTSError(f"TTS failed after {DEFAULT_TTS_MAX_RETRIES} attempts: {last_error}") from last_error
def smartChunk(segments: List[Dict]) -> List[Dict]: def smartChunk(segments: List[Dict]) -> List[Dict]:

48
tests/test_tts_retry.py Normal file
View File

@@ -0,0 +1,48 @@
"""Tests for transient Edge TTS retry behavior."""
from __future__ import annotations
import asyncio
from src.engines import Engine
from src.translation import TranslationConfig
def test_synthesize_retries_transient_edge_tts_failure(tmp_path, monkeypatch):
    """Check that ``Engine.synthesize`` recovers from one failed Edge TTS save.

    The fake ``Communicate`` raises on its first ``save`` call and writes a
    2048-byte file on the second, so success requires exactly two attempts.
    """
    save_attempts = 0

    class FakeCommunicate:
        """Stand-in for ``edge_tts.Communicate`` that fails once, then succeeds."""

        def __init__(self, text, voice, rate):
            self.text, self.voice, self.rate = text, voice, rate

        async def save(self, out_path):
            nonlocal save_attempts
            save_attempts += 1
            if save_attempts <= 1:
                # First call simulates a transient service error.
                raise RuntimeError("transient 503")
            # Later calls write a 2048-byte payload to the requested path.
            with open(out_path, "wb") as audio_file:
                audio_file.write(b"0" * 2048)

    async def skip_sleep(_delay):
        # Replaces asyncio.sleep so the retry wait does not slow the test down.
        return None

    # Apply the patches in a fixed order before the engine is constructed:
    # fake TTS client, no-op sleep, and a two-attempt budget.
    patches = (
        ("src.engines.edge_tts.Communicate", FakeCommunicate),
        ("src.engines.asyncio.sleep", skip_sleep),
        ("src.engines.DEFAULT_TTS_MAX_RETRIES", 2),
    )
    for target, replacement in patches:
        monkeypatch.setattr(target, replacement)

    translation_config = TranslationConfig(
        base_url="http://127.0.0.1:1234/v1",
        api_key="test-key",
        model="test-model",
    )
    engine = Engine("cpu", translation_config=translation_config)

    audio_path = tmp_path / "tts.mp3"
    asyncio.run(engine.synthesize("Bonjour", "fr", audio_path))

    # One failed attempt plus one successful attempt.
    assert save_attempts == 2
    assert audio_path.exists()
    assert audio_path.stat().st_size == 2048