From 803f532ff3aecb577af1746d68a2ae707a612e8b Mon Sep 17 00:00:00 2001
From: oimwiodev <oimwiodev@outlook.com>
Date: Sun, 24 May 2026 16:44:17 +0100
Subject: [PATCH] Retry transient TTS synthesis failures

---
 src/engines.py          | 41 ++++++++++++++++++++++++-----------
 tests/test_tts_retry.py | 48 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 12 deletions(-)
 create mode 100644 tests/test_tts_retry.py

diff --git a/src/engines.py b/src/engines.py
index aa17712..4e5caef 100644
--- a/src/engines.py
+++ b/src/engines.py
@@ -53,6 +53,8 @@ for directory_path in [CACHE_DIR, OUTPUT_DIR, TEMP_DIR]:
 # Audio processing settings
 SAMPLE_RATE = 24000
 AUDIO_CHANNELS = 1
+DEFAULT_TTS_MAX_RETRIES = max(1, int(os.getenv("TTS_MAX_RETRIES", "4")))  # total attempts; never below one
+DEFAULT_TTS_RETRY_BACKOFF_SECONDS = max(0.0, float(os.getenv("TTS_RETRY_BACKOFF_SECONDS", "2.0")))  # base delay, multiplied by the attempt number
 
 def _select_optimal_whisper_model(device: str = "cpu") -> str:
     """Select optimal Whisper model based on available VRAM and device.
@@ -487,22 +489,47 @@ class Engine(PipelineComponent):
     ) -> None:
         if not text.strip(): raise ValueError("Text empty")
         out_path.parent.mkdir(parents=True, exist_ok=True)
-        
+
+        # Resolve the voice once, outside the retry loop; lookup failures are
+        # reported as TTSError without being retried.
         try:
             lang_cfg = self._getLangConfig(target_lang)
             voice_pool = self.config_manager.getVoicePool(target_lang, gender)
             voice = voice_pool[0] if voice_pool else DEFAULT_VOICE
+        except Exception as exc:
+            _handleError(exc, "TTS synthesis")
+            raise TTSError(f"TTS failed: {exc}") from exc
 
-            communicate = edge_tts.Communicate(text, voice=voice, rate=rate)
-            await communicate.save(str(out_path))
-            
-            if not out_path.exists() or out_path.stat().st_size < 1024:
-                raise RuntimeError("TTS file invalid")
-                
-        except Exception as e:
-            if out_path.exists(): out_path.unlink(missing_ok=True)
-            _handleError(e, "TTS synthesis")
-            raise TTSError(f"TTS failed: {e}") from e
+        # DEFAULT_TTS_MAX_RETRIES counts total attempts; clamp it so a zero or
+        # negative setting still performs one synthesis attempt.
+        max_attempts = max(1, DEFAULT_TTS_MAX_RETRIES)
+        last_error: Optional[Exception] = None
+        for attempt in range(1, max_attempts + 1):
+            try:
+                communicate = edge_tts.Communicate(text, voice=voice, rate=rate)
+                await communicate.save(str(out_path))
+
+                if not out_path.exists() or out_path.stat().st_size < 1024:
+                    raise RuntimeError("TTS file invalid")
+
+                return
+            except Exception as exc:
+                last_error = exc
+                # Remove any partial or undersized output before retrying or failing.
+                out_path.unlink(missing_ok=True)
+
+                if attempt < max_attempts:
+                    # Linear backoff: the base delay multiplied by the attempt number.
+                    wait_time = DEFAULT_TTS_RETRY_BACKOFF_SECONDS * attempt
+                    print(
+                        f"[!] TTS synthesis failed "
+                        f"(attempt {attempt}/{max_attempts}): {exc}. "
+                        f"Retrying in {wait_time:.1f}s..."
+                    )
+                    await asyncio.sleep(wait_time)
+
+        _handleError(last_error, "TTS synthesis")
+        raise TTSError(f"TTS failed after {max_attempts} attempts: {last_error}") from last_error
 
 
 def smartChunk(segments: List[Dict]) -> List[Dict]:
diff --git a/tests/test_tts_retry.py b/tests/test_tts_retry.py
new file mode 100644
index 0000000..250e485
--- /dev/null
+++ b/tests/test_tts_retry.py
@@ -0,0 +1,89 @@
+"""Tests for transient Edge TTS retry behavior."""
+
+from __future__ import annotations
+
+import asyncio
+
+import pytest
+
+from src.engines import Engine, TTSError
+from src.translation import TranslationConfig
+
+
+def _make_engine():
+    """Build a CPU engine with a placeholder translation config."""
+    return Engine(
+        "cpu",
+        translation_config=TranslationConfig(
+            base_url="http://127.0.0.1:1234/v1",
+            api_key="test-key",
+            model="test-model",
+        ),
+    )
+
+
+def _patch_retry_settings(monkeypatch, communicate_cls, *, max_retries, backoff):
+    """Install a fake Communicate class and record backoff delays instead of sleeping."""
+    delays = []
+
+    async def record_sleep(seconds):
+        delays.append(seconds)
+
+    monkeypatch.setattr("src.engines.edge_tts.Communicate", communicate_cls)
+    monkeypatch.setattr("src.engines.asyncio.sleep", record_sleep)
+    monkeypatch.setattr("src.engines.DEFAULT_TTS_MAX_RETRIES", max_retries)
+    monkeypatch.setattr("src.engines.DEFAULT_TTS_RETRY_BACKOFF_SECONDS", backoff)
+    return delays
+
+
+def test_synthesize_retries_transient_edge_tts_failure(tmp_path, monkeypatch):
+    """A failed first attempt is retried and the second attempt's audio is kept."""
+    calls = {"count": 0}
+
+    class FakeCommunicate:
+        def __init__(self, text, voice, rate):
+            self.text = text
+            self.voice = voice
+            self.rate = rate
+
+        async def save(self, out_path):
+            calls["count"] += 1
+            if calls["count"] == 1:
+                raise RuntimeError("transient 503")
+            with open(out_path, "wb") as audio_file:
+                audio_file.write(b"0" * 2048)
+
+    delays = _patch_retry_settings(monkeypatch, FakeCommunicate, max_retries=2, backoff=0.5)
+    out_path = tmp_path / "tts.mp3"
+
+    asyncio.run(_make_engine().synthesize("Bonjour", "fr", out_path))
+
+    assert calls["count"] == 2
+    assert delays == [0.5]
+    assert out_path.exists()
+    assert out_path.stat().st_size == 2048
+
+
+def test_synthesize_raises_tts_error_after_exhausting_retries(tmp_path, monkeypatch):
+    """Persistently undersized output raises TTSError and leaves no file behind."""
+    calls = {"count": 0}
+
+    class TruncatedCommunicate:
+        def __init__(self, text, voice, rate):
+            pass
+
+        async def save(self, out_path):
+            calls["count"] += 1
+            # Below the 1024-byte minimum that synthesize() accepts.
+            with open(out_path, "wb") as audio_file:
+                audio_file.write(b"0" * 10)
+
+    delays = _patch_retry_settings(monkeypatch, TruncatedCommunicate, max_retries=2, backoff=0.5)
+    out_path = tmp_path / "tts.mp3"
+
+    with pytest.raises(TTSError):
+        asyncio.run(_make_engine().synthesize("Bonjour", "fr", out_path))
+
+    assert calls["count"] == 2
+    assert delays == [0.5]
+    assert not out_path.exists()