baseline: initial working version

This commit is contained in:
2026-03-30 18:18:41 +01:00
commit 27cfe2a3f5
19 changed files with 3878 additions and 0 deletions

136
tests/test_translation.py Normal file
View File

@@ -0,0 +1,136 @@
"""Tests for the LM Studio translation layer."""
from __future__ import annotations
import httpx
import pytest
from src.core_utils import TranslationError
from src.translation import LMStudioTranslator, TranslationConfig
def _mock_client(handler):
    """Return an ``httpx.Client`` whose transport routes every request to *handler*."""
    transport = httpx.MockTransport(handler)
    return httpx.Client(transport=transport)
def test_translation_config_normalizes_base_url():
    """A bare host URL is normalized to include the ``/v1`` API prefix."""
    config = TranslationConfig.from_env(base_url="http://127.0.0.1:1234")

    # The derived endpoint URLs and the default model follow from the base.
    assert config.base_url == "http://127.0.0.1:1234/v1"
    assert config.chat_completions_url == "http://127.0.0.1:1234/v1/chat/completions"
    assert config.model == "gemma-3-4b-it"
def test_build_payload_includes_model_and_prompt():
    """The chat payload carries the model name, a system prompt, and the user text."""
    # The handler is never invoked: build_payload performs no network I/O.
    translator = LMStudioTranslator(
        TranslationConfig(),
        client=_mock_client(lambda request: None),
    )

    payload = translator.build_payload("Hello world", "en", "es")
    messages = payload["messages"]

    assert payload["model"] == "gemma-3-4b-it"
    assert messages[0]["role"] == "system"
    assert "Translate the user-provided text from en to es." in messages[0]["content"]
    assert messages[1]["content"] == "Hello world"
def test_translate_segments_preserves_order_and_blank_segments():
    """Translated segments keep their order and blank segments pass through untouched."""

    def handler(request: httpx.Request) -> httpx.Response:
        # Route on the request body so each segment gets a distinct reply.
        body = request.read().decode("utf-8")
        if "first" in body:
            reply = "primero"
        elif "third" in body:
            reply = "tercero"
        else:
            reply = "desconocido"
        return httpx.Response(200, json={"choices": [{"message": {"content": reply}}]})

    translator = LMStudioTranslator(TranslationConfig(), client=_mock_client(handler))
    result = translator.translate_segments(
        ["first", "", "third"], target_language="es", source_language="en"
    )

    # The empty middle segment is returned as-is, in its original position.
    assert result == ["primero", "", "tercero"]
def test_retry_on_transient_http_error_then_succeeds():
    """A 503 on the first attempt is retried; the second attempt returns the result."""
    calls = {"count": 0}

    def handler(request: httpx.Request) -> httpx.Response:
        calls["count"] += 1
        if calls["count"] == 1:
            # First attempt: simulate a transient server-side failure.
            return httpx.Response(503, json={"error": {"message": "busy"}})
        return httpx.Response(200, json={"choices": [{"message": {"content": "hola"}}]})

    translator = LMStudioTranslator(
        TranslationConfig(max_retries=2),
        client=_mock_client(handler),
        sleeper=lambda _: None,  # no real backoff sleeps in tests
    )

    result = translator.translate_text("hello", target_language="es", source_language="en")

    assert result == "hola"
    assert calls["count"] == 2
def test_parse_response_content_rejects_empty_content():
    """Whitespace-only message content is rejected as an empty translation."""
    payload = {"choices": [{"message": {"content": " "}}]}
    with pytest.raises(TranslationError, match="empty translation"):
        LMStudioTranslator.parse_response_content(payload)
def test_translate_text_raises_on_malformed_response():
    """A 200 response with an empty ``choices`` list surfaces as a TranslationError."""

    def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(200, json={"choices": []})

    translator = LMStudioTranslator(TranslationConfig(), client=_mock_client(handler))

    with pytest.raises(TranslationError, match="did not contain a chat completion message"):
        translator.translate_text("hello", target_language="es", source_language="en")
def test_translate_text_falls_back_to_user_only_prompt_for_template_error():
    """A jinja 'must start with a user prompt' 400 triggers a user-only retry."""
    calls = {"count": 0}

    def handler(request: httpx.Request) -> httpx.Response:
        calls["count"] += 1
        body = request.read().decode("utf-8")
        if calls["count"] == 1:
            # First attempt: the server rejects the system-prompt template.
            return httpx.Response(
                400,
                text='{"error":"Error rendering prompt with jinja template: \\"Conversations must start with a user prompt.\\""}',
            )
        # The retry is expected to send a user-role message.
        assert '"role":"user"' in body
        return httpx.Response(200, json={"choices": [{"message": {"content": "hola"}}]})

    translator = LMStudioTranslator(TranslationConfig(), client=_mock_client(handler))

    result = translator.translate_text("hello", target_language="es", source_language="en")

    assert result == "hola"
    assert calls["count"] == 2
def test_translate_text_falls_back_to_structured_prompt_for_custom_template():
    """Two successive template errors escalate to the structured content-item prompt."""
    calls = {"count": 0}

    def handler(request: httpx.Request) -> httpx.Response:
        calls["count"] += 1
        body = request.read().decode("utf-8")
        if calls["count"] == 1:
            # First attempt: template rejects the system prompt.
            return httpx.Response(
                400,
                text='{"error":"Error rendering prompt with jinja template: \\"Conversations must start with a user prompt.\\""}',
            )
        if calls["count"] == 2:
            # Second attempt: template demands a structured content mapping.
            return httpx.Response(
                400,
                text='{"error":"Error rendering prompt with jinja template: \\"User role must provide `content` as an iterable with exactly one item. That item must be a mapping(type:\'text\' | \'image\', source_lang_code:string, target_lang_code:string, text:string | none, image:string | none).\\""}',
            )
        # Third attempt must carry the structured language fields.
        assert '"source_lang_code":"en"' in body
        assert '"target_lang_code":"es"' in body
        return httpx.Response(200, json={"choices": [{"message": {"content": "hola"}}]})

    translator = LMStudioTranslator(TranslationConfig(), client=_mock_client(handler))

    result = translator.translate_text("hello", target_language="es", source_language="en")

    assert result == "hola"
    assert calls["count"] == 3