"""Tests for the LM Studio translation layer."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import httpx
|
|
import pytest
|
|
|
|
from src.core_utils import TranslationError
|
|
from src.translation import LMStudioTranslator, TranslationConfig
|
|
|
|
|
|
def _mock_client(handler):
    """Return an httpx client whose requests are answered in-process by *handler*."""
    transport = httpx.MockTransport(handler)
    return httpx.Client(transport=transport)
def test_translation_config_normalizes_base_url():
    """A bare host:port base URL is normalized with a /v1 suffix and defaults."""
    cfg = TranslationConfig.from_env(base_url="http://127.0.0.1:1234")

    assert cfg.base_url == "http://127.0.0.1:1234/v1"
    assert cfg.chat_completions_url == "http://127.0.0.1:1234/v1/chat/completions"
    assert cfg.model == "gemma-3-4b-it"
def test_build_payload_includes_model_and_prompt():
    """The chat payload carries the configured model plus system and user messages."""
    # The handler is never invoked here; build_payload does no network I/O.
    translator = LMStudioTranslator(TranslationConfig(), client=_mock_client(lambda request: None))

    payload = translator.build_payload("Hello world", "en", "es")
    system_message = payload["messages"][0]
    user_message = payload["messages"][1]

    assert payload["model"] == "gemma-3-4b-it"
    assert system_message["role"] == "system"
    assert "Translate the user-provided text from en to es." in system_message["content"]
    assert user_message["content"] == "Hello world"
def test_translate_segments_preserves_order_and_blank_segments():
    """Translations come back in input order and blank segments pass through as-is."""

    def handler(request: httpx.Request) -> httpx.Response:
        body = request.read().decode("utf-8")
        # Map each known source segment to its canned translation; anything
        # else (which would indicate an unexpected request) gets a sentinel.
        reply = "desconocido"
        for marker, translation in (("first", "primero"), ("third", "tercero")):
            if marker in body:
                reply = translation
                break
        return httpx.Response(200, json={"choices": [{"message": {"content": reply}}]})

    translator = LMStudioTranslator(TranslationConfig(), client=_mock_client(handler))

    translated = translator.translate_segments(["first", "", "third"], target_language="es", source_language="en")

    assert translated == ["primero", "", "tercero"]
def test_retry_on_transient_http_error_then_succeeds():
    """A transient 503 on the first attempt is retried and the retry's body wins."""
    calls = []

    def handler(request: httpx.Request) -> httpx.Response:
        calls.append(request)
        if len(calls) == 1:
            return httpx.Response(503, json={"error": {"message": "busy"}})
        return httpx.Response(200, json={"choices": [{"message": {"content": "hola"}}]})

    translator = LMStudioTranslator(
        TranslationConfig(max_retries=2),
        client=_mock_client(handler),
        sleeper=lambda _: None,  # skip real backoff sleeps to keep the test fast
    )

    translated = translator.translate_text("hello", target_language="es", source_language="en")

    assert translated == "hola"
    assert len(calls) == 2
def test_parse_response_content_rejects_empty_content():
    """Whitespace-only completion content is rejected as an empty translation."""
    blank_response = {"choices": [{"message": {"content": " "}}]}

    with pytest.raises(TranslationError, match="empty translation"):
        LMStudioTranslator.parse_response_content(blank_response)
def test_translate_text_raises_on_malformed_response():
    """A 200 response without any choices raises a descriptive TranslationError."""
    # Every request gets an empty choices list, regardless of payload.
    translator = LMStudioTranslator(
        TranslationConfig(),
        client=_mock_client(lambda request: httpx.Response(200, json={"choices": []})),
    )

    with pytest.raises(TranslationError, match="did not contain a chat completion message"):
        translator.translate_text("hello", target_language="es", source_language="en")
def test_translate_text_falls_back_to_user_only_prompt_for_template_error():
    """A 'must start with a user prompt' template error triggers a user-only retry."""
    bodies = []

    def handler(request: httpx.Request) -> httpx.Response:
        bodies.append(request.read().decode("utf-8"))
        if len(bodies) == 1:
            return httpx.Response(
                400,
                text='{"error":"Error rendering prompt with jinja template: \\"Conversations must start with a user prompt.\\""}',
            )
        # The retry must drop the system message and lead with the user role.
        assert '"role":"user"' in bodies[-1]
        return httpx.Response(200, json={"choices": [{"message": {"content": "hola"}}]})

    translator = LMStudioTranslator(TranslationConfig(), client=_mock_client(handler))

    translated = translator.translate_text("hello", target_language="es", source_language="en")

    assert translated == "hola"
    assert len(bodies) == 2
def test_translate_text_falls_back_to_structured_prompt_for_custom_template():
    """Two template errors in a row escalate to the structured single-item prompt."""
    bodies = []

    def handler(request: httpx.Request) -> httpx.Response:
        bodies.append(request.read().decode("utf-8"))
        attempt = len(bodies)
        if attempt == 1:
            return httpx.Response(
                400,
                text='{"error":"Error rendering prompt with jinja template: \\"Conversations must start with a user prompt.\\""}',
            )
        if attempt == 2:
            return httpx.Response(
                400,
                text='{"error":"Error rendering prompt with jinja template: \\"User role must provide `content` as an iterable with exactly one item. That item must be a mapping(type:\'text\' | \'image\', source_lang_code:string, target_lang_code:string, text:string | none, image:string | none).\\""}',
            )
        # The final attempt must use the structured content mapping with
        # explicit source/target language codes.
        assert '"source_lang_code":"en"' in bodies[-1]
        assert '"target_lang_code":"es"' in bodies[-1]
        return httpx.Response(200, json={"choices": [{"message": {"content": "hola"}}]})

    translator = LMStudioTranslator(TranslationConfig(), client=_mock_client(handler))

    translated = translator.translate_text("hello", target_language="es", source_language="en")

    assert translated == "hola"
    assert len(bodies) == 3