Fixes #103: Responses API max_output_tokens bug (the key is now dropped from the normalized payload)

2026-03-23 23:13:58 +05:00
parent 8754203ec6
commit da0e3c3176
9 changed files with 132 additions and 331 deletions
--- a/chatmock/cli.py
+++ b/chatmock/cli.py
@@ -284,7 +284,7 @@ def cmd_serve(
        default_web_search=default_web_search,
    )

-    app.run(host=host, debug=False, use_reloader=False, port=port, threaded=True)
+    app.run(host=host, use_reloader=False, port=port, threaded=True)
    return 0


--- a/chatmock/responses_api.py
+++ b/chatmock/responses_api.py
@@ -88,6 +88,7 @@ def normalize_responses_payload(

    normalized = dict(payload)
    normalized["model"] = normalized_model
+    normalized.pop("max_output_tokens", None)

    if "input" in normalized:
        normalized["input"] = canonicalize_responses_input(normalized.get("input"))
--- a/chatmock/routes_ollama.py
+++ b/chatmock/routes_ollama.py
@@ -250,7 +250,7 @@ def ollama_chat() -> Response:
    input_items = convert_chat_messages_to_responses_input(messages)

    model_reasoning = extract_reasoning_from_model_name(model)
-    normalized_model = normalize_model_name(model)
+    normalized_model = normalize_model_name(model, current_app.config.get("DEBUG_MODEL"))
    service_tier_resolution = resolve_service_tier(
        normalized_model,
        request_fast_mode=payload.get("fast_mode"),
@@ -306,7 +306,7 @@ def ollama_chat() -> Response:
            base_tools_only = convert_tools_chat_to_responses(normalize_ollama_tools(tools_req))
            safe_choice = payload.get("tool_choice", "auto")
            upstream2, err2 = start_upstream_request(
-                normalize_model_name(model),
+                normalize_model_name(model, current_app.config.get("DEBUG_MODEL")),
                input_items,
                instructions=BASE_INSTRUCTIONS,
                tools=base_tools_only,
@@ -570,7 +570,7 @@ def ollama_chat() -> Response:
            full_text = f"<think>{rtxt}</think>" + (full_text or "")

    out_json = {
-        "model": normalize_model_name(model),
+        "model": normalize_model_name(model, current_app.config.get("DEBUG_MODEL")),
        "created_at": created_at,
        "message": {"role": "assistant", "content": full_text, **({"tool_calls": tool_calls} if tool_calls else {})},
        "done": True,
--- a/chatmock/routes_openai.py
+++ b/chatmock/routes_openai.py
@@ -109,7 +109,6 @@ def chat_completions() -> Response:
    reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
    reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
    reasoning_compat = current_app.config.get("REASONING_COMPAT", "think-tags")
-    debug_model = current_app.config.get("DEBUG_MODEL")

    raw = request.get_data(cache=True, as_text=True) or ""
    if verbose:
@@ -129,7 +128,7 @@ def chat_completions() -> Response:
            return jsonify(err), 400

    requested_model = payload.get("model")
-    model = normalize_model_name(requested_model, debug_model)
+    model = normalize_model_name(requested_model, current_app.config.get("DEBUG_MODEL"))
    messages = payload.get("messages")
    if messages is None and isinstance(payload.get("prompt"), str):
        messages = [{"role": "user", "content": payload.get("prompt") or ""}]
@@ -413,7 +412,6 @@ def chat_completions() -> Response:
 def completions() -> Response:
    verbose = bool(current_app.config.get("VERBOSE"))
    verbose_obfuscation = bool(current_app.config.get("VERBOSE_OBFUSCATION"))
-    debug_model = current_app.config.get("DEBUG_MODEL")
    reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
    reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")

@@ -432,7 +430,7 @@ def completions() -> Response:
        return jsonify(err), 400

    requested_model = payload.get("model")
-    model = normalize_model_name(requested_model, debug_model)
+    model = normalize_model_name(requested_model, current_app.config.get("DEBUG_MODEL"))
    prompt = payload.get("prompt")
    if isinstance(prompt, list):
        prompt = "".join([p if isinstance(p, str) else "" for p in prompt])