fixes #103: responses api max_output_tokens bug

This commit is contained in:
Game_Time
2026-03-23 23:13:58 +05:00
parent 8754203ec6
commit da0e3c3176
9 changed files with 132 additions and 331 deletions

View File

@@ -250,7 +250,7 @@ def ollama_chat() -> Response:
input_items = convert_chat_messages_to_responses_input(messages)
model_reasoning = extract_reasoning_from_model_name(model)
-normalized_model = normalize_model_name(model)
+normalized_model = normalize_model_name(model, current_app.config.get("DEBUG_MODEL"))
service_tier_resolution = resolve_service_tier(
normalized_model,
request_fast_mode=payload.get("fast_mode"),
@@ -306,7 +306,7 @@ def ollama_chat() -> Response:
base_tools_only = convert_tools_chat_to_responses(normalize_ollama_tools(tools_req))
safe_choice = payload.get("tool_choice", "auto")
upstream2, err2 = start_upstream_request(
-normalize_model_name(model),
+normalize_model_name(model, current_app.config.get("DEBUG_MODEL")),
input_items,
instructions=BASE_INSTRUCTIONS,
tools=base_tools_only,
@@ -570,7 +570,7 @@ def ollama_chat() -> Response:
full_text = f"<think>{rtxt}</think>" + (full_text or "")
out_json = {
-"model": normalize_model_name(model),
+"model": normalize_model_name(model, current_app.config.get("DEBUG_MODEL")),
"created_at": created_at,
"message": {"role": "assistant", "content": full_text, **({"tool_calls": tool_calls} if tool_calls else {})},
"done": True,