feat: improve compatibility with certain apps (#72)

* feat: enable modern packaging via pyproject.toml

uvx --from (...) chatmock should just work!

* feat(ollama): add version endpoint

* feat(logging): improve verbose diagnostics

* fix(stream): always send stop chunk
This commit is contained in:
Magniquick
2025-11-19 15:32:20 +05:30
committed by GitHub
parent a4b9ae9471
commit 13b1dddf75
11 changed files with 541 additions and 81 deletions

View File

@@ -19,6 +19,52 @@ from .utils import convert_chat_messages_to_responses_input, convert_tools_chat_
ollama_bp = Blueprint("ollama", __name__)
def _log_json(prefix: str, payload: Any) -> None:
    """Print *prefix*, a newline, then *payload*; never raise.

    The payload is first rendered as indented JSON with non-ASCII
    characters kept as-is. If that rendering (or the print itself) fails,
    the payload's plain string form is printed instead. If that fails too,
    the message is silently dropped, since diagnostics must not break the
    request being served.
    """
    renderers = (
        # Preferred: pretty-printed JSON.
        lambda: json.dumps(payload, indent=2, ensure_ascii=False),
        # Fallback: let the f-string format the raw object.
        lambda: payload,
    )
    for render in renderers:
        try:
            print(f"{prefix}\n{render()}")
        except Exception:
            # Best-effort logging: move on to the next renderer, if any.
            continue
        return
def _wrap_stream_logging(label: str, iterator, enabled: bool):
    """Optionally echo every chunk of *iterator* to stdout as it is consumed.

    When *enabled* is false the original *iterator* object is returned
    untouched. Otherwise a generator is returned that yields exactly the
    same chunks, printing ``label``, a newline, and the chunk text before
    each yield. ``bytes``/``bytearray`` chunks are decoded as UTF-8 with
    replacement of undecodable bytes; anything else goes through ``str()``.
    A failure while rendering or printing a chunk is ignored so the stream
    itself is never interrupted by logging.
    """

    def _echoing():
        for piece in iterator:
            try:
                if isinstance(piece, (bytes, bytearray)):
                    rendered = piece.decode("utf-8", errors="replace")
                else:
                    rendered = str(piece)
                print(f"{label}\n{rendered}")
            except Exception:
                # Logging is best-effort; the chunk is still forwarded below.
                pass
            yield piece

    return _echoing() if enabled else iterator
@ollama_bp.route("/api/version", methods=["GET"])
def ollama_version() -> Response:
    """Handle ``GET /api/version`` by returning ``{"version": ...}`` as JSON.

    The version comes from the ``OLLAMA_VERSION`` app config entry; if that
    entry is missing, not a string, or blank, ``"0.12.10"`` is reported
    instead. Headers from ``build_cors_headers()`` are added without
    overriding any header already present on the response. When the
    ``VERBOSE`` config flag is truthy, the request and the outgoing payload
    are echoed to stdout.
    """
    verbose = bool(current_app.config.get("VERBOSE"))
    if verbose:
        print("IN GET /api/version")

    fallback = "0.12.10"
    configured = current_app.config.get("OLLAMA_VERSION", fallback)
    usable = isinstance(configured, str) and bool(configured.strip())
    payload = {"version": configured if usable else fallback}

    resp = make_response(jsonify(payload), 200)
    for header, value in build_cors_headers().items():
        # setdefault keeps any header the response already carries.
        resp.headers.setdefault(header, value)

    if verbose:
        _log_json("OUT GET /api/version", payload)
    return resp
def _instructions_for_model(model: str) -> str:
base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
if model == "gpt-5-codex":
@@ -75,28 +121,34 @@ def ollama_tags() -> Response:
},
}
)
resp = make_response(jsonify({"models": models}), 200)
payload = {"models": models}
resp = make_response(jsonify(payload), 200)
for k, v in build_cors_headers().items():
resp.headers.setdefault(k, v)
if bool(current_app.config.get("VERBOSE")):
_log_json("OUT GET /api/tags", payload)
return resp
@ollama_bp.route("/api/show", methods=["POST"])
def ollama_show() -> Response:
verbose = bool(current_app.config.get("VERBOSE"))
raw_body = request.get_data(cache=True, as_text=True) or ""
if verbose:
try:
print("IN POST /api/show\n" + raw_body)
except Exception:
pass
try:
if verbose:
body_preview = (request.get_data(cache=True, as_text=True) or "")[:2000]
print("IN POST /api/show\n" + body_preview)
payload = json.loads(raw_body) if raw_body else (request.get_json(silent=True) or {})
except Exception:
pass
try:
payload = request.get_json(silent=True) or {}
except Exception:
payload = {}
model = payload.get("model")
if not isinstance(model, str) or not model.strip():
return jsonify({"error": "Model not found"}), 400
err = {"error": "Model not found"}
if verbose:
_log_json("OUT POST /api/show", err)
return jsonify(err), 400
v1_show_response = {
"modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM /models/blobs/sha256:placeholder\nTEMPLATE \"\"\"{{ .System }}\nUSER: {{ .Prompt }}\nASSISTANT: \"\"\"\nPARAMETER num_ctx 100000\nPARAMETER stop \"</s>\"\nPARAMETER stop \"USER:\"\nPARAMETER stop \"ASSISTANT:\"",
"parameters": "num_keep 24\nstop \"<|start_header_id|>\"\nstop \"<|end_header_id|>\"\nstop \"<|eot_id|>\"",
@@ -116,6 +168,8 @@ def ollama_show() -> Response:
},
"capabilities": ["completion", "vision", "tools", "thinking"],
}
if verbose:
_log_json("OUT POST /api/show", v1_show_response)
resp = make_response(jsonify(v1_show_response), 200)
for k, v in build_cors_headers().items():
resp.headers.setdefault(k, v)
@@ -132,10 +186,13 @@ def ollama_chat() -> Response:
try:
raw = request.get_data(cache=True, as_text=True) or ""
if verbose:
print("IN POST /api/chat\n" + (raw[:2000] if isinstance(raw, str) else ""))
print("IN POST /api/chat\n" + (raw if isinstance(raw, str) else ""))
payload = json.loads(raw) if raw else {}
except Exception:
return jsonify({"error": "Invalid JSON body"}), 400
err = {"error": "Invalid JSON body"}
if verbose:
_log_json("OUT POST /api/chat", err)
return jsonify(err), 400
model = payload.get("model")
raw_messages = payload.get("messages")
@@ -166,7 +223,10 @@ def ollama_chat() -> Response:
if not (isinstance(_t, dict) and isinstance(_t.get("type"), str)):
continue
if _t.get("type") not in ("web_search", "web_search_preview"):
return jsonify({"error": "Only web_search/web_search_preview are supported in responses_tools"}), 400
err = {"error": "Only web_search/web_search_preview are supported in responses_tools"}
if verbose:
_log_json("OUT POST /api/chat", err)
return jsonify(err), 400
extra_tools.append(_t)
if not extra_tools and bool(current_app.config.get("DEFAULT_WEB_SEARCH")):
rtc = payload.get("responses_tool_choice")
@@ -180,7 +240,10 @@ def ollama_chat() -> Response:
except Exception:
size = 0
if size > MAX_TOOLS_BYTES:
return jsonify({"error": "responses_tools too large"}), 400
err = {"error": "responses_tools too large"}
if verbose:
_log_json("OUT POST /api/chat", err)
return jsonify(err), 400
had_responses_tools = True
tools_responses = (tools_responses or []) + extra_tools
@@ -189,7 +252,10 @@ def ollama_chat() -> Response:
tool_choice = rtc
if not isinstance(model, str) or not isinstance(messages, list) or not messages:
return jsonify({"error": "Invalid request format"}), 400
err = {"error": "Invalid request format"}
if verbose:
_log_json("OUT POST /api/chat", err)
return jsonify(err), 400
input_items = convert_chat_messages_to_responses_input(messages)
@@ -205,6 +271,17 @@ def ollama_chat() -> Response:
reasoning_param=build_reasoning_param(reasoning_effort, reasoning_summary, model_reasoning),
)
if error_resp is not None:
if verbose:
try:
body = error_resp.get_data(as_text=True)
if body:
try:
parsed = json.loads(body)
except Exception:
parsed = body
_log_json("OUT POST /api/chat", parsed)
except Exception:
pass
return error_resp
record_rate_limits_from_response(upstream)
@@ -232,17 +309,17 @@ def ollama_chat() -> Response:
if err2 is None and upstream2 is not None and upstream2.status_code < 400:
upstream = upstream2
else:
return (
jsonify({"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error"), "code": "RESPONSES_TOOLS_REJECTED"}}),
(upstream2.status_code if upstream2 is not None else upstream.status_code),
)
err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error"), "code": "RESPONSES_TOOLS_REJECTED"}}
if verbose:
_log_json("OUT POST /api/chat", err)
return jsonify(err), (upstream2.status_code if upstream2 is not None else upstream.status_code)
else:
if verbose:
print("/api/chat upstream error status=", upstream.status_code, " body:", json.dumps(err_body)[:2000])
return (
jsonify({"error": (err_body.get("error", {}) or {}).get("message", "Upstream error")}),
upstream.status_code,
)
err = {"error": (err_body.get("error", {}) or {}).get("message", "Upstream error")}
if verbose:
_log_json("OUT POST /api/chat", err)
return jsonify(err), upstream.status_code
created_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
model_out = model if isinstance(model, str) and model.strip() else normalized_model
@@ -408,8 +485,12 @@ def ollama_chat() -> Response:
}
done_obj.update(_OLLAMA_FAKE_EVAL)
yield json.dumps(done_obj) + "\n"
if verbose:
print("OUT POST /api/chat (streaming response)")
stream_iter = stream_with_context(_gen())
stream_iter = _wrap_stream_logging("STREAM OUT /api/chat", stream_iter, verbose)
resp = current_app.response_class(
stream_with_context(_gen()),
stream_iter,
status=200,
mimetype="application/x-ndjson",
)
@@ -481,6 +562,8 @@ def ollama_chat() -> Response:
"done_reason": "stop",
}
out_json.update(_OLLAMA_FAKE_EVAL)
if verbose:
_log_json("OUT POST /api/chat", out_json)
resp = make_response(jsonify(out_json), 200)
for k, v in build_cors_headers().items():
resp.headers.setdefault(k, v)