feat: improve compatiblity with certain apps (#72)

* feat: enable modern packaging via pyproject.toml

uvx --from (...) chatmock should just work !

* feat(ollama): add version endpoint

* feat(logging): improve verbose diagnostics

* fix(stream): always send stop chunk
This commit is contained in:
Magniquick
2025-11-19 15:32:20 +05:30
committed by GitHub
parent a4b9ae9471
commit 13b1dddf75
11 changed files with 541 additions and 81 deletions

View File

@@ -22,6 +22,36 @@ from .utils import (
openai_bp = Blueprint("openai", __name__)
def _log_json(prefix: str, payload: Any) -> None:
try:
print(f"{prefix}\n{json.dumps(payload, indent=2, ensure_ascii=False)}")
except Exception:
try:
print(f"{prefix}\n{payload}")
except Exception:
pass
def _wrap_stream_logging(label: str, iterator, enabled: bool):
if not enabled:
return iterator
def _gen():
for chunk in iterator:
try:
text = (
chunk.decode("utf-8", errors="replace")
if isinstance(chunk, (bytes, bytearray))
else str(chunk)
)
print(f"{label}\n{text}")
except Exception:
pass
yield chunk
return _gen()
def _instructions_for_model(model: str) -> str:
base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
if model == "gpt-5-codex" or model == "gpt-5.1-codex":
@@ -34,26 +64,28 @@ def _instructions_for_model(model: str) -> str:
@openai_bp.route("/v1/chat/completions", methods=["POST"])
def chat_completions() -> Response:
verbose = bool(current_app.config.get("VERBOSE"))
verbose_obfuscation = bool(current_app.config.get("VERBOSE_OBFUSCATION"))
reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
reasoning_compat = current_app.config.get("REASONING_COMPAT", "think-tags")
debug_model = current_app.config.get("DEBUG_MODEL")
raw = request.get_data(cache=True, as_text=True) or ""
if verbose:
try:
body_preview = (request.get_data(cache=True, as_text=True) or "")[:2000]
print("IN POST /v1/chat/completions\n" + body_preview)
print("IN POST /v1/chat/completions\n" + raw)
except Exception:
pass
raw = request.get_data(cache=True, as_text=True) or ""
try:
payload = json.loads(raw) if raw else {}
except Exception:
try:
payload = json.loads(raw.replace("\r", "").replace("\n", ""))
except Exception:
return jsonify({"error": {"message": "Invalid JSON body"}}), 400
err = {"error": {"message": "Invalid JSON body"}}
if verbose:
_log_json("OUT POST /v1/chat/completions", err)
return jsonify(err), 400
requested_model = payload.get("model")
model = normalize_model_name(requested_model, debug_model)
@@ -65,7 +97,10 @@ def chat_completions() -> Response:
if messages is None:
messages = []
if not isinstance(messages, list):
return jsonify({"error": {"message": "Request must include messages: []"}}), 400
err = {"error": {"message": "Request must include messages: []"}}
if verbose:
_log_json("OUT POST /v1/chat/completions", err)
return jsonify(err), 400
if isinstance(messages, list):
sys_idx = next((i for i, m in enumerate(messages) if isinstance(m, dict) and m.get("role") == "system"), None)
@@ -88,17 +123,15 @@ def chat_completions() -> Response:
if not (isinstance(_t, dict) and isinstance(_t.get("type"), str)):
continue
if _t.get("type") not in ("web_search", "web_search_preview"):
return (
jsonify(
{
"error": {
"message": "Only web_search/web_search_preview are supported in responses_tools",
"code": "RESPONSES_TOOL_UNSUPPORTED",
}
}
),
400,
)
err = {
"error": {
"message": "Only web_search/web_search_preview are supported in responses_tools",
"code": "RESPONSES_TOOL_UNSUPPORTED",
}
}
if verbose:
_log_json("OUT POST /v1/chat/completions", err)
return jsonify(err), 400
extra_tools.append(_t)
if not extra_tools and bool(current_app.config.get("DEFAULT_WEB_SEARCH")):
@@ -114,7 +147,10 @@ def chat_completions() -> Response:
except Exception:
size = 0
if size > MAX_TOOLS_BYTES:
return jsonify({"error": {"message": "responses_tools too large", "code": "RESPONSES_TOOLS_TOO_LARGE"}}), 400
err = {"error": {"message": "responses_tools too large", "code": "RESPONSES_TOOLS_TOO_LARGE"}}
if verbose:
_log_json("OUT POST /v1/chat/completions", err)
return jsonify(err), 400
had_responses_tools = True
tools_responses = (tools_responses or []) + extra_tools
@@ -142,6 +178,17 @@ def chat_completions() -> Response:
reasoning_param=reasoning_param,
)
if error_resp is not None:
if verbose:
try:
body = error_resp.get_data(as_text=True)
if body:
try:
parsed = json.loads(body)
except Exception:
parsed = body
_log_json("OUT POST /v1/chat/completions", parsed)
except Exception:
pass
return error_resp
record_rate_limits_from_response(upstream)
@@ -171,36 +218,38 @@ def chat_completions() -> Response:
if err2 is None and upstream2 is not None and upstream2.status_code < 400:
upstream = upstream2
else:
return (
jsonify(
{
"error": {
"message": (err_body.get("error", {}) or {}).get("message", "Upstream error"),
"code": "RESPONSES_TOOLS_REJECTED",
}
}
),
(upstream2.status_code if upstream2 is not None else upstream.status_code),
)
err = {
"error": {
"message": (err_body.get("error", {}) or {}).get("message", "Upstream error"),
"code": "RESPONSES_TOOLS_REJECTED",
}
}
if verbose:
_log_json("OUT POST /v1/chat/completions", err)
return jsonify(err), (upstream2.status_code if upstream2 is not None else upstream.status_code)
else:
if verbose:
print("Upstream error status=", upstream.status_code)
return (
jsonify({"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}),
upstream.status_code,
)
err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}
if verbose:
_log_json("OUT POST /v1/chat/completions", err)
return jsonify(err), upstream.status_code
if is_stream:
if verbose:
print("OUT POST /v1/chat/completions (streaming response)")
stream_iter = sse_translate_chat(
upstream,
requested_model or model,
created,
verbose=verbose_obfuscation,
vlog=print if verbose_obfuscation else None,
reasoning_compat=reasoning_compat,
include_usage=include_usage,
)
stream_iter = _wrap_stream_logging("STREAM OUT /v1/chat/completions", stream_iter, verbose)
resp = Response(
sse_translate_chat(
upstream,
requested_model or model,
created,
verbose=verbose,
vlog=print if verbose else None,
reasoning_compat=reasoning_compat,
include_usage=include_usage,
),
stream_iter,
status=upstream.status_code,
mimetype="text/event-stream",
headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
@@ -301,6 +350,8 @@ def chat_completions() -> Response:
],
**({"usage": usage_obj} if usage_obj else {}),
}
if verbose:
_log_json("OUT POST /v1/chat/completions", completion)
resp = make_response(jsonify(completion), upstream.status_code)
for k, v in build_cors_headers().items():
resp.headers.setdefault(k, v)
@@ -310,15 +361,24 @@ def chat_completions() -> Response:
@openai_bp.route("/v1/completions", methods=["POST"])
def completions() -> Response:
verbose = bool(current_app.config.get("VERBOSE"))
verbose_obfuscation = bool(current_app.config.get("VERBOSE_OBFUSCATION"))
debug_model = current_app.config.get("DEBUG_MODEL")
reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
raw = request.get_data(cache=True, as_text=True) or ""
if verbose:
try:
print("IN POST /v1/completions\n" + raw)
except Exception:
pass
try:
payload = json.loads(raw) if raw else {}
except Exception:
return jsonify({"error": {"message": "Invalid JSON body"}}), 400
err = {"error": {"message": "Invalid JSON body"}}
if verbose:
_log_json("OUT POST /v1/completions", err)
return jsonify(err), 400
requested_model = payload.get("model")
model = normalize_model_name(requested_model, debug_model)
@@ -344,6 +404,17 @@ def completions() -> Response:
reasoning_param=reasoning_param,
)
if error_resp is not None:
if verbose:
try:
body = error_resp.get_data(as_text=True)
if body:
try:
parsed = json.loads(body)
except Exception:
parsed = body
_log_json("OUT POST /v1/completions", parsed)
except Exception:
pass
return error_resp
record_rate_limits_from_response(upstream)
@@ -354,21 +425,25 @@ def completions() -> Response:
err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"raw": upstream.text}
except Exception:
err_body = {"raw": upstream.text}
return (
jsonify({"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}),
upstream.status_code,
)
err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}
if verbose:
_log_json("OUT POST /v1/completions", err)
return jsonify(err), upstream.status_code
if stream_req:
if verbose:
print("OUT POST /v1/completions (streaming response)")
stream_iter = sse_translate_text(
upstream,
requested_model or model,
created,
verbose=verbose_obfuscation,
vlog=(print if verbose_obfuscation else None),
include_usage=include_usage,
)
stream_iter = _wrap_stream_logging("STREAM OUT /v1/completions", stream_iter, verbose)
resp = Response(
sse_translate_text(
upstream,
requested_model or model,
created,
verbose=verbose,
vlog=(print if verbose else None),
include_usage=include_usage,
),
stream_iter,
status=upstream.status_code,
mimetype="text/event-stream",
headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
@@ -430,6 +505,8 @@ def completions() -> Response:
],
**({"usage": usage_obj} if usage_obj else {}),
}
if verbose:
_log_json("OUT POST /v1/completions", completion)
resp = make_response(jsonify(completion), upstream.status_code)
for k, v in build_cors_headers().items():
resp.headers.setdefault(k, v)
@@ -458,4 +535,3 @@ def list_models() -> Response:
for k, v in build_cors_headers().items():
resp.headers.setdefault(k, v)
return resp