feat: improve compatibility with certain apps (#72)
* feat: enable modern packaging via pyproject.toml (`uvx --from (...) chatmock` should just work!)
* feat(ollama): add version endpoint
* feat(logging): improve verbose diagnostics
* fix(stream): always send stop chunk
This commit is contained in:
@@ -10,6 +10,7 @@ from .routes_ollama import ollama_bp
|
||||
|
||||
def create_app(
|
||||
verbose: bool = False,
|
||||
verbose_obfuscation: bool = False,
|
||||
reasoning_effort: str = "medium",
|
||||
reasoning_summary: str = "auto",
|
||||
reasoning_compat: str = "think-tags",
|
||||
@@ -21,6 +22,7 @@ def create_app(
|
||||
|
||||
app.config.update(
|
||||
VERBOSE=bool(verbose),
|
||||
VERBOSE_OBFUSCATION=bool(verbose_obfuscation),
|
||||
REASONING_EFFORT=reasoning_effort,
|
||||
REASONING_SUMMARY=reasoning_summary,
|
||||
REASONING_COMPAT=reasoning_compat,
|
||||
|
||||
@@ -263,6 +263,7 @@ def cmd_serve(
|
||||
host: str,
|
||||
port: int,
|
||||
verbose: bool,
|
||||
verbose_obfuscation: bool,
|
||||
reasoning_effort: str,
|
||||
reasoning_summary: str,
|
||||
reasoning_compat: str,
|
||||
@@ -272,6 +273,7 @@ def cmd_serve(
|
||||
) -> int:
|
||||
app = create_app(
|
||||
verbose=verbose,
|
||||
verbose_obfuscation=verbose_obfuscation,
|
||||
reasoning_effort=reasoning_effort,
|
||||
reasoning_summary=reasoning_summary,
|
||||
reasoning_compat=reasoning_compat,
|
||||
@@ -296,6 +298,11 @@ def main() -> None:
|
||||
p_serve.add_argument("--host", default="127.0.0.1")
|
||||
p_serve.add_argument("--port", type=int, default=8000)
|
||||
p_serve.add_argument("--verbose", action="store_true", help="Enable verbose logging")
|
||||
p_serve.add_argument(
|
||||
"--verbose-obfuscation",
|
||||
action="store_true",
|
||||
help="Also dump raw SSE/obfuscation events (in addition to --verbose request/response logs).",
|
||||
)
|
||||
p_serve.add_argument(
|
||||
"--debug-model",
|
||||
dest="debug_model",
|
||||
@@ -355,6 +362,7 @@ def main() -> None:
|
||||
host=args.host,
|
||||
port=args.port,
|
||||
verbose=args.verbose,
|
||||
verbose_obfuscation=args.verbose_obfuscation,
|
||||
reasoning_effort=args.reasoning_effort,
|
||||
reasoning_summary=args.reasoning_summary,
|
||||
reasoning_compat=args.reasoning_compat,
|
||||
|
||||
1
chatmock/prompt.md
Symbolic link
1
chatmock/prompt.md
Symbolic link
@@ -0,0 +1 @@
|
||||
../prompt.md
|
||||
1
chatmock/prompt_gpt5_codex.md
Symbolic link
1
chatmock/prompt_gpt5_codex.md
Symbolic link
@@ -0,0 +1 @@
|
||||
../prompt_gpt5_codex.md
|
||||
@@ -19,6 +19,52 @@ from .utils import convert_chat_messages_to_responses_input, convert_tools_chat_
|
||||
ollama_bp = Blueprint("ollama", __name__)
|
||||
|
||||
|
||||
def _log_json(prefix: str, payload: Any) -> None:
    """Print ``prefix`` followed by ``payload`` on the next line, best effort.

    The payload is first rendered as indented JSON (non-ASCII characters are
    kept as-is). If that attempt raises, the payload is printed using its
    plain string formatting instead. If the fallback raises as well, the
    error is swallowed so that diagnostics never break the caller.
    """
    # Ordered from most to least readable output; the first renderer whose
    # print succeeds wins.
    renderers = (
        lambda: json.dumps(payload, indent=2, ensure_ascii=False),
        lambda: payload,
    )
    for render in renderers:
        try:
            print(f"{prefix}\n{render()}")
        except Exception:
            # Fall through to the next (cruder) rendering, if any.
            continue
        return
|
||||
|
||||
|
||||
def _wrap_stream_logging(label: str, iterator, enabled: bool):
    """Optionally echo every chunk of a streaming response to stdout.

    Args:
        label: Text printed on its own line before each chunk.
        iterator: Iterable producing the response chunks (``bytes``,
            ``bytearray`` or anything convertible with ``str``).
        enabled: When false, ``iterator`` is returned untouched.

    Returns:
        ``iterator`` itself when logging is disabled, otherwise a generator
        that prints each chunk and then yields it unchanged.
    """
    if not enabled:
        return iterator

    def _gen():
        try:
            for chunk in iterator:
                try:
                    text = (
                        chunk.decode("utf-8", errors="replace")
                        if isinstance(chunk, (bytes, bytearray))
                        else str(chunk)
                    )
                    print(f"{label}\n{text}")
                except Exception:
                    # Logging is best effort: never let it break the stream.
                    pass
                yield chunk
        finally:
            # Propagate shutdown to the wrapped iterator. Without this, a
            # consumer that stops early (closing this generator) would leave
            # the source's own cleanup to garbage collection.
            close = getattr(iterator, "close", None)
            if callable(close):
                close()

    return _gen()
|
||||
|
||||
|
||||
@ollama_bp.route("/api/version", methods=["GET"])
def ollama_version() -> Response:
    """Handle ``GET /api/version`` by returning ``{"version": ...}`` as JSON.

    The version comes from the ``OLLAMA_VERSION`` app config entry; when that
    entry is missing, not a string, or blank, ``"0.12.10"`` is reported
    instead. Headers from ``build_cors_headers()`` are added without
    overriding any already set on the response. With the ``VERBOSE`` config
    flag on, the request and the outgoing payload are printed.
    """
    fallback_version = "0.12.10"
    verbose = bool(current_app.config.get("VERBOSE"))
    if verbose:
        print("IN GET /api/version")

    configured = current_app.config.get("OLLAMA_VERSION", fallback_version)
    usable = isinstance(configured, str) and bool(configured.strip())
    payload = {"version": configured if usable else fallback_version}

    resp = make_response(jsonify(payload), 200)
    for header_name, header_value in build_cors_headers().items():
        resp.headers.setdefault(header_name, header_value)

    if verbose:
        _log_json("OUT GET /api/version", payload)
    return resp
|
||||
|
||||
|
||||
def _instructions_for_model(model: str) -> str:
|
||||
base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
|
||||
if model == "gpt-5-codex":
|
||||
@@ -75,28 +121,34 @@ def ollama_tags() -> Response:
|
||||
},
|
||||
}
|
||||
)
|
||||
resp = make_response(jsonify({"models": models}), 200)
|
||||
payload = {"models": models}
|
||||
resp = make_response(jsonify(payload), 200)
|
||||
for k, v in build_cors_headers().items():
|
||||
resp.headers.setdefault(k, v)
|
||||
if bool(current_app.config.get("VERBOSE")):
|
||||
_log_json("OUT GET /api/tags", payload)
|
||||
return resp
|
||||
|
||||
|
||||
@ollama_bp.route("/api/show", methods=["POST"])
|
||||
def ollama_show() -> Response:
|
||||
verbose = bool(current_app.config.get("VERBOSE"))
|
||||
raw_body = request.get_data(cache=True, as_text=True) or ""
|
||||
if verbose:
|
||||
try:
|
||||
print("IN POST /api/show\n" + raw_body)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
if verbose:
|
||||
body_preview = (request.get_data(cache=True, as_text=True) or "")[:2000]
|
||||
print("IN POST /api/show\n" + body_preview)
|
||||
payload = json.loads(raw_body) if raw_body else (request.get_json(silent=True) or {})
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
payload = request.get_json(silent=True) or {}
|
||||
except Exception:
|
||||
payload = {}
|
||||
model = payload.get("model")
|
||||
if not isinstance(model, str) or not model.strip():
|
||||
return jsonify({"error": "Model not found"}), 400
|
||||
err = {"error": "Model not found"}
|
||||
if verbose:
|
||||
_log_json("OUT POST /api/show", err)
|
||||
return jsonify(err), 400
|
||||
v1_show_response = {
|
||||
"modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM /models/blobs/sha256:placeholder\nTEMPLATE \"\"\"{{ .System }}\nUSER: {{ .Prompt }}\nASSISTANT: \"\"\"\nPARAMETER num_ctx 100000\nPARAMETER stop \"</s>\"\nPARAMETER stop \"USER:\"\nPARAMETER stop \"ASSISTANT:\"",
|
||||
"parameters": "num_keep 24\nstop \"<|start_header_id|>\"\nstop \"<|end_header_id|>\"\nstop \"<|eot_id|>\"",
|
||||
@@ -116,6 +168,8 @@ def ollama_show() -> Response:
|
||||
},
|
||||
"capabilities": ["completion", "vision", "tools", "thinking"],
|
||||
}
|
||||
if verbose:
|
||||
_log_json("OUT POST /api/show", v1_show_response)
|
||||
resp = make_response(jsonify(v1_show_response), 200)
|
||||
for k, v in build_cors_headers().items():
|
||||
resp.headers.setdefault(k, v)
|
||||
@@ -132,10 +186,13 @@ def ollama_chat() -> Response:
|
||||
try:
|
||||
raw = request.get_data(cache=True, as_text=True) or ""
|
||||
if verbose:
|
||||
print("IN POST /api/chat\n" + (raw[:2000] if isinstance(raw, str) else ""))
|
||||
print("IN POST /api/chat\n" + (raw if isinstance(raw, str) else ""))
|
||||
payload = json.loads(raw) if raw else {}
|
||||
except Exception:
|
||||
return jsonify({"error": "Invalid JSON body"}), 400
|
||||
err = {"error": "Invalid JSON body"}
|
||||
if verbose:
|
||||
_log_json("OUT POST /api/chat", err)
|
||||
return jsonify(err), 400
|
||||
|
||||
model = payload.get("model")
|
||||
raw_messages = payload.get("messages")
|
||||
@@ -166,7 +223,10 @@ def ollama_chat() -> Response:
|
||||
if not (isinstance(_t, dict) and isinstance(_t.get("type"), str)):
|
||||
continue
|
||||
if _t.get("type") not in ("web_search", "web_search_preview"):
|
||||
return jsonify({"error": "Only web_search/web_search_preview are supported in responses_tools"}), 400
|
||||
err = {"error": "Only web_search/web_search_preview are supported in responses_tools"}
|
||||
if verbose:
|
||||
_log_json("OUT POST /api/chat", err)
|
||||
return jsonify(err), 400
|
||||
extra_tools.append(_t)
|
||||
if not extra_tools and bool(current_app.config.get("DEFAULT_WEB_SEARCH")):
|
||||
rtc = payload.get("responses_tool_choice")
|
||||
@@ -180,7 +240,10 @@ def ollama_chat() -> Response:
|
||||
except Exception:
|
||||
size = 0
|
||||
if size > MAX_TOOLS_BYTES:
|
||||
return jsonify({"error": "responses_tools too large"}), 400
|
||||
err = {"error": "responses_tools too large"}
|
||||
if verbose:
|
||||
_log_json("OUT POST /api/chat", err)
|
||||
return jsonify(err), 400
|
||||
had_responses_tools = True
|
||||
tools_responses = (tools_responses or []) + extra_tools
|
||||
|
||||
@@ -189,7 +252,10 @@ def ollama_chat() -> Response:
|
||||
tool_choice = rtc
|
||||
|
||||
if not isinstance(model, str) or not isinstance(messages, list) or not messages:
|
||||
return jsonify({"error": "Invalid request format"}), 400
|
||||
err = {"error": "Invalid request format"}
|
||||
if verbose:
|
||||
_log_json("OUT POST /api/chat", err)
|
||||
return jsonify(err), 400
|
||||
|
||||
input_items = convert_chat_messages_to_responses_input(messages)
|
||||
|
||||
@@ -205,6 +271,17 @@ def ollama_chat() -> Response:
|
||||
reasoning_param=build_reasoning_param(reasoning_effort, reasoning_summary, model_reasoning),
|
||||
)
|
||||
if error_resp is not None:
|
||||
if verbose:
|
||||
try:
|
||||
body = error_resp.get_data(as_text=True)
|
||||
if body:
|
||||
try:
|
||||
parsed = json.loads(body)
|
||||
except Exception:
|
||||
parsed = body
|
||||
_log_json("OUT POST /api/chat", parsed)
|
||||
except Exception:
|
||||
pass
|
||||
return error_resp
|
||||
|
||||
record_rate_limits_from_response(upstream)
|
||||
@@ -232,17 +309,17 @@ def ollama_chat() -> Response:
|
||||
if err2 is None and upstream2 is not None and upstream2.status_code < 400:
|
||||
upstream = upstream2
|
||||
else:
|
||||
return (
|
||||
jsonify({"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error"), "code": "RESPONSES_TOOLS_REJECTED"}}),
|
||||
(upstream2.status_code if upstream2 is not None else upstream.status_code),
|
||||
)
|
||||
err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error"), "code": "RESPONSES_TOOLS_REJECTED"}}
|
||||
if verbose:
|
||||
_log_json("OUT POST /api/chat", err)
|
||||
return jsonify(err), (upstream2.status_code if upstream2 is not None else upstream.status_code)
|
||||
else:
|
||||
if verbose:
|
||||
print("/api/chat upstream error status=", upstream.status_code, " body:", json.dumps(err_body)[:2000])
|
||||
return (
|
||||
jsonify({"error": (err_body.get("error", {}) or {}).get("message", "Upstream error")}),
|
||||
upstream.status_code,
|
||||
)
|
||||
err = {"error": (err_body.get("error", {}) or {}).get("message", "Upstream error")}
|
||||
if verbose:
|
||||
_log_json("OUT POST /api/chat", err)
|
||||
return jsonify(err), upstream.status_code
|
||||
|
||||
created_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
model_out = model if isinstance(model, str) and model.strip() else normalized_model
|
||||
@@ -408,8 +485,12 @@ def ollama_chat() -> Response:
|
||||
}
|
||||
done_obj.update(_OLLAMA_FAKE_EVAL)
|
||||
yield json.dumps(done_obj) + "\n"
|
||||
if verbose:
|
||||
print("OUT POST /api/chat (streaming response)")
|
||||
stream_iter = stream_with_context(_gen())
|
||||
stream_iter = _wrap_stream_logging("STREAM OUT /api/chat", stream_iter, verbose)
|
||||
resp = current_app.response_class(
|
||||
stream_with_context(_gen()),
|
||||
stream_iter,
|
||||
status=200,
|
||||
mimetype="application/x-ndjson",
|
||||
)
|
||||
@@ -481,6 +562,8 @@ def ollama_chat() -> Response:
|
||||
"done_reason": "stop",
|
||||
}
|
||||
out_json.update(_OLLAMA_FAKE_EVAL)
|
||||
if verbose:
|
||||
_log_json("OUT POST /api/chat", out_json)
|
||||
resp = make_response(jsonify(out_json), 200)
|
||||
for k, v in build_cors_headers().items():
|
||||
resp.headers.setdefault(k, v)
|
||||
|
||||
@@ -22,6 +22,36 @@ from .utils import (
|
||||
openai_bp = Blueprint("openai", __name__)
|
||||
|
||||
|
||||
def _log_json(prefix: str, payload: Any) -> None:
    """Print ``prefix`` followed by ``payload`` on the next line, best effort.

    The payload is first rendered as indented JSON (non-ASCII characters are
    kept as-is). If that attempt raises, the payload is printed using its
    plain string formatting instead. If the fallback raises as well, the
    error is swallowed so that diagnostics never break the caller.
    """
    # Ordered from most to least readable output; the first renderer whose
    # print succeeds wins.
    renderers = (
        lambda: json.dumps(payload, indent=2, ensure_ascii=False),
        lambda: payload,
    )
    for render in renderers:
        try:
            print(f"{prefix}\n{render()}")
        except Exception:
            # Fall through to the next (cruder) rendering, if any.
            continue
        return
|
||||
|
||||
|
||||
def _wrap_stream_logging(label: str, iterator, enabled: bool):
    """Optionally echo every chunk of a streaming response to stdout.

    Args:
        label: Text printed on its own line before each chunk.
        iterator: Iterable producing the response chunks (``bytes``,
            ``bytearray`` or anything convertible with ``str``).
        enabled: When false, ``iterator`` is returned untouched.

    Returns:
        ``iterator`` itself when logging is disabled, otherwise a generator
        that prints each chunk and then yields it unchanged.
    """
    if not enabled:
        return iterator

    def _gen():
        try:
            for chunk in iterator:
                try:
                    text = (
                        chunk.decode("utf-8", errors="replace")
                        if isinstance(chunk, (bytes, bytearray))
                        else str(chunk)
                    )
                    print(f"{label}\n{text}")
                except Exception:
                    # Logging is best effort: never let it break the stream.
                    pass
                yield chunk
        finally:
            # Propagate shutdown to the wrapped iterator. Without this, a
            # consumer that stops early (closing this generator) would leave
            # the source's own cleanup to garbage collection.
            close = getattr(iterator, "close", None)
            if callable(close):
                close()

    return _gen()
|
||||
|
||||
|
||||
def _instructions_for_model(model: str) -> str:
|
||||
base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
|
||||
if model == "gpt-5-codex" or model == "gpt-5.1-codex":
|
||||
@@ -34,26 +64,28 @@ def _instructions_for_model(model: str) -> str:
|
||||
@openai_bp.route("/v1/chat/completions", methods=["POST"])
|
||||
def chat_completions() -> Response:
|
||||
verbose = bool(current_app.config.get("VERBOSE"))
|
||||
verbose_obfuscation = bool(current_app.config.get("VERBOSE_OBFUSCATION"))
|
||||
reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
|
||||
reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
|
||||
reasoning_compat = current_app.config.get("REASONING_COMPAT", "think-tags")
|
||||
debug_model = current_app.config.get("DEBUG_MODEL")
|
||||
|
||||
raw = request.get_data(cache=True, as_text=True) or ""
|
||||
if verbose:
|
||||
try:
|
||||
body_preview = (request.get_data(cache=True, as_text=True) or "")[:2000]
|
||||
print("IN POST /v1/chat/completions\n" + body_preview)
|
||||
print("IN POST /v1/chat/completions\n" + raw)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
raw = request.get_data(cache=True, as_text=True) or ""
|
||||
try:
|
||||
payload = json.loads(raw) if raw else {}
|
||||
except Exception:
|
||||
try:
|
||||
payload = json.loads(raw.replace("\r", "").replace("\n", ""))
|
||||
except Exception:
|
||||
return jsonify({"error": {"message": "Invalid JSON body"}}), 400
|
||||
err = {"error": {"message": "Invalid JSON body"}}
|
||||
if verbose:
|
||||
_log_json("OUT POST /v1/chat/completions", err)
|
||||
return jsonify(err), 400
|
||||
|
||||
requested_model = payload.get("model")
|
||||
model = normalize_model_name(requested_model, debug_model)
|
||||
@@ -65,7 +97,10 @@ def chat_completions() -> Response:
|
||||
if messages is None:
|
||||
messages = []
|
||||
if not isinstance(messages, list):
|
||||
return jsonify({"error": {"message": "Request must include messages: []"}}), 400
|
||||
err = {"error": {"message": "Request must include messages: []"}}
|
||||
if verbose:
|
||||
_log_json("OUT POST /v1/chat/completions", err)
|
||||
return jsonify(err), 400
|
||||
|
||||
if isinstance(messages, list):
|
||||
sys_idx = next((i for i, m in enumerate(messages) if isinstance(m, dict) and m.get("role") == "system"), None)
|
||||
@@ -88,17 +123,15 @@ def chat_completions() -> Response:
|
||||
if not (isinstance(_t, dict) and isinstance(_t.get("type"), str)):
|
||||
continue
|
||||
if _t.get("type") not in ("web_search", "web_search_preview"):
|
||||
return (
|
||||
jsonify(
|
||||
{
|
||||
"error": {
|
||||
"message": "Only web_search/web_search_preview are supported in responses_tools",
|
||||
"code": "RESPONSES_TOOL_UNSUPPORTED",
|
||||
}
|
||||
}
|
||||
),
|
||||
400,
|
||||
)
|
||||
err = {
|
||||
"error": {
|
||||
"message": "Only web_search/web_search_preview are supported in responses_tools",
|
||||
"code": "RESPONSES_TOOL_UNSUPPORTED",
|
||||
}
|
||||
}
|
||||
if verbose:
|
||||
_log_json("OUT POST /v1/chat/completions", err)
|
||||
return jsonify(err), 400
|
||||
extra_tools.append(_t)
|
||||
|
||||
if not extra_tools and bool(current_app.config.get("DEFAULT_WEB_SEARCH")):
|
||||
@@ -114,7 +147,10 @@ def chat_completions() -> Response:
|
||||
except Exception:
|
||||
size = 0
|
||||
if size > MAX_TOOLS_BYTES:
|
||||
return jsonify({"error": {"message": "responses_tools too large", "code": "RESPONSES_TOOLS_TOO_LARGE"}}), 400
|
||||
err = {"error": {"message": "responses_tools too large", "code": "RESPONSES_TOOLS_TOO_LARGE"}}
|
||||
if verbose:
|
||||
_log_json("OUT POST /v1/chat/completions", err)
|
||||
return jsonify(err), 400
|
||||
had_responses_tools = True
|
||||
tools_responses = (tools_responses or []) + extra_tools
|
||||
|
||||
@@ -142,6 +178,17 @@ def chat_completions() -> Response:
|
||||
reasoning_param=reasoning_param,
|
||||
)
|
||||
if error_resp is not None:
|
||||
if verbose:
|
||||
try:
|
||||
body = error_resp.get_data(as_text=True)
|
||||
if body:
|
||||
try:
|
||||
parsed = json.loads(body)
|
||||
except Exception:
|
||||
parsed = body
|
||||
_log_json("OUT POST /v1/chat/completions", parsed)
|
||||
except Exception:
|
||||
pass
|
||||
return error_resp
|
||||
|
||||
record_rate_limits_from_response(upstream)
|
||||
@@ -171,36 +218,38 @@ def chat_completions() -> Response:
|
||||
if err2 is None and upstream2 is not None and upstream2.status_code < 400:
|
||||
upstream = upstream2
|
||||
else:
|
||||
return (
|
||||
jsonify(
|
||||
{
|
||||
"error": {
|
||||
"message": (err_body.get("error", {}) or {}).get("message", "Upstream error"),
|
||||
"code": "RESPONSES_TOOLS_REJECTED",
|
||||
}
|
||||
}
|
||||
),
|
||||
(upstream2.status_code if upstream2 is not None else upstream.status_code),
|
||||
)
|
||||
err = {
|
||||
"error": {
|
||||
"message": (err_body.get("error", {}) or {}).get("message", "Upstream error"),
|
||||
"code": "RESPONSES_TOOLS_REJECTED",
|
||||
}
|
||||
}
|
||||
if verbose:
|
||||
_log_json("OUT POST /v1/chat/completions", err)
|
||||
return jsonify(err), (upstream2.status_code if upstream2 is not None else upstream.status_code)
|
||||
else:
|
||||
if verbose:
|
||||
print("Upstream error status=", upstream.status_code)
|
||||
return (
|
||||
jsonify({"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}),
|
||||
upstream.status_code,
|
||||
)
|
||||
err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}
|
||||
if verbose:
|
||||
_log_json("OUT POST /v1/chat/completions", err)
|
||||
return jsonify(err), upstream.status_code
|
||||
|
||||
if is_stream:
|
||||
if verbose:
|
||||
print("OUT POST /v1/chat/completions (streaming response)")
|
||||
stream_iter = sse_translate_chat(
|
||||
upstream,
|
||||
requested_model or model,
|
||||
created,
|
||||
verbose=verbose_obfuscation,
|
||||
vlog=print if verbose_obfuscation else None,
|
||||
reasoning_compat=reasoning_compat,
|
||||
include_usage=include_usage,
|
||||
)
|
||||
stream_iter = _wrap_stream_logging("STREAM OUT /v1/chat/completions", stream_iter, verbose)
|
||||
resp = Response(
|
||||
sse_translate_chat(
|
||||
upstream,
|
||||
requested_model or model,
|
||||
created,
|
||||
verbose=verbose,
|
||||
vlog=print if verbose else None,
|
||||
reasoning_compat=reasoning_compat,
|
||||
include_usage=include_usage,
|
||||
),
|
||||
stream_iter,
|
||||
status=upstream.status_code,
|
||||
mimetype="text/event-stream",
|
||||
headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
|
||||
@@ -301,6 +350,8 @@ def chat_completions() -> Response:
|
||||
],
|
||||
**({"usage": usage_obj} if usage_obj else {}),
|
||||
}
|
||||
if verbose:
|
||||
_log_json("OUT POST /v1/chat/completions", completion)
|
||||
resp = make_response(jsonify(completion), upstream.status_code)
|
||||
for k, v in build_cors_headers().items():
|
||||
resp.headers.setdefault(k, v)
|
||||
@@ -310,15 +361,24 @@ def chat_completions() -> Response:
|
||||
@openai_bp.route("/v1/completions", methods=["POST"])
|
||||
def completions() -> Response:
|
||||
verbose = bool(current_app.config.get("VERBOSE"))
|
||||
verbose_obfuscation = bool(current_app.config.get("VERBOSE_OBFUSCATION"))
|
||||
debug_model = current_app.config.get("DEBUG_MODEL")
|
||||
reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
|
||||
reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
|
||||
|
||||
raw = request.get_data(cache=True, as_text=True) or ""
|
||||
if verbose:
|
||||
try:
|
||||
print("IN POST /v1/completions\n" + raw)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
payload = json.loads(raw) if raw else {}
|
||||
except Exception:
|
||||
return jsonify({"error": {"message": "Invalid JSON body"}}), 400
|
||||
err = {"error": {"message": "Invalid JSON body"}}
|
||||
if verbose:
|
||||
_log_json("OUT POST /v1/completions", err)
|
||||
return jsonify(err), 400
|
||||
|
||||
requested_model = payload.get("model")
|
||||
model = normalize_model_name(requested_model, debug_model)
|
||||
@@ -344,6 +404,17 @@ def completions() -> Response:
|
||||
reasoning_param=reasoning_param,
|
||||
)
|
||||
if error_resp is not None:
|
||||
if verbose:
|
||||
try:
|
||||
body = error_resp.get_data(as_text=True)
|
||||
if body:
|
||||
try:
|
||||
parsed = json.loads(body)
|
||||
except Exception:
|
||||
parsed = body
|
||||
_log_json("OUT POST /v1/completions", parsed)
|
||||
except Exception:
|
||||
pass
|
||||
return error_resp
|
||||
|
||||
record_rate_limits_from_response(upstream)
|
||||
@@ -354,21 +425,25 @@ def completions() -> Response:
|
||||
err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"raw": upstream.text}
|
||||
except Exception:
|
||||
err_body = {"raw": upstream.text}
|
||||
return (
|
||||
jsonify({"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}),
|
||||
upstream.status_code,
|
||||
)
|
||||
err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}
|
||||
if verbose:
|
||||
_log_json("OUT POST /v1/completions", err)
|
||||
return jsonify(err), upstream.status_code
|
||||
|
||||
if stream_req:
|
||||
if verbose:
|
||||
print("OUT POST /v1/completions (streaming response)")
|
||||
stream_iter = sse_translate_text(
|
||||
upstream,
|
||||
requested_model or model,
|
||||
created,
|
||||
verbose=verbose_obfuscation,
|
||||
vlog=(print if verbose_obfuscation else None),
|
||||
include_usage=include_usage,
|
||||
)
|
||||
stream_iter = _wrap_stream_logging("STREAM OUT /v1/completions", stream_iter, verbose)
|
||||
resp = Response(
|
||||
sse_translate_text(
|
||||
upstream,
|
||||
requested_model or model,
|
||||
created,
|
||||
verbose=verbose,
|
||||
vlog=(print if verbose else None),
|
||||
include_usage=include_usage,
|
||||
),
|
||||
stream_iter,
|
||||
status=upstream.status_code,
|
||||
mimetype="text/event-stream",
|
||||
headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
|
||||
@@ -430,6 +505,8 @@ def completions() -> Response:
|
||||
],
|
||||
**({"usage": usage_obj} if usage_obj else {}),
|
||||
}
|
||||
if verbose:
|
||||
_log_json("OUT POST /v1/completions", completion)
|
||||
resp = make_response(jsonify(completion), upstream.status_code)
|
||||
for k, v in build_cors_headers().items():
|
||||
resp.headers.setdefault(k, v)
|
||||
@@ -458,4 +535,3 @@ def list_models() -> Response:
|
||||
for k, v in build_cors_headers().items():
|
||||
resp.headers.setdefault(k, v)
|
||||
return resp
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import time
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
import requests
|
||||
from flask import Response, jsonify, make_response
|
||||
from flask import Response, current_app, jsonify, make_response
|
||||
|
||||
from .config import CHATGPT_RESPONSES_URL
|
||||
from .http import build_cors_headers
|
||||
@@ -14,6 +14,16 @@ from flask import request as flask_request
|
||||
from .utils import get_effective_chatgpt_auth
|
||||
|
||||
|
||||
def _log_json(prefix: str, payload: Any) -> None:
    """Print ``prefix`` followed by ``payload`` on the next line, best effort.

    The payload is first rendered as indented JSON (non-ASCII characters are
    kept as-is). If that attempt raises, the payload is printed using its
    plain string formatting instead. If the fallback raises as well, the
    error is swallowed so that diagnostics never break the caller.
    """
    # Ordered from most to least readable output; the first renderer whose
    # print succeeds wins.
    renderers = (
        lambda: json.dumps(payload, indent=2, ensure_ascii=False),
        lambda: payload,
    )
    for render in renderers:
        try:
            print(f"{prefix}\n{render()}")
        except Exception:
            # Fall through to the next (cruder) rendering, if any.
            continue
        return
|
||||
|
||||
|
||||
def normalize_model_name(name: str | None, debug_model: str | None = None) -> str:
|
||||
if isinstance(debug_model, str) and debug_model.strip():
|
||||
return debug_model.strip()
|
||||
@@ -102,6 +112,14 @@ def start_upstream_request(
|
||||
if reasoning_param is not None:
|
||||
responses_payload["reasoning"] = reasoning_param
|
||||
|
||||
verbose = False
|
||||
try:
|
||||
verbose = bool(current_app.config.get("VERBOSE"))
|
||||
except Exception:
|
||||
verbose = False
|
||||
if verbose:
|
||||
_log_json("OUTBOUND >> ChatGPT Responses API payload", responses_payload)
|
||||
|
||||
headers = {
|
||||
"Authorization": f"Bearer {access_token}",
|
||||
"Content-Type": "application/json",
|
||||
|
||||
@@ -389,6 +389,7 @@ def sse_translate_chat(
|
||||
think_open = False
|
||||
think_closed = False
|
||||
saw_output = False
|
||||
sent_stop_chunk = False
|
||||
saw_any_summary = False
|
||||
pending_summary_paragraph = False
|
||||
upstream_usage = None
|
||||
@@ -738,6 +739,7 @@ def sse_translate_chat(
|
||||
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
|
||||
}
|
||||
yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
|
||||
sent_stop_chunk = True
|
||||
elif kind == "response.failed":
|
||||
err = evt.get("response", {}).get("error", {}).get("message", "response.failed")
|
||||
chunk = {"error": {"message": err}}
|
||||
@@ -757,6 +759,17 @@ def sse_translate_chat(
|
||||
yield f"data: {json.dumps(close_chunk)}\n\n".encode("utf-8")
|
||||
think_open = False
|
||||
think_closed = True
|
||||
if not sent_stop_chunk:
|
||||
chunk = {
|
||||
"id": response_id,
|
||||
"object": "chat.completion.chunk",
|
||||
"created": created,
|
||||
"model": model,
|
||||
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
|
||||
}
|
||||
yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
|
||||
sent_stop_chunk = True
|
||||
|
||||
if include_usage and upstream_usage:
|
||||
try:
|
||||
usage_chunk = {
|
||||
|
||||
Reference in New Issue
Block a user