diff --git a/__pycache__/build.cpython-39.pyc b/__pycache__/build.cpython-39.pyc new file mode 100644 index 0000000..f06b8ed Binary files /dev/null and b/__pycache__/build.cpython-39.pyc differ diff --git a/__pycache__/chatmock.cpython-39.pyc b/__pycache__/chatmock.cpython-39.pyc new file mode 100644 index 0000000..5ff9cda Binary files /dev/null and b/__pycache__/chatmock.cpython-39.pyc differ diff --git a/__pycache__/gui.cpython-39.pyc b/__pycache__/gui.cpython-39.pyc new file mode 100644 index 0000000..6c24194 Binary files /dev/null and b/__pycache__/gui.cpython-39.pyc differ diff --git a/chatmock/__pycache__/__init__.cpython-39.pyc b/chatmock/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..aeb9cdd Binary files /dev/null and b/chatmock/__pycache__/__init__.cpython-39.pyc differ diff --git a/chatmock/__pycache__/app.cpython-39.pyc b/chatmock/__pycache__/app.cpython-39.pyc new file mode 100644 index 0000000..18a76d1 Binary files /dev/null and b/chatmock/__pycache__/app.cpython-39.pyc differ diff --git a/chatmock/__pycache__/cli.cpython-39.pyc b/chatmock/__pycache__/cli.cpython-39.pyc new file mode 100644 index 0000000..b267d1b Binary files /dev/null and b/chatmock/__pycache__/cli.cpython-39.pyc differ diff --git a/chatmock/__pycache__/config.cpython-39.pyc b/chatmock/__pycache__/config.cpython-39.pyc new file mode 100644 index 0000000..29aadf1 Binary files /dev/null and b/chatmock/__pycache__/config.cpython-39.pyc differ diff --git a/chatmock/__pycache__/http.cpython-39.pyc b/chatmock/__pycache__/http.cpython-39.pyc new file mode 100644 index 0000000..6bebc31 Binary files /dev/null and b/chatmock/__pycache__/http.cpython-39.pyc differ diff --git a/chatmock/__pycache__/models.cpython-39.pyc b/chatmock/__pycache__/models.cpython-39.pyc new file mode 100644 index 0000000..9528eb1 Binary files /dev/null and b/chatmock/__pycache__/models.cpython-39.pyc differ diff --git a/chatmock/__pycache__/oauth.cpython-39.pyc b/chatmock/__pycache__/oauth.cpython-39.pyc new file mode 100644 index 0000000..84a74bc Binary files /dev/null and b/chatmock/__pycache__/oauth.cpython-39.pyc differ diff --git a/chatmock/__pycache__/reasoning.cpython-39.pyc b/chatmock/__pycache__/reasoning.cpython-39.pyc new file mode 100644 index 0000000..f961b69 Binary files /dev/null and b/chatmock/__pycache__/reasoning.cpython-39.pyc differ diff --git a/chatmock/__pycache__/routes_ollama.cpython-39.pyc b/chatmock/__pycache__/routes_ollama.cpython-39.pyc new file mode 100644 index 0000000..2008564 Binary files /dev/null and b/chatmock/__pycache__/routes_ollama.cpython-39.pyc differ diff --git a/chatmock/__pycache__/routes_openai.cpython-39.pyc b/chatmock/__pycache__/routes_openai.cpython-39.pyc new file mode 100644 index 0000000..c1b5ced Binary files /dev/null and b/chatmock/__pycache__/routes_openai.cpython-39.pyc differ diff --git a/chatmock/__pycache__/session.cpython-39.pyc b/chatmock/__pycache__/session.cpython-39.pyc new file mode 100644 index 0000000..5fd25f4 Binary files /dev/null and b/chatmock/__pycache__/session.cpython-39.pyc differ diff --git a/chatmock/__pycache__/transform.cpython-39.pyc b/chatmock/__pycache__/transform.cpython-39.pyc new file mode 100644 index 0000000..2470491 Binary files /dev/null and b/chatmock/__pycache__/transform.cpython-39.pyc differ diff --git a/chatmock/__pycache__/upstream.cpython-39.pyc b/chatmock/__pycache__/upstream.cpython-39.pyc new file mode 100644 index 0000000..770cb30 Binary files /dev/null and b/chatmock/__pycache__/upstream.cpython-39.pyc differ diff --git a/chatmock/__pycache__/utils.cpython-39.pyc b/chatmock/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000..3abaff9 Binary files /dev/null and b/chatmock/__pycache__/utils.cpython-39.pyc differ diff --git a/chatmock/routes_openai.py b/chatmock/routes_openai.py index a86483d..83dc4cc 100644 --- a/chatmock/routes_openai.py +++ b/chatmock/routes_openai.py @@ -63,6 +63,8 @@ def chat_completions() -> Response: content = sys_msg.get("content") if isinstance(sys_msg, dict) else "" messages.insert(0, {"role": "user", "content": content}) is_stream = bool(payload.get("stream")) + stream_options = payload.get("stream_options") if isinstance(payload.get("stream_options"), dict) else {} + include_usage = bool(stream_options.get("include_usage", False)) tools_responses = convert_tools_chat_to_responses(payload.get("tools")) tool_choice = payload.get("tool_choice", "auto") @@ -85,6 +87,7 @@ def chat_completions() -> Response: tool_choice=tool_choice, parallel_tool_calls=parallel_tool_calls, reasoning_param=reasoning_param, + include_usage=(not is_stream) or include_usage, ) if error_resp is not None: return error_resp @@ -112,6 +115,7 @@ def chat_completions() -> Response: verbose=verbose, vlog=print if verbose else None, reasoning_compat=reasoning_compat, + include_usage=include_usage, ), status=upstream.status_code, mimetype="text/event-stream", @@ -127,6 +131,19 @@ def chat_completions() -> Response: response_id = "chatcmpl" tool_calls: List[Dict[str, Any]] = [] error_message: str | None = None + usage_obj: Dict[str, int] | None = None + + def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None: + try: + usage = (evt.get("response") or {}).get("usage") + if not isinstance(usage, dict): + return None + pt = int(usage.get("input_tokens") or 0) + ct = int(usage.get("output_tokens") or 0) + tt = int(usage.get("total_tokens") or (pt + ct)) + return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt} + except Exception: + return None try: for raw in upstream.iter_lines(decode_unicode=False): if not raw: @@ -144,6 +161,9 @@ def chat_completions() -> Response: except Exception: continue kind = evt.get("type") + mu = _extract_usage(evt) + if mu: + usage_obj = mu if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str): response_id = evt["response"].get("id") or response_id if kind == "response.output_text.delta": @@ -183,7 +203,6 @@ def chat_completions() -> Response: if tool_calls: message["tool_calls"] = tool_calls message = apply_reasoning_to_message(message, reasoning_summary_text, reasoning_full_text, reasoning_compat) - completion = { "id": response_id or "chatcmpl", "object": "chat.completion", @@ -196,6 +215,7 @@ def chat_completions() -> Response: "finish_reason": "stop", } ], + **({"usage": usage_obj} if usage_obj else {}), } resp = make_response(jsonify(completion), upstream.status_code) for k, v in build_cors_headers().items(): @@ -223,6 +243,8 @@ def completions() -> Response: if not isinstance(prompt, str): prompt = payload.get("suffix") or "" stream_req = bool(payload.get("stream", False)) + stream_options = payload.get("stream_options") if isinstance(payload.get("stream_options"), dict) else {} + include_usage = bool(stream_options.get("include_usage", False)) messages = [{"role": "user", "content": prompt or ""}] input_items = convert_chat_messages_to_responses_input(messages) @@ -234,6 +256,7 @@ def completions() -> Response: input_items, instructions=BASE_INSTRUCTIONS, reasoning_param=reasoning_param, + include_usage=(not stream_req) or include_usage, ) if error_resp is not None: return error_resp @@ -251,7 +274,14 @@ def completions() -> Response: if stream_req: resp = Response( - sse_translate_text(upstream, model, created, verbose=verbose, vlog=(print if verbose else None)), + sse_translate_text( + upstream, + model, + created, + verbose=verbose, + vlog=(print if verbose else None), + include_usage=include_usage, + ), status=upstream.status_code, mimetype="text/event-stream", headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}, @@ -262,6 +292,18 @@ def completions() -> Response: full_text = "" response_id = "cmpl" + usage_obj: Dict[str, int] | None = None + def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None: + try: + usage = (evt.get("response") or {}).get("usage") + if not isinstance(usage, dict): + return None + pt = int(usage.get("input_tokens") or 0) + ct = int(usage.get("output_tokens") or 0) + tt = int(usage.get("total_tokens") or (pt + ct)) + return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt} + except Exception: + return None try: for raw_line in upstream.iter_lines(decode_unicode=False): if not raw_line: @@ -280,6 +322,9 @@ def completions() -> Response: continue if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str): response_id = evt["response"].get("id") or response_id + mu = _extract_usage(evt) + if mu: + usage_obj = mu kind = evt.get("type") if kind == "response.output_text.delta": full_text += evt.get("delta") or "" @@ -296,6 +341,7 @@ def completions() -> Response: "choices": [ {"index": 0, "text": full_text, "finish_reason": "stop", "logprobs": None} ], + **({"usage": usage_obj} if usage_obj else {}), } resp = make_response(jsonify(completion), upstream.status_code) for k, v in build_cors_headers().items(): @@ -310,4 +356,3 @@ def list_models() -> Response: for k, v in build_cors_headers().items(): resp.headers.setdefault(k, v) return resp - diff --git a/chatmock/upstream.py b/chatmock/upstream.py index c7c886e..20d5c5e 100644 --- a/chatmock/upstream.py +++ b/chatmock/upstream.py @@ -40,6 +40,7 @@ def start_upstream_request( tool_choice: Any | None = None, parallel_tool_calls: bool = False, reasoning_param: Dict[str, Any] | None = None, + include_usage: bool | None = None, ): access_token, account_id = get_effective_chatgpt_auth() if not access_token or not account_id: @@ -81,9 +82,10 @@ def start_upstream_request( "parallel_tool_calls": bool(parallel_tool_calls), "store": False, "stream": True, - "include": include, "prompt_cache_key": session_id, } + if include: + responses_payload["include"] = include if reasoning_param is not None: responses_payload["reasoning"] = reasoning_param diff --git a/chatmock/utils.py b/chatmock/utils.py index 247f27f..d41277e 100644 --- a/chatmock/utils.py +++ b/chatmock/utils.py @@ -239,6 +239,8 @@ def sse_translate_chat( verbose: bool = False, vlog=None, reasoning_compat: str = "think-tags", + *, + include_usage: bool = False, ): response_id = "chatcmpl-stream" compat = (reasoning_compat or "think-tags").strip().lower() @@ -247,6 +249,19 @@ def sse_translate_chat( saw_output = False saw_any_summary = False pending_summary_paragraph = False + upstream_usage = None + + def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None: + try: + usage = (evt.get("response") or {}).get("usage") + if not isinstance(usage, dict): + return None + pt = int(usage.get("input_tokens") or 0) + ct = int(usage.get("output_tokens") or 0) + tt = int(usage.get("total_tokens") or (pt + ct)) + return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt} + except Exception: + return None try: for raw in upstream.iter_lines(decode_unicode=False): if not raw: @@ -442,6 +457,9 @@ def sse_translate_chat( chunk = {"error": {"message": err}} yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8") elif kind == "response.completed": + m = _extract_usage(evt) + if m: + upstream_usage = m if compat == "think-tags" and think_open and not think_closed: close_chunk = { "id": response_id, @@ -453,14 +471,40 @@ def sse_translate_chat( yield f"data: {json.dumps(close_chunk)}\n\n".encode("utf-8") think_open = False think_closed = True + if include_usage and upstream_usage: + try: + usage_chunk = { + "id": response_id, + "object": "chat.completion.chunk", + "created": created, + "model": model, + "choices": [{"index": 0, "delta": {}, "finish_reason": None}], + "usage": upstream_usage, + } + yield f"data: {json.dumps(usage_chunk)}\n\n".encode("utf-8") + except Exception: + pass yield b"data: [DONE]\n\n" break finally: upstream.close() -def sse_translate_text(upstream, model: str, created: int, verbose: bool = False, vlog=None): +def sse_translate_text(upstream, model: str, created: int, verbose: bool = False, vlog=None, *, include_usage: bool = False): response_id = "cmpl-stream" + upstream_usage = None + + def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None: + try: + usage = (evt.get("response") or {}).get("usage") + if not isinstance(usage, dict): + return None + pt = int(usage.get("input_tokens") or 0) + ct = int(usage.get("output_tokens") or 0) + tt = int(usage.get("total_tokens") or (pt + ct)) + return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt} + except Exception: + return None try: for raw_line in upstream.iter_lines(decode_unicode=False): if not raw_line: @@ -509,8 +553,23 @@ def sse_translate_text(upstream, model: str, created: int, verbose: bool = False } yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8") elif kind == "response.completed": + m = _extract_usage(evt) + if m: + upstream_usage = m + if include_usage and upstream_usage: + try: + usage_chunk = { + "id": response_id, + "object": "text_completion.chunk", + "created": created, + "model": model, + "choices": [{"index": 0, "text": "", "finish_reason": None}], + "usage": upstream_usage, + } + yield f"data: {json.dumps(usage_chunk)}\n\n".encode("utf-8") + except Exception: + pass yield b"data: [DONE]\n\n" break finally: upstream.close() -