diff --git a/__pycache__/build.cpython-39.pyc b/__pycache__/build.cpython-39.pyc
new file mode 100644
index 0000000..f06b8ed
Binary files /dev/null and b/__pycache__/build.cpython-39.pyc differ
diff --git a/__pycache__/chatmock.cpython-39.pyc b/__pycache__/chatmock.cpython-39.pyc
new file mode 100644
index 0000000..5ff9cda
Binary files /dev/null and b/__pycache__/chatmock.cpython-39.pyc differ
diff --git a/__pycache__/gui.cpython-39.pyc b/__pycache__/gui.cpython-39.pyc
new file mode 100644
index 0000000..6c24194
Binary files /dev/null and b/__pycache__/gui.cpython-39.pyc differ
diff --git a/chatmock/__pycache__/__init__.cpython-39.pyc b/chatmock/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000..aeb9cdd
Binary files /dev/null and b/chatmock/__pycache__/__init__.cpython-39.pyc differ
diff --git a/chatmock/__pycache__/app.cpython-39.pyc b/chatmock/__pycache__/app.cpython-39.pyc
new file mode 100644
index 0000000..18a76d1
Binary files /dev/null and b/chatmock/__pycache__/app.cpython-39.pyc differ
diff --git a/chatmock/__pycache__/cli.cpython-39.pyc b/chatmock/__pycache__/cli.cpython-39.pyc
new file mode 100644
index 0000000..b267d1b
Binary files /dev/null and b/chatmock/__pycache__/cli.cpython-39.pyc differ
diff --git a/chatmock/__pycache__/config.cpython-39.pyc b/chatmock/__pycache__/config.cpython-39.pyc
new file mode 100644
index 0000000..29aadf1
Binary files /dev/null and b/chatmock/__pycache__/config.cpython-39.pyc differ
diff --git a/chatmock/__pycache__/http.cpython-39.pyc b/chatmock/__pycache__/http.cpython-39.pyc
new file mode 100644
index 0000000..6bebc31
Binary files /dev/null and b/chatmock/__pycache__/http.cpython-39.pyc differ
diff --git a/chatmock/__pycache__/models.cpython-39.pyc b/chatmock/__pycache__/models.cpython-39.pyc
new file mode 100644
index 0000000..9528eb1
Binary files /dev/null and b/chatmock/__pycache__/models.cpython-39.pyc differ
diff --git a/chatmock/__pycache__/oauth.cpython-39.pyc b/chatmock/__pycache__/oauth.cpython-39.pyc
new file mode 100644
index 0000000..84a74bc
Binary files /dev/null and b/chatmock/__pycache__/oauth.cpython-39.pyc differ
diff --git a/chatmock/__pycache__/reasoning.cpython-39.pyc b/chatmock/__pycache__/reasoning.cpython-39.pyc
new file mode 100644
index 0000000..f961b69
Binary files /dev/null and b/chatmock/__pycache__/reasoning.cpython-39.pyc differ
diff --git a/chatmock/__pycache__/routes_ollama.cpython-39.pyc b/chatmock/__pycache__/routes_ollama.cpython-39.pyc
new file mode 100644
index 0000000..2008564
Binary files /dev/null and b/chatmock/__pycache__/routes_ollama.cpython-39.pyc differ
diff --git a/chatmock/__pycache__/routes_openai.cpython-39.pyc b/chatmock/__pycache__/routes_openai.cpython-39.pyc
new file mode 100644
index 0000000..c1b5ced
Binary files /dev/null and b/chatmock/__pycache__/routes_openai.cpython-39.pyc differ
diff --git a/chatmock/__pycache__/session.cpython-39.pyc b/chatmock/__pycache__/session.cpython-39.pyc
new file mode 100644
index 0000000..5fd25f4
Binary files /dev/null and b/chatmock/__pycache__/session.cpython-39.pyc differ
diff --git a/chatmock/__pycache__/transform.cpython-39.pyc b/chatmock/__pycache__/transform.cpython-39.pyc
new file mode 100644
index 0000000..2470491
Binary files /dev/null and b/chatmock/__pycache__/transform.cpython-39.pyc differ
diff --git a/chatmock/__pycache__/upstream.cpython-39.pyc b/chatmock/__pycache__/upstream.cpython-39.pyc
new file mode 100644
index 0000000..770cb30
Binary files /dev/null and b/chatmock/__pycache__/upstream.cpython-39.pyc differ
diff --git a/chatmock/__pycache__/utils.cpython-39.pyc b/chatmock/__pycache__/utils.cpython-39.pyc
new file mode 100644
index 0000000..3abaff9
Binary files /dev/null and b/chatmock/__pycache__/utils.cpython-39.pyc differ
diff --git a/chatmock/routes_openai.py b/chatmock/routes_openai.py
index a86483d..83dc4cc 100644
--- a/chatmock/routes_openai.py
+++ b/chatmock/routes_openai.py
@@ -63,6 +63,8 @@ def chat_completions() -> Response:
             content = sys_msg.get("content") if isinstance(sys_msg, dict) else ""
             messages.insert(0, {"role": "user", "content": content})
     is_stream = bool(payload.get("stream"))
+    stream_options = payload.get("stream_options") if isinstance(payload.get("stream_options"), dict) else {}
+    include_usage = bool(stream_options.get("include_usage", False))
 
     tools_responses = convert_tools_chat_to_responses(payload.get("tools"))
     tool_choice = payload.get("tool_choice", "auto")
@@ -85,6 +87,7 @@ def chat_completions() -> Response:
         tool_choice=tool_choice,
         parallel_tool_calls=parallel_tool_calls,
         reasoning_param=reasoning_param,
+        include_usage=(not is_stream) or include_usage,
     )
     if error_resp is not None:
         return error_resp
@@ -112,6 +115,7 @@ def chat_completions() -> Response:
                 verbose=verbose,
                 vlog=print if verbose else None,
                 reasoning_compat=reasoning_compat,
+                include_usage=include_usage,
             ),
             status=upstream.status_code,
             mimetype="text/event-stream",
@@ -127,6 +131,19 @@ def chat_completions() -> Response:
     response_id = "chatcmpl"
     tool_calls: List[Dict[str, Any]] = []
     error_message: str | None = None
+    usage_obj: Dict[str, int] | None = None
+
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        try:
+            raw_usage = (evt.get("response") or {}).get("usage")
+            if isinstance(raw_usage, dict):
+                prompt = int(raw_usage.get("input_tokens") or 0)
+                completion = int(raw_usage.get("output_tokens") or 0)
+                total = int(raw_usage.get("total_tokens") or (prompt + completion))
+                return {"prompt_tokens": prompt, "completion_tokens": completion, "total_tokens": total}
+        except Exception:  # any failure while reading/converting counts means "no usage"
+            pass
+        return None
     try:
         for raw in upstream.iter_lines(decode_unicode=False):
             if not raw:
@@ -144,6 +161,9 @@ def chat_completions() -> Response:
             except Exception:
                 continue
             kind = evt.get("type")
+            mu = _extract_usage(evt)
+            if mu:
+                usage_obj = mu
             if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str):
                 response_id = evt["response"].get("id") or response_id
             if kind == "response.output_text.delta":
@@ -183,7 +203,6 @@ def chat_completions() -> Response:
     if tool_calls:
         message["tool_calls"] = tool_calls
     message = apply_reasoning_to_message(message, reasoning_summary_text, reasoning_full_text, reasoning_compat)
-
     completion = {
         "id": response_id or "chatcmpl",
         "object": "chat.completion",
@@ -196,6 +215,7 @@ def chat_completions() -> Response:
                 "finish_reason": "stop",
             }
         ],
+        **({"usage": usage_obj} if usage_obj else {}),
     }
     resp = make_response(jsonify(completion), upstream.status_code)
     for k, v in build_cors_headers().items():
@@ -223,6 +243,8 @@ def completions() -> Response:
     if not isinstance(prompt, str):
         prompt = payload.get("suffix") or ""
     stream_req = bool(payload.get("stream", False))
+    stream_options = payload.get("stream_options") if isinstance(payload.get("stream_options"), dict) else {}
+    include_usage = bool(stream_options.get("include_usage", False))
 
     messages = [{"role": "user", "content": prompt or ""}]
     input_items = convert_chat_messages_to_responses_input(messages)
@@ -234,6 +256,7 @@ def completions() -> Response:
         input_items,
         instructions=BASE_INSTRUCTIONS,
         reasoning_param=reasoning_param,
+        include_usage=(not stream_req) or include_usage,
     )
     if error_resp is not None:
         return error_resp
@@ -251,7 +274,14 @@ def completions() -> Response:
 
     if stream_req:
         resp = Response(
-            sse_translate_text(upstream, model, created, verbose=verbose, vlog=(print if verbose else None)),
+            sse_translate_text(
+                upstream,
+                model,
+                created,
+                verbose=verbose,
+                vlog=(print if verbose else None),
+                include_usage=include_usage,
+            ),
             status=upstream.status_code,
             mimetype="text/event-stream",
             headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
@@ -262,6 +292,18 @@ def completions() -> Response:
 
     full_text = ""
     response_id = "cmpl"
+    usage_obj: Dict[str, int] | None = None
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        try:
+            raw_usage = (evt.get("response") or {}).get("usage")
+            if isinstance(raw_usage, dict):
+                prompt = int(raw_usage.get("input_tokens") or 0)
+                completion = int(raw_usage.get("output_tokens") or 0)
+                total = int(raw_usage.get("total_tokens") or (prompt + completion))
+                return {"prompt_tokens": prompt, "completion_tokens": completion, "total_tokens": total}
+        except Exception:  # any failure while reading/converting counts means "no usage"
+            pass
+        return None
     try:
         for raw_line in upstream.iter_lines(decode_unicode=False):
             if not raw_line:
@@ -280,6 +322,9 @@ def completions() -> Response:
                 continue
             if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str):
                 response_id = evt["response"].get("id") or response_id
+            mu = _extract_usage(evt)
+            if mu:
+                usage_obj = mu
             kind = evt.get("type")
             if kind == "response.output_text.delta":
                 full_text += evt.get("delta") or ""
@@ -296,6 +341,7 @@ def completions() -> Response:
         "choices": [
             {"index": 0, "text": full_text, "finish_reason": "stop", "logprobs": None}
         ],
+        **({"usage": usage_obj} if usage_obj else {}),
     }
     resp = make_response(jsonify(completion), upstream.status_code)
     for k, v in build_cors_headers().items():
@@ -310,4 +356,3 @@ def list_models() -> Response:
     for k, v in build_cors_headers().items():
         resp.headers.setdefault(k, v)
     return resp
-
diff --git a/chatmock/upstream.py b/chatmock/upstream.py
index c7c886e..20d5c5e 100644
--- a/chatmock/upstream.py
+++ b/chatmock/upstream.py
@@ -40,6 +40,7 @@ def start_upstream_request(
     tool_choice: Any | None = None,
     parallel_tool_calls: bool = False,
     reasoning_param: Dict[str, Any] | None = None,
+    include_usage: bool | None = None,
 ):
     access_token, account_id = get_effective_chatgpt_auth()
     if not access_token or not account_id:
@@ -81,9 +82,10 @@ def start_upstream_request(
         "parallel_tool_calls": bool(parallel_tool_calls),
         "store": False,
         "stream": True,
-        "include": include,
         "prompt_cache_key": session_id,
     }
+    if include:
+        responses_payload["include"] = include
 
     if reasoning_param is not None:
         responses_payload["reasoning"] = reasoning_param
diff --git a/chatmock/utils.py b/chatmock/utils.py
index 247f27f..d41277e 100644
--- a/chatmock/utils.py
+++ b/chatmock/utils.py
@@ -239,6 +239,8 @@ def sse_translate_chat(
     verbose: bool = False,
     vlog=None,
     reasoning_compat: str = "think-tags",
+    *,
+    include_usage: bool = False,
 ):
     response_id = "chatcmpl-stream"
     compat = (reasoning_compat or "think-tags").strip().lower()
@@ -247,6 +249,19 @@ def sse_translate_chat(
     saw_output = False
     saw_any_summary = False
     pending_summary_paragraph = False
+    upstream_usage = None
+    
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        try:
+            raw_usage = (evt.get("response") or {}).get("usage")
+            if isinstance(raw_usage, dict):
+                prompt = int(raw_usage.get("input_tokens") or 0)
+                completion = int(raw_usage.get("output_tokens") or 0)
+                total = int(raw_usage.get("total_tokens") or (prompt + completion))
+                return {"prompt_tokens": prompt, "completion_tokens": completion, "total_tokens": total}
+        except Exception:  # any failure while reading/converting counts means "no usage"
+            pass
+        return None
     try:
         for raw in upstream.iter_lines(decode_unicode=False):
             if not raw:
@@ -442,6 +457,9 @@ def sse_translate_chat(
                 chunk = {"error": {"message": err}}
                 yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
             elif kind == "response.completed":
+                m = _extract_usage(evt)
+                if m:
+                    upstream_usage = m
                 if compat == "think-tags" and think_open and not think_closed:
                     close_chunk = {
                         "id": response_id,
@@ -453,14 +471,40 @@ def sse_translate_chat(
                     yield f"data: {json.dumps(close_chunk)}\n\n".encode("utf-8")
                     think_open = False
                     think_closed = True
+                if include_usage and upstream_usage:
+                    try:
+                        usage_chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [{"index": 0, "delta": {}, "finish_reason": None}],
+                            "usage": upstream_usage,
+                        }
+                        yield f"data: {json.dumps(usage_chunk)}\n\n".encode("utf-8")
+                    except Exception:
+                        pass
                 yield b"data: [DONE]\n\n"
                 break
     finally:
         upstream.close()
 
 
-def sse_translate_text(upstream, model: str, created: int, verbose: bool = False, vlog=None):
+def sse_translate_text(upstream, model: str, created: int, verbose: bool = False, vlog=None, *, include_usage: bool = False):
     response_id = "cmpl-stream"
+    upstream_usage = None
+    
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        try:
+            raw_usage = (evt.get("response") or {}).get("usage")
+            if isinstance(raw_usage, dict):
+                prompt = int(raw_usage.get("input_tokens") or 0)
+                completion = int(raw_usage.get("output_tokens") or 0)
+                total = int(raw_usage.get("total_tokens") or (prompt + completion))
+                return {"prompt_tokens": prompt, "completion_tokens": completion, "total_tokens": total}
+        except Exception:  # any failure while reading/converting counts means "no usage"
+            pass
+        return None
     try:
         for raw_line in upstream.iter_lines(decode_unicode=False):
             if not raw_line:
@@ -509,8 +553,23 @@ def sse_translate_text(upstream, model: str, created: int, verbose: bool = False
                 }
                 yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
             elif kind == "response.completed":
+                m = _extract_usage(evt)
+                if m:
+                    upstream_usage = m
+                if include_usage and upstream_usage:
+                    try:
+                        usage_chunk = {
+                            "id": response_id,
+                            "object": "text_completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [{"index": 0, "text": "", "finish_reason": None}],
+                            "usage": upstream_usage,
+                        }
+                        yield f"data: {json.dumps(usage_chunk)}\n\n".encode("utf-8")
+                    except Exception:
+                        pass
                 yield b"data: [DONE]\n\n"
                 break
     finally:
         upstream.close()
-