From 8754203ec66fcc92b4b9e512775d429d68d42161 Mon Sep 17 00:00:00 2001
From: Game_Time <108236317+RayBytes@users.noreply.github.com>
Date: Mon, 23 Mar 2026 15:41:42 +0500
Subject: [PATCH] feat: add responses api, websocket support, and fast mode
---
DOCKER.md | 1 +
README.md | 181 ++++-----
chatmock/app.py | 6 +
chatmock/cli.py | 9 +
chatmock/fast_mode.py | 92 +++++
chatmock/model_registry.py | 8 +
chatmock/responses_api.py | 242 ++++++++++++
chatmock/routes_ollama.py | 24 +-
chatmock/routes_openai.py | 213 ++++++++++-
chatmock/session.py | 225 ++++++++++-
chatmock/upstream.py | 84 ++++-
chatmock/version.py | 2 +-
chatmock/websocket_routes.py | 225 +++++++++++
docker/entrypoint.sh | 3 +
gui.py | 15 +-
pyproject.toml | 2 +
scripts/test_responses_cached_tokens.py | 176 +++++++++
scripts/test_responses_reuse.py | 143 +++++++
tests/test_fast_mode.py | 49 +++
tests/test_models.py | 2 +
tests/test_routes.py | 473 +++++++++++++++++++++++-
uv.lock | 92 +++++
22 files changed, 2148 insertions(+), 119 deletions(-)
create mode 100644 chatmock/fast_mode.py
create mode 100644 chatmock/responses_api.py
create mode 100644 chatmock/websocket_routes.py
create mode 100644 scripts/test_responses_cached_tokens.py
create mode 100644 scripts/test_responses_reuse.py
create mode 100644 tests/test_fast_mode.py
diff --git a/DOCKER.md b/DOCKER.md
index db9191f..1314c97 100644
--- a/DOCKER.md
+++ b/DOCKER.md
@@ -24,6 +24,7 @@ Set options in `.env` or pass environment variables:
- `CHATGPT_LOCAL_REASONING_EFFORT`: minimal|low|medium|high|xhigh
- `CHATGPT_LOCAL_REASONING_SUMMARY`: auto|concise|detailed|none
- `CHATGPT_LOCAL_REASONING_COMPAT`: legacy|o3|think-tags|current
+- `CHATGPT_LOCAL_FAST_MODE`: `true|false` to enable fast mode by default for supported models
- `CHATGPT_LOCAL_DEBUG_MODEL`: force model override (e.g., `gpt-5.4`)
- `CHATGPT_LOCAL_CLIENT_ID`: OAuth client id override (rarely needed)
- `CHATGPT_LOCAL_EXPOSE_REASONING_MODELS`: `true|false` to add reasoning model variants to `/v1/models`
diff --git a/README.md b/README.md
index 1fe3188..4486dd2 100644
--- a/README.md
+++ b/README.md
@@ -1,172 +1,175 @@
-
ChatMock
-
-
-
-
OpenAI & Ollama compatible API powered by your ChatGPT plan.
-
Use your ChatGPT Plus/Pro account to call OpenAI models from code or alternate chat UIs.
-
+
+# ChatMock
+
+**Allows Codex to work in your favourite chat apps and coding tools.**
+
+[PyPI](https://pypi.org/project/chatmock/)
+[PyPI package](https://pypi.org/project/chatmock/)
+[License](LICENSE)
+[Stars](https://github.com/RayBytes/ChatMock/stargazers)
+[Commits](https://github.com/RayBytes/ChatMock/commits/main)
+[Issues](https://github.com/RayBytes/ChatMock/issues)
+
+
+
+
-## What It Does
+
-ChatMock runs a local server that creates an OpenAI/Ollama compatible API, and requests are then fulfilled using your authenticated ChatGPT login with the oauth client of Codex, OpenAI's coding CLI tool. This allows you to use GPT-5, GPT-5-Codex, and other models right through your OpenAI account, without requiring an api key. You are then able to use it in other chat apps or other coding tools.
-This does require a paid ChatGPT account.
-
-## Quickstart
-
-### Homebrew
+## Install
+#### Homebrew
```bash
brew tap RayBytes/chatmock
brew install chatmock
```
-### CLI
-
+#### pipx / pip
```bash
pipx install chatmock
```
-### GUI
+#### GUI
+Download from [releases](https://github.com/RayBytes/ChatMock/releases) (macOS & Windows)
-If you're on **macOS** or **Windows**, you can download the GUI app from the [GitHub releases](https://github.com/RayBytes/ChatMock/releases).
+#### Docker
+See [DOCKER.md](DOCKER.md)
-### Python
-If you wish to just simply run this as a python flask server, you are also freely welcome too.
+
-Clone or download this repository, then cd into the project directory. Then follow the instrunctions listed below.
-
-1. Sign in with your ChatGPT account and follow the prompts
-```bash
-python chatmock.py login
-```
-You can make sure this worked by running `python chatmock.py info`
-
-2. After the login completes successfully, you can just simply start the local server
+## Getting Started
```bash
-python chatmock.py serve
+# 1. Sign in with your ChatGPT account
+chatmock login
+
+# 2. Start the server
+chatmock serve
```
-Then, you can simply use the address and port as the baseURL as you require (http://127.0.0.1:8000 by default)
-**Reminder:** When setting a baseURL in other applications, make you sure you include /v1/ at the end of the URL if you're using this as a OpenAI compatible endpoint (e.g http://127.0.0.1:8000/v1)
+The server runs at `http://127.0.0.1:8000` by default. Use `http://127.0.0.1:8000/v1` as your base URL for OpenAI-compatible apps.
-### Docker
+
-Read [the docker instrunctions here](https://github.com/RayBytes/ChatMock/blob/main/DOCKER.md)
+## Usage
-# Examples
-
-### Python
+
+Python
```python
from openai import OpenAI
client = OpenAI(
base_url="http://127.0.0.1:8000/v1",
- api_key="key" # ignored
+ api_key="anything" # not checked
)
-resp = client.chat.completions.create(
+response = client.chat.completions.create(
model="gpt-5.4",
- messages=[{"role": "user", "content": "hello world"}]
+ messages=[{"role": "user", "content": "hello"}]
)
-
-print(resp.choices[0].message.content)
+print(response.choices[0].message.content)
```
-### curl
+
+
+
+cURL
```bash
curl http://127.0.0.1:8000/v1/chat/completions \
- -H "Authorization: Bearer key" \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-5.4",
- "messages": [{"role":"user","content":"hello world"}]
+ "messages": [{"role": "user", "content": "hello"}]
}'
```
-# What's supported
+
-- Tool/Function calling
-- Vision/Image understanding
-- Thinking summaries (through thinking tags)
-- Thinking effort
+
-## Notes & Limits
+## Supported Models
-- Requires an active, paid ChatGPT account.
-- Some context length might be taken up by internal instructions (but they dont seem to degrade the model)
-- Use responsibly and at your own risk. This project is not affiliated with OpenAI, and is a educational exercise.
-
-# Supported models
- `gpt-5.4`
- `gpt-5.4-mini`
- `gpt-5.2`
- `gpt-5.1`
- `gpt-5`
- `gpt-5.3-codex`
-- `gpt-5-codex`
+- `gpt-5.3-codex-spark`
- `gpt-5.2-codex`
+- `gpt-5-codex`
- `gpt-5.1-codex`
- `gpt-5.1-codex-max`
- `gpt-5.1-codex-mini`
- `codex-mini`
-# Customisation / Configuration
+
-### Thinking effort
+## Features
-- `--reasoning-effort` (choice of none,minimal,low,medium,high,xhigh)
-GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overrided by the API request with a different effort set. The default reasoning effort without setting this parameter is `medium`.
- The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high` while `gpt-5.1-codex-max` adds `xhigh`. The `gpt-5.2` and `gpt-5.3` families (including codex) support `low`, `medium`, `high`, and `xhigh`. `gpt-5.4` supports `none`, `low`, `medium`, `high`, and `xhigh`.
+- Tool / function calling
+- Vision / image input
+- Thinking summaries (via think tags)
+- Configurable thinking effort
+- Fast mode for supported models
+- Web search tool
+- OpenAI-compatible `/v1/responses` (HTTP + WebSocket)
+- Ollama-compatible endpoints
+- Reasoning effort exposed as separate models (optional)
-### Thinking summaries
+
-- `--reasoning-summary` (choice of auto,concise,detailed,none)
-Models like GPT-5 do not return raw thinking content, but instead return thinking summaries. These can also be customised by you.
+## Configuration
-### OpenAI Tools
+All flags go after `chatmock serve`. These can also be set as environment variables.
-- `--enable-web-search`
-You can also access OpenAI tools through this project. Currently, only web search is available.
-You can enable it by starting the server with this parameter, which will allow OpenAI to determine when a request requires a web search, or you can use the following parameters during a request to the API to enable web search:
-
-`responses_tools`: supports `[{"type":"web_search"}]` / `{ "type": "web_search_preview" }`
-`responses_tool_choice`: `"auto"` or `"none"`
+| Flag | Env var | Options | Default | Description |
+|------|---------|---------|---------|-------------|
+| `--reasoning-effort` | `CHATGPT_LOCAL_REASONING_EFFORT` | none, minimal, low, medium, high, xhigh | medium | How hard the model thinks |
+| `--reasoning-summary` | `CHATGPT_LOCAL_REASONING_SUMMARY` | auto, concise, detailed, none | auto | Thinking summary verbosity |
+| `--reasoning-compat` | `CHATGPT_LOCAL_REASONING_COMPAT` | legacy, o3, think-tags, current | think-tags | How reasoning is returned to the client |
+| `--fast-mode` | `CHATGPT_LOCAL_FAST_MODE` | true/false | false | Priority processing for supported models |
+| `--enable-web-search` | `CHATGPT_LOCAL_ENABLE_WEB_SEARCH` | true/false | false | Allow the model to search the web |
+| `--expose-reasoning-models` | `CHATGPT_LOCAL_EXPOSE_REASONING_MODELS` | true/false | false | List each reasoning level as its own model |
+
+
+Web search in a request
-#### Example usage
```json
{
"model": "gpt-5.4",
- "messages": [{"role":"user","content":"Find current METAR rules"}],
- "stream": true,
+ "messages": [{"role": "user", "content": "latest news on ..."}],
"responses_tools": [{"type": "web_search"}],
"responses_tool_choice": "auto"
}
```
-### Expose reasoning models
+
-- `--expose-reasoning-models`
-If your preferred app doesn’t support selecting reasoning effort, or you just want a simpler approach, this parameter exposes each reasoning level as a separate, queryable model. Each reasoning level also appears individually under /v1/models, so model pickers in your favorite chat apps will list all reasoning options as distinct models you can switch between.
+
+Fast mode in a request
+
+```json
+{
+ "model": "gpt-5.4",
+ "input": "summarize this",
+ "fast_mode": true
+}
+```
+
+
+
+
## Notes
-If you wish to have the fastest responses, I'd recommend setting `--reasoning-effort` to low, and `--reasoning-summary` to none.
-All parameters and choices can be seen by sending `python chatmock.py serve --h`
-The context size of this route is also larger than what you get access to in the regular ChatGPT app.
-When the model returns a thinking summary, the model will send back thinking tags to make it compatible with chat apps. **If you don't like this behavior, you can instead set `--reasoning-compat` to legacy, and reasoning will be set in the reasoning tag instead of being returned in the actual response text.**
+Use responsibly and at your own risk. This project is not affiliated with OpenAI.
+
## Star History
diff --git a/chatmock/app.py b/chatmock/app.py
index 9727b5a..e4541dc 100644
--- a/chatmock/app.py
+++ b/chatmock/app.py
@@ -1,11 +1,13 @@
from __future__ import annotations
from flask import Flask, jsonify
+from flask_sock import Sock
from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
from .http import build_cors_headers
from .routes_openai import openai_bp
from .routes_ollama import ollama_bp
+from .websocket_routes import register_websocket_routes
def create_app(
@@ -14,6 +16,7 @@ def create_app(
reasoning_effort: str = "medium",
reasoning_summary: str = "auto",
reasoning_compat: str = "think-tags",
+ fast_mode: bool = False,
debug_model: str | None = None,
expose_reasoning_models: bool = False,
default_web_search: bool = False,
@@ -26,6 +29,7 @@ def create_app(
REASONING_EFFORT=reasoning_effort,
REASONING_SUMMARY=reasoning_summary,
REASONING_COMPAT=reasoning_compat,
+ FAST_MODE=bool(fast_mode),
DEBUG_MODEL=debug_model,
BASE_INSTRUCTIONS=BASE_INSTRUCTIONS,
GPT5_CODEX_INSTRUCTIONS=GPT5_CODEX_INSTRUCTIONS,
@@ -46,5 +50,7 @@ def create_app(
app.register_blueprint(openai_bp)
app.register_blueprint(ollama_bp)
+ sock = Sock(app)
+ register_websocket_routes(sock)
return app
diff --git a/chatmock/cli.py b/chatmock/cli.py
index 9ee41be..78a69ae 100644
--- a/chatmock/cli.py
+++ b/chatmock/cli.py
@@ -267,6 +267,7 @@ def cmd_serve(
reasoning_effort: str,
reasoning_summary: str,
reasoning_compat: str,
+ fast_mode: bool,
debug_model: str | None,
expose_reasoning_models: bool,
default_web_search: bool,
@@ -277,6 +278,7 @@ def cmd_serve(
reasoning_effort=reasoning_effort,
reasoning_summary=reasoning_summary,
reasoning_compat=reasoning_compat,
+ fast_mode=fast_mode,
debug_model=debug_model,
expose_reasoning_models=expose_reasoning_models,
default_web_search=default_web_search,
@@ -309,6 +311,12 @@ def main() -> None:
default=os.getenv("CHATGPT_LOCAL_DEBUG_MODEL"),
help="Forcibly override requested 'model' with this value",
)
+ p_serve.add_argument(
+ "--fast-mode",
+ action=argparse.BooleanOptionalAction,
+ default=(os.getenv("CHATGPT_LOCAL_FAST_MODE") or "").strip().lower() in ("1", "true", "yes", "on"),
+ help="Enable GPT fast mode by default for supported models; request-level overrides still take precedence.",
+ )
p_serve.add_argument(
"--reasoning-effort",
choices=["none", "minimal", "low", "medium", "high", "xhigh"],
@@ -366,6 +374,7 @@ def main() -> None:
reasoning_effort=args.reasoning_effort,
reasoning_summary=args.reasoning_summary,
reasoning_compat=args.reasoning_compat,
+ fast_mode=args.fast_mode,
debug_model=args.debug_model,
expose_reasoning_models=args.expose_reasoning_models,
default_web_search=args.enable_web_search,
diff --git a/chatmock/fast_mode.py b/chatmock/fast_mode.py
new file mode 100644
index 0000000..8dbb557
--- /dev/null
+++ b/chatmock/fast_mode.py
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+from .model_registry import normalize_model_name
+
+
+PRIORITY_SUPPORTED_MODELS = frozenset(
+ (
+ "gpt-5.4",
+ "gpt-5.2",
+ "gpt-5.1",
+ "gpt-5",
+ "gpt-5.1-codex",
+ "gpt-5-codex",
+ )
+)
+
+_TRUE_STRINGS = {"1", "true", "yes", "on"}
+_FALSE_STRINGS = {"0", "false", "no", "off"}
+
+
+def parse_optional_bool(value: Any) -> bool | None:
+    """Interpret *value* as an optional boolean flag.
+
+    Real booleans are returned unchanged. Strings are trimmed and lower-cased,
+    then matched against ``_TRUE_STRINGS`` and ``_FALSE_STRINGS``. Any other
+    input, including an unrecognised string, yields ``None``.
+    """
+    if isinstance(value, bool):
+        return value
+    if not isinstance(value, str):
+        return None
+    token = value.strip().lower()
+    if token in _TRUE_STRINGS:
+        return True
+    return False if token in _FALSE_STRINGS else None
+
+
+def supports_priority_service_tier(model: str | None) -> bool:
+    """Return whether *model*, once normalized, is in ``PRIORITY_SUPPORTED_MODELS``."""
+    canonical = normalize_model_name(model)
+    return canonical in PRIORITY_SUPPORTED_MODELS
+
+
+@dataclass(frozen=True)
+class ServiceTierResolution:
+    """Immutable outcome of resolving the service tier for one request."""
+
+    # Tier to use for the request, or None when no tier applies.
+    service_tier: str | None
+    # Set when the request itself asked for "priority" on an unsupported model.
+    error_message: str | None = None
+    # Set when the server-wide default asked for "priority" on an unsupported model.
+    warning_message: str | None = None
+    # True when the tier came from the server default rather than the request.
+    used_server_default: bool = False
+
+
+def resolve_service_tier(
+    model: str | None,
+    *,
+    request_fast_mode: Any = None,
+    request_service_tier: Any = None,
+    server_fast_mode: bool = False,
+) -> ServiceTierResolution:
+    """Decide which service tier applies to a request for *model*.
+
+    Precedence, highest first:
+
+    1. ``request_fast_mode`` when it parses to a boolean (``True`` selects
+       ``"priority"``, ``False`` selects no tier);
+    2. a non-blank ``request_service_tier`` string, trimmed and lower-cased;
+    3. ``server_fast_mode``, which selects ``"priority"``.
+
+    If the selected tier is ``"priority"`` but the model does not support it,
+    no tier is returned. The explanation goes in ``error_message`` when the
+    request asked for it, and in ``warning_message`` when only the server
+    default did.
+    """
+    fast_flag = parse_optional_bool(request_fast_mode)
+    has_tier_string = isinstance(request_service_tier, str) and bool(request_service_tier.strip())
+
+    # Where the decision comes from: the request wins over the server default.
+    from_request = fast_flag is not None or has_tier_string
+    from_server = not from_request and bool(server_fast_mode)
+
+    if fast_flag is not None:
+        chosen = "priority" if fast_flag else None
+    elif has_tier_string:
+        chosen = request_service_tier.strip().lower()
+    elif from_server:
+        chosen = "priority"
+    else:
+        chosen = None
+
+    # Only the "priority" tier is model-restricted; anything else passes through.
+    if chosen != "priority" or supports_priority_service_tier(model):
+        return ServiceTierResolution(
+            service_tier=chosen,
+            used_server_default=from_server,
+        )
+
+    normalized = normalize_model_name(model)
+    message = (
+        f"Fast mode is not supported for model '{normalized}'. "
+        "Use a supported GPT-5 priority-processing model or disable fast mode for this request."
+    )
+    if from_request:
+        return ServiceTierResolution(
+            service_tier=None,
+            error_message=message,
+            used_server_default=from_server,
+        )
+    return ServiceTierResolution(
+        service_tier=None,
+        warning_message=message,
+        used_server_default=from_server,
+    )
diff --git a/chatmock/model_registry.py b/chatmock/model_registry.py
index b171883..9bddbeb 100644
--- a/chatmock/model_registry.py
+++ b/chatmock/model_registry.py
@@ -62,6 +62,14 @@ _MODEL_SPECS = (
variant_efforts=("xhigh", "high", "medium", "low"),
uses_codex_instructions=True,
),
+ ModelSpec(
+ public_id="gpt-5.3-codex-spark",
+ upstream_id="gpt-5.3-codex-spark",
+ aliases=("gpt5.3-codex-spark", "gpt-5.3-codex-spark-latest"),
+ allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+ variant_efforts=("xhigh", "high", "medium", "low"),
+ uses_codex_instructions=True,
+ ),
ModelSpec(
public_id="gpt-5-codex",
upstream_id="gpt-5-codex",
diff --git a/chatmock/responses_api.py b/chatmock/responses_api.py
new file mode 100644
index 0000000..9aae843
--- /dev/null
+++ b/chatmock/responses_api.py
@@ -0,0 +1,242 @@
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from typing import Any, Dict, Iterable, Iterator, List
+
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
+from .fast_mode import ServiceTierResolution, resolve_service_tier
+from .model_registry import (
+ allowed_efforts_for_model,
+ extract_reasoning_from_model_name,
+ normalize_model_name,
+ uses_codex_instructions,
+)
+from .reasoning import build_reasoning_param
+from .session import ensure_session_id
+
+
+class ResponsesRequestError(Exception):
+    """Raised when a ``/v1/responses`` request cannot be normalized.
+
+    This is deliberately a plain exception rather than a frozen dataclass. A
+    frozen dataclass rejects *every* attribute assignment, so Python-level code
+    that annotates an in-flight exception (for example by assigning
+    ``__traceback__`` or ``__cause__``) would fail with ``FrozenInstanceError``
+    instead of propagating this error.
+
+    Args:
+        message: Human-readable description returned to the client.
+        status_code: HTTP status to respond with (defaults to 400).
+        code: Optional machine-readable error code.
+    """
+
+    def __init__(self, message: str, status_code: int = 400, code: str | None = None) -> None:
+        super().__init__(message)
+        self.message = message
+        self.status_code = status_code
+        self.code = code
+
+    def __str__(self) -> str:
+        return self.message
+
+
+@dataclass(frozen=True)
+class NormalizedResponsesRequest:
+    """Result bundle returned by ``normalize_responses_payload``."""
+
+    # Normalized copy of the request payload.
+    payload: Dict[str, Any]
+    # The ``model`` value from the request, or None when it was not a string.
+    requested_model: str | None
+    # Model name produced by ``normalize_model_name``.
+    normalized_model: str
+    # Session id obtained from ``ensure_session_id``.
+    session_id: str
+    # Outcome of ``resolve_service_tier`` (may carry a warning message).
+    service_tier_resolution: ServiceTierResolution
+
+
+def instructions_for_model(config: Dict[str, Any], model: str) -> str:
+    """Pick the instruction text to use for *model*.
+
+    Models flagged by ``uses_codex_instructions`` get the Codex instructions
+    (config value, else the module default) as long as that value is a
+    non-blank string. Everything else gets the base instructions.
+    """
+    fallback = config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
+    if not uses_codex_instructions(model):
+        return fallback
+    candidate = config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
+    usable = isinstance(candidate, str) and bool(candidate.strip())
+    return candidate if usable else fallback
+
+
+def extract_client_session_id(headers: Any) -> str | None:
+    """Return the client-supplied session id from *headers*, if any.
+
+    ``X-Session-Id`` is checked first, then ``session_id``; the first truthy
+    value wins. Any failure while reading *headers* is treated as "no id".
+    """
+    try:
+        for header_name in ("X-Session-Id", "session_id"):
+            found = headers.get(header_name)
+            if found:
+                return found
+    except Exception:
+        return None
+    return None
+
+
+def _input_items_for_session(raw_input: Any) -> List[Dict[str, Any]]:
+    """Coerce a Responses ``input`` value into a list of item dicts.
+
+    A dict is wrapped in a list, a list keeps only its dict entries, and a
+    non-blank string becomes a single user message carrying that text. Every
+    other value produces an empty list.
+    """
+    if isinstance(raw_input, dict):
+        return [raw_input]
+    if isinstance(raw_input, list):
+        return list(filter(lambda entry: isinstance(entry, dict), raw_input))
+    if not isinstance(raw_input, str) or not raw_input.strip():
+        return []
+    text_part = {"type": "input_text", "text": raw_input}
+    return [{"type": "message", "role": "user", "content": [text_part]}]
+
+
+def canonicalize_responses_input(raw_input: Any) -> Any:
+    """Normalize a Responses ``input`` value to list-of-items form.
+
+    Lists, dicts and strings are converted by ``_input_items_for_session``
+    (dict entries kept, single dict wrapped, text turned into one user
+    message, blank text into an empty list). Any other value is returned
+    unchanged.
+    """
+    if isinstance(raw_input, (list, dict, str)):
+        return _input_items_for_session(raw_input)
+    return raw_input
+
+
+def normalize_responses_payload(
+    payload: Dict[str, Any],
+    *,
+    config: Dict[str, Any],
+    client_session_id: str | None = None,
+) -> NormalizedResponsesRequest:
+    """Build the payload for a Responses request from a client payload.
+
+    Works on a shallow copy of *payload*; the caller's dict is not reassigned.
+
+    Args:
+        payload: Parsed JSON body of the client request.
+        config: Application config mapping (reads ``DEBUG_MODEL``,
+            ``REASONING_EFFORT``, ``REASONING_SUMMARY``, ``DEFAULT_WEB_SEARCH``,
+            ``FAST_MODE`` and the instruction settings).
+        client_session_id: Optional session id supplied by the client; passed
+            through to ``ensure_session_id``.
+
+    Returns:
+        A ``NormalizedResponsesRequest`` holding the normalized payload, the
+        requested and normalized model names, the session id and the service
+        tier resolution.
+
+    Raises:
+        ResponsesRequestError: When service tier resolution reports an error.
+    """
+    requested_model = payload.get("model") if isinstance(payload.get("model"), str) else None
+    normalized_model = normalize_model_name(requested_model, config.get("DEBUG_MODEL"))
+
+    normalized = dict(payload)
+    normalized["model"] = normalized_model
+
+    if "input" in normalized:
+        normalized["input"] = canonicalize_responses_input(normalized.get("input"))
+
+    # Default to not storing the response unless the client set "store" itself.
+    if "store" not in normalized:
+        normalized["store"] = False
+
+    # Fall back to the model's default instructions when none (or blank) were sent.
+    instructions = normalized.get("instructions")
+    if not isinstance(instructions, str) or not instructions.strip():
+        instructions = instructions_for_model(config, normalized_model)
+    normalized["instructions"] = instructions
+
+    # An explicit "reasoning" object in the request takes precedence over any
+    # reasoning settings extracted from the requested model name.
+    reasoning_effort = config.get("REASONING_EFFORT", "medium")
+    reasoning_summary = config.get("REASONING_SUMMARY", "auto")
+    reasoning_overrides = (
+        normalized.get("reasoning")
+        if isinstance(normalized.get("reasoning"), dict)
+        else extract_reasoning_from_model_name(requested_model)
+    )
+    normalized["reasoning"] = build_reasoning_param(
+        reasoning_effort,
+        reasoning_summary,
+        reasoning_overrides,
+        allowed_efforts=allowed_efforts_for_model(normalized_model),
+    )
+
+    # Keep only string entries of "include" and always request encrypted reasoning content.
+    include = normalized.get("include")
+    include_list = [item for item in include if isinstance(item, str)] if isinstance(include, list) else []
+    if "reasoning.encrypted_content" not in include_list:
+        include_list.append("reasoning.encrypted_content")
+    normalized["include"] = include_list
+
+    # Add the default web_search tool only when the request has no tools and
+    # did not set tool_choice to "none".
+    tools = normalized.get("tools")
+    if (not isinstance(tools, list) or not tools) and bool(config.get("DEFAULT_WEB_SEARCH")):
+        tool_choice = normalized.get("tool_choice")
+        if not (isinstance(tool_choice, str) and tool_choice.strip().lower() == "none"):
+            normalized["tools"] = [{"type": "web_search"}]
+
+    service_tier_resolution = resolve_service_tier(
+        normalized_model,
+        request_fast_mode=normalized.get("fast_mode"),
+        request_service_tier=normalized.get("service_tier"),
+        server_fast_mode=bool(config.get("FAST_MODE")),
+    )
+    if service_tier_resolution.error_message:
+        raise ResponsesRequestError(service_tier_resolution.error_message)
+    if service_tier_resolution.service_tier is None:
+        normalized.pop("service_tier", None)
+    else:
+        normalized["service_tier"] = service_tier_resolution.service_tier
+    # "fast_mode" has been resolved into service_tier above; drop it from the payload.
+    normalized.pop("fast_mode", None)
+
+    # Use the session id as prompt_cache_key unless the client supplied a non-blank one.
+    input_items = _input_items_for_session(normalized.get("input"))
+    session_id = ensure_session_id(instructions, input_items, client_session_id)
+    prompt_cache_key = normalized.get("prompt_cache_key")
+    if not isinstance(prompt_cache_key, str) or not prompt_cache_key.strip():
+        normalized["prompt_cache_key"] = session_id
+
+    return NormalizedResponsesRequest(
+        payload=normalized,
+        requested_model=requested_model,
+        normalized_model=normalized_model,
+        session_id=session_id,
+        service_tier_resolution=service_tier_resolution,
+    )
+
+
+def iter_sse_event_payloads(upstream: Any) -> Iterator[Dict[str, Any]]:
+    """Yield the JSON object carried by each ``data: `` line of an SSE stream.
+
+    Lines are read from ``upstream.iter_lines``. Blank lines, lines without the
+    ``data: `` prefix, empty payloads, unparsable JSON and non-object JSON are
+    skipped. Iteration stops at the ``[DONE]`` sentinel.
+    """
+    prefix = "data: "
+    for raw_line in upstream.iter_lines(decode_unicode=False):
+        if not raw_line:
+            continue
+        if isinstance(raw_line, (bytes, bytearray)):
+            text = raw_line.decode("utf-8", errors="ignore")
+        else:
+            text = raw_line
+        if not text.startswith(prefix):
+            continue
+        body = text[len(prefix) :].strip()
+        if body == "[DONE]":
+            break
+        if not body:
+            continue
+        try:
+            parsed = json.loads(body)
+        except Exception:
+            continue
+        if isinstance(parsed, dict):
+            yield parsed
+
+
+def aggregate_response_from_sse(
+    upstream: Any,
+    *,
+    on_event: Any | None = None,
+) -> tuple[Dict[str, Any] | None, Dict[str, Any] | None]:
+    """Consume an SSE stream and return ``(response, error)``.
+
+    The most recent ``response`` object seen on any event is kept. Reading
+    stops at ``response.completed`` or ``response.failed``. On failure, the
+    error is the response's own ``error`` object when it has one, otherwise a
+    generic ``response.failed`` message. ``on_event`` (if callable) is invoked
+    for every event and its exceptions are ignored. ``upstream`` is always
+    closed before returning.
+    """
+    latest: Dict[str, Any] | None = None
+    failure: Dict[str, Any] | None = None
+    notify = on_event if callable(on_event) else None
+    try:
+        for event in iter_sse_event_payloads(upstream):
+            if notify is not None:
+                try:
+                    notify(event)
+                except Exception:
+                    pass
+            candidate = event.get("response")
+            has_response = isinstance(candidate, dict)
+            if has_response:
+                latest = candidate
+            event_type = event.get("type")
+            if event_type == "response.completed":
+                break
+            if event_type == "response.failed":
+                detail = candidate.get("error") if has_response else None
+                if isinstance(detail, dict):
+                    failure = {"error": detail}
+                else:
+                    failure = {"error": {"message": "response.failed"}}
+                break
+    finally:
+        upstream.close()
+    return latest, failure
+
+
+def _dispatch_sse_lines(buffer: bytes, on_event: Any) -> bytes:
+    """Pass every complete ``data: `` line in *buffer* to *on_event*.
+
+    Returns the trailing partial line (possibly empty) so the caller can keep
+    accumulating it. Non-``data: `` lines, empty payloads, the ``[DONE]``
+    sentinel, malformed JSON and non-object JSON are skipped.
+    """
+    # A single split yields all complete lines plus the unfinished tail.
+    *complete_lines, remainder = buffer.split(b"\n")
+    for line in complete_lines:
+        line = line.rstrip(b"\r")
+        if not line.startswith(b"data: "):
+            continue
+        data = line[len(b"data: ") :].strip()
+        if not data or data == b"[DONE]":
+            continue
+        try:
+            evt = json.loads(data.decode("utf-8", errors="ignore"))
+        except Exception:
+            continue
+        if isinstance(evt, dict):
+            try:
+                on_event(evt)
+            except Exception:
+                # Observers are best-effort; they must never break the byte stream.
+                pass
+    return remainder
+
+
+def stream_upstream_bytes(
+    upstream: Any,
+    *,
+    on_event: Any | None = None,
+) -> Iterable[bytes]:
+    """Relay upstream chunks unchanged while optionally observing SSE events.
+
+    Every non-empty chunk from ``upstream.iter_content`` is yielded as-is. When
+    *on_event* is callable, the chunks are also buffered, split into lines and
+    each JSON object found on a ``data: `` line is passed to *on_event*.
+    ``upstream`` is closed when the generator finishes or is closed.
+
+    Args:
+        upstream: Response-like object exposing ``iter_content`` and ``close``.
+        on_event: Optional callback receiving each decoded event dict.
+
+    Yields:
+        The chunks produced by ``upstream.iter_content``.
+    """
+    observe = callable(on_event)
+    pending = b""
+    try:
+        for chunk in upstream.iter_content(chunk_size=None):
+            if not chunk:
+                continue
+            if observe:
+                if isinstance(chunk, (bytes, bytearray, memoryview)):
+                    # bytes() keeps the raw data; str() on a bytearray would give its repr.
+                    pending += bytes(chunk)
+                else:
+                    pending += str(chunk).encode("utf-8", errors="ignore")
+                if b"\n" in pending:
+                    pending = _dispatch_sse_lines(pending, on_event)
+            yield chunk
+    finally:
+        upstream.close()
diff --git a/chatmock/routes_ollama.py b/chatmock/routes_ollama.py
index 7116d67..96c7c8b 100644
--- a/chatmock/routes_ollama.py
+++ b/chatmock/routes_ollama.py
@@ -8,9 +8,11 @@ from typing import Any, Dict, List
from flask import Blueprint, Response, current_app, jsonify, make_response, request, stream_with_context
from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
+from .fast_mode import resolve_service_tier
from .limits import record_rate_limits_from_response
from .http import build_cors_headers
from .model_registry import list_public_models, uses_codex_instructions
+from .responses_api import instructions_for_model
from .reasoning import (
allowed_efforts_for_model,
build_reasoning_param,
@@ -71,12 +73,7 @@ def ollama_version() -> Response:
def _instructions_for_model(model: str) -> str:
- base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
- if uses_codex_instructions(model):
- codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
- if isinstance(codex, str) and codex.strip():
- return codex
- return base
+ return instructions_for_model(current_app.config, model)
_OLLAMA_FAKE_EVAL = {
@@ -254,6 +251,19 @@ def ollama_chat() -> Response:
model_reasoning = extract_reasoning_from_model_name(model)
normalized_model = normalize_model_name(model)
+ service_tier_resolution = resolve_service_tier(
+ normalized_model,
+ request_fast_mode=payload.get("fast_mode"),
+ request_service_tier=payload.get("service_tier"),
+ server_fast_mode=bool(current_app.config.get("FAST_MODE")),
+ )
+ if service_tier_resolution.warning_message and verbose:
+ print(f"[FastMode] {service_tier_resolution.warning_message}")
+ if service_tier_resolution.error_message:
+ err = {"error": service_tier_resolution.error_message}
+ if verbose:
+ _log_json("OUT POST /api/chat", err)
+ return jsonify(err), 400
upstream, error_resp = start_upstream_request(
normalized_model,
input_items,
@@ -267,6 +277,7 @@ def ollama_chat() -> Response:
model_reasoning,
allowed_efforts=allowed_efforts_for_model(model),
),
+ service_tier=service_tier_resolution.service_tier,
)
if error_resp is not None:
if verbose:
@@ -307,6 +318,7 @@ def ollama_chat() -> Response:
model_reasoning,
allowed_efforts=allowed_efforts_for_model(model),
),
+ service_tier=service_tier_resolution.service_tier,
)
record_rate_limits_from_response(upstream2)
if err2 is None and upstream2 is not None and upstream2.status_code < 400:
diff --git a/chatmock/routes_openai.py b/chatmock/routes_openai.py
index f1c6e32..437ebef 100644
--- a/chatmock/routes_openai.py
+++ b/chatmock/routes_openai.py
@@ -7,16 +7,31 @@ from typing import Any, Dict, List
from flask import Blueprint, Response, current_app, jsonify, make_response, request
from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
+from .fast_mode import resolve_service_tier
from .limits import record_rate_limits_from_response
from .http import build_cors_headers
from .model_registry import list_public_models, uses_codex_instructions
+from .responses_api import (
+ ResponsesRequestError,
+ aggregate_response_from_sse,
+ extract_client_session_id,
+ instructions_for_model,
+ normalize_responses_payload,
+ stream_upstream_bytes,
+)
from .reasoning import (
allowed_efforts_for_model,
apply_reasoning_to_message,
build_reasoning_param,
extract_reasoning_from_model_name,
)
-from .upstream import normalize_model_name, start_upstream_request
+from .session import (
+ clear_responses_reuse_state,
+ note_responses_final_response,
+ note_responses_stream_event,
+ prepare_responses_request_for_session,
+)
+from .upstream import normalize_model_name, start_upstream_raw_request, start_upstream_request
from .utils import (
convert_chat_messages_to_responses_input,
convert_tools_chat_to_responses,
@@ -59,12 +74,32 @@ def _wrap_stream_logging(label: str, iterator, enabled: bool):
def _instructions_for_model(model: str) -> str:
- base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
- if uses_codex_instructions(model):
- codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
- if isinstance(codex, str) and codex.strip():
- return codex
- return base
+ return instructions_for_model(current_app.config, model)
+
+
+def _service_tier_from_payload(
+    model: str,
+    payload: Dict[str, Any],
+    *,
+    verbose: bool = False,
+) -> tuple[str | None, Response | None]:
+    """Resolve the service tier for *payload* and return ``(tier, error_response)``.
+
+    Exactly one element is meaningful: on success the tier (possibly ``None``)
+    with no response; on failure ``None`` with a 400 JSON response that has the
+    CORS headers applied. Warnings are printed only in verbose mode.
+    """
+    resolution = resolve_service_tier(
+        model,
+        request_fast_mode=payload.get("fast_mode"),
+        request_service_tier=payload.get("service_tier"),
+        server_fast_mode=bool(current_app.config.get("FAST_MODE")),
+    )
+    if verbose and resolution.warning_message:
+        print(f"[FastMode] {resolution.warning_message}")
+
+    if not resolution.error_message:
+        return resolution.service_tier, None
+
+    error_body = {"error": {"message": resolution.error_message}}
+    if verbose:
+        _log_json("OUT POST service_tier resolution", error_body)
+    response = make_response(jsonify(error_body), 400)
+    cors_headers = build_cors_headers()
+    for header, value in cors_headers.items():
+        response.headers.setdefault(header, value)
+    return None, response
@openai_bp.route("/v1/chat/completions", methods=["POST"])
@@ -178,6 +213,9 @@ def chat_completions() -> Response:
reasoning_overrides,
allowed_efforts=allowed_efforts_for_model(model),
)
+ service_tier, tier_error = _service_tier_from_payload(model, payload, verbose=verbose)
+ if tier_error is not None:
+ return tier_error
upstream, error_resp = start_upstream_request(
model,
@@ -187,6 +225,7 @@ def chat_completions() -> Response:
tool_choice=tool_choice,
parallel_tool_calls=parallel_tool_calls,
reasoning_param=reasoning_param,
+ service_tier=service_tier,
)
if error_resp is not None:
if verbose:
@@ -224,6 +263,7 @@ def chat_completions() -> Response:
tool_choice=safe_choice,
parallel_tool_calls=parallel_tool_calls,
reasoning_param=reasoning_param,
+ service_tier=service_tier,
)
record_rate_limits_from_response(upstream2)
if err2 is None and upstream2 is not None and upstream2.status_code < 400:
@@ -413,11 +453,15 @@ def completions() -> Response:
reasoning_overrides,
allowed_efforts=allowed_efforts_for_model(model),
)
+ service_tier, tier_error = _service_tier_from_payload(model, payload, verbose=verbose)
+ if tier_error is not None:
+ return tier_error
upstream, error_resp = start_upstream_request(
model,
input_items,
instructions=_instructions_for_model(model),
reasoning_param=reasoning_param,
+ service_tier=service_tier,
)
if error_resp is not None:
if verbose:
@@ -529,6 +573,161 @@ def completions() -> Response:
return resp
+@openai_bp.route("/v1/responses", methods=["POST"])
+def responses_create() -> Response:  # Handle POST /v1/responses: validate the body, forward it upstream, return either an SSE stream or one JSON object.
+ verbose = bool(current_app.config.get("VERBOSE"))
+ raw = request.get_data(cache=True, as_text=True) or ""  # body is parsed by hand below so malformed JSON gets an explicit 400
+ if verbose:
+ try:
+ print("IN POST /v1/responses\n" + raw)
+ except Exception:
+ pass
+
+ try:
+ payload = json.loads(raw) if raw else {}  # an empty body is treated as an empty JSON object
+ except Exception:
+ err = {"error": {"message": "Invalid JSON body"}}
+ if verbose:
+ _log_json("OUT POST /v1/responses", err)
+ return jsonify(err), 400  # NOTE(review): no CORS headers here, unlike the later error paths - confirm an app-level hook adds them
+
+ if not isinstance(payload, dict):
+ err = {"error": {"message": "Request body must be a JSON object"}}
+ if verbose:
+ _log_json("OUT POST /v1/responses", err)
+ return jsonify(err), 400
+
+ try:
+ normalized = normalize_responses_payload(
+ payload,
+ config=current_app.config,
+ client_session_id=extract_client_session_id(request.headers),
+ )
+ except ResponsesRequestError as exc:  # validation failure: reply with the status/code carried by the exception
+ err: Dict[str, Any] = {"error": {"message": str(exc)}}
+ if exc.code:
+ err["error"]["code"] = exc.code
+ if verbose:
+ _log_json("OUT POST /v1/responses", err)
+ return jsonify(err), exc.status_code
+
+ if normalized.service_tier_resolution.warning_message and verbose:
+ print(f"[FastMode] {normalized.service_tier_resolution.warning_message}")
+
+ prepared = prepare_responses_request_for_session(
+ normalized.session_id,
+ normalized.payload,
+ allow_previous_response_id=False,  # HTTP path: the input is never rewritten into a delta + previous_response_id
+ )
+ stream_req = bool(prepared.payload.get("stream", False))  # what the client asked for; upstream is always called with stream=True
+ upstream_payload = dict(prepared.payload)
+ upstream_payload["stream"] = True
+ upstream, error_resp = start_upstream_raw_request(
+ upstream_payload,
+ session_id=normalized.session_id,
+ stream=True,
+ )
+ if error_resp is not None:
+ clear_responses_reuse_state(normalized.session_id)  # no upstream response will arrive, so drop the in-flight bookkeeping
+ if verbose:
+ try:
+ body = error_resp.get_data(as_text=True)
+ if body:
+ try:
+ parsed = json.loads(body)
+ except Exception:
+ parsed = body
+ _log_json("OUT POST /v1/responses", parsed)
+ except Exception:
+ pass
+ return error_resp
+
+ record_rate_limits_from_response(upstream)
+
+ if upstream.status_code >= 400:  # relay the upstream error body and status to the client
+ try:
+ err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"error": {"message": upstream.text}}
+ except Exception:
+ err_body = {"error": {"message": upstream.text or "Upstream error"}}
+ finally:
+ upstream.close()
+ clear_responses_reuse_state(normalized.session_id)
+ if verbose:
+ _log_json("OUT POST /v1/responses", err_body)
+ resp = make_response(jsonify(err_body), upstream.status_code)
+ for k, v in build_cors_headers().items():
+ resp.headers.setdefault(k, v)
+ return resp
+
+ if stream_req:  # streaming client: hand the upstream response to stream_upstream_bytes
+ if verbose:
+ print("OUT POST /v1/responses (streaming response)")
+ stream_iter = _wrap_stream_logging(
+ "STREAM OUT /v1/responses",
+ stream_upstream_bytes(
+ upstream,
+ on_event=lambda evt: note_responses_stream_event(normalized.session_id, evt),  # each event also updates the session bookkeeping
+ ),
+ verbose,
+ )
+ resp = Response(
+ stream_iter,
+ status=upstream.status_code,
+ mimetype="text/event-stream",
+ headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
+ )
+ for k, v in build_cors_headers().items():
+ resp.headers.setdefault(k, v)
+ return resp
+
+ content_type = upstream.headers.get("Content-Type", "")
+ if "application/json" in content_type.lower():  # upstream replied with plain JSON even though stream=True was sent
+ try:
+ body = upstream.json()
+ except Exception:
+ body = None
+ finally:
+ upstream.close()
+ if isinstance(body, dict):  # NOTE(review): a non-dict or unparseable body falls through to SSE aggregation on an already-closed response
+ note_responses_final_response(normalized.session_id, body)
+ if verbose:
+ _log_json("OUT POST /v1/responses", body)
+ resp = make_response(jsonify(body), upstream.status_code)
+ for k, v in build_cors_headers().items():
+ resp.headers.setdefault(k, v)
+ return resp
+
+ response_obj, error_obj = aggregate_response_from_sse(  # non-streaming client: presumably folds the SSE stream into one response object - verify helper
+ upstream,
+ on_event=lambda evt: note_responses_stream_event(normalized.session_id, evt),
+ )
+ if error_obj is not None:
+ clear_responses_reuse_state(normalized.session_id)
+ if verbose:
+ _log_json("OUT POST /v1/responses", error_obj)
+ resp = make_response(jsonify(error_obj), 502)
+ for k, v in build_cors_headers().items():
+ resp.headers.setdefault(k, v)
+ return resp
+
+ if response_obj is None:  # neither an error nor a response object came back from aggregation
+ clear_responses_reuse_state(normalized.session_id)
+ err = {"error": {"message": "Upstream response stream did not contain a completed response object"}}
+ if verbose:
+ _log_json("OUT POST /v1/responses", err)
+ resp = make_response(jsonify(err), 502)
+ for k, v in build_cors_headers().items():
+ resp.headers.setdefault(k, v)
+ return resp
+
+ if verbose:
+ _log_json("OUT POST /v1/responses", response_obj)
+ resp = make_response(jsonify(response_obj), upstream.status_code)
+ for k, v in build_cors_headers().items():
+ resp.headers.setdefault(k, v)
+ return resp
+
+
@openai_bp.route("/v1/models", methods=["GET"])
def list_models() -> Response:
expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
diff --git a/chatmock/session.py b/chatmock/session.py
index f2a085e..705a50c 100644
--- a/chatmock/session.py
+++ b/chatmock/session.py
@@ -1,16 +1,37 @@
from __future__ import annotations
+import copy
import hashlib
import json
import threading
import uuid
-from typing import Any, Dict, List, Tuple
+from dataclasses import dataclass, field
+from typing import Any, Dict, List
_LOCK = threading.Lock()
_FINGERPRINT_TO_UUID: Dict[str, str] = {}
_ORDER: List[str] = []
_MAX_ENTRIES = 10000
+_RESPONSES_SESSION_STATE: Dict[str, "_ResponsesSessionState"] = {}
+_RESPONSES_ORDER: List[str] = []
+
+
+@dataclass(frozen=True)
+class PreparedResponsesRequest:  # Immutable result returned by prepare_responses_request_for_session.
+ payload: Dict[str, Any]  # body to send upstream; may have been rewritten to delta input + previous_response_id
+ session_id: str  # session the request was registered under
+
+
+@dataclass
+class _ResponsesSessionState:  # Per-session bookkeeping: the last completed turn (last_*) and the turn currently in flight (inflight_*).
+ last_request_payload: Dict[str, Any] | None = None  # full request payload of the last completed turn
+ last_response_id: str | None = None  # id of the last completed response; used as previous_response_id
+ last_response_items: List[Dict[str, Any]] = field(default_factory=list)  # output items of that response, reasoning items excluded
+ inflight_request_payload: Dict[str, Any] | None = None  # payload of the request awaiting completion
+ inflight_track_result: bool = False  # when False, completion clears last_* instead of updating it
+ inflight_response_id: str | None = None  # id seen in response.created for the in-flight turn
+ inflight_response_items: List[Dict[str, Any]] = field(default_factory=list)  # items collected from response.output_item.done
def _canonicalize_first_user_message(input_items: List[Dict[str, Any]]) -> Dict[str, Any] | None:
@@ -70,6 +91,61 @@ def _remember(fp: str, sid: str) -> None:
_FINGERPRINT_TO_UUID.pop(oldest, None)
+def _remember_responses_session(session_id: str) -> _ResponsesSessionState:  # Return the state for session_id, creating and registering it on first use.
+ state = _RESPONSES_SESSION_STATE.get(session_id)
+ if state is None:
+ state = _ResponsesSessionState()
+ _RESPONSES_SESSION_STATE[session_id] = state
+ _RESPONSES_ORDER.append(session_id)  # creation order drives eviction below
+ if len(_RESPONSES_ORDER) > _MAX_ENTRIES:
+ oldest = _RESPONSES_ORDER.pop(0)  # evict the oldest-created session (not least-recently-used)
+ _RESPONSES_SESSION_STATE.pop(oldest, None)
+ return state
+
+
+def _request_without_input(payload: Dict[str, Any]) -> Dict[str, Any]:  # Deep copy of payload with input emptied and previous_response_id removed, for comparing the remaining settings.
+ clone = copy.deepcopy(payload)
+ clone["input"] = []
+ clone.pop("previous_response_id", None)
+ return clone
+
+
+def _input_list(payload: Dict[str, Any]) -> List[Dict[str, Any]] | None:  # Deep-copied dict items of payload["input"], or None when input is not a list.
+ raw = payload.get("input")
+ if not isinstance(raw, list):
+ return None
+ return [item for item in copy.deepcopy(raw) if isinstance(item, dict)]  # non-dict entries are silently dropped
+
+
+def _conversation_output_items(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:  # Deep copies of the dict items whose type is not "reasoning".
+ reusable: List[Dict[str, Any]] = []
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ item_type = item.get("type")
+ if item_type == "reasoning":  # reasoning items are excluded from the reuse baseline
+ continue
+ reusable.append(copy.deepcopy(item))
+ return reusable
+
+
+def _clear_reuse_state(state: _ResponsesSessionState) -> None:  # Reset both the last-completed and the in-flight fields of state.
+ state.last_request_payload = None
+ state.last_response_id = None
+ state.last_response_items = []
+ state.inflight_request_payload = None
+ state.inflight_track_result = False
+ state.inflight_response_id = None
+ state.inflight_response_items = []
+
+
+def _clear_inflight(state: _ResponsesSessionState) -> None:  # Reset only the in-flight fields; last_* is left untouched.
+ state.inflight_request_payload = None
+ state.inflight_track_result = False
+ state.inflight_response_id = None
+ state.inflight_response_items = []
+
+
def ensure_session_id(
instructions: str | None,
input_items: List[Dict[str, Any]],
@@ -87,3 +163,150 @@ def ensure_session_id(
_remember(fp, sid)
return sid
+
+def prepare_responses_request_for_session(  # Register payload as the session's in-flight request and, when allowed, rewrite it to delta input + previous_response_id.
+ session_id: str,
+ payload: Dict[str, Any],
+ *,
+ allow_previous_response_id: bool = True,
+) -> PreparedResponsesRequest:
+ full_payload = copy.deepcopy(payload)  # bookkeeping copy, stored as inflight_request_payload
+ outbound_payload = copy.deepcopy(payload)  # the copy that is returned for sending
+ explicit_previous_response_id = (
+ isinstance(full_payload.get("previous_response_id"), str)
+ and bool(full_payload.get("previous_response_id").strip())
+ )
+
+ with _LOCK:
+ state = _remember_responses_session(session_id)
+
+ if explicit_previous_response_id:  # the caller chains responses itself: forget cached state and pass the payload through unchanged
+ _clear_reuse_state(state)
+ return PreparedResponsesRequest(
+ payload=outbound_payload,
+ session_id=session_id,
+ )
+
+ request_input = _input_list(full_payload)
+ if (
+ allow_previous_response_id
+ and
+ state.last_request_payload is not None
+ and state.last_response_id
+ and request_input is not None
+ and _request_without_input(state.last_request_payload) == _request_without_input(full_payload)  # everything except input must match the previous turn
+ ):
+ baseline: List[Dict[str, Any]] = []  # previous input followed by the previous response's stored output items
+ previous_input = _input_list(state.last_request_payload)
+ if previous_input is not None:
+ baseline.extend(previous_input)
+ baseline.extend(copy.deepcopy(state.last_response_items))
+ baseline_len = len(baseline)
+ if request_input[:baseline_len] == baseline and baseline_len <= len(request_input):  # rewrite only when the new input starts with the baseline
+ outbound_payload["input"] = copy.deepcopy(request_input[baseline_len:])  # send just the items after the baseline (may be empty)
+ outbound_payload["previous_response_id"] = state.last_response_id
+
+ state.inflight_request_payload = full_payload  # promoted to last_request_payload once a completed/final response is noted
+ state.inflight_track_result = True
+ state.inflight_response_id = None
+ state.inflight_response_items = []
+
+ return PreparedResponsesRequest(
+ payload=outbound_payload,
+ session_id=session_id,
+ )
+
+
+def note_responses_stream_event(session_id: str, event: Dict[str, Any]) -> None:  # Update the session's bookkeeping from one upstream stream event; unknown sessions and event types are ignored.
+ if not isinstance(session_id, str) or not session_id.strip():
+ return
+ if not isinstance(event, dict):
+ return
+
+ with _LOCK:
+ state = _RESPONSES_SESSION_STATE.get(session_id)
+ if state is None:
+ return
+
+ kind = event.get("type")
+ if kind == "response.created":  # remember the id early in case the completed event carries none
+ response = event.get("response")
+ if isinstance(response, dict) and isinstance(response.get("id"), str):
+ state.inflight_response_id = response.get("id")
+ return
+
+ if kind == "response.output_item.done":  # collect finished items as a fallback for an empty/missing final output list
+ item = event.get("item")
+ if isinstance(item, dict):
+ state.inflight_response_items.append(copy.deepcopy(item))
+ return
+
+ if kind == "response.completed":
+ response = event.get("response")
+ response_id = None
+ response_items: List[Dict[str, Any]] = copy.deepcopy(state.inflight_response_items)
+ if isinstance(response, dict):
+ if isinstance(response.get("id"), str):
+ response_id = response.get("id")
+ output = response.get("output")
+ if isinstance(output, list) and output:  # a non-empty final output list overrides the incrementally collected items
+ response_items = [copy.deepcopy(item) for item in output if isinstance(item, dict)]
+ if not response_id:
+ response_id = state.inflight_response_id
+
+ if state.inflight_track_result and state.inflight_request_payload is not None and response_id:  # promote the in-flight turn to "last completed"
+ state.last_request_payload = copy.deepcopy(state.inflight_request_payload)
+ state.last_response_id = response_id
+ state.last_response_items = _conversation_output_items(response_items)
+ else:  # untracked turn or no id: nothing reliable to chain from
+ state.last_request_payload = None
+ state.last_response_id = None
+ state.last_response_items = []
+ _clear_inflight(state)
+ return
+
+ if kind in ("response.failed", "error"):  # failure invalidates both last_* and inflight_*
+ _clear_reuse_state(state)
+
+
+def note_responses_final_response(session_id: str, response_obj: Dict[str, Any]) -> None:  # Record a complete (non-streamed) response object as the session's last completed turn.
+ if not isinstance(session_id, str) or not session_id.strip():
+ return
+ if not isinstance(response_obj, dict):
+ return
+
+ with _LOCK:
+ state = _RESPONSES_SESSION_STATE.get(session_id)
+ if state is None:
+ return
+
+ response_id = response_obj.get("id") if isinstance(response_obj.get("id"), str) else None
+ output = response_obj.get("output")
+ output_items = [copy.deepcopy(item) for item in output if isinstance(item, dict)] if isinstance(output, list) else []
+ if state.inflight_track_result and state.inflight_request_payload is not None and response_id:  # same promotion rule as the response.completed stream event
+ state.last_request_payload = copy.deepcopy(state.inflight_request_payload)
+ state.last_response_id = response_id
+ state.last_response_items = _conversation_output_items(output_items)
+ else:
+ state.last_request_payload = None
+ state.last_response_id = None
+ state.last_response_items = []
+ _clear_inflight(state)
+
+
+def clear_responses_reuse_state(session_id: str) -> None:  # Reset all reuse bookkeeping for session_id; no-op for blank or unknown ids.
+ if not isinstance(session_id, str) or not session_id.strip():
+ return
+ with _LOCK:
+ state = _RESPONSES_SESSION_STATE.get(session_id)
+ if state is None:
+ return
+ _clear_reuse_state(state)  # the entry itself stays registered; only its fields are reset
+
+
+def reset_session_state() -> None:  # Empty every module-level session map and order list under _LOCK.
+ with _LOCK:
+ _FINGERPRINT_TO_UUID.clear()
+ _ORDER.clear()
+ _RESPONSES_SESSION_STATE.clear()
+ _RESPONSES_ORDER.clear()
diff --git a/chatmock/upstream.py b/chatmock/upstream.py
index e264e6f..ba995cb 100644
--- a/chatmock/upstream.py
+++ b/chatmock/upstream.py
@@ -3,6 +3,7 @@ from __future__ import annotations
import json
import time
from typing import Any, Dict, List, Tuple
+from urllib.parse import urlparse, urlunparse
import requests
from flask import Response, current_app, jsonify, make_response
@@ -33,6 +34,7 @@ def start_upstream_request(
tool_choice: Any | None = None,
parallel_tool_calls: bool = False,
reasoning_param: Dict[str, Any] | None = None,
+ service_tier: str | None = None,
):
access_token, account_id = get_effective_chatgpt_auth()
if not access_token or not account_id:
@@ -81,6 +83,62 @@ def start_upstream_request(
if reasoning_param is not None:
responses_payload["reasoning"] = reasoning_param
+ if isinstance(service_tier, str) and service_tier.strip():
+ responses_payload["service_tier"] = service_tier.strip().lower()
+
+ return start_upstream_raw_request(
+ responses_payload,
+ session_id=session_id,
+ stream=True,
+ )
+
+
+def build_upstream_headers(  # Build the HTTP header dict sent with upstream requests (bearer auth, account id, session id).
+ access_token: str,
+ account_id: str,
+ session_id: str,
+ *,
+ accept: str = "text/event-stream",  # value for the Accept header; callers pass "application/json" for non-SSE use
+) -> Dict[str, str]:
+ return {
+ "Authorization": f"Bearer {access_token}",
+ "Content-Type": "application/json",
+ "Accept": accept,
+ "chatgpt-account-id": account_id,
+ "OpenAI-Beta": "responses=experimental",
+ "session_id": session_id,
+ }
+
+
+def start_upstream_raw_request(
+ responses_payload: Dict[str, Any],
+ *,
+ session_id: str | None = None,
+ stream: bool = True,
+):
+ access_token, account_id = get_effective_chatgpt_auth()
+ if not access_token or not account_id:
+ resp = make_response(
+ jsonify(
+ {
+ "error": {
+ "message": "Missing ChatGPT credentials. Run 'python3 chatmock.py login' first.",
+ }
+ }
+ ),
+ 401,
+ )
+ for k, v in build_cors_headers().items():
+ resp.headers.setdefault(k, v)
+ return None, resp
+
+ effective_session_id = session_id
+ if not isinstance(effective_session_id, str) or not effective_session_id.strip():
+ payload_prompt_cache_key = responses_payload.get("prompt_cache_key")
+ if isinstance(payload_prompt_cache_key, str) and payload_prompt_cache_key.strip():
+ effective_session_id = payload_prompt_cache_key.strip()
+ if not isinstance(effective_session_id, str) or not effective_session_id.strip():
+ effective_session_id = str(int(time.time() * 1000))
verbose = False
try:
@@ -90,21 +148,19 @@ def start_upstream_request(
if verbose:
_log_json("OUTBOUND >> ChatGPT Responses API payload", responses_payload)
- headers = {
- "Authorization": f"Bearer {access_token}",
- "Content-Type": "application/json",
- "Accept": "text/event-stream",
- "chatgpt-account-id": account_id,
- "OpenAI-Beta": "responses=experimental",
- "session_id": session_id,
- }
+ headers = build_upstream_headers(
+ access_token,
+ account_id,
+ effective_session_id,
+ accept=("text/event-stream" if stream else "application/json"),
+ )
try:
upstream = requests.post(
CHATGPT_RESPONSES_URL,
headers=headers,
json=responses_payload,
- stream=True,
+ stream=stream,
timeout=600,
)
except requests.RequestException as e:
@@ -113,3 +169,13 @@ def start_upstream_request(
resp.headers.setdefault(k, v)
return None, resp
return upstream, None
+
+
+def build_upstream_websocket_url() -> str:  # CHATGPT_RESPONSES_URL with its scheme mapped https->wss / http->ws.
+ parsed = urlparse(CHATGPT_RESPONSES_URL)
+ scheme = parsed.scheme.lower()
+ if scheme == "https":
+ parsed = parsed._replace(scheme="wss")
+ elif scheme == "http":
+ parsed = parsed._replace(scheme="ws")
+ return urlunparse(parsed)  # any other scheme is returned unchanged
diff --git a/chatmock/version.py b/chatmock/version.py
index 72f72b8..17d6d1a 100644
--- a/chatmock/version.py
+++ b/chatmock/version.py
@@ -1,4 +1,4 @@
from __future__ import annotations
-__version__ = "1.36"
+__version__ = "1.37"
diff --git a/chatmock/websocket_routes.py b/chatmock/websocket_routes.py
new file mode 100644
index 0000000..37fcfe0
--- /dev/null
+++ b/chatmock/websocket_routes.py
@@ -0,0 +1,225 @@
+from __future__ import annotations
+
+import json
+import os
+import ssl
+from typing import Any, Dict
+
+import certifi
+from flask import current_app, request
+from flask_sock import Sock
+from websockets.sync.client import connect as websocket_connect
+from websockets.exceptions import ConnectionClosed
+
+from .responses_api import (
+ ResponsesRequestError,
+ extract_client_session_id,
+ normalize_responses_payload,
+)
+from .session import (
+ clear_responses_reuse_state,
+ note_responses_stream_event,
+ prepare_responses_request_for_session,
+)
+from .upstream import build_upstream_headers, build_upstream_websocket_url
+from .utils import get_effective_chatgpt_auth
+
+
+def _log_json(prefix: str, payload: Any) -> None:  # Print prefix followed by payload as indented JSON; never raises.
+ try:
+ print(f"{prefix}\n{json.dumps(payload, indent=2, ensure_ascii=False)}")
+ except Exception:  # payload not JSON-serializable (or print failed): fall back to its plain string form
+ try:
+ print(f"{prefix}\n{payload}")
+ except Exception:
+ pass
+
+
+def _error_event(message: str, *, status_code: int = 400, code: str | None = None) -> Dict[str, Any]:  # Build an {"type": "error", ...} event dict for the client.
+ error: Dict[str, Any] = {"message": message}
+ if code:  # "code" is included only when truthy
+ error["code"] = code
+ return {"type": "error", "status_code": status_code, "error": error}
+
+
+def _is_terminal_event(event: Any) -> bool:  # True when event is a dict whose type ends a turn (completed, failed, or error).
+ if not isinstance(event, dict):
+ return False
+ kind = event.get("type")
+ return kind in ("response.completed", "response.failed", "error")
+
+
+def _build_websocket_ssl_context() -> ssl.SSLContext:  # Default SSL context using a CA file chosen from the environment or certifi.
+ cafile = (
+ os.getenv("CODEX_CA_CERTIFICATE")  # precedence: CODEX_CA_CERTIFICATE, then SSL_CERT_FILE, then certifi's bundle
+ or os.getenv("SSL_CERT_FILE")
+ or certifi.where()
+ )
+ return ssl.create_default_context(cafile=cafile)
+
+
+def connect_upstream_websocket(url: str, headers: Dict[str, str]):
+    """Open a synchronous websocket connection to ``url``.
+
+    Args:
+        url: ``ws://`` or ``wss://`` endpoint to connect to.
+        headers: Extra HTTP headers sent with the opening handshake.
+
+    Returns:
+        The connection object returned by ``websockets.sync.client.connect``.
+    """
+    # The websockets client rejects an ``ssl`` argument for a plain ``ws://`` URI,
+    # and build_upstream_websocket_url() can produce one (http -> ws), so the
+    # TLS context is only built for secure URLs.
+    ssl_context = _build_websocket_ssl_context() if url.lower().startswith("wss://") else None
+    return websocket_connect(
+        url,
+        additional_headers=headers,
+        open_timeout=15,
+        ssl=ssl_context,
+    )
+
+
+def register_websocket_routes(sock: Sock) -> None:  # Register the /v1/responses websocket route on sock.
+ @sock.route("/v1/responses")
+ def responses_websocket(ws) -> None:  # Bridge one client websocket to an upstream websocket, one turn per client frame.
+ verbose = bool(current_app.config.get("VERBOSE"))
+ upstream_ws = None  # upstream connection, opened lazily
+ upstream_session_id: str | None = None  # session id the current upstream connection was opened with
+ active_session_id: str | None = None  # session id of the most recent response.create, used for bookkeeping calls
+
+ def _send_error(message: str, *, status_code: int = 400, code: str | None = None) -> None:  # best-effort error event to the client
+ evt = _error_event(message, status_code=status_code, code=code)
+ if verbose:
+ _log_json("STREAM OUT WS /v1/responses (error)", evt)
+ try:
+ ws.send(json.dumps(evt))
+ except Exception:
+ pass
+
+ try:
+ while True:  # outer loop: one iteration per frame received from the client
+ incoming = ws.receive()
+ if incoming is None:
+ break
+
+ if isinstance(incoming, bytes):
+ incoming_text = incoming.decode("utf-8", errors="ignore")
+ else:
+ incoming_text = str(incoming)
+ if verbose:
+ print("IN WS /v1/responses\n" + incoming_text)
+
+ try:
+ payload = json.loads(incoming_text)
+ except Exception:
+ _send_error("Websocket frames must be valid JSON objects.", status_code=400)
+ break
+
+ if not isinstance(payload, dict):
+ _send_error("Websocket frames must be JSON objects.", status_code=400)
+ break
+
+ client_session_id = extract_client_session_id(request.headers)
+ outbound_text = incoming_text  # frames other than response.create are forwarded verbatim
+ session_id = upstream_session_id
+
+ if payload.get("type") == "response.create":
+ try:
+ normalized = normalize_responses_payload(
+ payload,
+ config=current_app.config,
+ client_session_id=client_session_id,
+ )
+ except ResponsesRequestError as exc:
+ _send_error(str(exc), status_code=exc.status_code, code=exc.code)
+ continue  # rejected request: the client socket stays open for another frame
+
+ if normalized.service_tier_resolution.warning_message and verbose:
+ print(f"[FastMode] {normalized.service_tier_resolution.warning_message}")
+ prepared = prepare_responses_request_for_session(
+ normalized.session_id,
+ normalized.payload,
+ allow_previous_response_id=True,  # websocket path may be rewritten to delta input + previous_response_id
+ )
+ outbound_text = json.dumps(prepared.payload)
+ session_id = normalized.session_id
+ active_session_id = normalized.session_id
+ if verbose:
+ _log_json("OUTBOUND >> ChatGPT Responses WS payload", prepared.payload)
+ elif upstream_ws is None:
+ _send_error(
+ "The first websocket message must be a response.create request.",
+ status_code=400,
+ )
+ break
+
+ if upstream_ws is None or (session_id and session_id != upstream_session_id):  # (re)connect on first use or when the session id changed
+ access_token, account_id = get_effective_chatgpt_auth()
+ if not access_token or not account_id:
+ if session_id:
+ clear_responses_reuse_state(session_id)
+ _send_error(
+ "Missing ChatGPT credentials. Run 'python3 chatmock.py login' first.",
+ status_code=401,
+ )
+ break
+
+ if upstream_ws is not None:  # close the connection that belonged to the previous session id
+ try:
+ upstream_ws.close()
+ except Exception:
+ pass
+
+ effective_session_id = session_id or client_session_id or ""
+ try:
+ upstream_ws = connect_upstream_websocket(
+ build_upstream_websocket_url(),
+ build_upstream_headers(
+ access_token,
+ account_id,
+ effective_session_id,
+ accept="application/json",
+ ),
+ )
+ except Exception as exc:
+ if session_id:
+ clear_responses_reuse_state(session_id)
+ _send_error(
+ f"Upstream websocket connection failed: {exc}",
+ status_code=502,
+ )
+ break
+ upstream_session_id = effective_session_id
+
+ upstream_ws.send(outbound_text)  # NOTE(review): ConnectionClosed raised here is not handled, unlike recv() below - confirm intended
+
+ while True:  # inner loop: relay upstream frames to the client until a terminal event
+ try:
+ upstream_message = upstream_ws.recv()
+ except ConnectionClosed:
+ if active_session_id:
+ clear_responses_reuse_state(active_session_id)
+ _send_error("Upstream websocket closed unexpectedly.", status_code=502)
+ return
+ if upstream_message is None:
+ if active_session_id:
+ clear_responses_reuse_state(active_session_id)
+ _send_error("Upstream websocket closed unexpectedly.", status_code=502)
+ return
+ if verbose:
+ try:
+ print("STREAM OUT WS /v1/responses\n" + str(upstream_message))
+ except Exception:
+ pass
+ ws.send(upstream_message)  # forwarded before parsing, so the client sees every frame unmodified
+
+ try:
+ parsed = json.loads(upstream_message)
+ except Exception:
+ parsed = None
+ if isinstance(parsed, dict) and active_session_id:
+ note_responses_stream_event(active_session_id, parsed)
+ if _is_terminal_event(parsed):
+ if isinstance(parsed, dict) and parsed.get("type") in ("response.failed", "error"):  # after a failure, drop the connection so the next turn reconnects
+ if upstream_ws is not None:
+ try:
+ upstream_ws.close()
+ except Exception:
+ pass
+ upstream_ws = None
+ upstream_session_id = None
+ break
+ finally:  # always release the upstream connection when the handler exits
+ if upstream_ws is not None:
+ try:
+ upstream_ws.close()
+ except Exception:
+ pass
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
index cb6c126..af26c64 100644
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -23,6 +23,9 @@ if [[ "$cmd" == "serve" ]]; then
if bool "${VERBOSE_OBFUSCATION:-}" || bool "${CHATGPT_LOCAL_VERBOSE_OBFUSCATION:-}"; then
ARGS+=(--verbose-obfuscation)
fi
+ if bool "${FAST_MODE:-}" || bool "${CHATGPT_LOCAL_FAST_MODE:-}"; then
+ ARGS+=(--fast-mode)
+ fi
if [[ "$#" -gt 0 ]]; then
ARGS+=("$@")
diff --git a/gui.py b/gui.py
index e9efe8f..82929fe 100644
--- a/gui.py
+++ b/gui.py
@@ -18,6 +18,7 @@ def run_server(
reasoning_effort: str = "medium",
reasoning_summary: str = "auto",
reasoning_compat: str = "think-tags",
+ fast_mode: bool = False,
expose_reasoning_models: bool = False,
default_web_search: bool = False,
) -> None:
@@ -25,6 +26,7 @@ def run_server(
reasoning_effort=reasoning_effort,
reasoning_summary=reasoning_summary,
reasoning_compat=reasoning_compat,
+ fast_mode=fast_mode,
expose_reasoning_models=expose_reasoning_models,
default_web_search=default_web_search,
)
@@ -42,6 +44,7 @@ class ServerProcess(QtCore.QObject):
self._effort = "medium"
self._summary = "auto"
self._compat = "think-tags"
+ self._fast_mode = False
self._expose_reasoning_models = False
self._default_web_search = False
@@ -55,6 +58,7 @@ class ServerProcess(QtCore.QObject):
effort: str,
summary: str,
compat: str,
+ fast_mode: bool,
expose_reasoning_models: bool,
default_web_search: bool,
) -> None:
@@ -63,6 +67,7 @@ class ServerProcess(QtCore.QObject):
self._host, self._port = host, port
self._effort, self._summary = effort, summary
self._compat = compat
+ self._fast_mode = fast_mode
self._expose_reasoning_models = expose_reasoning_models
self._default_web_search = default_web_search
self._proc = QtCore.QProcess()
@@ -75,6 +80,8 @@ class ServerProcess(QtCore.QObject):
"--summary", summary,
"--compat", compat,
]
+ if fast_mode:
+ args.append("--fast-mode")
if expose_reasoning_models:
args.append("--expose-reasoning-models")
if default_web_search:
@@ -352,8 +359,10 @@ class MainWindow(QtWidgets.QMainWindow):
opts.addWidget(self.compat, 1, 1)
self.expose_reasoning_models = QtWidgets.QCheckBox("Expose reasoning models")
opts.addWidget(self.expose_reasoning_models, 1, 2)
+ self.fast_mode = QtWidgets.QCheckBox("Enable fast mode")
+ opts.addWidget(self.fast_mode, 1, 3)
self.enable_web_search = QtWidgets.QCheckBox("Enable web search")
- opts.addWidget(self.enable_web_search, 1, 3)
+ opts.addWidget(self.enable_web_search, 2, 0)
opts.setColumnStretch(1, 1)
opts.setColumnStretch(3, 1)
srv_layout.addLayout(opts)
@@ -463,6 +472,7 @@ class MainWindow(QtWidgets.QMainWindow):
effort = self.effort.currentText().strip()
summary = self.summary.currentText().strip()
compat = self.compat.currentText().strip()
+ fast_mode = self.fast_mode.isChecked()
expose_reasoning_models = self.expose_reasoning_models.isChecked()
default_web_search = self.enable_web_search.isChecked()
self.status.setText(f"Starting server at http://{host}:{port} …")
@@ -473,6 +483,7 @@ class MainWindow(QtWidgets.QMainWindow):
effort,
summary,
compat,
+ fast_mode,
expose_reasoning_models,
default_web_search,
)
@@ -524,6 +535,7 @@ def main() -> None:
p.add_argument("--effort", default="medium")
p.add_argument("--summary", default="auto")
p.add_argument("--compat", default="think-tags")
+ p.add_argument("--fast-mode", action="store_true")
p.add_argument("--expose-reasoning-models", action="store_true")
p.add_argument("--enable-web-search", action="store_true")
args, _ = p.parse_known_args()
@@ -533,6 +545,7 @@ def main() -> None:
args.effort,
args.summary,
args.compat,
+ args.fast_mode,
args.expose_reasoning_models,
args.enable_web_search,
)
diff --git a/pyproject.toml b/pyproject.toml
index 8cfe979..747e3d1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,12 +11,14 @@ dependencies = [
"blinker==1.9.0",
"certifi==2025.8.3",
"flask==3.1.1",
+ "flask-sock==0.7.0",
"idna==3.10",
"itsdangerous==2.2.0",
"jinja2==3.1.6",
"markupsafe==3.0.2",
"requests==2.32.5",
"urllib3==2.5.0",
+ "websockets==15.0.1",
"werkzeug==3.1.3",
]
diff --git a/scripts/test_responses_cached_tokens.py b/scripts/test_responses_cached_tokens.py
new file mode 100644
index 0000000..9cf05f5
--- /dev/null
+++ b/scripts/test_responses_cached_tokens.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+import uuid
+from typing import Any, Dict
+
+import requests
+
+
+def _post(url: str, api_key: str, session_id: str, payload: Dict[str, Any]) -> Dict[str, Any]:  # POST payload as JSON and return the decoded JSON object; raises RuntimeError on HTTP >= 400 or a non-object body.
+ response = requests.post(
+ url,
+ headers={
+ "Authorization": f"Bearer {api_key}",
+ "Content-Type": "application/json",
+ "X-Session-Id": session_id,  # same id on every call so both turns share one session
+ },
+ json=payload,
+ timeout=180,
+ )
+ try:
+ body = response.json()
+ except Exception:
+ body = {"raw": response.text}  # keep the raw text so a failure message can still show it
+ if response.status_code >= 400:
+ raise RuntimeError(
+ f"POST {url} failed with {response.status_code}: {json.dumps(body, ensure_ascii=False)}"
+ )
+ if not isinstance(body, dict):
+ raise RuntimeError(f"Expected JSON object response, got: {body!r}")
+ return body
+
+
+def _usage_summary(body: Dict[str, Any]) -> Dict[str, Any]:  # Return body["usage"] when it is a dict, otherwise an empty dict.
+ usage = body.get("usage")
+ if not isinstance(usage, dict):
+ return {}
+ return usage
+
+
+def _cached_tokens(body: Dict[str, Any]) -> int | None:  # usage.input_tokens_details.cached_tokens as an int, or None when absent or not convertible.
+ usage = _usage_summary(body)
+ details = usage.get("input_tokens_details")
+ if not isinstance(details, dict):
+ return None
+ value = details.get("cached_tokens")
+ try:
+ return int(value)
+ except Exception:  # covers a missing value (None) as well as non-numeric data
+ return None
+
+
+def _assistant_message_item(body: Dict[str, Any]) -> Dict[str, Any]:  # First output item that is an assistant message; raises RuntimeError when there is none.
+ output = body.get("output")
+ if not isinstance(output, list):
+ raise RuntimeError("Response did not include an output list.")
+ for item in output:
+ if isinstance(item, dict) and item.get("type") == "message" and item.get("role") == "assistant":
+ return item
+ raise RuntimeError("Response did not include an assistant message item.")
+
+
+def _user_message(text: str) -> Dict[str, Any]:  # Wrap text as a user "message" input item with a single input_text part.
+ return {
+ "type": "message",
+ "role": "user",
+ "content": [{"type": "input_text", "text": text}],
+ }
+
+
+def _default_prefix() -> str:  # Default first-turn prefix: the seed sentence repeated 220 times.
+ seed = "Cache test prefix. Repeat this context exactly for cache measurement. "
+ return "".join(seed for _ in range(220))
+
+
+def main() -> int:  # Run two /v1/responses turns in one session; return 0 if turn 2 reports cached tokens, 1 if it reports zero, 2 if unreported.
+ parser = argparse.ArgumentParser(
+ description="Drive two raw /v1/responses turns through ChatMock and check cached input tokens."
+ )
+ parser.add_argument("--base-url", default="http://127.0.0.1:8000", help="ChatMock base URL.")
+ parser.add_argument("--api-key", default="key", help="Bearer token to send to ChatMock.")
+ parser.add_argument("--model", default="gpt-5.4", help="Model to request.")
+ parser.add_argument(
+ "--session-id",
+ default=f"cache-check-{uuid.uuid4()}",  # fresh id per run unless overridden
+ help="Fixed X-Session-Id for both turns.",
+ )
+ parser.add_argument(
+ "--prefix",
+ default=_default_prefix(),
+ help="Large repeated first-turn prompt prefix.",
+ )
+ parser.add_argument(
+ "--first-question",
+ default="Reply with exactly: alpha",
+ help="Trailing instruction for the first turn.",
+ )
+ parser.add_argument(
+ "--second-question",
+ default="Reply with exactly: beta",
+ help="Trailing instruction for the second turn.",
+ )
+ args = parser.parse_args()
+
+ responses_url = args.base_url.rstrip("/") + "/v1/responses"
+ session_id = args.session_id
+ first_text = f"{args.prefix}\n\n{args.first_question}"
+ second_text = args.second_question
+
+ print(f"Using session id: {session_id}")
+ print(f"POST target: {responses_url}")
+ print("This checks the raw Responses usage object returned through ChatMock.")
+ print()
+
+ first_payload = {
+ "model": args.model,
+ "store": False,
+ "stream": False,
+ "input": first_text,  # turn 1 sends the prompt as a plain string
+ }
+ first_response = _post(responses_url, args.api_key, session_id, first_payload)
+ assistant_item = _assistant_message_item(first_response)
+
+ second_payload = {
+ "model": args.model,
+ "store": False,
+ "stream": False,
+ "input": [  # turn 2 replays turn 1 (user text + assistant item) and appends the new question
+ _user_message(first_text),
+ assistant_item,
+ _user_message(second_text),
+ ],
+ }
+ second_response = _post(responses_url, args.api_key, session_id, second_payload)
+
+ first_usage = _usage_summary(first_response)
+ second_usage = _usage_summary(second_response)
+ first_cached = _cached_tokens(first_response)  # computed but not used in the verdict below
+ second_cached = _cached_tokens(second_response)
+
+ print("Turn 1")
+ print(json.dumps(first_usage, indent=2, ensure_ascii=False) if first_usage else " no usage object")
+ print()
+ print("Turn 2")
+ print(json.dumps(second_usage, indent=2, ensure_ascii=False) if second_usage else " no usage object")
+ print()
+
+ if second_cached is None:  # no cached_tokens field on turn 2: report inconclusive (exit code 2)
+ first_input_tokens = first_usage.get("input_tokens") if isinstance(first_usage, dict) else None
+ second_input_tokens = second_usage.get("input_tokens") if isinstance(second_usage, dict) else None
+ print("Result: inconclusive")
+ print("Reason: upstream did not include `usage.input_tokens_details.cached_tokens`.")
+ if isinstance(first_input_tokens, int) and isinstance(second_input_tokens, int):
+ print(f"Observed input_tokens delta: first={first_input_tokens}, second={second_input_tokens}")
+ print("Codex treats cached-token reporting as the direct cache-hit signal; without it, this script cannot prove caching.")
+ return 2
+
+ if second_cached > 0:
+ print(f"Result: success, follow-up turn reported cached_tokens={second_cached}.")
+ return 0
+
+ print("Result: failure, follow-up turn reported cached_tokens=0.")
+ return 1
+
+
+if __name__ == "__main__":  # script entry point: main()'s return value becomes the process exit code
+ try:
+ raise SystemExit(main())
+ except KeyboardInterrupt:
+ raise SystemExit(130)
+ except Exception as exc:  # SystemExit is not an Exception subclass, so the exit raised above passes through
+ print(f"error: {exc}", file=sys.stderr)
+ raise SystemExit(1)
diff --git a/scripts/test_responses_reuse.py b/scripts/test_responses_reuse.py
new file mode 100644
index 0000000..5e506ab
--- /dev/null
+++ b/scripts/test_responses_reuse.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+import uuid
+from typing import Any, Dict, Tuple
+
+from websockets.sync.client import connect
+
+
+def _user_message(text: str) -> Dict[str, Any]:
+    """Build a ``message`` input item with role ``user`` carrying *text*.
+
+    The text is wrapped in a single ``input_text`` content part.
+    """
+    text_part = {"type": "input_text", "text": text}
+    return dict(type="message", role="user", content=[text_part])
+
+
+def _response_id_of(event: Dict[str, Any]) -> str | None:
+    """Return ``event["response"]["id"]`` when it is a string, otherwise ``None``."""
+    response = event.get("response")
+    if isinstance(response, dict) and isinstance(response.get("id"), str):
+        return response["id"]
+    return None
+
+
+def _receive_turn(ws) -> Tuple[str, Dict[str, Any]]:
+    """Read websocket events for one turn and return ``(response_id, assistant_item)``.
+
+    Events are read from *ws* until ``response.completed`` arrives. The
+    response id comes from ``response.created`` (or, when that did not carry
+    one, from the completed event). The assistant item is the most recent
+    ``response.output_item.done`` item that is an assistant message.
+
+    Raises:
+        RuntimeError: on an ``error``, ``response.failed`` or
+            ``response.incomplete`` event, or when the turn completes without
+            a response id or without an assistant message item.
+    """
+    response_id: str | None = None
+    assistant_item: Dict[str, Any] | None = None
+
+    while True:
+        # Each recv waits at most 120 seconds for the next event.
+        event = json.loads(ws.recv(timeout=120))
+        event_type = event.get("type")
+        if event_type == "error":
+            raise RuntimeError(f"websocket error: {json.dumps(event, ensure_ascii=False)}")
+        if event_type in ("response.failed", "response.incomplete"):
+            # These end the turn without a `response.completed`; report them
+            # right away instead of waiting for a later recv to time out.
+            raise RuntimeError(
+                f"turn ended with {event_type}: {json.dumps(event, ensure_ascii=False)}"
+            )
+        if event_type == "response.created":
+            created_id = _response_id_of(event)
+            if created_id is not None:
+                response_id = created_id
+        elif event_type == "response.output_item.done":
+            item = event.get("item")
+            if (
+                isinstance(item, dict)
+                and item.get("type") == "message"
+                and item.get("role") == "assistant"
+            ):
+                assistant_item = item
+        elif event_type == "response.completed":
+            if not response_id:
+                # Fall back to the id on the completed event.
+                response_id = _response_id_of(event)
+            if not response_id:
+                raise RuntimeError("turn completed without a response id")
+            if assistant_item is None:
+                raise RuntimeError("turn completed without an assistant message item")
+            return response_id, assistant_item
+
+
+def main() -> int:
+ # Sends two `response.create` turns over one websocket connection and prints both response ids.
+ # Returns 0 after both turns complete; the script only prints what to look for in the
+ # server's verbose log and does not inspect the outbound upstream payload itself.
+ parser = argparse.ArgumentParser(
+ description="Exercise ChatMock websocket reuse the same way Codex does."
+ )
+ parser.add_argument(
+ "--ws-url",
+ default="ws://127.0.0.1:8000/v1/responses",
+ help="ChatMock websocket URL.",
+ )
+ parser.add_argument("--model", default="gpt-5.4", help="Model to request.")
+ parser.add_argument(
+ "--session-id",
+ # A fresh random id is generated as the default each time the script runs.
+ default=f"reuse-demo-{uuid.uuid4()}",
+ help="Fixed X-Session-Id for the whole run.",
+ )
+ parser.add_argument(
+ "--first-prompt",
+ default="Say exactly: alpha",
+ help="Prompt for the first turn.",
+ )
+ parser.add_argument(
+ "--second-prompt",
+ default="Now say exactly: beta",
+ help="Prompt appended in the reuse-candidate turn.",
+ )
+ parser.add_argument(
+ "--no-fast-mode",
+ action="store_true",
+ help="Do not send fast_mode=true.",
+ )
+ args = parser.parse_args()
+
+ # The session id is sent as a header on the websocket handshake.
+ headers = {"X-Session-Id": args.session_id}
+ # `fast_mode` is included in both requests: true by default, false with --no-fast-mode.
+ fast_mode = not args.no_fast_mode
+
+ print(f"Using websocket session id: {args.session_id}")
+ print(f"Connecting to: {args.ws_url}")
+ print("Run ChatMock with `python3 chatmock.py serve --verbose` in another terminal.")
+ print("This verifies the Codex-aligned path: websocket `response.create` reuse.")
+ print("HTTP `/v1/responses` is not expected to send `previous_response_id`.")
+ print()
+
+ with connect(args.ws_url, additional_headers=headers, open_timeout=15) as ws:
+ # Turn 1: the prompt is sent as a plain string input.
+ first_request = {
+ "type": "response.create",
+ "model": args.model,
+ "store": False,
+ "input": args.first_prompt,
+ "fast_mode": fast_mode,
+ }
+ ws.send(json.dumps(first_request))
+ first_response_id, assistant_item = _receive_turn(ws)
+
+ # Turn 2: replay the first user prompt and the assistant item received in turn 1,
+ # then append the second user prompt.
+ second_request = {
+ "type": "response.create",
+ "model": args.model,
+ "store": False,
+ "input": [
+ _user_message(args.first_prompt),
+ assistant_item,
+ _user_message(args.second_prompt),
+ ],
+ "fast_mode": fast_mode,
+ }
+ ws.send(json.dumps(second_request))
+ # Only the response id of the second turn is used below.
+ second_response_id, _ = _receive_turn(ws)
+
+ print("Turn 1 completed.")
+ print(f" response id: {first_response_id}")
+ print("Turn 2 completed.")
+ print(f" response id: {second_response_id}")
+ print()
+ print("Expected in the verbose ChatMock server log for turn 2:")
+ print(" - outbound websocket payload includes `previous_response_id`")
+ print(" - `previous_response_id` equals the first response id")
+ print(" - outbound `input` only contains the new trailing user message")
+ print()
+ print("If turn 2 still shows the full conversation in the outbound websocket payload, reuse is not working.")
+ return 0
+
+
+if __name__ == "__main__":
+    # Exit with main()'s return value; Ctrl-C exits with 130, and any other
+    # exception is reported on stderr and exits with 1.
+    try:
+        status = main()
+    except KeyboardInterrupt:
+        sys.exit(130)
+    except Exception as exc:
+        print(f"error: {exc}", file=sys.stderr)
+        sys.exit(1)
+    sys.exit(status)
diff --git a/tests/test_fast_mode.py b/tests/test_fast_mode.py
new file mode 100644
index 0000000..6892ec5
--- /dev/null
+++ b/tests/test_fast_mode.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+import unittest
+
+from chatmock.fast_mode import parse_optional_bool, resolve_service_tier, supports_priority_service_tier
+
+
+class FastModeTests(unittest.TestCase):
+    # Covers the fast-mode helpers: optional-boolean parsing, the priority
+    # service-tier allowlist, and service-tier resolution.
+
+    def test_parse_optional_bool(self) -> None:
+        # Accepted true spellings, accepted false spellings, then an unknown value.
+        for truthy in (True, "true"):
+            self.assertTrue(parse_optional_bool(truthy))
+        for falsy in (False, "off"):
+            self.assertFalse(parse_optional_bool(falsy))
+        self.assertIsNone(parse_optional_bool("maybe"))
+
+    def test_priority_allowlist_uses_normalized_model_ids(self) -> None:
+        # The alias spelling "gpt5.4" is accepted; "gpt-5.3-codex" is not.
+        accepted = supports_priority_service_tier("gpt5.4")
+        self.assertTrue(accepted)
+        rejected = supports_priority_service_tier("gpt-5.3-codex")
+        self.assertFalse(rejected)
+
+    def test_explicit_fast_mode_true_errors_for_unsupported_model(self) -> None:
+        # A request-level fast_mode=True on an unsupported model yields an error message.
+        outcome = resolve_service_tier("gpt-5.3-codex", request_fast_mode=True, server_fast_mode=False)
+        self.assertIsNone(outcome.service_tier)
+        self.assertIsNotNone(outcome.error_message)
+
+    def test_server_default_fast_mode_falls_back_on_unsupported_model(self) -> None:
+        # A server-level default on an unsupported model yields a warning, not an error.
+        outcome = resolve_service_tier("gpt-5.3-codex", server_fast_mode=True)
+        self.assertIsNone(outcome.service_tier)
+        self.assertIsNone(outcome.error_message)
+        self.assertIsNotNone(outcome.warning_message)
+
+    def test_request_fast_mode_false_overrides_server_default(self) -> None:
+        # A request-level fast_mode=False wins over the server default.
+        outcome = resolve_service_tier("gpt-5.4", request_fast_mode=False, server_fast_mode=True)
+        self.assertIsNone(outcome.service_tier)
+        self.assertIsNone(outcome.error_message)
+
+
+if __name__ == "__main__":
+ # Run this module's tests when the file is executed directly.
+ unittest.main()
diff --git a/tests/test_models.py b/tests/test_models.py
index 4d690cf..e82b516 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -10,6 +10,7 @@ class ModelRegistryTests(unittest.TestCase):
self.assertEqual(normalize_model_name("gpt5"), "gpt-5")
self.assertEqual(normalize_model_name("gpt5.4"), "gpt-5.4")
self.assertEqual(normalize_model_name("gpt5.4-mini"), "gpt-5.4-mini")
+ self.assertEqual(normalize_model_name("gpt5.3-codex-spark"), "gpt-5.3-codex-spark")
self.assertEqual(normalize_model_name("codex"), "codex-mini-latest")
def test_strips_reasoning_suffixes(self) -> None:
@@ -28,6 +29,7 @@ class ModelRegistryTests(unittest.TestCase):
model_ids = list_public_models(expose_reasoning_models=True)
self.assertIn("gpt-5.4", model_ids)
self.assertIn("gpt-5.4-mini", model_ids)
+ self.assertIn("gpt-5.3-codex-spark", model_ids)
self.assertIn("gpt-5.4-none", model_ids)
self.assertIn("gpt-5.4-mini-xhigh", model_ids)
self.assertNotIn("gpt-5.4-mini-none", model_ids)
diff --git a/tests/test_routes.py b/tests/test_routes.py
index b0d3422..1316bc8 100644
--- a/tests/test_routes.py
+++ b/tests/test_routes.py
@@ -1,31 +1,56 @@
from __future__ import annotations
import json
+import socket
+import threading
+import time
import unittest
from unittest.mock import patch
from chatmock.app import create_app
+from chatmock.session import reset_session_state
+from websockets.sync.client import connect as ws_connect
class FakeUpstream:
- def __init__(self, events: list[dict[str, object]], status_code: int = 200) -> None:
+ def __init__(
+ self,
+ events: list[dict[str, object]] | None = None,
+ *,
+ status_code: int = 200,
+ headers: dict[str, str] | None = None,
+ content: bytes | None = None,
+ text: str = "",
+ ) -> None:
self._events = events
self.status_code = status_code
- self.headers = {}
- self.content = b""
- self.text = ""
+ self.headers = headers or {}
+ self.content = content or b""
+ self.text = text
def iter_lines(self, decode_unicode: bool = False):
- for event in self._events:
+ for event in self._events or []:
payload = f"data: {json.dumps(event)}"
yield payload if decode_unicode else payload.encode("utf-8")
+ def iter_content(self, chunk_size=None):
+ if self.content:
+ yield self.content
+ return
+ for event in self._events or []:
+ payload = f"data: {json.dumps(event)}\n\n".encode("utf-8")
+ yield payload
+
+ def json(self):
+ return json.loads(self.content.decode("utf-8"))
+
def close(self) -> None:
return None
class RouteTests(unittest.TestCase):
def setUp(self) -> None:
+ reset_session_state()
self.app = create_app()
self.client = self.app.test_client()
@@ -36,6 +61,7 @@ class RouteTests(unittest.TestCase):
model_ids = [item["id"] for item in body["data"]]
self.assertIn("gpt-5.4", model_ids)
self.assertIn("gpt-5.4-mini", model_ids)
+ self.assertIn("gpt-5.3-codex-spark", model_ids)
def test_ollama_tags_list(self) -> None:
response = self.client.get("/api/tags")
@@ -85,6 +111,443 @@ class RouteTests(unittest.TestCase):
self.assertEqual(body["message"]["content"], "hello")
self.assertEqual(body["model"], "gpt-5.4")
+    @patch("chatmock.routes_openai.start_upstream_request")
+    def test_chat_completions_fast_mode_sets_priority_service_tier(self, mock_start) -> None:
+        # A request with fast_mode=True must reach the upstream call with service_tier="priority".
+        upstream_events = [
+            {"type": "response.output_text.delta", "delta": "hello"},
+            {"type": "response.completed", "response": {"id": "resp-openai"}},
+        ]
+        mock_start.return_value = (FakeUpstream(upstream_events), None)
+
+        request_body = {
+            "model": "gpt-5.4",
+            "fast_mode": True,
+            "messages": [{"role": "user", "content": "hi"}],
+        }
+        response = self.client.post("/v1/chat/completions", json=request_body)
+
+        self.assertEqual(response.status_code, 200)
+        forwarded = mock_start.call_args.kwargs
+        self.assertEqual(forwarded["service_tier"], "priority")
+
+    @patch("chatmock.routes_openai.start_upstream_request")
+    def test_chat_completions_fast_mode_false_overrides_server_default(self, mock_start) -> None:
+        # With the app created with fast_mode=True, a request-level fast_mode=False
+        # must reach the upstream call with no service tier.
+        fast_client = create_app(fast_mode=True).test_client()
+        upstream_events = [
+            {"type": "response.output_text.delta", "delta": "hello"},
+            {"type": "response.completed", "response": {"id": "resp-openai"}},
+        ]
+        mock_start.return_value = (FakeUpstream(upstream_events), None)
+
+        request_body = {
+            "model": "gpt-5.4",
+            "fast_mode": False,
+            "messages": [{"role": "user", "content": "hi"}],
+        }
+        response = fast_client.post("/v1/chat/completions", json=request_body)
+
+        self.assertEqual(response.status_code, 200)
+        forwarded = mock_start.call_args.kwargs
+        self.assertIsNone(forwarded["service_tier"])
+
+    @patch("chatmock.routes_openai.start_upstream_request")
+    def test_chat_completions_rejects_unsupported_explicit_fast_mode(self, mock_start) -> None:
+        # fast_mode=True on "gpt-5.3-codex" is answered with a 400 and never reaches upstream.
+        request_body = {
+            "model": "gpt-5.3-codex",
+            "fast_mode": True,
+            "messages": [{"role": "user", "content": "hi"}],
+        }
+        response = self.client.post("/v1/chat/completions", json=request_body)
+        error_message = response.get_json()["error"]["message"]
+
+        self.assertEqual(response.status_code, 400)
+        self.assertIn("Fast mode is not supported", error_message)
+        mock_start.assert_not_called()
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_returns_completed_response_object(self, mock_start) -> None:
+        # Non-streaming POST /v1/responses: the body is the completed response object,
+        # and the payload handed to the upstream call is checked field by field.
+        created_event = {
+            "type": "response.created",
+            "response": {"id": "resp_123", "object": "response", "status": "in_progress"},
+        }
+        completed_event = {
+            "type": "response.completed",
+            "response": {
+                "id": "resp_123",
+                "object": "response",
+                "status": "completed",
+                "output": [],
+            },
+        }
+        upstream = FakeUpstream(
+            [created_event, completed_event],
+            headers={"Content-Type": "text/event-stream"},
+        )
+        mock_start.return_value = (upstream, None)
+
+        response = self.client.post("/v1/responses", json={"model": "gpt5.4-mini", "input": "hello"})
+        body = response.get_json()
+
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(body["id"], "resp_123")
+
+        sent = mock_start.call_args.args[0]
+        expected_input = [
+            {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
+        ]
+        self.assertEqual(sent["model"], "gpt-5.4-mini")
+        self.assertEqual(sent["store"], False)
+        self.assertEqual(sent["input"], expected_input)
+        self.assertEqual(sent["reasoning"]["effort"], "medium")
+        self.assertIsInstance(sent["prompt_cache_key"], str)
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_does_not_use_previous_response_id_for_http_follow_up(self, mock_start) -> None:
+        # Two HTTP calls where the second replays the first turn: the second upstream
+        # payload must carry the full input and no `previous_response_id`.
+        def user(text):
+            return {"type": "message", "role": "user", "content": [{"type": "input_text", "text": text}]}
+
+        def assistant_reply():
+            return {
+                "type": "message",
+                "role": "assistant",
+                "id": "msg_1",
+                "content": [{"type": "output_text", "text": "assistant output"}],
+            }
+
+        def created(response_id):
+            return {
+                "type": "response.created",
+                "response": {"id": response_id, "object": "response", "status": "in_progress"},
+            }
+
+        def completed(response_id):
+            return {
+                "type": "response.completed",
+                "response": {"id": response_id, "object": "response", "status": "completed", "output": []},
+            }
+
+        def sse(*events):
+            return (FakeUpstream(list(events), headers={"Content-Type": "text/event-stream"}), None)
+
+        def conversation():
+            return [user("hello"), assistant_reply(), user("second")]
+
+        mock_start.side_effect = [
+            sse(
+                created("resp_1"),
+                {"type": "response.output_item.done", "item": assistant_reply()},
+                completed("resp_1"),
+            ),
+            sse(created("resp_2"), completed("resp_2")),
+        ]
+
+        first = self.client.post("/v1/responses", json={"model": "gpt-5.4", "input": "hello"})
+        second = self.client.post("/v1/responses", json={"model": "gpt-5.4", "input": conversation()})
+
+        self.assertEqual(first.status_code, 200)
+        self.assertEqual(second.status_code, 200)
+        sent = mock_start.call_args_list[1].args[0]
+        self.assertNotIn("previous_response_id", sent)
+        self.assertEqual(sent["input"], conversation())
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_falls_back_to_full_create_when_non_input_fields_change(self, mock_start) -> None:
+        # Same session id on both calls, but the second adds `instructions`: its upstream
+        # payload must carry the full input and no `previous_response_id`.
+        def user(text):
+            return {"type": "message", "role": "user", "content": [{"type": "input_text", "text": text}]}
+
+        def finished_turn(response_id):
+            events = [
+                {
+                    "type": "response.created",
+                    "response": {"id": response_id, "object": "response", "status": "in_progress"},
+                },
+                {
+                    "type": "response.completed",
+                    "response": {"id": response_id, "object": "response", "status": "completed", "output": []},
+                },
+            ]
+            return (FakeUpstream(events, headers={"Content-Type": "text/event-stream"}), None)
+
+        mock_start.side_effect = [finished_turn("resp_1"), finished_turn("resp_2")]
+        session = {"X-Session-Id": "session-fixed"}
+
+        first = self.client.post(
+            "/v1/responses",
+            json={"model": "gpt-5.4", "input": "hello"},
+            headers=session,
+        )
+        follow_up_body = {
+            "model": "gpt-5.4",
+            "instructions": "changed",
+            "input": [user("hello"), user("second")],
+        }
+        second = self.client.post("/v1/responses", json=follow_up_body, headers=session)
+
+        self.assertEqual(first.status_code, 200)
+        self.assertEqual(second.status_code, 200)
+        sent = mock_start.call_args_list[1].args[0]
+        self.assertNotIn("previous_response_id", sent)
+        self.assertEqual(sent["input"], [user("hello"), user("second")])
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_clears_reuse_state_after_error(self, mock_start) -> None:
+        # Three calls on one session id: the second upstream stream only yields
+        # `response.failed` (answered with 502); the third upstream payload must then
+        # carry the full input and no `previous_response_id`.
+        def user(text):
+            return {"type": "message", "role": "user", "content": [{"type": "input_text", "text": text}]}
+
+        def sse(*events):
+            return (FakeUpstream(list(events), headers={"Content-Type": "text/event-stream"}), None)
+
+        def finished_turn(response_id):
+            return sse(
+                {"type": "response.created", "response": {"id": response_id}},
+                {"type": "response.completed", "response": {"id": response_id, "output": []}},
+            )
+
+        mock_start.side_effect = [
+            finished_turn("resp_1"),
+            sse({"type": "response.failed", "response": {"error": {"message": "boom"}}}),
+            finished_turn("resp_3"),
+        ]
+        session = {"X-Session-Id": "session-fixed"}
+        history = [user("hello"), user("second"), user("third")]
+
+        first = self.client.post(
+            "/v1/responses",
+            json={"model": "gpt-5.4", "input": "hello"},
+            headers=session,
+        )
+        second = self.client.post(
+            "/v1/responses",
+            json={"model": "gpt-5.4", "input": history[:2]},
+            headers=session,
+        )
+        third = self.client.post(
+            "/v1/responses",
+            json={"model": "gpt-5.4", "input": history},
+            headers=session,
+        )
+
+        self.assertEqual(first.status_code, 200)
+        self.assertEqual(second.status_code, 502)
+        self.assertEqual(third.status_code, 200)
+        sent = mock_start.call_args_list[2].args[0]
+        self.assertNotIn("previous_response_id", sent)
+        self.assertEqual(sent["input"], history)
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_stream_passthrough(self, mock_start) -> None:
+        # With stream=True the upstream SSE bytes must show up in the response body.
+        sse_chunk = b'data: {"type":"response.output_text.delta","delta":"hello"}\n\n'
+        upstream = FakeUpstream(headers={"Content-Type": "text/event-stream"}, content=sse_chunk)
+        mock_start.return_value = (upstream, None)
+
+        request_body = {"model": "gpt-5.4", "input": "hello", "stream": True}
+        response = self.client.post("/v1/responses", json=request_body)
+
+        self.assertEqual(response.status_code, 200)
+        streamed_text = response.get_data(as_text=True)
+        self.assertIn("response.output_text.delta", streamed_text)
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_rejects_unsupported_explicit_priority(self, mock_start) -> None:
+        # service_tier="priority" on "gpt-5.3-codex" is answered with a 400 and never reaches upstream.
+        request_body = {"model": "gpt-5.3-codex", "input": "hello", "service_tier": "priority"}
+        response = self.client.post("/v1/responses", json=request_body)
+        error_message = response.get_json()["error"]["message"]
+
+        self.assertEqual(response.status_code, 400)
+        self.assertIn("Fast mode is not supported", error_message)
+        mock_start.assert_not_called()
+
+    @patch("chatmock.websocket_routes.get_effective_chatgpt_auth", return_value=("token", "acct"))
+    @patch("chatmock.websocket_routes.connect_upstream_websocket")
+    def test_responses_websocket_rewrites_response_create(self, mock_connect, _mock_auth) -> None:
+        # End-to-end websocket check against a real listening server: the first
+        # `response.create` is forwarded upstream in full with the priority tier, and
+        # the follow-up is forwarded with `previous_response_id` and only the new
+        # trailing user message.
+        class FakeUpstreamWebsocket:
+            # Scripted upstream: records what is sent and replays two canned turns.
+            def __init__(self) -> None:
+                self.sent: list[str] = []
+                self._messages = [
+                    json.dumps({"type": "response.created", "response": {"id": "resp_ws_1"}}),
+                    json.dumps({
+                        "type": "response.output_item.done",
+                        "item": {
+                            "type": "message",
+                            "role": "assistant",
+                            "id": "msg_1",
+                            "content": [{"type": "output_text", "text": "assistant output"}],
+                        },
+                    }),
+                    json.dumps({"type": "response.completed", "response": {"id": "resp_ws_1"}}),
+                    json.dumps({"type": "response.created", "response": {"id": "resp_ws_2"}}),
+                    json.dumps({"type": "response.completed", "response": {"id": "resp_ws_2"}}),
+                ]
+
+            def send(self, message: str) -> None:
+                self.sent.append(message)
+
+            def recv(self) -> str:
+                return self._messages.pop(0)
+
+            def close(self) -> None:
+                return None
+
+        fake_upstream = FakeUpstreamWebsocket()
+        mock_connect.return_value = fake_upstream
+
+        app = create_app()
+
+        # Ask the OS for a free port, then release it for the server to bind.
+        sock = socket.socket()
+        sock.bind(("127.0.0.1", 0))
+        host, port = sock.getsockname()
+        sock.close()
+
+        server_thread = threading.Thread(
+            target=app.run,
+            kwargs={
+                "host": host,
+                "port": port,
+                "debug": False,
+                "use_reloader": False,
+                "threaded": True,
+            },
+            daemon=True,
+        )
+        server_thread.start()
+
+        # Wait until the server accepts connections instead of sleeping a fixed
+        # interval: a fixed sleep is flaky on slow hosts and wasteful on fast ones.
+        deadline = time.monotonic() + 10.0
+        while True:
+            try:
+                socket.create_connection((host, port), timeout=0.2).close()
+                break
+            except OSError:
+                if time.monotonic() >= deadline:
+                    self.fail("test server did not start listening in time")
+                time.sleep(0.05)
+
+        with ws_connect(f"ws://{host}:{port}/v1/responses") as client:
+            client.send(json.dumps({"type": "response.create", "model": "gpt-5.4", "input": "hello", "fast_mode": True}))
+            first = json.loads(client.recv())
+            assistant = json.loads(client.recv())
+            second = json.loads(client.recv())
+            client.send(
+                json.dumps(
+                    {
+                        "type": "response.create",
+                        "model": "gpt-5.4",
+                        "fast_mode": True,
+                        "input": [
+                            {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
+                            {"type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}]},
+                            {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
+                        ],
+                    }
+                )
+            )
+            third = json.loads(client.recv())
+            fourth = json.loads(client.recv())
+
+        # Events are relayed to the client in upstream order.
+        self.assertEqual(first["type"], "response.created")
+        self.assertEqual(assistant["type"], "response.output_item.done")
+        self.assertEqual(second["type"], "response.completed")
+        self.assertEqual(third["type"], "response.created")
+        self.assertEqual(fourth["type"], "response.completed")
+
+        # First upstream message: full create with normalized input and priority tier.
+        outbound = json.loads(fake_upstream.sent[0])
+        self.assertEqual(outbound["model"], "gpt-5.4")
+        self.assertEqual(outbound["service_tier"], "priority")
+        self.assertEqual(outbound["type"], "response.create")
+        self.assertEqual(
+            outbound["input"],
+            [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}],
+        )
+        self.assertIn("prompt_cache_key", outbound)
+
+        # Second upstream message: chained to the first response, new user message only.
+        follow_up = json.loads(fake_upstream.sent[1])
+        self.assertEqual(follow_up["previous_response_id"], "resp_ws_1")
+        self.assertEqual(
+            follow_up["input"],
+            [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}],
+        )
+
if __name__ == "__main__":
unittest.main()
diff --git a/uv.lock b/uv.lock
index 1d63a16..fd1ec98 100644
--- a/uv.lock
+++ b/uv.lock
@@ -109,12 +109,14 @@ dependencies = [
{ name = "blinker" },
{ name = "certifi" },
{ name = "flask" },
+ { name = "flask-sock" },
{ name = "idna" },
{ name = "itsdangerous" },
{ name = "jinja2" },
{ name = "markupsafe" },
{ name = "requests" },
{ name = "urllib3" },
+ { name = "websockets" },
{ name = "werkzeug" },
]
@@ -130,6 +132,7 @@ requires-dist = [
{ name = "blinker", specifier = "==1.9.0" },
{ name = "certifi", specifier = "==2025.8.3" },
{ name = "flask", specifier = "==3.1.1" },
+ { name = "flask-sock", specifier = "==0.7.0" },
{ name = "idna", specifier = "==3.10" },
{ name = "itsdangerous", specifier = "==2.2.0" },
{ name = "jinja2", specifier = "==3.1.6" },
@@ -139,6 +142,7 @@ requires-dist = [
{ name = "pyside6", marker = "extra == 'gui'", specifier = "==6.9.2" },
{ name = "requests", specifier = "==2.32.5" },
{ name = "urllib3", specifier = "==2.5.0" },
+ { name = "websockets", specifier = "==15.0.1" },
{ name = "werkzeug", specifier = "==3.1.3" },
]
provides-extras = ["gui"]
@@ -181,6 +185,28 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3d/68/9d4508e893976286d2ead7f8f571314af6c2037af34853a30fd769c02e9d/flask-3.1.1-py3-none-any.whl", hash = "sha256:07aae2bb5eaf77993ef57e357491839f5fd9f4dc281593a81a9e4d79a24f295c", size = 103305 },
]
+[[package]]
+name = "flask-sock"
+version = "0.7.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "flask" },
+ { name = "simple-websocket" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/8d/8f/c6ab717dc90f4e46d1430335cd4ab13e3629410bb760c0ead6de476760fb/flask-sock-0.7.0.tar.gz", hash = "sha256:e023b578284195a443b8d8bdb4469e6a6acf694b89aeb51315b1a34fcf427b7d", size = 4334 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/d8/98/107728ce3f430b5481eb426ccc5e1f7c8ab0bd01eaf231c62a8d528ff721/flask_sock-0.7.0-py3-none-any.whl", hash = "sha256:caac4d679392aaf010d02fabcf73d52019f5bdaf1c9c131ec5a428cb3491204a", size = 3982 },
+]
+
+[[package]]
+name = "h11"
+version = "0.16.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 },
+]
+
[[package]]
name = "idna"
version = "3.10"
@@ -507,6 +533,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/48/64/562a527fc55fbf41fa70dae735929988215505cb5ec0809fb0aef921d4a0/shiboken6-6.9.2-cp39-abi3-win_arm64.whl", hash = "sha256:c5b827797b3d89d9b9a3753371ff533fcd4afc4531ca51a7c696952132098054", size = 1708948 },
]
+[[package]]
+name = "simple-websocket"
+version = "1.1.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "wsproto" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b0/d4/bfa032f961103eba93de583b161f0e6a5b63cebb8f2c7d0c6e6efe1e3d2e/simple_websocket-1.1.0.tar.gz", hash = "sha256:7939234e7aa067c534abdab3a9ed933ec9ce4691b0713c78acb195560aa52ae4", size = 17300 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/52/59/0782e51887ac6b07ffd1570e0364cf901ebc36345fea669969d2084baebb/simple_websocket-1.1.0-py3-none-any.whl", hash = "sha256:4af6069630a38ed6c561010f0e11a5bc0d4ca569b36306eb257cd9a192497c8c", size = 13842 },
+]
+
[[package]]
name = "urllib3"
version = "2.5.0"
@@ -516,6 +554,48 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795 },
]
+[[package]]
+name = "websockets"
+version = "15.0.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/9f/32/18fcd5919c293a398db67443acd33fde142f283853076049824fc58e6f75/websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431", size = 175423 },
+ { url = "https://files.pythonhosted.org/packages/76/70/ba1ad96b07869275ef42e2ce21f07a5b0148936688c2baf7e4a1f60d5058/websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57", size = 173082 },
+ { url = "https://files.pythonhosted.org/packages/86/f2/10b55821dd40eb696ce4704a87d57774696f9451108cff0d2824c97e0f97/websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905", size = 173330 },
+ { url = "https://files.pythonhosted.org/packages/a5/90/1c37ae8b8a113d3daf1065222b6af61cc44102da95388ac0018fcb7d93d9/websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562", size = 182878 },
+ { url = "https://files.pythonhosted.org/packages/8e/8d/96e8e288b2a41dffafb78e8904ea7367ee4f891dafc2ab8d87e2124cb3d3/websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792", size = 181883 },
+ { url = "https://files.pythonhosted.org/packages/93/1f/5d6dbf551766308f6f50f8baf8e9860be6182911e8106da7a7f73785f4c4/websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413", size = 182252 },
+ { url = "https://files.pythonhosted.org/packages/d4/78/2d4fed9123e6620cbf1706c0de8a1632e1a28e7774d94346d7de1bba2ca3/websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8", size = 182521 },
+ { url = "https://files.pythonhosted.org/packages/e7/3b/66d4c1b444dd1a9823c4a81f50231b921bab54eee2f69e70319b4e21f1ca/websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3", size = 181958 },
+ { url = "https://files.pythonhosted.org/packages/08/ff/e9eed2ee5fed6f76fdd6032ca5cd38c57ca9661430bb3d5fb2872dc8703c/websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf", size = 181918 },
+ { url = "https://files.pythonhosted.org/packages/d8/75/994634a49b7e12532be6a42103597b71098fd25900f7437d6055ed39930a/websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85", size = 176388 },
+ { url = "https://files.pythonhosted.org/packages/98/93/e36c73f78400a65f5e236cd376713c34182e6663f6889cd45a4a04d8f203/websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065", size = 176828 },
+ { url = "https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437 },
+ { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096 },
+ { url = "https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332 },
+ { url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152 },
+ { url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096 },
+ { url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523 },
+ { url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790 },
+ { url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165 },
+ { url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160 },
+ { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395 },
+ { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841 },
+ { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440 },
+ { url = "https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098 },
+ { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329 },
+ { url = "https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111 },
+ { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054 },
+ { url = "https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496 },
+ { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829 },
+ { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217 },
+ { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195 },
+ { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393 },
+ { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837 },
+ { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743 },
+]
+
[[package]]
name = "werkzeug"
version = "3.1.3"
@@ -527,3 +607,15 @@ sdist = { url = "https://files.pythonhosted.org/packages/9f/69/83029f1f6300c5fb2
wheels = [
{ url = "https://files.pythonhosted.org/packages/52/24/ab44c871b0f07f491e5d2ad12c9bd7358e527510618cb1b803a88e986db1/werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e", size = 224498 },
]
+
+[[package]]
+name = "wsproto"
+version = "1.3.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "h11" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c7/79/12135bdf8b9c9367b8701c2c19a14c913c120b882d50b014ca0d38083c2c/wsproto-1.3.2.tar.gz", hash = "sha256:b86885dcf294e15204919950f666e06ffc6c7c114ca900b060d6e16293528294", size = 50116 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a4/f5/10b68b7b1544245097b2a1b8238f66f2fc6dcaeb24ba5d917f52bd2eed4f/wsproto-1.3.2-py3-none-any.whl", hash = "sha256:61eea322cdf56e8cc904bd3ad7573359a242ba65688716b0710a5eb12beab584", size = 24405 },
+]