From 8754203ec66fcc92b4b9e512775d429d68d42161 Mon Sep 17 00:00:00 2001 From: Game_Time <108236317+RayBytes@users.noreply.github.com> Date: Mon, 23 Mar 2026 15:41:42 +0500 Subject: [PATCH] feat: add responses api, websocket support, and fast mode --- DOCKER.md | 1 + README.md | 181 ++++----- chatmock/app.py | 6 + chatmock/cli.py | 9 + chatmock/fast_mode.py | 92 +++++ chatmock/model_registry.py | 8 + chatmock/responses_api.py | 242 ++++++++++++ chatmock/routes_ollama.py | 24 +- chatmock/routes_openai.py | 213 ++++++++++- chatmock/session.py | 225 ++++++++++- chatmock/upstream.py | 84 ++++- chatmock/version.py | 2 +- chatmock/websocket_routes.py | 225 +++++++++++ docker/entrypoint.sh | 3 + gui.py | 15 +- pyproject.toml | 2 + scripts/test_responses_cached_tokens.py | 176 +++++++++ scripts/test_responses_reuse.py | 143 +++++++ tests/test_fast_mode.py | 49 +++ tests/test_models.py | 2 + tests/test_routes.py | 473 +++++++++++++++++++++++- uv.lock | 92 +++++ 22 files changed, 2148 insertions(+), 119 deletions(-) create mode 100644 chatmock/fast_mode.py create mode 100644 chatmock/responses_api.py create mode 100644 chatmock/websocket_routes.py create mode 100644 scripts/test_responses_cached_tokens.py create mode 100644 scripts/test_responses_reuse.py create mode 100644 tests/test_fast_mode.py diff --git a/DOCKER.md b/DOCKER.md index db9191f..1314c97 100644 --- a/DOCKER.md +++ b/DOCKER.md @@ -24,6 +24,7 @@ Set options in `.env` or pass environment variables: - `CHATGPT_LOCAL_REASONING_EFFORT`: minimal|low|medium|high|xhigh - `CHATGPT_LOCAL_REASONING_SUMMARY`: auto|concise|detailed|none - `CHATGPT_LOCAL_REASONING_COMPAT`: legacy|o3|think-tags|current +- `CHATGPT_LOCAL_FAST_MODE`: `true|false` to enable fast mode by default for supported models - `CHATGPT_LOCAL_DEBUG_MODEL`: force model override (e.g., `gpt-5.4`) - `CHATGPT_LOCAL_CLIENT_ID`: OAuth client id override (rarely needed) - `CHATGPT_LOCAL_EXPOSE_REASONING_MODELS`: `true|false` to add reasoning model variants to `/v1/models` diff --git a/README.md b/README.md index 1fe3188..4486dd2 100644 --- a/README.md +++ b/README.md @@ -1,172 +1,175 @@
-

ChatMock -
-Stars Badge -Forks Badge -Pull Requests Badge -Issues Badge -GitHub contributors -License Badge -
-

- -

OpenAI & Ollama compatible API powered by your ChatGPT plan.

-

Use your ChatGPT Plus/Pro account to call OpenAI models from code or alternate chat UIs.

-
+ +# ChatMock + +**Allows Codex to work in your favourite chat apps and coding tools.** + +[![PyPI](https://img.shields.io/pypi/v/chatmock?color=blue&label=pypi)](https://pypi.org/project/chatmock/) +[![Python](https://img.shields.io/pypi/pyversions/chatmock)](https://pypi.org/project/chatmock/) +[![License](https://img.shields.io/github/license/RayBytes/ChatMock)](LICENSE) +[![Stars](https://img.shields.io/github/stars/RayBytes/ChatMock?style=flat)](https://github.com/RayBytes/ChatMock/stargazers) +[![Last Commit](https://img.shields.io/github/last-commit/RayBytes/ChatMock)](https://github.com/RayBytes/ChatMock/commits/main) +[![Issues](https://img.shields.io/github/issues/RayBytes/ChatMock)](https://github.com/RayBytes/ChatMock/issues) + +
+ +
-## What It Does +
-ChatMock runs a local server that creates an OpenAI/Ollama compatible API, and requests are then fulfilled using your authenticated ChatGPT login with the oauth client of Codex, OpenAI's coding CLI tool. This allows you to use GPT-5, GPT-5-Codex, and other models right through your OpenAI account, without requiring an api key. You are then able to use it in other chat apps or other coding tools.
-This does require a paid ChatGPT account. - -## Quickstart - -### Homebrew +## Install +#### Homebrew ```bash brew tap RayBytes/chatmock brew install chatmock ``` -### CLI - +#### pipx / pip ```bash pipx install chatmock ``` -### GUI +#### GUI +Download from [releases](https://github.com/RayBytes/ChatMock/releases) (macOS & Windows) -If you're on **macOS** or **Windows**, you can download the GUI app from the [GitHub releases](https://github.com/RayBytes/ChatMock/releases). +#### Docker +See [DOCKER.md](DOCKER.md) -### Python -If you wish to just simply run this as a python flask server, you are also freely welcome too. +
-Clone or download this repository, then cd into the project directory. Then follow the instrunctions listed below. - -1. Sign in with your ChatGPT account and follow the prompts -```bash -python chatmock.py login -``` -You can make sure this worked by running `python chatmock.py info` - -2. After the login completes successfully, you can just simply start the local server +## Getting Started ```bash -python chatmock.py serve +# 1. Sign in with your ChatGPT account +chatmock login + +# 2. Start the server +chatmock serve ``` -Then, you can simply use the address and port as the baseURL as you require (http://127.0.0.1:8000 by default) -**Reminder:** When setting a baseURL in other applications, make you sure you include /v1/ at the end of the URL if you're using this as a OpenAI compatible endpoint (e.g http://127.0.0.1:8000/v1) +The server runs at `http://127.0.0.1:8000` by default. Use `http://127.0.0.1:8000/v1` as your base URL for OpenAI-compatible apps. -### Docker +
-Read [the docker instrunctions here](https://github.com/RayBytes/ChatMock/blob/main/DOCKER.md) +## Usage -# Examples - -### Python +
+Python ```python from openai import OpenAI client = OpenAI( base_url="http://127.0.0.1:8000/v1", - api_key="key" # ignored + api_key="anything" # not checked ) -resp = client.chat.completions.create( +response = client.chat.completions.create( model="gpt-5.4", - messages=[{"role": "user", "content": "hello world"}] + messages=[{"role": "user", "content": "hello"}] ) - -print(resp.choices[0].message.content) +print(response.choices[0].message.content) ``` -### curl +
+ +
+cURL ```bash curl http://127.0.0.1:8000/v1/chat/completions \ - -H "Authorization: Bearer key" \ -H "Content-Type: application/json" \ -d '{ "model": "gpt-5.4", - "messages": [{"role":"user","content":"hello world"}] + "messages": [{"role": "user", "content": "hello"}] }' ``` -# What's supported +
-- Tool/Function calling -- Vision/Image understanding -- Thinking summaries (through thinking tags) -- Thinking effort +
-## Notes & Limits +## Supported Models -- Requires an active, paid ChatGPT account. -- Some context length might be taken up by internal instructions (but they dont seem to degrade the model) -- Use responsibly and at your own risk. This project is not affiliated with OpenAI, and is a educational exercise. - -# Supported models - `gpt-5.4` - `gpt-5.4-mini` - `gpt-5.2` - `gpt-5.1` - `gpt-5` - `gpt-5.3-codex` -- `gpt-5-codex` +- `gpt-5.3-codex-spark` - `gpt-5.2-codex` +- `gpt-5-codex` - `gpt-5.1-codex` - `gpt-5.1-codex-max` - `gpt-5.1-codex-mini` - `codex-mini` -# Customisation / Configuration +
-### Thinking effort +## Features -- `--reasoning-effort` (choice of none,minimal,low,medium,high,xhigh)
-GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overrided by the API request with a different effort set. The default reasoning effort without setting this parameter is `medium`.
- The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high` while `gpt-5.1-codex-max` adds `xhigh`. The `gpt-5.2` and `gpt-5.3` families (including codex) support `low`, `medium`, `high`, and `xhigh`. `gpt-5.4` supports `none`, `low`, `medium`, `high`, and `xhigh`. +- Tool / function calling +- Vision / image input +- Thinking summaries (via think tags) +- Configurable thinking effort +- Fast mode for supported models +- Web search tool +- OpenAI-compatible `/v1/responses` (HTTP + WebSocket) +- Ollama-compatible endpoints +- Reasoning effort exposed as separate models (optional) -### Thinking summaries +
-- `--reasoning-summary` (choice of auto,concise,detailed,none)
-Models like GPT-5 do not return raw thinking content, but instead return thinking summaries. These can also be customised by you. +## Configuration -### OpenAI Tools +All flags go after `chatmock serve`. These can also be set as environment variables. -- `--enable-web-search`
-You can also access OpenAI tools through this project. Currently, only web search is available. -You can enable it by starting the server with this parameter, which will allow OpenAI to determine when a request requires a web search, or you can use the following parameters during a request to the API to enable web search: -

-`responses_tools`: supports `[{"type":"web_search"}]` / `{ "type": "web_search_preview" }`
-`responses_tool_choice`: `"auto"` or `"none"` +| Flag | Env var | Options | Default | Description | +|------|---------|---------|---------|-------------| +| `--reasoning-effort` | `CHATGPT_LOCAL_REASONING_EFFORT` | none, minimal, low, medium, high, xhigh | medium | How hard the model thinks | +| `--reasoning-summary` | `CHATGPT_LOCAL_REASONING_SUMMARY` | auto, concise, detailed, none | auto | Thinking summary verbosity | +| `--reasoning-compat` | `CHATGPT_LOCAL_REASONING_COMPAT` | legacy, o3, think-tags | think-tags | How reasoning is returned to the client | +| `--fast-mode` | `CHATGPT_LOCAL_FAST_MODE` | true/false | false | Priority processing for supported models | +| `--enable-web-search` | `CHATGPT_LOCAL_ENABLE_WEB_SEARCH` | true/false | false | Allow the model to search the web | +| `--expose-reasoning-models` | `CHATGPT_LOCAL_EXPOSE_REASONING_MODELS` | true/false | false | List each reasoning level as its own model | + +
+Web search in a request -#### Example usage ```json { "model": "gpt-5.4", - "messages": [{"role":"user","content":"Find current METAR rules"}], - "stream": true, + "messages": [{"role": "user", "content": "latest news on ..."}], "responses_tools": [{"type": "web_search"}], "responses_tool_choice": "auto" } ``` -### Expose reasoning models +
-- `--expose-reasoning-models`
-If your preferred app doesn’t support selecting reasoning effort, or you just want a simpler approach, this parameter exposes each reasoning level as a separate, queryable model. Each reasoning level also appears individually under ⁠/v1/models, so model pickers in your favorite chat apps will list all reasoning options as distinct models you can switch between. +
+Fast mode in a request + +```json +{ + "model": "gpt-5.4", + "input": "summarize this", + "fast_mode": true +} +``` + +
+ +
## Notes -If you wish to have the fastest responses, I'd recommend setting `--reasoning-effort` to low, and `--reasoning-summary` to none.
-All parameters and choices can be seen by sending `python chatmock.py serve --h`
-The context size of this route is also larger than what you get access to in the regular ChatGPT app.
-When the model returns a thinking summary, the model will send back thinking tags to make it compatible with chat apps. **If you don't like this behavior, you can instead set `--reasoning-compat` to legacy, and reasoning will be set in the reasoning tag instead of being returned in the actual response text.** +Use responsibly and at your own risk. This project is not affiliated with OpenAI. +
## Star History diff --git a/chatmock/app.py b/chatmock/app.py index 9727b5a..e4541dc 100644 --- a/chatmock/app.py +++ b/chatmock/app.py @@ -1,11 +1,13 @@ from __future__ import annotations from flask import Flask, jsonify +from flask_sock import Sock from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS from .http import build_cors_headers from .routes_openai import openai_bp from .routes_ollama import ollama_bp +from .websocket_routes import register_websocket_routes def create_app( @@ -14,6 +16,7 @@ def create_app( reasoning_effort: str = "medium", reasoning_summary: str = "auto", reasoning_compat: str = "think-tags", + fast_mode: bool = False, debug_model: str | None = None, expose_reasoning_models: bool = False, default_web_search: bool = False, @@ -26,6 +29,7 @@ def create_app( REASONING_EFFORT=reasoning_effort, REASONING_SUMMARY=reasoning_summary, REASONING_COMPAT=reasoning_compat, + FAST_MODE=bool(fast_mode), DEBUG_MODEL=debug_model, BASE_INSTRUCTIONS=BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS=GPT5_CODEX_INSTRUCTIONS, @@ -46,5 +50,7 @@ def create_app( app.register_blueprint(openai_bp) app.register_blueprint(ollama_bp) + sock = Sock(app) + register_websocket_routes(sock) return app diff --git a/chatmock/cli.py b/chatmock/cli.py index 9ee41be..78a69ae 100644 --- a/chatmock/cli.py +++ b/chatmock/cli.py @@ -267,6 +267,7 @@ def cmd_serve( reasoning_effort: str, reasoning_summary: str, reasoning_compat: str, + fast_mode: bool, debug_model: str | None, expose_reasoning_models: bool, default_web_search: bool, @@ -277,6 +278,7 @@ def cmd_serve( reasoning_effort=reasoning_effort, reasoning_summary=reasoning_summary, reasoning_compat=reasoning_compat, + fast_mode=fast_mode, debug_model=debug_model, expose_reasoning_models=expose_reasoning_models, default_web_search=default_web_search, @@ -309,6 +311,12 @@ def main() -> None: default=os.getenv("CHATGPT_LOCAL_DEBUG_MODEL"), help="Forcibly override requested 'model' with this value", ) + p_serve.add_argument( + "--fast-mode", + action=argparse.BooleanOptionalAction, + default=(os.getenv("CHATGPT_LOCAL_FAST_MODE") or "").strip().lower() in ("1", "true", "yes", "on"), + help="Enable GPT fast mode by default for supported models; request-level overrides still take precedence.", + ) p_serve.add_argument( "--reasoning-effort", choices=["none", "minimal", "low", "medium", "high", "xhigh"], @@ -366,6 +374,7 @@ def main() -> None: reasoning_effort=args.reasoning_effort, reasoning_summary=args.reasoning_summary, reasoning_compat=args.reasoning_compat, + fast_mode=args.fast_mode, debug_model=args.debug_model, expose_reasoning_models=args.expose_reasoning_models, default_web_search=args.enable_web_search, diff --git a/chatmock/fast_mode.py b/chatmock/fast_mode.py new file mode 100644 index 0000000..8dbb557 --- /dev/null +++ b/chatmock/fast_mode.py @@ -0,0 +1,92 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from .model_registry import normalize_model_name + + +PRIORITY_SUPPORTED_MODELS = frozenset( + ( + "gpt-5.4", + "gpt-5.2", + "gpt-5.1", + "gpt-5", + "gpt-5.1-codex", + "gpt-5-codex", + ) +) + +_TRUE_STRINGS = {"1", "true", "yes", "on"} +_FALSE_STRINGS = {"0", "false", "no", "off"} + + +def parse_optional_bool(value: Any) -> bool | None: + if isinstance(value, bool): + return value + if isinstance(value, str): + normalized = value.strip().lower() + if normalized in _TRUE_STRINGS: + return True + if normalized in _FALSE_STRINGS: + return False + return None + + +def supports_priority_service_tier(model: str | None) -> bool: + return normalize_model_name(model) in PRIORITY_SUPPORTED_MODELS + + +@dataclass(frozen=True) +class ServiceTierResolution: + service_tier: str | None + error_message: str | None = None + warning_message: str | None = None + used_server_default: bool = False + + +def resolve_service_tier( + model: str | None, + *, + request_fast_mode: Any = None, + request_service_tier: Any = None, + server_fast_mode: bool = False, +) -> ServiceTierResolution: + explicit_fast_mode = parse_optional_bool(request_fast_mode) + + tier: str | None = None + explicit_request = False + used_server_default = False + + if explicit_fast_mode is not None: + tier = "priority" if explicit_fast_mode else None + explicit_request = True + elif isinstance(request_service_tier, str) and request_service_tier.strip(): + tier = request_service_tier.strip().lower() + explicit_request = True + elif server_fast_mode: + tier = "priority" + used_server_default = True + + if tier == "priority" and not supports_priority_service_tier(model): + normalized = normalize_model_name(model) + message = ( + f"Fast mode is not supported for model '{normalized}'. " + "Use a supported GPT-5 priority-processing model or disable fast mode for this request." + ) + if explicit_request: + return ServiceTierResolution( + service_tier=None, + error_message=message, + used_server_default=used_server_default, + ) + return ServiceTierResolution( + service_tier=None, + warning_message=message, + used_server_default=used_server_default, + ) + + return ServiceTierResolution( + service_tier=tier, + used_server_default=used_server_default, + ) diff --git a/chatmock/model_registry.py b/chatmock/model_registry.py index b171883..9bddbeb 100644 --- a/chatmock/model_registry.py +++ b/chatmock/model_registry.py @@ -62,6 +62,14 @@ _MODEL_SPECS = ( variant_efforts=("xhigh", "high", "medium", "low"), uses_codex_instructions=True, ), + ModelSpec( + public_id="gpt-5.3-codex-spark", + upstream_id="gpt-5.3-codex-spark", + aliases=("gpt5.3-codex-spark", "gpt-5.3-codex-spark-latest"), + allowed_efforts=frozenset(("low", "medium", "high", "xhigh")), + variant_efforts=("xhigh", "high", "medium", "low"), + uses_codex_instructions=True, + ), ModelSpec( public_id="gpt-5-codex", upstream_id="gpt-5-codex", diff --git a/chatmock/responses_api.py b/chatmock/responses_api.py new file mode 100644 index 0000000..9aae843 --- /dev/null +++ b/chatmock/responses_api.py @@ -0,0 +1,242 @@ +from __future__ import annotations + +import json +from dataclasses import dataclass +from typing import Any, Dict, Iterable, Iterator, List + +from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS +from .fast_mode import ServiceTierResolution, resolve_service_tier +from .model_registry import ( + allowed_efforts_for_model, + extract_reasoning_from_model_name, + normalize_model_name, + uses_codex_instructions, +) +from .reasoning import build_reasoning_param +from .session import ensure_session_id + + +@dataclass(frozen=True) +class ResponsesRequestError(Exception): + message: str + status_code: int = 400 + code: str | None = None + + def __str__(self) -> str: + return self.message + + +@dataclass(frozen=True) +class NormalizedResponsesRequest: + payload: Dict[str, Any] + requested_model: str | None + normalized_model: str + session_id: str + service_tier_resolution: ServiceTierResolution + + +def instructions_for_model(config: Dict[str, Any], model: str) -> str: + base = config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS) + if uses_codex_instructions(model): + codex = config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS + if isinstance(codex, str) and codex.strip(): + return codex + return base + + +def extract_client_session_id(headers: Any) -> str | None: + try: + return headers.get("X-Session-Id") or headers.get("session_id") or None + except Exception: + return None + + +def _input_items_for_session(raw_input: Any) -> List[Dict[str, Any]]: + if isinstance(raw_input, list): + return [item for item in raw_input if isinstance(item, dict)] + if isinstance(raw_input, dict): + return [raw_input] + if isinstance(raw_input, str) and raw_input.strip(): + return [ + { + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": raw_input}], + } + ] + return [] + + +def canonicalize_responses_input(raw_input: Any) -> Any: + if isinstance(raw_input, list): + return [item for item in raw_input if isinstance(item, dict)] + if isinstance(raw_input, dict): + return [raw_input] + if isinstance(raw_input, str): + return _input_items_for_session(raw_input) + return raw_input + + +def normalize_responses_payload( + payload: Dict[str, Any], + *, + config: Dict[str, Any], + client_session_id: str | None = None, +) -> NormalizedResponsesRequest: + requested_model = payload.get("model") if isinstance(payload.get("model"), str) else None + normalized_model = normalize_model_name(requested_model, config.get("DEBUG_MODEL")) + + normalized = dict(payload) + normalized["model"] = normalized_model + + if "input" in normalized: + normalized["input"] = canonicalize_responses_input(normalized.get("input")) + + if "store" not in normalized: + normalized["store"] = False + + instructions = normalized.get("instructions") + if not isinstance(instructions, str) or not instructions.strip(): + instructions = instructions_for_model(config, normalized_model) + normalized["instructions"] = instructions + + reasoning_effort = config.get("REASONING_EFFORT", "medium") + reasoning_summary = config.get("REASONING_SUMMARY", "auto") + reasoning_overrides = ( + normalized.get("reasoning") + if isinstance(normalized.get("reasoning"), dict) + else extract_reasoning_from_model_name(requested_model) + ) + normalized["reasoning"] = build_reasoning_param( + reasoning_effort, + reasoning_summary, + reasoning_overrides, + allowed_efforts=allowed_efforts_for_model(normalized_model), + ) + + include = normalized.get("include") + include_list = [item for item in include if isinstance(item, str)] if isinstance(include, list) else [] + if "reasoning.encrypted_content" not in include_list: + include_list.append("reasoning.encrypted_content") + normalized["include"] = include_list + + tools = normalized.get("tools") + if (not isinstance(tools, list) or not tools) and bool(config.get("DEFAULT_WEB_SEARCH")): + tool_choice = normalized.get("tool_choice") + if not (isinstance(tool_choice, str) and tool_choice.strip().lower() == "none"): + normalized["tools"] = [{"type": "web_search"}] + + service_tier_resolution = resolve_service_tier( + normalized_model, + request_fast_mode=normalized.get("fast_mode"), + request_service_tier=normalized.get("service_tier"), + server_fast_mode=bool(config.get("FAST_MODE")), + ) + if service_tier_resolution.error_message: + raise ResponsesRequestError(service_tier_resolution.error_message) + if service_tier_resolution.service_tier is None: + normalized.pop("service_tier", None) + else: + normalized["service_tier"] = service_tier_resolution.service_tier + normalized.pop("fast_mode", None) + + input_items = _input_items_for_session(normalized.get("input")) + session_id = ensure_session_id(instructions, input_items, client_session_id) + prompt_cache_key = normalized.get("prompt_cache_key") + if not isinstance(prompt_cache_key, str) or not prompt_cache_key.strip(): + normalized["prompt_cache_key"] = session_id + + return NormalizedResponsesRequest( + payload=normalized, + requested_model=requested_model, + normalized_model=normalized_model, + session_id=session_id, + service_tier_resolution=service_tier_resolution, + ) + + +def iter_sse_event_payloads(upstream: Any) -> Iterator[Dict[str, Any]]: + for raw in upstream.iter_lines(decode_unicode=False): + if not raw: + continue + line = raw.decode("utf-8", errors="ignore") if isinstance(raw, (bytes, bytearray)) else raw + if not line.startswith("data: "): + continue + data = line[len("data: ") :].strip() + if not data or data == "[DONE]": + if data == "[DONE]": + break + continue + try: + evt = json.loads(data) + except Exception: + continue + if isinstance(evt, dict): + yield evt + + +def aggregate_response_from_sse( + upstream: Any, + *, + on_event: Any | None = None, +) -> tuple[Dict[str, Any] | None, Dict[str, Any] | None]: + response_obj: Dict[str, Any] | None = None + error_obj: Dict[str, Any] | None = None + try: + for evt in iter_sse_event_payloads(upstream): + if callable(on_event): + try: + on_event(evt) + except Exception: + pass + response = evt.get("response") + if isinstance(response, dict): + response_obj = response + kind = evt.get("type") + if kind == "response.failed": + if isinstance(response, dict) and isinstance(response.get("error"), dict): + error_obj = {"error": response.get("error")} + else: + error_obj = {"error": {"message": "response.failed"}} + break + if kind == "response.completed": + break + finally: + upstream.close() + return response_obj, error_obj + + +def stream_upstream_bytes( + upstream: Any, + *, + on_event: Any | None = None, +) -> Iterable[bytes]: + buffer = b"" + try: + for chunk in upstream.iter_content(chunk_size=None): + if chunk: + if callable(on_event): + if isinstance(chunk, bytes): + buffer += chunk + else: + buffer += str(chunk).encode("utf-8", errors="ignore") + while b"\n" in buffer: + line, buffer = buffer.split(b"\n", 1) + line = line.rstrip(b"\r") + if not line.startswith(b"data: "): + continue + data = line[len(b"data: ") :].strip() + if not data or data == b"[DONE]": + continue + try: + evt = json.loads(data.decode("utf-8", errors="ignore")) + except Exception: + evt = None + if isinstance(evt, dict): + try: + on_event(evt) + except Exception: + pass + yield chunk + finally: + upstream.close() diff --git a/chatmock/routes_ollama.py b/chatmock/routes_ollama.py index 7116d67..96c7c8b 100644 --- a/chatmock/routes_ollama.py +++ b/chatmock/routes_ollama.py @@ -8,9 +8,11 @@ from typing import Any, Dict, List from flask import Blueprint, Response, current_app, jsonify, make_response, request, stream_with_context from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS +from .fast_mode import resolve_service_tier from .limits import record_rate_limits_from_response from .http import build_cors_headers from .model_registry import list_public_models, uses_codex_instructions +from .responses_api import instructions_for_model from .reasoning import ( allowed_efforts_for_model, build_reasoning_param, @@ -71,12 +73,7 @@ def ollama_version() -> Response: def _instructions_for_model(model: str) -> str: - base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS) - if uses_codex_instructions(model): - codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS - if isinstance(codex, str) and codex.strip(): - return codex - return base + return instructions_for_model(current_app.config, model) _OLLAMA_FAKE_EVAL = { @@ -254,6 +251,19 @@ def ollama_chat() -> Response: model_reasoning = extract_reasoning_from_model_name(model) normalized_model = normalize_model_name(model) + service_tier_resolution = resolve_service_tier( + normalized_model, + request_fast_mode=payload.get("fast_mode"), + request_service_tier=payload.get("service_tier"), + server_fast_mode=bool(current_app.config.get("FAST_MODE")), + ) + if service_tier_resolution.warning_message and verbose: + print(f"[FastMode] {service_tier_resolution.warning_message}") + if service_tier_resolution.error_message: + err = {"error": service_tier_resolution.error_message} + if verbose: + _log_json("OUT POST /api/chat", err) + return jsonify(err), 400 upstream, error_resp = start_upstream_request( normalized_model, input_items, @@ -267,6 +277,7 @@ def ollama_chat() -> Response: model_reasoning, allowed_efforts=allowed_efforts_for_model(model), ), + service_tier=service_tier_resolution.service_tier, ) if error_resp is not None: if verbose: @@ -307,6 +318,7 @@ def ollama_chat() -> Response: model_reasoning, allowed_efforts=allowed_efforts_for_model(model), ), + service_tier=service_tier_resolution.service_tier, ) record_rate_limits_from_response(upstream2) if err2 is None and upstream2 is not None and upstream2.status_code < 400: diff --git a/chatmock/routes_openai.py b/chatmock/routes_openai.py index f1c6e32..437ebef 100644 --- a/chatmock/routes_openai.py +++ b/chatmock/routes_openai.py @@ -7,16 +7,31 @@ from typing import Any, Dict, List from flask import Blueprint, Response, current_app, jsonify, make_response, request from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS +from .fast_mode import resolve_service_tier from .limits import record_rate_limits_from_response from .http import build_cors_headers from .model_registry import list_public_models, uses_codex_instructions +from .responses_api import ( + ResponsesRequestError, + aggregate_response_from_sse, + extract_client_session_id, + instructions_for_model, + normalize_responses_payload, + stream_upstream_bytes, +) from .reasoning import ( allowed_efforts_for_model, apply_reasoning_to_message, build_reasoning_param, extract_reasoning_from_model_name, ) -from .upstream import normalize_model_name, start_upstream_request +from .session import ( + clear_responses_reuse_state, + note_responses_final_response, + note_responses_stream_event, + prepare_responses_request_for_session, +) +from .upstream import normalize_model_name, start_upstream_raw_request, start_upstream_request from .utils import ( convert_chat_messages_to_responses_input, convert_tools_chat_to_responses, @@ -59,12 +74,32 @@ def _wrap_stream_logging(label: str, iterator, enabled: bool): def _instructions_for_model(model: str) -> str: - base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS) - if uses_codex_instructions(model): - codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS - if isinstance(codex, str) and codex.strip(): - return codex - return base + return instructions_for_model(current_app.config, model) + + +def _service_tier_from_payload( + model: str, + payload: Dict[str, Any], + *, + verbose: bool = False, +) -> tuple[str | None, Response | None]: + resolution = resolve_service_tier( + model, + request_fast_mode=payload.get("fast_mode"), + request_service_tier=payload.get("service_tier"), + server_fast_mode=bool(current_app.config.get("FAST_MODE")), + ) + if resolution.warning_message and verbose: + print(f"[FastMode] {resolution.warning_message}") + if resolution.error_message: + err = {"error": {"message": resolution.error_message}} + if verbose: + _log_json("OUT POST service_tier resolution", err) + resp = make_response(jsonify(err), 400) + for k, v in build_cors_headers().items(): + resp.headers.setdefault(k, v) + return None, resp + return resolution.service_tier, None @openai_bp.route("/v1/chat/completions", methods=["POST"]) @@ -178,6 +213,9 @@ def chat_completions() -> Response: reasoning_overrides, allowed_efforts=allowed_efforts_for_model(model), ) + service_tier, tier_error = _service_tier_from_payload(model, payload, verbose=verbose) + if tier_error is not None: + return tier_error upstream, error_resp = start_upstream_request( model, @@ -187,6 +225,7 @@ def chat_completions() -> Response: tool_choice=tool_choice, parallel_tool_calls=parallel_tool_calls, reasoning_param=reasoning_param, + service_tier=service_tier, ) if error_resp is not None: if verbose: @@ -224,6 +263,7 @@ def chat_completions() -> Response: tool_choice=safe_choice, parallel_tool_calls=parallel_tool_calls, reasoning_param=reasoning_param, + service_tier=service_tier, ) record_rate_limits_from_response(upstream2) if err2 is None and upstream2 is not None and upstream2.status_code < 400: @@ -413,11 +453,15 @@ def completions() -> Response: reasoning_overrides, allowed_efforts=allowed_efforts_for_model(model), ) + service_tier, tier_error = _service_tier_from_payload(model, payload, verbose=verbose) + if tier_error is not None: + return tier_error upstream, error_resp = start_upstream_request( model, input_items, instructions=_instructions_for_model(model), reasoning_param=reasoning_param, + service_tier=service_tier, ) if error_resp is not None: if verbose: @@ -529,6 +573,161 @@ def completions() -> Response: return resp +@openai_bp.route("/v1/responses", methods=["POST"]) +def responses_create() -> Response: + verbose = bool(current_app.config.get("VERBOSE")) + raw = request.get_data(cache=True, as_text=True) or "" + if verbose: + try: + print("IN POST /v1/responses\n" + raw) + except Exception: + pass + + try: + payload = json.loads(raw) if raw else {} + except Exception: + err = {"error": {"message": "Invalid JSON body"}} + if verbose: + _log_json("OUT POST /v1/responses", err) + return jsonify(err), 400 + + if not isinstance(payload, dict): + err = {"error": {"message": "Request body must be a JSON object"}} + if verbose: + _log_json("OUT POST /v1/responses", err) + return jsonify(err), 400 + + try: + normalized = normalize_responses_payload( + payload, + config=current_app.config, + client_session_id=extract_client_session_id(request.headers), + ) + except ResponsesRequestError as exc: + err: Dict[str, Any] = {"error": {"message": str(exc)}} + if exc.code: + err["error"]["code"] = exc.code + if verbose: + _log_json("OUT POST /v1/responses", err) + return jsonify(err), exc.status_code + + if normalized.service_tier_resolution.warning_message and verbose: + print(f"[FastMode] {normalized.service_tier_resolution.warning_message}") + + prepared = prepare_responses_request_for_session( + normalized.session_id, + normalized.payload, + allow_previous_response_id=False, + ) + stream_req = bool(prepared.payload.get("stream", False)) + upstream_payload = dict(prepared.payload) + upstream_payload["stream"] = True + upstream, error_resp = start_upstream_raw_request( + upstream_payload, + session_id=normalized.session_id, + stream=True, + ) + if error_resp is not None: + clear_responses_reuse_state(normalized.session_id) + if verbose: + try: + body = error_resp.get_data(as_text=True) + if body: + try: + parsed = json.loads(body) + except Exception: + parsed = body + _log_json("OUT POST /v1/responses", parsed) + except Exception: + pass + return error_resp + + record_rate_limits_from_response(upstream) + + if upstream.status_code >= 400: + try: + err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"error": {"message": upstream.text}} + except Exception: + err_body = {"error": {"message": upstream.text or "Upstream error"}} + finally: + upstream.close() + clear_responses_reuse_state(normalized.session_id) + if verbose: + _log_json("OUT POST /v1/responses", err_body) + resp = make_response(jsonify(err_body), upstream.status_code) + for k, v in build_cors_headers().items(): + resp.headers.setdefault(k, v) + return resp + + if stream_req: + if verbose: + print("OUT POST /v1/responses (streaming response)") + stream_iter = _wrap_stream_logging( + "STREAM OUT /v1/responses", + stream_upstream_bytes( + upstream, + on_event=lambda evt: note_responses_stream_event(normalized.session_id, evt), + ), + verbose, + ) + resp = Response( + stream_iter, + status=upstream.status_code, + mimetype="text/event-stream", + headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}, + ) + for k, v in build_cors_headers().items(): + resp.headers.setdefault(k, v) + return resp + + content_type = upstream.headers.get("Content-Type", "") + if "application/json" in content_type.lower(): + try: + body = upstream.json() + except Exception: + body = None + finally: + upstream.close() + if isinstance(body, dict): + note_responses_final_response(normalized.session_id, body) + if verbose: + _log_json("OUT POST /v1/responses", body) + resp = make_response(jsonify(body), upstream.status_code) + for k, v in build_cors_headers().items(): + resp.headers.setdefault(k, v) + return resp + + response_obj, error_obj = aggregate_response_from_sse( + upstream, + on_event=lambda evt: note_responses_stream_event(normalized.session_id, evt), + ) + if error_obj is not None: + clear_responses_reuse_state(normalized.session_id) + if verbose: + _log_json("OUT POST /v1/responses", error_obj) + resp = make_response(jsonify(error_obj), 502) + for k, v in build_cors_headers().items(): + resp.headers.setdefault(k, v) + return resp + + if response_obj is None: + clear_responses_reuse_state(normalized.session_id) + err = {"error": {"message": "Upstream response stream did not contain a completed response object"}} + if verbose: + _log_json("OUT POST /v1/responses", err) + resp = make_response(jsonify(err), 502) + for k, v in build_cors_headers().items(): + resp.headers.setdefault(k, v) + return resp + + if verbose: + _log_json("OUT POST /v1/responses", response_obj) + resp = make_response(jsonify(response_obj), upstream.status_code) + for k, v in build_cors_headers().items(): + resp.headers.setdefault(k, v) + return resp + + @openai_bp.route("/v1/models", methods=["GET"]) def list_models() -> Response: expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS")) diff --git a/chatmock/session.py b/chatmock/session.py index f2a085e..705a50c 100644 --- a/chatmock/session.py +++ b/chatmock/session.py @@ -1,16 +1,37 @@ from __future__ import annotations +import copy import hashlib import json import threading import uuid -from typing import Any, Dict, List, Tuple +from dataclasses import dataclass, field +from typing import Any, Dict, List _LOCK = threading.Lock() _FINGERPRINT_TO_UUID: Dict[str, str] = {} _ORDER: List[str] = [] _MAX_ENTRIES = 10000 +_RESPONSES_SESSION_STATE: Dict[str, "_ResponsesSessionState"] = {} +_RESPONSES_ORDER: List[str] = [] + + +@dataclass(frozen=True) +class PreparedResponsesRequest: + payload: Dict[str, Any] + session_id: str + + +@dataclass +class _ResponsesSessionState: + last_request_payload: Dict[str, Any] | None = None + last_response_id: str | None = None + last_response_items: List[Dict[str, Any]] = field(default_factory=list) + inflight_request_payload: Dict[str, Any] | None = None + inflight_track_result: bool = False + inflight_response_id: str | None = None + inflight_response_items: List[Dict[str, Any]] = field(default_factory=list) def _canonicalize_first_user_message(input_items: List[Dict[str, Any]]) -> Dict[str, Any] | None: @@ -70,6 +91,61 @@ def _remember(fp: str, sid: str) -> None: _FINGERPRINT_TO_UUID.pop(oldest, None) +def _remember_responses_session(session_id: str) -> _ResponsesSessionState: + state = _RESPONSES_SESSION_STATE.get(session_id) + if state is None: + state = _ResponsesSessionState() + _RESPONSES_SESSION_STATE[session_id] = state + _RESPONSES_ORDER.append(session_id) + if len(_RESPONSES_ORDER) > _MAX_ENTRIES: + oldest = _RESPONSES_ORDER.pop(0) + _RESPONSES_SESSION_STATE.pop(oldest, None) + return state + + +def _request_without_input(payload: Dict[str, Any]) -> Dict[str, Any]: + clone = copy.deepcopy(payload) + clone["input"] = [] + clone.pop("previous_response_id", None) + return clone + + +def _input_list(payload: Dict[str, Any]) -> List[Dict[str, Any]] | None: + raw = payload.get("input") + if not isinstance(raw, list): + return None + return [item for item in copy.deepcopy(raw) if isinstance(item, dict)] + + +def _conversation_output_items(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + reusable: List[Dict[str, Any]] = [] + for item in items: + if not isinstance(item, dict): + continue + item_type = item.get("type") + if item_type == "reasoning": + continue + reusable.append(copy.deepcopy(item)) + return reusable + + +def _clear_reuse_state(state: _ResponsesSessionState) -> None: + state.last_request_payload = None + state.last_response_id = None + state.last_response_items = [] + state.inflight_request_payload = None + state.inflight_track_result = False + state.inflight_response_id = None + state.inflight_response_items = [] + + +def _clear_inflight(state: _ResponsesSessionState) -> None: + state.inflight_request_payload = None + state.inflight_track_result = False + state.inflight_response_id = None + state.inflight_response_items = [] + + def ensure_session_id( instructions: str | None, input_items: List[Dict[str, Any]], @@ -87,3 +163,150 @@ def ensure_session_id( _remember(fp, sid) return sid + +def prepare_responses_request_for_session( + session_id: str, + payload: Dict[str, Any], + *, + allow_previous_response_id: bool = True, +) -> PreparedResponsesRequest: + full_payload = copy.deepcopy(payload) + outbound_payload = copy.deepcopy(payload) + explicit_previous_response_id = ( + isinstance(full_payload.get("previous_response_id"), str) + and bool(full_payload.get("previous_response_id").strip()) + ) + + with _LOCK: + state = _remember_responses_session(session_id) + + if explicit_previous_response_id: + _clear_reuse_state(state) + return PreparedResponsesRequest( + payload=outbound_payload, + session_id=session_id, + ) + + request_input = _input_list(full_payload) + if ( + allow_previous_response_id + and + state.last_request_payload is not None + and state.last_response_id + and request_input is not None + and _request_without_input(state.last_request_payload) == _request_without_input(full_payload) + ): + baseline: List[Dict[str, Any]] = [] + previous_input = _input_list(state.last_request_payload) + if previous_input is not None: + baseline.extend(previous_input) + baseline.extend(copy.deepcopy(state.last_response_items)) + baseline_len = len(baseline) + if request_input[:baseline_len] == baseline and baseline_len <= len(request_input): + outbound_payload["input"] = copy.deepcopy(request_input[baseline_len:]) + outbound_payload["previous_response_id"] = state.last_response_id + + state.inflight_request_payload = full_payload + state.inflight_track_result = True + state.inflight_response_id = None + state.inflight_response_items = [] + + return PreparedResponsesRequest( + payload=outbound_payload, + session_id=session_id, + ) + + +def note_responses_stream_event(session_id: str, event: Dict[str, Any]) -> None: + if not isinstance(session_id, str) or not session_id.strip(): + return + if not isinstance(event, dict): + return + + with _LOCK: + state = _RESPONSES_SESSION_STATE.get(session_id) + if state is None: + return + + kind = event.get("type") + if kind == "response.created": + response = event.get("response") + if isinstance(response, dict) and isinstance(response.get("id"), str): + state.inflight_response_id = response.get("id") + return + + if kind == "response.output_item.done": + item = event.get("item") + if isinstance(item, dict): + state.inflight_response_items.append(copy.deepcopy(item)) + return + + if kind == "response.completed": + response = event.get("response") + response_id = None + response_items: List[Dict[str, Any]] = copy.deepcopy(state.inflight_response_items) + if isinstance(response, dict): + if isinstance(response.get("id"), str): + response_id = response.get("id") + output = response.get("output") + if isinstance(output, list) and output: + response_items = [copy.deepcopy(item) for item in output if isinstance(item, dict)] + if not response_id: + response_id = state.inflight_response_id + + if state.inflight_track_result and state.inflight_request_payload is not None and response_id: + state.last_request_payload = copy.deepcopy(state.inflight_request_payload) + state.last_response_id = response_id + state.last_response_items = _conversation_output_items(response_items) + else: + state.last_request_payload = None + state.last_response_id = None + state.last_response_items = [] + _clear_inflight(state) + return + + if kind in ("response.failed", "error"): + _clear_reuse_state(state) + + +def note_responses_final_response(session_id: str, response_obj: Dict[str, Any]) -> None: + if not isinstance(session_id, str) or not session_id.strip(): + return + if not isinstance(response_obj, dict): + return + + with _LOCK: + state = _RESPONSES_SESSION_STATE.get(session_id) + if state is None: + return + + response_id = response_obj.get("id") if isinstance(response_obj.get("id"), str) else None + output = response_obj.get("output") + output_items = [copy.deepcopy(item) for item in output if isinstance(item, dict)] if isinstance(output, list) else [] + if state.inflight_track_result and state.inflight_request_payload is not None and response_id: + state.last_request_payload = copy.deepcopy(state.inflight_request_payload) + state.last_response_id = response_id + state.last_response_items = _conversation_output_items(output_items) + else: + state.last_request_payload = None + state.last_response_id = None + state.last_response_items = [] + _clear_inflight(state) + + +def clear_responses_reuse_state(session_id: str) -> None: + if not isinstance(session_id, str) or not session_id.strip(): + return + with _LOCK: + state = _RESPONSES_SESSION_STATE.get(session_id) + if state is None: + return + _clear_reuse_state(state) + + +def reset_session_state() -> None: + with _LOCK: + _FINGERPRINT_TO_UUID.clear() + _ORDER.clear() + _RESPONSES_SESSION_STATE.clear() + _RESPONSES_ORDER.clear() diff --git a/chatmock/upstream.py b/chatmock/upstream.py index e264e6f..ba995cb 100644 --- a/chatmock/upstream.py +++ b/chatmock/upstream.py @@ -3,6 +3,7 @@ from __future__ import annotations import json import time from typing import Any, Dict, List, Tuple +from urllib.parse import urlparse, urlunparse import requests from flask import Response, current_app, jsonify, make_response @@ -33,6 +34,7 @@ def start_upstream_request( tool_choice: Any | None = None, parallel_tool_calls: bool = False, reasoning_param: Dict[str, Any] | None = None, + service_tier: str | None = None, ): access_token, account_id = get_effective_chatgpt_auth() if not access_token or not account_id: @@ -81,6 +83,62 @@ def start_upstream_request( if reasoning_param is not None: responses_payload["reasoning"] = reasoning_param + if isinstance(service_tier, str) and service_tier.strip(): + responses_payload["service_tier"] = service_tier.strip().lower() + + return start_upstream_raw_request( + responses_payload, + session_id=session_id, + stream=True, + ) + + +def build_upstream_headers( + access_token: str, + account_id: str, + session_id: str, + *, + accept: str = "text/event-stream", +) -> Dict[str, str]: + return { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/json", + "Accept": accept, + "chatgpt-account-id": account_id, + "OpenAI-Beta": "responses=experimental", + "session_id": session_id, + } + + +def start_upstream_raw_request( + responses_payload: Dict[str, Any], + *, + session_id: str | None = None, + stream: bool = True, +): + access_token, account_id = get_effective_chatgpt_auth() + if not access_token or not account_id: + resp = make_response( + jsonify( + { + "error": { + "message": "Missing ChatGPT credentials. Run 'python3 chatmock.py login' first.", + } + } + ), + 401, + ) + for k, v in build_cors_headers().items(): + resp.headers.setdefault(k, v) + return None, resp + + effective_session_id = session_id + if not isinstance(effective_session_id, str) or not effective_session_id.strip(): + payload_prompt_cache_key = responses_payload.get("prompt_cache_key") + if isinstance(payload_prompt_cache_key, str) and payload_prompt_cache_key.strip(): + effective_session_id = payload_prompt_cache_key.strip() + if not isinstance(effective_session_id, str) or not effective_session_id.strip(): + effective_session_id = str(int(time.time() * 1000)) verbose = False try: @@ -90,21 +148,19 @@ def start_upstream_request( if verbose: _log_json("OUTBOUND >> ChatGPT Responses API payload", responses_payload) - headers = { - "Authorization": f"Bearer {access_token}", - "Content-Type": "application/json", - "Accept": "text/event-stream", - "chatgpt-account-id": account_id, - "OpenAI-Beta": "responses=experimental", - "session_id": session_id, - } + headers = build_upstream_headers( + access_token, + account_id, + effective_session_id, + accept=("text/event-stream" if stream else "application/json"), + ) try: upstream = requests.post( CHATGPT_RESPONSES_URL, headers=headers, json=responses_payload, - stream=True, + stream=stream, timeout=600, ) except requests.RequestException as e: @@ -113,3 +169,13 @@ def start_upstream_request( resp.headers.setdefault(k, v) return None, resp return upstream, None + + +def build_upstream_websocket_url() -> str: + parsed = urlparse(CHATGPT_RESPONSES_URL) + scheme = parsed.scheme.lower() + if scheme == "https": + parsed = parsed._replace(scheme="wss") + elif scheme == "http": + parsed = parsed._replace(scheme="ws") + return urlunparse(parsed) diff --git a/chatmock/version.py b/chatmock/version.py index 72f72b8..17d6d1a 100644 --- a/chatmock/version.py +++ b/chatmock/version.py @@ -1,4 +1,4 @@ from __future__ import annotations -__version__ = "1.36" +__version__ = "1.37" diff --git a/chatmock/websocket_routes.py b/chatmock/websocket_routes.py new file mode 100644 index 0000000..37fcfe0 --- /dev/null +++ b/chatmock/websocket_routes.py @@ -0,0 +1,225 @@ +from __future__ import annotations + +import json +import os +import ssl +from typing import Any, Dict + +import certifi +from flask import current_app, request +from flask_sock import Sock +from websockets.sync.client import connect as websocket_connect +from websockets.exceptions import ConnectionClosed + +from .responses_api import ( + ResponsesRequestError, + extract_client_session_id, + normalize_responses_payload, +) +from .session import ( + clear_responses_reuse_state, + note_responses_stream_event, + prepare_responses_request_for_session, +) +from .upstream import build_upstream_headers, build_upstream_websocket_url +from .utils import get_effective_chatgpt_auth + + +def _log_json(prefix: str, payload: Any) -> None: + try: + print(f"{prefix}\n{json.dumps(payload, indent=2, ensure_ascii=False)}") + except Exception: + try: + print(f"{prefix}\n{payload}") + except Exception: + pass + + +def _error_event(message: str, *, status_code: int = 400, code: str | None = None) -> Dict[str, Any]: + error: Dict[str, Any] = {"message": message} + if code: + error["code"] = code + return {"type": "error", "status_code": status_code, "error": error} + + +def _is_terminal_event(event: Any) -> bool: + if not isinstance(event, dict): + return False + kind = event.get("type") + return kind in ("response.completed", "response.failed", "error") + + +def _build_websocket_ssl_context() -> ssl.SSLContext: + cafile = ( + os.getenv("CODEX_CA_CERTIFICATE") + or os.getenv("SSL_CERT_FILE") + or certifi.where() + ) + return ssl.create_default_context(cafile=cafile) + + +def connect_upstream_websocket(url: str, headers: Dict[str, str]): + return websocket_connect( + url, + additional_headers=headers, + open_timeout=15, + ssl=_build_websocket_ssl_context(), + ) + + +def register_websocket_routes(sock: Sock) -> None: + @sock.route("/v1/responses") + def responses_websocket(ws) -> None: + verbose = bool(current_app.config.get("VERBOSE")) + upstream_ws = None + upstream_session_id: str | None = None + active_session_id: str | None = None + + def _send_error(message: str, *, status_code: int = 400, code: str | None = None) -> None: + evt = _error_event(message, status_code=status_code, code=code) + if verbose: + _log_json("STREAM OUT WS /v1/responses (error)", evt) + try: + ws.send(json.dumps(evt)) + except Exception: + pass + + try: + while True: + incoming = ws.receive() + if incoming is None: + break + + if isinstance(incoming, bytes): + incoming_text = incoming.decode("utf-8", errors="ignore") + else: + incoming_text = str(incoming) + if verbose: + print("IN WS /v1/responses\n" + incoming_text) + + try: + payload = json.loads(incoming_text) + except Exception: + _send_error("Websocket frames must be valid JSON objects.", status_code=400) + break + + if not isinstance(payload, dict): + _send_error("Websocket frames must be JSON objects.", status_code=400) + break + + client_session_id = extract_client_session_id(request.headers) + outbound_text = incoming_text + session_id = upstream_session_id + + if payload.get("type") == "response.create": + try: + normalized = normalize_responses_payload( + payload, + config=current_app.config, + client_session_id=client_session_id, + ) + except ResponsesRequestError as exc: + _send_error(str(exc), status_code=exc.status_code, code=exc.code) + continue + + if normalized.service_tier_resolution.warning_message and verbose: + print(f"[FastMode] {normalized.service_tier_resolution.warning_message}") + prepared = prepare_responses_request_for_session( + normalized.session_id, + normalized.payload, + allow_previous_response_id=True, + ) + outbound_text = json.dumps(prepared.payload) + session_id = normalized.session_id + active_session_id = normalized.session_id + if verbose: + _log_json("OUTBOUND >> ChatGPT Responses WS payload", prepared.payload) + elif upstream_ws is None: + _send_error( + "The first websocket message must be a response.create request.", + status_code=400, + ) + break + + if upstream_ws is None or (session_id and session_id != upstream_session_id): + access_token, account_id = get_effective_chatgpt_auth() + if not access_token or not account_id: + if session_id: + clear_responses_reuse_state(session_id) + _send_error( + "Missing ChatGPT credentials. Run 'python3 chatmock.py login' first.", + status_code=401, + ) + break + + if upstream_ws is not None: + try: + upstream_ws.close() + except Exception: + pass + + effective_session_id = session_id or client_session_id or "" + try: + upstream_ws = connect_upstream_websocket( + build_upstream_websocket_url(), + build_upstream_headers( + access_token, + account_id, + effective_session_id, + accept="application/json", + ), + ) + except Exception as exc: + if session_id: + clear_responses_reuse_state(session_id) + _send_error( + f"Upstream websocket connection failed: {exc}", + status_code=502, + ) + break + upstream_session_id = effective_session_id + + upstream_ws.send(outbound_text) + + while True: + try: + upstream_message = upstream_ws.recv() + except ConnectionClosed: + if active_session_id: + clear_responses_reuse_state(active_session_id) + _send_error("Upstream websocket closed unexpectedly.", status_code=502) + return + if upstream_message is None: + if active_session_id: + clear_responses_reuse_state(active_session_id) + _send_error("Upstream websocket closed unexpectedly.", status_code=502) + return + if verbose: + try: + print("STREAM OUT WS /v1/responses\n" + str(upstream_message)) + except Exception: + pass + ws.send(upstream_message) + + try: + parsed = json.loads(upstream_message) + except Exception: + parsed = None + if isinstance(parsed, dict) and active_session_id: + note_responses_stream_event(active_session_id, parsed) + if _is_terminal_event(parsed): + if isinstance(parsed, dict) and parsed.get("type") in ("response.failed", "error"): + if upstream_ws is not None: + try: + upstream_ws.close() + except Exception: + pass + upstream_ws = None + upstream_session_id = None + break + finally: + if upstream_ws is not None: + try: + upstream_ws.close() + except Exception: + pass diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index cb6c126..af26c64 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -23,6 +23,9 @@ if [[ "$cmd" == "serve" ]]; then if bool "${VERBOSE_OBFUSCATION:-}" || bool "${CHATGPT_LOCAL_VERBOSE_OBFUSCATION:-}"; then ARGS+=(--verbose-obfuscation) fi + if bool "${FAST_MODE:-}" || bool "${CHATGPT_LOCAL_FAST_MODE:-}"; then + ARGS+=(--fast-mode) + fi if [[ "$#" -gt 0 ]]; then ARGS+=("$@") diff --git a/gui.py b/gui.py index e9efe8f..82929fe 100644 --- a/gui.py +++ b/gui.py @@ -18,6 +18,7 @@ def run_server( reasoning_effort: str = "medium", reasoning_summary: str = "auto", reasoning_compat: str = "think-tags", + fast_mode: bool = False, expose_reasoning_models: bool = False, default_web_search: bool = False, ) -> None: @@ -25,6 +26,7 @@ def run_server( reasoning_effort=reasoning_effort, reasoning_summary=reasoning_summary, reasoning_compat=reasoning_compat, + fast_mode=fast_mode, expose_reasoning_models=expose_reasoning_models, default_web_search=default_web_search, ) @@ -42,6 +44,7 @@ class ServerProcess(QtCore.QObject): self._effort = "medium" self._summary = "auto" self._compat = "think-tags" + self._fast_mode = False self._expose_reasoning_models = False self._default_web_search = False @@ -55,6 +58,7 @@ class ServerProcess(QtCore.QObject): effort: str, summary: str, compat: str, + fast_mode: bool, expose_reasoning_models: bool, default_web_search: bool, ) -> None: @@ -63,6 +67,7 @@ class ServerProcess(QtCore.QObject): self._host, self._port = host, port self._effort, self._summary = effort, summary self._compat = compat + self._fast_mode = fast_mode self._expose_reasoning_models = expose_reasoning_models self._default_web_search = default_web_search self._proc = QtCore.QProcess() @@ -75,6 +80,8 @@ class ServerProcess(QtCore.QObject): "--summary", summary, "--compat", compat, ] + if fast_mode: + args.append("--fast-mode") if expose_reasoning_models: args.append("--expose-reasoning-models") if default_web_search: @@ -352,8 +359,10 @@ class MainWindow(QtWidgets.QMainWindow): opts.addWidget(self.compat, 1, 1) self.expose_reasoning_models = QtWidgets.QCheckBox("Expose reasoning models") opts.addWidget(self.expose_reasoning_models, 1, 2) + self.fast_mode = QtWidgets.QCheckBox("Enable fast mode") + opts.addWidget(self.fast_mode, 1, 3) self.enable_web_search = QtWidgets.QCheckBox("Enable web search") - opts.addWidget(self.enable_web_search, 1, 3) + opts.addWidget(self.enable_web_search, 2, 0) opts.setColumnStretch(1, 1) opts.setColumnStretch(3, 1) srv_layout.addLayout(opts) @@ -463,6 +472,7 @@ class MainWindow(QtWidgets.QMainWindow): effort = self.effort.currentText().strip() summary = self.summary.currentText().strip() compat = self.compat.currentText().strip() + fast_mode = self.fast_mode.isChecked() expose_reasoning_models = self.expose_reasoning_models.isChecked() default_web_search = self.enable_web_search.isChecked() self.status.setText(f"Starting server at http://{host}:{port} …") @@ -473,6 +483,7 @@ class MainWindow(QtWidgets.QMainWindow): effort, summary, compat, + fast_mode, expose_reasoning_models, default_web_search, ) @@ -524,6 +535,7 @@ def main() -> None: p.add_argument("--effort", default="medium") p.add_argument("--summary", default="auto") p.add_argument("--compat", default="think-tags") + p.add_argument("--fast-mode", action="store_true") p.add_argument("--expose-reasoning-models", action="store_true") p.add_argument("--enable-web-search", action="store_true") args, _ = p.parse_known_args() @@ -533,6 +545,7 @@ def main() -> None: args.effort, args.summary, args.compat, + args.fast_mode, args.expose_reasoning_models, args.enable_web_search, ) diff --git a/pyproject.toml b/pyproject.toml index 8cfe979..747e3d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,12 +11,14 @@ dependencies = [ "blinker==1.9.0", "certifi==2025.8.3", "flask==3.1.1", + "flask-sock==0.7.0", "idna==3.10", "itsdangerous==2.2.0", "jinja2==3.1.6", "markupsafe==3.0.2", "requests==2.32.5", "urllib3==2.5.0", + "websockets==15.0.1", "werkzeug==3.1.3", ] diff --git a/scripts/test_responses_cached_tokens.py b/scripts/test_responses_cached_tokens.py new file mode 100644 index 0000000..9cf05f5 --- /dev/null +++ b/scripts/test_responses_cached_tokens.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import sys +import uuid +from typing import Any, Dict + +import requests + + +def _post(url: str, api_key: str, session_id: str, payload: Dict[str, Any]) -> Dict[str, Any]: + response = requests.post( + url, + headers={ + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + "X-Session-Id": session_id, + }, + json=payload, + timeout=180, + ) + try: + body = response.json() + except Exception: + body = {"raw": response.text} + if response.status_code >= 400: + raise RuntimeError( + f"POST {url} failed with {response.status_code}: {json.dumps(body, ensure_ascii=False)}" + ) + if not isinstance(body, dict): + raise RuntimeError(f"Expected JSON object response, got: {body!r}") + return body + + +def _usage_summary(body: Dict[str, Any]) -> Dict[str, Any]: + usage = body.get("usage") + if not isinstance(usage, dict): + return {} + return usage + + +def _cached_tokens(body: Dict[str, Any]) -> int | None: + usage = _usage_summary(body) + details = usage.get("input_tokens_details") + if not isinstance(details, dict): + return None + value = details.get("cached_tokens") + try: + return int(value) + except Exception: + return None + + +def _assistant_message_item(body: Dict[str, Any]) -> Dict[str, Any]: + output = body.get("output") + if not isinstance(output, list): + raise RuntimeError("Response did not include an output list.") + for item in output: + if isinstance(item, dict) and item.get("type") == "message" and item.get("role") == "assistant": + return item + raise RuntimeError("Response did not include an assistant message item.") + + +def _user_message(text: str) -> Dict[str, Any]: + return { + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": text}], + } + + +def _default_prefix() -> str: + seed = "Cache test prefix. Repeat this context exactly for cache measurement. " + return "".join(seed for _ in range(220)) + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Drive two raw /v1/responses turns through ChatMock and check cached input tokens." + ) + parser.add_argument("--base-url", default="http://127.0.0.1:8000", help="ChatMock base URL.") + parser.add_argument("--api-key", default="key", help="Bearer token to send to ChatMock.") + parser.add_argument("--model", default="gpt-5.4", help="Model to request.") + parser.add_argument( + "--session-id", + default=f"cache-check-{uuid.uuid4()}", + help="Fixed X-Session-Id for both turns.", + ) + parser.add_argument( + "--prefix", + default=_default_prefix(), + help="Large repeated first-turn prompt prefix.", + ) + parser.add_argument( + "--first-question", + default="Reply with exactly: alpha", + help="Trailing instruction for the first turn.", + ) + parser.add_argument( + "--second-question", + default="Reply with exactly: beta", + help="Trailing instruction for the second turn.", + ) + args = parser.parse_args() + + responses_url = args.base_url.rstrip("/") + "/v1/responses" + session_id = args.session_id + first_text = f"{args.prefix}\n\n{args.first_question}" + second_text = args.second_question + + print(f"Using session id: {session_id}") + print(f"POST target: {responses_url}") + print("This checks the raw Responses usage object returned through ChatMock.") + print() + + first_payload = { + "model": args.model, + "store": False, + "stream": False, + "input": first_text, + } + first_response = _post(responses_url, args.api_key, session_id, first_payload) + assistant_item = _assistant_message_item(first_response) + + second_payload = { + "model": args.model, + "store": False, + "stream": False, + "input": [ + _user_message(first_text), + assistant_item, + _user_message(second_text), + ], + } + second_response = _post(responses_url, args.api_key, session_id, second_payload) + + first_usage = _usage_summary(first_response) + second_usage = _usage_summary(second_response) + first_cached = _cached_tokens(first_response) + second_cached = _cached_tokens(second_response) + + print("Turn 1") + print(json.dumps(first_usage, indent=2, ensure_ascii=False) if first_usage else " no usage object") + print() + print("Turn 2") + print(json.dumps(second_usage, indent=2, ensure_ascii=False) if second_usage else " no usage object") + print() + + if second_cached is None: + first_input_tokens = first_usage.get("input_tokens") if isinstance(first_usage, dict) else None + second_input_tokens = second_usage.get("input_tokens") if isinstance(second_usage, dict) else None + print("Result: inconclusive") + print("Reason: upstream did not include `usage.input_tokens_details.cached_tokens`.") + if isinstance(first_input_tokens, int) and isinstance(second_input_tokens, int): + print(f"Observed input_tokens delta: first={first_input_tokens}, second={second_input_tokens}") + print("Codex treats cached-token reporting as the direct cache-hit signal; without it, this script cannot prove caching.") + return 2 + + if second_cached > 0: + print(f"Result: success, follow-up turn reported cached_tokens={second_cached}.") + return 0 + + print("Result: failure, follow-up turn reported cached_tokens=0.") + return 1 + + +if __name__ == "__main__": + try: + raise SystemExit(main()) + except KeyboardInterrupt: + raise SystemExit(130) + except Exception as exc: + print(f"error: {exc}", file=sys.stderr) + raise SystemExit(1) diff --git a/scripts/test_responses_reuse.py b/scripts/test_responses_reuse.py new file mode 100644 index 0000000..5e506ab --- /dev/null +++ b/scripts/test_responses_reuse.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import sys +import uuid +from typing import Any, Dict, Tuple + +from websockets.sync.client import connect + + +def _user_message(text: str) -> Dict[str, Any]: + return { + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": text}], + } + + +def _receive_turn(ws) -> Tuple[str, Dict[str, Any]]: + response_id: str | None = None + assistant_item: Dict[str, Any] | None = None + + while True: + raw = ws.recv(timeout=120) + event = json.loads(raw) + event_type = event.get("type") + if event_type == "error": + raise RuntimeError(f"websocket error: {json.dumps(event, ensure_ascii=False)}") + if event_type == "response.created": + response = event.get("response") + if isinstance(response, dict) and isinstance(response.get("id"), str): + response_id = response["id"] + elif event_type == "response.output_item.done": + item = event.get("item") + if ( + isinstance(item, dict) + and item.get("type") == "message" + and item.get("role") == "assistant" + ): + assistant_item = item + elif event_type == "response.completed": + if not response_id: + response = event.get("response") + if isinstance(response, dict) and isinstance(response.get("id"), str): + response_id = response["id"] + if not response_id: + raise RuntimeError("turn completed without a response id") + if assistant_item is None: + raise RuntimeError("turn completed without an assistant message item") + return response_id, assistant_item + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Exercise ChatMock websocket reuse the same way Codex does." + ) + parser.add_argument( + "--ws-url", + default="ws://127.0.0.1:8000/v1/responses", + help="ChatMock websocket URL.", + ) + parser.add_argument("--model", default="gpt-5.4", help="Model to request.") + parser.add_argument( + "--session-id", + default=f"reuse-demo-{uuid.uuid4()}", + help="Fixed X-Session-Id for the whole run.", + ) + parser.add_argument( + "--first-prompt", + default="Say exactly: alpha", + help="Prompt for the first turn.", + ) + parser.add_argument( + "--second-prompt", + default="Now say exactly: beta", + help="Prompt appended in the reuse-candidate turn.", + ) + parser.add_argument( + "--no-fast-mode", + action="store_true", + help="Do not send fast_mode=true.", + ) + args = parser.parse_args() + + headers = {"X-Session-Id": args.session_id} + fast_mode = not args.no_fast_mode + + print(f"Using websocket session id: {args.session_id}") + print(f"Connecting to: {args.ws_url}") + print("Run ChatMock with `python3 chatmock.py serve --verbose` in another terminal.") + print("This verifies the Codex-aligned path: websocket `response.create` reuse.") + print("HTTP `/v1/responses` is not expected to send `previous_response_id`.") + print() + + with connect(args.ws_url, additional_headers=headers, open_timeout=15) as ws: + first_request = { + "type": "response.create", + "model": args.model, + "store": False, + "input": args.first_prompt, + "fast_mode": fast_mode, + } + ws.send(json.dumps(first_request)) + first_response_id, assistant_item = _receive_turn(ws) + + second_request = { + "type": "response.create", + "model": args.model, + "store": False, + "input": [ + _user_message(args.first_prompt), + assistant_item, + _user_message(args.second_prompt), + ], + "fast_mode": fast_mode, + } + ws.send(json.dumps(second_request)) + second_response_id, _ = _receive_turn(ws) + + print("Turn 1 completed.") + print(f" response id: {first_response_id}") + print("Turn 2 completed.") + print(f" response id: {second_response_id}") + print() + print("Expected in the verbose ChatMock server log for turn 2:") + print(" - outbound websocket payload includes `previous_response_id`") + print(" - `previous_response_id` equals the first response id") + print(" - outbound `input` only contains the new trailing user message") + print() + print("If turn 2 still shows the full conversation in the outbound websocket payload, reuse is not working.") + return 0 + + +if __name__ == "__main__": + try: + raise SystemExit(main()) + except KeyboardInterrupt: + raise SystemExit(130) + except Exception as exc: + print(f"error: {exc}", file=sys.stderr) + raise SystemExit(1) diff --git a/tests/test_fast_mode.py b/tests/test_fast_mode.py new file mode 100644 index 0000000..6892ec5 --- /dev/null +++ b/tests/test_fast_mode.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +import unittest + +from chatmock.fast_mode import parse_optional_bool, resolve_service_tier, supports_priority_service_tier + + +class FastModeTests(unittest.TestCase): + def test_parse_optional_bool(self) -> None: + self.assertTrue(parse_optional_bool(True)) + self.assertTrue(parse_optional_bool("true")) + self.assertFalse(parse_optional_bool(False)) + self.assertFalse(parse_optional_bool("off")) + self.assertIsNone(parse_optional_bool("maybe")) + + def test_priority_allowlist_uses_normalized_model_ids(self) -> None: + self.assertTrue(supports_priority_service_tier("gpt5.4")) + self.assertFalse(supports_priority_service_tier("gpt-5.3-codex")) + + def test_explicit_fast_mode_true_errors_for_unsupported_model(self) -> None: + resolution = resolve_service_tier( + "gpt-5.3-codex", + request_fast_mode=True, + server_fast_mode=False, + ) + self.assertIsNone(resolution.service_tier) + self.assertIsNotNone(resolution.error_message) + + def test_server_default_fast_mode_falls_back_on_unsupported_model(self) -> None: + resolution = resolve_service_tier( + "gpt-5.3-codex", + server_fast_mode=True, + ) + self.assertIsNone(resolution.service_tier) + self.assertIsNone(resolution.error_message) + self.assertIsNotNone(resolution.warning_message) + + def test_request_fast_mode_false_overrides_server_default(self) -> None: + resolution = resolve_service_tier( + "gpt-5.4", + request_fast_mode=False, + server_fast_mode=True, + ) + self.assertIsNone(resolution.service_tier) + self.assertIsNone(resolution.error_message) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_models.py b/tests/test_models.py index 4d690cf..e82b516 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -10,6 +10,7 @@ class ModelRegistryTests(unittest.TestCase): self.assertEqual(normalize_model_name("gpt5"), "gpt-5") self.assertEqual(normalize_model_name("gpt5.4"), "gpt-5.4") self.assertEqual(normalize_model_name("gpt5.4-mini"), "gpt-5.4-mini") + self.assertEqual(normalize_model_name("gpt5.3-codex-spark"), "gpt-5.3-codex-spark") self.assertEqual(normalize_model_name("codex"), "codex-mini-latest") def test_strips_reasoning_suffixes(self) -> None: @@ -28,6 +29,7 @@ class ModelRegistryTests(unittest.TestCase): model_ids = list_public_models(expose_reasoning_models=True) self.assertIn("gpt-5.4", model_ids) self.assertIn("gpt-5.4-mini", model_ids) + self.assertIn("gpt-5.3-codex-spark", model_ids) self.assertIn("gpt-5.4-none", model_ids) self.assertIn("gpt-5.4-mini-xhigh", model_ids) self.assertNotIn("gpt-5.4-mini-none", model_ids) diff --git a/tests/test_routes.py b/tests/test_routes.py index b0d3422..1316bc8 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -1,31 +1,56 @@ from __future__ import annotations import json +import socket +import threading +import time import unittest from unittest.mock import patch from chatmock.app import create_app +from chatmock.session import reset_session_state +from websockets.sync.client import connect as ws_connect class FakeUpstream: - def __init__(self, events: list[dict[str, object]], status_code: int = 200) -> None: + def __init__( + self, + events: list[dict[str, object]] | None = None, + *, + status_code: int = 200, + headers: dict[str, str] | None = None, + content: bytes | None = None, + text: str = "", + ) -> None: self._events = events self.status_code = status_code - self.headers = {} - self.content = b"" - self.text = "" + self.headers = headers or {} + self.content = content or b"" + self.text = text def iter_lines(self, decode_unicode: bool = False): - for event in self._events: + for event in self._events or []: payload = f"data: {json.dumps(event)}" yield payload if decode_unicode else payload.encode("utf-8") + def iter_content(self, chunk_size=None): + if self.content: + yield self.content + return + for event in self._events or []: + payload = f"data: {json.dumps(event)}\n\n".encode("utf-8") + yield payload + + def json(self): + return json.loads(self.content.decode("utf-8")) + def close(self) -> None: return None class RouteTests(unittest.TestCase): def setUp(self) -> None: + reset_session_state() self.app = create_app() self.client = self.app.test_client() @@ -36,6 +61,7 @@ class RouteTests(unittest.TestCase): model_ids = [item["id"] for item in body["data"]] self.assertIn("gpt-5.4", model_ids) self.assertIn("gpt-5.4-mini", model_ids) + self.assertIn("gpt-5.3-codex-spark", model_ids) def test_ollama_tags_list(self) -> None: response = self.client.get("/api/tags") @@ -85,6 +111,443 @@ class RouteTests(unittest.TestCase): self.assertEqual(body["message"]["content"], "hello") self.assertEqual(body["model"], "gpt-5.4") + @patch("chatmock.routes_openai.start_upstream_request") + def test_chat_completions_fast_mode_sets_priority_service_tier(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + {"type": "response.output_text.delta", "delta": "hello"}, + {"type": "response.completed", "response": {"id": "resp-openai"}}, + ] + ), + None, + ) + response = self.client.post( + "/v1/chat/completions", + json={ + "model": "gpt-5.4", + "fast_mode": True, + "messages": [{"role": "user", "content": "hi"}], + }, + ) + self.assertEqual(response.status_code, 200) + self.assertEqual(mock_start.call_args.kwargs["service_tier"], "priority") + + @patch("chatmock.routes_openai.start_upstream_request") + def test_chat_completions_fast_mode_false_overrides_server_default(self, mock_start) -> None: + app = create_app(fast_mode=True) + client = app.test_client() + mock_start.return_value = ( + FakeUpstream( + [ + {"type": "response.output_text.delta", "delta": "hello"}, + {"type": "response.completed", "response": {"id": "resp-openai"}}, + ] + ), + None, + ) + response = client.post( + "/v1/chat/completions", + json={ + "model": "gpt-5.4", + "fast_mode": False, + "messages": [{"role": "user", "content": "hi"}], + }, + ) + self.assertEqual(response.status_code, 200) + self.assertIsNone(mock_start.call_args.kwargs["service_tier"]) + + @patch("chatmock.routes_openai.start_upstream_request") + def test_chat_completions_rejects_unsupported_explicit_fast_mode(self, mock_start) -> None: + response = self.client.post( + "/v1/chat/completions", + json={ + "model": "gpt-5.3-codex", + "fast_mode": True, + "messages": [{"role": "user", "content": "hi"}], + }, + ) + body = response.get_json() + self.assertEqual(response.status_code, 400) + self.assertIn("Fast mode is not supported", body["error"]["message"]) + mock_start.assert_not_called() + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_returns_completed_response_object(self, mock_start) -> None: + mock_start.return_value = ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_123", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": { + "id": "resp_123", + "object": "response", + "status": "completed", + "output": [], + }, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ) + response = self.client.post( + "/v1/responses", + json={"model": "gpt5.4-mini", "input": "hello"}, + ) + body = response.get_json() + self.assertEqual(response.status_code, 200) + self.assertEqual(body["id"], "resp_123") + outbound_payload = mock_start.call_args.args[0] + self.assertEqual(outbound_payload["model"], "gpt-5.4-mini") + self.assertEqual(outbound_payload["store"], False) + self.assertEqual( + outbound_payload["input"], + [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}], + ) + self.assertEqual(outbound_payload["reasoning"]["effort"], "medium") + self.assertIsInstance(outbound_payload["prompt_cache_key"], str) + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_does_not_use_previous_response_id_for_http_follow_up(self, mock_start) -> None: + mock_start.side_effect = [ + ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_1", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.output_item.done", + "item": { + "type": "message", + "role": "assistant", + "id": "msg_1", + "content": [{"type": "output_text", "text": "assistant output"}], + }, + }, + { + "type": "response.completed", + "response": {"id": "resp_1", "object": "response", "status": "completed", "output": []}, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ), + ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_2", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": {"id": "resp_2", "object": "response", "status": "completed", "output": []}, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ), + ] + + first = self.client.post("/v1/responses", json={"model": "gpt-5.4", "input": "hello"}) + second = self.client.post( + "/v1/responses", + json={ + "model": "gpt-5.4", + "input": [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, + {"type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}]}, + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, + ], + }, + ) + + self.assertEqual(first.status_code, 200) + self.assertEqual(second.status_code, 200) + outbound_payload = mock_start.call_args_list[1].args[0] + self.assertNotIn("previous_response_id", outbound_payload) + self.assertEqual( + outbound_payload["input"], + [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, + {"type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}]}, + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, + ], + ) + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_falls_back_to_full_create_when_non_input_fields_change(self, mock_start) -> None: + mock_start.side_effect = [ + ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_1", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": {"id": "resp_1", "object": "response", "status": "completed", "output": []}, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ), + ( + FakeUpstream( + [ + { + "type": "response.created", + "response": {"id": "resp_2", "object": "response", "status": "in_progress"}, + }, + { + "type": "response.completed", + "response": {"id": "resp_2", "object": "response", "status": "completed", "output": []}, + }, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ), + ] + + headers = {"X-Session-Id": "session-fixed"} + first = self.client.post("/v1/responses", json={"model": "gpt-5.4", "input": "hello"}, headers=headers) + second = self.client.post( + "/v1/responses", + json={ + "model": "gpt-5.4", + "instructions": "changed", + "input": [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, + ], + }, + headers=headers, + ) + + self.assertEqual(first.status_code, 200) + self.assertEqual(second.status_code, 200) + outbound_payload = mock_start.call_args_list[1].args[0] + self.assertNotIn("previous_response_id", outbound_payload) + self.assertEqual( + outbound_payload["input"], + [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, + ], + ) + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_clears_reuse_state_after_error(self, mock_start) -> None: + mock_start.side_effect = [ + ( + FakeUpstream( + [ + {"type": "response.created", "response": {"id": "resp_1"}}, + {"type": "response.completed", "response": {"id": "resp_1", "output": []}}, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ), + ( + FakeUpstream( + [ + {"type": "response.failed", "response": {"error": {"message": "boom"}}}, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ), + ( + FakeUpstream( + [ + {"type": "response.created", "response": {"id": "resp_3"}}, + {"type": "response.completed", "response": {"id": "resp_3", "output": []}}, + ], + headers={"Content-Type": "text/event-stream"}, + ), + None, + ), + ] + + headers = {"X-Session-Id": "session-fixed"} + first = self.client.post("/v1/responses", json={"model": "gpt-5.4", "input": "hello"}, headers=headers) + second = self.client.post( + "/v1/responses", + json={ + "model": "gpt-5.4", + "input": [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, + ], + }, + headers=headers, + ) + third = self.client.post( + "/v1/responses", + json={ + "model": "gpt-5.4", + "input": [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "third"}]}, + ], + }, + headers=headers, + ) + + self.assertEqual(first.status_code, 200) + self.assertEqual(second.status_code, 502) + self.assertEqual(third.status_code, 200) + outbound_payload = mock_start.call_args_list[2].args[0] + self.assertNotIn("previous_response_id", outbound_payload) + self.assertEqual( + outbound_payload["input"], + [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "third"}]}, + ], + ) + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_stream_passthrough(self, mock_start) -> None: + chunk = b'data: {"type":"response.output_text.delta","delta":"hello"}\n\n' + mock_start.return_value = ( + FakeUpstream( + headers={"Content-Type": "text/event-stream"}, + content=chunk, + ), + None, + ) + response = self.client.post( + "/v1/responses", + json={"model": "gpt-5.4", "input": "hello", "stream": True}, + ) + self.assertEqual(response.status_code, 200) + self.assertIn("response.output_text.delta", response.get_data(as_text=True)) + + @patch("chatmock.routes_openai.start_upstream_raw_request") + def test_responses_route_rejects_unsupported_explicit_priority(self, mock_start) -> None: + response = self.client.post( + "/v1/responses", + json={"model": "gpt-5.3-codex", "input": "hello", "service_tier": "priority"}, + ) + body = response.get_json() + self.assertEqual(response.status_code, 400) + self.assertIn("Fast mode is not supported", body["error"]["message"]) + mock_start.assert_not_called() + + @patch("chatmock.websocket_routes.get_effective_chatgpt_auth", return_value=("token", "acct")) + @patch("chatmock.websocket_routes.connect_upstream_websocket") + def test_responses_websocket_rewrites_response_create(self, mock_connect, _mock_auth) -> None: + class FakeUpstreamWebsocket: + def __init__(self) -> None: + self.sent: list[str] = [] + self._messages = [ + json.dumps({"type": "response.created", "response": {"id": "resp_ws_1"}}), + json.dumps({ + "type": "response.output_item.done", + "item": { + "type": "message", + "role": "assistant", + "id": "msg_1", + "content": [{"type": "output_text", "text": "assistant output"}], + }, + }), + json.dumps({"type": "response.completed", "response": {"id": "resp_ws_1"}}), + json.dumps({"type": "response.created", "response": {"id": "resp_ws_2"}}), + json.dumps({"type": "response.completed", "response": {"id": "resp_ws_2"}}), + ] + + def send(self, message: str) -> None: + self.sent.append(message) + + def recv(self) -> str: + return self._messages.pop(0) + + def close(self) -> None: + return None + + fake_upstream = FakeUpstreamWebsocket() + mock_connect.return_value = fake_upstream + + app = create_app() + + sock = socket.socket() + sock.bind(("127.0.0.1", 0)) + host, port = sock.getsockname() + sock.close() + + server_thread = threading.Thread( + target=app.run, + kwargs={ + "host": host, + "port": port, + "debug": False, + "use_reloader": False, + "threaded": True, + }, + daemon=True, + ) + server_thread.start() + time.sleep(0.5) + + with ws_connect(f"ws://{host}:{port}/v1/responses") as client: + client.send(json.dumps({"type": "response.create", "model": "gpt-5.4", "input": "hello", "fast_mode": True})) + first = json.loads(client.recv()) + assistant = json.loads(client.recv()) + second = json.loads(client.recv()) + client.send( + json.dumps( + { + "type": "response.create", + "model": "gpt-5.4", + "fast_mode": True, + "input": [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}, + {"type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}]}, + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}, + ], + } + ) + ) + third = json.loads(client.recv()) + fourth = json.loads(client.recv()) + + self.assertEqual(first["type"], "response.created") + self.assertEqual(assistant["type"], "response.output_item.done") + self.assertEqual(second["type"], "response.completed") + self.assertEqual(third["type"], "response.created") + self.assertEqual(fourth["type"], "response.completed") + outbound = json.loads(fake_upstream.sent[0]) + self.assertEqual(outbound["model"], "gpt-5.4") + self.assertEqual(outbound["service_tier"], "priority") + self.assertEqual(outbound["type"], "response.create") + self.assertEqual( + outbound["input"], + [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}], + ) + self.assertIn("prompt_cache_key", outbound) + follow_up = json.loads(fake_upstream.sent[1]) + self.assertEqual(follow_up["previous_response_id"], "resp_ws_1") + self.assertEqual( + follow_up["input"], + [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}], + ) + if __name__ == "__main__": unittest.main() diff --git a/uv.lock b/uv.lock index 1d63a16..fd1ec98 100644 --- a/uv.lock +++ b/uv.lock @@ -109,12 +109,14 @@ dependencies = [ { name = "blinker" }, { name = "certifi" }, { name = "flask" }, + { name = "flask-sock" }, { name = "idna" }, { name = "itsdangerous" }, { name = "jinja2" }, { name = "markupsafe" }, { name = "requests" }, { name = "urllib3" }, + { name = "websockets" }, { name = "werkzeug" }, ] @@ -130,6 +132,7 @@ requires-dist = [ { name = "blinker", specifier = "==1.9.0" }, { name = "certifi", specifier = "==2025.8.3" }, { name = "flask", specifier = "==3.1.1" }, + { name = "flask-sock", specifier = "==0.7.0" }, { name = "idna", specifier = "==3.10" }, { name = "itsdangerous", specifier = "==2.2.0" }, { name = "jinja2", specifier = "==3.1.6" }, @@ -139,6 +142,7 @@ requires-dist = [ { name = "pyside6", marker = "extra == 'gui'", specifier = "==6.9.2" }, { name = "requests", specifier = "==2.32.5" }, { name = "urllib3", specifier = "==2.5.0" }, + { name = "websockets", specifier = "==15.0.1" }, { name = "werkzeug", specifier = "==3.1.3" }, ] provides-extras = ["gui"] @@ -181,6 +185,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3d/68/9d4508e893976286d2ead7f8f571314af6c2037af34853a30fd769c02e9d/flask-3.1.1-py3-none-any.whl", hash = "sha256:07aae2bb5eaf77993ef57e357491839f5fd9f4dc281593a81a9e4d79a24f295c", size = 103305 }, ] +[[package]] +name = "flask-sock" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "flask" }, + { name = "simple-websocket" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8d/8f/c6ab717dc90f4e46d1430335cd4ab13e3629410bb760c0ead6de476760fb/flask-sock-0.7.0.tar.gz", hash = "sha256:e023b578284195a443b8d8bdb4469e6a6acf694b89aeb51315b1a34fcf427b7d", size = 4334 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d8/98/107728ce3f430b5481eb426ccc5e1f7c8ab0bd01eaf231c62a8d528ff721/flask_sock-0.7.0-py3-none-any.whl", hash = "sha256:caac4d679392aaf010d02fabcf73d52019f5bdaf1c9c131ec5a428cb3491204a", size = 3982 }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 }, +] + [[package]] name = "idna" version = "3.10" @@ -507,6 +533,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/64/562a527fc55fbf41fa70dae735929988215505cb5ec0809fb0aef921d4a0/shiboken6-6.9.2-cp39-abi3-win_arm64.whl", hash = "sha256:c5b827797b3d89d9b9a3753371ff533fcd4afc4531ca51a7c696952132098054", size = 1708948 }, ] +[[package]] +name = "simple-websocket" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wsproto" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b0/d4/bfa032f961103eba93de583b161f0e6a5b63cebb8f2c7d0c6e6efe1e3d2e/simple_websocket-1.1.0.tar.gz", hash = "sha256:7939234e7aa067c534abdab3a9ed933ec9ce4691b0713c78acb195560aa52ae4", size = 17300 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/52/59/0782e51887ac6b07ffd1570e0364cf901ebc36345fea669969d2084baebb/simple_websocket-1.1.0-py3-none-any.whl", hash = "sha256:4af6069630a38ed6c561010f0e11a5bc0d4ca569b36306eb257cd9a192497c8c", size = 13842 }, +] + [[package]] name = "urllib3" version = "2.5.0" @@ -516,6 +554,48 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795 }, ] +[[package]] +name = "websockets" +version = "15.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9f/32/18fcd5919c293a398db67443acd33fde142f283853076049824fc58e6f75/websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431", size = 175423 }, + { url = "https://files.pythonhosted.org/packages/76/70/ba1ad96b07869275ef42e2ce21f07a5b0148936688c2baf7e4a1f60d5058/websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57", size = 173082 }, + { url = "https://files.pythonhosted.org/packages/86/f2/10b55821dd40eb696ce4704a87d57774696f9451108cff0d2824c97e0f97/websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905", size = 173330 }, + { url = "https://files.pythonhosted.org/packages/a5/90/1c37ae8b8a113d3daf1065222b6af61cc44102da95388ac0018fcb7d93d9/websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562", size = 182878 }, + { url = "https://files.pythonhosted.org/packages/8e/8d/96e8e288b2a41dffafb78e8904ea7367ee4f891dafc2ab8d87e2124cb3d3/websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792", size = 181883 }, + { url = "https://files.pythonhosted.org/packages/93/1f/5d6dbf551766308f6f50f8baf8e9860be6182911e8106da7a7f73785f4c4/websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413", size = 182252 }, + { url = "https://files.pythonhosted.org/packages/d4/78/2d4fed9123e6620cbf1706c0de8a1632e1a28e7774d94346d7de1bba2ca3/websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8", size = 182521 }, + { url = "https://files.pythonhosted.org/packages/e7/3b/66d4c1b444dd1a9823c4a81f50231b921bab54eee2f69e70319b4e21f1ca/websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3", size = 181958 }, + { url = "https://files.pythonhosted.org/packages/08/ff/e9eed2ee5fed6f76fdd6032ca5cd38c57ca9661430bb3d5fb2872dc8703c/websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf", size = 181918 }, + { url = "https://files.pythonhosted.org/packages/d8/75/994634a49b7e12532be6a42103597b71098fd25900f7437d6055ed39930a/websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85", size = 176388 }, + { url = "https://files.pythonhosted.org/packages/98/93/e36c73f78400a65f5e236cd376713c34182e6663f6889cd45a4a04d8f203/websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065", size = 176828 }, + { url = "https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437 }, + { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096 }, + { url = "https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332 }, + { url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152 }, + { url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096 }, + { url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523 }, + { url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790 }, + { url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165 }, + { url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160 }, + { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395 }, + { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841 }, + { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440 }, + { url = "https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098 }, + { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329 }, + { url = "https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111 }, + { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054 }, + { url = "https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496 }, + { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829 }, + { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217 }, + { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195 }, + { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393 }, + { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837 }, + { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743 }, +] + [[package]] name = "werkzeug" version = "3.1.3" @@ -527,3 +607,15 @@ sdist = { url = "https://files.pythonhosted.org/packages/9f/69/83029f1f6300c5fb2 wheels = [ { url = "https://files.pythonhosted.org/packages/52/24/ab44c871b0f07f491e5d2ad12c9bd7358e527510618cb1b803a88e986db1/werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e", size = 224498 }, ] + +[[package]] +name = "wsproto" +version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c7/79/12135bdf8b9c9367b8701c2c19a14c913c120b882d50b014ca0d38083c2c/wsproto-1.3.2.tar.gz", hash = "sha256:b86885dcf294e15204919950f666e06ffc6c7c114ca900b060d6e16293528294", size = 50116 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/f5/10b68b7b1544245097b2a1b8238f66f2fc6dcaeb24ba5d917f52bd2eed4f/wsproto-1.3.2-py3-none-any.whl", hash = "sha256:61eea322cdf56e8cc904bd3ad7573359a242ba65688716b0710a5eb12beab584", size = 24405 }, +]