diff --git a/.README.md.swp b/.README.md.swp new file mode 100644 index 0000000..a40e22d Binary files /dev/null and b/.README.md.swp differ diff --git a/DOCKER.md b/DOCKER.md index 2a705b5..6eb8074 100644 --- a/DOCKER.md +++ b/DOCKER.md @@ -22,7 +22,7 @@ Set options in `.env` or pass environment variables: - `PORT`: Container listening port (default 8000) - `VERBOSE`: `true|false` to enable request/stream logs -- `CHATGPT_LOCAL_REASONING_EFFORT`: minimal|low|medium|high +- `CHATGPT_LOCAL_REASONING_EFFORT`: minimal|low|medium|high|xhigh - `CHATGPT_LOCAL_REASONING_SUMMARY`: auto|concise|detailed|none - `CHATGPT_LOCAL_REASONING_COMPAT`: legacy|o3|think-tags|current - `CHATGPT_LOCAL_DEBUG_MODEL`: force model override (e.g., `gpt-5`) diff --git a/README.md b/README.md index 960e850..fce611b 100644 --- a/README.md +++ b/README.md @@ -115,6 +115,7 @@ curl http://127.0.0.1:8000/v1/chat/completions \ # Supported models - `gpt-5` - `gpt-5.1` +- `gpt-5.2` - `gpt-5-codex` - `gpt-5.1-codex` - `gpt-5.1-codex-max` @@ -126,7 +127,8 @@ curl http://127.0.0.1:8000/v1/chat/completions \ ### Thinking effort - `--reasoning-effort` (choice of minimal,low,medium,high,xhigh)
-GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overrided by the API request with a different effort set. The default reasoning effort without setting this parameter is `medium`. The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high` while `gpt-5.1-codex-max` adds `xhigh`; neither offers a `minimal` variant. +GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overridden by the API request with a different effort set. The default reasoning effort without setting this parameter is `medium`.
+ The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high` while `gpt-5.1-codex-max` adds `xhigh`. `gpt-5.2` supports `none`, `low`, `medium`, `high`, and `xhigh`. ### Thinking summaries @@ -159,7 +161,7 @@ You can enable it by starting the server with this parameter, which will allow O If your preferred app doesn’t support selecting reasoning effort, or you just want a simpler approach, this parameter exposes each reasoning level as a separate, queryable model. Each reasoning level also appears individually under ⁠/v1/models, so model pickers in your favorite chat apps will list all reasoning options as distinct models you can switch between. ## Notes -If you wish to have the fastest responses, I'd recommend setting `--reasoning-effort` to minimal, and `--reasoning-summary` to none.
+If you wish to have the fastest responses, I'd recommend setting `--reasoning-effort` to low, and `--reasoning-summary` to none.
All parameters and choices can be seen by sending `python chatmock.py serve --h`
The context size of this route is also larger than what you get access to in the regular ChatGPT app.
@@ -172,4 +174,3 @@ When the model returns a thinking summary, the model will send back thinking tag - diff --git a/chatmock/cli.py b/chatmock/cli.py index 2ebaf85..d9c1a5e 100644 --- a/chatmock/cli.py +++ b/chatmock/cli.py @@ -335,7 +335,7 @@ def main() -> None: action="store_true", default=(os.getenv("CHATGPT_LOCAL_EXPOSE_REASONING_MODELS") or "").strip().lower() in ("1", "true", "yes", "on"), help=( - "Expose gpt-5 reasoning effort variants (minimal|low|medium|high|xhigh where supported) " + "Expose GPT-5 family reasoning effort variants (minimal|low|medium|high|xhigh where supported) " "as separate models from /v1/models. This allows choosing effort via model selection in compatible UIs." ), ) diff --git a/chatmock/reasoning.py b/chatmock/reasoning.py index 79566cb..ac6fa96 100644 --- a/chatmock/reasoning.py +++ b/chatmock/reasoning.py @@ -11,6 +11,8 @@ def allowed_efforts_for_model(model: str | None) -> Set[str]: if not base: return DEFAULT_REASONING_EFFORTS normalized = base.split(":", 1)[0] + if normalized.startswith("gpt-5.2"): + return {"none", "low", "medium", "high", "xhigh"} if normalized.startswith("gpt-5.1-codex-max"): return {"low", "medium", "high", "xhigh"} if normalized.startswith("gpt-5.1"): diff --git a/chatmock/routes_ollama.py b/chatmock/routes_ollama.py index 1fd8699..a2b9bec 100644 --- a/chatmock/routes_ollama.py +++ b/chatmock/routes_ollama.py @@ -96,6 +96,7 @@ def ollama_tags() -> Response: model_ids = [ "gpt-5", "gpt-5.1", + "gpt-5.2", "gpt-5-codex", "gpt-5.1-codex", "gpt-5.1-codex-max", @@ -112,6 +113,11 @@ def ollama_tags() -> Response: "gpt-5.1-high", "gpt-5.1-medium", "gpt-5.1-low", + "gpt-5.2-xhigh", + "gpt-5.2-high", + "gpt-5.2-medium", + "gpt-5.2-low", + "gpt-5.2-none", "gpt-5-codex-high", "gpt-5-codex-medium", "gpt-5-codex-low", diff --git a/chatmock/routes_openai.py b/chatmock/routes_openai.py index 5d97bed..1e737b2 100644 --- a/chatmock/routes_openai.py +++ b/chatmock/routes_openai.py @@ -534,6 +534,7 @@ def list_models() -> 
Response: model_groups = [ ("gpt-5", ["high", "medium", "low", "minimal"]), ("gpt-5.1", ["high", "medium", "low"]), + ("gpt-5.2", ["xhigh", "high", "medium", "low", "none"]), ("gpt-5-codex", ["high", "medium", "low"]), ("gpt-5.1-codex", ["high", "medium", "low"]), ("gpt-5.1-codex-max", ["xhigh", "high", "medium", "low"]), diff --git a/chatmock/upstream.py b/chatmock/upstream.py index 8f377a0..52261f1 100644 --- a/chatmock/upstream.py +++ b/chatmock/upstream.py @@ -42,6 +42,9 @@ def normalize_model_name(name: str | None, debug_model: str | None = None) -> st "gpt-5-latest": "gpt-5", "gpt-5": "gpt-5", "gpt-5.1": "gpt-5.1", + "gpt5.2": "gpt-5.2", + "gpt-5.2": "gpt-5.2", + "gpt-5.2-latest": "gpt-5.2", "gpt5-codex": "gpt-5-codex", "gpt-5-codex": "gpt-5-codex", "gpt-5-codex-latest": "gpt-5-codex",