diff --git a/README.md b/README.md index 2681874..3ac76f7 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,7 @@ curl http://127.0.0.1:8000/v1/chat/completions \ - `gpt-5` - `gpt-5.1` - `gpt-5.2` +- `gpt-5.4` - `gpt-5-codex` - `gpt-5.2-codex` - `gpt-5.3-codex` @@ -128,9 +129,9 @@ curl http://127.0.0.1:8000/v1/chat/completions \ ### Thinking effort -- `--reasoning-effort` (choice of minimal,low,medium,high,xhigh)
+- `--reasoning-effort` (choice of none,minimal,low,medium,high,xhigh)
GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overridden by an API request that sets a different effort. The default reasoning effort without setting this parameter is `medium`.
- The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high` while `gpt-5.1-codex-max` adds `xhigh`. The `gpt-5.2` and `gpt-5.3` families (including codex) support `low`, `medium`, `high`, and `xhigh`. + The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high` while `gpt-5.1-codex-max` adds `xhigh`. The `gpt-5.2` and `gpt-5.3` families (including codex) support `low`, `medium`, `high`, and `xhigh`. GPT-5.4 supports `none`, `low`, `medium`, `high`, and `xhigh`. ### Thinking summaries diff --git a/chatmock/cli.py b/chatmock/cli.py index d9c1a5e..4a64976 100644 --- a/chatmock/cli.py +++ b/chatmock/cli.py @@ -311,7 +311,7 @@ def main() -> None: ) p_serve.add_argument( "--reasoning-effort", - choices=["minimal", "low", "medium", "high", "xhigh"], + choices=["none", "minimal", "low", "medium", "high", "xhigh"], default=os.getenv("CHATGPT_LOCAL_REASONING_EFFORT", "medium").lower(), help="Reasoning effort level for Responses API (default: medium)", ) @@ -335,7 +335,7 @@ def main() -> None: action="store_true", default=(os.getenv("CHATGPT_LOCAL_EXPOSE_REASONING_MODELS") or "").strip().lower() in ("1", "true", "yes", "on"), help=( - "Expose GPT-5 family reasoning effort variants (minimal|low|medium|high|xhigh where supported) " + "Expose GPT-5 family reasoning effort variants (none|minimal|low|medium|high|xhigh where supported) " "as separate models from /v1/models. This allows choosing effort via model selection in compatible UIs." 
), ) diff --git a/chatmock/reasoning.py b/chatmock/reasoning.py index a6df9c9..3a4fb6d 100644 --- a/chatmock/reasoning.py +++ b/chatmock/reasoning.py @@ -3,7 +3,7 @@ from __future__ import annotations from typing import Any, Dict, Set -DEFAULT_REASONING_EFFORTS: Set[str] = {"minimal", "low", "medium", "high", "xhigh"} +DEFAULT_REASONING_EFFORTS: Set[str] = {"minimal", "low", "medium", "high", "xhigh", "none"} def allowed_efforts_for_model(model: str | None) -> Set[str]: @@ -11,6 +11,8 @@ def allowed_efforts_for_model(model: str | None) -> Set[str]: if not base: return DEFAULT_REASONING_EFFORTS normalized = base.split(":", 1)[0] + if normalized.startswith("gpt-5.4"): + return {"none", "low", "medium", "high", "xhigh"} if normalized.startswith("gpt-5.3"): return {"low", "medium", "high", "xhigh"} if normalized.startswith("gpt-5.2"): @@ -103,7 +105,7 @@ def extract_reasoning_from_model_name(model: str | None) -> Dict[str, Any] | Non s = model.strip().lower() if not s: return None - efforts = {"minimal", "low", "medium", "high", "xhigh"} + efforts = {"minimal", "low", "medium", "high", "xhigh", "none"} if ":" in s: maybe = s.rsplit(":", 1)[-1].strip() @@ -113,6 +115,8 @@ def extract_reasoning_from_model_name(model: str | None) -> Dict[str, Any] | Non for sep in ("-", "_"): if s.endswith(sep + "minimal"): return {"effort": "minimal"} + if s.endswith(sep + "none"): + return {"effort": "none"} if s.endswith(sep + "low"): return {"effort": "low"} if s.endswith(sep + "medium"): diff --git a/chatmock/routes_ollama.py b/chatmock/routes_ollama.py index 6974f24..9f27b90 100644 --- a/chatmock/routes_ollama.py +++ b/chatmock/routes_ollama.py @@ -97,6 +97,7 @@ def ollama_tags() -> Response: "gpt-5", "gpt-5.1", "gpt-5.2", + "gpt-5.4", "gpt-5.3-codex", "gpt-5-codex", "gpt-5.2-codex", @@ -115,6 +116,11 @@ def ollama_tags() -> Response: "gpt-5.1-high", "gpt-5.1-medium", "gpt-5.1-low", + "gpt-5.4-xhigh", + "gpt-5.4-high", + "gpt-5.4-medium", + "gpt-5.4-low", + "gpt-5.4-none", 
"gpt-5.2-xhigh", "gpt-5.2-high", "gpt-5.2-medium", diff --git a/chatmock/routes_openai.py b/chatmock/routes_openai.py index 219a15f..42f35f4 100644 --- a/chatmock/routes_openai.py +++ b/chatmock/routes_openai.py @@ -535,6 +535,7 @@ def list_models() -> Response: ("gpt-5", ["high", "medium", "low", "minimal"]), ("gpt-5.1", ["high", "medium", "low"]), ("gpt-5.2", ["xhigh", "high", "medium", "low"]), + ("gpt-5.4", ["xhigh", "high", "medium", "low", "none"]), ("gpt-5.3-codex", ["xhigh", "high", "medium", "low"]), ("gpt-5-codex", ["high", "medium", "low"]), ("gpt-5.2-codex", ["xhigh", "high", "medium", "low"]), diff --git a/chatmock/upstream.py b/chatmock/upstream.py index 6dc4156..fdc9f2d 100644 --- a/chatmock/upstream.py +++ b/chatmock/upstream.py @@ -42,6 +42,9 @@ def normalize_model_name(name: str | None, debug_model: str | None = None) -> st "gpt-5-latest": "gpt-5", "gpt-5": "gpt-5", "gpt-5.1": "gpt-5.1", + "gpt5.4": "gpt-5.4", + "gpt-5.4": "gpt-5.4", + "gpt-5.4-latest": "gpt-5.4", "gpt5.2": "gpt-5.2", "gpt-5.2": "gpt-5.2", "gpt-5.2-latest": "gpt-5.2",