Add support for GPT-5.2
This commit is contained in:
BIN
.README.md.swp
Normal file
BIN
.README.md.swp
Normal file
Binary file not shown.
@@ -22,7 +22,7 @@
|
|||||||
Set options in `.env` or pass environment variables:
|
Set options in `.env` or pass environment variables:
|
||||||
- `PORT`: Container listening port (default 8000)
|
- `PORT`: Container listening port (default 8000)
|
||||||
- `VERBOSE`: `true|false` to enable request/stream logs
|
- `VERBOSE`: `true|false` to enable request/stream logs
|
||||||
- `CHATGPT_LOCAL_REASONING_EFFORT`: minimal|low|medium|high
|
- `CHATGPT_LOCAL_REASONING_EFFORT`: minimal|low|medium|high|xhigh
|
||||||
- `CHATGPT_LOCAL_REASONING_SUMMARY`: auto|concise|detailed|none
|
- `CHATGPT_LOCAL_REASONING_SUMMARY`: auto|concise|detailed|none
|
||||||
- `CHATGPT_LOCAL_REASONING_COMPAT`: legacy|o3|think-tags|current
|
- `CHATGPT_LOCAL_REASONING_COMPAT`: legacy|o3|think-tags|current
|
||||||
- `CHATGPT_LOCAL_DEBUG_MODEL`: force model override (e.g., `gpt-5`)
|
- `CHATGPT_LOCAL_DEBUG_MODEL`: force model override (e.g., `gpt-5`)
|
||||||
|
|||||||
@@ -115,6 +115,7 @@ curl http://127.0.0.1:8000/v1/chat/completions \
|
|||||||
# Supported models
|
# Supported models
|
||||||
- `gpt-5`
|
- `gpt-5`
|
||||||
- `gpt-5.1`
|
- `gpt-5.1`
|
||||||
|
- `gpt-5.2`
|
||||||
- `gpt-5-codex`
|
- `gpt-5-codex`
|
||||||
- `gpt-5.1-codex`
|
- `gpt-5.1-codex`
|
||||||
- `gpt-5.1-codex-max`
|
- `gpt-5.1-codex-max`
|
||||||
@@ -126,7 +127,8 @@ curl http://127.0.0.1:8000/v1/chat/completions \
|
|||||||
### Thinking effort
|
### Thinking effort
|
||||||
|
|
||||||
- `--reasoning-effort` (choice of minimal,low,medium,high,xhigh)<br>
|
- `--reasoning-effort` (choice of minimal,low,medium,high,xhigh)<br>
|
||||||
GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overrided by the API request with a different effort set. The default reasoning effort without setting this parameter is `medium`. The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high` while `gpt-5.1-codex-max` adds `xhigh`; neither offers a `minimal` variant.
|
GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overridden by an API request that sets a different effort. The default reasoning effort without setting this parameter is `medium`.<br>
|
||||||
|
The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high` while `gpt-5.1-codex-max` adds `xhigh`. `gpt-5.2` supports `none`, `low`, `medium`, `high`, and `xhigh`.
|
||||||
|
|
||||||
### Thinking summaries
|
### Thinking summaries
|
||||||
|
|
||||||
@@ -159,7 +161,7 @@ You can enable it by starting the server with this parameter, which will allow O
|
|||||||
If your preferred app doesn’t support selecting reasoning effort, or you just want a simpler approach, this parameter exposes each reasoning level as a separate, queryable model. Each reasoning level also appears individually under /v1/models, so model pickers in your favorite chat apps will list all reasoning options as distinct models you can switch between.
|
If your preferred app doesn’t support selecting reasoning effort, or you just want a simpler approach, this parameter exposes each reasoning level as a separate, queryable model. Each reasoning level also appears individually under /v1/models, so model pickers in your favorite chat apps will list all reasoning options as distinct models you can switch between.
|
||||||
|
|
||||||
## Notes
|
## Notes
|
||||||
If you wish to have the fastest responses, I'd recommend setting `--reasoning-effort` to minimal, and `--reasoning-summary` to none. <br>
|
If you wish to have the fastest responses, I'd recommend setting `--reasoning-effort` to low, and `--reasoning-summary` to none. <br>
|
||||||
All parameters and choices can be seen by sending `python chatmock.py serve --h`<br>
|
All parameters and choices can be seen by sending `python chatmock.py serve --h`<br>
|
||||||
The context size of this route is also larger than what you get access to in the regular ChatGPT app.<br>
|
The context size of this route is also larger than what you get access to in the regular ChatGPT app.<br>
|
||||||
|
|
||||||
@@ -172,4 +174,3 @@ When the model returns a thinking summary, the model will send back thinking tag
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -335,7 +335,7 @@ def main() -> None:
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
default=(os.getenv("CHATGPT_LOCAL_EXPOSE_REASONING_MODELS") or "").strip().lower() in ("1", "true", "yes", "on"),
|
default=(os.getenv("CHATGPT_LOCAL_EXPOSE_REASONING_MODELS") or "").strip().lower() in ("1", "true", "yes", "on"),
|
||||||
help=(
|
help=(
|
||||||
"Expose gpt-5 reasoning effort variants (minimal|low|medium|high|xhigh where supported) "
|
"Expose GPT-5 family reasoning effort variants (minimal|low|medium|high|xhigh where supported) "
|
||||||
"as separate models from /v1/models. This allows choosing effort via model selection in compatible UIs."
|
"as separate models from /v1/models. This allows choosing effort via model selection in compatible UIs."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -11,6 +11,8 @@ def allowed_efforts_for_model(model: str | None) -> Set[str]:
|
|||||||
if not base:
|
if not base:
|
||||||
return DEFAULT_REASONING_EFFORTS
|
return DEFAULT_REASONING_EFFORTS
|
||||||
normalized = base.split(":", 1)[0]
|
normalized = base.split(":", 1)[0]
|
||||||
|
if normalized.startswith("gpt-5.2"):
|
||||||
|
return {"none", "low", "medium", "high", "xhigh"}
|
||||||
if normalized.startswith("gpt-5.1-codex-max"):
|
if normalized.startswith("gpt-5.1-codex-max"):
|
||||||
return {"low", "medium", "high", "xhigh"}
|
return {"low", "medium", "high", "xhigh"}
|
||||||
if normalized.startswith("gpt-5.1"):
|
if normalized.startswith("gpt-5.1"):
|
||||||
|
|||||||
@@ -96,6 +96,7 @@ def ollama_tags() -> Response:
|
|||||||
model_ids = [
|
model_ids = [
|
||||||
"gpt-5",
|
"gpt-5",
|
||||||
"gpt-5.1",
|
"gpt-5.1",
|
||||||
|
"gpt-5.2",
|
||||||
"gpt-5-codex",
|
"gpt-5-codex",
|
||||||
"gpt-5.1-codex",
|
"gpt-5.1-codex",
|
||||||
"gpt-5.1-codex-max",
|
"gpt-5.1-codex-max",
|
||||||
@@ -112,6 +113,11 @@ def ollama_tags() -> Response:
|
|||||||
"gpt-5.1-high",
|
"gpt-5.1-high",
|
||||||
"gpt-5.1-medium",
|
"gpt-5.1-medium",
|
||||||
"gpt-5.1-low",
|
"gpt-5.1-low",
|
||||||
|
"gpt-5.2-xhigh",
|
||||||
|
"gpt-5.2-high",
|
||||||
|
"gpt-5.2-medium",
|
||||||
|
"gpt-5.2-low",
|
||||||
|
"gpt-5.2-none",
|
||||||
"gpt-5-codex-high",
|
"gpt-5-codex-high",
|
||||||
"gpt-5-codex-medium",
|
"gpt-5-codex-medium",
|
||||||
"gpt-5-codex-low",
|
"gpt-5-codex-low",
|
||||||
|
|||||||
@@ -534,6 +534,7 @@ def list_models() -> Response:
|
|||||||
model_groups = [
|
model_groups = [
|
||||||
("gpt-5", ["high", "medium", "low", "minimal"]),
|
("gpt-5", ["high", "medium", "low", "minimal"]),
|
||||||
("gpt-5.1", ["high", "medium", "low"]),
|
("gpt-5.1", ["high", "medium", "low"]),
|
||||||
|
("gpt-5.2", ["xhigh", "high", "medium", "low", "none"]),
|
||||||
("gpt-5-codex", ["high", "medium", "low"]),
|
("gpt-5-codex", ["high", "medium", "low"]),
|
||||||
("gpt-5.1-codex", ["high", "medium", "low"]),
|
("gpt-5.1-codex", ["high", "medium", "low"]),
|
||||||
("gpt-5.1-codex-max", ["xhigh", "high", "medium", "low"]),
|
("gpt-5.1-codex-max", ["xhigh", "high", "medium", "low"]),
|
||||||
|
|||||||
@@ -42,6 +42,9 @@ def normalize_model_name(name: str | None, debug_model: str | None = None) -> st
|
|||||||
"gpt-5-latest": "gpt-5",
|
"gpt-5-latest": "gpt-5",
|
||||||
"gpt-5": "gpt-5",
|
"gpt-5": "gpt-5",
|
||||||
"gpt-5.1": "gpt-5.1",
|
"gpt-5.1": "gpt-5.1",
|
||||||
|
"gpt5.2": "gpt-5.2",
|
||||||
|
"gpt-5.2": "gpt-5.2",
|
||||||
|
"gpt-5.2-latest": "gpt-5.2",
|
||||||
"gpt5-codex": "gpt-5-codex",
|
"gpt5-codex": "gpt-5-codex",
|
||||||
"gpt-5-codex": "gpt-5-codex",
|
"gpt-5-codex": "gpt-5-codex",
|
||||||
"gpt-5-codex-latest": "gpt-5-codex",
|
"gpt-5-codex-latest": "gpt-5-codex",
|
||||||
|
|||||||
Reference in New Issue
Block a user