GPT-5.1 models "minimal" removed, add gpt-5.1-codex-max (#80)
This commit is contained in:
@@ -114,15 +114,19 @@ curl http://127.0.0.1:8000/v1/chat/completions \
|
|||||||
|
|
||||||
# Supported models
|
# Supported models
|
||||||
- `gpt-5`
|
- `gpt-5`
|
||||||
|
- `gpt-5.1`
|
||||||
- `gpt-5-codex`
|
- `gpt-5-codex`
|
||||||
|
- `gpt-5.1-codex`
|
||||||
|
- `gpt-5.1-codex-max`
|
||||||
|
- `gpt-5.1-codex-mini`
|
||||||
- `codex-mini`
|
- `codex-mini`
|
||||||
|
|
||||||
# Customisation / Configuration
|
# Customisation / Configuration
|
||||||
|
|
||||||
### Thinking effort
|
### Thinking effort
|
||||||
|
|
||||||
- `--reasoning-effort` (choice of minimal,low,medium,high)<br>
|
- `--reasoning-effort` (choice of minimal,low,medium,high,xhigh)<br>
|
||||||
GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overridden by an API request that sets a different effort. The default reasoning effort without setting this parameter is `medium`.
|
GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overridden by an API request that sets a different effort. The default reasoning effort without setting this parameter is `medium`. The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high`, while `gpt-5.1-codex-max` adds `xhigh`; neither offers a `minimal` variant.
|
||||||
|
|
||||||
### Thinking summaries
|
### Thinking summaries
|
||||||
|
|
||||||
|
|||||||
@@ -311,7 +311,7 @@ def main() -> None:
|
|||||||
)
|
)
|
||||||
p_serve.add_argument(
|
p_serve.add_argument(
|
||||||
"--reasoning-effort",
|
"--reasoning-effort",
|
||||||
choices=["minimal", "low", "medium", "high"],
|
choices=["minimal", "low", "medium", "high", "xhigh"],
|
||||||
default=os.getenv("CHATGPT_LOCAL_REASONING_EFFORT", "medium").lower(),
|
default=os.getenv("CHATGPT_LOCAL_REASONING_EFFORT", "medium").lower(),
|
||||||
help="Reasoning effort level for Responses API (default: medium)",
|
help="Reasoning effort level for Responses API (default: medium)",
|
||||||
)
|
)
|
||||||
@@ -335,8 +335,8 @@ def main() -> None:
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
default=(os.getenv("CHATGPT_LOCAL_EXPOSE_REASONING_MODELS") or "").strip().lower() in ("1", "true", "yes", "on"),
|
default=(os.getenv("CHATGPT_LOCAL_EXPOSE_REASONING_MODELS") or "").strip().lower() in ("1", "true", "yes", "on"),
|
||||||
help=(
|
help=(
|
||||||
"Expose gpt-5 reasoning effort variants (minimal|low|medium|high) as separate models from /v1/models. "
|
"Expose gpt-5 reasoning effort variants (minimal|low|medium|high|xhigh where supported) "
|
||||||
"This allows choosing effort via model selection in compatible UIs."
|
"as separate models from /v1/models. This allows choosing effort via model selection in compatible UIs."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
p_serve.add_argument(
|
p_serve.add_argument(
|
||||||
|
|||||||
@@ -1,15 +1,34 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Any, Dict
|
from typing import Any, Dict, Set
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_REASONING_EFFORTS: Set[str] = {"minimal", "low", "medium", "high", "xhigh"}
|
||||||
|
|
||||||
|
|
||||||
|
def allowed_efforts_for_model(model: str | None) -> Set[str]:
|
||||||
|
base = (model or "").strip().lower()
|
||||||
|
if not base:
|
||||||
|
return DEFAULT_REASONING_EFFORTS
|
||||||
|
normalized = base.split(":", 1)[0]
|
||||||
|
if normalized.startswith("gpt-5.1-codex-max"):
|
||||||
|
return {"low", "medium", "high", "xhigh"}
|
||||||
|
if normalized.startswith("gpt-5.1"):
|
||||||
|
return {"low", "medium", "high"}
|
||||||
|
return DEFAULT_REASONING_EFFORTS
|
||||||
|
|
||||||
|
|
||||||
def build_reasoning_param(
|
def build_reasoning_param(
|
||||||
base_effort: str = "medium", base_summary: str = "auto", overrides: Dict[str, Any] | None = None
|
base_effort: str = "medium",
|
||||||
|
base_summary: str = "auto",
|
||||||
|
overrides: Dict[str, Any] | None = None,
|
||||||
|
*,
|
||||||
|
allowed_efforts: Set[str] | None = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
effort = (base_effort or "").strip().lower()
|
effort = (base_effort or "").strip().lower()
|
||||||
summary = (base_summary or "").strip().lower()
|
summary = (base_summary or "").strip().lower()
|
||||||
|
|
||||||
valid_efforts = {"minimal", "low", "medium", "high"}
|
valid_efforts = allowed_efforts or DEFAULT_REASONING_EFFORTS
|
||||||
valid_summaries = {"auto", "concise", "detailed", "none"}
|
valid_summaries = {"auto", "concise", "detailed", "none"}
|
||||||
|
|
||||||
if isinstance(overrides, dict):
|
if isinstance(overrides, dict):
|
||||||
@@ -80,7 +99,7 @@ def extract_reasoning_from_model_name(model: str | None) -> Dict[str, Any] | Non
|
|||||||
s = model.strip().lower()
|
s = model.strip().lower()
|
||||||
if not s:
|
if not s:
|
||||||
return None
|
return None
|
||||||
efforts = {"minimal", "low", "medium", "high"}
|
efforts = {"minimal", "low", "medium", "high", "xhigh"}
|
||||||
|
|
||||||
if ":" in s:
|
if ":" in s:
|
||||||
maybe = s.rsplit(":", 1)[-1].strip()
|
maybe = s.rsplit(":", 1)[-1].strip()
|
||||||
@@ -96,5 +115,7 @@ def extract_reasoning_from_model_name(model: str | None) -> Dict[str, Any] | Non
|
|||||||
return {"effort": "medium"}
|
return {"effort": "medium"}
|
||||||
if s.endswith(sep + "high"):
|
if s.endswith(sep + "high"):
|
||||||
return {"effort": "high"}
|
return {"effort": "high"}
|
||||||
|
if s.endswith(sep + "xhigh"):
|
||||||
|
return {"effort": "xhigh"}
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|||||||
@@ -10,7 +10,11 @@ from flask import Blueprint, Response, current_app, jsonify, make_response, requ
|
|||||||
from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
|
from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
|
||||||
from .limits import record_rate_limits_from_response
|
from .limits import record_rate_limits_from_response
|
||||||
from .http import build_cors_headers
|
from .http import build_cors_headers
|
||||||
from .reasoning import build_reasoning_param, extract_reasoning_from_model_name
|
from .reasoning import (
|
||||||
|
allowed_efforts_for_model,
|
||||||
|
build_reasoning_param,
|
||||||
|
extract_reasoning_from_model_name,
|
||||||
|
)
|
||||||
from .transform import convert_ollama_messages, normalize_ollama_tools
|
from .transform import convert_ollama_messages, normalize_ollama_tools
|
||||||
from .upstream import normalize_model_name, start_upstream_request
|
from .upstream import normalize_model_name, start_upstream_request
|
||||||
from .utils import convert_chat_messages_to_responses_input, convert_tools_chat_to_responses
|
from .utils import convert_chat_messages_to_responses_input, convert_tools_chat_to_responses
|
||||||
@@ -67,7 +71,7 @@ def ollama_version() -> Response:
|
|||||||
|
|
||||||
def _instructions_for_model(model: str) -> str:
|
def _instructions_for_model(model: str) -> str:
|
||||||
base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
|
base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
|
||||||
if model == "gpt-5-codex" or model == "gpt-5.1-codex":
|
if model.startswith("gpt-5-codex") or model.startswith("gpt-5.1-codex"):
|
||||||
codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
|
codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
|
||||||
if isinstance(codex, str) and codex.strip():
|
if isinstance(codex, str) and codex.strip():
|
||||||
return codex
|
return codex
|
||||||
@@ -89,7 +93,15 @@ def ollama_tags() -> Response:
|
|||||||
if bool(current_app.config.get("VERBOSE")):
|
if bool(current_app.config.get("VERBOSE")):
|
||||||
print("IN GET /api/tags")
|
print("IN GET /api/tags")
|
||||||
expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
|
expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
|
||||||
model_ids = ["gpt-5", "gpt-5.1", "gpt-5-codex", "gpt-5.1-codex", "gpt-5.1-codex-mini", "codex-mini"]
|
model_ids = [
|
||||||
|
"gpt-5",
|
||||||
|
"gpt-5.1",
|
||||||
|
"gpt-5-codex",
|
||||||
|
"gpt-5.1-codex",
|
||||||
|
"gpt-5.1-codex-max",
|
||||||
|
"gpt-5.1-codex-mini",
|
||||||
|
"codex-mini",
|
||||||
|
]
|
||||||
if expose_variants:
|
if expose_variants:
|
||||||
model_ids.extend(
|
model_ids.extend(
|
||||||
[
|
[
|
||||||
@@ -100,13 +112,16 @@ def ollama_tags() -> Response:
|
|||||||
"gpt-5.1-high",
|
"gpt-5.1-high",
|
||||||
"gpt-5.1-medium",
|
"gpt-5.1-medium",
|
||||||
"gpt-5.1-low",
|
"gpt-5.1-low",
|
||||||
"gpt-5.1-minimal",
|
|
||||||
"gpt-5-codex-high",
|
"gpt-5-codex-high",
|
||||||
"gpt-5-codex-medium",
|
"gpt-5-codex-medium",
|
||||||
"gpt-5-codex-low",
|
"gpt-5-codex-low",
|
||||||
"gpt-5.1-codex-high",
|
"gpt-5.1-codex-high",
|
||||||
"gpt-5.1-codex-medium",
|
"gpt-5.1-codex-medium",
|
||||||
"gpt-5.1-codex-low",
|
"gpt-5.1-codex-low",
|
||||||
|
"gpt-5.1-codex-max-xhigh",
|
||||||
|
"gpt-5.1-codex-max-high",
|
||||||
|
"gpt-5.1-codex-max-medium",
|
||||||
|
"gpt-5.1-codex-max-low",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
models = []
|
models = []
|
||||||
@@ -275,7 +290,12 @@ def ollama_chat() -> Response:
|
|||||||
tools=tools_responses,
|
tools=tools_responses,
|
||||||
tool_choice=tool_choice,
|
tool_choice=tool_choice,
|
||||||
parallel_tool_calls=parallel_tool_calls,
|
parallel_tool_calls=parallel_tool_calls,
|
||||||
reasoning_param=build_reasoning_param(reasoning_effort, reasoning_summary, model_reasoning),
|
reasoning_param=build_reasoning_param(
|
||||||
|
reasoning_effort,
|
||||||
|
reasoning_summary,
|
||||||
|
model_reasoning,
|
||||||
|
allowed_efforts=allowed_efforts_for_model(model),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
if error_resp is not None:
|
if error_resp is not None:
|
||||||
if verbose:
|
if verbose:
|
||||||
@@ -310,7 +330,12 @@ def ollama_chat() -> Response:
|
|||||||
tools=base_tools_only,
|
tools=base_tools_only,
|
||||||
tool_choice=safe_choice,
|
tool_choice=safe_choice,
|
||||||
parallel_tool_calls=parallel_tool_calls,
|
parallel_tool_calls=parallel_tool_calls,
|
||||||
reasoning_param=build_reasoning_param(reasoning_effort, reasoning_summary, model_reasoning),
|
reasoning_param=build_reasoning_param(
|
||||||
|
reasoning_effort,
|
||||||
|
reasoning_summary,
|
||||||
|
model_reasoning,
|
||||||
|
allowed_efforts=allowed_efforts_for_model(model),
|
||||||
|
),
|
||||||
)
|
)
|
||||||
record_rate_limits_from_response(upstream2)
|
record_rate_limits_from_response(upstream2)
|
||||||
if err2 is None and upstream2 is not None and upstream2.status_code < 400:
|
if err2 is None and upstream2 is not None and upstream2.status_code < 400:
|
||||||
|
|||||||
@@ -9,7 +9,12 @@ from flask import Blueprint, Response, current_app, jsonify, make_response, requ
|
|||||||
from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
|
from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
|
||||||
from .limits import record_rate_limits_from_response
|
from .limits import record_rate_limits_from_response
|
||||||
from .http import build_cors_headers
|
from .http import build_cors_headers
|
||||||
from .reasoning import apply_reasoning_to_message, build_reasoning_param, extract_reasoning_from_model_name
|
from .reasoning import (
|
||||||
|
allowed_efforts_for_model,
|
||||||
|
apply_reasoning_to_message,
|
||||||
|
build_reasoning_param,
|
||||||
|
extract_reasoning_from_model_name,
|
||||||
|
)
|
||||||
from .upstream import normalize_model_name, start_upstream_request
|
from .upstream import normalize_model_name, start_upstream_request
|
||||||
from .utils import (
|
from .utils import (
|
||||||
convert_chat_messages_to_responses_input,
|
convert_chat_messages_to_responses_input,
|
||||||
@@ -54,7 +59,7 @@ def _wrap_stream_logging(label: str, iterator, enabled: bool):
|
|||||||
|
|
||||||
def _instructions_for_model(model: str) -> str:
|
def _instructions_for_model(model: str) -> str:
|
||||||
base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
|
base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
|
||||||
if model == "gpt-5-codex" or model == "gpt-5.1-codex":
|
if model.startswith("gpt-5-codex") or model.startswith("gpt-5.1-codex"):
|
||||||
codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
|
codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
|
||||||
if isinstance(codex, str) and codex.strip():
|
if isinstance(codex, str) and codex.strip():
|
||||||
return codex
|
return codex
|
||||||
@@ -166,7 +171,12 @@ def chat_completions() -> Response:
|
|||||||
|
|
||||||
model_reasoning = extract_reasoning_from_model_name(requested_model)
|
model_reasoning = extract_reasoning_from_model_name(requested_model)
|
||||||
reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
|
reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
|
||||||
reasoning_param = build_reasoning_param(reasoning_effort, reasoning_summary, reasoning_overrides)
|
reasoning_param = build_reasoning_param(
|
||||||
|
reasoning_effort,
|
||||||
|
reasoning_summary,
|
||||||
|
reasoning_overrides,
|
||||||
|
allowed_efforts=allowed_efforts_for_model(model),
|
||||||
|
)
|
||||||
|
|
||||||
upstream, error_resp = start_upstream_request(
|
upstream, error_resp = start_upstream_request(
|
||||||
model,
|
model,
|
||||||
@@ -396,7 +406,12 @@ def completions() -> Response:
|
|||||||
|
|
||||||
model_reasoning = extract_reasoning_from_model_name(requested_model)
|
model_reasoning = extract_reasoning_from_model_name(requested_model)
|
||||||
reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
|
reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
|
||||||
reasoning_param = build_reasoning_param(reasoning_effort, reasoning_summary, reasoning_overrides)
|
reasoning_param = build_reasoning_param(
|
||||||
|
reasoning_effort,
|
||||||
|
reasoning_summary,
|
||||||
|
reasoning_overrides,
|
||||||
|
allowed_efforts=allowed_efforts_for_model(model),
|
||||||
|
)
|
||||||
upstream, error_resp = start_upstream_request(
|
upstream, error_resp = start_upstream_request(
|
||||||
model,
|
model,
|
||||||
input_items,
|
input_items,
|
||||||
@@ -518,9 +533,10 @@ def list_models() -> Response:
|
|||||||
expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
|
expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
|
||||||
model_groups = [
|
model_groups = [
|
||||||
("gpt-5", ["high", "medium", "low", "minimal"]),
|
("gpt-5", ["high", "medium", "low", "minimal"]),
|
||||||
("gpt-5.1", ["high", "medium", "low", "minimal"]),
|
("gpt-5.1", ["high", "medium", "low"]),
|
||||||
("gpt-5-codex", ["high", "medium", "low"]),
|
("gpt-5-codex", ["high", "medium", "low"]),
|
||||||
("gpt-5.1-codex", ["high", "medium", "low"]),
|
("gpt-5.1-codex", ["high", "medium", "low"]),
|
||||||
|
("gpt-5.1-codex-max", ["xhigh", "high", "medium", "low"]),
|
||||||
("gpt-5.1-codex-mini", []),
|
("gpt-5.1-codex-mini", []),
|
||||||
("codex-mini", []),
|
("codex-mini", []),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ def normalize_model_name(name: str | None, debug_model: str | None = None) -> st
|
|||||||
base = name.split(":", 1)[0].strip()
|
base = name.split(":", 1)[0].strip()
|
||||||
for sep in ("-", "_"):
|
for sep in ("-", "_"):
|
||||||
lowered = base.lower()
|
lowered = base.lower()
|
||||||
for effort in ("minimal", "low", "medium", "high"):
|
for effort in ("minimal", "low", "medium", "high", "xhigh"):
|
||||||
suffix = f"{sep}{effort}"
|
suffix = f"{sep}{effort}"
|
||||||
if lowered.endswith(suffix):
|
if lowered.endswith(suffix):
|
||||||
base = base[: -len(suffix)]
|
base = base[: -len(suffix)]
|
||||||
@@ -46,6 +46,7 @@ def normalize_model_name(name: str | None, debug_model: str | None = None) -> st
|
|||||||
"gpt-5-codex": "gpt-5-codex",
|
"gpt-5-codex": "gpt-5-codex",
|
||||||
"gpt-5-codex-latest": "gpt-5-codex",
|
"gpt-5-codex-latest": "gpt-5-codex",
|
||||||
"gpt-5.1-codex": "gpt-5.1-codex",
|
"gpt-5.1-codex": "gpt-5.1-codex",
|
||||||
|
"gpt-5.1-codex-max": "gpt-5.1-codex-max",
|
||||||
"codex": "codex-mini-latest",
|
"codex": "codex-mini-latest",
|
||||||
"codex-mini": "codex-mini-latest",
|
"codex-mini": "codex-mini-latest",
|
||||||
"codex-mini-latest": "codex-mini-latest",
|
"codex-mini-latest": "codex-mini-latest",
|
||||||
|
|||||||
Reference in New Issue
Block a user