feat: Add GPT-5.4 mini. (#101)
* feat: Add gpt-5.4-mini. This just adds gpt-5.4-mini at (low/med/high/xhigh) reasoning levels. * Fix formatting in README for reasoning effort section --------- Co-authored-by: Ra's al Ghul <ras@alghul.com> Co-authored-by: Game_Time <108236317+RayBytes@users.noreply.github.com>
This commit is contained in:
@@ -110,6 +110,7 @@ curl http://127.0.0.1:8000/v1/chat/completions \
|
||||
|
||||
# Supported models
|
||||
- `gpt-5.4`
|
||||
- `gpt-5.4-mini`
|
||||
- `gpt-5.2`
|
||||
- `gpt-5.1`
|
||||
- `gpt-5`
|
||||
@@ -127,7 +128,7 @@ curl http://127.0.0.1:8000/v1/chat/completions \
|
||||
|
||||
- `--reasoning-effort` (choice of none,minimal,low,medium,high,xhigh)<br>
|
||||
GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overridden by the API request with a different effort set. The default reasoning effort without setting this parameter is `medium`.<br>
|
||||
The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high` while `gpt-5.1-codex-max` adds `xhigh`. The `gpt-5.2` and `gpt-5.3` families (including codex) support `low`, `medium`, `high`, and `xhigh`. GPT-5.4 supports `none`, `low`, `medium`, `high`, and `xhigh`.
|
||||
The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high` while `gpt-5.1-codex-max` adds `xhigh`. The `gpt-5.2` and `gpt-5.3` families (including codex) support `low`, `medium`, `high`, and `xhigh`. `gpt-5.4` supports `none`, `low`, `medium`, `high`, and `xhigh`. `gpt-5.4-mini` supports `low`, `medium`, `high`, and `xhigh` (it does not support `none`).
|
||||
|
||||
### Thinking summaries
|
||||
|
||||
|
||||
@@ -47,6 +47,13 @@ _MODEL_SPECS = (
|
||||
allowed_efforts=frozenset(("none", "low", "medium", "high", "xhigh")),
|
||||
variant_efforts=("xhigh", "high", "medium", "low", "none"),
|
||||
),
|
||||
ModelSpec(
|
||||
public_id="gpt-5.4-mini",
|
||||
upstream_id="gpt-5.4-mini",
|
||||
aliases=("gpt5.4-mini", "gpt-5.4-mini-latest"),
|
||||
allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
|
||||
variant_efforts=("xhigh", "high", "medium", "low"),
|
||||
),
|
||||
ModelSpec(
|
||||
public_id="gpt-5.3-codex",
|
||||
upstream_id="gpt-5.3-codex",
|
||||
|
||||
@@ -9,22 +9,28 @@ class ModelRegistryTests(unittest.TestCase):
|
||||
def test_normalizes_aliases(self) -> None:
|
||||
self.assertEqual(normalize_model_name("gpt5"), "gpt-5")
|
||||
self.assertEqual(normalize_model_name("gpt5.4"), "gpt-5.4")
|
||||
self.assertEqual(normalize_model_name("gpt5.4-mini"), "gpt-5.4-mini")
|
||||
self.assertEqual(normalize_model_name("codex"), "codex-mini-latest")
|
||||
|
||||
def test_strips_reasoning_suffixes(self) -> None:
|
||||
self.assertEqual(normalize_model_name("gpt-5.4-high"), "gpt-5.4")
|
||||
self.assertEqual(normalize_model_name("gpt-5.4-mini-high"), "gpt-5.4-mini")
|
||||
self.assertEqual(normalize_model_name("gpt-5.2_codemirror"), "gpt-5.2_codemirror")
|
||||
self.assertEqual(normalize_model_name("gpt-5.1-codex:max"), "gpt-5.1-codex:max")
|
||||
self.assertEqual(normalize_model_name("gpt-5.1-codex:high"), "gpt-5.1-codex")
|
||||
|
||||
def test_allowed_efforts_follow_registry(self) -> None:
|
||||
self.assertEqual(allowed_efforts_for_model("gpt-5.4"), frozenset(("none", "low", "medium", "high", "xhigh")))
|
||||
self.assertEqual(allowed_efforts_for_model("gpt-5.4-mini"), frozenset(("low", "medium", "high", "xhigh")))
|
||||
self.assertEqual(allowed_efforts_for_model("gpt-5.1-codex"), frozenset(("low", "medium", "high")))
|
||||
|
||||
def test_public_models_include_variants(self) -> None:
|
||||
model_ids = list_public_models(expose_reasoning_models=True)
|
||||
self.assertIn("gpt-5.4", model_ids)
|
||||
self.assertIn("gpt-5.4-mini", model_ids)
|
||||
self.assertIn("gpt-5.4-none", model_ids)
|
||||
self.assertIn("gpt-5.4-mini-xhigh", model_ids)
|
||||
self.assertNotIn("gpt-5.4-mini-none", model_ids)
|
||||
self.assertIn("gpt-5.1-codex-max-xhigh", model_ids)
|
||||
self.assertNotIn("codex-mini-high", model_ids)
|
||||
|
||||
|
||||
@@ -33,13 +33,17 @@ class RouteTests(unittest.TestCase):
|
||||
response = self.client.get("/v1/models")
|
||||
body = response.get_json()
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertIn("gpt-5.4", [item["id"] for item in body["data"]])
|
||||
model_ids = [item["id"] for item in body["data"]]
|
||||
self.assertIn("gpt-5.4", model_ids)
|
||||
self.assertIn("gpt-5.4-mini", model_ids)
|
||||
|
||||
def test_ollama_tags_list(self) -> None:
|
||||
response = self.client.get("/api/tags")
|
||||
body = response.get_json()
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertIn("gpt-5.4", [item["name"] for item in body["models"]])
|
||||
model_names = [item["name"] for item in body["models"]]
|
||||
self.assertIn("gpt-5.4", model_names)
|
||||
self.assertIn("gpt-5.4-mini", model_names)
|
||||
|
||||
@patch("chatmock.routes_openai.start_upstream_request")
|
||||
def test_chat_completions(self, mock_start) -> None:
|
||||
@@ -54,12 +58,12 @@ class RouteTests(unittest.TestCase):
|
||||
)
|
||||
response = self.client.post(
|
||||
"/v1/chat/completions",
|
||||
json={"model": "gpt5.4", "messages": [{"role": "user", "content": "hi"}]},
|
||||
json={"model": "gpt5.4-mini", "messages": [{"role": "user", "content": "hi"}]},
|
||||
)
|
||||
body = response.get_json()
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(body["choices"][0]["message"]["content"], "hello")
|
||||
self.assertEqual(body["model"], "gpt5.4")
|
||||
self.assertEqual(body["model"], "gpt5.4-mini")
|
||||
|
||||
@patch("chatmock.routes_ollama.start_upstream_request")
|
||||
def test_ollama_chat(self, mock_start) -> None:
|
||||
|
||||
Reference in New Issue
Block a user