publishing pipeline update + model spec

2026-03-16 16:56:23 +05:00
parent baf312a02e
commit e6eeae8fa6
26 changed files with 1089 additions and 428 deletions
--- a/chatmock/__init__.py
+++ b/chatmock/__init__.py
@@ -2,4 +2,4 @@ from __future__ import annotations

 from .app import create_app
 from .cli import main
-
+from .version import __version__
--- a/chatmock/cli.py
+++ b/chatmock/cli.py
@@ -287,7 +287,7 @@ def cmd_serve(


 def main() -> None:
-    parser = argparse.ArgumentParser(description="ChatGPT Local: login & OpenAI-compatible proxy")
+    parser = argparse.ArgumentParser(description="ChatMock: login & OpenAI-compatible proxy")
    sub = parser.add_subparsers(dest="command", required=True)

    p_login = sub.add_parser("login", help="Authorize with ChatGPT and store tokens")
--- a/chatmock/model_registry.py
+++ b/chatmock/model_registry.py
@@ -0,0 +1,183 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Iterable
+
+
+ALL_REASONING_EFFORTS = ("none", "minimal", "low", "medium", "high", "xhigh")
+DEFAULT_REASONING_EFFORTS = frozenset(ALL_REASONING_EFFORTS)
+
+
+@dataclass(frozen=True)
+class ModelSpec:  # one immutable registry entry per supported model
+    public_id: str  # id emitted by list_public_models(); also a key in _ALIASES
+    upstream_id: str  # name normalize_model_name() returns for this model
+    aliases: tuple[str, ...]  # additional accepted names, each mapped to upstream_id
+    allowed_efforts: frozenset[str]  # value returned by allowed_efforts_for_model()
+    variant_efforts: tuple[str, ...]  # "-<effort>" ids added when variants are exposed
+    uses_codex_instructions: bool = False  # value returned by uses_codex_instructions()
+
+
+_MODEL_SPECS = (  # tuple order is the order list_public_models() emits ids
+    ModelSpec(
+        public_id="gpt-5",
+        upstream_id="gpt-5",
+        aliases=("gpt5", "gpt-5-latest"),
+        allowed_efforts=DEFAULT_REASONING_EFFORTS,
+        variant_efforts=("high", "medium", "low", "minimal"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.1",
+        upstream_id="gpt-5.1",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high")),
+        variant_efforts=("high", "medium", "low"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.2",
+        upstream_id="gpt-5.2",
+        aliases=("gpt5.2", "gpt-5.2-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.4",
+        upstream_id="gpt-5.4",
+        aliases=("gpt5.4", "gpt-5.4-latest"),
+        allowed_efforts=frozenset(("none", "low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low", "none"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.3-codex",
+        upstream_id="gpt-5.3-codex",
+        aliases=("gpt5.3-codex", "gpt-5.3-codex-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5-codex",
+        upstream_id="gpt-5-codex",
+        aliases=("gpt5-codex", "gpt-5-codex-latest"),
+        allowed_efforts=DEFAULT_REASONING_EFFORTS,
+        variant_efforts=("high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.2-codex",
+        upstream_id="gpt-5.2-codex",
+        aliases=("gpt5.2-codex", "gpt-5.2-codex-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.1-codex",
+        upstream_id="gpt-5.1-codex",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high")),
+        variant_efforts=("high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.1-codex-max",
+        upstream_id="gpt-5.1-codex-max",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.1-codex-mini",
+        upstream_id="gpt-5.1-codex-mini",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high")),
+        variant_efforts=(),  # empty: no "-<effort>" ids are listed for this model
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="codex-mini",
+        upstream_id="codex-mini-latest",  # the only entry whose upstream id differs from its public id
+        aliases=("codex", "codex-mini-latest"),
+        allowed_efforts=DEFAULT_REASONING_EFFORTS,
+        variant_efforts=(),
+        uses_codex_instructions=True,
+    ),
+)
+
+_SPECS_BY_UPSTREAM = {spec.upstream_id: spec for spec in _MODEL_SPECS}  # upstream id -> spec
+_ALIASES = {}  # accepted name (public id or alias) -> upstream id
+for _spec in _MODEL_SPECS:
+    _ALIASES[_spec.public_id] = _spec.upstream_id  # the public id itself always resolves
+    for _alias in _spec.aliases:
+        _ALIASES[_alias] = _spec.upstream_id
+
+
+def _strip_model_name(model: str | None) -> tuple[str, str | None]:
+    """Split a model name into ``(base, effort)``, discarding any ``:tag``."""
+    if not isinstance(model, str):
+        return "", None
+    # Clients may send "name:latest"; the replaced normalizer cut at the first ":",
+    # so a non-effort tag must not stay in the base and defeat the alias lookup.
+    value, _, tag = model.strip().lower().partition(":")
+    value, tag = value.strip(), tag.rsplit(":", 1)[-1].strip()
+    if tag in DEFAULT_REASONING_EFFORTS:
+        return value, tag
+    for separator in ("-", "_"):  # no effort tag: look for "-effort" / "_effort"
+        for effort in ALL_REASONING_EFFORTS:
+            suffix = f"{separator}{effort}"
+            if value.endswith(suffix):
+                return value[: -len(suffix)], effort
+    return value, None
+
+
+def model_spec_for_name(model: str | None) -> ModelSpec | None:  # None when the name is not registered
+    base, _ = _strip_model_name(model)  # the effort half is irrelevant for the lookup
+    upstream_id = _ALIASES.get(base)
+    if not upstream_id:
+        return None
+    return _SPECS_BY_UPSTREAM.get(upstream_id)
+
+
+def normalize_model_name(model: str | None, debug_model: str | None = None) -> str:
+    if isinstance(debug_model, str) and debug_model.strip():
+        return debug_model.strip()  # a non-blank debug_model wins outright; its case is kept
+    spec = model_spec_for_name(model)
+    if spec is not None:
+        return spec.upstream_id
+    base, _ = _strip_model_name(model)  # unregistered name: passed through lowercased, effort removed
+    return base or "gpt-5.4"  # empty or non-string input falls back to this default
+
+
+def uses_codex_instructions(model: str | None) -> bool:
+    spec = model_spec_for_name(model)
+    if spec is not None:
+        return spec.uses_codex_instructions  # registered models use their explicit flag
+    return "codex" in ((model or "").strip().lower())  # unregistered: substring check on the raw name
+
+
+def allowed_efforts_for_model(model: str | None) -> frozenset[str]:
+    spec = model_spec_for_name(model)
+    if spec is not None:
+        return spec.allowed_efforts
+    return DEFAULT_REASONING_EFFORTS  # unregistered models are allowed every effort level
+
+
+def extract_reasoning_from_model_name(model: str | None) -> dict[str, str] | None:
+    _, effort = _strip_model_name(model)  # only the effort half of the parse is needed
+    if not effort:
+        return None  # the name carries no effort
+    return {"effort": effort}
+
+
+def list_public_models(expose_reasoning_models: bool = False) -> list[str]:
+    model_ids: list[str] = []
+    for spec in _MODEL_SPECS:  # registry order is the output order
+        model_ids.append(spec.public_id)
+        if expose_reasoning_models:  # variants follow their base id as "<public_id>-<effort>"
+            model_ids.extend(f"{spec.public_id}-{effort}" for effort in spec.variant_efforts)
+    return model_ids
+
+
+def iter_public_models() -> Iterable[ModelSpec]:
+    return _MODEL_SPECS  # the registry tuple itself, not a copy
--- a/chatmock/oauth.py
+++ b/chatmock/oauth.py
@@ -152,7 +152,7 @@ class OAuthHTTPServer(http.server.HTTPServer):
                "requested_token": "openai-api-key",
                "subject_token": token_data.id_token,
                "subject_token_type": "urn:ietf:params:oauth:token-type:id_token",
-                "name": f"ChatGPT Local [auto-generated] ({today})",
+                "name": f"ChatMock [auto-generated] ({today})",
            }
        ).encode()

@@ -311,7 +311,7 @@ class OAuthHandler(http.server.BaseHTTPRequestHandler):
                "requested_token": "openai-api-key",
                "subject_token": token_data.id_token,
                "subject_token_type": "urn:ietf:params:oauth:token-type:id_token",
-                "name": f"ChatGPT Local [auto-generated] ({today})",
+                "name": f"ChatMock [auto-generated] ({today})",
            }
        ).encode()

--- a/chatmock/reasoning.py
+++ b/chatmock/reasoning.py
@@ -1,27 +1,8 @@
 from __future__ import annotations

-from typing import Any, Dict, Set
+from typing import Any, Dict

-
-DEFAULT_REASONING_EFFORTS: Set[str] = {"minimal", "low", "medium", "high", "xhigh", "none"}
-
-
-def allowed_efforts_for_model(model: str | None) -> Set[str]:
-    base = (model or "").strip().lower()
-    if not base:
-        return DEFAULT_REASONING_EFFORTS
-    normalized = base.split(":", 1)[0]
-    if normalized.startswith("gpt-5.4"):
-        return {"none", "low", "medium", "high", "xhigh"}
-    if normalized.startswith("gpt-5.3"):
-        return {"low", "medium", "high", "xhigh"}
-    if normalized.startswith("gpt-5.2"):
-        return {"low", "medium", "high", "xhigh"}
-    if normalized.startswith("gpt-5.1-codex-max"):
-        return {"low", "medium", "high", "xhigh"}
-    if normalized.startswith("gpt-5.1"):
-        return {"low", "medium", "high"}
-    return DEFAULT_REASONING_EFFORTS
+from .model_registry import DEFAULT_REASONING_EFFORTS, allowed_efforts_for_model, extract_reasoning_from_model_name


 def build_reasoning_param(
@@ -29,7 +10,7 @@ def build_reasoning_param(
    base_summary: str = "auto",
    overrides: Dict[str, Any] | None = None,
    *,
-    allowed_efforts: Set[str] | None = None,
+    allowed_efforts: frozenset[str] | None = None,
 ) -> Dict[str, Any]:
    effort = (base_effort or "").strip().lower()
    summary = (base_summary or "").strip().lower()
@@ -96,34 +77,3 @@ def apply_reasoning_to_message(
        if isinstance(content_text, str):
            message["content"] = think_block + (content_text or "")
    return message
-
-
-def extract_reasoning_from_model_name(model: str | None) -> Dict[str, Any] | None:
-    """Infer reasoning overrides from a model."""
-    if not isinstance(model, str) or not model:
-        return None
-    s = model.strip().lower()
-    if not s:
-        return None
-    efforts = {"minimal", "low", "medium", "high", "xhigh", "none"}
-
-    if ":" in s:
-        maybe = s.rsplit(":", 1)[-1].strip()
-        if maybe in efforts:
-            return {"effort": maybe}
-
-    for sep in ("-", "_"):
-        if s.endswith(sep + "minimal"):
-            return {"effort": "minimal"}
-        if s.endswith(sep + "none"):
-            return {"effort": "none"}
-        if s.endswith(sep + "low"):
-            return {"effort": "low"}
-        if s.endswith(sep + "medium"):
-            return {"effort": "medium"}
-        if s.endswith(sep + "high"):
-            return {"effort": "high"}
-        if s.endswith(sep + "xhigh"):
-            return {"effort": "xhigh"}
-
-    return None
--- a/chatmock/routes_ollama.py
+++ b/chatmock/routes_ollama.py
@@ -10,6 +10,7 @@ from flask import Blueprint, Response, current_app, jsonify, make_response, requ
 from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
 from .limits import record_rate_limits_from_response
 from .http import build_cors_headers
+from .model_registry import list_public_models, uses_codex_instructions
 from .reasoning import (
    allowed_efforts_for_model,
    build_reasoning_param,
@@ -71,7 +72,7 @@ def ollama_version() -> Response:

 def _instructions_for_model(model: str) -> str:
    base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
-    if "codex" in (model or "").lower():
+    if uses_codex_instructions(model):
        codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
        if isinstance(codex, str) and codex.strip():
            return codex
@@ -93,58 +94,7 @@ def ollama_tags() -> Response:
    if bool(current_app.config.get("VERBOSE")):
        print("IN GET /api/tags")
    expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
-    model_ids = [
-        "gpt-5",
-        "gpt-5.1",
-        "gpt-5.2",
-        "gpt-5.4",
-        "gpt-5.3-codex",
-        "gpt-5-codex",
-        "gpt-5.2-codex",
-        "gpt-5.1-codex",
-        "gpt-5.1-codex-max",
-        "gpt-5.1-codex-mini",
-        "codex-mini",
-    ]
-    if expose_variants:
-        model_ids.extend(
-            [
-                "gpt-5-high",
-                "gpt-5-medium",
-                "gpt-5-low",
-                "gpt-5-minimal",
-                "gpt-5.1-high",
-                "gpt-5.1-medium",
-                "gpt-5.1-low",
-                "gpt-5.4-xhigh",
-                "gpt-5.4-high",
-                "gpt-5.4-medium",
-                "gpt-5.4-low",
-                "gpt-5.4-none",
-                "gpt-5.2-xhigh",
-                "gpt-5.2-high",
-                "gpt-5.2-medium",
-                "gpt-5.2-low",
-                "gpt-5-codex-high",
-                "gpt-5-codex-medium",
-                "gpt-5-codex-low",
-                "gpt-5.2-codex-xhigh",
-                "gpt-5.2-codex-high",
-                "gpt-5.2-codex-medium",
-                "gpt-5.2-codex-low",
-                "gpt-5.3-codex-xhigh",
-                "gpt-5.3-codex-high",
-                "gpt-5.3-codex-medium",
-                "gpt-5.3-codex-low",
-                "gpt-5.1-codex-high",
-                "gpt-5.1-codex-medium",
-                "gpt-5.1-codex-low",
-                "gpt-5.1-codex-max-xhigh",
-                "gpt-5.1-codex-max-high",
-                "gpt-5.1-codex-max-medium",
-                "gpt-5.1-codex-max-low",
-            ]
-        )
+    model_ids = list_public_models(expose_reasoning_models=expose_variants)
    models = []
    for model_id in model_ids:
        models.append(
--- a/chatmock/routes_openai.py
+++ b/chatmock/routes_openai.py
@@ -9,6 +9,7 @@ from flask import Blueprint, Response, current_app, jsonify, make_response, requ
 from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
 from .limits import record_rate_limits_from_response
 from .http import build_cors_headers
+from .model_registry import list_public_models, uses_codex_instructions
 from .reasoning import (
    allowed_efforts_for_model,
    apply_reasoning_to_message,
@@ -59,7 +60,7 @@ def _wrap_stream_logging(label: str, iterator, enabled: bool):

 def _instructions_for_model(model: str) -> str:
    base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
-    if "codex" in (model or "").lower():
+    if uses_codex_instructions(model):
        codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
        if isinstance(codex, str) and codex.strip():
            return codex
@@ -531,24 +532,7 @@ def completions() -> Response:
@openai_bp.route("/v1/models", methods=["GET"])
 def list_models() -> Response:
    expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
-    model_groups = [
-        ("gpt-5", ["high", "medium", "low", "minimal"]),
-        ("gpt-5.1", ["high", "medium", "low"]),
-        ("gpt-5.2", ["xhigh", "high", "medium", "low"]),
-        ("gpt-5.4", ["xhigh", "high", "medium", "low", "none"]),
-        ("gpt-5.3-codex", ["xhigh", "high", "medium", "low"]),
-        ("gpt-5-codex", ["high", "medium", "low"]),
-        ("gpt-5.2-codex", ["xhigh", "high", "medium", "low"]),
-        ("gpt-5.1-codex", ["high", "medium", "low"]),
-        ("gpt-5.1-codex-max", ["xhigh", "high", "medium", "low"]),
-        ("gpt-5.1-codex-mini", []),
-        ("codex-mini", []),
-    ]
-    model_ids: List[str] = []
-    for base, efforts in model_groups:
-        model_ids.append(base)
-        if expose_variants:
-            model_ids.extend([f"{base}-{effort}" for effort in efforts])
+    model_ids = list_public_models(expose_reasoning_models=expose_variants)
    data = [{"id": mid, "object": "model", "owned_by": "owner"} for mid in model_ids]
    models = {"object": "list", "data": data}
    resp = make_response(jsonify(models), 200)
--- a/chatmock/upstream.py
+++ b/chatmock/upstream.py
@@ -9,6 +9,7 @@ from flask import Response, current_app, jsonify, make_response

 from .config import CHATGPT_RESPONSES_URL
 from .http import build_cors_headers
+from .model_registry import normalize_model_name
 from .session import ensure_session_id
 from flask import request as flask_request
 from .utils import get_effective_chatgpt_auth
@@ -23,50 +24,6 @@ def _log_json(prefix: str, payload: Any) -> None:
        except Exception:
            pass

-
-def normalize_model_name(name: str | None, debug_model: str | None = None) -> str:
-    if isinstance(debug_model, str) and debug_model.strip():
-        return debug_model.strip()
-    if not isinstance(name, str) or not name.strip():
-        return "gpt-5"
-    base = name.split(":", 1)[0].strip()
-    for sep in ("-", "_"):
-        lowered = base.lower()
-        for effort in ("minimal", "low", "medium", "high", "xhigh"):
-            suffix = f"{sep}{effort}"
-            if lowered.endswith(suffix):
-                base = base[: -len(suffix)]
-                break
-    mapping = {
-        "gpt5": "gpt-5",
-        "gpt-5-latest": "gpt-5",
-        "gpt-5": "gpt-5",
-        "gpt-5.1": "gpt-5.1",
-        "gpt5.4": "gpt-5.4",
-        "gpt-5.4": "gpt-5.4",
-        "gpt-5.4-latest": "gpt-5.4",
-        "gpt5.2": "gpt-5.2",
-        "gpt-5.2": "gpt-5.2",
-        "gpt-5.2-latest": "gpt-5.2",
-        "gpt5.3-codex": "gpt-5.3-codex",
-        "gpt-5.3-codex": "gpt-5.3-codex",
-        "gpt-5.3-codex-latest": "gpt-5.3-codex",
-        "gpt5.2-codex": "gpt-5.2-codex",
-        "gpt-5.2-codex": "gpt-5.2-codex",
-        "gpt-5.2-codex-latest": "gpt-5.2-codex",
-        "gpt5-codex": "gpt-5-codex",
-        "gpt-5-codex": "gpt-5-codex",
-        "gpt-5-codex-latest": "gpt-5-codex",
-        "gpt-5.1-codex": "gpt-5.1-codex",
-        "gpt-5.1-codex-max": "gpt-5.1-codex-max",
-        "codex": "codex-mini-latest",
-        "codex-mini": "codex-mini-latest",
-        "codex-mini-latest": "codex-mini-latest",
-        "gpt-5.1-codex-mini": "gpt-5.1-codex-mini",
-    }
-    return mapping.get(base, base)
-
-
 def start_upstream_request(
    model: str,
    input_items: List[Dict[str, Any]],
--- a/chatmock/version.py
+++ b/chatmock/version.py
@@ -0,0 +1,4 @@
+from __future__ import annotations
+
+
+__version__ = "1.35"  # re-exported by the `from .version import __version__` line this commit adds