publishing pipeline update + model spec
This commit is contained in:
@@ -2,4 +2,4 @@ from __future__ import annotations
|
||||
|
||||
from .app import create_app
|
||||
from .cli import main
|
||||
|
||||
from .version import __version__
|
||||
|
||||
@@ -287,7 +287,7 @@ def cmd_serve(
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(description="ChatGPT Local: login & OpenAI-compatible proxy")
|
||||
parser = argparse.ArgumentParser(description="ChatMock: login & OpenAI-compatible proxy")
|
||||
sub = parser.add_subparsers(dest="command", required=True)
|
||||
|
||||
p_login = sub.add_parser("login", help="Authorize with ChatGPT and store tokens")
|
||||
|
||||
183
chatmock/model_registry.py
Normal file
183
chatmock/model_registry.py
Normal file
@@ -0,0 +1,183 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Iterable
|
||||
|
||||
|
||||
# Every reasoning-effort label the registry recognises. The tuple order is the
# order in which the name parser tries "-<effort>" / "_<effort>" suffixes.
ALL_REASONING_EFFORTS: tuple[str, ...] = (
    "none",
    "minimal",
    "low",
    "medium",
    "high",
    "xhigh",
)

# The same labels as an immutable set: used for membership tests and as the
# effort set of models that carry no restriction of their own.
DEFAULT_REASONING_EFFORTS: frozenset[str] = frozenset(ALL_REASONING_EFFORTS)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ModelSpec:
    """Immutable description of one model entry in the registry."""

    # Model id advertised to clients in the public model listing.
    public_id: str
    # Model id that requested names are normalised to.
    upstream_id: str
    # Alternative spellings that resolve to this entry.
    aliases: tuple[str, ...]
    # Reasoning efforts accepted for this model.
    allowed_efforts: frozenset[str]
    # Efforts published as "<public_id>-<effort>" variants, in listing order.
    variant_efforts: tuple[str, ...]
    # Whether the codex instruction set applies to this model.
    uses_codex_instructions: bool = False
|
||||
|
||||
|
||||
# The registry itself. Tuple order is the order in which models are listed to
# clients; each entry's ``variant_efforts`` order is the order of its
# "<public_id>-<effort>" variants.
_MODEL_SPECS: tuple[ModelSpec, ...] = (
    # --- general-purpose models -------------------------------------------
    ModelSpec(
        public_id="gpt-5",
        upstream_id="gpt-5",
        aliases=("gpt5", "gpt-5-latest"),
        allowed_efforts=DEFAULT_REASONING_EFFORTS,
        variant_efforts=("high", "medium", "low", "minimal"),
    ),
    ModelSpec(
        public_id="gpt-5.1",
        upstream_id="gpt-5.1",
        aliases=(),
        allowed_efforts=frozenset({"low", "medium", "high"}),
        variant_efforts=("high", "medium", "low"),
    ),
    ModelSpec(
        public_id="gpt-5.2",
        upstream_id="gpt-5.2",
        aliases=("gpt5.2", "gpt-5.2-latest"),
        allowed_efforts=frozenset({"low", "medium", "high", "xhigh"}),
        variant_efforts=("xhigh", "high", "medium", "low"),
    ),
    ModelSpec(
        public_id="gpt-5.4",
        upstream_id="gpt-5.4",
        aliases=("gpt5.4", "gpt-5.4-latest"),
        allowed_efforts=frozenset({"none", "low", "medium", "high", "xhigh"}),
        variant_efforts=("xhigh", "high", "medium", "low", "none"),
    ),
    # --- codex models (all use the codex instruction set) -----------------
    ModelSpec(
        public_id="gpt-5.3-codex",
        upstream_id="gpt-5.3-codex",
        aliases=("gpt5.3-codex", "gpt-5.3-codex-latest"),
        allowed_efforts=frozenset({"low", "medium", "high", "xhigh"}),
        variant_efforts=("xhigh", "high", "medium", "low"),
        uses_codex_instructions=True,
    ),
    ModelSpec(
        public_id="gpt-5-codex",
        upstream_id="gpt-5-codex",
        aliases=("gpt5-codex", "gpt-5-codex-latest"),
        allowed_efforts=DEFAULT_REASONING_EFFORTS,
        variant_efforts=("high", "medium", "low"),
        uses_codex_instructions=True,
    ),
    ModelSpec(
        public_id="gpt-5.2-codex",
        upstream_id="gpt-5.2-codex",
        aliases=("gpt5.2-codex", "gpt-5.2-codex-latest"),
        allowed_efforts=frozenset({"low", "medium", "high", "xhigh"}),
        variant_efforts=("xhigh", "high", "medium", "low"),
        uses_codex_instructions=True,
    ),
    ModelSpec(
        public_id="gpt-5.1-codex",
        upstream_id="gpt-5.1-codex",
        aliases=(),
        allowed_efforts=frozenset({"low", "medium", "high"}),
        variant_efforts=("high", "medium", "low"),
        uses_codex_instructions=True,
    ),
    ModelSpec(
        public_id="gpt-5.1-codex-max",
        upstream_id="gpt-5.1-codex-max",
        aliases=(),
        allowed_efforts=frozenset({"low", "medium", "high", "xhigh"}),
        variant_efforts=("xhigh", "high", "medium", "low"),
        uses_codex_instructions=True,
    ),
    # The two mini models publish no "-<effort>" variants.
    ModelSpec(
        public_id="gpt-5.1-codex-mini",
        upstream_id="gpt-5.1-codex-mini",
        aliases=(),
        allowed_efforts=frozenset({"low", "medium", "high"}),
        variant_efforts=(),
        uses_codex_instructions=True,
    ),
    # The only entry whose public id differs from its upstream id.
    ModelSpec(
        public_id="codex-mini",
        upstream_id="codex-mini-latest",
        aliases=("codex", "codex-mini-latest"),
        allowed_efforts=DEFAULT_REASONING_EFFORTS,
        variant_efforts=(),
        uses_codex_instructions=True,
    ),
)
|
||||
|
||||
# Upstream id -> registry entry.
_SPECS_BY_UPSTREAM: dict[str, ModelSpec] = {
    entry.upstream_id: entry for entry in _MODEL_SPECS
}

# Every accepted spelling -> upstream id. For each entry the public id is
# inserted first, followed by its aliases, in registry order.
_ALIASES: dict[str, str] = {
    spelling: entry.upstream_id
    for entry in _MODEL_SPECS
    for spelling in (entry.public_id, *entry.aliases)
}
|
||||
|
||||
|
||||
def _strip_model_name(model: str | None) -> tuple[str, str | None]:
    """Split a requested model name into a base name and a reasoning effort.

    The name is trimmed and lower-cased before parsing. An effort can be
    spelled either as a trailing ``:<effort>`` tag or as a trailing
    ``-<effort>`` / ``_<effort>`` suffix.

    A trailing ``:<tag>`` that is not an effort (for example ``:latest``) is
    discarded before suffix parsing, so a tagged name such as
    ``gpt-5:latest`` still yields the base ``gpt-5`` instead of a string that
    matches no alias.

    Args:
        model: The model name as received from the client; may be ``None``
            or a non-string value.

    Returns:
        ``(base, effort)``. ``effort`` is ``None`` when the name carries no
        recognised effort. Non-string or blank input yields ``("", None)``.
    """
    if not isinstance(model, str):
        return "", None
    value = model.strip().lower()
    if not value:
        return "", None

    head, colon, tag = value.rpartition(":")
    if colon:
        if tag in DEFAULT_REASONING_EFFORTS:
            return head, tag
        # Not an effort: drop the tag and keep parsing the remainder.
        value = head

    # "-<effort>" and "_<effort>" endings are mutually exclusive across
    # efforts, so a single pass over the efforts is enough.
    for effort in ALL_REASONING_EFFORTS:
        if value.endswith((f"-{effort}", f"_{effort}")):
            return value[: -(len(effort) + 1)], effort
    return value, None
|
||||
|
||||
|
||||
def model_spec_for_name(model: str | None) -> ModelSpec | None:
    """Return the registry entry for a requested model name.

    Any effort carried by the name is ignored; only the base name is looked
    up among the known public ids and aliases.

    Returns:
        The matching ``ModelSpec``, or ``None`` when the base name is unknown.
    """
    base_name = _strip_model_name(model)[0]
    upstream = _ALIASES.get(base_name)
    return _SPECS_BY_UPSTREAM.get(upstream) if upstream else None
|
||||
|
||||
|
||||
def normalize_model_name(model: str | None, debug_model: str | None = None) -> str:
    """Resolve a requested model name to the id that should be used upstream.

    Args:
        model: The model name as requested; may be ``None``.
        debug_model: When a non-blank string, it wins outright and is
            returned trimmed, without consulting the registry.

    Returns:
        The registry's upstream id for a known model; otherwise the
        lower-cased base name with any effort removed; or ``"gpt-5.4"`` when
        nothing usable was supplied.
    """
    override = debug_model.strip() if isinstance(debug_model, str) else ""
    if override:
        return override

    known = model_spec_for_name(model)
    if known is None:
        # Unknown model: pass the stripped base name through as-is.
        stripped = _strip_model_name(model)[0]
        return stripped if stripped else "gpt-5.4"
    return known.upstream_id
|
||||
|
||||
|
||||
def uses_codex_instructions(model: str | None) -> bool:
    """Tell whether the codex instruction set applies to ``model``.

    Known models answer from their registry entry. For an unknown name the
    decision falls back to whether ``"codex"`` appears in the lower-cased
    name.
    """
    known = model_spec_for_name(model)
    if known is None:
        lowered = (model or "").strip().lower()
        return "codex" in lowered
    return known.uses_codex_instructions
|
||||
|
||||
|
||||
def allowed_efforts_for_model(model: str | None) -> frozenset[str]:
    """Return the reasoning efforts accepted for ``model``.

    Unknown models are given ``DEFAULT_REASONING_EFFORTS``.
    """
    known = model_spec_for_name(model)
    return DEFAULT_REASONING_EFFORTS if known is None else known.allowed_efforts
|
||||
|
||||
|
||||
def extract_reasoning_from_model_name(model: str | None) -> dict[str, str] | None:
    """Return the reasoning override encoded in a model name, if any.

    Returns:
        ``{"effort": <effort>}`` when the name carries an effort tag or
        suffix, otherwise ``None``.
    """
    effort = _strip_model_name(model)[1]
    return {"effort": effort} if effort else None
|
||||
|
||||
|
||||
def list_public_models(expose_reasoning_models: bool = False) -> list[str]:
    """List the model ids advertised to clients, in registry order.

    Args:
        expose_reasoning_models: When true, each model's public id is
            followed by its ``"<public_id>-<effort>"`` variants.

    Returns:
        A new list of model id strings.
    """
    listed: list[str] = []
    for entry in _MODEL_SPECS:
        listed.append(entry.public_id)
        if not expose_reasoning_models:
            continue
        for effort in entry.variant_efforts:
            listed.append(f"{entry.public_id}-{effort}")
    return listed
|
||||
|
||||
|
||||
def iter_public_models() -> Iterable[ModelSpec]:
    """Return every registry entry, in registry order.

    The module-level tuple itself is returned, not a copy.
    """
    return _MODEL_SPECS
|
||||
@@ -152,7 +152,7 @@ class OAuthHTTPServer(http.server.HTTPServer):
|
||||
"requested_token": "openai-api-key",
|
||||
"subject_token": token_data.id_token,
|
||||
"subject_token_type": "urn:ietf:params:oauth:token-type:id_token",
|
||||
"name": f"ChatGPT Local [auto-generated] ({today})",
|
||||
"name": f"ChatMock [auto-generated] ({today})",
|
||||
}
|
||||
).encode()
|
||||
|
||||
@@ -311,7 +311,7 @@ class OAuthHandler(http.server.BaseHTTPRequestHandler):
|
||||
"requested_token": "openai-api-key",
|
||||
"subject_token": token_data.id_token,
|
||||
"subject_token_type": "urn:ietf:params:oauth:token-type:id_token",
|
||||
"name": f"ChatGPT Local [auto-generated] ({today})",
|
||||
"name": f"ChatMock [auto-generated] ({today})",
|
||||
}
|
||||
).encode()
|
||||
|
||||
|
||||
@@ -1,27 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Set
|
||||
from typing import Any, Dict
|
||||
|
||||
|
||||
DEFAULT_REASONING_EFFORTS: Set[str] = {"minimal", "low", "medium", "high", "xhigh", "none"}
|
||||
|
||||
|
||||
def allowed_efforts_for_model(model: str | None) -> Set[str]:
|
||||
base = (model or "").strip().lower()
|
||||
if not base:
|
||||
return DEFAULT_REASONING_EFFORTS
|
||||
normalized = base.split(":", 1)[0]
|
||||
if normalized.startswith("gpt-5.4"):
|
||||
return {"none", "low", "medium", "high", "xhigh"}
|
||||
if normalized.startswith("gpt-5.3"):
|
||||
return {"low", "medium", "high", "xhigh"}
|
||||
if normalized.startswith("gpt-5.2"):
|
||||
return {"low", "medium", "high", "xhigh"}
|
||||
if normalized.startswith("gpt-5.1-codex-max"):
|
||||
return {"low", "medium", "high", "xhigh"}
|
||||
if normalized.startswith("gpt-5.1"):
|
||||
return {"low", "medium", "high"}
|
||||
return DEFAULT_REASONING_EFFORTS
|
||||
from .model_registry import DEFAULT_REASONING_EFFORTS, allowed_efforts_for_model, extract_reasoning_from_model_name
|
||||
|
||||
|
||||
def build_reasoning_param(
|
||||
@@ -29,7 +10,7 @@ def build_reasoning_param(
|
||||
base_summary: str = "auto",
|
||||
overrides: Dict[str, Any] | None = None,
|
||||
*,
|
||||
allowed_efforts: Set[str] | None = None,
|
||||
allowed_efforts: frozenset[str] | None = None,
|
||||
) -> Dict[str, Any]:
|
||||
effort = (base_effort or "").strip().lower()
|
||||
summary = (base_summary or "").strip().lower()
|
||||
@@ -96,34 +77,3 @@ def apply_reasoning_to_message(
|
||||
if isinstance(content_text, str):
|
||||
message["content"] = think_block + (content_text or "")
|
||||
return message
|
||||
|
||||
|
||||
def extract_reasoning_from_model_name(model: str | None) -> Dict[str, Any] | None:
|
||||
"""Infer reasoning overrides from a model."""
|
||||
if not isinstance(model, str) or not model:
|
||||
return None
|
||||
s = model.strip().lower()
|
||||
if not s:
|
||||
return None
|
||||
efforts = {"minimal", "low", "medium", "high", "xhigh", "none"}
|
||||
|
||||
if ":" in s:
|
||||
maybe = s.rsplit(":", 1)[-1].strip()
|
||||
if maybe in efforts:
|
||||
return {"effort": maybe}
|
||||
|
||||
for sep in ("-", "_"):
|
||||
if s.endswith(sep + "minimal"):
|
||||
return {"effort": "minimal"}
|
||||
if s.endswith(sep + "none"):
|
||||
return {"effort": "none"}
|
||||
if s.endswith(sep + "low"):
|
||||
return {"effort": "low"}
|
||||
if s.endswith(sep + "medium"):
|
||||
return {"effort": "medium"}
|
||||
if s.endswith(sep + "high"):
|
||||
return {"effort": "high"}
|
||||
if s.endswith(sep + "xhigh"):
|
||||
return {"effort": "xhigh"}
|
||||
|
||||
return None
|
||||
|
||||
@@ -10,6 +10,7 @@ from flask import Blueprint, Response, current_app, jsonify, make_response, requ
|
||||
from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
|
||||
from .limits import record_rate_limits_from_response
|
||||
from .http import build_cors_headers
|
||||
from .model_registry import list_public_models, uses_codex_instructions
|
||||
from .reasoning import (
|
||||
allowed_efforts_for_model,
|
||||
build_reasoning_param,
|
||||
@@ -71,7 +72,7 @@ def ollama_version() -> Response:
|
||||
|
||||
def _instructions_for_model(model: str) -> str:
|
||||
base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
|
||||
if "codex" in (model or "").lower():
|
||||
if uses_codex_instructions(model):
|
||||
codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
|
||||
if isinstance(codex, str) and codex.strip():
|
||||
return codex
|
||||
@@ -93,58 +94,7 @@ def ollama_tags() -> Response:
|
||||
if bool(current_app.config.get("VERBOSE")):
|
||||
print("IN GET /api/tags")
|
||||
expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
|
||||
model_ids = [
|
||||
"gpt-5",
|
||||
"gpt-5.1",
|
||||
"gpt-5.2",
|
||||
"gpt-5.4",
|
||||
"gpt-5.3-codex",
|
||||
"gpt-5-codex",
|
||||
"gpt-5.2-codex",
|
||||
"gpt-5.1-codex",
|
||||
"gpt-5.1-codex-max",
|
||||
"gpt-5.1-codex-mini",
|
||||
"codex-mini",
|
||||
]
|
||||
if expose_variants:
|
||||
model_ids.extend(
|
||||
[
|
||||
"gpt-5-high",
|
||||
"gpt-5-medium",
|
||||
"gpt-5-low",
|
||||
"gpt-5-minimal",
|
||||
"gpt-5.1-high",
|
||||
"gpt-5.1-medium",
|
||||
"gpt-5.1-low",
|
||||
"gpt-5.4-xhigh",
|
||||
"gpt-5.4-high",
|
||||
"gpt-5.4-medium",
|
||||
"gpt-5.4-low",
|
||||
"gpt-5.4-none",
|
||||
"gpt-5.2-xhigh",
|
||||
"gpt-5.2-high",
|
||||
"gpt-5.2-medium",
|
||||
"gpt-5.2-low",
|
||||
"gpt-5-codex-high",
|
||||
"gpt-5-codex-medium",
|
||||
"gpt-5-codex-low",
|
||||
"gpt-5.2-codex-xhigh",
|
||||
"gpt-5.2-codex-high",
|
||||
"gpt-5.2-codex-medium",
|
||||
"gpt-5.2-codex-low",
|
||||
"gpt-5.3-codex-xhigh",
|
||||
"gpt-5.3-codex-high",
|
||||
"gpt-5.3-codex-medium",
|
||||
"gpt-5.3-codex-low",
|
||||
"gpt-5.1-codex-high",
|
||||
"gpt-5.1-codex-medium",
|
||||
"gpt-5.1-codex-low",
|
||||
"gpt-5.1-codex-max-xhigh",
|
||||
"gpt-5.1-codex-max-high",
|
||||
"gpt-5.1-codex-max-medium",
|
||||
"gpt-5.1-codex-max-low",
|
||||
]
|
||||
)
|
||||
model_ids = list_public_models(expose_reasoning_models=expose_variants)
|
||||
models = []
|
||||
for model_id in model_ids:
|
||||
models.append(
|
||||
|
||||
@@ -9,6 +9,7 @@ from flask import Blueprint, Response, current_app, jsonify, make_response, requ
|
||||
from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
|
||||
from .limits import record_rate_limits_from_response
|
||||
from .http import build_cors_headers
|
||||
from .model_registry import list_public_models, uses_codex_instructions
|
||||
from .reasoning import (
|
||||
allowed_efforts_for_model,
|
||||
apply_reasoning_to_message,
|
||||
@@ -59,7 +60,7 @@ def _wrap_stream_logging(label: str, iterator, enabled: bool):
|
||||
|
||||
def _instructions_for_model(model: str) -> str:
|
||||
base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
|
||||
if "codex" in (model or "").lower():
|
||||
if uses_codex_instructions(model):
|
||||
codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
|
||||
if isinstance(codex, str) and codex.strip():
|
||||
return codex
|
||||
@@ -531,24 +532,7 @@ def completions() -> Response:
|
||||
@openai_bp.route("/v1/models", methods=["GET"])
|
||||
def list_models() -> Response:
|
||||
expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
|
||||
model_groups = [
|
||||
("gpt-5", ["high", "medium", "low", "minimal"]),
|
||||
("gpt-5.1", ["high", "medium", "low"]),
|
||||
("gpt-5.2", ["xhigh", "high", "medium", "low"]),
|
||||
("gpt-5.4", ["xhigh", "high", "medium", "low", "none"]),
|
||||
("gpt-5.3-codex", ["xhigh", "high", "medium", "low"]),
|
||||
("gpt-5-codex", ["high", "medium", "low"]),
|
||||
("gpt-5.2-codex", ["xhigh", "high", "medium", "low"]),
|
||||
("gpt-5.1-codex", ["high", "medium", "low"]),
|
||||
("gpt-5.1-codex-max", ["xhigh", "high", "medium", "low"]),
|
||||
("gpt-5.1-codex-mini", []),
|
||||
("codex-mini", []),
|
||||
]
|
||||
model_ids: List[str] = []
|
||||
for base, efforts in model_groups:
|
||||
model_ids.append(base)
|
||||
if expose_variants:
|
||||
model_ids.extend([f"{base}-{effort}" for effort in efforts])
|
||||
model_ids = list_public_models(expose_reasoning_models=expose_variants)
|
||||
data = [{"id": mid, "object": "model", "owned_by": "owner"} for mid in model_ids]
|
||||
models = {"object": "list", "data": data}
|
||||
resp = make_response(jsonify(models), 200)
|
||||
|
||||
@@ -9,6 +9,7 @@ from flask import Response, current_app, jsonify, make_response
|
||||
|
||||
from .config import CHATGPT_RESPONSES_URL
|
||||
from .http import build_cors_headers
|
||||
from .model_registry import normalize_model_name
|
||||
from .session import ensure_session_id
|
||||
from flask import request as flask_request
|
||||
from .utils import get_effective_chatgpt_auth
|
||||
@@ -23,50 +24,6 @@ def _log_json(prefix: str, payload: Any) -> None:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def normalize_model_name(name: str | None, debug_model: str | None = None) -> str:
|
||||
if isinstance(debug_model, str) and debug_model.strip():
|
||||
return debug_model.strip()
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
return "gpt-5"
|
||||
base = name.split(":", 1)[0].strip()
|
||||
for sep in ("-", "_"):
|
||||
lowered = base.lower()
|
||||
for effort in ("minimal", "low", "medium", "high", "xhigh"):
|
||||
suffix = f"{sep}{effort}"
|
||||
if lowered.endswith(suffix):
|
||||
base = base[: -len(suffix)]
|
||||
break
|
||||
mapping = {
|
||||
"gpt5": "gpt-5",
|
||||
"gpt-5-latest": "gpt-5",
|
||||
"gpt-5": "gpt-5",
|
||||
"gpt-5.1": "gpt-5.1",
|
||||
"gpt5.4": "gpt-5.4",
|
||||
"gpt-5.4": "gpt-5.4",
|
||||
"gpt-5.4-latest": "gpt-5.4",
|
||||
"gpt5.2": "gpt-5.2",
|
||||
"gpt-5.2": "gpt-5.2",
|
||||
"gpt-5.2-latest": "gpt-5.2",
|
||||
"gpt5.3-codex": "gpt-5.3-codex",
|
||||
"gpt-5.3-codex": "gpt-5.3-codex",
|
||||
"gpt-5.3-codex-latest": "gpt-5.3-codex",
|
||||
"gpt5.2-codex": "gpt-5.2-codex",
|
||||
"gpt-5.2-codex": "gpt-5.2-codex",
|
||||
"gpt-5.2-codex-latest": "gpt-5.2-codex",
|
||||
"gpt5-codex": "gpt-5-codex",
|
||||
"gpt-5-codex": "gpt-5-codex",
|
||||
"gpt-5-codex-latest": "gpt-5-codex",
|
||||
"gpt-5.1-codex": "gpt-5.1-codex",
|
||||
"gpt-5.1-codex-max": "gpt-5.1-codex-max",
|
||||
"codex": "codex-mini-latest",
|
||||
"codex-mini": "codex-mini-latest",
|
||||
"codex-mini-latest": "codex-mini-latest",
|
||||
"gpt-5.1-codex-mini": "gpt-5.1-codex-mini",
|
||||
}
|
||||
return mapping.get(base, base)
|
||||
|
||||
|
||||
def start_upstream_request(
|
||||
model: str,
|
||||
input_items: List[Dict[str, Any]],
|
||||
|
||||
4
chatmock/version.py
Normal file
4
chatmock/version.py
Normal file
@@ -0,0 +1,4 @@
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
__version__ = "1.35"
|
||||
Reference in New Issue
Block a user