Add GPT-5-Codex support (#37)

2025-09-16 14:58:41 +02:00
parent 2f23cd5a89
commit 77d60fe321
10 changed files with 189 additions and 45 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,21 @@
+# Python bytecode
 __pycache__/
 *.py[cod]
-.pytest_cache/
+*$py.class
+
+# Virtual environments
+.env/
 .venv/
 venv/
-ENV/
+
+# Packaging artifacts
 build/
 dist/
 *.egg-info/
+
+# Tool caches
+.pytest_cache/
+.mypy_cache/
+
+# OS clutter
+.DS_Store
--- a/DOCKER.md
+++ b/DOCKER.md
@@ -36,5 +36,5 @@ Set `VERBOSE=true` to include extra logging for debugging issues in upstream or
 ```
 curl -s http://localhost:8000/v1/chat/completions \
   -H 'Content-Type: application/json' \
-   -d '{"model":"gpt-5","messages":[{"role":"user","content":"Hello world!"}]}' | jq .
+   -d '{"model":"gpt-5-codex","messages":[{"role":"user","content":"Hello world!"}]}' | jq .
 ```
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@

 ## What It Does

-ChatMock runs a local server that creates an OpenAI/Ollama compatible API, and requests are then fulfilled using your authenticated ChatGPT login with the oauth client of Codex, OpenAI's coding CLI tool. This allows you to use GPT-5 and other models right through your OpenAI account, without requiring an api key.
+ChatMock runs a local server that creates an OpenAI/Ollama-compatible API, and requests are then fulfilled using your authenticated ChatGPT login with the OAuth client of Codex, OpenAI's coding CLI tool. This allows you to use GPT-5, GPT-5-Codex, and other models right through your OpenAI account, without requiring an API key.
 This does require a paid ChatGPT account.

 ## Quickstart
@@ -100,6 +100,7 @@ curl http://127.0.0.1:8000/v1/chat/completions \

 # Supported models
 - `gpt-5`
+- `gpt-5-codex`
 - `codex-mini`

 # Customisation / Configuration
--- a/chatmock/app.py
+++ b/chatmock/app.py
@@ -2,7 +2,7 @@ from __future__ import annotations

 from flask import Flask, jsonify

-from .config import BASE_INSTRUCTIONS
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
 from .http import build_cors_headers
 from .routes_openai import openai_bp
 from .routes_ollama import ollama_bp
@@ -26,6 +26,7 @@ def create_app(
        REASONING_COMPAT=reasoning_compat,
        DEBUG_MODEL=debug_model,
        BASE_INSTRUCTIONS=BASE_INSTRUCTIONS,
+        GPT5_CODEX_INSTRUCTIONS=GPT5_CODEX_INSTRUCTIONS,
        EXPOSE_REASONING_MODELS=bool(expose_reasoning_models),
        DEFAULT_WEB_SEARCH=bool(default_web_search),
    )
--- a/chatmock/config.py
+++ b/chatmock/config.py
@@ -10,26 +10,37 @@ CLIENT_ID_DEFAULT = os.getenv("CHATGPT_LOCAL_CLIENT_ID") or "app_EMoamEEZ73f0CkX
 CHATGPT_RESPONSES_URL = "https://chatgpt.com/backend-api/codex/responses"


-def read_base_instructions() -> str:
+def _read_prompt_text(filename: str) -> str | None:
    candidates = [
-        Path(__file__).parent.parent / "prompt.md",
-        Path(__file__).parent / "prompt.md",
-        Path(getattr(sys, "_MEIPASS", "")) / "prompt.md" if getattr(sys, "_MEIPASS", None) else None,
-        Path.cwd() / "prompt.md",
+        Path(__file__).parent.parent / filename,
+        Path(__file__).parent / filename,
+        Path(getattr(sys, "_MEIPASS", "")) / filename if getattr(sys, "_MEIPASS", None) else None,
+        Path.cwd() / filename,
    ]
-    for p in candidates:
-        if not p:
+    for candidate in candidates:
+        if not candidate:
            continue
        try:
-            if p.exists():
-                content = p.read_text(encoding="utf-8")
+            if candidate.exists():
+                content = candidate.read_text(encoding="utf-8")
                if isinstance(content, str) and content.strip():
                    return content
        except Exception:
            continue
-    raise FileNotFoundError(
-        "Failed to read prompt.md; expected adjacent to package or CWD."
-    )
+    return None
+
+
+def read_base_instructions() -> str:
+    content = _read_prompt_text("prompt.md")
+    if content is None:
+        raise FileNotFoundError("Failed to read prompt.md; expected adjacent to package or CWD.")
+    return content
+
+
+def read_gpt5_codex_instructions(fallback: str) -> str:
+    content = _read_prompt_text("prompt_gpt5_codex.md")
+    return content if isinstance(content, str) and content.strip() else fallback


 BASE_INSTRUCTIONS = read_base_instructions()
+GPT5_CODEX_INSTRUCTIONS = read_gpt5_codex_instructions(BASE_INSTRUCTIONS)
--- a/chatmock/routes_ollama.py
+++ b/chatmock/routes_ollama.py
@@ -7,7 +7,7 @@ from typing import Any, Dict, List

 from flask import Blueprint, Response, current_app, jsonify, make_response, request, stream_with_context

-from .config import BASE_INSTRUCTIONS
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
 from .http import build_cors_headers
 from .reasoning import build_reasoning_param, extract_reasoning_from_model_name
 from .transform import convert_ollama_messages, normalize_ollama_tools
@@ -18,6 +18,15 @@ from .utils import convert_chat_messages_to_responses_input, convert_tools_chat_
 ollama_bp = Blueprint("ollama", __name__)


+def _instructions_for_model(model: str) -> str:
+    base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
+    if model == "gpt-5-codex":
+        codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
+        if isinstance(codex, str) and codex.strip():
+            return codex
+    return base
+
+
 _OLLAMA_FAKE_EVAL = {
    "total_duration": 8497226791,
    "load_duration": 1747193958,
@@ -33,19 +42,19 @@ def ollama_tags() -> Response:
    if bool(current_app.config.get("VERBOSE")):
        print("IN GET /api/tags")
    expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
-    model_ids = [
-        "gpt-5",
-        *(
+    model_ids = ["gpt-5", "gpt-5-codex"]
+    if expose_variants:
+        model_ids.extend(
            [
                "gpt-5-high",
                "gpt-5-medium",
                "gpt-5-low",
                "gpt-5-minimal",
+                "gpt-5-codex-high",
+                "gpt-5-codex-medium",
+                "gpt-5-codex-low",
            ]
-            if expose_variants
-            else []
-        ),
-    ]
+        )
    models = []
    for model_id in model_ids:
        models.append(
@@ -184,10 +193,11 @@ def ollama_chat() -> Response:
    input_items = convert_chat_messages_to_responses_input(messages)

    model_reasoning = extract_reasoning_from_model_name(model)
+    normalized_model = normalize_model_name(model)
    upstream, error_resp = start_upstream_request(
-        normalize_model_name(model),
+        normalized_model,
        input_items,
-        instructions=BASE_INSTRUCTIONS,
+        instructions=_instructions_for_model(normalized_model),
        tools=tools_responses,
        tool_choice=tool_choice,
        parallel_tool_calls=parallel_tool_calls,
@@ -231,7 +241,7 @@ def ollama_chat() -> Response:
            )

    created_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
-    model_out = model if isinstance(model, str) and model.strip() else normalize_model_name(model)
+    model_out = model if isinstance(model, str) and model.strip() else normalized_model

    if stream_req:
        def _gen():
--- a/chatmock/routes_openai.py
+++ b/chatmock/routes_openai.py
@@ -6,7 +6,7 @@ from typing import Any, Dict, List

 from flask import Blueprint, Response, current_app, jsonify, make_response, request

-from .config import BASE_INSTRUCTIONS
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
 from .http import build_cors_headers
 from .reasoning import apply_reasoning_to_message, build_reasoning_param, extract_reasoning_from_model_name
 from .upstream import normalize_model_name, start_upstream_request
@@ -21,6 +21,15 @@ from .utils import (
 openai_bp = Blueprint("openai", __name__)


+def _instructions_for_model(model: str) -> str:
+    base = current_app.config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
+    if model == "gpt-5-codex":
+        codex = current_app.config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
+        if isinstance(codex, str) and codex.strip():
+            return codex
+    return base
+
+
@openai_bp.route("/v1/chat/completions", methods=["POST"])
 def chat_completions() -> Response:
    verbose = bool(current_app.config.get("VERBOSE"))
@@ -125,7 +134,7 @@ def chat_completions() -> Response:
    upstream, error_resp = start_upstream_request(
        model,
        input_items,
-        instructions=BASE_INSTRUCTIONS,
+        instructions=_instructions_for_model(model),
        tools=tools_responses,
        tool_choice=tool_choice,
        parallel_tool_calls=parallel_tool_calls,
@@ -327,7 +336,7 @@ def completions() -> Response:
    upstream, error_resp = start_upstream_request(
        model,
        input_items,
-        instructions=BASE_INSTRUCTIONS,
+        instructions=_instructions_for_model(model),
        reasoning_param=reasoning_param,
    )
    if error_resp is not None:
@@ -424,18 +433,16 @@ def completions() -> Response:
@openai_bp.route("/v1/models", methods=["GET"])
 def list_models() -> Response:
    expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
-    data = []
-    if expose_variants:
-        variant_ids = [
-            "gpt-5",
-            "gpt-5-high",
-            "gpt-5-medium",
-            "gpt-5-low",
-            "gpt-5-minimal",
-        ]
-        data = [{"id": mid, "object": "model", "owned_by": "owner"} for mid in variant_ids]
-    else:
-        data = [{"id": "gpt-5", "object": "model", "owned_by": "owner"}]
+    model_groups = [
+        ("gpt-5", ["high", "medium", "low", "minimal"]),
+        ("gpt-5-codex", ["high", "medium", "low"]),
+    ]
+    model_ids: List[str] = []
+    for base, efforts in model_groups:
+        model_ids.append(base)
+        if expose_variants:
+            model_ids.extend([f"{base}-{effort}" for effort in efforts])
+    data = [{"id": mid, "object": "model", "owned_by": "owner"} for mid in model_ids]
    models = {"object": "list", "data": data}
    resp = make_response(jsonify(models), 200)
    for k, v in build_cors_headers().items():
--- a/chatmock/upstream.py
+++ b/chatmock/upstream.py
@@ -31,6 +31,9 @@ def normalize_model_name(name: str | None, debug_model: str | None = None) -> st
        "gpt5": "gpt-5",
        "gpt-5-latest": "gpt-5",
        "gpt-5": "gpt-5",
+        "gpt5-codex": "gpt-5-codex",
+        "gpt-5-codex": "gpt-5-codex",
+        "gpt-5-codex-latest": "gpt-5-codex",
        "codex": "codex-mini-latest",
        "codex-mini": "codex-mini-latest",
        "codex-mini-latest": "codex-mini-latest",
--- a/gui.py
+++ b/gui.py
@@ -481,4 +481,3 @@ def main() -> None:

 if __name__ == "__main__":
    main()
-
--- a/prompt_gpt5_codex.md
+++ b/prompt_gpt5_codex.md
@@ -0,0 +1,100 @@
+You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.
+
+## General
+
+- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
+- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary.
+- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
+
+## Editing constraints
+
+- Default to ASCII when editing or creating files. Only introduce non-ASCII or other Unicode characters when there is a clear justification and the file already uses them.
+- Add succinct code comments that explain what is going on if code is not self-explanatory. You should not add comments like "Assigns the value to the variable", but a brief comment might be useful ahead of a complex code block that the user would otherwise have to spend time parsing out. Usage of these comments should be rare.
+- You may be in a dirty git worktree.
+    * NEVER revert existing changes you did not make unless explicitly requested, since these changes were made by the user.
+    * If asked to make a commit or code edits and there are unrelated changes to your work or changes that you didn't make in those files, don't revert those changes.
+    * If the changes are in files you've touched recently, you should read carefully and understand how you can work with the changes rather than reverting them.
+    * If the changes are in unrelated files, just ignore them and don't revert them.
+- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.
+
+## Plan tool
+
+When using the planning tool:
+- Skip using the planning tool for straightforward tasks (roughly the easiest 25%).
+- Do not make single-step plans.
+- When you made a plan, update it after having performed one of the sub-tasks that you shared on the plan.
+
+## Codex CLI harness, sandboxing, and approvals
+
+The Codex CLI harness supports several different sandboxing, and approval configurations that the user can choose from.
+
+Filesystem sandboxing defines which files can be read or written. The options are:
+- **read-only**: You can only read files.
+- **workspace-write**: You can read files. You can write to files in this folder, but not outside it.
+- **danger-full-access**: No filesystem sandboxing.
+
+Network sandboxing defines whether network can be accessed without approval. Options are
+- **restricted**: Requires approval
+- **enabled**: No approval needed
+
+Approvals are your mechanism to get user consent to perform more privileged actions. Although they introduce friction to the user because your work is paused until the user responds, you should leverage them to accomplish your important work. Do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless it is set to "never", in which case never ask for approvals.
+
+Approval options are
+- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands.
+- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox.
+- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for it in the `shell` command description.)
+- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding.
+
+When you are running with approvals `on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval:
+- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /tmp)
+- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.
+- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)
+- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval.
+- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for
+- (for all of these, you should weigh alternative paths that do not require approval)
+
+When sandboxing is set to read-only, you'll need to request approval for any command that isn't a read.
+
+You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing enabled, and approval on-failure.
+
+## Special user requests
+
+- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so.
+- If the user asks for a "review", default to a code review mindset: prioritise identifying bugs, risks, behavioural regressions, and missing tests. Findings must be the primary focus of the response - keep summaries or overviews brief and only after enumerating the issues. Present findings first (ordered by severity with file/line references), follow with open questions or assumptions, and offer a change-summary only as a secondary detail. If no findings are discovered, state that explicitly and mention any residual risks or testing gaps.
+
+## Presenting your work and final message
+
+You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.
+
+- Default: be very concise; friendly coding teammate tone.
+- Ask only when needed; suggest ideas; mirror the user's style.
+- For substantial work, summarize clearly; follow final‑answer formatting.
+- Skip heavy formatting for simple confirmations.
+- Don't dump large files you've written; reference paths only.
+- No "save/copy this file" - User is on the same machine.
+- Offer logical next steps (tests, commits, build) briefly; add verify steps if you couldn't do something.
+- For code changes:
+  * Lead with a quick explanation of the change, and then give more details on the context covering where and why a change was made. Do not start this explanation with "summary", just jump right in.
+  * If there are natural next steps the user may want to take, suggest them at the end of your response. Do not make suggestions if there are no natural next steps.
+  * When suggesting multiple options, use numeric lists for the suggestions so the user can quickly respond with a single number.
+- The user does not command execution outputs. When asked to show the output of a command (e.g. `git show`), relay the important details in your answer or summarize the key lines so the user understands the result.
+
+### Final answer structure and style guidelines
+
+- Plain text; CLI handles styling. Use structure only when it helps scanability.
+- Headers: optional; short Title Case (1-3 words) wrapped in **…**; no blank line before the first bullet; add only if they truly help.
+- Bullets: use - ; merge related points; keep to one line when possible; 4–6 per list ordered by importance; keep phrasing consistent.
+- Monospace: backticks for commands/paths/env vars/code ids and inline examples; use for literal keyword bullets; never combine with **.
+- Code samples or multi-line snippets should be wrapped in fenced code blocks; add a language hint whenever obvious.
+- Structure: group related bullets; order sections general → specific → supporting; for subsections, start with a bolded keyword bullet, then items; match complexity to the task.
+- Tone: collaborative, concise, factual; present tense, active voice; self‑contained; no "above/below"; parallel wording.
+- Don'ts: no nested bullets/hierarchies; no ANSI codes; don't cram unrelated keywords; keep keyword lists short—wrap/reformat if long; avoid naming formatting styles in answers.
+- Adaptation: code explanations → precise, structured with code refs; simple tasks → lead with outcome; big changes → logical walkthrough + rationale + next actions; casual one-offs → plain sentences, no headers/bullets.
+- File References: When referencing files in your response, make sure to include the relevant start line and always follow the below rules:
+  * Use inline code to make file paths clickable.
+  * Each reference should have a stand alone path. Even if it's the same file.
+  * Accepted: absolute, workspace‑relative, a/ or b/ diff prefixes, or bare filename/suffix.
+  * Line/column (1‑based, optional): :line[:column] or #Lline[Ccolumn] (column defaults to 1).
+  * Do not use URIs like file://, vscode://, or https://.
+  * Do not provide range of lines
+  * Examples: src/app.ts, src/app.ts:42, b/server/index.js#L10, C:\repo\project\main.rs:12:5