diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1f7e645
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+__pycache__/
+*.py[cod]
+.pytest_cache/
+.venv/
+venv/
+ENV/
+build/
+dist/
+*.egg-info/
diff --git a/README.md b/README.md
index 8e922e1..b8e0432 100644
--- a/README.md
+++ b/README.md
@@ -114,19 +114,37 @@ GPT-5 has a configurable amount of "effort" it can put into thinking, which may
- `--reasoning-summary` (choice of auto,concise,detailed,none)
Models like GPT-5 do not return raw thinking content, but instead return thinking summaries. These can also be customised by you.
+### OpenAI Tools
+
+You can also access OpenAI tools through this project. Currently, only web search is available.
+To enable it by default, start the server with `--enable-web-search`; requests that omit `responses_tools` then get the web search tool, and the model decides when a search is needed. Alternatively, enable it for a single request with the following parameters:
+
+- `responses_tools`: a list of tool objects; only `{"type": "web_search"}` and `{"type": "web_search_preview"}` are supported, e.g. `[{"type": "web_search"}]`
+- `responses_tool_choice`: `"auto"` or `"none"`
+
+### Example usage
+```json
+{
+ "model": "gpt-5",
+ "messages": [{"role":"user","content":"Find current METAR rules"}],
+ "stream": true,
+ "responses_tools": [{"type": "web_search"}],
+ "responses_tool_choice": "auto"
+}
+```
+
## Notes
-If you wish to have the fastest responses, I'd recommend setting `--reasoning-effort` to low, and `--reasoning-summary` to none.
+If you wish to have the fastest responses, I'd recommend setting `--reasoning-effort` to minimal, and `--reasoning-summary` to none.
All parameters and choices can be seen by sending `python chatmock.py serve --h`
-The context size of this route is also larger than what you get access to in the regular ChatGPT app.
+The context size of this route is also larger than what you get access to in the regular ChatGPT app.
-**When the model returns a thinking summary, the model will send back thinking tags to make it compatible with chat apps. If you don't like this behavior, you can instead set `--reasoning-compat` to legacy, and reasoning will be set in the reasoning tag instead of being returned in the actual response text.**
+When the model returns a thinking summary, the model will send back thinking tags to make it compatible with chat apps. **If you don't like this behavior, you can instead set `--reasoning-compat` to legacy, and reasoning will be set in the reasoning tag instead of being returned in the actual response text.**
-# TODO
-- ~~Implement Ollama support~~ ✅
-- Explore to see if we can make more model settings accessible
-- Implement analytics (token counting, etc, to track usage)
## Star History
[](https://www.star-history.com/#RayBytes/ChatMock&Timeline)
+
+
+
diff --git a/chatmock/app.py b/chatmock/app.py
index 5499e59..c69bd05 100644
--- a/chatmock/app.py
+++ b/chatmock/app.py
@@ -15,6 +15,7 @@ def create_app(
reasoning_compat: str = "think-tags",
debug_model: str | None = None,
expose_reasoning_models: bool = False,
+ default_web_search: bool = False,
) -> Flask:
app = Flask(__name__)
@@ -26,6 +27,7 @@ def create_app(
DEBUG_MODEL=debug_model,
BASE_INSTRUCTIONS=BASE_INSTRUCTIONS,
EXPOSE_REASONING_MODELS=bool(expose_reasoning_models),
+ DEFAULT_WEB_SEARCH=bool(default_web_search),
)
@app.get("/")
diff --git a/chatmock/cli.py b/chatmock/cli.py
index 7daa0fa..9280d42 100644
--- a/chatmock/cli.py
+++ b/chatmock/cli.py
@@ -96,6 +96,7 @@ def cmd_serve(
reasoning_compat: str,
debug_model: str | None,
expose_reasoning_models: bool,
+ default_web_search: bool,
) -> int:
app = create_app(
verbose=verbose,
@@ -104,6 +105,7 @@ def cmd_serve(
reasoning_compat=reasoning_compat,
debug_model=debug_model,
expose_reasoning_models=expose_reasoning_models,
+ default_web_search=default_web_search,
)
app.run(host=host, debug=False, use_reloader=False, port=port, threaded=True)
@@ -158,6 +160,11 @@ def main() -> None:
"This allows choosing effort via model selection in compatible UIs."
),
)
+ p_serve.add_argument(
+ "--enable-web-search",
+ action="store_true",
+ help="Enable default web_search tool when a request omits responses_tools (off by default)",
+ )
p_info = sub.add_parser("info", help="Print current stored tokens and derived account id")
p_info.add_argument("--json", action="store_true", help="Output raw auth.json contents")
@@ -177,6 +184,7 @@ def main() -> None:
reasoning_compat=args.reasoning_compat,
debug_model=args.debug_model,
expose_reasoning_models=args.expose_reasoning_models,
+ default_web_search=args.enable_web_search,
)
)
elif args.command == "info":
@@ -218,3 +226,4 @@ def main() -> None:
if __name__ == "__main__":
main()
+
diff --git a/chatmock/routes_ollama.py b/chatmock/routes_ollama.py
index a6ae950..3f4ad6e 100644
--- a/chatmock/routes_ollama.py
+++ b/chatmock/routes_ollama.py
@@ -147,12 +147,42 @@ def ollama_chat() -> Response:
tool_choice = payload.get("tool_choice", "auto")
parallel_tool_calls = bool(payload.get("parallel_tool_calls", False))
+ # Passthrough Responses API tools (web_search) via ChatMock extension fields
+ extra_tools: List[Dict[str, Any]] = []
+ had_responses_tools = False
+ rt_payload = payload.get("responses_tools") if isinstance(payload.get("responses_tools"), list) else []
+ if isinstance(rt_payload, list):
+ for _t in rt_payload:
+ if not (isinstance(_t, dict) and isinstance(_t.get("type"), str)):
+ continue
+ if _t.get("type") not in ("web_search", "web_search_preview"):
+ return jsonify({"error": "Only web_search/web_search_preview are supported in responses_tools"}), 400
+ extra_tools.append(_t)
+ if not extra_tools and bool(current_app.config.get("DEFAULT_WEB_SEARCH")):
+ rtc = payload.get("responses_tool_choice")
+ if not (isinstance(rtc, str) and rtc == "none"):
+ extra_tools = [{"type": "web_search"}]
+ if extra_tools:
+ import json as _json
+ MAX_TOOLS_BYTES = 32768
+ try:
+ size = len(_json.dumps(extra_tools))
+ except Exception:
+ size = 0
+ if size > MAX_TOOLS_BYTES:
+ return jsonify({"error": "responses_tools too large"}), 400
+ had_responses_tools = True
+ tools_responses = (tools_responses or []) + extra_tools
+
+ rtc = payload.get("responses_tool_choice")
+ if isinstance(rtc, str) and rtc in ("auto", "none"):
+ tool_choice = rtc
+
if not isinstance(model, str) or not isinstance(messages, list) or not messages:
return jsonify({"error": "Invalid request format"}), 400
input_items = convert_chat_messages_to_responses_input(messages)
- # Infer effort from model variant (gpt-5-high, etc.) but send base model upstream
model_reasoning = extract_reasoning_from_model_name(model)
upstream, error_resp = start_upstream_request(
normalize_model_name(model),
@@ -171,12 +201,34 @@ def ollama_chat() -> Response:
err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"raw": upstream.text}
except Exception:
err_body = {"raw": upstream.text}
- if verbose:
- print("/api/chat upstream error status=", upstream.status_code, " body:", json.dumps(err_body)[:2000])
- return (
- jsonify({"error": (err_body.get("error", {}) or {}).get("message", "Upstream error")}),
- upstream.status_code,
- )
+ if had_responses_tools:
+ if verbose:
+ print("[Passthrough] Upstream rejected tools; retrying without extras (args redacted)")
+ base_tools_only = convert_tools_chat_to_responses(normalize_ollama_tools(tools_req))
+ safe_choice = payload.get("tool_choice", "auto")
+ upstream2, err2 = start_upstream_request(
+ normalize_model_name(model),
+ input_items,
+ instructions=BASE_INSTRUCTIONS,
+ tools=base_tools_only,
+ tool_choice=safe_choice,
+ parallel_tool_calls=parallel_tool_calls,
+ reasoning_param=build_reasoning_param(reasoning_effort, reasoning_summary, model_reasoning),
+ )
+ if err2 is None and upstream2 is not None and upstream2.status_code < 400:
+ upstream = upstream2
+ else:
+ return (
+ jsonify({"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error"), "code": "RESPONSES_TOOLS_REJECTED"}}),
+ (upstream2.status_code if upstream2 is not None else upstream.status_code),
+ )
+ else:
+ if verbose:
+ print("/api/chat upstream error status=", upstream.status_code, " body:", json.dumps(err_body)[:2000])
+ return (
+ jsonify({"error": (err_body.get("error", {}) or {}).get("message", "Upstream error")}),
+ upstream.status_code,
+ )
created_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
model_out = model if isinstance(model, str) and model.strip() else normalize_model_name(model)
diff --git a/chatmock/routes_openai.py b/chatmock/routes_openai.py
index fc0b711..2aa1f08 100644
--- a/chatmock/routes_openai.py
+++ b/chatmock/routes_openai.py
@@ -70,6 +70,47 @@ def chat_completions() -> Response:
tools_responses = convert_tools_chat_to_responses(payload.get("tools"))
tool_choice = payload.get("tool_choice", "auto")
parallel_tool_calls = bool(payload.get("parallel_tool_calls", False))
+ responses_tools_payload = payload.get("responses_tools") if isinstance(payload.get("responses_tools"), list) else []
+ extra_tools: List[Dict[str, Any]] = []
+ had_responses_tools = False
+ if isinstance(responses_tools_payload, list):
+ for _t in responses_tools_payload:
+ if not (isinstance(_t, dict) and isinstance(_t.get("type"), str)):
+ continue
+ if _t.get("type") not in ("web_search", "web_search_preview"):
+ return (
+ jsonify(
+ {
+ "error": {
+ "message": "Only web_search/web_search_preview are supported in responses_tools",
+ "code": "RESPONSES_TOOL_UNSUPPORTED",
+ }
+ }
+ ),
+ 400,
+ )
+ extra_tools.append(_t)
+
+ if not extra_tools and bool(current_app.config.get("DEFAULT_WEB_SEARCH")):
+ responses_tool_choice = payload.get("responses_tool_choice")
+ if not (isinstance(responses_tool_choice, str) and responses_tool_choice == "none"):
+ extra_tools = [{"type": "web_search"}]
+
+ if extra_tools:
+ import json as _json
+ MAX_TOOLS_BYTES = 32768
+ try:
+ size = len(_json.dumps(extra_tools))
+ except Exception:
+ size = 0
+ if size > MAX_TOOLS_BYTES:
+ return jsonify({"error": {"message": "responses_tools too large", "code": "RESPONSES_TOOLS_TOO_LARGE"}}), 400
+ had_responses_tools = True
+ tools_responses = (tools_responses or []) + extra_tools
+
+ responses_tool_choice = payload.get("responses_tool_choice")
+ if isinstance(responses_tool_choice, str) and responses_tool_choice in ("auto", "none"):
+ tool_choice = responses_tool_choice
input_items = convert_chat_messages_to_responses_input(messages)
if not input_items and isinstance(payload.get("prompt"), str) and payload.get("prompt").strip():
@@ -100,12 +141,41 @@ def chat_completions() -> Response:
err_body = json.loads(raw.decode("utf-8", errors="ignore")) if raw else {"raw": upstream.text}
except Exception:
err_body = {"raw": upstream.text}
- if verbose:
- print("Upstream error status=", upstream.status_code, " body:", json.dumps(err_body)[:2000])
- return (
- jsonify({"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}),
- upstream.status_code,
- )
+ if had_responses_tools:
+ if verbose:
+ print("[Passthrough] Upstream rejected tools; retrying without extra tools (args redacted)")
+ base_tools_only = convert_tools_chat_to_responses(payload.get("tools"))
+ safe_choice = payload.get("tool_choice", "auto")
+ upstream2, err2 = start_upstream_request(
+ model,
+ input_items,
+ instructions=BASE_INSTRUCTIONS,
+ tools=base_tools_only,
+ tool_choice=safe_choice,
+ parallel_tool_calls=parallel_tool_calls,
+ reasoning_param=reasoning_param,
+ )
+ if err2 is None and upstream2 is not None and upstream2.status_code < 400:
+ upstream = upstream2
+ else:
+ return (
+ jsonify(
+ {
+ "error": {
+ "message": (err_body.get("error", {}) or {}).get("message", "Upstream error"),
+ "code": "RESPONSES_TOOLS_REJECTED",
+ }
+ }
+ ),
+ (upstream2.status_code if upstream2 is not None else upstream.status_code),
+ )
+ else:
+ if verbose:
+ print("Upstream error status=", upstream.status_code)
+ return (
+ jsonify({"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}),
+ upstream.status_code,
+ )
if is_stream:
resp = Response(
@@ -371,3 +441,4 @@ def list_models() -> Response:
for k, v in build_cors_headers().items():
resp.headers.setdefault(k, v)
return resp
+
diff --git a/chatmock/utils.py b/chatmock/utils.py
index 091c626..6c344be 100644
--- a/chatmock/utils.py
+++ b/chatmock/utils.py
@@ -250,6 +250,9 @@ def sse_translate_chat(
saw_any_summary = False
pending_summary_paragraph = False
upstream_usage = None
+ ws_state: dict[str, Any] = {}
+ ws_index: dict[str, int] = {}
+ ws_next_index: int = 0
def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
try:
@@ -284,6 +287,86 @@ def sse_translate_chat(
if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str):
response_id = evt["response"].get("id") or response_id
+ if isinstance(kind, str) and ("web_search_call" in kind):
+ try:
+ call_id = evt.get("item_id") or "ws_call"
+ if verbose and vlog:
+ try:
+ vlog(f"CM_TOOLS {kind} id={call_id} -> tool_calls(web_search)")
+ except Exception:
+ pass
+ item = evt.get('item') if isinstance(evt.get('item'), dict) else {}
+ params_dict = ws_state.setdefault(call_id, {}) if isinstance(ws_state.get(call_id), dict) else {}
+ def _merge_from(src):
+ if not isinstance(src, dict):
+ return
+ for whole in ('parameters','args','arguments','input'):
+ if isinstance(src.get(whole), dict):
+ params_dict.update(src.get(whole))
+ if isinstance(src.get('query'), str): params_dict.setdefault('query', src.get('query'))
+ if isinstance(src.get('q'), str): params_dict.setdefault('query', src.get('q'))
+ for rk in ('recency','time_range','days'):
+ if src.get(rk) is not None and rk not in params_dict: params_dict[rk] = src.get(rk)
+ for dk in ('domains','include_domains','include'):
+ if isinstance(src.get(dk), list) and 'domains' not in params_dict: params_dict['domains'] = src.get(dk)
+ for mk in ('max_results','topn','limit'):
+ if src.get(mk) is not None and 'max_results' not in params_dict: params_dict['max_results'] = src.get(mk)
+ _merge_from(item)
+ _merge_from(evt if isinstance(evt, dict) else None)
+ params = params_dict if params_dict else None
+ if isinstance(params, dict):
+ try:
+ ws_state.setdefault(call_id, {}).update(params)
+ except Exception:
+ pass
+ eff_params = ws_state.get(call_id, params if isinstance(params, (dict, list, str)) else {})
+ if isinstance(eff_params, (dict, list)):
+ args_str = json.dumps(eff_params)
+ elif isinstance(eff_params, str):
+ args_str = json.dumps({"query": eff_params})
+ else:
+ args_str = "{}"
+ if call_id not in ws_index:
+ ws_index[call_id] = ws_next_index
+ ws_next_index += 1
+ _idx = ws_index.get(call_id, 0)
+ delta_chunk = {
+ "id": response_id,
+ "object": "chat.completion.chunk",
+ "created": created,
+ "model": model,
+ "choices": [
+ {
+ "index": 0,
+ "delta": {
+ "tool_calls": [
+ {
+ "index": _idx,
+ "id": call_id,
+ "type": "function",
+ "function": {"name": "web_search", "arguments": args_str},
+ }
+ ]
+ },
+ "finish_reason": None,
+ }
+ ],
+ }
+ yield f"data: {json.dumps(delta_chunk)}\n\n".encode("utf-8")
+ if kind.endswith(".completed") or kind.endswith(".done"):
+ finish_chunk = {
+ "id": response_id,
+ "object": "chat.completion.chunk",
+ "created": created,
+ "model": model,
+ "choices": [
+ {"index": 0, "delta": {}, "finish_reason": "tool_calls"}
+ ],
+ }
+ yield f"data: {json.dumps(finish_chunk)}\n\n".encode("utf-8")
+ except Exception:
+ pass
+
if kind == "response.output_text.delta":
delta = evt.get("delta") or ""
if compat == "think-tags" and think_open and not think_closed:
@@ -308,10 +391,34 @@ def sse_translate_chat(
yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
elif kind == "response.output_item.done":
item = evt.get("item") or {}
- if isinstance(item, dict) and item.get("type") == "function_call":
+ if isinstance(item, dict) and (item.get("type") == "function_call" or item.get("type") == "web_search_call"):
call_id = item.get("call_id") or item.get("id") or ""
- name = item.get("name") or ""
- args = item.get("arguments") or ""
+ name = item.get("name") or ("web_search" if item.get("type") == "web_search_call" else "")
+ raw_args = item.get("arguments") or item.get("parameters")
+ if isinstance(raw_args, dict):
+ try:
+ ws_state.setdefault(call_id, {}).update(raw_args)
+ except Exception:
+ pass
+ eff_args = ws_state.get(call_id, raw_args if isinstance(raw_args, (dict, list, str)) else {})
+ try:
+ if isinstance(eff_args, (dict, list)):
+ args = json.dumps(eff_args)
+ elif isinstance(eff_args, str):
+ args = json.dumps({"query": eff_args})
+ else:
+ args = "{}"
+ except Exception:
+ args = "{}"
+ if item.get("type") == "web_search_call" and verbose and vlog:
+ try:
+ vlog(f"CM_TOOLS response.output_item.done web_search_call id={call_id} has_args={bool(args)}")
+ except Exception:
+ pass
+ if call_id not in ws_index:
+ ws_index[call_id] = ws_next_index
+ ws_next_index += 1
+ _idx = ws_index.get(call_id, 0)
if isinstance(call_id, str) and isinstance(name, str) and isinstance(args, str):
delta_chunk = {
"id": response_id,
@@ -324,7 +431,7 @@ def sse_translate_chat(
"delta": {
"tool_calls": [
{
- "index": 0,
+ "index": _idx,
"id": call_id,
"type": "function",
"function": {"name": name, "arguments": args},
@@ -573,3 +680,4 @@ def sse_translate_text(upstream, model: str, created: int, verbose: bool = False
break
finally:
upstream.close()
+