feat: add responses api, websocket support, and fast mode
This commit is contained in:
@@ -1,31 +1,56 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import socket
|
||||
import threading
|
||||
import time
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
from chatmock.app import create_app
|
||||
from chatmock.session import reset_session_state
|
||||
from websockets.sync.client import connect as ws_connect
|
||||
|
||||
|
||||
class FakeUpstream:
|
||||
def __init__(self, events: list[dict[str, object]], status_code: int = 200) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
events: list[dict[str, object]] | None = None,
|
||||
*,
|
||||
status_code: int = 200,
|
||||
headers: dict[str, str] | None = None,
|
||||
content: bytes | None = None,
|
||||
text: str = "",
|
||||
) -> None:
|
||||
self._events = events
|
||||
self.status_code = status_code
|
||||
self.headers = {}
|
||||
self.content = b""
|
||||
self.text = ""
|
||||
self.headers = headers or {}
|
||||
self.content = content or b""
|
||||
self.text = text
|
||||
|
||||
def iter_lines(self, decode_unicode: bool = False):
|
||||
for event in self._events:
|
||||
for event in self._events or []:
|
||||
payload = f"data: {json.dumps(event)}"
|
||||
yield payload if decode_unicode else payload.encode("utf-8")
|
||||
|
||||
def iter_content(self, chunk_size=None):
|
||||
if self.content:
|
||||
yield self.content
|
||||
return
|
||||
for event in self._events or []:
|
||||
payload = f"data: {json.dumps(event)}\n\n".encode("utf-8")
|
||||
yield payload
|
||||
|
||||
def json(self):
|
||||
return json.loads(self.content.decode("utf-8"))
|
||||
|
||||
def close(self) -> None:
|
||||
return None
|
||||
|
||||
|
||||
class RouteTests(unittest.TestCase):
|
||||
def setUp(self) -> None:
|
||||
reset_session_state()
|
||||
self.app = create_app()
|
||||
self.client = self.app.test_client()
|
||||
|
||||
@@ -36,6 +61,7 @@ class RouteTests(unittest.TestCase):
|
||||
model_ids = [item["id"] for item in body["data"]]
|
||||
self.assertIn("gpt-5.4", model_ids)
|
||||
self.assertIn("gpt-5.4-mini", model_ids)
|
||||
self.assertIn("gpt-5.3-codex-spark", model_ids)
|
||||
|
||||
def test_ollama_tags_list(self) -> None:
|
||||
response = self.client.get("/api/tags")
|
||||
@@ -85,6 +111,443 @@ class RouteTests(unittest.TestCase):
|
||||
self.assertEqual(body["message"]["content"], "hello")
|
||||
self.assertEqual(body["model"], "gpt-5.4")
|
||||
|
||||
@patch("chatmock.routes_openai.start_upstream_request")
|
||||
def test_chat_completions_fast_mode_sets_priority_service_tier(self, mock_start) -> None:
|
||||
mock_start.return_value = (
|
||||
FakeUpstream(
|
||||
[
|
||||
{"type": "response.output_text.delta", "delta": "hello"},
|
||||
{"type": "response.completed", "response": {"id": "resp-openai"}},
|
||||
]
|
||||
),
|
||||
None,
|
||||
)
|
||||
response = self.client.post(
|
||||
"/v1/chat/completions",
|
||||
json={
|
||||
"model": "gpt-5.4",
|
||||
"fast_mode": True,
|
||||
"messages": [{"role": "user", "content": "hi"}],
|
||||
},
|
||||
)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(mock_start.call_args.kwargs["service_tier"], "priority")
|
||||
|
||||
@patch("chatmock.routes_openai.start_upstream_request")
|
||||
def test_chat_completions_fast_mode_false_overrides_server_default(self, mock_start) -> None:
|
||||
app = create_app(fast_mode=True)
|
||||
client = app.test_client()
|
||||
mock_start.return_value = (
|
||||
FakeUpstream(
|
||||
[
|
||||
{"type": "response.output_text.delta", "delta": "hello"},
|
||||
{"type": "response.completed", "response": {"id": "resp-openai"}},
|
||||
]
|
||||
),
|
||||
None,
|
||||
)
|
||||
response = client.post(
|
||||
"/v1/chat/completions",
|
||||
json={
|
||||
"model": "gpt-5.4",
|
||||
"fast_mode": False,
|
||||
"messages": [{"role": "user", "content": "hi"}],
|
||||
},
|
||||
)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertIsNone(mock_start.call_args.kwargs["service_tier"])
|
||||
|
||||
@patch("chatmock.routes_openai.start_upstream_request")
|
||||
def test_chat_completions_rejects_unsupported_explicit_fast_mode(self, mock_start) -> None:
|
||||
response = self.client.post(
|
||||
"/v1/chat/completions",
|
||||
json={
|
||||
"model": "gpt-5.3-codex",
|
||||
"fast_mode": True,
|
||||
"messages": [{"role": "user", "content": "hi"}],
|
||||
},
|
||||
)
|
||||
body = response.get_json()
|
||||
self.assertEqual(response.status_code, 400)
|
||||
self.assertIn("Fast mode is not supported", body["error"]["message"])
|
||||
mock_start.assert_not_called()
|
||||
|
||||
@patch("chatmock.routes_openai.start_upstream_raw_request")
|
||||
def test_responses_route_returns_completed_response_object(self, mock_start) -> None:
|
||||
mock_start.return_value = (
|
||||
FakeUpstream(
|
||||
[
|
||||
{
|
||||
"type": "response.created",
|
||||
"response": {"id": "resp_123", "object": "response", "status": "in_progress"},
|
||||
},
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "resp_123",
|
||||
"object": "response",
|
||||
"status": "completed",
|
||||
"output": [],
|
||||
},
|
||||
},
|
||||
],
|
||||
headers={"Content-Type": "text/event-stream"},
|
||||
),
|
||||
None,
|
||||
)
|
||||
response = self.client.post(
|
||||
"/v1/responses",
|
||||
json={"model": "gpt5.4-mini", "input": "hello"},
|
||||
)
|
||||
body = response.get_json()
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertEqual(body["id"], "resp_123")
|
||||
outbound_payload = mock_start.call_args.args[0]
|
||||
self.assertEqual(outbound_payload["model"], "gpt-5.4-mini")
|
||||
self.assertEqual(outbound_payload["store"], False)
|
||||
self.assertEqual(
|
||||
outbound_payload["input"],
|
||||
[{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}],
|
||||
)
|
||||
self.assertEqual(outbound_payload["reasoning"]["effort"], "medium")
|
||||
self.assertIsInstance(outbound_payload["prompt_cache_key"], str)
|
||||
|
||||
@patch("chatmock.routes_openai.start_upstream_raw_request")
|
||||
def test_responses_route_does_not_use_previous_response_id_for_http_follow_up(self, mock_start) -> None:
|
||||
mock_start.side_effect = [
|
||||
(
|
||||
FakeUpstream(
|
||||
[
|
||||
{
|
||||
"type": "response.created",
|
||||
"response": {"id": "resp_1", "object": "response", "status": "in_progress"},
|
||||
},
|
||||
{
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"id": "msg_1",
|
||||
"content": [{"type": "output_text", "text": "assistant output"}],
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {"id": "resp_1", "object": "response", "status": "completed", "output": []},
|
||||
},
|
||||
],
|
||||
headers={"Content-Type": "text/event-stream"},
|
||||
),
|
||||
None,
|
||||
),
|
||||
(
|
||||
FakeUpstream(
|
||||
[
|
||||
{
|
||||
"type": "response.created",
|
||||
"response": {"id": "resp_2", "object": "response", "status": "in_progress"},
|
||||
},
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {"id": "resp_2", "object": "response", "status": "completed", "output": []},
|
||||
},
|
||||
],
|
||||
headers={"Content-Type": "text/event-stream"},
|
||||
),
|
||||
None,
|
||||
),
|
||||
]
|
||||
|
||||
first = self.client.post("/v1/responses", json={"model": "gpt-5.4", "input": "hello"})
|
||||
second = self.client.post(
|
||||
"/v1/responses",
|
||||
json={
|
||||
"model": "gpt-5.4",
|
||||
"input": [
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
|
||||
{"type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}]},
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
|
||||
],
|
||||
},
|
||||
)
|
||||
|
||||
self.assertEqual(first.status_code, 200)
|
||||
self.assertEqual(second.status_code, 200)
|
||||
outbound_payload = mock_start.call_args_list[1].args[0]
|
||||
self.assertNotIn("previous_response_id", outbound_payload)
|
||||
self.assertEqual(
|
||||
outbound_payload["input"],
|
||||
[
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
|
||||
{"type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}]},
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
|
||||
],
|
||||
)
|
||||
|
||||
@patch("chatmock.routes_openai.start_upstream_raw_request")
|
||||
def test_responses_route_falls_back_to_full_create_when_non_input_fields_change(self, mock_start) -> None:
|
||||
mock_start.side_effect = [
|
||||
(
|
||||
FakeUpstream(
|
||||
[
|
||||
{
|
||||
"type": "response.created",
|
||||
"response": {"id": "resp_1", "object": "response", "status": "in_progress"},
|
||||
},
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {"id": "resp_1", "object": "response", "status": "completed", "output": []},
|
||||
},
|
||||
],
|
||||
headers={"Content-Type": "text/event-stream"},
|
||||
),
|
||||
None,
|
||||
),
|
||||
(
|
||||
FakeUpstream(
|
||||
[
|
||||
{
|
||||
"type": "response.created",
|
||||
"response": {"id": "resp_2", "object": "response", "status": "in_progress"},
|
||||
},
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {"id": "resp_2", "object": "response", "status": "completed", "output": []},
|
||||
},
|
||||
],
|
||||
headers={"Content-Type": "text/event-stream"},
|
||||
),
|
||||
None,
|
||||
),
|
||||
]
|
||||
|
||||
headers = {"X-Session-Id": "session-fixed"}
|
||||
first = self.client.post("/v1/responses", json={"model": "gpt-5.4", "input": "hello"}, headers=headers)
|
||||
second = self.client.post(
|
||||
"/v1/responses",
|
||||
json={
|
||||
"model": "gpt-5.4",
|
||||
"instructions": "changed",
|
||||
"input": [
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
|
||||
],
|
||||
},
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
self.assertEqual(first.status_code, 200)
|
||||
self.assertEqual(second.status_code, 200)
|
||||
outbound_payload = mock_start.call_args_list[1].args[0]
|
||||
self.assertNotIn("previous_response_id", outbound_payload)
|
||||
self.assertEqual(
|
||||
outbound_payload["input"],
|
||||
[
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
|
||||
],
|
||||
)
|
||||
|
||||
@patch("chatmock.routes_openai.start_upstream_raw_request")
|
||||
def test_responses_route_clears_reuse_state_after_error(self, mock_start) -> None:
|
||||
mock_start.side_effect = [
|
||||
(
|
||||
FakeUpstream(
|
||||
[
|
||||
{"type": "response.created", "response": {"id": "resp_1"}},
|
||||
{"type": "response.completed", "response": {"id": "resp_1", "output": []}},
|
||||
],
|
||||
headers={"Content-Type": "text/event-stream"},
|
||||
),
|
||||
None,
|
||||
),
|
||||
(
|
||||
FakeUpstream(
|
||||
[
|
||||
{"type": "response.failed", "response": {"error": {"message": "boom"}}},
|
||||
],
|
||||
headers={"Content-Type": "text/event-stream"},
|
||||
),
|
||||
None,
|
||||
),
|
||||
(
|
||||
FakeUpstream(
|
||||
[
|
||||
{"type": "response.created", "response": {"id": "resp_3"}},
|
||||
{"type": "response.completed", "response": {"id": "resp_3", "output": []}},
|
||||
],
|
||||
headers={"Content-Type": "text/event-stream"},
|
||||
),
|
||||
None,
|
||||
),
|
||||
]
|
||||
|
||||
headers = {"X-Session-Id": "session-fixed"}
|
||||
first = self.client.post("/v1/responses", json={"model": "gpt-5.4", "input": "hello"}, headers=headers)
|
||||
second = self.client.post(
|
||||
"/v1/responses",
|
||||
json={
|
||||
"model": "gpt-5.4",
|
||||
"input": [
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
|
||||
],
|
||||
},
|
||||
headers=headers,
|
||||
)
|
||||
third = self.client.post(
|
||||
"/v1/responses",
|
||||
json={
|
||||
"model": "gpt-5.4",
|
||||
"input": [
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "third"}]},
|
||||
],
|
||||
},
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
self.assertEqual(first.status_code, 200)
|
||||
self.assertEqual(second.status_code, 502)
|
||||
self.assertEqual(third.status_code, 200)
|
||||
outbound_payload = mock_start.call_args_list[2].args[0]
|
||||
self.assertNotIn("previous_response_id", outbound_payload)
|
||||
self.assertEqual(
|
||||
outbound_payload["input"],
|
||||
[
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "third"}]},
|
||||
],
|
||||
)
|
||||
|
||||
@patch("chatmock.routes_openai.start_upstream_raw_request")
|
||||
def test_responses_route_stream_passthrough(self, mock_start) -> None:
|
||||
chunk = b'data: {"type":"response.output_text.delta","delta":"hello"}\n\n'
|
||||
mock_start.return_value = (
|
||||
FakeUpstream(
|
||||
headers={"Content-Type": "text/event-stream"},
|
||||
content=chunk,
|
||||
),
|
||||
None,
|
||||
)
|
||||
response = self.client.post(
|
||||
"/v1/responses",
|
||||
json={"model": "gpt-5.4", "input": "hello", "stream": True},
|
||||
)
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertIn("response.output_text.delta", response.get_data(as_text=True))
|
||||
|
||||
@patch("chatmock.routes_openai.start_upstream_raw_request")
|
||||
def test_responses_route_rejects_unsupported_explicit_priority(self, mock_start) -> None:
|
||||
response = self.client.post(
|
||||
"/v1/responses",
|
||||
json={"model": "gpt-5.3-codex", "input": "hello", "service_tier": "priority"},
|
||||
)
|
||||
body = response.get_json()
|
||||
self.assertEqual(response.status_code, 400)
|
||||
self.assertIn("Fast mode is not supported", body["error"]["message"])
|
||||
mock_start.assert_not_called()
|
||||
|
||||
@patch("chatmock.websocket_routes.get_effective_chatgpt_auth", return_value=("token", "acct"))
|
||||
@patch("chatmock.websocket_routes.connect_upstream_websocket")
|
||||
def test_responses_websocket_rewrites_response_create(self, mock_connect, _mock_auth) -> None:
|
||||
class FakeUpstreamWebsocket:
|
||||
def __init__(self) -> None:
|
||||
self.sent: list[str] = []
|
||||
self._messages = [
|
||||
json.dumps({"type": "response.created", "response": {"id": "resp_ws_1"}}),
|
||||
json.dumps({
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"id": "msg_1",
|
||||
"content": [{"type": "output_text", "text": "assistant output"}],
|
||||
},
|
||||
}),
|
||||
json.dumps({"type": "response.completed", "response": {"id": "resp_ws_1"}}),
|
||||
json.dumps({"type": "response.created", "response": {"id": "resp_ws_2"}}),
|
||||
json.dumps({"type": "response.completed", "response": {"id": "resp_ws_2"}}),
|
||||
]
|
||||
|
||||
def send(self, message: str) -> None:
|
||||
self.sent.append(message)
|
||||
|
||||
def recv(self) -> str:
|
||||
return self._messages.pop(0)
|
||||
|
||||
def close(self) -> None:
|
||||
return None
|
||||
|
||||
fake_upstream = FakeUpstreamWebsocket()
|
||||
mock_connect.return_value = fake_upstream
|
||||
|
||||
app = create_app()
|
||||
|
||||
sock = socket.socket()
|
||||
sock.bind(("127.0.0.1", 0))
|
||||
host, port = sock.getsockname()
|
||||
sock.close()
|
||||
|
||||
server_thread = threading.Thread(
|
||||
target=app.run,
|
||||
kwargs={
|
||||
"host": host,
|
||||
"port": port,
|
||||
"debug": False,
|
||||
"use_reloader": False,
|
||||
"threaded": True,
|
||||
},
|
||||
daemon=True,
|
||||
)
|
||||
server_thread.start()
|
||||
time.sleep(0.5)
|
||||
|
||||
with ws_connect(f"ws://{host}:{port}/v1/responses") as client:
|
||||
client.send(json.dumps({"type": "response.create", "model": "gpt-5.4", "input": "hello", "fast_mode": True}))
|
||||
first = json.loads(client.recv())
|
||||
assistant = json.loads(client.recv())
|
||||
second = json.loads(client.recv())
|
||||
client.send(
|
||||
json.dumps(
|
||||
{
|
||||
"type": "response.create",
|
||||
"model": "gpt-5.4",
|
||||
"fast_mode": True,
|
||||
"input": [
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
|
||||
{"type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}]},
|
||||
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
|
||||
],
|
||||
}
|
||||
)
|
||||
)
|
||||
third = json.loads(client.recv())
|
||||
fourth = json.loads(client.recv())
|
||||
|
||||
self.assertEqual(first["type"], "response.created")
|
||||
self.assertEqual(assistant["type"], "response.output_item.done")
|
||||
self.assertEqual(second["type"], "response.completed")
|
||||
self.assertEqual(third["type"], "response.created")
|
||||
self.assertEqual(fourth["type"], "response.completed")
|
||||
outbound = json.loads(fake_upstream.sent[0])
|
||||
self.assertEqual(outbound["model"], "gpt-5.4")
|
||||
self.assertEqual(outbound["service_tier"], "priority")
|
||||
self.assertEqual(outbound["type"], "response.create")
|
||||
self.assertEqual(
|
||||
outbound["input"],
|
||||
[{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}],
|
||||
)
|
||||
self.assertIn("prompt_cache_key", outbound)
|
||||
follow_up = json.loads(fake_upstream.sent[1])
|
||||
self.assertEqual(follow_up["previous_response_id"], "resp_ws_1")
|
||||
self.assertEqual(
|
||||
follow_up["input"],
|
||||
[{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}],
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user