From 2ed6dbe2d4eb887bb890af1832696dacf84e13d5 Mon Sep 17 00:00:00 2001 From: Game_Time <108236317+RayBytes@users.noreply.github.com> Date: Tue, 19 Aug 2025 21:26:47 +0500 Subject: [PATCH] fix ollama regression --- chatmock/routes_ollama.py | 123 +++++++++++++++++++++++++++++++++++--- 1 file changed, 114 insertions(+), 9 deletions(-) diff --git a/chatmock/routes_ollama.py b/chatmock/routes_ollama.py index f19ed36..00311b5 100644 --- a/chatmock/routes_ollama.py +++ b/chatmock/routes_ollama.py @@ -1,10 +1,11 @@ from __future__ import annotations import json +import datetime import time from typing import Any, Dict, List -from flask import Blueprint, Response, current_app, jsonify, make_response, request +from flask import Blueprint, Response, current_app, jsonify, make_response, request, stream_with_context from .config import BASE_INSTRUCTIONS from .http import build_cors_headers @@ -160,7 +161,8 @@ def ollama_chat() -> Response: upstream.status_code, ) - created_at = str(int(time.time() * 1000)) + created_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") + model_out = normalize_model_name(model) if stream_req: def _gen(): @@ -169,6 +171,7 @@ def ollama_chat() -> Response: think_closed = False saw_any_summary = False pending_summary_paragraph = False + full_parts: List[str] = [] try: for raw_line in upstream.iter_lines(decode_unicode=False): if not raw_line: @@ -196,31 +199,134 @@ def ollama_chat() -> Response: delta_txt = evt.get("delta") or "" if compat == "o3": if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph: - yield json.dumps({"message": {"role": "assistant", "content": "\n"}}) + "\n" + yield ( + json.dumps( + { + "model": model_out, + "created_at": created_at, + "message": {"role": "assistant", "content": "\n"}, + "done": False, + } + ) + + "\n" + ) + full_parts.append("\n") pending_summary_paragraph = False + if delta_txt: + yield ( + json.dumps( + { + "model": model_out, + "created_at": created_at, + "message": {"role": "assistant", "content": delta_txt}, + "done": False, + } + ) + + "\n" + ) + full_parts.append(delta_txt) elif compat == "think-tags": if not think_open and not think_closed: - yield json.dumps({"message": {"role": "assistant", "content": ""}}) + "\n" + yield ( + json.dumps( + { + "model": model_out, + "created_at": created_at, + "message": {"role": "assistant", "content": ""}, + "done": False, + } + ) + + "\n" + ) + full_parts.append("") think_open = True if think_open and not think_closed: if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph: - yield json.dumps({"message": {"role": "assistant", "content": "\n"}}) + "\n" + yield ( + json.dumps( + { + "model": model_out, + "created_at": created_at, + "message": {"role": "assistant", "content": "\n"}, + "done": False, + } + ) + + "\n" + ) + full_parts.append("\n") pending_summary_paragraph = False + if delta_txt: + yield ( + json.dumps( + { + "model": model_out, + "created_at": created_at, + "message": {"role": "assistant", "content": delta_txt}, + "done": False, + } + ) + + "\n" + ) + full_parts.append(delta_txt) else: pass elif kind == "response.output_text.delta": delta = evt.get("delta") or "" if compat == "think-tags" and think_open and not think_closed: - yield json.dumps({"message": {"role": "assistant", "content": ""}}) + "\n" + yield ( + json.dumps( + { + "model": model_out, + "created_at": created_at, + "message": {"role": "assistant", "content": ""}, + "done": False, + } + ) + + "\n" + ) + full_parts.append("") think_open = False think_closed = True - yield json.dumps({"message": {"role": "assistant", "content": delta}}) + "\n" + if delta: + yield ( + json.dumps( + { + "model": model_out, + "created_at": created_at, + "message": {"role": "assistant", "content": delta}, + "done": False, + } + ) + + "\n" + ) + full_parts.append(delta) elif kind == "response.completed": break finally: upstream.close() + if compat == "think-tags" and think_open and not think_closed: + yield ( + json.dumps( + { + "model": model_out, + "created_at": created_at, + "message": {"role": "assistant", "content": ""}, + "done": False, + } + ) + + "\n" + ) + full_parts.append("") + done_obj = { + "model": model_out, + "created_at": created_at, + "message": {"role": "assistant", "content": "".join(full_parts)}, + "done": True, + } + done_obj.update(_OLLAMA_FAKE_EVAL) + yield json.dumps(done_obj) + "\n" resp = current_app.response_class( - _gen(), + stream_with_context(_gen()), status=200, mimetype="application/x-ndjson", ) @@ -296,4 +402,3 @@ def ollama_chat() -> Response: for k, v in build_cors_headers().items(): resp.headers.setdefault(k, v) return resp -