hopefully patch token counting?

This commit is contained in:
Game_Time
2025-08-22 02:01:15 +05:00
parent c8c6540d23
commit cadd959778
20 changed files with 112 additions and 6 deletions

View File

@@ -239,6 +239,8 @@ def sse_translate_chat(
verbose: bool = False,
vlog=None,
reasoning_compat: str = "think-tags",
*,
include_usage: bool = False,
):
response_id = "chatcmpl-stream"
compat = (reasoning_compat or "think-tags").strip().lower()
@@ -247,6 +249,19 @@ def sse_translate_chat(
saw_output = False
saw_any_summary = False
pending_summary_paragraph = False
upstream_usage = None
def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
try:
usage = (evt.get("response") or {}).get("usage")
if not isinstance(usage, dict):
return None
pt = int(usage.get("input_tokens") or 0)
ct = int(usage.get("output_tokens") or 0)
tt = int(usage.get("total_tokens") or (pt + ct))
return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt}
except Exception:
return None
try:
for raw in upstream.iter_lines(decode_unicode=False):
if not raw:
@@ -442,6 +457,9 @@ def sse_translate_chat(
chunk = {"error": {"message": err}}
yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
elif kind == "response.completed":
m = _extract_usage(evt)
if m:
upstream_usage = m
if compat == "think-tags" and think_open and not think_closed:
close_chunk = {
"id": response_id,
@@ -453,14 +471,40 @@ def sse_translate_chat(
yield f"data: {json.dumps(close_chunk)}\n\n".encode("utf-8")
think_open = False
think_closed = True
if include_usage and upstream_usage:
try:
usage_chunk = {
"id": response_id,
"object": "chat.completion.chunk",
"created": created,
"model": model,
"choices": [{"index": 0, "delta": {}, "finish_reason": None}],
"usage": upstream_usage,
}
yield f"data: {json.dumps(usage_chunk)}\n\n".encode("utf-8")
except Exception:
pass
yield b"data: [DONE]\n\n"
break
finally:
upstream.close()
def sse_translate_text(upstream, model: str, created: int, verbose: bool = False, vlog=None):
def sse_translate_text(upstream, model: str, created: int, verbose: bool = False, vlog=None, *, include_usage: bool = False):
response_id = "cmpl-stream"
upstream_usage = None
def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
try:
usage = (evt.get("response") or {}).get("usage")
if not isinstance(usage, dict):
return None
pt = int(usage.get("input_tokens") or 0)
ct = int(usage.get("output_tokens") or 0)
tt = int(usage.get("total_tokens") or (pt + ct))
return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt}
except Exception:
return None
try:
for raw_line in upstream.iter_lines(decode_unicode=False):
if not raw_line:
@@ -509,8 +553,23 @@ def sse_translate_text(upstream, model: str, created: int, verbose: bool = False
}
yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
elif kind == "response.completed":
m = _extract_usage(evt)
if m:
upstream_usage = m
if include_usage and upstream_usage:
try:
usage_chunk = {
"id": response_id,
"object": "text_completion.chunk",
"created": created,
"model": model,
"choices": [{"index": 0, "text": "", "finish_reason": None}],
"usage": upstream_usage,
}
yield f"data: {json.dumps(usage_chunk)}\n\n".encode("utf-8")
except Exception:
pass
yield b"data: [DONE]\n\n"
break
finally:
upstream.close()