implemented caching! should increase the usage available

This commit is contained in:
Game_Time
2025-08-19 22:43:40 +05:00
parent 2ed6dbe2d4
commit 8129b62083
2 changed files with 104 additions and 1 deletions

89
chatmock/session.py Normal file
View File

@@ -0,0 +1,89 @@
from __future__ import annotations
import hashlib
import json
import threading
import uuid
from typing import Any, Dict, List, Tuple
_LOCK = threading.Lock()
_FINGERPRINT_TO_UUID: Dict[str, str] = {}
_ORDER: List[str] = []
_MAX_ENTRIES = 10000
def _canonicalize_first_user_message(input_items: List[Dict[str, Any]]) -> Dict[str, Any] | None:
"""
Extract the first stable user message from Responses input items. Good use for a fingerprint for prompt caching.
"""
for item in input_items:
if not isinstance(item, dict):
continue
if item.get("type") != "message":
continue
role = item.get("role")
if role != "user":
continue
content = item.get("content")
if not isinstance(content, list):
continue
norm_content = []
for part in content:
if not isinstance(part, dict):
continue
ptype = part.get("type")
if ptype == "input_text":
text = part.get("text") if isinstance(part.get("text"), str) else ""
if text:
norm_content.append({"type": "input_text", "text": text})
elif ptype == "input_image":
url = part.get("image_url") if isinstance(part.get("image_url"), str) else None
if url:
norm_content.append({"type": "input_image", "image_url": url})
if norm_content:
return {"type": "message", "role": "user", "content": norm_content}
return None
def canonicalize_prefix(instructions: str | None, input_items: List[Dict[str, Any]]) -> str:
prefix: Dict[str, Any] = {}
if isinstance(instructions, str) and instructions.strip():
prefix["instructions"] = instructions.strip()
first_user = _canonicalize_first_user_message(input_items)
if first_user is not None:
prefix["first_user_message"] = first_user
return json.dumps(prefix, sort_keys=True, separators=(",", ":"))
def _fingerprint(s: str) -> str:
return hashlib.sha256(s.encode("utf-8")).hexdigest()
def _remember(fp: str, sid: str) -> None:
if fp in _FINGERPRINT_TO_UUID:
return
_FINGERPRINT_TO_UUID[fp] = sid
_ORDER.append(fp)
if len(_ORDER) > _MAX_ENTRIES:
oldest = _ORDER.pop(0)
_FINGERPRINT_TO_UUID.pop(oldest, None)
def ensure_session_id(
instructions: str | None,
input_items: List[Dict[str, Any]],
client_supplied: str | None = None,
) -> str:
if isinstance(client_supplied, str) and client_supplied.strip():
return client_supplied.strip()
canon = canonicalize_prefix(instructions, input_items)
fp = _fingerprint(canon)
with _LOCK:
if fp in _FINGERPRINT_TO_UUID:
return _FINGERPRINT_TO_UUID[fp]
sid = str(uuid.uuid4())
_remember(fp, sid)
return sid

View File

@@ -9,6 +9,8 @@ from flask import Response, jsonify, make_response
from .config import CHATGPT_RESPONSES_URL from .config import CHATGPT_RESPONSES_URL
from .http import build_cors_headers from .http import build_cors_headers
from .session import ensure_session_id
from flask import request as flask_request
from .utils import get_effective_chatgpt_auth from .utils import get_effective_chatgpt_auth
@@ -59,6 +61,17 @@ def start_upstream_request(
if isinstance(reasoning_param, dict) and reasoning_param.get("effort") != "none": if isinstance(reasoning_param, dict) and reasoning_param.get("effort") != "none":
include.append("reasoning.encrypted_content") include.append("reasoning.encrypted_content")
client_session_id = None
try:
client_session_id = (
flask_request.headers.get("X-Session-Id")
or flask_request.headers.get("session_id")
or None
)
except Exception:
client_session_id = None
session_id = ensure_session_id(instructions, input_items, client_session_id)
responses_payload = { responses_payload = {
"model": model, "model": model,
"instructions": instructions if isinstance(instructions, str) and instructions.strip() else instructions, "instructions": instructions if isinstance(instructions, str) and instructions.strip() else instructions,
@@ -69,6 +82,7 @@ def start_upstream_request(
"store": False, "store": False,
"stream": True, "stream": True,
"include": include, "include": include,
"prompt_cache_key": session_id,
} }
if reasoning_param is not None: if reasoning_param is not None:
@@ -80,6 +94,7 @@ def start_upstream_request(
"Accept": "text/event-stream", "Accept": "text/event-stream",
"chatgpt-account-id": account_id, "chatgpt-account-id": account_id,
"OpenAI-Beta": "responses=experimental", "OpenAI-Beta": "responses=experimental",
"session_id": session_id,
} }
try: try:
@@ -96,4 +111,3 @@ def start_upstream_request(
resp.headers.setdefault(k, v) resp.headers.setdefault(k, v)
return None, resp return None, resp
return upstream, None return upstream, None