Add performance features: caching, cost tracking, retry, compaction, classification, scrubbing

Inspired by zeroclaw's lightweight patterns for slow hardware:
- Response cache (SQLite + SHA-256 keyed) to skip redundant LLM calls
- History compaction — LLM-summarize old messages when history exceeds 50 messages
- Query classifier routes simple/research queries to cheaper models
- Credential scrubbing removes secrets from tool output before sending to LLM
- Cost tracker with daily/monthly budget enforcement (SQLite)
- Resilient provider with retry + exponential backoff + fallback provider
- Approval engine gains a session-scoped "always allow" option and an audit log

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Kaloyan Danchev
2026-02-19 09:20:52 +02:00
parent e24e3026b6
commit 872ed24f0c
10 changed files with 694 additions and 17 deletions

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
import time
from enum import Enum
from typing import Any
@@ -15,7 +16,7 @@ class ApprovalPolicy(Enum):
class ApprovalEngine:
"""Deny-by-default tool approval."""
"""Deny-by-default tool approval with session allowlist and audit log."""
def __init__(
self,
@@ -26,6 +27,8 @@ class ApprovalEngine:
self._auto_approve = set(auto_approve or [])
self._require_approval = set(require_approval or [])
self._interactive = interactive
self._session_allowed: set[str] = set()
self._audit_log: list[dict[str, Any]] = []
def get_policy(self, tool_name: str) -> ApprovalPolicy:
"""Get the approval policy for a tool."""
@@ -43,27 +46,54 @@ class ApprovalEngine:
policy = self.get_policy(tool_name)
if policy == ApprovalPolicy.AUTO_APPROVE:
self._log_decision(tool_name, arguments, "auto_approved")
return True
# Session-scoped "always allow"
if tool_name in self._session_allowed:
self._log_decision(tool_name, arguments, "session_allowed")
return True
if policy == ApprovalPolicy.DENY:
logger.warning(f"Tool '{tool_name}' denied by policy")
self._log_decision(tool_name, arguments, "denied")
return False
# REQUIRE_APPROVAL
if not self._interactive:
logger.warning(f"Tool '{tool_name}' requires approval but running non-interactively — denied")
self._log_decision(tool_name, arguments, "denied_non_interactive")
return False
# In interactive mode, prompt the user
logger.info(f"Tool '{tool_name}' requires approval. Args: {arguments}")
return await self._prompt_user(tool_name, arguments)
approved, always = await self._prompt_user(tool_name, arguments)
if approved and always:
self._session_allowed.add(tool_name)
self._log_decision(tool_name, arguments, "user_approved" if approved else "user_denied")
return approved
async def _prompt_user(self, tool_name: str, arguments: dict[str, Any]) -> tuple[bool, bool]:
    """Prompt the user on the console to approve a tool call.

    Prints the tool name and its arguments, then reads one line of input.

    Args:
        tool_name: Name of the tool awaiting approval.
        arguments: Arguments the tool would be called with (shown verbatim).

    Returns:
        A ``(approved, always_allow)`` pair:
        ``(True, True)`` for "a" / "always", ``(True, False)`` for
        "y" / "yes", and ``(False, False)`` for any other answer or when
        input is closed or interrupted.
    """
    print(f"\n[APPROVAL REQUIRED] Tool: {tool_name}")
    print(f" Arguments: {arguments}")
    # NOTE(review): input() is a blocking call inside a coroutine, so nothing
    # else on the event loop runs while the prompt is open — confirm that is
    # acceptable for the interactive use case.
    try:
        answer = input(" Allow? [y/N/a(lways)]: ").strip().lower()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C counts as a refusal. Always return a pair so
        # the caller's tuple unpacking never fails.
        return False, False
    if answer in ("a", "always"):
        return True, True
    return answer in ("y", "yes"), False
def _log_decision(self, tool_name: str, arguments: dict[str, Any], decision: str) -> None:
    """Record an approval decision in the audit log.

    Appends one entry to ``self._audit_log`` with the keys ``"tool"``,
    ``"arguments"``, ``"decision"`` and ``"timestamp"`` (``time.time()`` at
    the moment of logging).

    Args:
        tool_name: Name of the tool the decision applies to.
        arguments: Arguments the tool was requested with.
        decision: Short label describing the outcome of the check.
    """
    # Snapshot the arguments so that a caller mutating its dict afterwards
    # cannot rewrite what was recorded. The copy is shallow: nested containers
    # are still shared.
    recorded_args = dict(arguments) if isinstance(arguments, dict) else arguments
    self._audit_log.append({
        "tool": tool_name,
        "arguments": recorded_args,
        "decision": decision,
        "timestamp": time.time(),
    })
def get_audit_log(self) -> list[dict[str, Any]]:
    """Return a copy of the audit log for inspection.

    Both the list and each entry dict are copied, so adding, removing or
    editing entries in the returned value does not change the recorded log.
    The entry copies are shallow: values nested inside an entry (such as the
    ``"arguments"`` mapping) are still shared with the stored entry.

    Returns:
        The recorded decisions, oldest first.
    """
    return [dict(entry) for entry in self._audit_log]