Add performance features: caching, cost tracking, retry, compaction, classification, scrubbing

Inspired by zeroclaw's lightweight patterns for slow hardware:
- Response cache (SQLite + SHA-256 keyed) to skip redundant LLM calls
- History compaction — LLM-summarize old messages when the history exceeds 50 messages
- Query classifier routes simple/research queries to cheaper models
- Credential scrubbing removes secrets from tool output before sending to LLM
- Cost tracker with daily/monthly budget enforcement (SQLite)
- Resilient provider with retry + exponential backoff + fallback provider
- Approval engine gains session "always allow" and audit log

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Kaloyan Danchev
2026-02-19 09:20:52 +02:00
parent e24e3026b6
commit 872ed24f0c
10 changed files with 694 additions and 17 deletions

View File

@@ -10,7 +10,10 @@ from typing import Any
from loguru import logger
from xtrm_agent.bus import AgentMessage, InboundMessage, MessageBus, OutboundMessage
from xtrm_agent.cache import ResponseCache
from xtrm_agent.classifier import QueryClassifier
from xtrm_agent.config import Config, AgentFileConfig, parse_agent_file
from xtrm_agent.cost import BudgetConfig, CostTracker
from xtrm_agent.engine import Engine
from xtrm_agent.llm.anthropic import AnthropicProvider
from xtrm_agent.llm.litellm import LiteLLMProvider
@@ -35,6 +38,8 @@ class Orchestrator:
self._agent_configs: dict[str, AgentFileConfig] = {}
self._mcp_stack = AsyncExitStack()
self._running = False
self._cache: ResponseCache | None = None
self._cost_tracker: CostTracker | None = None
# Channel defaults for routing
channel_defaults = {}
@@ -53,6 +58,27 @@ class Orchestrator:
workspace = Path(self.config.tools.workspace).resolve()
workspace.mkdir(parents=True, exist_ok=True)
# Initialize shared response cache
self._cache = ResponseCache(
db_path=workspace / "cache.db",
ttl=self.config.performance.cache_ttl,
)
await self._cache.setup()
# Initialize cost tracker
budget = BudgetConfig(
daily_limit_usd=self.config.performance.daily_budget_usd,
monthly_limit_usd=self.config.performance.monthly_budget_usd,
)
self._cost_tracker = CostTracker(
db_path=workspace / "costs.db",
budget=budget,
)
await self._cost_tracker.setup()
# Initialize query classifier
classifier = QueryClassifier(model_map=self.config.performance.model_routing)
# Parse all agent definitions
for agent_name, agent_path in self.config.agents.items():
p = Path(agent_path)
@@ -74,6 +100,10 @@ class Orchestrator:
await self._mcp_stack.__aenter__()
await connect_mcp_servers(self.config.mcp_servers, global_registry, self._mcp_stack)
# Create fallback provider (LiteLLM with a cheap model)
fallback_model = self.config.performance.fallback_model
fallback_provider = LiteLLMProvider(model=fallback_model) if fallback_model else None
# Create one engine per agent
agent_names = list(self._agent_configs.keys())
for agent_name, agent_cfg in self._agent_configs.items():
@@ -107,6 +137,10 @@ class Orchestrator:
provider=provider,
tools=agent_registry,
approval=approval,
cache=self._cache,
cost_tracker=self._cost_tracker,
classifier=classifier,
fallback_provider=fallback_provider,
)
self._engines[agent_name] = engine
@@ -190,6 +224,10 @@ class Orchestrator:
async def stop(self) -> None:
    """Shut down the orchestrator and release shared resources.

    Flips the running flag, closes the shared response cache and cost
    tracker (both are created lazily during startup, so they may still be
    None here), then unwinds the MCP server connections held on the
    AsyncExitStack.
    """
    # Signal any dispatch/poll loops to stop accepting new work first.
    self._running = False
    # Close the SQLite-backed cache/cost stores if start() initialized them.
    # NOTE(review): backing store is SQLite per the commit description — confirm.
    if self._cache:
        await self._cache.close()
    if self._cost_tracker:
        await self._cost_tracker.close()
    # Tear down all MCP server connections registered on the exit stack.
    await self._mcp_stack.aclose()
    logger.info("Orchestrator stopped")