Add performance features: caching, cost tracking, retry, compaction, classification, scrubbing
Inspired by zeroclaw's lightweight patterns for slow hardware:
- Response cache (SQLite, SHA-256-keyed) to skip redundant LLM calls
- History compaction: LLM-summarizes old messages when the history exceeds 50 entries
- Query classifier routes simple/research queries to cheaper models
- Credential scrubbing removes secrets from tool output before it is sent to the LLM
- Cost tracker with daily/monthly budget enforcement (SQLite)
- Resilient provider with retry, exponential backoff, and a fallback provider
- Approval engine gains a session-scoped "always allow" option and an audit log

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -10,7 +10,10 @@ from typing import Any
|
||||
from loguru import logger
|
||||
|
||||
from xtrm_agent.bus import AgentMessage, InboundMessage, MessageBus, OutboundMessage
|
||||
from xtrm_agent.cache import ResponseCache
|
||||
from xtrm_agent.classifier import QueryClassifier
|
||||
from xtrm_agent.config import Config, AgentFileConfig, parse_agent_file
|
||||
from xtrm_agent.cost import BudgetConfig, CostTracker
|
||||
from xtrm_agent.engine import Engine
|
||||
from xtrm_agent.llm.anthropic import AnthropicProvider
|
||||
from xtrm_agent.llm.litellm import LiteLLMProvider
|
||||
@@ -35,6 +38,8 @@ class Orchestrator:
|
||||
self._agent_configs: dict[str, AgentFileConfig] = {}
|
||||
self._mcp_stack = AsyncExitStack()
|
||||
self._running = False
|
||||
self._cache: ResponseCache | None = None
|
||||
self._cost_tracker: CostTracker | None = None
|
||||
|
||||
# Channel defaults for routing
|
||||
channel_defaults = {}
|
||||
@@ -53,6 +58,27 @@ class Orchestrator:
|
||||
workspace = Path(self.config.tools.workspace).resolve()
|
||||
workspace.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Initialize shared response cache
|
||||
self._cache = ResponseCache(
|
||||
db_path=workspace / "cache.db",
|
||||
ttl=self.config.performance.cache_ttl,
|
||||
)
|
||||
await self._cache.setup()
|
||||
|
||||
# Initialize cost tracker
|
||||
budget = BudgetConfig(
|
||||
daily_limit_usd=self.config.performance.daily_budget_usd,
|
||||
monthly_limit_usd=self.config.performance.monthly_budget_usd,
|
||||
)
|
||||
self._cost_tracker = CostTracker(
|
||||
db_path=workspace / "costs.db",
|
||||
budget=budget,
|
||||
)
|
||||
await self._cost_tracker.setup()
|
||||
|
||||
# Initialize query classifier
|
||||
classifier = QueryClassifier(model_map=self.config.performance.model_routing)
|
||||
|
||||
# Parse all agent definitions
|
||||
for agent_name, agent_path in self.config.agents.items():
|
||||
p = Path(agent_path)
|
||||
@@ -74,6 +100,10 @@ class Orchestrator:
|
||||
await self._mcp_stack.__aenter__()
|
||||
await connect_mcp_servers(self.config.mcp_servers, global_registry, self._mcp_stack)
|
||||
|
||||
# Create fallback provider (LiteLLM with a cheap model)
|
||||
fallback_model = self.config.performance.fallback_model
|
||||
fallback_provider = LiteLLMProvider(model=fallback_model) if fallback_model else None
|
||||
|
||||
# Create one engine per agent
|
||||
agent_names = list(self._agent_configs.keys())
|
||||
for agent_name, agent_cfg in self._agent_configs.items():
|
||||
@@ -107,6 +137,10 @@ class Orchestrator:
|
||||
provider=provider,
|
||||
tools=agent_registry,
|
||||
approval=approval,
|
||||
cache=self._cache,
|
||||
cost_tracker=self._cost_tracker,
|
||||
classifier=classifier,
|
||||
fallback_provider=fallback_provider,
|
||||
)
|
||||
self._engines[agent_name] = engine
|
||||
|
||||
@@ -190,6 +224,10 @@ class Orchestrator:
|
||||
|
||||
async def stop(self) -> None:
    """Shut down the orchestrator.

    Marks the run loop as stopped, closes the optional performance
    subsystems (response cache, cost tracker) if they were initialized,
    then tears down the MCP connection stack and logs completion.
    """
    self._running = False

    # Close optional subsystems first; each may be None when start()
    # never ran (or failed before initializing them).
    cache = self._cache
    if cache:
        await cache.close()

    tracker = self._cost_tracker
    if tracker:
        await tracker.close()

    # Unwind all MCP server connections registered on the exit stack.
    await self._mcp_stack.aclose()
    logger.info("Orchestrator stopped")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user