Add performance features: caching, cost tracking, retry, compaction, classification, scrubbing
Inspired by zeroclaw's lightweight patterns for slow hardware:
- Response cache (SQLite, SHA-256-keyed) to skip redundant LLM calls
- History compaction: LLM-summarizes old messages when the history exceeds 50 entries
- Query classifier routes simple/research queries to cheaper models
- Credential scrubbing removes secrets from tool output before it is sent to the LLM
- Cost tracker with daily/monthly budget enforcement (SQLite)
- Resilient provider with retry, exponential backoff, and a fallback provider
- Approval engine gains a session-scoped "always allow" option and an audit log

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -10,7 +10,10 @@ from typing import Any
|
||||
from loguru import logger
|
||||
|
||||
from xtrm_agent.bus import AgentMessage, InboundMessage, MessageBus, OutboundMessage
|
||||
from xtrm_agent.cache import ResponseCache
|
||||
from xtrm_agent.classifier import QueryClassifier
|
||||
from xtrm_agent.config import Config, AgentFileConfig, parse_agent_file
|
||||
from xtrm_agent.cost import BudgetConfig, CostTracker
|
||||
from xtrm_agent.engine import Engine
|
||||
from xtrm_agent.llm.anthropic import AnthropicProvider
|
||||
from xtrm_agent.llm.litellm import LiteLLMProvider
|
||||
@@ -35,6 +38,8 @@ class Orchestrator:
|
||||
self._agent_configs: dict[str, AgentFileConfig] = {}
|
||||
self._mcp_stack = AsyncExitStack()
|
||||
self._running = False
|
||||
self._cache: ResponseCache | None = None
|
||||
self._cost_tracker: CostTracker | None = None
|
||||
|
||||
# Channel defaults for routing
|
||||
channel_defaults = {}
|
||||
@@ -53,6 +58,27 @@ class Orchestrator:
|
||||
workspace = Path(self.config.tools.workspace).resolve()
|
||||
workspace.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Initialize shared response cache
|
||||
self._cache = ResponseCache(
|
||||
db_path=workspace / "cache.db",
|
||||
ttl=self.config.performance.cache_ttl,
|
||||
)
|
||||
await self._cache.setup()
|
||||
|
||||
# Initialize cost tracker
|
||||
budget = BudgetConfig(
|
||||
daily_limit_usd=self.config.performance.daily_budget_usd,
|
||||
monthly_limit_usd=self.config.performance.monthly_budget_usd,
|
||||
)
|
||||
self._cost_tracker = CostTracker(
|
||||
db_path=workspace / "costs.db",
|
||||
budget=budget,
|
||||
)
|
||||
await self._cost_tracker.setup()
|
||||
|
||||
# Initialize query classifier
|
||||
classifier = QueryClassifier(model_map=self.config.performance.model_routing)
|
||||
|
||||
# Parse all agent definitions
|
||||
for agent_name, agent_path in self.config.agents.items():
|
||||
p = Path(agent_path)
|
||||
@@ -74,6 +100,10 @@ class Orchestrator:
|
||||
await self._mcp_stack.__aenter__()
|
||||
await connect_mcp_servers(self.config.mcp_servers, global_registry, self._mcp_stack)
|
||||
|
||||
# Create fallback provider (LiteLLM with a cheap model)
|
||||
fallback_model = self.config.performance.fallback_model
|
||||
fallback_provider = LiteLLMProvider(model=fallback_model) if fallback_model else None
|
||||
|
||||
# Create one engine per agent
|
||||
agent_names = list(self._agent_configs.keys())
|
||||
for agent_name, agent_cfg in self._agent_configs.items():
|
||||
@@ -107,6 +137,10 @@ class Orchestrator:
|
||||
provider=provider,
|
||||
tools=agent_registry,
|
||||
approval=approval,
|
||||
cache=self._cache,
|
||||
cost_tracker=self._cost_tracker,
|
||||
classifier=classifier,
|
||||
fallback_provider=fallback_provider,
|
||||
)
|
||||
self._engines[agent_name] = engine
|
||||
|
||||
@@ -190,6 +224,10 @@ class Orchestrator:
|
||||
|
||||
async def stop(self) -> None:
    """Shut down the orchestrator.

    Marks the run loop as stopped, closes the optional performance
    subsystems (response cache, cost tracker) if they were initialized,
    then tears down the MCP connection stack and logs completion.
    """
    self._running = False

    # Close optional subsystems first; each may be None when start()
    # never ran (or failed before initializing them).
    cache = self._cache
    if cache:
        await cache.close()

    tracker = self._cost_tracker
    if tracker:
        await tracker.close()

    # Unwind all MCP server connections registered on the exit stack.
    await self._mcp_stack.aclose()
    logger.info("Orchestrator stopped")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user