Add performance features: caching, cost tracking, retry, compaction, classification, scrubbing

Inspired by zeroclaw's lightweight patterns for slow hardware:

- Response cache (SQLite + SHA-256 keyed) to skip redundant LLM calls
- History compaction — LLM-summarize old messages when history exceeds 50
- Query classifier routes simple/research queries to cheaper models
- Credential scrubbing removes secrets from tool output before sending to LLM
- Cost tracker with daily/monthly budget enforcement (SQLite)
- Resilient provider with retry + exponential backoff + fallback provider
- Approval engine gains session "always allow" and audit log

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 09:20:52 +02:00
parent e24e3026b6
commit 872ed24f0c
10 changed files with 694 additions and 17 deletions
--- a/xtrm_agent/config.py
+++ b/xtrm_agent/config.py
@@ -61,6 +61,16 @@ class MCPServerConfig(BaseModel):
    url: str = ""


+class PerformanceConfig(BaseModel):
+    """Performance tuning — caching, cost tracking, model routing."""
+
+    cache_ttl: int = 3600  # cache entry lifetime; presumably seconds (3600 = 1 h) — TODO confirm against the cache code
+    daily_budget_usd: float = 0.0  # daily budget in USD; NOTE(review): 0.0 presumably means "no limit" — confirm
+    monthly_budget_usd: float = 0.0  # monthly budget in USD; NOTE(review): 0.0 presumably means "no limit" — confirm
+    fallback_model: str = ""  # model name for the fallback path; presumably "" disables fallback — TODO confirm
+    model_routing: dict[str, str] = Field(default_factory=dict)  # presumably query class -> model name; verify against the classifier
+
+
 class OrchestratorConfig(BaseModel):
    max_concurrent: int = 5
    delegation_timeout: int = 120
@@ -87,6 +97,7 @@ class Config(BaseModel):
    mcp_servers: dict[str, MCPServerConfig] = Field(default_factory=dict)
    agents: dict[str, str] = Field(default_factory=dict)
    orchestrator: OrchestratorConfig = Field(default_factory=OrchestratorConfig)
+    performance: PerformanceConfig = Field(default_factory=PerformanceConfig)


 def load_config(path: str | Path = "config.yaml") -> Config: