Inspired by zeroclaw's lightweight patterns for slow hardware:

- Response cache (SQLite, SHA-256 keyed) to skip redundant LLM calls
- History compaction: LLM-summarize old messages when history exceeds 50 messages
- Query classifier routes simple/research queries to cheaper models
- Credential scrubbing removes secrets from tool output before sending it to the LLM
- Cost tracker with daily/monthly budget enforcement (SQLite)
- Resilient provider with retry, exponential backoff, and a fallback provider
- Approval engine gains session "always allow" and an audit log

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
55 lines
1.0 KiB
YAML
55 lines
1.0 KiB
YAML
# Named LLM provider entries. Each entry sets a `provider` backend and the
# `model` identifier to use with it.
llm:
  providers:
    kimi:
      provider: litellm
      model: nvidia_nim/moonshotai/kimi-k2.5
    deepseek:
      provider: litellm
      model: nvidia_nim/deepseek-ai/deepseek-v3.1
    minimax:
      provider: litellm
      model: nvidia_nim/minimaxai/minimax-m2.1

# Front-end channels. Each has an on/off switch and names the agent it uses
# by default (`coder` is one of the keys under `agents`).
channels:
  cli:
    enabled: true
    default_agent: coder
  discord:
    enabled: true
    # Holds the NAME of an environment variable, not the token itself.
    # NOTE(review): presumably resolved by the loader at startup; confirm.
    token_env: DISCORD_BOT_TOKEN
    default_agent: coder
    allowed_users:
      # Keep quoted: an unquoted all-digit ID would be parsed as an integer.
      - "1367816056244273243"

# Tool settings: the workspace directory and the tools on the auto-approve
# list.
tools:
  workspace: ./data
  # NOTE(review): presumably tools listed here run without an approval
  # prompt; confirm against the approval engine. The list includes tools that
  # change state (write_file, edit_file, bash) while the discord channel is
  # enabled. Confirm that exposure is intended.
  auto_approve:
    - read_file
    - list_dir
    - web_fetch
    - web_search
    - delegate
    - write_file
    - edit_file
    - bash

# No MCP servers configured. The explicit `{}` keeps this an empty mapping;
# a bare `mcp_servers:` would parse as null.
mcp_servers: {}

# Agent name -> path of the Markdown file that defines it.
agents:
  coder: agents/coder.md
  researcher: agents/researcher.md
  reviewer: agents/reviewer.md

# Orchestrator limits.
orchestrator:
  max_concurrent: 5
  # NOTE(review): unit is not stated in this file; presumably seconds.
  delegation_timeout: 120

# Performance and cost controls: cache lifetime, spend budgets, the fallback
# model, and per-query-class model routing.
performance:
  # NOTE(review): unit is not stated in this file; presumably seconds.
  cache_ttl: 3600
  # Budgets are in US dollars, per the `_usd` key suffix.
  daily_budget_usd: 5.0
  monthly_budget_usd: 50.0
  fallback_model: nvidia_nim/deepseek-ai/deepseek-v3.1
  # NOTE(review): nesting reconstructed from context; confirm that
  # `model_routing` belongs under `performance` rather than at top level.
  model_routing:
    simple: nvidia_nim/deepseek-ai/deepseek-v3.1
    research: nvidia_nim/moonshotai/kimi-k2.5