Files
xtrm-agent/xtrm_agent/scrub.py
Kaloyan Danchev 872ed24f0c Add performance features: caching, cost tracking, retry, compaction, classification, scrubbing
Inspired by zeroclaw's lightweight patterns for slow hardware:
- Response cache (SQLite + SHA-256 keyed) to skip redundant LLM calls
- History compaction — LLM-summarize old messages when history exceeds 50
- Query classifier routes simple/research queries to cheaper models
- Credential scrubbing removes secrets from tool output before sending to LLM
- Cost tracker with daily/monthly budget enforcement (SQLite)
- Resilient provider with retry + exponential backoff + fallback provider
- Approval engine gains session "always allow" and audit log

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 09:20:52 +02:00

47 lines
1.6 KiB
Python

"""Credential scrubbing — prevent secret leakage in tool output."""
from __future__ import annotations
import re
# Compiled regexes for common secret formats. Key/value patterns capture the
# secret value in their LAST group; standalone-token patterns have no groups.
_SECRET_PATTERNS = [
    # Key/value pairs (API keys, tokens, passwords). The optional quote right
    # after the key name lets JSON / dict-style output such as
    # `"password": "..."` match, not only plain `password=...`.
    re.compile(
        r"(?i)(api[_-]?key|token|password|passwd|secret|access[_-]?key|private[_-]?key|auth)"
        r"['\"]?\s*[=:]\s*['\"]?([^\s'\"]{8,})['\"]?",
    ),
    # Bearer tokens
    re.compile(r"Bearer\s+[A-Za-z0-9\-._~+/]+=*", re.IGNORECASE),
    # AWS access key IDs (AKIA...)
    re.compile(r"AKIA[0-9A-Z]{16}"),
    # AWS secret access keys (40 chars from the base64 alphabet)
    re.compile(
        r"(?i)aws[_-]?secret[_-]?access[_-]?key"
        r"['\"]?\s*[=:]\s*['\"]?([A-Za-z0-9/+=]{40})['\"]?",
    ),
    # GitHub tokens
    re.compile(r"gh[pousr]_[A-Za-z0-9_]{36,}"),
    # Long hex strings (32+ hex chars) assigned to a key/token/secret name,
    # with the same optional quote/whitespace tolerance as the patterns above.
    re.compile(r"(?i)(?:key|token|secret)['\"]?\s*[=:]\s*['\"]?([0-9a-f]{32,})['\"]?"),
]

# Placeholder substituted for every scrubbed secret.
_REDACTED = "[REDACTED]"
def scrub_credentials(text: str) -> str:
    """Return *text* with likely credentials masked by the redaction marker.

    Each regex in ``_SECRET_PATTERNS`` is applied in list order, every pass
    working on the output of the previous one. ``_redact_match`` chooses the
    replacement text for each individual hit.
    """
    scrubbed = text
    for secret_re in _SECRET_PATTERNS:
        scrubbed = re.sub(secret_re, _redact_match, scrubbed)
    return scrubbed
def _redact_match(match: re.Match) -> str:
    """Build the replacement text for one secret-pattern match.

    Key/value patterns capture the secret value in their last group; for
    those, the key name and its separator stay visible and only the value is
    replaced (``api_key=abc...`` -> ``api_key= [REDACTED]``). Matches from
    patterns without capture groups (Bearer, AKIA, gh*_) are redacted whole.

    The separator is searched for only in the text *preceding* the captured
    value. Searching the whole match would treat an ``=`` or ``:`` inside the
    secret itself as the separator and leak everything before it, e.g. the
    base64 padding in ``Bearer abc==`` or the ``=`` in ``password: ab=cd...``.

    Args:
        match: A match produced by one of the secret patterns.

    Returns:
        ``"<key><sep> [REDACTED]"`` for key/value matches, otherwise
        ``"[REDACTED]"``.
    """
    value_group = match.lastindex
    if not value_group:
        # Standalone token with no key part worth keeping.
        return _REDACTED

    # Everything the pattern consumed before the secret value: key name,
    # separator, and any whitespace / opening quote.
    prefix = match.group(0)[: match.start(value_group) - match.start()]
    sep_positions = [pos for pos in (prefix.find("="), prefix.find(":")) if pos != -1]
    if not sep_positions:
        # No recognizable key part; fail safe and hide the whole match.
        return _REDACTED

    key_part = prefix[: min(sep_positions) + 1]
    return f"{key_part} {_REDACTED}"