Inspired by zeroclaw's lightweight patterns for slow hardware:

- Response cache (SQLite + SHA-256 keyed) to skip redundant LLM calls
- History compaction — LLM-summarize old messages when history exceeds 50 messages
- Query classifier routes simple/research queries to cheaper models
- Credential scrubbing removes secrets from tool output before sending to LLM
- Cost tracker with daily/monthly budget enforcement (SQLite)
- Resilient provider with retry + exponential backoff + fallback provider
- Approval engine gains session "always allow" and audit log

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
47 lines
1.6 KiB
Python
47 lines
1.6 KiB
Python
"""Credential scrubbing — prevent secret leakage in tool output."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
|
|
# Patterns that match common secret formats
|
|
# Each entry is a compiled regex whose *entire* match is replaced by
# ``_redact_match``.  Patterns that describe a ``key = value`` assignment wrap
# the key name and its separator in a named group called ``label``; the
# redactor re-emits that label so the output still shows *which* setting was
# scrubbed.  Patterns without a ``label`` group are redacted wholesale.
#
# The optional quote before the separator lets the key/value patterns also
# match quoted keys such as JSON's ``"api_key": "..."``.
_SECRET_PATTERNS = [
    # Key=value patterns (API keys, tokens, passwords); value is 8+ characters
    # that are neither whitespace nor quotes.
    re.compile(
        r"(?P<label>(?:api[_-]?key|token|password|passwd|secret|access[_-]?key"
        r"|private[_-]?key|auth)['\"]?\s*[=:])"
        r"\s*['\"]?[^\s'\"]{8,}['\"]?",
        re.IGNORECASE,
    ),
    # Bearer tokens (may end in base64 "=" padding)
    re.compile(r"Bearer\s+[A-Za-z0-9\-._~+/]+=*", re.IGNORECASE),
    # AWS access keys (AKIA...)
    re.compile(r"AKIA[0-9A-Z]{16}"),
    # AWS secret keys (40 char base64)
    re.compile(
        r"(?P<label>aws[_-]?secret[_-]?access[_-]?key['\"]?\s*[=:])"
        r"\s*['\"]?[A-Za-z0-9/+=]{40}['\"]?",
        re.IGNORECASE,
    ),
    # GitHub tokens
    re.compile(r"gh[pousr]_[A-Za-z0-9_]{36,}"),
    # Generic long hex strings that look like secrets (32+ hex chars after
    # key= / token= / secret=)
    re.compile(
        r"(?P<label>(?:key|token|secret)['\"]?\s*[=:])"
        r"\s*['\"]?[0-9a-f]{32,}['\"]?",
        re.IGNORECASE,
    ),
]

_REDACTED = "[REDACTED]"


def scrub_credentials(text: str) -> str:
    """Scrub potential secrets from *text*, replacing them with ``[REDACTED]``.

    Every pattern in ``_SECRET_PATTERNS`` is applied in order, each one to the
    output of the previous pass.

    Args:
        text: Arbitrary text that may contain credentials.

    Returns:
        A copy of *text* in which every pattern match has been replaced.  For
        key/value matches the key name and separator are kept (for example
        ``password: [REDACTED]``); all other matches become ``[REDACTED]``.
    """
    result = text
    for pattern in _SECRET_PATTERNS:
        result = pattern.sub(_redact_match, result)
    return result


def _redact_match(match: re.Match[str]) -> str:
    """Build the replacement text for a single secret match.

    Args:
        match: A match produced by one of the ``_SECRET_PATTERNS`` regexes.

    Returns:
        ``"<label> [REDACTED]"`` when the pattern captured a ``label`` group
        (key name plus separator), otherwise just ``"[REDACTED]"``.
    """
    # Only the explicitly captured label is ever echoed back.  Searching the
    # matched text for "=" or ":" is unsafe because the secret itself may
    # contain those characters (base64 padding on a Bearer token, "=" inside
    # a password) and part of it would then be kept as the "key".
    label = match.groupdict().get("label")
    if label is None:
        return _REDACTED
    return f"{label} {_REDACTED}"
|