Add Discord attachment reading and web search capabilities
- Discord channel now downloads and extracts text from attachments (text files, PDFs)
- Added WebSearchTool using DuckDuckGo for researcher and coder agents
- Improved WebFetchTool with User-Agent header and HTML-to-text stripping
- Added pypdf and duckduckgo-search dependencies

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -216,6 +216,26 @@ class BashTool(Tool):
|
||||
return f"Error: Command timed out after {self._timeout}s"
|
||||
|
||||
|
||||
def _strip_html(html: str) -> str:
    """Strip HTML tags and collapse whitespace to get readable text.

    Processing order:
      1. drop ``<script>`` / ``<style>`` blocks together with their content,
      2. turn common block-level opening tags into newlines,
      3. remove every remaining tag,
      4. decode HTML character references (``&amp;``, ``&lt;``, ``&#39;`` ...),
      5. collapse runs of spaces/tabs and of blank lines.

    Args:
        html: Raw HTML markup.

    Returns:
        The visible text with surrounding whitespace stripped.
    """
    # Local import: the ``html`` parameter shadows the stdlib module of the
    # same name inside this function, so only the needed helper is bound.
    from html import unescape

    # Remove script and style blocks (content included).
    text = re.sub(r"<(script|style)[^>]*>.*?</\1>", "", html, flags=re.DOTALL | re.IGNORECASE)
    # Replace <br>, <p>, <div>, <li> etc. with newlines.
    text = re.sub(r"<(br|p|div|li|h[1-6]|tr)[^>]*/?>", "\n", text, flags=re.IGNORECASE)
    # Strip remaining tags.
    text = re.sub(r"<[^>]+>", "", text)
    # Decode entities only after tag stripping, so that escaped markup such as
    # "&lt;b&gt;" survives as literal text instead of being removed as a tag.
    text = unescape(text)
    # "&nbsp;" decodes to U+00A0; normalise it to a plain space so the
    # whitespace collapsing below treats it like any other space.
    text = text.replace("\xa0", " ")
    # Collapse whitespace.
    text = re.sub(r"[ \t]+", " ", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
|
||||
|
||||
|
||||
# User-Agent header value passed to the HTTP client that WebFetchTool uses for its requests.
_WEB_USER_AGENT = "Mozilla/5.0 (compatible; XtrmAgent/1.0; +https://github.com)"
|
||||
|
||||
|
||||
class WebFetchTool(Tool):
|
||||
@property
|
||||
def name(self) -> str:
|
||||
@@ -223,7 +243,7 @@ class WebFetchTool(Tool):
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
return "Fetch the content of a URL."
|
||||
return "Fetch the content of a URL and return it as readable text."
|
||||
|
||||
@property
|
||||
def parameters(self) -> dict[str, Any]:
|
||||
@@ -237,9 +257,14 @@ class WebFetchTool(Tool):
|
||||
|
||||
async def execute(self, url: str, **_: Any) -> str:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
|
||||
async with httpx.AsyncClient(
|
||||
timeout=30, follow_redirects=True, headers={"User-Agent": _WEB_USER_AGENT}
|
||||
) as client:
|
||||
resp = await client.get(url)
|
||||
content_type = resp.headers.get("content-type", "")
|
||||
text = resp.text
|
||||
if "html" in content_type:
|
||||
text = _strip_html(text)
|
||||
if len(text) > 20_000:
|
||||
text = text[:20_000] + "\n... (truncated)"
|
||||
return text
|
||||
@@ -247,6 +272,51 @@ class WebFetchTool(Tool):
|
||||
return f"Error fetching URL: {e}"
|
||||
|
||||
|
||||
class WebSearchTool(Tool):
    """Tool that runs a DuckDuckGo text search and returns the hits as plain text."""

    @property
    def name(self) -> str:
        """Name under which the tool is exposed."""
        return "web_search"

    @property
    def description(self) -> str:
        """Short summary of what the tool does."""
        return "Search the web using DuckDuckGo and return a list of results with title, URL, and snippet."

    @property
    def parameters(self) -> dict[str, Any]:
        """Argument schema: a required string ``query`` and an optional integer ``max_results``."""
        query_schema = {"type": "string", "description": "Search query"}
        limit_schema = {
            "type": "integer",
            "description": "Maximum number of results (default: 5)",
            "default": 5,
        }
        return {
            "type": "object",
            "properties": {"query": query_schema, "max_results": limit_schema},
            "required": ["query"],
        }

    async def execute(self, query: str, max_results: int = 5, **_: Any) -> str:
        """Search for *query* and format each hit as title, URL, snippet and a separator line.

        Args:
            query: Search query text.
            max_results: Maximum number of results requested from the search client.
            **_: Extra keyword arguments; accepted and ignored.

        Returns:
            The formatted results joined by newlines, ``"No results found."``
            when the search yields nothing, or an ``"Error searching: ..."``
            message if any exception is raised along the way.
        """
        try:
            # Imported inside the method, so an import failure is reported
            # through the error string below rather than at module load.
            from duckduckgo_search import AsyncDDGS

            async with AsyncDDGS() as ddgs:
                hits = await ddgs.atext(query, max_results=max_results)

            if not hits:
                return "No results found."

            out: list[str] = []
            for hit in hits:
                out.extend(
                    (
                        f"**{hit.get('title', '')}**",
                        hit.get("href", ""),
                        hit.get("body", ""),
                        "---",
                    )
                )
            return "\n".join(out)
        except Exception as exc:
            return f"Error searching: {exc}"
|
||||
|
||||
|
||||
def register_builtin_tools(registry: Any, workspace: Path) -> None:
|
||||
"""Register all built-in tools into a ToolRegistry."""
|
||||
registry.register(ReadFileTool(workspace))
|
||||
@@ -255,3 +325,4 @@ def register_builtin_tools(registry: Any, workspace: Path) -> None:
|
||||
registry.register(ListDirTool(workspace))
|
||||
registry.register(BashTool(workspace))
|
||||
registry.register(WebFetchTool())
|
||||
registry.register(WebSearchTool())
|
||||
|
||||
Reference in New Issue
Block a user