Add Discord attachment reading and web search capabilities
- Discord channel now downloads and extracts text from attachments (text files, PDFs)
- Added WebSearchTool using DuckDuckGo for researcher and coder agents
- Improved WebFetchTool with User-Agent header and HTML-to-text stripping
- Added pypdf and duckduckgo-search dependencies

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -10,6 +10,7 @@ tools:
|
|||||||
- edit_file
|
- edit_file
|
||||||
- list_dir
|
- list_dir
|
||||||
- bash
|
- bash
|
||||||
|
- web_search
|
||||||
- delegate
|
- delegate
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ model: nvidia_nim/deepseek-ai/deepseek-v3.1
|
|||||||
temperature: 0.5
|
temperature: 0.5
|
||||||
max_iterations: 20
|
max_iterations: 20
|
||||||
tools:
|
tools:
|
||||||
|
- web_search
|
||||||
- web_fetch
|
- web_fetch
|
||||||
- read_file
|
- read_file
|
||||||
- list_dir
|
- list_dir
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ tools:
|
|||||||
- read_file
|
- read_file
|
||||||
- list_dir
|
- list_dir
|
||||||
- web_fetch
|
- web_fetch
|
||||||
|
- web_search
|
||||||
- delegate
|
- delegate
|
||||||
- write_file
|
- write_file
|
||||||
- edit_file
|
- edit_file
|
||||||
|
|||||||
@@ -17,6 +17,8 @@ dependencies = [
|
|||||||
"httpx>=0.28.0",
|
"httpx>=0.28.0",
|
||||||
"loguru>=0.7.0",
|
"loguru>=0.7.0",
|
||||||
"json-repair>=0.30.0",
|
"json-repair>=0.30.0",
|
||||||
|
"duckduckgo-search>=7.0.0",
|
||||||
|
"pypdf>=5.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
|
|||||||
@@ -8,6 +8,14 @@ from datetime import datetime, timezone
|
|||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Attachment:
    """Text extracted from a file that a user attached to a message.

    Instances carry already-extracted text (or a parenthesised placeholder
    explaining why no text is available), never raw bytes.
    """

    # Name of the file as supplied by the channel
    filename: str
    # Extracted text, or a human-readable note when extraction was not possible
    content: str
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class InboundMessage:
|
class InboundMessage:
|
||||||
"""Message from a channel (user) heading to an agent."""
|
"""Message from a channel (user) heading to an agent."""
|
||||||
@@ -19,6 +27,7 @@ class InboundMessage:
|
|||||||
target_agent: str | None = None
|
target_agent: str | None = None
|
||||||
timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
|
timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
|
||||||
metadata: dict[str, Any] = field(default_factory=dict)
|
metadata: dict[str, Any] = field(default_factory=dict)
|
||||||
|
attachments: list[Attachment] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
@@ -3,14 +3,29 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import io
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import discord
|
import discord
|
||||||
|
import httpx
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from xtrm_agent.bus import InboundMessage, MessageBus, OutboundMessage
|
from xtrm_agent.bus import Attachment, InboundMessage, MessageBus, OutboundMessage
|
||||||
from xtrm_agent.channels.base import BaseChannel
|
from xtrm_agent.channels.base import BaseChannel
|
||||||
|
|
||||||
|
# Extensions treated as plain text (decoded as UTF-8)
|
||||||
|
_TEXT_EXTENSIONS = frozenset({
|
||||||
|
".txt", ".py", ".md", ".json", ".yaml", ".yml", ".csv", ".log",
|
||||||
|
".js", ".ts", ".html", ".css", ".xml", ".toml", ".ini", ".sh",
|
||||||
|
".sql", ".rs", ".go", ".java", ".c", ".cpp", ".h", ".rb", ".php",
|
||||||
|
".swift", ".kt", ".r", ".cfg", ".env", ".conf", ".dockerfile",
|
||||||
|
".makefile", ".bat", ".ps1", ".lua", ".zig", ".hs",
|
||||||
|
})
|
||||||
|
|
||||||
|
_IMAGE_EXTENSIONS = frozenset({".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".svg"})
|
||||||
|
|
||||||
|
_MAX_ATTACHMENT_SIZE = 1_024_000 # 1 MB
|
||||||
|
|
||||||
|
|
||||||
class DiscordChannel(BaseChannel):
|
class DiscordChannel(BaseChannel):
|
||||||
"""Discord bot channel."""
|
"""Discord bot channel."""
|
||||||
@@ -54,12 +69,15 @@ class DiscordChannel(BaseChannel):
|
|||||||
if self.client.user:
|
if self.client.user:
|
||||||
content = content.replace(f"<@{self.client.user.id}>", "").strip()
|
content = content.replace(f"<@{self.client.user.id}>", "").strip()
|
||||||
|
|
||||||
|
attachments = await self._extract_attachments(message.attachments)
|
||||||
|
|
||||||
msg = InboundMessage(
|
msg = InboundMessage(
|
||||||
channel="discord",
|
channel="discord",
|
||||||
sender_id=str(message.author.id),
|
sender_id=str(message.author.id),
|
||||||
chat_id=str(message.channel.id),
|
chat_id=str(message.channel.id),
|
||||||
content=content,
|
content=content,
|
||||||
metadata={"guild_id": str(message.guild.id) if message.guild else ""},
|
metadata={"guild_id": str(message.guild.id) if message.guild else ""},
|
||||||
|
attachments=attachments,
|
||||||
)
|
)
|
||||||
await self.bus.publish_inbound(msg)
|
await self.bus.publish_inbound(msg)
|
||||||
|
|
||||||
@@ -71,6 +89,80 @@ class DiscordChannel(BaseChannel):
|
|||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
await message.channel.send("Sorry, I timed out processing your request.")
|
await message.channel.send("Sorry, I timed out processing your request.")
|
||||||
|
|
||||||
|
async def _extract_attachments(
    self, discord_attachments: list[discord.Attachment]
) -> list[Attachment]:
    """Download Discord attachments and extract text content.

    Args:
        discord_attachments: Attachments taken from an incoming Discord message.

    Returns:
        One ``Attachment`` per input, in the same order. ``content`` is the
        extracted text, or a parenthesised note when the file was skipped,
        could not be downloaded, or could not be read. Download and PDF
        parsing failures are logged and reported in ``content`` rather
        than raised.
    """
    # Most messages carry no files; avoid building an HTTP client for them.
    if not discord_attachments:
        return []

    # A single client serves the whole batch so connections can be reused.
    async with httpx.AsyncClient(timeout=30) as client:
        return [
            Attachment(
                filename=att.filename,
                content=await self._attachment_text(client, att),
            )
            for att in discord_attachments
        ]

async def _attachment_text(
    self, client: httpx.AsyncClient, att: discord.Attachment
) -> str:
    """Return the text of one attachment, or a note explaining its absence."""
    name = att.filename.lower()
    ext = "." + name.rsplit(".", 1)[-1] if "." in name else ""

    if att.size > _MAX_ATTACHMENT_SIZE:
        return f"(file skipped — {att.size / 1_048_576:.1f} MB exceeds 1 MB limit)"

    if ext in _IMAGE_EXTENSIONS:
        return "(image attached — cannot read image content)"

    is_pdf = ext == ".pdf"
    is_text = ext in _TEXT_EXTENSIONS or bool(
        att.content_type and att.content_type.startswith("text/")
    )
    # Decide before downloading: there is no point fetching bytes that
    # would only be discarded as an unsupported type.
    if not (is_pdf or is_text):
        return f"(unsupported file type: {ext or 'unknown'})"

    try:
        resp = await client.get(att.url)
        resp.raise_for_status()
        raw = resp.content
    except Exception as e:
        logger.warning(f"Failed to download attachment {att.filename}: {e}")
        return f"(failed to download: {e})"

    if is_pdf:
        try:
            # Imported lazily so a missing/broken pypdf only affects PDFs.
            from pypdf import PdfReader

            reader = PdfReader(io.BytesIO(raw))
            text = "\n".join(
                page.extract_text() or "" for page in reader.pages
            ).strip()
        except Exception as e:
            logger.warning(f"Failed to extract PDF text from {att.filename}: {e}")
            return f"(failed to read PDF: {e})"
        return text or "(PDF has no extractable text)"

    # errors="replace" substitutes undecodable bytes instead of raising,
    # so no exception handling is needed around the decode.
    return raw.decode("utf-8", errors="replace")
|
||||||
|
|
||||||
async def _send_chunked(
|
async def _send_chunked(
|
||||||
self, channel: discord.abc.Messageable, content: str
|
self, channel: discord.abc.Messageable, content: str
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|||||||
@@ -136,6 +136,15 @@ class Orchestrator:
|
|||||||
return f"Error: Agent '{agent_name}' not found"
|
return f"Error: Agent '{agent_name}' not found"
|
||||||
|
|
||||||
content = self.router.strip_mention(msg.content) if msg.content.startswith("@") else msg.content
|
content = self.router.strip_mention(msg.content) if msg.content.startswith("@") else msg.content
|
||||||
|
|
||||||
|
# Prepend attachment content so the LLM can see it
|
||||||
|
if msg.attachments:
|
||||||
|
parts: list[str] = []
|
||||||
|
for att in msg.attachments:
|
||||||
|
parts.append(f"[Attached file: {att.filename}]\n{att.content}")
|
||||||
|
parts.append(content)
|
||||||
|
content = "\n\n".join(parts)
|
||||||
|
|
||||||
logger.info(f"[{agent_name}] Processing: {content[:80]}")
|
logger.info(f"[{agent_name}] Processing: {content[:80]}")
|
||||||
return await engine.run(content)
|
return await engine.run(content)
|
||||||
|
|
||||||
|
|||||||
@@ -216,6 +216,26 @@ class BashTool(Tool):
|
|||||||
return f"Error: Command timed out after {self._timeout}s"
|
return f"Error: Command timed out after {self._timeout}s"
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_html(html: str) -> str:
|
||||||
|
"""Strip HTML tags and collapse whitespace to get readable text."""
|
||||||
|
# Remove script and style blocks
|
||||||
|
text = re.sub(r"<(script|style)[^>]*>.*?</\1>", "", html, flags=re.DOTALL | re.IGNORECASE)
|
||||||
|
# Replace <br>, <p>, <div>, <li> etc. with newlines
|
||||||
|
text = re.sub(r"<(br|p|div|li|h[1-6]|tr)[^>]*/?>", "\n", text, flags=re.IGNORECASE)
|
||||||
|
# Strip remaining tags
|
||||||
|
text = re.sub(r"<[^>]+>", "", text)
|
||||||
|
# Decode common HTML entities
|
||||||
|
text = text.replace("&", "&").replace("<", "<").replace(">", ">")
|
||||||
|
text = text.replace(""", '"').replace("'", "'").replace(" ", " ")
|
||||||
|
# Collapse whitespace
|
||||||
|
text = re.sub(r"[ \t]+", " ", text)
|
||||||
|
text = re.sub(r"\n{3,}", "\n\n", text)
|
||||||
|
return text.strip()
|
||||||
|
|
||||||
|
|
||||||
|
_WEB_USER_AGENT = "Mozilla/5.0 (compatible; XtrmAgent/1.0; +https://github.com)"
|
||||||
|
|
||||||
|
|
||||||
class WebFetchTool(Tool):
|
class WebFetchTool(Tool):
|
||||||
@property
|
@property
|
||||||
def name(self) -> str:
|
def name(self) -> str:
|
||||||
@@ -223,7 +243,7 @@ class WebFetchTool(Tool):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def description(self) -> str:
|
def description(self) -> str:
|
||||||
return "Fetch the content of a URL."
|
return "Fetch the content of a URL and return it as readable text."
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def parameters(self) -> dict[str, Any]:
|
def parameters(self) -> dict[str, Any]:
|
||||||
@@ -237,9 +257,14 @@ class WebFetchTool(Tool):
|
|||||||
|
|
||||||
async def execute(self, url: str, **_: Any) -> str:
|
async def execute(self, url: str, **_: Any) -> str:
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
|
async with httpx.AsyncClient(
|
||||||
|
timeout=30, follow_redirects=True, headers={"User-Agent": _WEB_USER_AGENT}
|
||||||
|
) as client:
|
||||||
resp = await client.get(url)
|
resp = await client.get(url)
|
||||||
|
content_type = resp.headers.get("content-type", "")
|
||||||
text = resp.text
|
text = resp.text
|
||||||
|
if "html" in content_type:
|
||||||
|
text = _strip_html(text)
|
||||||
if len(text) > 20_000:
|
if len(text) > 20_000:
|
||||||
text = text[:20_000] + "\n... (truncated)"
|
text = text[:20_000] + "\n... (truncated)"
|
||||||
return text
|
return text
|
||||||
@@ -247,6 +272,51 @@ class WebFetchTool(Tool):
|
|||||||
return f"Error fetching URL: {e}"
|
return f"Error fetching URL: {e}"
|
||||||
|
|
||||||
|
|
||||||
|
class WebSearchTool(Tool):
    """Tool that runs a DuckDuckGo text search and returns formatted hits."""

    # Fallback when ``max_results`` cannot be interpreted as an integer
    _DEFAULT_RESULTS = 5
    # Upper bound per call so a bad argument cannot flood the model context
    _RESULT_CAP = 25

    @property
    def name(self) -> str:
        return "web_search"

    @property
    def description(self) -> str:
        return "Search the web using DuckDuckGo and return a list of results with title, URL, and snippet."

    @property
    def parameters(self) -> dict[str, Any]:
        return {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query"},
                "max_results": {
                    "type": "integer",
                    "description": "Maximum number of results (default: 5)",
                    "default": 5,
                },
            },
            "required": ["query"],
        }

    async def execute(self, query: str, max_results: int = 5, **_: Any) -> str:
        """Search DuckDuckGo and format the results.

        Args:
            query: Search terms.
            max_results: Desired number of results; coerced to an integer
                and clamped to ``1.._RESULT_CAP``.

        Returns:
            For each hit: bold title, URL, snippet, then a ``---`` line.
            ``"No results found."`` when the search is empty, or
            ``"Error searching: ..."`` on any failure (never raises).
        """
        import asyncio

        # Tool arguments come from a model and may be strings, zero or huge.
        try:
            limit = int(max_results)
        except (TypeError, ValueError):
            limit = self._DEFAULT_RESULTS
        limit = max(1, min(limit, self._RESULT_CAP))

        def _search() -> list[dict[str, Any]]:
            # The synchronous DDGS client is used instead of AsyncDDGS.
            # NOTE(review): AsyncDDGS/atext appear to be absent from the
            # duckduckgo-search 7.x line this project depends on — confirm
            # against the installed version.
            from duckduckgo_search import DDGS

            with DDGS() as ddgs:
                return list(ddgs.text(query, max_results=limit) or [])

        try:
            # Run the blocking client in a worker thread to keep the loop free.
            results = await asyncio.to_thread(_search)

            if not results:
                return "No results found."

            lines: list[str] = []
            for r in results:
                # ``or ""`` guards against keys present with a None value,
                # which would otherwise break the join below.
                lines.append(f"**{r.get('title') or ''}**")
                lines.append(r.get("href") or "")
                lines.append(r.get("body") or "")
                lines.append("---")
            return "\n".join(lines)
        except Exception as e:
            return f"Error searching: {e}"
|
||||||
|
|
||||||
|
|
||||||
def register_builtin_tools(registry: Any, workspace: Path) -> None:
|
def register_builtin_tools(registry: Any, workspace: Path) -> None:
|
||||||
"""Register all built-in tools into a ToolRegistry."""
|
"""Register all built-in tools into a ToolRegistry."""
|
||||||
registry.register(ReadFileTool(workspace))
|
registry.register(ReadFileTool(workspace))
|
||||||
@@ -255,3 +325,4 @@ def register_builtin_tools(registry: Any, workspace: Path) -> None:
|
|||||||
registry.register(ListDirTool(workspace))
|
registry.register(ListDirTool(workspace))
|
||||||
registry.register(BashTool(workspace))
|
registry.register(BashTool(workspace))
|
||||||
registry.register(WebFetchTool())
|
registry.register(WebFetchTool())
|
||||||
|
registry.register(WebSearchTool())
|
||||||
|
|||||||
Reference in New Issue
Block a user