@dataclass
class Attachment:
    """One file that accompanied a user message, reduced to text.

    Both fields are plain strings; the pair is carried on
    ``InboundMessage.attachments``.
    """

    # Name of the file as it was supplied with the message.
    filename: str
    # Text associated with the file.
    content: str
# Extensions treated as plain text (decoded as UTF-8)
_TEXT_EXTENSIONS = frozenset({
    ".txt", ".py", ".md", ".json", ".yaml", ".yml", ".csv", ".log",
    ".js", ".ts", ".html", ".css", ".xml", ".toml", ".ini", ".sh",
    ".sql", ".rs", ".go", ".java", ".c", ".cpp", ".h", ".rb", ".php",
    ".swift", ".kt", ".r", ".cfg", ".env", ".conf", ".dockerfile",
    ".makefile", ".bat", ".ps1", ".lua", ".zig", ".hs",
})

# Image types are acknowledged with a placeholder and never downloaded.
_IMAGE_EXTENSIONS = frozenset({".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".svg"})

# 1 MiB: the same unit the "file skipped" note divides by, so the size it
# reports and the limit it quotes agree.
_MAX_ATTACHMENT_SIZE = 1_048_576


def _pdf_text(raw: bytes) -> str:
    """Return the text of every page of an in-memory PDF, joined by newlines."""
    from pypdf import PdfReader

    reader = PdfReader(io.BytesIO(raw))
    return "\n".join(page.extract_text() or "" for page in reader.pages).strip()


async def _attachment_text(client: httpx.AsyncClient, att: discord.Attachment) -> str:
    """Return the text for one attachment, or a parenthesised status note.

    The attachment is classified from its name, size and content type first;
    ``client`` is only used once the file is known to be a PDF or text, so
    oversized, image and unsupported files cost no network traffic.

    Args:
        client: Shared HTTP client used for the download.
        att: The attachment to read (``filename``, ``size``, ``content_type``
            and ``url`` are consulted).

    Returns:
        Extracted text, or a note such as ``"(unsupported file type: .zip)"``.
        Never raises for download or parse failures; those become notes too.
    """
    name = att.filename.lower()
    ext = "." + name.rsplit(".", 1)[-1] if "." in name else ""

    if att.size > _MAX_ATTACHMENT_SIZE:
        return f"(file skipped — {att.size / 1_048_576:.1f} MB exceeds 1 MB limit)"
    if ext in _IMAGE_EXTENSIONS:
        return "(image attached — cannot read image content)"

    is_pdf = ext == ".pdf"
    is_text = ext in _TEXT_EXTENSIONS or bool(
        att.content_type and att.content_type.startswith("text/")
    )
    if not (is_pdf or is_text):
        return f"(unsupported file type: {ext or 'unknown'})"

    try:
        resp = await client.get(att.url)
        resp.raise_for_status()
        raw = resp.content
    except Exception as e:  # best effort: one bad file must not drop the message
        logger.warning(f"Failed to download attachment {att.filename}: {e}")
        return f"(failed to download: {e})"

    if is_pdf:
        try:
            # PDF parsing is synchronous CPU work; keep it off the event loop.
            text = await asyncio.to_thread(_pdf_text, raw)
        except Exception as e:
            logger.warning(f"Failed to extract PDF text from {att.filename}: {e}")
            return f"(failed to read PDF: {e})"
        return text or "(PDF has no extractable text)"

    # errors="replace" cannot raise on bytes, so no guard is needed here.
    # NOTE(review): the decoded text is not truncated; confirm the consumer
    # can take up to _MAX_ATTACHMENT_SIZE characters per file.
    return raw.decode("utf-8", errors="replace")


async def _extract_attachments(
    self, discord_attachments: list[discord.Attachment]
) -> list[Attachment]:
    """Download Discord attachments and extract text content.

    Args:
        discord_attachments: Attachments of the incoming message, in order.

    Returns:
        One ``Attachment`` per input, in the same order. ``content`` is the
        extracted text, or a parenthesised note when the file was skipped,
        unsupported, or could not be read.
    """
    results: list[Attachment] = []
    if not discord_attachments:
        # Common case: avoid building an HTTP client for plain messages.
        return results

    # One client for the whole message so connections are reused.
    async with httpx.AsyncClient(timeout=30) as client:
        for att in discord_attachments:
            content = await _attachment_text(client, att)
            results.append(Attachment(filename=att.filename, content=content))

    return results
def _strip_html(html: str) -> str:
    """Strip HTML tags and collapse whitespace to get readable text.

    Args:
        html: Raw markup, e.g. the body of a fetched page.

    Returns:
        The text with script/style blocks and comments removed, block-level
        opening tags turned into line breaks, all other tags dropped,
        character references decoded exactly once, runs of spaces/tabs
        collapsed to one space and runs of 3+ newlines collapsed to two.
    """
    # Local import: the ``html`` parameter shadows the module of the same name.
    from html import unescape

    # Remove script and style blocks together with their contents. The
    # closing-tag backreference bounds the lazy ``.*?``; without it only the
    # opening tag would go and the script source would leak into the text.
    text = re.sub(
        r"<(script|style)\b[^>]*>.*?</\1\s*>",
        "",
        html,
        flags=re.DOTALL | re.IGNORECASE,
    )
    # Drop comments whole; a ">" inside one would cut the generic tag
    # stripper below short and leave residue.
    text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)
    # Replace <br>, <p>, <div>, <li> etc. with newlines. ``\b`` stops longer
    # names that merely start the same way (<link>, <pre>, <track>) matching.
    text = re.sub(r"<(br|p|div|li|h[1-6]|tr)\b[^>]*>", "\n", text, flags=re.IGNORECASE)
    # Strip remaining tags
    text = re.sub(r"<[^>]+>", "", text)
    # Decode character references in a single pass, so "&amp;lt;" becomes
    # "&lt;" rather than "<"; a non-breaking space is treated as a space.
    text = unescape(text).replace("\xa0", " ")
    # Collapse whitespace
    text = re.sub(r"[ \t]+", " ", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()


_WEB_USER_AGENT = "Mozilla/5.0 (compatible; XtrmAgent/1.0; +https://github.com)"
# Defensive ceiling on results per call; a model-supplied value above this
# is clamped. NOTE(review): the figure is a judgement call, tune as needed.
_MAX_SEARCH_RESULTS = 20


def _format_search_results(results: list[dict[str, Any]]) -> str:
    """Render result dicts as repeated ``**title**`` / URL / snippet / ``---`` lines."""
    lines: list[str] = []
    for r in results:
        lines.append(f"**{r.get('title', '')}**")
        lines.append(r.get("href") or "")
        lines.append(r.get("body") or "")
        lines.append("---")
    return "\n".join(lines)


@property
def parameters(self) -> dict[str, Any]:
    """JSON-schema description of the tool arguments (``query`` is required)."""
    return {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "Search query"},
            "max_results": {
                "type": "integer",
                "description": "Maximum number of results (default: 5)",
                "default": 5,
            },
        },
        "required": ["query"],
    }


async def execute(self, query: str, max_results: int = 5, **_: Any) -> str:
    """Run a DuckDuckGo text search and return the hits as plain text.

    Args:
        query: Search terms; must contain non-whitespace characters.
        max_results: Requested number of hits. Coerced to ``int`` and clamped
            to ``1.._MAX_SEARCH_RESULTS``; an uncoercible value falls back to 5.

    Returns:
        The formatted results, ``"No results found."``, or a string starting
        with ``"Error searching:"``. Failures are reported as text rather
        than raised.
    """
    if not isinstance(query, str) or not query.strip():
        return "Error searching: query must not be empty"

    try:
        limit = max(1, min(int(max_results), _MAX_SEARCH_RESULTS))
    except (TypeError, ValueError):
        limit = 5

    try:
        import asyncio

        # The synchronous client is used because it exists in every release
        # of the package. NOTE(review): AsyncDDGS appears to be gone from the
        # 7.x series pinned in pyproject; confirm against the changelog.
        from duckduckgo_search import DDGS

        # DDGS.text blocks on network I/O; run it off the event loop.
        results = await asyncio.to_thread(
            lambda: DDGS().text(query, max_results=limit)
        )
    except Exception as e:  # tool boundary: surface the failure to the model
        return f"Error searching: {e}"

    if not results:
        return "No results found."
    return _format_search_results(list(results))