- Discord channel now downloads and extracts text from attachments (text files, PDFs) - Added WebSearchTool using DuckDuckGo for researcher and coder agents - Improved WebFetchTool with User-Agent header and HTML-to-text stripping - Added pypdf and duckduckgo-search dependencies Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
185 lines
6.8 KiB
Python
185 lines
6.8 KiB
Python
"""Discord channel — bot integration via discord.py."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import io
|
|
import os
|
|
|
|
import discord
|
|
import httpx
|
|
from loguru import logger
|
|
|
|
from xtrm_agent.bus import Attachment, InboundMessage, MessageBus, OutboundMessage
|
|
from xtrm_agent.channels.base import BaseChannel
|
|
|
|
# Extensions treated as plain text (decoded as UTF-8).
_TEXT_EXTENSIONS = frozenset({
    ".txt", ".py", ".md", ".json", ".yaml", ".yml", ".csv", ".log",
    ".js", ".ts", ".html", ".css", ".xml", ".toml", ".ini", ".sh",
    ".sql", ".rs", ".go", ".java", ".c", ".cpp", ".h", ".rb", ".php",
    ".swift", ".kt", ".r", ".cfg", ".env", ".conf", ".dockerfile",
    ".makefile", ".bat", ".ps1", ".lua", ".zig", ".hs",
})

# Extensions recognised as images; their content is not read as text.
_IMAGE_EXTENSIONS = frozenset({".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".svg"})

# Largest attachment (in bytes) that will be processed: 1 MB expressed as
# 2**20 bytes, the same unit the "file skipped" message uses when it reports
# sizes (bytes / 1_048_576). The previous value, 1_024_000, mixed decimal and
# binary units and rejected files that were still displayed as "1.0 MB".
_MAX_ATTACHMENT_SIZE = 1_048_576  # 1 MB
|
|
|
|
|
|
class DiscordChannel(BaseChannel):
    """Discord bot channel.

    Messages received from Discord are published to the message bus as
    ``InboundMessage`` objects (attachments are converted to text where
    possible). After publishing, the handler waits for the next item on the
    "discord" outbound queue and sends its content back to the Discord
    channel the message came from.
    """

    def __init__(
        self,
        bus: MessageBus,
        token_env: str = "DISCORD_BOT_TOKEN",
        default_agent: str = "coder",
        allowed_users: list[str] | None = None,
    ) -> None:
        """Create the Discord client and register its event handlers.

        Args:
            bus: Message bus used to publish inbound messages and to
                subscribe to outbound "discord" messages.
            token_env: Name of the environment variable holding the bot token.
            default_agent: Stored on the instance as ``default_agent``.
            allowed_users: Optional allowlist of Discord user ids (as
                strings). When empty or ``None`` every non-bot user is served.
        """
        super().__init__(bus)
        self.token_env = token_env
        self.default_agent = default_agent
        self.allowed_users = set(allowed_users or [])
        self._outbound_queue = bus.subscribe_outbound("discord")

        intents = discord.Intents.default()
        # Required to read message text.
        intents.message_content = True
        self.client = discord.Client(intents=intents)
        self._setup_events()

    def _setup_events(self) -> None:
        """Register the ``on_ready`` and ``on_message`` handlers on the client."""

        @self.client.event
        async def on_ready() -> None:
            logger.info(f"Discord bot connected as {self.client.user}")

        @self.client.event
        async def on_message(message: discord.Message) -> None:
            # Ignore our own messages and those of any other bot.
            if message.author == self.client.user:
                return
            if message.author.bot:
                return

            # Check allowlist — if set, only respond to listed users
            if self.allowed_users and str(message.author.id) not in self.allowed_users:
                return

            content = self._strip_bot_mention(message.content)
            attachments = await self._extract_attachments(message.attachments)

            msg = InboundMessage(
                channel="discord",
                sender_id=str(message.author.id),
                chat_id=str(message.channel.id),
                content=content,
                metadata={"guild_id": str(message.guild.id) if message.guild else ""},
                attachments=attachments,
            )
            await self.bus.publish_inbound(msg)

            # Wait for response and send it.
            # NOTE(review): every handler reads from the same outbound queue,
            # so with overlapping requests a reply may be picked up by a
            # handler waiting on a different conversation — confirm whether
            # the bus guarantees ordering/correlation.
            try:
                async with message.channel.typing():
                    out = await asyncio.wait_for(self._outbound_queue.get(), timeout=300)
                    await self._send_chunked(message.channel, out.content)
            except asyncio.TimeoutError:
                await message.channel.send("Sorry, I timed out processing your request.")

    def _strip_bot_mention(self, content: str) -> str:
        """Remove mentions of the bot from *content* and trim whitespace.

        Both mention spellings are removed: ``<@id>`` and the nickname form
        ``<@!id>``. When the client has no user yet, *content* is returned
        unchanged.
        """
        bot_user = self.client.user
        if not bot_user:
            return content
        for mention in (f"<@{bot_user.id}>", f"<@!{bot_user.id}>"):
            content = content.replace(mention, "")
        return content.strip()

    async def _extract_attachments(
        self, discord_attachments: list[discord.Attachment]
    ) -> list[Attachment]:
        """Download Discord attachments and extract text content.

        Returns one ``Attachment`` per input attachment, in the same order.
        Its ``content`` is either the extracted text or a parenthesised
        placeholder explaining why no text is available (too large, image,
        unsupported type, download or parse failure).
        """
        results: list[Attachment] = []
        if not discord_attachments:
            return results

        # One HTTP client is shared by all downloads for this message instead
        # of opening a new one per attachment.
        async with httpx.AsyncClient(timeout=30) as http:
            for att in discord_attachments:
                text = await self._attachment_text(http, att)
                results.append(Attachment(filename=att.filename, content=text))
        return results

    async def _attachment_text(
        self, http: httpx.AsyncClient, att: discord.Attachment
    ) -> str:
        """Return the text of *att*, or a placeholder describing why there is none."""
        name = att.filename.lower()
        ext = "." + name.rsplit(".", 1)[-1] if "." in name else ""

        if att.size > _MAX_ATTACHMENT_SIZE:
            return f"(file skipped — {att.size / 1_048_576:.1f} MB exceeds 1 MB limit)"

        if ext in _IMAGE_EXTENSIONS:
            return "(image attached — cannot read image content)"

        is_pdf = ext == ".pdf"
        is_text = ext in _TEXT_EXTENSIONS or bool(
            att.content_type and att.content_type.startswith("text/")
        )
        if not (is_pdf or is_text):
            # Decided before downloading: nothing could be done with the bytes.
            return f"(unsupported file type: {ext or 'unknown'})"

        try:
            resp = await http.get(att.url)
            resp.raise_for_status()
            raw = resp.content
        except Exception as e:  # best effort: a failed download must not drop the message
            logger.warning(f"Failed to download attachment {att.filename}: {e}")
            return f"(failed to download: {e})"

        if is_pdf:
            try:
                # PDF parsing is synchronous; run it in the default executor
                # so the event loop stays responsive meanwhile.
                loop = asyncio.get_running_loop()
                text = await loop.run_in_executor(None, self._pdf_text, raw)
            except Exception as e:  # best effort: malformed PDFs are reported, not raised
                logger.warning(f"Failed to extract PDF text from {att.filename}: {e}")
                return f"(failed to read PDF: {e})"
            return text or "(PDF has no extractable text)"

        # errors="replace" substitutes undecodable bytes, so no decode error
        # handling is needed here.
        return raw.decode("utf-8", errors="replace")

    @staticmethod
    def _pdf_text(raw: bytes) -> str:
        """Return the text of all pages of the PDF in *raw*, newline-joined and stripped."""
        # Imported lazily so pypdf is only loaded when a PDF is actually received.
        from pypdf import PdfReader

        reader = PdfReader(io.BytesIO(raw))
        return "\n".join(page.extract_text() or "" for page in reader.pages).strip()

    async def _send_chunked(
        self, channel: discord.abc.Messageable, content: str
    ) -> None:
        """Send a message, splitting into 2000-char chunks if needed.

        Nothing is sent when *content* is empty.
        """
        chunk_size = 2000
        for offset in range(0, len(content), chunk_size):
            await channel.send(content[offset:offset + chunk_size])

    async def start(self) -> None:
        """Start the Discord client using the token from ``self.token_env``.

        Logs an error and returns without starting when the environment
        variable is unset or empty.
        """
        token = os.environ.get(self.token_env)
        if not token:
            logger.error(f"Discord token not found in env var '{self.token_env}'")
            return
        logger.info("Starting Discord bot...")
        await self.client.start(token)

    async def stop(self) -> None:
        """Close the Discord client."""
        await self.client.close()
|