Multi-agent AI automation system with shared message bus, specialized roles (coder/researcher/reviewer), and deny-by-default security.

- Config system with Pydantic validation and YAML loading
- Async message bus with inter-agent delegation
- LLM providers: Anthropic (Claude) and LiteLLM (DeepSeek/Kimi/MiniMax)
- Tool system: registry, builtins (file/bash/web), approval engine, MCP client
- Agent engine with tool-calling loop and orchestrator for multi-agent management
- CLI channel (REPL) and Discord channel
- Docker + Dockge deployment config
- Typer CLI: chat, serve, status, agents commands

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
"""LiteLLM provider — DeepSeek, Kimi, MiniMax, and more."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from typing import Any
|
|
|
|
import litellm
|
|
from json_repair import repair_json
|
|
|
|
from xtrm_agent.llm.provider import LLMProvider, LLMResponse, ToolCallRequest
|
|
|
|
|
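
# The base types imported above live in xtrm_agent.llm.provider. As a rough
# sketch inferred from how they are used below (field shapes are an
# assumption for readability, not the actual definitions):
#
#     class ToolCallRequest:   # one tool invocation requested by the model
#         id: str
#         name: str
#         arguments: dict[str, Any]
#
#     class LLMResponse:       # normalized completion result
#         content: str
#         tool_calls: list[ToolCallRequest]
#         finish_reason: str
#         usage: dict[str, int]
#
#     class LLMProvider:       # abstract base: complete() / get_default_model()
#         ...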


class LiteLLMProvider(LLMProvider):
    """Multi-provider via LiteLLM."""

    def __init__(self, model: str = "deepseek/deepseek-chat-v3.1") -> None:
        self.model = model
        # Drop request params the target provider does not support instead
        # of raising an error (providers differ in what they accept).
        litellm.drop_params = True

    async def complete(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]] | None = None,
        model: str | None = None,
        max_tokens: int = 8192,
        temperature: float = 0.3,
    ) -> LLMResponse:
        model = model or self.model

        kwargs: dict[str, Any] = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
        }
        if tools:
            kwargs["tools"] = tools
            # "auto" lets the model decide whether to call a tool or answer
            # directly in text.
            kwargs["tool_choice"] = "auto"

        response = await litellm.acompletion(**kwargs)
        return self._parse_response(response)
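
    # Example of the tools schema that complete() expects. LiteLLM follows
    # the OpenAI function-calling format; the "read_file" entry below is
    # purely illustrative, not an actual tool from the registry:
    #
    #     [{
    #         "type": "function",
    #         "function": {
    #             "name": "read_file",
    #             "description": "Read a file from the workspace.",
    #             "parameters": {
    #                 "type": "object",
    #                 "properties": {"path": {"type": "string"}},
    #                 "required": ["path"],
    #             },
    #         },
    #     }]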

    def get_default_model(self) -> str:
        return self.model

    def _parse_response(self, response: Any) -> LLMResponse:
        """Parse a LiteLLM (OpenAI-format) response."""
        choice = response.choices[0]
        message = choice.message

        content = message.content or ""
        tool_calls: list[ToolCallRequest] = []

        if message.tool_calls:
            for tc in message.tool_calls:
                # In the OpenAI format, arguments arrive as a JSON string;
                # _parse_arguments also handles malformed JSON.
                args = self._parse_arguments(tc.function.arguments)
                tool_calls.append(
                    ToolCallRequest(
                        id=tc.id,
                        name=tc.function.name,
                        arguments=args,
                    )
                )

        usage_data: dict[str, int] = {}
        if hasattr(response, "usage") and response.usage:
            usage_data = {
                "input_tokens": getattr(response.usage, "prompt_tokens", 0),
                "output_tokens": getattr(response.usage, "completion_tokens", 0),
            }

        return LLMResponse(
            content=content,
            tool_calls=tool_calls,
            finish_reason=choice.finish_reason or "",
            usage=usage_data,
        )

    def _parse_arguments(self, raw: str | dict) -> dict[str, Any]:
        """Parse tool call arguments, using json-repair for malformed JSON."""
        if isinstance(raw, dict):
            return raw
        try:
            return json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            # Some models emit slightly broken JSON (trailing commas, single
            # quotes, unquoted keys); try to repair before giving up, e.g.
            # '{"path": "a.txt",}' -> {"path": "a.txt"}.
            try:
                repaired = repair_json(raw)
                result = json.loads(repaired)
                return result if isinstance(result, dict) else {}
            except Exception:
                return {}
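

if __name__ == "__main__":  # pragma: no cover
    # Minimal usage sketch, not part of the provider itself. Assumes a
    # DEEPSEEK_API_KEY is set in the environment for the default
    # deepseek/deepseek-chat-v3.1 model; swap in any LiteLLM model string.
    import asyncio

    async def _demo() -> None:
        provider = LiteLLMProvider()
        resp = await provider.complete(
            messages=[{"role": "user", "content": "Reply with one word."}]
        )
        print(resp.content, resp.finish_reason, resp.usage)

    asyncio.run(_demo())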