Source code for agentscope_runtime.engine.schemas.oai_llm

# -*- coding: utf-8 -*-
# pylint:disable=not-an-iterable, redefined-builtin

import json
import time
import uuid
from typing import Dict, List, Optional, Union

from openai.types.chat import ChatCompletion, ChatCompletionChunk
from openai.types.chat.chat_completion_stream_options_param import (
    ChatCompletionStreamOptionsParam,
)
from pydantic import BaseModel, Field, model_validator
from typing_extensions import Annotated, Literal

from .agent_schemas import Role, Tool, FunctionCall


def generate_tool_call_id(prefix: str = "call_") -> str:
    # Generate a random UUID.
    random_uuid = uuid.uuid4()
    # Convert the UUID to a string, strip the '-' separators, and keep
    # the first 22 characters.
    random_part = str(random_uuid).replace("-", "")[:22]
    # Prepend the prefix.
    tool_call_id = f"{prefix}{random_part}"
    return tool_call_id
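

# Usage sketch (illustrative, not part of the original module): each call
# yields a fresh, prefix-tagged id, e.g. for ToolCall.id below.
#
#     call_id = generate_tool_call_id()         # e.g. "call_9f1c2b..."
#     custom_id = generate_tool_call_id("tc_")  # e.g. "tc_04d8e7..."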


class ImageMessageContent(BaseModel):
    class ImageUrl(BaseModel):
        """
        Model class for image prompt message content.
        """

        url: str
        """Either a URL of the image or the base64 encoded image data."""

        detail: Literal["auto", "low", "high"] = "low"
        """Specifies the detail level of the image."""

    type: Literal["image_url"] = "image_url"
    """The type of the content part."""

    image_url: ImageUrl
    """The image URL details."""


class TextMessageContent(BaseModel):
    type: Literal["text"] = "text"
    """The type of the content part."""

    text: str
    """The text content."""


class AudioMessageContent(BaseModel):
    class InputAudioDetail(BaseModel):
        """
        Model class for audio prompt message content.
        """

        base64_data: str = Field(
            default="",
            description="The base64 data of a multi-modal file.",
        )
        """The base64 encoded audio data."""

        format: str = Field(
            default="mp3",
            description="The format of the encoded audio data. Supports "
            "'wav' and 'mp3'.",
        )
        """The format of the encoded audio data. Supports 'wav' and 'mp3'."""

        @property
        def data(self) -> str:
            return f"data:{self.format};base64,{self.base64_data}"

    type: Literal["input_audio"] = "input_audio"
    """The type of the content part."""

    input_audio: InputAudioDetail
    """The input audio details."""


ChatCompletionMessage = Annotated[
    Union[TextMessageContent, ImageMessageContent, AudioMessageContent],
    Field(discriminator="type"),
]
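
# Usage sketch (illustrative, not part of the original module): because the
# union is discriminated on `type`, pydantic selects the matching content
# model when validating a raw dict, e.g. via TypeAdapter:
#
#     from pydantic import TypeAdapter
#
#     adapter = TypeAdapter(ChatCompletionMessage)
#     part = adapter.validate_python({"type": "text", "text": "hi"})
#     assert isinstance(part, TextMessageContent)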


class ToolCall(BaseModel):
    """
    Model class for assistant prompt message tool call.
    """

    index: int = 0
    """The index of the tool call in the tool calls array."""

    id: str
    """The ID of the tool call."""

    type: Optional[str] = None
    """The type of the tool. Currently, only `function` is supported."""

    function: FunctionCall
    """The function that the model called."""
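

# Usage sketch (illustrative, not part of the original module; it assumes
# FunctionCall from .agent_schemas exposes OpenAI-style `name` and
# `arguments` fields):
#
#     tc = ToolCall(
#         id=generate_tool_call_id(),
#         type="function",
#         function=FunctionCall(
#             name="get_weather",  # hypothetical function name
#             arguments='{"city": "Paris"}',
#         ),
#     )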


class OpenAIMessage(BaseModel):
    """
    Model class for prompt message.
    """

    role: str
    """The role of the message's author; should be one of `user`,
    `system`, `assistant`, or `tool`."""

    content: Optional[Union[List[ChatCompletionMessage], str]] = None
    """The contents of the message. Can be a string, or a list of content
    parts for multimodal messages."""

    name: Optional[str] = None
    """An optional name for the participant. Provides the model information
    to differentiate between participants of the same role."""

    tool_calls: Optional[List[ToolCall]] = None
    """The tool calls generated by the model, such as function calls."""

    def get_text_content(self) -> Optional[str]:
        """
        Extract the first text content from the message.

        :return: First text string found in the content, or None if there
            is no text content.
        """
        if self.content is None:
            return None
        # Case 1: content is a plain string.
        if isinstance(self.content, str):
            return self.content
        # Case 2: content is a list of content parts.
        elif isinstance(self.content, list):
            for item in self.content:
                if hasattr(item, "type"):
                    if item.type == "text" and hasattr(item, "text"):
                        return item.text
        return None

    def get_image_content(self) -> List[str]:
        """
        Extract all image content (URLs or base64 data) from the message.

        :return: List of image URLs or base64 encoded strings found in
            the content.
        """
        images = []
        if self.content is None:
            return images
        # Case 1: content is a plain string - no images.
        if isinstance(self.content, str):
            return images
        # Case 2: content is a list of content parts.
        elif isinstance(self.content, list):
            for item in self.content:
                if hasattr(item, "type"):
                    if item.type == "image_url" and hasattr(
                        item,
                        "image_url",
                    ):
                        if hasattr(item.image_url, "url"):
                            images.append(item.image_url.url)
        return images

    def get_audio_content(self) -> List[str]:
        """
        Extract all audio content (URLs or base64 data) from the message.

        :return: List of audio URLs or base64 encoded strings found in
            the content.
        """
        audios = []
        if self.content is None:
            return audios
        # Case 1: content is a plain string - no audios.
        if isinstance(self.content, str):
            return audios
        # Case 2: content is a list of content parts.
        elif isinstance(self.content, list):
            for item in self.content:
                if hasattr(item, "type"):
                    if item.type == "input_audio" and hasattr(
                        item,
                        "input_audio",
                    ):
                        if hasattr(item.input_audio, "data"):
                            audios.append(item.input_audio.data)
                        elif hasattr(item.input_audio, "base64_data"):
                            # Construct a data URL for the audio.
                            format_type = getattr(
                                item.input_audio,
                                "format",
                                "mp3",
                            )
                            audios.append(
                                f"data:{format_type};base64,"
                                f"{item.input_audio.base64_data}",
                            )
        return audios

    def has_multimodal_content(self) -> bool:
        """
        Check if the message contains multimodal content (images or
        audio).

        :return: True if the message contains non-text content, False
            otherwise.
        """
        return bool(
            self.get_image_content() or self.get_audio_content(),
        )

    def get_content_summary(self) -> Dict[str, int]:
        """
        Get a summary of the different content types in the message.

        :return: Dictionary with counts of the different content types.
        """
        return {
            "text_count": 1 if self.get_text_content() is not None else 0,
            "image_count": len(self.get_image_content()),
            "audio_count": len(self.get_audio_content()),
        }
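

# Usage sketch (illustrative, not part of the original module): mixing text
# and image parts, then using the accessor helpers defined above.
#
#     msg = OpenAIMessage(
#         role="user",
#         content=[
#             TextMessageContent(text="What is in this picture?"),
#             ImageMessageContent(
#                 image_url=ImageMessageContent.ImageUrl(
#                     url="https://example.com/cat.png",  # placeholder URL
#                 ),
#             ),
#         ],
#     )
#     msg.get_text_content()        # -> "What is in this picture?"
#     msg.get_image_content()       # -> ["https://example.com/cat.png"]
#     msg.has_multimodal_content()  # -> True
#     msg.get_content_summary()
#     # -> {"text_count": 1, "image_count": 1, "audio_count": 0}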


class UserMessage(OpenAIMessage):
    """
    Model class for user prompt message.
    """

    role: str = Role.USER
    """The role of the message's author, in this case `user`."""


class AssistantMessage(OpenAIMessage):
    """
    Model class for assistant prompt message.
    """

    role: str = Role.ASSISTANT
    """The role of the message's author, in this case `assistant`."""


class SystemMessage(OpenAIMessage):
    """
    Model class for system prompt message.
    """

    role: str = Role.SYSTEM
    """The role of the message's author, in this case `system`."""


class ToolMessage(OpenAIMessage):
    """
    Model class for tool prompt message.
    """

    role: str = Role.TOOL
    """The role of the message's author, in this case `tool`."""

    tool_call_id: str
    """Tool call that this message is responding to."""
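

# Usage sketch (illustrative, not part of the original module): the role
# subclasses only pre-fill `role`; ToolMessage additionally carries the id
# of the tool call it answers.
#
#     reply = ToolMessage(
#         content='{"temperature": 21}',
#         tool_call_id=generate_tool_call_id(),
#     )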


class ResponseFormat(BaseModel):
    class JsonSchema(BaseModel):
        name: str
        """The name of the response format."""

        description: Union[str, None] = None
        """A description of what the response format is for, used by the
        model to determine how to respond in the format."""

        schema_param: dict = Field(None, alias="schema")
        """The schema for the response format, described as a JSON Schema
        object."""

        strict: Union[bool, None] = False
        """Whether to enable strict schema adherence when generating the
        output. If set to true, the model will follow the exact schema
        defined in the `schema` field. Only a subset of JSON Schema is
        supported when `strict` is `true`. Learn more about Structured
        Outputs in the
        [function calling guide](docs/guides/function-calling)."""

    type: Literal["text", "json_object", "json_schema"] = "text"
    """The type of response format being defined.

    - `text`: The default response format; the output can be text or any
      value needed.
    - `json_object`: Enables JSON mode, which guarantees the message the
      model generates is valid JSON.
    - `json_schema`: Enables Structured Outputs, which guarantees the
      model will match your supplied JSON schema.
    """

    json_schema: Optional[JsonSchema] = None
    """The JSON schema for the response format."""

    @model_validator(mode="after")
    def validate_schema(self) -> "ResponseFormat":
        if (
            self.type in ["text", "json_object"]
            and self.json_schema is not None
        ):
            raise ValueError(
                f"Json schema is not allowed for type {self.type}",
            )
        if self.type == "json_schema" and self.json_schema is None:
            raise ValueError(
                f"Json schema is required for type {self.type}",
            )
        return self
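

# Usage sketch (illustrative, not part of the original module): the
# validator rejects inconsistent combinations of `type` and `json_schema`.
#
#     ResponseFormat(type="json_object")              # ok
#     ResponseFormat(
#         type="json_schema",
#         json_schema=ResponseFormat.JsonSchema(name="answer"),
#     )                                               # ok
#     ResponseFormat(type="json_schema")              # raises ValueError
#     ResponseFormat(
#         type="text",
#         json_schema=ResponseFormat.JsonSchema(name="x"),
#     )                                               # raises ValueError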


class ToolChoiceInputFunction(BaseModel):
    name: str
    """The name of the function to call."""


class ToolChoice(BaseModel):
    type: str
    """The type of the tool. Currently, only `function` is supported."""

    function: ToolChoiceInputFunction
    """The function that the model called."""


class Parameters(BaseModel):
    """
    General parameters for LLM calls.
    """

    top_p: Optional[float] = None
    """Nucleus sampling, in (0, 1.0]: the model considers the results of
    the tokens with top_p probability mass. So 0.1 means only the tokens
    comprising the top 10% probability mass are considered. We generally
    recommend altering this or `temperature` but not both."""

    temperature: Optional[float] = None
    """What sampling temperature to use, between 0 and 2. Higher values
    like 0.8 will make the output more random, while lower values like
    0.2 will make it more focused and deterministic. We generally
    recommend altering this or `top_p` but not both."""

    frequency_penalty: Optional[float] = None
    """Positive values penalize new tokens based on their existing
    frequency in the text so far, decreasing the model's likelihood to
    repeat the same line verbatim."""

    presence_penalty: Optional[float] = None
    """Number between -2.0 and 2.0. Positive values penalize new tokens
    based on whether they appear in the text so far, increasing the
    model's likelihood to talk about new topics."""

    max_tokens: Optional[int] = None
    """The maximum number of [tokens](/tokenizer) that can be generated
    in the chat completion. The total length of input tokens and
    generated tokens is limited by the model's context length."""

    stop: Optional[Union[Optional[str], List[str]]] = None
    """Up to 4 sequences where the API will stop generating further
    tokens."""

    stream: bool = True
    """If set, partial message deltas will be sent, like in ChatGPT."""

    stream_options: Optional[ChatCompletionStreamOptionsParam] = None
    """Options for the streaming response. Only set this when you set
    `stream: true`."""

    tools: Optional[List[Union[Tool, Dict]]] = None
    """A list of tools the model may call. Currently, only functions are
    supported as a tool. Use this to provide a list of functions the
    model may generate JSON inputs for."""

    tool_choice: Optional[Union[str, ToolChoice]] = None
    """Controls which (if any) tool is called by the model."""

    parallel_tool_calls: bool = False
    """Whether to enable parallel function calling during tool use."""

    logit_bias: Optional[Dict[str, int]] = None
    """Modify the likelihood of specified tokens appearing in the
    completion. Accepts a JSON object that maps tokens (specified by
    their token ID in the tokenizer) to an associated bias value from
    -100 to 100. Mathematically, the bias is added to the logits
    generated by the model prior to sampling. The exact effect will vary
    per model, but values between -1 and 1 should decrease or increase
    likelihood of selection; values like -100 or 100 should result in a
    ban or exclusive selection of the relevant token."""

    top_logprobs: Optional[int] = None
    """An integer between 0 and 20 specifying the number of most likely
    tokens to return at each token position, each with an associated log
    probability. `logprobs` must be set to `true` if this parameter is
    used."""

    logprobs: Optional[bool] = None
    """Whether to return log probabilities of the output tokens or not.
    If true, returns the log probabilities of each output token returned
    in the `content` of `message`."""

    n: Optional[int] = Field(default=1, ge=1, le=5)
    """How many chat completion choices to generate for each input
    message. Note that you will be charged based on the number of
    generated tokens across all of the choices. Keep `n` as `1` to
    minimize costs."""

    seed: Optional[int] = None
    """If specified, the system will make a best effort to sample
    deterministically, such that repeated requests with the same `seed`
    and parameters should return the same result."""

    response_format: Optional[Union[ResponseFormat, str]] = ResponseFormat(
        type="text",
    )
    """An object specifying the format that the model must output.
    Setting to `{"type": "json_object"}` enables JSON mode, which
    guarantees the message the model generates is valid JSON."""


def create_chat_completion(
    message: OpenAIMessage,
    model_name: str,
    id: str = "",
    finish_reason: Optional[str] = None,
) -> ChatCompletion:
    # Build the single Choice entry.
    choice = {
        "finish_reason": finish_reason,
        "index": 0,
        "message": message.model_dump(),
        "logprobs": None,
    }
    # Construct the ChatCompletion object.
    return ChatCompletion(
        id=id,  # Caller-supplied unique ID
        choices=[choice],  # List containing at least one Choice
        created=int(time.time()),  # Current timestamp
        model=model_name,  # Adjust based on the actual model used
        object="chat.completion",  # Fixed literal value
        # Optional fields below
        service_tier=None,
        system_fingerprint=None,
        usage=None,
    )


def create_chat_completion_chunk(
    message: OpenAIMessage,
    model_name: str,
    id: str = "",
    finish_reason: Optional[str] = None,
) -> ChatCompletionChunk:
    # Build the single Choice entry for the chunk.
    choice = {
        "finish_reason": finish_reason,
        "index": 0,
        "logprobs": None,
        "delta": message.model_dump(),
    }
    # Construct the ChatCompletionChunk object.
    return ChatCompletionChunk(
        id=id,  # Caller-supplied unique ID
        choices=[choice],  # List containing at least one Choice
        created=int(time.time()),  # Current timestamp
        model=model_name,  # Adjust based on the actual model used
        object="chat.completion.chunk",  # Fixed literal value
        # Optional fields below
        service_tier=None,
        system_fingerprint=None,
        usage=None,
    )
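

# Usage sketch (illustrative, not part of the original module): wrapping an
# assistant message in OpenAI-compatible response objects; the model name
# and id are placeholders.
#
#     reply = AssistantMessage(content="Hello!")
#     completion = create_chat_completion(
#         message=reply,
#         model_name="my-model",
#         id="chatcmpl-123",
#         finish_reason="stop",
#     )
#     chunk = create_chat_completion_chunk(
#         reply,
#         "my-model",
#         id="chatcmpl-123",
#     )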


def is_json_string(s: Union[str, Dict, BaseModel, None]) -> bool:
    try:
        obj = json.loads(s)  # type: ignore[arg-type]
        return isinstance(obj, (dict, list))
    except Exception:
        return False
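

# Usage sketch (illustrative, not part of the original module): only JSON
# objects and arrays count, not bare scalars or non-string input.
#
#     is_json_string('{"a": 1}')   # -> True
#     is_json_string('[1, 2, 3]')  # -> True
#     is_json_string('"plain"')    # -> False (scalar)
#     is_json_string(None)         # -> False (json.loads raises TypeError)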