# -*- coding: utf-8 -*-
# pylint:disable=not-an-iterable, redefined-builtin
import json
import time
import uuid
from typing import Dict, List, Optional, Union
from openai.types.chat import ChatCompletion, ChatCompletionChunk
from openai.types.chat.chat_completion_stream_options_param import (
ChatCompletionStreamOptionsParam,
)
from pydantic import BaseModel, Field, model_validator
from typing_extensions import Annotated, Literal
# ToolCall, ToolChoice and ResponseFormat are referenced below; assumed to
# live in .agent_schemas alongside Role, Tool and FunctionCall.
from .agent_schemas import (
    FunctionCall,
    ResponseFormat,
    Role,
    Tool,
    ToolCall,
    ToolChoice,
)


class ImageMessageContent(BaseModel):
    """
    Model class for image prompt message content.
    """

    class ImageUrl(BaseModel):
        """
        Model class for the image URL detail.
        """

        url: str
        """Either a URL of the image or the base64 encoded image data."""

        detail: Literal["auto", "low", "high"] = "low"
        """Specifies the detail level of the image."""

    type: Literal["image_url"] = "image_url"
    """The type of the content part."""

    image_url: ImageUrl
    """The image URL details."""


class TextMessageContent(BaseModel):
    """
    Model class for text prompt message content.
    """

    type: Literal["text"] = "text"
    """The type of the content part."""

    text: str
    """The text content."""


class AudioMessageContent(BaseModel):
    """
    Model class for audio prompt message content.
    """

    class InputAudioDetail(BaseModel):
        """
        Model class for the input audio detail.
        """

        base64_data: str = Field(
            default="",
            description="The base64 encoded data of the multi-modal file.",
        )
        """The base64 encoded audio data."""

        format: str = Field(
            default="mp3",
            description="The format of the encoded audio data. Supports "
            "'wav' and 'mp3'.",
        )
        """The format of the encoded audio data. Supports 'wav' and 'mp3'."""

        @property
        def data(self) -> str:
            """Return the audio as a data URL string."""
            return f"data:{self.format};base64,{self.base64_data}"

    type: Literal["input_audio"] = "input_audio"
    """The type of the content part."""

    input_audio: InputAudioDetail
    """The input audio details."""
ChatCompletionMessage = Annotated[
Union[TextMessageContent, ImageMessageContent, AudioMessageContent],
Field(discriminator="type"),
]
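
# Usage sketch: the discriminated union lets pydantic (v2, as implied by
# ``model_dump``/``model_validator`` elsewhere in this module) dispatch on the
# ``type`` field when validating raw content parts, e.g.:
#
#   from pydantic import TypeAdapter
#   part = TypeAdapter(ChatCompletionMessage).validate_python(
#       {"type": "text", "text": "hello"},
#   )
#   assert isinstance(part, TextMessageContent)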


class OpenAIMessage(BaseModel):
    """
    Model class for prompt message.
    """

    role: str
    """The role of the message's author; should be one of `user`, `system`,
    `assistant`, or `tool`."""

    content: Optional[Union[List[ChatCompletionMessage], str]] = None
    """The contents of the message.
    Can be a string, or a list of content parts for multimodal messages.
    """

    name: Optional[str] = None
    """An optional name for the participant.
    Provides the model information to differentiate between participants of
    the same role.
    """

    tool_calls: Optional[List[ToolCall]] = None
    """The tool calls generated by the model, such as function calls."""

    def get_text_content(self) -> Optional[str]:
        """
        Extract the first text content from the message.

        :return: First text string found in the content, or None if no text
            content is present.
        """
        if self.content is None:
            return None
        # Case 1: content is a simple string
        if isinstance(self.content, str):
            return self.content
        # Case 2: content is a list of content parts
        if isinstance(self.content, list):
            for item in self.content:
                if getattr(item, "type", None) == "text" and hasattr(
                    item,
                    "text",
                ):
                    return item.text
        return None

    def get_image_content(self) -> List[str]:
        """
        Extract all image content (URLs or base64 data) from the message.

        :return: List of image URLs or base64 encoded strings found in the
            content.
        """
        images: List[str] = []
        if self.content is None:
            return images
        # Case 1: content is a simple string - no images
        if isinstance(self.content, str):
            return images
        # Case 2: content is a list of content parts
        if isinstance(self.content, list):
            for item in self.content:
                if (
                    getattr(item, "type", None) == "image_url"
                    and hasattr(item, "image_url")
                    and hasattr(item.image_url, "url")
                ):
                    images.append(item.image_url.url)
        return images

    def get_audio_content(self) -> List[str]:
        """
        Extract all audio content (URLs or base64 data) from the message.

        :return: List of audio URLs or base64 encoded strings found in the
            content.
        """
        audios: List[str] = []
        if self.content is None:
            return audios
        # Case 1: content is a simple string - no audios
        if isinstance(self.content, str):
            return audios
        # Case 2: content is a list of content parts
        if isinstance(self.content, list):
            for item in self.content:
                if getattr(item, "type", None) == "input_audio" and hasattr(
                    item,
                    "input_audio",
                ):
                    if hasattr(item.input_audio, "data"):
                        audios.append(item.input_audio.data)
                    elif hasattr(item.input_audio, "base64_data"):
                        # Construct a data URL for the audio
                        format_type = getattr(
                            item.input_audio,
                            "format",
                            "mp3",
                        )
                        audios.append(
                            f"data:{format_type};base64,"
                            f"{item.input_audio.base64_data}",
                        )
        return audios

    def has_multimodal_content(self) -> bool:
        """
        Check if the message contains multimodal content (images or audio).

        :return: True if the message contains non-text content, False
            otherwise.
        """
        return bool(self.get_image_content() or self.get_audio_content())

    def get_content_summary(self) -> Dict[str, int]:
        """
        Get a summary of the different content types in the message.

        :return: Dictionary with counts of the different content types.
        """
        return {
            "text_count": 1 if self.get_text_content() is not None else 0,
            "image_count": len(self.get_image_content()),
            "audio_count": len(self.get_audio_content()),
        }
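

# Usage sketch: build a multimodal user message with the models above and
# inspect its parts with the helper methods (the URL is hypothetical).
#
#   msg = OpenAIMessage(
#       role="user",
#       content=[
#           TextMessageContent(text="Describe this image."),
#           ImageMessageContent(
#               image_url=ImageMessageContent.ImageUrl(
#                   url="https://example.com/cat.png",
#               ),
#           ),
#       ],
#   )
#   msg.get_text_content()        # -> "Describe this image."
#   msg.get_image_content()       # -> ["https://example.com/cat.png"]
#   msg.has_multimodal_content()  # -> True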


class UserMessage(OpenAIMessage):
    """
    Model class for user prompt message.
    """

    role: str = Role.USER
    """The role of the message's author, in this case `user`."""


class AssistantMessage(OpenAIMessage):
    """
    Model class for assistant prompt message.
    """

    role: str = Role.ASSISTANT
    """The role of the message's author, in this case `assistant`."""


class SystemMessage(OpenAIMessage):
    """
    Model class for system prompt message.
    """

    role: str = Role.SYSTEM
    """The role of the message's author, in this case `system`."""


class Parameters(BaseModel):
    """
    General parameters for LLMs.
    """

    top_p: Optional[float] = None
    """Nucleus sampling, between (0, 1.0], where the model considers the
    results of the tokens with top_p probability mass.
    So 0.1 means only the tokens comprising the top 10% probability mass are
    considered.
    We generally recommend altering this or `temperature` but not both.
    """

    temperature: Optional[float] = None
    """What sampling temperature to use, between 0 and 2.
    Higher values like 0.8 will make the output more random, while lower
    values like 0.2 will make it more focused and deterministic.
    We generally recommend altering this or `top_p` but not both.
    """

    frequency_penalty: Optional[float] = None
    """Positive values penalize new tokens based on their existing frequency
    in the text so far, decreasing the model's likelihood to repeat the same
    line verbatim.
    """

    presence_penalty: Optional[float] = None
    """Number between -2.0 and 2.0.
    Positive values penalize new tokens based on whether they appear in the
    text so far, increasing the model's likelihood to talk about new topics.
    """

    max_tokens: Optional[int] = None
    """The maximum number of tokens that can be generated in the chat
    completion.
    The total length of input tokens and generated tokens is limited by the
    model's context length.
    """

    stop: Optional[Union[str, List[str]]] = None
    """Up to 4 sequences where the API will stop generating further tokens."""

    stream: bool = True
    """If set, partial message deltas will be sent, like in ChatGPT."""

    stream_options: Optional[ChatCompletionStreamOptionsParam] = None
    """Options for the streaming response. Only set this when you set
    `stream: true`."""

    tools: Optional[List[Union[Tool, Dict]]] = None
    """A list of tools the model may call.
    Currently, only functions are supported as a tool. Use this to provide a
    list of functions the model may generate JSON inputs for.
    """

    tool_choice: Optional[Union[str, ToolChoice]] = None
    """Controls which (if any) tool is called by the model."""

    parallel_tool_calls: bool = False
    """Whether to enable parallel function calling during tool use."""

    logit_bias: Optional[Dict[str, int]] = None
    """Modify the likelihood of specified tokens appearing in the completion.
    Accepts a JSON object that maps tokens (specified by their token ID in
    the tokenizer) to an associated bias value from -100 to 100.
    Mathematically, the bias is added to the logits generated by the model
    prior to sampling. The exact effect will vary per model, but values
    between -1 and 1 should decrease or increase the likelihood of selection;
    values like -100 or 100 should result in a ban or exclusive selection of
    the relevant token.
    """

    top_logprobs: Optional[int] = None
    """An integer between 0 and 20 specifying the number of most likely
    tokens to return at each token position, each with an associated log
    probability.
    `logprobs` must be set to `true` if this parameter is used.
    """

    logprobs: Optional[bool] = None
    """Whether to return log probabilities of the output tokens or not.
    If true, returns the log probabilities of each output token returned in
    the `content` of `message`.
    """

    n: Optional[int] = Field(default=1, ge=1, le=5)
    """How many chat completion choices to generate for each input message.
    Note that you will be charged based on the number of generated tokens
    across all of the choices. Keep `n` as `1` to minimize costs.
    """

    seed: Optional[int] = None
    """If specified, the system will make a best effort to sample
    deterministically, such that repeated requests with the same `seed` and
    parameters should return the same result.
    """

    response_format: Optional[Union[ResponseFormat, str]] = ResponseFormat(
        type="text",
    )
    """An object specifying the format that the model must output.
    Setting to `{"type": "json_object"}` enables JSON mode, which guarantees
    the message the model generates is valid JSON.
    """


def create_chat_completion(
    message: OpenAIMessage,
    model_name: str,
    id: str = "",
    finish_reason: Optional[str] = None,
) -> ChatCompletion:
    """Wrap an `OpenAIMessage` in an OpenAI `ChatCompletion` object."""
    # Create the Choice object
    choice = {
        "finish_reason": finish_reason,
        "index": 0,
        "message": message.model_dump(),
        "logprobs": None,
    }
    # Construct the ChatCompletion object
    return ChatCompletion(
        id=id or str(uuid.uuid4()),  # Fall back to a generated unique ID
        choices=[choice],  # List containing at least one Choice
        created=int(time.time()),  # Current timestamp
        model=model_name,  # Adjust based on the actual model used
        object="chat.completion",  # Fixed literal value
        # Optional fields below
        service_tier=None,
        system_fingerprint=None,
        usage=None,
    )


def create_chat_completion_chunk(
    message: OpenAIMessage,
    model_name: str,
    id: str = "",
    finish_reason: Optional[str] = None,
) -> ChatCompletionChunk:
    """Wrap an `OpenAIMessage` in an OpenAI `ChatCompletionChunk` object."""
    # Create the Choice object for the chunk
    choice = {
        "finish_reason": finish_reason,
        "index": 0,
        "logprobs": None,
        "delta": message.model_dump(),
    }
    # Construct the ChatCompletionChunk object
    return ChatCompletionChunk(
        id=id or str(uuid.uuid4()),  # Fall back to a generated unique ID
        choices=[choice],  # List containing at least one Choice
        created=int(time.time()),  # Current timestamp
        model=model_name,  # Adjust based on the actual model used
        object="chat.completion.chunk",  # Fixed literal value
        # Optional fields below
        service_tier=None,
        system_fingerprint=None,
        usage=None,
    )
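

# Usage sketch: these helpers are handy when adapting a non-OpenAI backend to
# the OpenAI response interface ("my-model" is a placeholder name).
#
#   reply = AssistantMessage(content="Hello!")
#   completion = create_chat_completion(
#       reply, model_name="my-model", finish_reason="stop",
#   )
#   chunk = create_chat_completion_chunk(reply, model_name="my-model")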


def is_json_string(s: Union[str, Dict, BaseModel, None]) -> bool:
    """Return True if `s` is a string containing a JSON object or array."""
    if not isinstance(s, str):
        return False
    try:
        obj = json.loads(s)
    except (TypeError, ValueError):
        return False
    return isinstance(obj, (dict, list))
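

# Usage sketch:
#
#   is_json_string('{"a": 1}')    # -> True
#   is_json_string("[1, 2, 3]")   # -> True
#   is_json_string("plain text")  # -> False
#   is_json_string(None)          # -> False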