# -*- coding: utf-8 -*-
import os
from typing import List, Literal, Optional, Union
from openai.types.chat import ChatCompletion, ChatCompletionChunk
from pydantic import (
    BaseModel,
    StrictInt,
    field_validator,
    Field,
)

from .oai_llm import (
    Parameters,
    OpenAIMessage,
)


class KnowledgeHolder(BaseModel):
    source: str
    """The source identifier or URL where the knowledge was retrieved from."""

    content: str
    """The actual content or knowledge text retrieved from the source."""


class IntentionOptions(BaseModel):
    white_list: List[str] = Field(default_factory=list)
    """A list of allowed intentions that can be processed."""

    black_list: List[str] = Field(default_factory=list)
    """A list of blocked intentions that should not be processed."""

    search_model: str = "search_v6"
    """The search model version to use for intention recognition."""

    intensity: Optional[int] = None
    """The intensity level for intention matching and processing."""

    scene_id: Optional[str] = None
    """The scene identifier for context-aware intention processing."""


class SearchOptions(BaseModel):
    """
    Search options on the Modelstudio platform for knowledge retrieval and
    web search.
    """

    enable_source: bool = False
    """Whether to include source information in search results."""

    enable_citation: bool = False
    """Whether to include citation information for retrieved content."""

    enable_readpage: bool = False
    """Whether to enable full page reading for web content."""

    enable_online_read: bool = False
    """Whether to enable online reading capabilities for real-time content."""

    citation_format: str = "[<number>]"
    """The format string for citations in the response."""

    search_strategy: Literal[
        "standard",
        "pro_ultra",
        "pro",
        "lite",
        "pro_max",
        "image",
        "turbo",
        "max",
    ] = "turbo"
    """The search strategy to use ('standard', 'pro_ultra', 'pro', 'lite',
    'pro_max', 'image', 'turbo', 'max')."""

    forced_search: bool = False
    """Whether to force search even when cached results are available."""

    prepend_search_result: bool = False
    """Whether to prepend search results to the response."""

    enable_search_extension: bool = False
    """Whether to enable extended search capabilities."""

    item_cnt: int = 20000
    """The maximum number of items to retrieve in search results."""

    top_n: int = 0
    """The number of top results to return (0 means return all)."""

    intention_options: Union[IntentionOptions, None] = IntentionOptions()
    """Options for intention recognition and processing during search."""


# Allowed range for maximum_allowed_chunk_num, [1, 20] by default;
# both bounds can be overridden via environment variables.
PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN = int(
    os.getenv(
        "PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN",
        "1",
    ),
)
PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX = int(
    os.getenv(
        "PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX",
        "20",
    ),
)


class RagOptions(BaseModel):
    """Configuration options for RAG (Retrieval-Augmented Generation) on the
    Modelstudio platform."""

    model_config = {"populate_by_name": True}

    class FallbackOptions(BaseModel):
        default_response_type: Optional[str] = "llm"
        """The type of default response when RAG fails ('llm', 'template',
        'none')."""

        default_response: Optional[str] = ""
        """The default response text to use when RAG fails."""

    class RewriteOptions(BaseModel):
        model_name: Optional[str] = None
        """The model name to use for rewriting."""

        class_name: Optional[str] = None
        """The class name to use for rewriting."""

    class RerankOptions(BaseModel):
        model_name: Optional[str] = None
        """The model name to use for reranking."""

    workspace_id: Optional[str] = ""
    """The Modelstudio workspace ID."""

    replaced_word: str = "${documents}"
    """The placeholder word in prompts that will be replaced with retrieved
    documents."""

    index_names: Optional[List[str]] = Field(default_factory=list)
    """List of index names to use for document processing and retrieval."""

    pipeline_ids: Optional[List[str]] = Field(default_factory=list)
    """List of pipeline IDs to use for document processing and retrieval."""

    file_ids: Optional[List[str]] = Field(
        default_factory=list,
        alias="file_id_list",
    )
    """List of specific file IDs to search within."""

    prompt_strategy: Optional[str] = Field(
        default="topK",
        alias="prompt_strategy_name",
    )
    """The strategy for selecting and organizing retrieved content in
    prompts."""

    maximum_allowed_chunk_num: Optional[int] = 5
    """The maximum number of document chunks to include in the context."""

    maximum_allowed_length: Optional[int] = 2000
    """The maximum total length of retrieved content in characters."""

    enable_citation: bool = Field(
        default=False,
        alias="prompt_enable_citation",
    )
    """Whether to include citation information for retrieved documents."""

    fallback_options: Optional[FallbackOptions] = None
    """Options for handling cases when RAG retrieval fails."""

    enable_web_search: bool = False
    """Whether to enable web search as part of the RAG pipeline."""

    session_file_ids: Optional[List[str]] = Field(default_factory=list)
    """List of file IDs that are specific to the current session."""

    dense_similarity_top_k: Optional[int] = 100
    """The number of most similar dense vectors to retrieve."""

    sparse_similarity_top_k: Optional[int] = 100
    """The number of most similar sparse vectors to retrieve."""

    enable_rewrite: Optional[bool] = None
    """Whether to enable content rewriting during RAG."""

    rewrite: Optional[List[RewriteOptions]] = None
    """Options for content rewriting."""

    enable_reranking: Optional[bool] = None
    """Whether to enable content reranking."""

    rerank_min_score: Optional[float] = None
    """The minimum score threshold for content reranking."""

    rerank_top_n: Optional[int] = None
    """The number of top results to return after content reranking."""

    rerank: Optional[List[RerankOptions]] = None
    """Options for content reranking."""

    enable_reject_filter: Optional[bool] = None
    """Whether to enable content rejection filtering."""

    reject_filter_type: Optional[str] = None
    """The type of content rejection filter to use."""

    reject_filter_model_name: Optional[str] = None
    """The name of the model to use for content rejection filtering."""

    reject_filter_prompt: Optional[str] = None
    """The prompt to use for content rejection filtering."""

    enable_agg_search: Optional[bool] = None
    """Whether to enable aggregation search."""

    enable_hybrid_gen: Optional[bool] = None
    """Whether to enable hybrid generation."""
@field_validator("prompt_strategy")
@classmethod
def prompt_strategy_check(cls, value: str) -> str:
if value:
value = value.lower()
if value in ["topk", "top_k"]:
return "topK"
return value
@field_validator("maximum_allowed_chunk_num")
@classmethod
def maximum_allowed_chunk_num_check(cls, value: int) -> int:
if value < int(PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN) or value > int(
PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX,
):
raise KeyError(
f"Range of maximum_allowed_chunk_num should be "
f"[{PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN}, "
f"{PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX}]",
)
return value
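
# Illustrative sketch: populate_by_name accepts both field names and their
# aliases, and the prompt_strategy validator normalizes "top_k"/"topk" to
# "topK" (the file ID below is hypothetical):
#
#     rag = RagOptions(
#         file_id_list=["file-123"],       # via the alias
#         prompt_strategy_name="top_k",    # normalized to "topK"
#         maximum_allowed_chunk_num=5,     # must stay within [MIN, MAX]
#     )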


class ModelstudioParameters(Parameters):
    """
    Parameters for the Modelstudio platform, extending the base Parameters
    with Modelstudio-specific options.
    """

    repetition_penalty: Union[float, None] = None
    """Penalty for repeating tokens. Higher values reduce repetition."""

    length_penalty: Union[float, None] = None
    """Penalty applied to longer sequences. Affects the length of generated
    text."""

    top_k: Union[StrictInt, None] = None
    """The number of highest probability vocabulary tokens to keep for top-k
    filtering."""

    min_tokens: Optional[int] = None
    """The minimum number of tokens to generate before stopping."""

    result_format: Literal["text", "message"] = "message"
    """The format of the response ('text' for plain text, 'message' for a
    structured message)."""

    incremental_output: bool = False
    """Whether to return incremental output during generation."""

    # Search
    enable_search: bool = False
    """Whether to enable search capabilities for knowledge retrieval."""

    search_options: Optional[SearchOptions] = SearchOptions()
    """Configuration options for search functionality."""

    # RAG (the Modelstudio assistant service)
    enable_rag: bool = False
    """Whether to enable Retrieval-Augmented Generation (RAG) for the
    Modelstudio assistant service."""

    rag_options: Union[RagOptions, None] = None
    """Configuration options for RAG functionality."""

    selected_model: Optional[str] = "qwen-max"
    """The selected model name to use for generation."""

    # Intention
    intention_options: Optional[IntentionOptions] = None
    """Options for intention recognition and processing."""

    # MCP Servers
    mcp_config_file: Optional[str] = None
    """Path to the MCP (Model Context Protocol) configuration file."""


class ModelstudioChatRequest(ModelstudioParameters):
    messages: List[OpenAIMessage]
    """A list of messages comprising the conversation so far."""

    model: str
    """ID of the model to use for the chat completion."""


class ModelstudioChatResponse(ChatCompletion):
    """Chat completion response for the Modelstudio platform; re-uses the
    OpenAI ChatCompletion schema unchanged."""


class ModelstudioChatCompletionChunk(ChatCompletionChunk):
    """Streaming chat completion chunk for the Modelstudio platform; re-uses
    the OpenAI ChatCompletionChunk schema unchanged."""
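
# End-to-end construction sketch (assumes OpenAIMessage accepts role/content
# keyword arguments; the model name and message text are illustrative):
#
#     request = ModelstudioChatRequest(
#         model="qwen-max",
#         messages=[OpenAIMessage(role="user", content="Hello")],
#         enable_search=True,
#     )
#     payload = request.model_dump(exclude_none=True, by_alias=True)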