Source code for agentscope_runtime.engine.schemas.modelstudio_llm

# -*- coding: utf-8 -*-
import os
from typing import List, Literal, Optional, Union

from openai.types.chat import ChatCompletion, ChatCompletionChunk
from pydantic import (
    BaseModel,
    StrictInt,
    field_validator,
    Field,
)

from .oai_llm import (
    Parameters,
    OpenAIMessage,
)


class KnowledgeHolder(BaseModel):
    source: str
    """The source identifier or URL where the knowledge was retrieved from."""

    content: str
    """The actual content or knowledge text retrieved from the source."""


class IntentionOptions(BaseModel):
    white_list: List[str] = Field(default_factory=list)
    """A list of allowed intentions that can be processed."""

    black_list: List[str] = Field(default_factory=list)
    """A list of blocked intentions that should not be processed."""

    search_model: str = "search_v6"
    """The search model version to use for intention recognition."""

    intensity: Optional[int] = None
    """The intensity level for intention matching and processing."""

    scene_id: Optional[str] = None
    """The scene identifier for context-aware intention processing."""


class SearchOptions(BaseModel):
    """
    Search options on the Modelstudio platform for knowledge retrieval and
    web search.
    """

    enable_source: bool = False
    """Whether to include source information in search results."""

    enable_citation: bool = False
    """Whether to include citation information for retrieved content."""

    enable_readpage: bool = False
    """Whether to enable full-page reading for web content."""

    enable_online_read: bool = False
    """Whether to enable online reading capabilities for real-time content."""

    citation_format: str = "[<number>]"
    """The format string for citations in the response."""

    search_strategy: Literal[
        "standard",
        "pro_ultra",
        "pro",
        "lite",
        "pro_max",
        "image",
        "turbo",
        "max",
    ] = "turbo"
    """The search strategy to use ('standard', 'pro_ultra', 'pro', 'lite',
    'pro_max', 'image', 'turbo', 'max')."""

    forced_search: bool = False
    """Whether to force a search even when cached results are available."""

    prepend_search_result: bool = False
    """Whether to prepend search results to the response."""

    enable_search_extension: bool = False
    """Whether to enable extended search capabilities."""

    item_cnt: int = 20000
    """The maximum number of items to retrieve in search results."""

    top_n: int = 0
    """The number of top results to return (0 means return all)."""

    intention_options: Union[IntentionOptions, None] = IntentionOptions()
    """Options for intention recognition and processing during search."""
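

# Usage sketch (illustrative, not part of the module): SearchOptions and the
# IntentionOptions it embeds are plain pydantic models, so enabling platform
# web search is a matter of constructing them with the desired flags. The
# concrete values below are examples only.
#
#     options = SearchOptions(
#         search_strategy="pro",
#         enable_citation=True,
#         intention_options=IntentionOptions(
#             white_list=["weather", "news"],
#             scene_id="demo_scene",
#         ),
#     )
#     assert options.top_n == 0  # default: return all results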


# Maximum number of chunks from the knowledge base, bounded to [1, 20] by
# default. Both bounds can be overridden via environment variables.
PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN = int(
    os.getenv("PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN", "1"),
)
PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX = int(
    os.getenv("PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX", "20"),
)
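
# Note that both bounds are read once at import time, so any override must be
# set in the environment before this module is imported, e.g. (illustrative):
#
#     os.environ["PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX"] = "30"
#
# Non-integer values raise ValueError from int() when the module loads.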


class RagOptions(BaseModel):
    model_config = {"populate_by_name": True}

    class FallbackOptions(BaseModel):
        default_response_type: Optional[str] = "llm"
        """The type of default response when RAG fails ('llm', 'template',
        'none')."""

        default_response: Optional[str] = ""
        """The default response text to use when RAG fails."""

    class RewriteOptions(BaseModel):
        model_name: Optional[str] = None
        """The model name to use for rewriting."""

        class_name: Optional[str] = None
        """The class name to use for rewriting."""

    class RerankOptions(BaseModel):
        model_name: Optional[str] = None
        """The model name to use for reranking."""

    workspace_id: Optional[str] = ""
    """The Modelstudio workspace ID."""

    replaced_word: str = "${documents}"
    """The placeholder in prompts that will be replaced with retrieved
    documents."""

    index_names: Optional[List[str]] = Field(default_factory=list)
    """List of index names to use for document processing and retrieval."""

    pipeline_ids: Optional[List[str]] = Field(default_factory=list)
    """List of pipeline IDs to use for document processing and retrieval."""

    file_ids: Optional[List[str]] = Field(
        default_factory=list,
        alias="file_id_list",
    )
    """List of specific file IDs to search within."""

    prompt_strategy: Optional[str] = Field(
        default="topK",
        alias="prompt_strategy_name",
    )
    """The strategy for selecting and organizing retrieved content in
    prompts."""

    maximum_allowed_chunk_num: Optional[int] = 5
    """The maximum number of document chunks to include in the context."""

    maximum_allowed_length: Optional[int] = 2000
    """The maximum total length of retrieved content in characters."""

    enable_citation: bool = Field(
        default=False,
        alias="prompt_enable_citation",
    )
    """Whether to include citation information for retrieved documents."""

    fallback_options: Optional[FallbackOptions] = None
    """Options for handling cases when RAG retrieval fails."""

    enable_web_search: bool = False
    """Whether to enable web search as part of the RAG pipeline."""

    session_file_ids: Optional[List[str]] = Field(default_factory=list)
    """List of file IDs that are specific to the current session."""

    dense_similarity_top_k: Optional[int] = 100
    """The number of most similar dense vectors to retrieve."""

    sparse_similarity_top_k: Optional[int] = 100
    """The number of most similar sparse vectors to retrieve."""

    enable_rewrite: Optional[bool] = None
    """Whether to enable content rewriting during RAG."""

    rewrite: Optional[List[RewriteOptions]] = None
    """Options for content rewriting."""

    enable_reranking: Optional[bool] = None
    """Whether to enable content reranking."""

    rerank_min_score: Optional[float] = None
    """The minimum score threshold for content reranking."""

    rerank_top_n: Optional[int] = None
    """The number of top results to return from content reranking."""

    rerank: Optional[List[RerankOptions]] = None
    """Options for content reranking."""

    enable_reject_filter: Optional[bool] = None
    """Whether to enable content rejection filtering."""

    reject_filter_type: Optional[str] = None
    """The type of content rejection filter to use."""

    reject_filter_model_name: Optional[str] = None
    """The name of the model to use for content rejection filtering."""

    reject_filter_prompt: Optional[str] = None
    """The prompt to use for content rejection filtering."""

    enable_agg_search: Optional[bool] = None
    """Whether to enable aggregation search."""

    enable_hybrid_gen: Optional[bool] = None
    """Whether to enable hybrid generation."""
[docs] @field_validator("prompt_strategy") @classmethod def prompt_strategy_check(cls, value: str) -> str: if value: value = value.lower() if value in ["topk", "top_k"]: return "topK" return value
[docs] @field_validator("maximum_allowed_chunk_num") @classmethod def maximum_allowed_chunk_num_check(cls, value: int) -> int: if value < int(PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN) or value > int( PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX, ): raise KeyError( f"Range of maximum_allowed_chunk_num should be " f"[{PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MIN}, " f"{PARAM_MAXIMUM_ALLOWED_CHUNK_NUM_MAX}]", ) return value


class ModelstudioParameters(Parameters):
    """
    Parameters for the Modelstudio platform, extending the base Parameters
    with Modelstudio-specific options.
    """

    repetition_penalty: Union[float, None] = None
    """Penalty for repeating tokens. Higher values reduce repetition."""

    length_penalty: Union[float, None] = None
    """Penalty applied to longer sequences. Affects the length of generated
    text."""

    top_k: Union[StrictInt, None] = None
    """The number of highest-probability vocabulary tokens to keep for top-k
    filtering."""

    min_tokens: Optional[int] = None
    """The minimum number of tokens to generate before stopping."""

    result_format: Literal["text", "message"] = "message"
    """The format of the response ('text' for plain text, 'message' for a
    structured message)."""

    incremental_output: bool = False
    """Whether to return incremental output during generation."""

    # Search
    enable_search: bool = False
    """Whether to enable search capabilities for knowledge retrieval."""

    search_options: Optional[SearchOptions] = SearchOptions()
    """Configuration options for search functionality."""

    # RAG
    enable_rag: bool = False  # RAG of the Modelstudio assistant service
    """Whether to enable Retrieval-Augmented Generation (RAG) for the
    Modelstudio assistant service."""

    rag_options: Union[RagOptions, None] = None
    """Configuration options for RAG functionality."""

    selected_model: Optional[str] = "qwen-max"
    """The selected model name to use for generation."""

    # Intention
    intention_options: Optional[IntentionOptions] = None
    """Options for intention recognition and processing."""

    # MCP servers
    mcp_config_file: Optional[str] = None
    """Path to the MCP (Model Context Protocol) configuration file."""
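
# Usage sketch (illustrative, not part of the module): combining sampling
# parameters inherited from the base Parameters class with the
# Modelstudio-specific search and RAG switches. Values are examples only.
#
#     params = ModelstudioParameters(
#         top_k=40,
#         repetition_penalty=1.05,
#         enable_search=True,
#         search_options=SearchOptions(search_strategy="turbo"),
#         enable_rag=True,
#         rag_options=RagOptions(pipeline_ids=["pipeline_abc"]),
#     )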


class ModelstudioChatRequest(ModelstudioParameters):
    messages: List[OpenAIMessage]
    """A list of messages comprising the conversation so far."""

    model: str
    """ID of the model to use for the chat completion."""
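
# Usage sketch (illustrative, not part of the module): a complete request is
# the parameters above plus a model ID and the conversation. OpenAIMessage is
# defined in .oai_llm; the message dict below assumes it validates
# OpenAI-style role/content messages, which is an assumption rather than
# something verified here.
#
#     request = ModelstudioChatRequest(
#         model="qwen-max",
#         messages=[{"role": "user", "content": "Hello!"}],
#         enable_search=True,
#     )
#     payload = request.model_dump(exclude_none=True)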


class ModelstudioChatResponse(ChatCompletion):
    pass


class ModelstudioChatCompletionChunk(ChatCompletionChunk):
    pass
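
# Usage sketch (illustrative, not part of the module): both response types
# are plain subclasses of the OpenAI SDK models, so raw completion payloads
# can be validated directly. The dict below is a minimal illustrative payload
# following the OpenAI chat completion schema.
#
#     response = ModelstudioChatResponse.model_validate({
#         "id": "chatcmpl-123",
#         "object": "chat.completion",
#         "created": 1700000000,
#         "model": "qwen-max",
#         "choices": [{
#             "index": 0,
#             "finish_reason": "stop",
#             "message": {"role": "assistant", "content": "Hello!"},
#         }],
#     })
#     assert response.choices[0].message.content == "Hello!"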