diff --git a/Dockerfile b/Dockerfile index 51ea82487f3236e0a8d1d432cd49310a3884bd11..a2c6dab1d8da8fb998d366305f5b8c8a83cf572e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ FROM hub.oepkgs.net/neocopilot/framework_base:0.9.6-x86-test ENV PYTHONPATH=/app -ENV TIKTOKEN_CACHE_DIR=/app/assets/tiktoken +ENV TIKTOKEN_CACHE_DIR=/app/data/tiktoken_cache COPY --chmod=550 ./ /app/ RUN chmod 766 /root diff --git a/apps/scheduler/call/facts/func.py b/apps/scheduler/call/facts/func.py index d66be4f17b3a00692f6d2569a341a48a6d0114d9..d7b72bbfd1c3399519f5bf3f7124b2ef6adb2c65 100644 --- a/apps/scheduler/call/facts/func.py +++ b/apps/scheduler/call/facts/func.py @@ -20,10 +20,6 @@ DOMAIN_FUNCTION: dict[LanguageType, dict[str, Any]] = { }, "required": ["keywords"], }, - "examples": [ - {"keywords": ["北京", "天气"]}, - {"keywords": ["Python", "装饰器", "设计模式"]}, - ], }, LanguageType.ENGLISH: { "name": "extract_domain", @@ -39,10 +35,6 @@ DOMAIN_FUNCTION: dict[LanguageType, dict[str, Any]] = { }, "required": ["keywords"], }, - "examples": [ - {"keywords": ["Beijing", "weather"]}, - {"keywords": ["Python", "decorator", "design pattern"]}, - ], }, } @@ -61,10 +53,6 @@ FACTS_FUNCTION: dict[LanguageType, dict[str, Any]] = { }, "required": ["facts"], }, - "examples": [ - {"facts": ["杭州西湖有苏堤、白堤、断桥、三潭印月等景点"]}, - {"facts": ["用户喜欢看科幻电影", "用户可能对《星际穿越》感兴趣"]}, - ], }, LanguageType.ENGLISH: { "name": "extract_facts", @@ -80,14 +68,5 @@ FACTS_FUNCTION: dict[LanguageType, dict[str, Any]] = { }, "required": ["facts"], }, - "examples": [ - { - "facts": [ - "Hangzhou West Lake has Su Causeway, Bai Causeway, Broken Bridge, " - "Three Pools Mirroring the Moon, etc.", - ], - }, - {"facts": ["User likes watching sci-fi movies", "User may be interested in Interstellar"]}, - ], }, } diff --git a/apps/scheduler/call/rag/func.py b/apps/scheduler/call/rag/func.py new file mode 100644 index 0000000000000000000000000000000000000000..ef4a3c3a2ba05f3bec4ee2cea76df300a9f5bac5 --- /dev/null +++ 
b/apps/scheduler/call/rag/func.py @@ -0,0 +1,35 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. +"""RAG工具的提示词""" + +from apps.models import LanguageType + +QUESTION_REWRITE_FUNCTION: dict[LanguageType, dict[str, object]] = { + LanguageType.CHINESE: { + "name": "rewrite_question", + "description": "基于上下文优化用户问题,使其更适合知识库检索", + "parameters": { + "type": "object", + "properties": { + "question": { + "type": "string", + "description": "优化后的问题。应该完整、明确、包含关键信息,便于知识库检索", + }, + }, + "required": ["question"], + }, + }, + LanguageType.ENGLISH: { + "name": "rewrite_question", + "description": "Optimize user question based on context for better knowledge base retrieval", + "parameters": { + "type": "object", + "properties": { + "question": { + "type": "string", + "description": "The optimized question that is complete, clear, and retrieval-friendly", + }, + }, + "required": ["question"], + }, + }, +} diff --git a/apps/scheduler/call/rag/prompt.py b/apps/scheduler/call/rag/prompt.py deleted file mode 100644 index cd6560fe6ca62726f1c4c27ed52b4122ddb0f2db..0000000000000000000000000000000000000000 --- a/apps/scheduler/call/rag/prompt.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. -"""RAG工具的提示词""" - -from textwrap import dedent - -from apps.models import LanguageType - -QUESTION_REWRITE: dict[LanguageType, str] = { - LanguageType.CHINESE: dedent( - r""" - 你需要分析用户的当前提问,结合对话历史上下文,理解用户的真实意图并优化问题表述,使其更适合知识库检索。 - - ## 要求 - - 参考对话历史理解用户的真实意图,补全省略的信息(如代词、缩略语等) - - 如果问题已经足够完整和明确,直接使用原问题,不要过度修改 - - 优化后的问题应该更加精准、具体,便于知识库检索匹配 - - 保持问题的核心语义不变,不要编造原问题中没有的信息 - - 适当扩展相关的关键术语和概念,提高检索召回率 - - ## 示例 - - **示例1:补全上下文中的指代关系** - - 对话历史: - - 用户: openEuler是什么? - - 助手: openEuler是一个开源操作系统。 - - 当前问题:它的优势有哪些? - - 优化结果:openEuler操作系统的优势和特点是什么? - - **示例2:扩展关键术语** - - 对话历史:无 - - 当前问题:如何安装Docker? - - 优化结果:如何在Linux系统上安装和配置Docker容器引擎? 
- - ## 用户当前问题 - {{question}} - """, - ).strip(), - LanguageType.ENGLISH: dedent( - r""" - Analyze the user's current question in the context of the conversation history to understand their true \ -intent and optimize the phrasing for knowledge base retrieval. - - ## Requirements - - Reference conversation history to understand true intent and complete omitted information (pronouns, \ -abbreviations, etc.) - - If the question is already complete and clear, use it as-is without over-modification - - The optimized question should be more precise and specific for better knowledge base matching - - Maintain the core semantics without fabricating information not present in the original question - - Appropriately expand related key terms and concepts to improve retrieval recall - - ## Examples - - **Example 1: Complete contextual references** - - Conversation history: - - User: What is openEuler? - - Assistant: openEuler is an open source operating system. - - Current question: What are its features? - - Optimized result: What are the features and advantages of the openEuler operating system? - - **Example 2: Expand key terms** - - Conversation history: None - - Current question: How to install Docker? - - Optimized result: How to install and configure Docker container engine on Linux system? 
- - ## User's Current Question - {{question}} - """, - ).strip(), -} - -QUESTION_REWRITE_FUNCTION: dict[LanguageType, dict[str, object]] = { - LanguageType.CHINESE: { - "name": "rewrite_question", - "description": "基于上下文优化用户问题,使其更适合知识库检索", - "parameters": { - "type": "object", - "properties": { - "question": { - "type": "string", - "description": "优化后的问题。应该完整、明确、包含关键信息,便于知识库检索", - }, - }, - "required": ["question"], - }, - "examples": [ - {"question": "openEuler操作系统的优势和特点是什么?"}, - ], - }, - LanguageType.ENGLISH: { - "name": "rewrite_question", - "description": "Optimize user question based on context for better knowledge base retrieval", - "parameters": { - "type": "object", - "properties": { - "question": { - "type": "string", - "description": "The optimized question that is complete, clear, and retrieval-friendly", - }, - }, - "required": ["question"], - }, - "examples": [ - {"question": "How to install and configure Docker container engine on Linux system?"}, - ], - }, -} diff --git a/apps/scheduler/call/rag/rag.py b/apps/scheduler/call/rag/rag.py index aaf6c9d9c8424aae82ecb72d78af544ed64d4f1e..2514529c1b141223a25a8535408d58b1a4e2a6cc 100644 --- a/apps/scheduler/call/rag/rag.py +++ b/apps/scheduler/call/rag/rag.py @@ -26,7 +26,7 @@ from apps.schemas.scheduler import ( ) from apps.services.document import DocumentManager -from .prompt import QUESTION_REWRITE, QUESTION_REWRITE_FUNCTION +from .func import QUESTION_REWRITE_FUNCTION from .schema import ( DocItem, QuestionRewriteOutput, @@ -170,7 +170,7 @@ class RAG(CoreCall, input_model=RAGInput, output_model=RAGOutput): trim_blocks=True, lstrip_blocks=True, ) - tmpl = env.from_string(QUESTION_REWRITE[self._sys_vars.language]) + tmpl = env.from_string(self._load_prompt("question_rewrite")) prompt = tmpl.render(question=data.query) # 使用json_generator直接获取JSON结果 @@ -179,9 +179,8 @@ class RAG(CoreCall, input_model=RAGInput, output_model=RAGOutput): conversation=[ {"role": "system", "content": "You are a helpful 
assistant."}, *self._sys_vars.background.conversation[-self.history_len:], - {"role": "user", "content": prompt}, ], - language=self._sys_vars.language, + prompt=prompt, ) # 直接使用解析后的JSON结果 data.query = QuestionRewriteOutput.model_validate(json_result).question diff --git a/apps/scheduler/call/suggest/func.py b/apps/scheduler/call/suggest/func.py new file mode 100644 index 0000000000000000000000000000000000000000..960ab35533ae7f000d0a0d0c847107353ce6e683 --- /dev/null +++ b/apps/scheduler/call/suggest/func.py @@ -0,0 +1,44 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. +"""问题推荐工具的提示词和Function Schema""" + +from apps.models import LanguageType + +SUGGEST_FUNCTION: dict[LanguageType, dict] = { + LanguageType.CHINESE: { + "name": "generate_suggestions", + "description": "基于对话上下文和用户兴趣生成推荐的后续问题", + "parameters": { + "type": "object", + "properties": { + "predicted_questions": { + "type": "array", + "description": "预测的问题列表,每个问题应该是完整的疑问句或祈使句", + "items": { + "type": "string", + "description": "单个推荐问题,长度不超过30字", + }, + }, + }, + "required": ["predicted_questions"], + }, + }, + LanguageType.ENGLISH: { + "name": "generate_suggestions", + "description": "Generate recommended follow-up questions based on conversation context and user interests", + "parameters": { + "type": "object", + "properties": { + "predicted_questions": { + "type": "array", + "description": "List of predicted questions, each should be a complete interrogative or " + "imperative sentence", + "items": { + "type": "string", + "description": "Single recommended question, not exceeding 30 words", + }, + }, + }, + "required": ["predicted_questions"], + }, + }, +} diff --git a/apps/scheduler/call/suggest/prompt.py b/apps/scheduler/call/suggest/prompt.py deleted file mode 100644 index 3cbf9b0a58a6633a4a6ee910100c0429e57b057d..0000000000000000000000000000000000000000 --- a/apps/scheduler/call/suggest/prompt.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright (c) Huawei Technologies 
Co., Ltd. 2023-2025. All rights reserved. -"""问题推荐工具的提示词和Function Schema""" - -from textwrap import dedent - -from apps.models import LanguageType - -SUGGEST_FUNCTION: dict[LanguageType, dict] = { - LanguageType.CHINESE: { - "name": "generate_suggestions", - "description": "基于对话上下文和用户兴趣生成推荐的后续问题", - "parameters": { - "type": "object", - "properties": { - "predicted_questions": { - "type": "array", - "description": "预测的问题列表,每个问题应该是完整的疑问句或祈使句", - "items": { - "type": "string", - "description": "单个推荐问题,长度不超过30字", - }, - }, - }, - "required": ["predicted_questions"], - }, - "examples": [ - { - "predicted_questions": [ - "杭州的最佳旅游季节是什么时候?", - "灵隐寺的开放时间和门票信息?", - "杭州有哪些适合亲子游的景点?", - ], - }, - { - "predicted_questions": [ - "字典和集合有什么特点?", - "如何在Python中处理异常?", - "列表推导式怎么使用?", - ], - }, - ], - }, - LanguageType.ENGLISH: { - "name": "generate_suggestions", - "description": "Generate recommended follow-up questions based on conversation context and user interests", - "parameters": { - "type": "object", - "properties": { - "predicted_questions": { - "type": "array", - "description": "List of predicted questions, each should be a complete interrogative or " - "imperative sentence", - "items": { - "type": "string", - "description": "Single recommended question, not exceeding 30 words", - }, - }, - }, - "required": ["predicted_questions"], - }, - "examples": [ - { - "predicted_questions": [ - "What is the best season to visit Hangzhou?", - "What are the opening hours and ticket information for Lingyin Temple?", - "Which attractions in Hangzhou are suitable for family trips?", - ], - }, - { - "predicted_questions": [ - "What are the characteristics of dictionaries and sets?", - "How to handle exceptions in Python?", - "How to use list comprehensions?", - ], - }, - ], - }, -} - -SUGGEST_PROMPT: dict[LanguageType, str] = { - LanguageType.CHINESE: dedent( - r""" - 请根据对话历史和用户兴趣,生成{% if target_num %}{{ target_num }}{% else %}3-5{% endif %}个\ -用户可能感兴趣的后续问题。 - - {% if history or generated 
%} - **已讨论的问题:** - {% for question in history %} - - {{ question }} - {% endfor %} - {% for question in generated %} - - {{ question }} - {% endfor %} - {% endif %} - - {% if tool %} - **可用工具:**{{ tool.name }}({{ tool.description }}) - {% endif %} - - {% if preference %} - **用户兴趣:**{{ preference | join('、') }} - {% endif %} - - **要求:** - - 以用户口吻提问,使用疑问句或祈使句 - - 每个问题不超过30字,具体明确、富有探索性 - - 避免与已讨论问题重复 - - 问题应与可用工具和用户兴趣相关,能推进对话深度或拓展话题 - - **参考示例:** - - 示例1 - 旅游场景: - 当用户已讨论"杭州简介、杭州著名景点、西湖门票价格",可用工具为"景点查询", - 用户兴趣为"杭州、旅游"时,可生成: - 杭州的最佳旅游季节是什么时候?灵隐寺的开放时间和门票信息? - 杭州有哪些适合亲子游的景点? - - 示例2 - 编程场景: - 当用户已讨论"Python基础语法、列表和元组的区别",可用工具为"代码搜索", - 用户兴趣为"Python编程、数据结构"时,可生成: - 字典和集合有什么特点?如何在Python中处理异常?列表推导式怎么使用? - """, - ), - LanguageType.ENGLISH: dedent( - r""" - Please generate {% if target_num %}{{ target_num }}{% else %}3-5{% endif %} follow-up questions \ -that the user might be interested in, based on conversation history and user interests. - - {% if history or generated %} - **Questions already discussed:** - {% for question in history %} - - {{ question }} - {% endfor %} - {% for question in generated %} - - {{ question }} - {% endfor %} - {% endif %} - - {% if tool %} - **Available tool:** {{ tool.name }} ({{ tool.description }}) - {% endif %} - - {% if preference %} - **User interests:** {{ preference | join(', ') }} - {% endif %} - - **Requirements:** - - Use the user's voice with interrogative or imperative sentences - - Each question under 30 words, specific and exploratory - - Avoid repeating discussed questions - - Questions should relate to available tools and user interests, deepening or expanding the conversation - - **Reference examples:** - - Example 1 - Tourism scenario: - When the user has discussed "Hangzhou introduction, famous attractions in Hangzhou, - West Lake ticket prices", available tool is "Scenic Spot Search", and user interests are - "Hangzhou, Tourism", you can generate: - What is the best season to visit Hangzhou? 
- What are the opening hours and ticket information for Lingyin Temple? - Which attractions in Hangzhou are suitable for family trips? - - Example 2 - Programming scenario: - When the user has discussed "Python basics, difference between lists and tuples", available tool is - "Code Search", and user interests are "Python programming, Data structures", you can generate: - What are the characteristics of dictionaries and sets? How to handle exceptions in Python? - How to use list comprehensions? - """, - ), -} diff --git a/apps/scheduler/call/suggest/suggest.py b/apps/scheduler/call/suggest/suggest.py index 143569f641230a153f30dbec849c4ad6640dddd2..24a86e6ba9eed4f337a59f1f2d95d1d5ed201339 100644 --- a/apps/scheduler/call/suggest/suggest.py +++ b/apps/scheduler/call/suggest/suggest.py @@ -23,7 +23,7 @@ from apps.schemas.scheduler import ( ) from apps.services.user_tag import UserTagManager -from .prompt import SUGGEST_FUNCTION, SUGGEST_PROMPT +from .func import SUGGEST_FUNCTION from .schema import ( SingleFlowSuggestionConfig, SuggestGenResult, @@ -108,7 +108,8 @@ class Suggestion(CoreCall, input_model=SuggestionInput, output_model=SuggestionO user_domain_info = await UserTagManager.get_user_domain_by_user_and_topk(data.user_id, 5) user_domain = [tag.name for tag in user_domain_info] - prompt_tpl = self._env.from_string(SUGGEST_PROMPT[self._sys_vars.language]) + prompt_content = self._load_prompt("suggest") + prompt_tpl = self._env.from_string(prompt_content) if self.configs: async for output_chunk in self._process_configs(): @@ -149,12 +150,11 @@ class Suggestion(CoreCall, input_model=SuggestionInput, output_model=SuggestionO messages = [ {"role": "system", "content": "You are a helpful assistant."}, *self._sys_vars.background.conversation, - {"role": "user", "content": prompt}, ] result = await json_generator.generate( function=SUGGEST_FUNCTION[self._sys_vars.language], conversation=messages, - language=self._sys_vars.language, + prompt=prompt, ) return 
SuggestGenResult.model_validate(result) diff --git a/apps/scheduler/executor/agent.py b/apps/scheduler/executor/agent.py index b1b309a09f8b0cb6475c47e5f8de0fd497e56880..05bef263940b792dbd3d9ccff8eb73f0e954e2d1 100644 --- a/apps/scheduler/executor/agent.py +++ b/apps/scheduler/executor/agent.py @@ -111,29 +111,25 @@ class MCPAgentExecutor(BaseExecutor): _logger.error(err) raise RuntimeError(err) - if is_first: - # 获取第一个输入参数 - self._current_tool = self._tool_list[self.task.state.stepName] - # 更新host的task引用以确保使用最新的context - self._current_input = await self._host.get_first_input_params( - self._current_tool, self.task, - ) + # 获取输入参数 + if isinstance(self.params, FlowParams): + params = self.params.content + params_description = self.params.description else: - # 获取后续输入参数 - if isinstance(self.params, FlowParams): - params = self.params.content - params_description = self.params.description - else: - params = {} - params_description = "" - self._current_tool = self._tool_list[self.task.state.stepName] - self._current_input = await self._host.fill_params( - self._current_tool, - self.task, - self._current_input, - params, - params_description, - ) + params = {} + params_description = "" + self._current_tool = self._tool_list[self.task.state.stepName] + + # 对于首次调用,使用空的current_input + current_input = {} if is_first else self._current_input + + self._current_input = await self._host.fill_params( + self._current_tool, + self.task, + current_input, + params, + params_description, + ) def _get_error_message_str(self, error_message: dict | str | None) -> str: """将错误消息转换为字符串""" diff --git a/apps/scheduler/mcp_agent/base.py b/apps/scheduler/mcp_agent/base.py index 430a1a9c5f1e12ca57d98c2356977f7baaa51cf6..c09263347462b3ab94cd7dda0d9b29fe098eade6 100644 --- a/apps/scheduler/mcp_agent/base.py +++ b/apps/scheduler/mcp_agent/base.py @@ -5,9 +5,11 @@ import json import logging from typing import Any +from anyio import Path from jinja2 import BaseLoader from jinja2.sandbox import 
SandboxedEnvironment +from apps.common.config import config from apps.models import LanguageType from apps.scheduler.mcp.prompt import MEMORY_TEMPLATE from apps.schemas.task import TaskData @@ -42,6 +44,17 @@ class MCPBase: self._goal = task.runtime.userInput self._language = task.runtime.language + async def _load_prompt(self, prompt_id: str) -> str: + """ + 从Markdown文件加载提示词 + + :param prompt_id: 提示词ID,例如 "gen_params" 等 + :return: 提示词内容 + """ + filename = f"{prompt_id}.{self._language.value}.md" + prompt_file = Path(config.deploy.data_dir) / "prompts" / "system" / "mcp" / filename + return await prompt_file.read_text(encoding="utf-8") + @staticmethod async def assemble_memory(task: TaskData) -> list[dict[str, str]]: """组装记忆""" diff --git a/apps/scheduler/mcp_agent/host.py b/apps/scheduler/mcp_agent/host.py index c3f06d4fc0fd7f841ee6827095be87ea7394e4a6..3474ffd2a7e42e9d6cef2624a26c73146c373ef5 100644 --- a/apps/scheduler/mcp_agent/host.py +++ b/apps/scheduler/mcp_agent/host.py @@ -10,7 +10,6 @@ from jinja2.sandbox import SandboxedEnvironment from apps.llm import json_generator from apps.models import LanguageType, MCPTools from apps.scheduler.mcp_agent.base import MCPBase -from apps.scheduler.mcp_agent.prompt import REPAIR_PARAMS, get_gen_params_prompt from apps.schemas.task import TaskData _logger = logging.getLogger(__name__) @@ -29,38 +28,6 @@ _LLM_QUERY_FIX = { class MCPHost(MCPBase): """MCP宿主服务""" - async def get_first_input_params( - self, mcp_tool: MCPTools, task: TaskData, - ) -> dict[str, Any]: - """填充工具参数""" - # 加载提示词模板 - prompt_template = get_gen_params_prompt(task.runtime.language) - - # 更清晰的输入指令,这样可以调用generate - prompt = _env.from_string(prompt_template).render( - tool_name=mcp_tool.toolName, - tool_description=mcp_tool.description, - goal=task.runtime.userInput, - current_goal=task.runtime.userInput, - input_schema=mcp_tool.inputSchema, - background_info=await self.assemble_memory(task), - ) - _logger.info("[MCPHost] 填充工具参数: %s", mcp_tool.toolName) - 
# 使用json_generator解析结果 - function = { - "name": mcp_tool.toolName, - "description": mcp_tool.description, - "parameters": mcp_tool.inputSchema, - } - return await json_generator.generate( - function=function, - conversation=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": prompt}, - ], - prompt=task.runtime.language, - ) - async def fill_params( self, mcp_tool: MCPTools, @@ -69,10 +36,10 @@ class MCPHost(MCPBase): params: dict[str, Any] | None = None, params_description: str = "", ) -> dict[str, Any]: - """填充并修复工具参数""" + """生成工具参数""" llm_query = _LLM_QUERY_FIX[task.runtime.language] error_message = task.state.errorMessage if task.state else {} - prompt = _env.from_string(REPAIR_PARAMS[task.runtime.language]).render( + prompt = _env.from_string(await self._load_prompt("gen_params")).render( tool_name=mcp_tool.toolName, goal=task.runtime.userInput, current_goal=task.runtime.userInput, @@ -94,8 +61,9 @@ class MCPHost(MCPBase): return await json_generator.generate( function=function, conversation=[ + {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": prompt}, {"role": "user", "content": llm_query}, ], - language=task.runtime.language, + prompt=prompt, ) diff --git a/apps/scheduler/mcp_agent/prompt.py b/apps/scheduler/mcp_agent/prompt.py index 9a09b15d145669f97c1f112fe5720f106bda77c8..e18dee97378409cba17eea98846231d8306ed133 100644 --- a/apps/scheduler/mcp_agent/prompt.py +++ b/apps/scheduler/mcp_agent/prompt.py @@ -1,26 +1,10 @@ # Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
"""MCP相关的大模型Prompt""" -from pathlib import Path from textwrap import dedent from apps.models import LanguageType - -def _load_prompt(prompt_id: str, language: LanguageType) -> str: - """ - 从Markdown文件加载提示词 - - :param prompt_id: 提示词ID,例如 "gen_params" 等 - :param language: 语言类型 - :return: 提示词内容 - """ - # 组装Prompt文件路径: prompt_id.language.md (例如: gen_params.en.md) - filename = f"{prompt_id}.{language.value}.md" - prompt_dir = Path(__file__).parent.parent.parent / "data" / "prompts" / "system" / "mcp" - prompt_file = prompt_dir / filename - return prompt_file.read_text(encoding="utf-8") - GENERATE_FLOW_NAME: dict[LanguageType, str] = { LanguageType.CHINESE: dedent( r""" @@ -401,11 +385,6 @@ IS_PARAM_ERROR_FUNCTION: dict[LanguageType, dict] = { }, "required": ["is_param_error"], }, - "examples": [ - { - "is_param_error": True, - }, - ], }, LanguageType.ENGLISH: { "name": "check_parameter_error", @@ -421,11 +400,6 @@ IS_PARAM_ERROR_FUNCTION: dict[LanguageType, dict] = { }, "required": ["is_param_error"], }, - "examples": [ - { - "is_param_error": True, - }, - ], }, } @@ -617,232 +591,6 @@ for user to provide credentials again ), } - -def get_gen_params_prompt(language: LanguageType) -> str: - """ - 获取GEN_PARAMS提示词 - - :param language: 语言类型 - :return: 提示词内容 - """ - return _load_prompt("gen_params", language) - - -REPAIR_PARAMS: dict[LanguageType, str] = { - LanguageType.CHINESE: dedent( - r""" - 你是一个工具参数修复器。 - 你的任务是根据当前的工具信息、目标、工具入参的schema、工具当前的入参、工具的报错、补充的参数和补充的参数描述,修复当前工具的入参。 - - 注意: - 1.最终修复的参数要符合目标和工具入参的schema。 - - # 样例 - ## 工具信息 - - mysql_analyzer - 分析MySQL数据库性能 - - - ## 总目标 - 我需要扫描当前mysql数据库,分析性能瓶颈, 并调优 - - ## 当前阶段目标 - 我要连接MySQL数据库,分析性能瓶颈,并调优。 - - ## 工具入参的schema - { - "type": "object", - "properties": { - "host": { - "type": "string", - "description": "MySQL数据库的主机地址" - }, - "port": { - "type": "integer", - "description": "MySQL数据库的端口号" - }, - "username": { - "type": "string", - "description": "MySQL数据库的用户名" - }, - "password": { - "type": "string", - "description": 
"MySQL数据库的密码" - } - }, - "required": ["host", "port", "username", "password"] - } - - ## 工具当前的入参 - { - "host": "192.0.0.1", - "port": 3306, - "username": "root", - "password": "password" - } - - ## 工具的报错 - 执行端口扫描命令时,出现了错误:`password is not correct`。 - - ## 补充的参数 - { - "username": "admin", - "password": "admin123" - } - - ## 补充的参数描述 - 用户希望使用admin用户和admin123密码来连接MySQL数据库。 - - ## 输出 - ```json - { - "host": "192.0.0.1", - "port": 3306, - "username": "admin", - "password": "admin123" - } - ``` - - # 现在开始修复工具入参: - ## 工具 - - {{tool_name}} - {{tool_description}} - - - ## 总目标 - {{goal}} - - ## 当前阶段目标 - {{current_goal}} - - ## 工具入参Schema - {{input_schema}} - - ## 工具当前的入参 - {{input_params}} - - ## 运行报错 - {{error_message}} - - ## 补充的参数 - {{params}} - - ## 补充的参数描述 - {{params_description}} - - ## 输出 - """, - ), - LanguageType.ENGLISH: dedent( - r""" - You are a tool parameter fixer. - Your task is to fix the current tool input parameters based on the current tool information, tool input \ -parameter schema, tool current input parameters, tool error, supplemented parameters, and supplemented \ -parameter descriptions. - - # Example - ## Tool information - - mysql_analyzer - Analyze MySQL database performance - - - ## Tool input parameter schema - { - "type": "object", - "properties": { - "host": { - "type": "string", - "description": "MySQL database host address" - }, - "port": { - "type": "integer", - "description": "MySQL database port number" - }, - "username": { - "type": "string", - "description": "MySQL database username" - }, - "password": { - "type": "string", - "description": "MySQL database password" - } - }, - "required": ["host", "port", "username", "password"] - } - - ## Current tool input parameters - { - "host": "192.0.0.1", - "port": 3306, - "username": "root", - "password": "password" - } - - ## Tool error - When executing the port scan command, an error occurred: `password is not correct`. 
- - ## Supplementary parameters - { - "username": "admin", - "password": "admin123" - } - - ## Supplementary parameter description - The user wants to use the admin user and the admin123 password to connect to the MySQL database. - - ## Output - ```json - { - "host": "192.0.0.1", - "port": 3306, - "username": "admin", - "password": "admin123" - } - ``` - - # Now start fixing tool input parameters: - ## Tool - - {{tool_name}} - {{tool_description}} - - - ## Tool input schema - {{input_schema}} - - ## Current tool input parameters - {{input_params}} - - ## Runtime error - {{error_message}} - - ## Supplementary parameters - {{params}} - - ## Supplementary parameter descriptions - {{params_description}} - - ## Output - """, - ), -} - -GET_MISSING_PARAMS_FUNCTION: dict[LanguageType, dict] = { - LanguageType.CHINESE: { - "name": "get_missing_parameters", - "description": "根据错误反馈提取并提供缺失或错误的参数", - "parameters": None, - }, - LanguageType.ENGLISH: { - "name": "get_missing_parameters", - "description": "Extract and provide the missing or incorrect parameters based on error feedback", - "parameters": None, - }, -} - FINAL_ANSWER: dict[LanguageType, str] = { LanguageType.CHINESE: dedent( r""" diff --git a/apps/scheduler/scheduler/flow.py b/apps/scheduler/scheduler/flow.py index 12e87acf2f559a2b08f6310f73781385c0d42a1e..ed167a3132ca58f075a579d9554009524f8914a2 100644 --- a/apps/scheduler/scheduler/flow.py +++ b/apps/scheduler/scheduler/flow.py @@ -4,15 +4,17 @@ import logging from copy import deepcopy +from anyio import Path from jinja2.sandbox import SandboxedEnvironment +from apps.common.config import config from apps.llm import json_generator from apps.scheduler.pool.pool import pool from apps.schemas.request_data import RequestData from apps.schemas.scheduler import TopFlow from apps.schemas.task import TaskData -from .prompt import FLOW_SELECT, FLOW_SELECT_FUNCTION +from .func import FLOW_SELECT_FUNCTION _logger = logging.getLogger(__name__) @@ -43,9 +45,12 @@ class 
FlowMixin: "description": f"{flow.name}, {flow.description}", } for flow in flow_list] - template = self._env.from_string(FLOW_SELECT[self.task.runtime.language]) + language = self.task.runtime.language.value + template_path = Path(config.deploy.data_dir) / "prompts" / "system" / "scheduler" / f"flow_select.{language}.md" + template_content = await template_path.read_text(encoding="utf-8") + + template = self._env.from_string(template_content) prompt = template.render( - template, question=self.post_body.question, choice_list=choices, ) @@ -55,9 +60,8 @@ class FlowMixin: function=function, conversation=[ {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": prompt}, ], - language=self.task.runtime.language, + prompt=prompt, ) result = TopFlow.model_validate(result_str) return result.choice diff --git a/apps/scheduler/scheduler/func.py b/apps/scheduler/scheduler/func.py new file mode 100644 index 0000000000000000000000000000000000000000..8fde54576029864633574f634278327c5b5749b4 --- /dev/null +++ b/apps/scheduler/scheduler/func.py @@ -0,0 +1,17 @@ +# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. +"""Scheduler相关的大模型提示词""" + +FLOW_SELECT_FUNCTION = { + "name": "select_flow", + "description": "Select the appropriate flow", + "parameters": { + "type": "object", + "properties": { + "choice": { + "type": "string", + "description": "最匹配用户输入的Flow的名称", + }, + }, + "required": ["choice"], + }, +} diff --git a/apps/scheduler/scheduler/prompt.py b/apps/scheduler/scheduler/prompt.py deleted file mode 100644 index 395ee2d1919cfe6274bbb4bf03ce34dddb030297..0000000000000000000000000000000000000000 --- a/apps/scheduler/scheduler/prompt.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) Huawei Technologies Co., Ltd. 2023-2025. All rights reserved. 
-"""Scheduler相关的大模型提示词""" - -from apps.models import LanguageType - -FLOW_SELECT: dict[LanguageType, str] = { - LanguageType.CHINESE: r""" - ## 任务说明 - - 根据对话历史和用户查询,从可用选项中选择最匹配的一项。 - - ## 示例 - - **用户查询:** - > 使用天气API,查询明天杭州的天气信息 - - **可用选项:** - - **API**:HTTP请求,获取返回的JSON数据 - - **SQL**:查询数据库,获取数据库表中的数据 - - **回答:** - 用户明确提到使用天气API,天气数据通常通过外部API获取而非数据库存储,因此选择 API 选项。 - - --- - - ## 当前任务 - - **用户查询:** - {{question}} - - **可用选项:** - {{choice_list}} - """, - LanguageType.ENGLISH: r""" - ## Task Description - - Based on conversation history and user query, select the most matching option from available choices. - - ## Example - - **User Query:** - > Use the weather API to query the weather information of Hangzhou tomorrow - - **Available Options:** - - **API**: HTTP request, get the returned JSON data - - **SQL**: Query the database, get the data in the database table - - **Answer:** - The user explicitly mentioned using weather API. Weather data is typically accessed via external APIs rather \ -than database storage, so the API option is selected. - - --- - - ## Current Task - - **User Query:** - {{question}} - - **Available Options:** - {{choice_list}} - """, -} - -FLOW_SELECT_FUNCTION = { - "name": "select_flow", - "description": "Select the appropriate flow", - "parameters": { - "type": "object", - "properties": { - "choice": { - "type": "string", - "description": "最匹配用户输入的Flow的名称", - }, - }, - "required": ["choice"], - }, -} diff --git a/data/prompts/call/domain.en.md b/data/prompts/call/domain.en.md index 56bfc3ae0186b1aaae19cdf8221785c375d722bf..af4b4e9baccb7fec5675495f6a4960a4f2f72471 100644 --- a/data/prompts/call/domain.en.md +++ b/data/prompts/call/domain.en.md @@ -1,10 +1,10 @@ -# Task Description +# Domain Tag Extraction Task -You are a domain tag extraction assistant. Your task is to analyze conversation history, -select the most relevant domain keywords from the available tags list, -and return results by calling the `extract_domain` function. 
+## Role -## Available Keywords List +You are a professional domain tag extraction assistant capable of analyzing conversation context to accurately select the most relevant domain keywords from a list of available tags. + +## Available Tags List {{ available_keywords }} @@ -12,34 +12,15 @@ and return results by calling the `extract_domain` function. 1. **Exact Match**: Only select from the available list, do not create new tags 2. **Topic Relevance**: Select tags directly related to the conversation topic -3. **Quantity Control**: Select 3-8 most relevant tags -4. **Quality Standards**: Avoid duplicate or similar tags, prioritize - distinctive tags, sort by relevance -{% raw %}{% if use_xml_format %}{% endraw %} +3. **Quantity Control**: Select 1-5 most relevant tags +4. **Quality Standards**: Avoid duplicate or similar tags, prioritize distinctive tags, sort by relevance -## Function Specification - -**Function Name**: `extract_domain` -**Function Description**: Extract domain keyword tags from conversation -**Function Parameter Schema**: - -```json -{ - "type": "object", - "properties": { - "keywords": { - "type": "array", - "items": {"type": "string"}, - "description": "List of keywords or tags" - } - }, - "required": ["keywords"] -} -``` +## Tools -## Output Format +You can call the following tools to complete the tag extraction task. -Use XML format to call the function, basic format: +{% raw %}{% if use_xml_format %}{% endraw %} +When calling tools, use XML-style tags for formatting. 
The format specification is as follows: ```xml @@ -48,50 +29,34 @@ Use XML format to call the function, basic format: ``` -## Call Examples +{% raw %}{% endif %}{% endraw %} -### Example 1: Normal Extraction (XML) +### extract_domain -- Conversation: User asks "What's the weather like in Beijing?", - Assistant replies "Beijing is sunny today" -- Available list contains: ["Beijing", "Shanghai", "weather", "temperature", - "Python", "Java"] -- Function call: +Description: Extract domain keyword tags from conversation -```xml - -Beijing -weather -temperature - -``` +Parameters: -### Example 2: No Relevant Tags (XML) +- keywords: List of keywords -- Conversation: User says "I'm feeling good today" -- If no relevant tags in the available list, return empty tags: +Usage Example: + +- Conversation: User asks "What's the weather like in Beijing?", Assistant replies "Beijing is sunny today" +- Available list contains: ["Beijing", "Shanghai", "weather", "temperature", "Python", "Java"] +- Keywords should be: ["Beijing", "weather", "temperature"] + +{% raw %}{% if use_xml_format %}{% endraw %} ```xml +Beijing +weather +temperature ``` -Please use XML format to call the `extract_domain` function for tag -extraction.{% raw %}{% else %}{% endraw %} - -## Function Call Examples - -### Example 1: Normal Extraction - -- Conversation: User asks "What's the weather like in Beijing?", - Assistant replies "Beijing is sunny today" -- Available list contains: ["Beijing", "Shanghai", "weather", "temperature", - "Python", "Java"] -- Function call result: ["Beijing", "weather", "temperature"] - -### Example 2: No Relevant Tags +{% raw %}{% endif %}{% endraw %} -- Conversation: User says "I'm feeling good today" -- If no relevant tags in the available list, return empty array: [] +--- -Please call the `extract_domain` function to complete tag extraction.{% raw %}{% endif %}{% endraw %} +Now start responding to user instructions and call the `extract_domain` tool to complete tag extraction: 
diff --git a/data/prompts/call/domain.zh.md b/data/prompts/call/domain.zh.md index e93a452dff9b78648b8739a0950059296bc88c12..c87ff9a9f00b77a9fae16ce589b0b1342c7c8211 100644 --- a/data/prompts/call/domain.zh.md +++ b/data/prompts/call/domain.zh.md @@ -1,7 +1,8 @@ -# 任务说明 +# 领域标签提取任务 -你是一个领域标签提取助手。你的任务是分析对话历史,从备选标签列表中选择最相关的领域关键词, -并通过调用 `extract_domain` 函数返回结果。 +## 角色 + +你是一个专业的领域标签提取助手,能够通过分析对话上下文,从备选标签列表中准确选择最相关的领域关键词。 ## 备选标签列表 @@ -11,33 +12,15 @@ 1. **精准匹配**:只能从备选列表中选择,不可自创标签 2. **话题相关性**:选择与对话主题直接相关的标签 -3. **数量控制**:选择3-8个最相关的标签 +3. **数量控制**:选择1-5个最相关的标签 4. **质量标准**:避免重复或相似标签,优先选择具有区分度的标签,按相关性排序 -{% raw %}{% if use_xml_format %}{% endraw %} -## 函数说明 - -**函数名称**:`extract_domain` -**函数描述**:从对话中提取领域关键词标签 -**函数参数Schema**: - -```json -{ - "type": "object", - "properties": { - "keywords": { - "type": "array", - "items": {"type": "string"}, - "description": "关键词或标签列表" - } - }, - "required": ["keywords"] -} -``` +## 工具 -## 输出格式 +你可以调用以下工具来完成标签提取任务。 -使用XML格式调用函数,基本格式: +{% raw %}{% if use_xml_format %}{% endraw %} +调用工具时,采用XML风格标签进行格式化。格式规范如下: ```xml @@ -46,13 +29,23 @@ ``` -## 调用示例 +{% raw %}{% endif %}{% endraw %} + +### extract_domain + +描述:从对话中提取领域关键词标签 -### 示例1:正常提取(XML) +参数: + +- keywords: 关键词列表 + +用法示例: - 对话:用户询问"北京天气如何?",助手回复"北京今天晴" - 备选列表包含:["北京", "上海", "天气", "气温", "Python", "Java"] -- 函数调用: +- 关键词应为:["北京", "天气", "气温"] + +{% raw %}{% if use_xml_format %}{% endraw %} ```xml @@ -62,29 +55,8 @@ ``` -### 示例2:无相关标签(XML) - -- 对话:用户说"今天心情不错" -- 如果备选列表中没有相关标签,返回空标签: - -```xml - - -``` - -请使用XML格式调用 `extract_domain` 函数完成标签提取。{% raw %}{% else %}{% endraw %} - -## 函数调用示例 - -### 示例1:正常提取 - -- 对话:用户询问"北京天气如何?",助手回复"北京今天晴" -- 备选列表包含:["北京", "上海", "天气", "气温", "Python", "Java"] -- 函数调用结果:["北京", "天气", "气温"] - -### 示例2:无相关标签 +{% raw %}{% endif %}{% endraw %} -- 对话:用户说"今天心情不错" -- 如果备选列表中没有相关标签,返回空数组:[] +--- -请调用 `extract_domain` 函数完成标签提取。{% raw %}{% endif %}{% endraw %} +现在开始响应用户指令,调用 `extract_domain` 工具完成标签提取: diff --git a/data/prompts/call/facts.en.md 
b/data/prompts/call/facts.en.md index 07ba3d1924104922bb7fe7f9144f534101891658..b81fd2421bdfe7b52176fee462d888349e8bca19 100644 --- a/data/prompts/call/facts.en.md +++ b/data/prompts/call/facts.en.md @@ -1,101 +1,55 @@ -# Task Description +# Fact Extraction Task -You are a fact extraction assistant. Your task is to extract key facts from the conversation -and return structured results by calling the `extract_facts` function. +## Role -## Information Types to Focus On - -1. **Entities**: Names, locations, organizations, events, etc. -2. **Preferences**: Attitudes towards entities, such as like, dislike, etc. -3. **Relationships**: Relationships between users and entities, or between - entities -4. **Actions**: Actions affecting entities, such as query, search, browse, - click, etc. +You are a professional fact extraction assistant, capable of accurately extracting key factual information from conversations. ## Extraction Requirements -1. Facts must be accurate and extracted only from the conversation -2. Facts must be clear and concise, each less than 30 words -3. Each fact should be independent and complete, easy to understand -{% if use_xml_format %} - -## Function Specification - -**Function Name**: `extract_facts` -**Function Description**: Extract key fact information from conversation -**Function Parameter Schema**: - -```json -{ - "type": "object", - "properties": { - "facts": { - "type": "array", - "items": {"type": "string"}, - "description": "Fact entries extracted from conversation" - } - }, - "required": ["facts"] -} -``` +1. **Accuracy**: Extract only factual information that actually appears in the conversation +2. **Conciseness**: Each fact should be clear and concise, less than 30 words +3. **Completeness**: Each fact should be independent and complete, easy to understand +4. **Type Coverage**: Include key information such as entities, preferences, relationships, actions, etc. 
-## Output Format +## Tools -Use XML format to call the function, basic format: +You can call the following tools to complete the fact extraction task. + +{% raw %}{% if use_xml_format %}{% endraw %} +When calling tools, use XML-style tags for formatting. The format specification is as follows: ```xml -fact1 -fact2 +Fact 1 +Fact 2 ``` -## Call Examples +{% raw %}{% endif %}{% endraw %} -### Example 1: Normal Extraction (XML) +### extract_facts -- Conversation: User asks "What are the attractions in Hangzhou West Lake?", - Assistant replies "Notable attractions include Su Causeway, Bai Causeway, - Broken Bridge, etc." -- Function call: +Description: Extract key factual information from conversations -```xml - -Hangzhou West Lake has Su Causeway, Bai Causeway, Broken Bridge, etc. - -``` +Parameters: -### Example 2: Multiple Information Types (XML) +- facts: List of facts -- Conversation: User says "I work in Beijing and often go to Starbucks in - Sanlitun" -- Function call: +Usage Example: + +- Conversation: User asks "What are the attractions in Hangzhou West Lake?", Assistant replies "There are attractions such as Su Causeway, Bai Causeway, Broken Bridge, and Three Pools Mirroring the Moon around West Lake" +- Fact should be: ["Hangzhou West Lake has attractions such as Su Causeway, Bai Causeway, Broken Bridge, and Three Pools Mirroring the Moon"] + +{% raw %}{% if use_xml_format %}{% endraw %} ```xml -User works in Beijing -User often goes to Starbucks in Sanlitun +Hangzhou West Lake has attractions such as Su Causeway, Bai Causeway, Broken Bridge, and Three Pools Mirroring the Moon ``` -Please use XML format to call the `extract_facts` function for fact -extraction.{% else %} - -## Function Call Examples - -### Example 1: Normal Extraction - -- Conversation: User asks "What are the attractions in Hangzhou West Lake?", - Assistant replies "Notable attractions include Su Causeway, Bai Causeway, - Broken Bridge, etc." 
-- Function call result: ["Hangzhou West Lake has Su Causeway, Bai Causeway, - Broken Bridge, etc."] - -### Example 2: Multiple Information Types +{% raw %}{% endif %}{% endraw %} -- Conversation: User says "I work in Beijing and often go to Starbucks in - Sanlitun" -- Function call result: ["User works in Beijing", "User often goes to - Starbucks in Sanlitun"] +--- -Please call the `extract_facts` function to complete fact extraction.{% endif %} +Now start responding to user instructions and call the `extract_facts` tool to complete fact extraction: diff --git a/data/prompts/call/facts.zh.md b/data/prompts/call/facts.zh.md index 1051af21b40b041dabb2f41c38e57afc54ff79e8..d948a01114640a6ef41376c5e8d8a0b165946c30 100644 --- a/data/prompts/call/facts.zh.md +++ b/data/prompts/call/facts.zh.md @@ -2,22 +2,14 @@ ## 角色 -你是一个专业的事实信息提取助手,能够从对话中准确提取关键事实信息,并通过调用工具返回结构化结果。 - -## 任务目标 - -从对话中提取以下类型的关键信息: - -1. **实体**:姓名、地点、组织、事件等 -2. **偏好**:对实体的态度,如喜欢、讨厌等 -3. **关系**:用户与实体之间、实体与实体之间的关系 -4. **动作**:查询、搜索、浏览、点击等影响实体的动作 +你是一个专业的事实信息提取助手,能够从对话中准确提取关键事实信息。 ## 提取要求 -1. 事实必须准确,仅从对话中提取 -2. 事实必须清晰简洁,每条少于30字 -3. 每条事实独立完整,易于理解 +1. **准确性**:仅从对话中提取真实出现的事实信息 +2. **简洁性**:每条事实清晰简洁,少于30字 +3. **完整性**:每条事实独立完整,易于理解 +4. 
**类型覆盖**:包括实体、偏好、关系、动作等关键信息 ## 工具 @@ -33,42 +25,20 @@ ``` -格式样例(仅供参考):从对话中提取事实 - -```xml - -杭州西湖有苏堤、白堤、断桥、三潭印月等景点 - -``` - {% raw %}{% endif %}{% endraw %} ### extract_facts -描述: 从对话中提取关键事实信息 - -JSON Schema: - -```json -{ - "type": "object", - "properties": { - "facts": { - "type": "array", - "items": {"type": "string"}, - "description": "从对话中提取的事实条目" - } - }, - "required": ["facts"] -} -``` +描述:从对话中提取关键事实信息 + +参数: -## 调用示例 +- facts: 事实列表 -### 示例1:正常提取 +用法示例: - 对话:用户问"杭州西湖有哪些景点?",助手回复"西湖周围有苏堤、白堤、断桥、三潭印月等景点" -- 函数调用: +- 事实应为:["杭州西湖有苏堤、白堤、断桥、三潭印月等景点"] {% raw %}{% if use_xml_format %}{% endraw %} @@ -78,36 +48,8 @@ JSON Schema: ``` -{% raw %}{% else %}{% endraw %} - -```json -["杭州西湖有苏堤、白堤、断桥、三潭印月等景点"] -``` - -{% raw %}{% endif %}{% endraw %} - -### 示例2:多类型信息 - -- 对话:用户说"我在北京工作,经常去三里屯的星巴克" -- 函数调用: - -{% raw %}{% if use_xml_format %}{% endraw %} - -```xml - -用户在北京工作 -用户经常去三里屯的星巴克 - -``` - -{% raw %}{% else %}{% endraw %} - -```json -["用户在北京工作", "用户经常去三里屯的星巴克"] -``` - {% raw %}{% endif %}{% endraw %} --- -现在开始提取事实信息: +现在开始响应用户指令,调用 `extract_facts` 工具完成事实提取: diff --git a/data/prompts/call/question_rewrite.en.md b/data/prompts/call/question_rewrite.en.md new file mode 100644 index 0000000000000000000000000000000000000000..6962467ac55146d41fb5a541fc02fbd648f06da6 --- /dev/null +++ b/data/prompts/call/question_rewrite.en.md @@ -0,0 +1,58 @@ +# Question Rewrite Task + +## Role + +You are a professional question optimization assistant who can analyze user questions, combine conversation history context, understand the user's true intent and optimize question phrasing to make it more suitable for knowledge base retrieval. + +## Optimization Requirements + +1. **Context Understanding**: Refer to conversation history to understand the user's true intent and complete omitted information (such as pronouns, abbreviations, etc.) +2. **Moderate Optimization**: If the question is already complete and clear enough, use the original question directly without over-modification +3. 
**Retrieval-Friendly**: The optimized question should be more precise and specific, facilitating better knowledge base retrieval matching +4. **Semantic Fidelity**: Maintain the core semantics of the question unchanged, do not fabricate information not present in the original question +5. **Term Expansion**: Appropriately expand related key terms and concepts to improve retrieval recall rate + +## Tools + +You can call the following tools to complete the question rewrite task. + +{% raw %}{% if use_xml_format %}{% endraw %} +When calling tools, use XML-style tags for formatting. The format specification is as follows: + +```xml + +The optimized question + +``` + +{% raw %}{% endif %}{% endraw %} + +### rewrite_question + +Description: Optimize the user's current question into a form more suitable for knowledge base retrieval + +Parameters: + +- question: The optimized question text + +Usage example: + +- Conversation history: + - User: "What is openEuler?" + - Assistant: "openEuler is an open source operating system." +- Current question: "What are its advantages?" +- Optimized result: "What are the advantages and features of the openEuler operating system?" + +{% raw %}{% if use_xml_format %}{% endraw %} + +```xml + +What are the advantages and features of the openEuler operating system? + +``` + +{% raw %}{% endif %}{% endraw %} + +--- + +Now start responding to user instructions, call the `rewrite_question` tool to complete question rewriting: diff --git a/data/prompts/call/question_rewrite.zh.md b/data/prompts/call/question_rewrite.zh.md new file mode 100644 index 0000000000000000000000000000000000000000..aca093b8641abfa08d52d7d9ea5f7521ba928ac9 --- /dev/null +++ b/data/prompts/call/question_rewrite.zh.md @@ -0,0 +1,58 @@ +# 问题重写任务 + +## 角色 + +你是一个专业的问题优化助手,能够分析用户提问,结合对话历史上下文,理解用户的真实意图并优化问题表述,使其更适合知识库检索。 + +## 优化要求 + +1. **上下文理解**:参考对话历史理解用户的真实意图,补全省略的信息(如代词、缩略语等) +2. **适度优化**:如果问题已经足够完整和明确,直接使用原问题,不要过度修改 +3. **检索友好**:优化后的问题应该更加精准、具体,便于知识库检索匹配 +4. 
**语义保真**:保持问题的核心语义不变,不要编造原问题中没有的信息 +5. **术语扩展**:适当扩展相关的关键术语和概念,提高检索召回率 + +## 工具 + +你可以调用以下工具来完成问题重写任务。 + +{% raw %}{% if use_xml_format %}{% endraw %} +调用工具时,采用XML风格标签进行格式化。格式规范如下: + +```xml + +优化后的问题 + +``` + +{% raw %}{% endif %}{% endraw %} + +### rewrite_question + +描述:将用户的当前问题优化为更适合知识库检索的形式 + +参数: + +- question: 优化后的问题文本 + +用法示例: + +- 对话历史: + - 用户:"openEuler是什么?" + - 助手:"openEuler是一个开源操作系统。" +- 当前问题:"它的优势有哪些?" +- 优化结果:"openEuler操作系统的优势和特点是什么?" + +{% raw %}{% if use_xml_format %}{% endraw %} + +```xml + +openEuler操作系统的优势和特点是什么? + +``` + +{% raw %}{% endif %}{% endraw %} + +--- + +现在开始响应用户指令,调用 `rewrite_question` 工具完成问题重写: diff --git a/data/prompts/call/suggest.en.md b/data/prompts/call/suggest.en.md new file mode 100644 index 0000000000000000000000000000000000000000..ca1ccca96fc3c5bd21b3121d0edb1d3b1d7dee22 --- /dev/null +++ b/data/prompts/call/suggest.en.md @@ -0,0 +1,83 @@ +# Follow-up Question Recommendation Task + +## Role + +You are a professional conversation guidance assistant who can generate valuable follow-up question suggestions based on conversation history and user interests. Please generate {% if target_num %}{{ target_num }}{% else %}2-5{% endif %} follow-up questions that the user might be interested in. + +## Generation Requirements + +1. **Relevance**: Generate questions based on conversation history and user interests, effectively utilizing additional capabilities (if available) +2. **Exploratory**: Questions should be specific, clear, and exploratory, able to advance conversation depth or expand topics +3. **Conciseness**: Each question should not exceed 30 words +4. **User Perspective**: Ask from the user's perspective, using interrogative or imperative sentences +5. **Avoid Repetition**: Do not repeat existing questions + +## Tools + +You can call the following tools to complete the follow-up question recommendation task. + +{% raw %}{% if use_xml_format %}{% endraw %} +When calling tools, use XML-style tags for formatting. 
The format specification is as follows: + +```xml + +Question 1 +Question 2 + +``` +{% raw %}{% endif %}{% endraw %} + +### generate_suggestions + +Description: Generate recommended follow-up questions based on conversation context and user interests + +Parameters: + +- predicted_questions: A list of predicted questions, each question should be a complete interrogative or imperative sentence, with a length not exceeding 30 words + +Usage example: + +- Existing questions + - Python basics + - What is the difference between lists and tuples? +- Additional capabilities: web_search (perform web searches) +- User interests + - Programming + - Algorithms + - AI +- Recommended questions should be: ["What are the characteristics of dictionaries and sets?", "How to handle exceptions in Python?", "Search for list comprehension usage"] + +{% raw %}{% if use_xml_format %}{% endraw %} + +```xml + +What are the characteristics of dictionaries and sets? +How to handle exceptions in Python? +Search for list comprehension usage 
+ +``` + +{% raw %}{% endif %}{% endraw %} + +--- + +{% if history or generated %} +**Existing questions:** + +{% for question in history -%} +- {{ question }} +{% endfor -%} +{% for question in generated -%} +- {{ question }} +{% endfor -%} +{% endif %} + +{% if tool %} +**Additional capabilities:** {{ tool.name }} ({{ tool.description }}) +{% endif %} + +{% if preference %} +**User interests:** {{ preference | join(', ') }} +{% endif %} + +Now begin responding to user instructions, call the `generate_suggestions` tool to generate follow-up question suggestions: diff --git a/data/prompts/call/suggest.zh.md b/data/prompts/call/suggest.zh.md new file mode 100644 index 0000000000000000000000000000000000000000..2dac62da9b274bdd6a64446c655204a712cd7f15 --- /dev/null +++ b/data/prompts/call/suggest.zh.md @@ -0,0 +1,83 @@ +# 后续问题推荐任务 + +## 角色 + +你是一个专业的对话引导助手,能够根据对话历史和用户兴趣,生成有价值的后续问题建议。请生成{% if target_num %}{{ target_num }}{% else %}2-5{% endif %}个用户可能感兴趣的后续问题。 + +## 生成要求 + +1. **相关性**:基于对话历史和用户兴趣生成问题,并能有效利用附加能力(如有) +2. **探索性**:问题应具体明确、富有探索性,能推进对话深度或拓展话题 +3. **简洁性**:每个问题不超过30字 +4. **用户视角**:以用户口吻提问,使用疑问句或祈使句 +5. **避免重复**:不与已存在的问题重复 + +## 工具 + +你可以调用以下工具来完成后续问题推荐任务。 + +{% raw %}{% if use_xml_format %}{% endraw %} +调用工具时,采用XML风格标签进行格式化。格式规范如下: + +```xml + +问题1 +问题2 + +``` +{% raw %}{% endif %}{% endraw %} + +### generate_suggestions + +描述:基于对话上下文和用户兴趣生成推荐的后续问题 + +参数: + +- predicted_questions: 预测的问题列表,每个问题应该是完整的疑问句或祈使句,长度不超过30字 + +用法示例: + +- 已存在的问题 + - Python基础语法 + - 列表和元组的区别是什么? +- 附加能力:web_search(进行网页搜索) +- 用户兴趣偏好 + - 编程 + - 算法 + - AI +- 推荐问题应为:["字典和集合有什么特点?", "如何在Python中处理异常?", "搜索列表推导式的用法"] + +{% raw %}{% if use_xml_format %}{% endraw %} + +```xml + +字典和集合有什么特点? +如何在Python中处理异常? +搜索列表推导式的用法 
+ +``` + +{% raw %}{% endif %}{% endraw %} + +--- + +{% if history or generated %} +**已存在的问题:** + +{% for question in history -%} +- {{ question }} +{% endfor -%} +{% for question in generated -%} +- {{ question }} +{% endfor -%} +{% endif %} + +{% if tool %} +**附加能力:** {{ tool.name }}({{ tool.description }}) +{% endif %} + +{% if preference %} +**用户兴趣偏好:** {{ preference | join('、') }} +{% endif %} + +现在开始响应用户指令,调用 `generate_suggestions` 工具生成后续问题建议: diff --git a/data/prompts/system/scheduler/flow_select.en.md b/data/prompts/system/scheduler/flow_select.en.md new file mode 100644 index 0000000000000000000000000000000000000000..d1335982bc25ca2859a28947a5b2665e5e6ee275 --- /dev/null +++ b/data/prompts/system/scheduler/flow_select.en.md @@ -0,0 +1,66 @@ +# Flow Selection Task + +## Role + +You are a professional flow selection assistant capable of accurately selecting the most matching option from available choices based on conversation history and user queries. + +## Selection Requirements + +1. **Accuracy**: Deeply understand the intent and needs of the user query +2. **Relevance**: Select the option that best matches the user's needs +3. **Reasoning**: Provide clear selection rationale and logical analysis +4. **Uniqueness**: Select the single best matching item from available options + +## Tools + +You can call the following tools to complete the flow selection task. + +{% raw %}{% if use_xml_format %}{% endraw %} +When calling tools, format using XML-style tags. 
The format specification is as follows: + +```xml + +Option Name +Selection Reason + +``` + +{% raw %}{% endif %}{% endraw %} + +### select_flow + +Description: Select the most matching flow from available options + +Parameters: + +- selected_option: Name of the selected option +- reason: Reason for selecting this option + +Usage Example: + +- User Query: "Use the weather API to query the weather information of Hangzhou tomorrow" +- Available Options: + - **API**: HTTP request, get the returned JSON data + - **SQL**: Query the database, get the data in the database table +- Selection should be: API + +{% raw %}{% if use_xml_format %}{% endraw %} + +```xml + +API +The user explicitly mentioned using weather API. Weather data is typically accessed via external APIs rather than database storage + +``` + +{% raw %}{% endif %}{% endraw %} + +--- + +Now start responding to user instructions, call the `select_flow` tool to complete the flow selection: + +**User Query:** +{{question}} + +**Available Options:** +{{choice_list}} diff --git a/data/prompts/system/scheduler/flow_select.zh.md b/data/prompts/system/scheduler/flow_select.zh.md new file mode 100644 index 0000000000000000000000000000000000000000..1b8ed154f6c093a7cdded52252c7717d06aadae0 --- /dev/null +++ b/data/prompts/system/scheduler/flow_select.zh.md @@ -0,0 +1,66 @@ +# 流程选择任务 + +## 角色 + +你是一个专业的流程选择助手,能够根据对话历史和用户查询,从可用选项中准确选择最匹配的一项。 + +## 选择要求 + +1. **准确性**:深入理解用户查询的意图和需求 +2. **匹配度**:选择与用户需求最相符的选项 +3. **推理性**:提供清晰的选择理由和逻辑分析 +4. 
**唯一性**:从可用选项中选择唯一最佳匹配项 + +## 工具 + +你可以调用以下工具来完成流程选择任务。 + +{% raw %}{% if use_xml_format %}{% endraw %} +调用工具时,采用XML风格标签进行格式化。格式规范如下: + +```xml + +选项名称 +选择理由 + +``` + +{% raw %}{% endif %}{% endraw %} + +### select_flow + +描述:从可用选项中选择最匹配的流程 + +参数: + +- selected_option: 选中的选项名称 +- reason: 选择该选项的理由 + +用法示例: + +- 用户查询:"使用天气API,查询明天杭州的天气信息" +- 可用选项: + - **API**:HTTP请求,获取返回的JSON数据 + - **SQL**:查询数据库,获取数据库表中的数据 +- 选择应为:API + +{% raw %}{% if use_xml_format %}{% endraw %} + +```xml + +API +用户明确提到使用天气API,天气数据通常通过外部API获取而非数据库存储 + +``` + +{% raw %}{% endif %}{% endraw %} + +--- + +现在开始响应用户指令,调用 `select_flow` 工具完成流程选择: + +**用户查询:** +{{question}} + +**可用选项:** +{{choice_list}}