From 5503b0d5f307dc27fea1a14b7a0e4575d288ec2b Mon Sep 17 00:00:00 2001 From: JzoNg Date: Fri, 7 Jun 2024 17:00:13 +0800 Subject: [PATCH 01/26] add role knowledge admin --- .../members-page/invite-modal/index.tsx | 74 ++------------- .../invite-modal/role-selector.tsx | 93 +++++++++++++++++++ .../members-page/operation/index.tsx | 3 +- web/i18n/en-US/common.ts | 2 + web/i18n/zh-Hans/common.ts | 2 + 5 files changed, 106 insertions(+), 68 deletions(-) create mode 100644 web/app/components/header/account-setting/members-page/invite-modal/role-selector.tsx diff --git a/web/app/components/header/account-setting/members-page/invite-modal/index.tsx b/web/app/components/header/account-setting/members-page/invite-modal/index.tsx index 51c137dca4472..23cf3bb850971 100644 --- a/web/app/components/header/account-setting/members-page/invite-modal/index.tsx +++ b/web/app/components/header/account-setting/members-page/invite-modal/index.tsx @@ -1,13 +1,12 @@ 'use client' -import { Fragment, useCallback, useMemo, useState } from 'react' +import { useCallback, useState } from 'react' import { useContext } from 'use-context-selector' import { XMarkIcon } from '@heroicons/react/24/outline' import { useTranslation } from 'react-i18next' import { ReactMultiEmail } from 'react-multi-email' -import { Listbox, Transition } from '@headlessui/react' -import { CheckIcon } from '@heroicons/react/20/solid' import cn from 'classnames' import s from './index.module.css' +import RoleSelector from './role-selector' import Modal from '@/app/components/base/modal' import Button from '@/app/components/base/button' import { inviteMember } from '@/service/common' @@ -31,29 +30,14 @@ const InviteModal = ({ const { notify } = useContext(ToastContext) const { locale } = useContext(I18n) - - const InvitingRoles = useMemo(() => [ - { - name: 'normal', - description: t('common.members.normalTip'), - }, - { - name: 'editor', - description: t('common.members.editorTip'), - }, - { - name: 'admin', - description: t('common.members.adminTip'), - }, - ], [t]) - const [role, setRole] = useState(InvitingRoles[0]) + const [role, setRole] = useState('normal') const handleSend = useCallback(async () => { if (emails.map((email: string) => emailRegex.test(email)).every(Boolean)) { try { const { result, invitation_results } = await inviteMember({ url: '/workspaces/current/members/invite-email', - body: { emails, role: role.name, language: locale }, + body: { emails, role, language: locale }, }) if (result === 'success') { @@ -99,53 +83,9 @@ const InviteModal = ({ placeholder={t('common.members.emailPlaceholder') || ''} /> - -
- - {t('common.members.invitedAsRole', { role: t(`common.members.${role.name}`) })} - - - - {InvitingRoles.map(role => - - `${active ? ' bg-gray-50 rounded-xl' : ' bg-transparent'} - cursor-default select-none relative py-2 px-4 mx-2 flex flex-col` - } - value={role} - > - {({ selected }) => ( -
- - {selected && ( -
- - {t(`common.members.${role.name}`)} - - - {role.description} - -
-
- )} -
, - )} -
-
-
-
+
+ +
- - - +
+
+
) From e8b8f6c6ddc57e5046935f9d6b3e6c16e14c5873 Mon Sep 17 00:00:00 2001 From: Joe <79627742+ZhouhaoJiang@users.noreply.github.com> Date: Fri, 28 Jun 2024 00:24:37 +0800 Subject: [PATCH 22/26] Feat/fix ops trace (#5672) Co-authored-by: takatost --- .devcontainer/post_create_command.sh | 2 +- .vscode/launch.json | 14 ++- api/README.md | 2 +- api/app.py | 2 - api/core/moderation/input_moderation.py | 2 +- api/core/ops/entities/trace_entity.py | 12 +- api/core/ops/langfuse_trace/langfuse_trace.py | 31 ++++- api/core/ops/ops_trace_manager.py | 115 ++++++++++++------ api/core/rag/retrieval/dataset_retrieval.py | 4 +- .../parameter_extractor_node.py | 2 +- api/docker/entrypoint.sh | 2 +- ...9b_update_appmodelconfig_and_add_table_.py | 8 +- ...9_remove_app_model_config_trace_config_.py | 7 -- api/models/dataset.py | 95 +++++++++++++++ api/models/model.py | 43 +++++++ api/models/workflow.py | 49 ++++++++ api/tasks/ops_trace_task.py | 46 +++++++ 17 files changed, 372 insertions(+), 64 deletions(-) create mode 100644 api/tasks/ops_trace_task.py diff --git a/.devcontainer/post_create_command.sh b/.devcontainer/post_create_command.sh index 965c0c36ad93e..3ebc06e60520a 100755 --- a/.devcontainer/post_create_command.sh +++ b/.devcontainer/post_create_command.sh @@ -3,7 +3,7 @@ cd web && npm install echo 'alias start-api="cd /workspaces/dify/api && flask run --host 0.0.0.0 --port=5001 --debug"' >> ~/.bashrc -echo 'alias start-worker="cd /workspaces/dify/api && celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail"' >> ~/.bashrc +echo 'alias start-worker="cd /workspaces/dify/api && celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace"' >> ~/.bashrc echo 'alias start-web="cd /workspaces/dify/web && npm run dev"' >> ~/.bashrc echo 'alias start-containers="cd /workspaces/dify/docker && docker-compose -f docker-compose.middleware.yaml -p dify up -d"' >> ~/.bashrc diff --git a/.vscode/launch.json b/.vscode/launch.json index 55fdbb8b50f7c..03b15e7f27a82 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -37,7 +37,19 @@ "FLASK_DEBUG": "1", "GEVENT_SUPPORT": "True" }, - "args": ["-A", "app.celery", "worker", "-P", "gevent", "-c", "1", "--loglevel", "info", "-Q", "dataset,generation,mail"], + "args": [ + "-A", + "app.celery", + "worker", + "-P", + "gevent", + "-c", + "1", + "--loglevel", + "info", + "-Q", + "dataset,generation,mail,ops_trace" + ] }, ] } \ No newline at end of file diff --git a/api/README.md b/api/README.md index 5f71dbe5f07f7..125cd8a78c5c6 100644 --- a/api/README.md +++ b/api/README.md @@ -66,7 +66,7 @@ 10. If you need to debug local async processing, please start the worker service. ```bash - poetry run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail + poetry run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace ``` The started celery app handles the async tasks, e.g. dataset importing and documents indexing. diff --git a/api/app.py b/api/app.py index 2c9b59706b4ea..2ea7c6d235a9c 100644 --- a/api/app.py +++ b/api/app.py @@ -26,7 +26,6 @@ from commands import register_commands # DO NOT REMOVE BELOW -from events import event_handlers from extensions import ( ext_celery, ext_code_based_extension, @@ -43,7 +42,6 @@ from extensions.ext_database import db from extensions.ext_login import login_manager from libs.passport import PassportService -from models import account, dataset, model, source, task, tool, tools, web from services.account_service import AccountService # DO NOT REMOVE ABOVE diff --git a/api/core/moderation/input_moderation.py b/api/core/moderation/input_moderation.py index 3482d5c5cfd33..c5dd88fb2458b 100644 --- a/api/core/moderation/input_moderation.py +++ b/api/core/moderation/input_moderation.py @@ -57,7 +57,7 @@ def check( timer=timer ) ) - + if not moderation_result.flagged: return False, inputs, query diff --git a/api/core/ops/entities/trace_entity.py b/api/core/ops/entities/trace_entity.py index b615f21e6c99f..db7e0806ee8d7 100644 --- a/api/core/ops/entities/trace_entity.py +++ b/api/core/ops/entities/trace_entity.py @@ -94,5 +94,15 @@ class ToolTraceInfo(BaseTraceInfo): class GenerateNameTraceInfo(BaseTraceInfo): - conversation_id: str + conversation_id: Optional[str] = None tenant_id: str + +trace_info_info_map = { + 'WorkflowTraceInfo': WorkflowTraceInfo, + 'MessageTraceInfo': MessageTraceInfo, + 'ModerationTraceInfo': ModerationTraceInfo, + 'SuggestedQuestionTraceInfo': SuggestedQuestionTraceInfo, + 'DatasetRetrievalTraceInfo': DatasetRetrievalTraceInfo, + 'ToolTraceInfo': ToolTraceInfo, + 'GenerateNameTraceInfo': GenerateNameTraceInfo, +} \ No newline at end of file diff --git a/api/core/ops/langfuse_trace/langfuse_trace.py b/api/core/ops/langfuse_trace/langfuse_trace.py index 05d34c5527b0a..46795c8c3cc4a 100644 --- a/api/core/ops/langfuse_trace/langfuse_trace.py +++ b/api/core/ops/langfuse_trace/langfuse_trace.py @@ -147,6 +147,7 @@ def workflow_trace(self, trace_info: WorkflowTraceInfo): # add span if trace_info.message_id: span_data = LangfuseSpan( + id=node_execution_id, name=f"{node_name}_{node_execution_id}", input=inputs, output=outputs, @@ -160,6 +161,7 @@ def workflow_trace(self, trace_info: WorkflowTraceInfo): ) else: span_data = LangfuseSpan( + id=node_execution_id, name=f"{node_name}_{node_execution_id}", input=inputs, output=outputs, @@ -173,6 +175,30 @@ def workflow_trace(self, trace_info: WorkflowTraceInfo): self.add_span(langfuse_span_data=span_data) + process_data = json.loads(node_execution.process_data) if node_execution.process_data else {} + if process_data and process_data.get("model_mode") == "chat": + total_token = metadata.get("total_tokens", 0) + # add generation + generation_usage = GenerationUsage( + totalTokens=total_token, + ) + + node_generation_data = LangfuseGeneration( + name=f"generation_{node_execution_id}", + trace_id=trace_id, + parent_observation_id=node_execution_id, + start_time=created_at, + end_time=finished_at, + input=inputs, + output=outputs, + metadata=metadata, + level=LevelEnum.DEFAULT if status == 'succeeded' else LevelEnum.ERROR, + status_message=trace_info.error if trace_info.error else "", + usage=generation_usage, + ) + + self.add_generation(langfuse_generation_data=node_generation_data) + def message_trace( self, trace_info: MessageTraceInfo, **kwargs ): @@ -186,7 +212,7 @@ def message_trace( if message_data.from_end_user_id: end_user_data: EndUser = db.session.query(EndUser).filter( EndUser.id == message_data.from_end_user_id - ).first().session_id + ).first() user_id = end_user_data.session_id trace_data = LangfuseTrace( @@ -220,6 +246,7 @@ def message_trace( output=trace_info.answer_tokens, total=trace_info.total_tokens, unit=UnitEnum.TOKENS, + totalCost=message_data.total_price, ) langfuse_generation_data = LangfuseGeneration( @@ -303,7 +330,7 @@ def tool_trace(self, trace_info: ToolTraceInfo): start_time=trace_info.start_time, end_time=trace_info.end_time, metadata=trace_info.metadata, - level=LevelEnum.DEFAULT if trace_info.error == "" else LevelEnum.ERROR, + level=LevelEnum.DEFAULT if trace_info.error == "" or trace_info.error is None else LevelEnum.ERROR, status_message=trace_info.error, ) diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index 00750ab81f630..2ce12f28d1232 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -1,16 +1,17 @@ import json +import logging import os import queue import threading +import time from datetime import timedelta from enum import Enum from typing import Any, Optional, Union from uuid import UUID -from flask import Flask, current_app +from flask import current_app from core.helper.encrypter import decrypt_token, encrypt_token, obfuscated_token -from core.ops.base_trace_instance import BaseTraceInstance from core.ops.entities.config_entity import ( LangfuseConfig, LangSmithConfig, @@ -31,6 +32,7 @@ from extensions.ext_database import db from models.model import App, AppModelConfig, Conversation, Message, MessageAgentThought, MessageFile, TraceAppConfig from models.workflow import WorkflowAppLog, WorkflowRun +from tasks.ops_trace_task import process_trace_tasks provider_config_map = { TracingProviderEnum.LANGFUSE.value: { @@ -105,7 +107,7 @@ def decrypt_tracing_config(cls, tenant_id: str, tracing_provider: str, tracing_c return config_class(**new_config).model_dump() @classmethod - def obfuscated_decrypt_token(cls, tracing_provider: str, decrypt_tracing_config:dict): + def obfuscated_decrypt_token(cls, tracing_provider: str, decrypt_tracing_config: dict): """ Decrypt tracing config :param tracing_provider: tracing provider @@ -295,11 +297,9 @@ def __init__( self.kwargs = kwargs self.file_base_url = os.getenv("FILES_URL", "http://127.0.0.1:5001") - def execute(self, trace_instance: BaseTraceInstance): + def execute(self): method_name, trace_info = self.preprocess() - if trace_instance: - method = trace_instance.trace - method(trace_info) + return trace_info def preprocess(self): if self.trace_type == TraceTaskName.CONVERSATION_TRACE: @@ -372,7 +372,7 @@ def workflow_trace(self, workflow_run: WorkflowRun, conversation_id): } workflow_trace_info = WorkflowTraceInfo( - workflow_data=workflow_run, + workflow_data=workflow_run.to_dict(), conversation_id=conversation_id, workflow_id=workflow_id, tenant_id=tenant_id, @@ -427,7 +427,8 @@ def message_trace(self, message_id): message_tokens = message_data.message_tokens message_trace_info = MessageTraceInfo( - message_data=message_data, + message_id=message_id, + message_data=message_data.to_dict(), conversation_model=conversation_mode, message_tokens=message_tokens, answer_tokens=message_data.answer_tokens, @@ -469,7 +470,7 @@ def moderation_trace(self, message_id, timer, **kwargs): moderation_trace_info = ModerationTraceInfo( message_id=workflow_app_log_id if workflow_app_log_id else message_id, inputs=inputs, - message_data=message_data, + message_data=message_data.to_dict(), flagged=moderation_result.flagged, action=moderation_result.action, preset_response=moderation_result.preset_response, @@ -508,7 +509,7 @@ def suggested_question_trace(self, message_id, timer, **kwargs): suggested_question_trace_info = SuggestedQuestionTraceInfo( message_id=workflow_app_log_id if workflow_app_log_id else message_id, - message_data=message_data, + message_data=message_data.to_dict(), inputs=message_data.message, outputs=message_data.answer, start_time=timer.get("start"), @@ -550,11 +551,11 @@ def dataset_retrieval_trace(self, message_id, timer, **kwargs): dataset_retrieval_trace_info = DatasetRetrievalTraceInfo( message_id=message_id, inputs=message_data.query if message_data.query else message_data.inputs, - documents=documents, + documents=[doc.model_dump() for doc in documents], start_time=timer.get("start"), end_time=timer.get("end"), metadata=metadata, - message_data=message_data, + message_data=message_data.to_dict(), ) return dataset_retrieval_trace_info @@ -613,7 +614,7 @@ def tool_trace(self, message_id, timer, **kwargs): tool_trace_info = ToolTraceInfo( message_id=message_id, - message_data=message_data, + message_data=message_data.to_dict(), tool_name=tool_name, start_time=timer.get("start") if timer else created_time, end_time=timer.get("end") if timer else end_time, @@ -657,31 +658,71 @@ def generate_name_trace(self, conversation_id, timer, **kwargs): return generate_name_trace_info +trace_manager_timer = None +trace_manager_queue = queue.Queue() +trace_manager_interval = int(os.getenv("TRACE_QUEUE_MANAGER_INTERVAL", 1)) +trace_manager_batch_size = int(os.getenv("TRACE_QUEUE_MANAGER_BATCH_SIZE", 100)) + + class TraceQueueManager: def __init__(self, app_id=None, conversation_id=None, message_id=None): - tracing_instance = OpsTraceManager.get_ops_trace_instance(app_id, conversation_id, message_id) - self.queue = queue.Queue() - self.is_running = True - self.thread = threading.Thread( - target=self.process_queue, kwargs={ - 'flask_app': current_app._get_current_object(), - 'trace_instance': tracing_instance - } - ) - self.thread.start() + global trace_manager_timer - def stop(self): - self.is_running = False - - def process_queue(self, flask_app: Flask, trace_instance: BaseTraceInstance): - with flask_app.app_context(): - while self.is_running: - try: - task = self.queue.get(timeout=60) - task.execute(trace_instance) - self.queue.task_done() - except queue.Empty: - self.stop() + self.app_id = app_id + self.conversation_id = conversation_id + self.message_id = message_id + self.trace_instance = OpsTraceManager.get_ops_trace_instance(app_id, conversation_id, message_id) + self.flask_app = current_app._get_current_object() + if trace_manager_timer is None: + self.start_timer() def add_trace_task(self, trace_task: TraceTask): - self.queue.put(trace_task) + global trace_manager_timer + global trace_manager_queue + try: + if self.trace_instance: + trace_manager_queue.put(trace_task) + except Exception as e: + logging.debug(f"Error adding trace task: {e}") + finally: + self.start_timer() + + def collect_tasks(self): + global trace_manager_queue + tasks = [] + while len(tasks) < trace_manager_batch_size and not trace_manager_queue.empty(): + task = trace_manager_queue.get_nowait() + tasks.append(task) + trace_manager_queue.task_done() + return tasks + + def run(self): + try: + tasks = self.collect_tasks() + if tasks: + self.send_to_celery(tasks) + except Exception as e: + logging.debug(f"Error processing trace tasks: {e}") + + def start_timer(self): + global trace_manager_timer + if trace_manager_timer is None or not trace_manager_timer.is_alive(): + trace_manager_timer = threading.Timer( + trace_manager_interval, self.run + ) + trace_manager_timer.name = f"trace_manager_timer_{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}" + trace_manager_timer.daemon = False + trace_manager_timer.start() + + def send_to_celery(self, tasks: list[TraceTask]): + with self.flask_app.app_context(): + for task in tasks: + trace_info = task.execute() + task_data = { + "app_id": self.app_id, + "conversation_id": self.conversation_id, + "message_id": self.message_id, + "trace_info_type": type(trace_info).__name__, + "trace_info": trace_info.model_dump() if trace_info else {}, + } + process_trace_tasks.delay(task_data) diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 8544d7c3c86c4..ea2a194a687a4 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -12,7 +12,7 @@ from core.model_runtime.entities.message_entities import PromptMessageTool from core.model_runtime.entities.model_entities import ModelFeature, ModelType from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel -from core.ops.ops_trace_manager import TraceTask, TraceTaskName +from core.ops.ops_trace_manager import TraceQueueManager, TraceTask, TraceTaskName from core.ops.utils import measure_time from core.rag.datasource.retrieval_service import RetrievalService from core.rag.models.document import Document @@ -357,7 +357,7 @@ def _on_retrival_end( db.session.commit() # get tracing instance - trace_manager = self.application_generate_entity.trace_manager if self.application_generate_entity else None + trace_manager: TraceQueueManager = self.application_generate_entity.trace_manager if self.application_generate_entity else None if trace_manager: trace_manager.add_trace_task( TraceTask( diff --git a/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py b/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py index 386fa410aa720..ea0cdf96e7d0f 100644 --- a/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py +++ b/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py @@ -94,7 +94,7 @@ def _run(self, variable_pool: VariablePool) -> NodeRunResult: memory = self._fetch_memory(node_data.memory, variable_pool, model_instance) if set(model_schema.features or []) & {ModelFeature.TOOL_CALL, ModelFeature.MULTI_TOOL_CALL} \ - and node_data.reasoning_mode == 'function_call': + and node_data.reasoning_mode == 'function_call': # use function call prompt_messages, prompt_message_tools = self._generate_function_call_prompt( node_data, query, variable_pool, model_config, memory diff --git a/api/docker/entrypoint.sh b/api/docker/entrypoint.sh index 7b1f1dfc03c67..0bb494abd7626 100755 --- a/api/docker/entrypoint.sh +++ b/api/docker/entrypoint.sh @@ -9,7 +9,7 @@ fi if [[ "${MODE}" == "worker" ]]; then celery -A app.celery worker -P ${CELERY_WORKER_CLASS:-gevent} -c ${CELERY_WORKER_AMOUNT:-1} --loglevel INFO \ - -Q ${CELERY_QUEUES:-dataset,generation,mail} + -Q ${CELERY_QUEUES:-dataset,generation,mail,ops_trace} elif [[ "${MODE}" == "beat" ]]; then celery -A app.celery beat --loglevel INFO else diff --git a/api/migrations/versions/04c602f5dc9b_update_appmodelconfig_and_add_table_.py b/api/migrations/versions/04c602f5dc9b_update_appmodelconfig_and_add_table_.py index a322b9f50290c..be2c615525026 100644 --- a/api/migrations/versions/04c602f5dc9b_update_appmodelconfig_and_add_table_.py +++ b/api/migrations/versions/04c602f5dc9b_update_appmodelconfig_and_add_table_.py @@ -31,17 +31,11 @@ def upgrade(): with op.batch_alter_table('tracing_app_configs', schema=None) as batch_op: batch_op.create_index('tracing_app_config_app_id_idx', ['app_id'], unique=False) - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('trace_config', sa.Text(), nullable=True)) - # ### end Alembic commands ### def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.drop_column('trace_config') - + # ### commands auto generated by Alembic - please adjust! ## with op.batch_alter_table('tracing_app_configs', schema=None) as batch_op: batch_op.drop_index('tracing_app_config_app_id_idx') diff --git a/api/migrations/versions/c031d46af369_remove_app_model_config_trace_config_.py b/api/migrations/versions/c031d46af369_remove_app_model_config_trace_config_.py index 20d9c5d1fb452..1ac44d083aaf4 100644 --- a/api/migrations/versions/c031d46af369_remove_app_model_config_trace_config_.py +++ b/api/migrations/versions/c031d46af369_remove_app_model_config_trace_config_.py @@ -35,18 +35,11 @@ def upgrade(): with op.batch_alter_table('tracing_app_configs', schema=None) as batch_op: batch_op.drop_index('tracing_app_config_app_id_idx') - - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.drop_column('trace_config') - # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('trace_config', sa.TEXT(), autoincrement=False, nullable=True)) - op.create_table('tracing_app_configs', sa.Column('id', sa.UUID(), server_default=sa.text('uuid_generate_v4()'), autoincrement=False, nullable=False), sa.Column('app_id', sa.UUID(), autoincrement=False, nullable=False), diff --git a/api/models/dataset.py b/api/models/dataset.py index 757a5bf8deb6c..672c2be8fabdc 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -352,6 +352,101 @@ def hit_count(self): return DocumentSegment.query.with_entities(func.coalesce(func.sum(DocumentSegment.hit_count))) \ .filter(DocumentSegment.document_id == self.id).scalar() + def to_dict(self): + return { + 'id': self.id, + 'tenant_id': self.tenant_id, + 'dataset_id': self.dataset_id, + 'position': self.position, + 'data_source_type': self.data_source_type, + 'data_source_info': self.data_source_info, + 'dataset_process_rule_id': self.dataset_process_rule_id, + 'batch': self.batch, + 'name': self.name, + 'created_from': self.created_from, + 'created_by': self.created_by, + 'created_api_request_id': self.created_api_request_id, + 'created_at': self.created_at, + 'processing_started_at': self.processing_started_at, + 'file_id': self.file_id, + 'word_count': self.word_count, + 'parsing_completed_at': self.parsing_completed_at, + 'cleaning_completed_at': self.cleaning_completed_at, + 'splitting_completed_at': self.splitting_completed_at, + 'tokens': self.tokens, + 'indexing_latency': self.indexing_latency, + 'completed_at': self.completed_at, + 'is_paused': self.is_paused, + 'paused_by': self.paused_by, + 'paused_at': self.paused_at, + 'error': self.error, + 'stopped_at': self.stopped_at, + 'indexing_status': self.indexing_status, + 'enabled': self.enabled, + 'disabled_at': self.disabled_at, + 'disabled_by': self.disabled_by, + 'archived': self.archived, + 'archived_reason': self.archived_reason, + 'archived_by': self.archived_by, + 'archived_at': self.archived_at, + 'updated_at': self.updated_at, + 'doc_type': self.doc_type, + 'doc_metadata': self.doc_metadata, + 'doc_form': self.doc_form, + 'doc_language': self.doc_language, + 'display_status': self.display_status, + 'data_source_info_dict': self.data_source_info_dict, + 'average_segment_length': self.average_segment_length, + 'dataset_process_rule': self.dataset_process_rule.to_dict() if self.dataset_process_rule else None, + 'dataset': self.dataset.to_dict() if self.dataset else None, + 'segment_count': self.segment_count, + 'hit_count': self.hit_count + } + + @classmethod + def from_dict(cls, data: dict): + return cls( + id=data.get('id'), + tenant_id=data.get('tenant_id'), + dataset_id=data.get('dataset_id'), + position=data.get('position'), + data_source_type=data.get('data_source_type'), + data_source_info=data.get('data_source_info'), + dataset_process_rule_id=data.get('dataset_process_rule_id'), + batch=data.get('batch'), + name=data.get('name'), + created_from=data.get('created_from'), + created_by=data.get('created_by'), + created_api_request_id=data.get('created_api_request_id'), + created_at=data.get('created_at'), + processing_started_at=data.get('processing_started_at'), + file_id=data.get('file_id'), + word_count=data.get('word_count'), + parsing_completed_at=data.get('parsing_completed_at'), + cleaning_completed_at=data.get('cleaning_completed_at'), + splitting_completed_at=data.get('splitting_completed_at'), + tokens=data.get('tokens'), + indexing_latency=data.get('indexing_latency'), + completed_at=data.get('completed_at'), + is_paused=data.get('is_paused'), + paused_by=data.get('paused_by'), + paused_at=data.get('paused_at'), + error=data.get('error'), + stopped_at=data.get('stopped_at'), + indexing_status=data.get('indexing_status'), + enabled=data.get('enabled'), + disabled_at=data.get('disabled_at'), + disabled_by=data.get('disabled_by'), + archived=data.get('archived'), + archived_reason=data.get('archived_reason'), + archived_by=data.get('archived_by'), + archived_at=data.get('archived_at'), + updated_at=data.get('updated_at'), + doc_type=data.get('doc_type'), + doc_metadata=data.get('doc_metadata'), + doc_form=data.get('doc_form'), + doc_language=data.get('doc_language') + ) class DocumentSegment(db.Model): __tablename__ = 'document_segments' diff --git a/api/models/model.py b/api/models/model.py index ecb89861db941..07d7f6d8917f8 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -838,6 +838,49 @@ def workflow_run(self): return None + def to_dict(self) -> dict: + return { + 'id': self.id, + 'app_id': self.app_id, + 'conversation_id': self.conversation_id, + 'inputs': self.inputs, + 'query': self.query, + 'message': self.message, + 'answer': self.answer, + 'status': self.status, + 'error': self.error, + 'message_metadata': self.message_metadata_dict, + 'from_source': self.from_source, + 'from_end_user_id': self.from_end_user_id, + 'from_account_id': self.from_account_id, + 'created_at': self.created_at.isoformat(), + 'updated_at': self.updated_at.isoformat(), + 'agent_based': self.agent_based, + 'workflow_run_id': self.workflow_run_id + } + + @classmethod + def from_dict(cls, data: dict): + return cls( + id=data['id'], + app_id=data['app_id'], + conversation_id=data['conversation_id'], + inputs=data['inputs'], + query=data['query'], + message=data['message'], + answer=data['answer'], + status=data['status'], + error=data['error'], + message_metadata=json.dumps(data['message_metadata']), + from_source=data['from_source'], + from_end_user_id=data['from_end_user_id'], + from_account_id=data['from_account_id'], + created_at=data['created_at'], + updated_at=data['updated_at'], + agent_based=data['agent_based'], + workflow_run_id=data['workflow_run_id'] + ) + class MessageFeedback(db.Model): __tablename__ = 'message_feedbacks' diff --git a/api/models/workflow.py b/api/models/workflow.py index d9bc784878792..2d6491032b966 100644 --- a/api/models/workflow.py +++ b/api/models/workflow.py @@ -324,6 +324,55 @@ def message(self) -> Optional['Message']: def workflow(self): return db.session.query(Workflow).filter(Workflow.id == self.workflow_id).first() + def to_dict(self): + return { + 'id': self.id, + 'tenant_id': self.tenant_id, + 'app_id': self.app_id, + 'sequence_number': self.sequence_number, + 'workflow_id': self.workflow_id, + 'type': self.type, + 'triggered_from': self.triggered_from, + 'version': self.version, + 'graph': self.graph_dict, + 'inputs': self.inputs_dict, + 'status': self.status, + 'outputs': self.outputs_dict, + 'error': self.error, + 'elapsed_time': self.elapsed_time, + 'total_tokens': self.total_tokens, + 'total_steps': self.total_steps, + 'created_by_role': self.created_by_role, + 'created_by': self.created_by, + 'created_at': self.created_at, + 'finished_at': self.finished_at, + } + + @classmethod + def from_dict(cls, data: dict) -> 'WorkflowRun': + return cls( + id=data.get('id'), + tenant_id=data.get('tenant_id'), + app_id=data.get('app_id'), + sequence_number=data.get('sequence_number'), + workflow_id=data.get('workflow_id'), + type=data.get('type'), + triggered_from=data.get('triggered_from'), + version=data.get('version'), + graph=json.dumps(data.get('graph')), + inputs=json.dumps(data.get('inputs')), + status=data.get('status'), + outputs=json.dumps(data.get('outputs')), + error=data.get('error'), + elapsed_time=data.get('elapsed_time'), + total_tokens=data.get('total_tokens'), + total_steps=data.get('total_steps'), + created_by_role=data.get('created_by_role'), + created_by=data.get('created_by'), + created_at=data.get('created_at'), + finished_at=data.get('finished_at'), + ) + class WorkflowNodeExecutionTriggeredFrom(Enum): """ diff --git a/api/tasks/ops_trace_task.py b/api/tasks/ops_trace_task.py new file mode 100644 index 0000000000000..1d336092051f1 --- /dev/null +++ b/api/tasks/ops_trace_task.py @@ -0,0 +1,46 @@ +import logging +import time + +from celery import shared_task +from flask import current_app + +from core.ops.entities.trace_entity import trace_info_info_map +from core.rag.models.document import Document +from models.model import Message +from models.workflow import WorkflowRun + + +@shared_task(queue='ops_trace') +def process_trace_tasks(tasks_data): + """ + Async process trace tasks + :param tasks_data: List of dictionaries containing task data + + Usage: process_trace_tasks.delay(tasks_data) + """ + from core.ops.ops_trace_manager import OpsTraceManager + + trace_info = tasks_data.get('trace_info') + app_id = tasks_data.get('app_id') + conversation_id = tasks_data.get('conversation_id') + message_id = tasks_data.get('message_id') + trace_info_type = tasks_data.get('trace_info_type') + trace_instance = OpsTraceManager.get_ops_trace_instance(app_id, conversation_id, message_id) + + if trace_info.get('message_data'): + trace_info['message_data'] = Message.from_dict(data=trace_info['message_data']) + if trace_info.get('workflow_data'): + trace_info['workflow_data'] = WorkflowRun.from_dict(data=trace_info['workflow_data']) + if trace_info.get('documents'): + trace_info['documents'] = [Document(**doc) for doc in trace_info['documents']] + + try: + if trace_instance: + with current_app.app_context(): + trace_type = trace_info_info_map.get(trace_info_type) + if trace_type: + trace_info = trace_type(**trace_info) + trace_instance.trace(trace_info) + end_at = time.perf_counter() + except Exception: + logging.exception("Processing trace tasks failed") From 2b080b5cfcf55aeb3013615169f44c6eef2e876d Mon Sep 17 00:00:00 2001 From: liuzhenghua <1090179900@qq.com> Date: Fri, 28 Jun 2024 00:27:20 +0800 Subject: [PATCH 23/26] =?UTF-8?q?feature:=20Add=20presence=5Fpenalty=20and?= =?UTF-8?q?=20frequency=5Fpenalty=20parameters=20to=20the=20=E2=80=A6=20(#?= =?UTF-8?q?5637)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: liuzhenghua-jk --- .../model_providers/xinference/llm/llm.py | 98 ++++++++++++++----- 1 file changed, 73 insertions(+), 25 deletions(-) diff --git a/api/core/model_runtime/model_providers/xinference/llm/llm.py b/api/core/model_runtime/model_providers/xinference/llm/llm.py index 637e9b32e69c0..0ef63f8e23491 100644 --- a/api/core/model_runtime/model_providers/xinference/llm/llm.py +++ b/api/core/model_runtime/model_providers/xinference/llm/llm.py @@ -39,6 +39,7 @@ ) from core.model_runtime.entities.model_entities import ( AIModelEntity, + DefaultParameterName, FetchFrom, ModelFeature, ModelPropertyKey, @@ -67,7 +68,7 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel): def _invoke(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict, tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None, stream: bool = True, user: str | None = None) \ - -> LLMResult | Generator: + -> LLMResult | Generator: """ invoke LLM @@ -113,7 +114,8 @@ def validate_credentials(self, model: str, credentials: dict) -> None: elif 'generate' in extra_param.model_ability: credentials['completion_type'] = 'completion' else: - raise ValueError(f'xinference model ability {extra_param.model_ability} is not supported, check if you have the right model type') + raise ValueError( + f'xinference model ability {extra_param.model_ability} is not supported, check if you have the right model type') if extra_param.support_function_call: credentials['support_function_call'] = True @@ -206,6 +208,7 @@ def _num_tokens_for_tools(self, tools: list[PromptMessageTool]) -> int: :param tools: tools for tool calling :return: number of tokens """ + def tokens(text: str): return self._get_num_tokens_by_gpt2(text) @@ -339,6 +342,45 @@ def get_customizable_model_schema(self, model: str, credentials: dict) -> AIMode zh_Hans='最大生成长度', en_US='Max Tokens' ) + ), + ParameterRule( + name=DefaultParameterName.PRESENCE_PENALTY, + use_template=DefaultParameterName.PRESENCE_PENALTY, + type=ParameterType.FLOAT, + label=I18nObject( + en_US='Presence Penalty', + zh_Hans='存在惩罚', + ), + required=False, + help=I18nObject( + en_US='Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they ' + 'appear in the text so far, increasing the model\'s likelihood to talk about new topics.', + zh_Hans='介于 -2.0 和 2.0 之间的数字。正值会根据新词是否已出现在文本中对其进行惩罚,从而增加模型谈论新话题的可能性。' + ), + default=0.0, + min=-2.0, + max=2.0, + precision=2 + ), + ParameterRule( + name=DefaultParameterName.FREQUENCY_PENALTY, + use_template=DefaultParameterName.FREQUENCY_PENALTY, + type=ParameterType.FLOAT, + label=I18nObject( + en_US='Frequency Penalty', + zh_Hans='频率惩罚', + ), + required=False, + help=I18nObject( + en_US='Number between -2.0 and 2.0. Positive values penalize new tokens based on their ' + 'existing frequency in the text so far, decreasing the model\'s likelihood to repeat the ' + 'same line verbatim.', + zh_Hans='介于 -2.0 和 2.0 之间的数字。正值会根据新词在文本中的现有频率对其进行惩罚,从而降低模型逐字重复相同内容的可能性。' + ), + default=0.0, + min=-2.0, + max=2.0, + precision=2 ) ] @@ -364,7 +406,6 @@ def get_customizable_model_schema(self, model: str, credentials: dict) -> AIMode else: raise ValueError(f'xinference model ability {extra_args.model_ability} is not supported') - features = [] support_function_call = credentials.get('support_function_call', False) @@ -395,9 +436,9 @@ def get_customizable_model_schema(self, model: str, credentials: dict) -> AIMode return entity def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], - model_parameters: dict, extra_model_kwargs: XinferenceModelExtraParameter, - tools: list[PromptMessageTool] | None = None, - stop: list[str] | None = None, stream: bool = True, user: str | None = None) \ + model_parameters: dict, extra_model_kwargs: XinferenceModelExtraParameter, + tools: list[PromptMessageTool] | None = None, + stop: list[str] | None = None, stream: bool = True, user: str | None = None) \ -> LLMResult | Generator: """ generate text from LLM @@ -429,6 +470,8 @@ def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptM 'temperature': model_parameters.get('temperature', 1.0), 'top_p': model_parameters.get('top_p', 0.7), 'max_tokens': model_parameters.get('max_tokens', 512), + 'presence_penalty': model_parameters.get('presence_penalty', 0.0), + 'frequency_penalty': model_parameters.get('frequency_penalty', 0.0), } if stop: @@ -453,10 +496,12 @@ def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptM if stream: if tools and len(tools) > 0: raise InvokeBadRequestError('xinference tool calls does not support stream mode') - return self._handle_chat_stream_response(model=model, credentials=credentials, prompt_messages=prompt_messages, - tools=tools, resp=resp) - return self._handle_chat_generate_response(model=model, credentials=credentials, prompt_messages=prompt_messages, - tools=tools, resp=resp) + return self._handle_chat_stream_response(model=model, credentials=credentials, + prompt_messages=prompt_messages, + tools=tools, resp=resp) + return self._handle_chat_generate_response(model=model, credentials=credentials, + prompt_messages=prompt_messages, + tools=tools, resp=resp) elif isinstance(xinference_model, RESTfulGenerateModelHandle): resp = client.completions.create( model=credentials['model_uid'], @@ -466,10 +511,12 @@ def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptM **generate_config, ) if stream: - return self._handle_completion_stream_response(model=model, credentials=credentials, prompt_messages=prompt_messages, - tools=tools, resp=resp) - return self._handle_completion_generate_response(model=model, credentials=credentials, prompt_messages=prompt_messages, - tools=tools, resp=resp) + return self._handle_completion_stream_response(model=model, credentials=credentials, + prompt_messages=prompt_messages, + tools=tools, resp=resp) + return self._handle_completion_generate_response(model=model, credentials=credentials, + prompt_messages=prompt_messages, + tools=tools, resp=resp) else: raise NotImplementedError(f'xinference model handle type {type(xinference_model)} is not supported') @@ -523,8 +570,8 @@ def _extract_response_function_call(self, response_function_call: FunctionCall | return tool_call def _handle_chat_generate_response(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], - tools: list[PromptMessageTool], - resp: ChatCompletion) -> LLMResult: + tools: list[PromptMessageTool], + resp: ChatCompletion) -> LLMResult: """ handle normal chat generate response """ @@ -549,7 +596,8 @@ def _handle_chat_generate_response(self, model: str, credentials: dict, prompt_m prompt_tokens = self._num_tokens_from_messages(messages=prompt_messages, tools=tools) completion_tokens = self._num_tokens_from_messages(messages=[assistant_prompt_message], tools=tools) - usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens) + usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens) response = LLMResult( model=model, @@ -560,10 +608,10 @@ def _handle_chat_generate_response(self, model: str, credentials: dict, prompt_m ) return response - + def _handle_chat_stream_response(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], - tools: list[PromptMessageTool], - resp: Iterator[ChatCompletionChunk]) -> Generator: + tools: list[PromptMessageTool], + resp: Iterator[ChatCompletionChunk]) -> Generator: """ handle stream chat generate response """ @@ -634,8 +682,8 @@ def _handle_chat_stream_response(self, model: str, credentials: dict, prompt_mes full_response += delta.delta.content def _handle_completion_generate_response(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], - tools: list[PromptMessageTool], - resp: Completion) -> LLMResult: + tools: list[PromptMessageTool], + resp: Completion) -> LLMResult: """ handle normal completion generate response """ @@ -671,8 +719,8 @@ def _handle_completion_generate_response(self, model: str, credentials: dict, pr return response def _handle_completion_stream_response(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], - tools: list[PromptMessageTool], - resp: Iterator[Completion]) -> Generator: + tools: list[PromptMessageTool], + resp: Iterator[Completion]) -> Generator: """ handle stream completion generate response """ @@ -764,4 +812,4 @@ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]] InvokeBadRequestError: [ ValueError ] - } \ No newline at end of file + } From f9e4b4e74c0b902cc3cc1a87b59942d0d094a8e8 Mon Sep 17 00:00:00 2001 From: William Espegren <131612909+WilliamEspegren@users.noreply.github.com> Date: Thu, 27 Jun 2024 19:23:01 +0200 Subject: [PATCH 24/26] Fix docker command (#5681) --- api/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/README.md b/api/README.md index 125cd8a78c5c6..9e3c7c446b46e 100644 --- a/api/README.md +++ b/api/README.md @@ -11,7 +11,7 @@ ```bash cd ../docker - docker-compose -f docker-compose.middleware.yaml -p dify up -d + docker compose -f docker-compose.middleware.yaml -p dify up -d cd ../api ``` From b3d6726f6596819602af234c60447589668248f9 Mon Sep 17 00:00:00 2001 From: Kevin Date: Fri, 28 Jun 2024 11:06:29 +0800 Subject: [PATCH 25/26] Feature/add qwen llm (#5659) --- .../model_providers/tongyi/llm/qwen-long.yaml | 81 +++++++++++++++++++ .../tongyi/llm/qwen-max-0403.yaml | 2 +- .../tongyi/llm/qwen-max-0428.yaml | 81 +++++++++++++++++++ .../tongyi/llm/qwen-max-1201.yaml | 2 +- .../tongyi/llm/qwen-max-longcontext.yaml | 2 +- .../model_providers/tongyi/llm/qwen-max.yaml | 2 +- .../tongyi/llm/qwen-plus-chat.yaml | 4 +- .../model_providers/tongyi/llm/qwen-plus.yaml | 4 +- .../tongyi/llm/qwen-turbo-chat.yaml | 4 +- .../tongyi/llm/qwen-turbo.yaml | 4 +- 10 files changed, 174 insertions(+), 12 deletions(-) create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml new file mode 100644 index 0000000000000..b2cf3dd486f4f --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml @@ -0,0 +1,81 @@ +model: qwen-long +label: + en_US: qwen-long +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 10000000 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.0005' + output: '0.002' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml index 865c0c8138688..935a16ebcb116 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml @@ -75,7 +75,7 @@ parameter_rules: - name: response_format use_template: response_format pricing: - input: '0.12' + input: '0.04' output: '0.12' unit: '0.001' currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml new file mode 100644 index 0000000000000..c39799a71fdcd --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml @@ -0,0 +1,81 @@ +model: qwen-max-0428 +label: + en_US: qwen-max-0428 +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.04' + output: '0.12' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml index 533d99aa55dff..0368a4a01e4c6 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml @@ -75,7 +75,7 @@ parameter_rules: - name: response_format use_template: response_format pricing: - input: '0.12' + input: '0.04' output: '0.12' unit: '0.001' currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml index dbe3ece3967f5..1c705670ca6a2 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml @@ -75,7 +75,7 @@ parameter_rules: - name: response_format use_template: response_format pricing: - input: '0.12' + input: '0.04' output: '0.12' unit: '0.001' currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml index 9a0f1afc03038..64094effbbec8 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml @@ -75,7 +75,7 @@ parameter_rules: - name: response_format use_template: response_format pricing: - input: '0.12' + input: '0.04' output: '0.12' unit: '0.001' currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml index 5681f5c7b0666..bc848072edd7f 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml @@ -75,7 +75,7 @@ parameter_rules: - name: response_format use_template: response_format pricing: - input: '0.02' - output: '0.02' + input: '0.004' + output: '0.012' unit: '0.001' currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml index 71dabb55f07fd..4be78627f0495 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml @@ -73,7 +73,7 @@ parameter_rules: - name: response_format use_template: response_format pricing: - input: '0.02' - output: '0.02' + input: '0.004' + output: '0.012' unit: '0.001' currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml index dc8208fac62b4..f1950577ec03a 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml @@ -75,7 +75,7 @@ parameter_rules: - name: response_format use_template: response_format pricing: - input: '0.008' - output: '0.008' + input: '0.002' + output: '0.006' unit: '0.001' currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml index 140dc68af8e20..d4c03100ecbee 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml @@ -73,7 +73,7 @@ parameter_rules: - name: response_format use_template: response_format pricing: - input: '0.008' - output: '0.008' + input: '0.002' + output: '0.006' unit: '0.001' currency: RMB From d37ee498cd2e586ed873c6edcfbf3bc493a2a10a Mon Sep 17 00:00:00 2001 From: luckylhb90 Date: Fri, 28 Jun 2024 06:19:34 +0300 Subject: [PATCH 26/26] fix: do not remove (#5682) Co-authored-by: hobo.l --- api/app.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/app.py b/api/app.py index 2ea7c6d235a9c..2c9b59706b4ea 100644 --- a/api/app.py +++ b/api/app.py @@ -26,6 +26,7 @@ from commands import register_commands # DO NOT REMOVE BELOW +from events import event_handlers from extensions import ( ext_celery, ext_code_based_extension, @@ -42,6 +43,7 @@ from extensions.ext_database import db from extensions.ext_login import login_manager from libs.passport import PassportService +from models import account, dataset, model, source, task, tool, tools, web from services.account_service import AccountService # DO NOT REMOVE ABOVE