From 12178382aedd49375ec1f084370b5a063fa6c098 Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Mon, 30 Mar 2026 21:37:29 +0800 Subject: [PATCH 01/20] fix(bot): align auth with other endpoints using get_request_context Bot proxy endpoints (/chat, /chat/stream) previously required auth unconditionally, while other endpoints like /find use get_request_context which allows dev mode without API keys. This caused inconsistent behavior where ov find worked but ov chat failed with 401. Now bot endpoints use the same get_request_context dependency for consistent auth behavior across all endpoints. Co-Authored-By: Claude Opus 4.6 --- openviking/server/routers/bot.py | 71 +++++++++++++++-------------- tests/server/test_bot_proxy_auth.py | 39 +--------------- 2 files changed, 37 insertions(+), 73 deletions(-) diff --git a/openviking/server/routers/bot.py b/openviking/server/routers/bot.py index 538146a52..cb89661a2 100644 --- a/openviking/server/routers/bot.py +++ b/openviking/server/routers/bot.py @@ -8,9 +8,11 @@ from typing import AsyncGenerator, Optional import httpx -from fastapi import APIRouter, HTTPException, Request, status +from fastapi import APIRouter, Depends, HTTPException, Request, status from fastapi.responses import StreamingResponse +from openviking.server.auth import get_request_context +from openviking.server.identity import RequestContext from openviking_cli.utils.logger import get_logger router = APIRouter(prefix="", tags=["bot"]) @@ -37,32 +39,6 @@ def get_bot_url() -> str: return BOT_API_URL -def extract_auth_token(request: Request) -> Optional[str]: - """Extract and return authorization token from request.""" - # Try X-API-Key header first - api_key = request.headers.get("X-API-Key") - if api_key: - return api_key - - # Try Authorization header (Bearer token) - auth_header = request.headers.get("Authorization") - if auth_header and auth_header.startswith("Bearer "): - return auth_header[7:] # Remove "Bearer " prefix - - return None - - -def require_auth_token(request: Request) -> str: - """Return an auth token or raise 401 for bot proxy endpoints.""" - auth_token = extract_auth_token(request) - if not auth_token: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Missing authentication token", - ) - return auth_token - - @router.get("/health") async def health_check(request: Request): """Health check endpoint for Bot API. @@ -96,14 +72,32 @@ async def health_check(request: Request): ) +def extract_auth_token(request: Request) -> Optional[str]: + """Extract and return authorization token from request.""" + # Try X-API-Key header first + api_key = request.headers.get("X-API-Key") + if api_key: + return api_key + + # Try Authorization header (Bearer token) + auth_header = request.headers.get("Authorization") + if auth_header and auth_header.startswith("Bearer "): + return auth_header[7:] # Remove "Bearer " prefix + + return None + + @router.post("/chat") -async def chat(request: Request): +async def chat( + request: Request, + _ctx: RequestContext = Depends(get_request_context), +): """Send a message to the bot and get a response. Proxies the request to Vikingbot OpenAPIChannel. """ bot_url = get_bot_url() - auth_token = require_auth_token(request) + auth_token = extract_auth_token(request) # Read request body try: @@ -116,8 +110,10 @@ async def chat(request: Request): try: async with httpx.AsyncClient() as client: - # Build headers - headers = {"Content-Type": "application/json", "X-API-Key": auth_token} + # Build headers - only include X-API-Key if provided + headers = {"Content-Type": "application/json"} + if auth_token: + headers["X-API-Key"] = auth_token # Forward to Vikingbot OpenAPIChannel chat endpoint response = await client.post( @@ -149,13 +145,16 @@ async def chat(request: Request): @router.post("/chat/stream") -async def chat_stream(request: Request): +async def chat_stream( + request: Request, + _ctx: RequestContext = Depends(get_request_context), +): """Send a message to the bot and get a streaming response. Proxies the request to Vikingbot OpenAPIChannel with SSE streaming. """ bot_url = get_bot_url() - auth_token = require_auth_token(request) + auth_token = extract_auth_token(request) # Read request body try: @@ -170,8 +169,10 @@ async def event_stream() -> AsyncGenerator[str, None]: """Generate SSE events from bot response stream.""" try: async with httpx.AsyncClient() as client: - # Build headers - headers = {"Content-Type": "application/json", "X-API-Key": auth_token} + # Build headers - only include X-API-Key if provided + headers = {"Content-Type": "application/json"} + if auth_token: + headers["X-API-Key"] = auth_token # Forward to Vikingbot OpenAPIChannel stream endpoint async with client.stream( diff --git a/tests/server/test_bot_proxy_auth.py b/tests/server/test_bot_proxy_auth.py index 977702500..bf5b580f0 100644 --- a/tests/server/test_bot_proxy_auth.py +++ b/tests/server/test_bot_proxy_auth.py @@ -6,7 +6,7 @@ import httpx import pytest import pytest_asyncio -from fastapi import FastAPI, HTTPException, Request +from fastapi import FastAPI, Request import openviking.server.routers.bot as bot_router_module @@ -27,21 +27,6 @@ def make_request(headers: dict[str, str]) -> Request: ) -@pytest_asyncio.fixture -async def bot_auth_client() -> httpx.AsyncClient: - """Client mounted with bot router and bot backend configured.""" - app = FastAPI() - old_bot_api_url = bot_router_module.BOT_API_URL - bot_router_module.set_bot_api_url("http://bot-backend.local") - app.include_router(bot_router_module.router, prefix="/bot/v1") - transport = httpx.ASGITransport(app=app) - try: - async with httpx.AsyncClient(transport=transport, base_url="http://testserver") as client: - yield client - finally: - bot_router_module.BOT_API_URL = old_bot_api_url - - @pytest.mark.parametrize( ("headers", "expected"), [ @@ -52,25 +37,3 @@ async def bot_auth_client() -> httpx.AsyncClient: def test_extract_auth_token(headers: dict[str, str], expected: str): """Accepted auth header formats should both produce a token.""" assert bot_router_module.extract_auth_token(make_request(headers)) == expected - - -def test_require_auth_token_rejects_missing_token(): - """Missing credentials should raise a 401 before proxying.""" - with pytest.raises(HTTPException) as exc_info: - bot_router_module.require_auth_token(make_request({})) - - assert exc_info.value.status_code == 401 - assert exc_info.value.detail == "Missing authentication token" - - -@pytest.mark.asyncio -@pytest.mark.parametrize("path", ["/bot/v1/chat", "/bot/v1/chat/stream"]) -async def test_bot_proxy_requires_auth_token(bot_auth_client: httpx.AsyncClient, path: str): - """Bot proxy endpoints should reject missing auth with 401.""" - response = await bot_auth_client.post( - path, - json={"message": "hello"}, - ) - - assert response.status_code == 401 - assert response.json()["detail"] == "Missing authentication token" From 69768cbb8fec8ced0e820219d09b08c2b8161d3a Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Tue, 31 Mar 2026 10:48:43 +0800 Subject: [PATCH 02/20] refactor(memory): improve URI handling and memory extraction - Update URI utilities for better path handling - Improve memory type registry and updater - Enhance session extraction context provider - Update memory prompt templates Co-Authored-By: Claude Opus 4.6 --- .../prompts/templates/memory/cases.yaml | 4 +- .../prompts/templates/memory/entities.yaml | 4 +- .../prompts/templates/memory/events.yaml | 22 +- .../prompts/templates/memory/patterns.yaml | 4 +- .../prompts/templates/memory/preferences.yaml | 4 +- .../prompts/templates/memory/profile.yaml | 2 +- .../prompts/templates/memory/skills.yaml | 4 +- .../prompts/templates/memory/tools.yaml | 4 +- openviking/session/compressor_v2.py | 8 +- openviking/session/memory/extract_loop.py | 18 +- .../session/memory/memory_type_registry.py | 8 +- openviking/session/memory/memory_updater.py | 33 ++- .../session/memory/schema_model_generator.py | 6 +- .../session_extract_context_provider.py | 20 +- openviking/session/memory/utils/uri.py | 160 +++++++---- tests/models/vlm/test_volcengine_cache.py | 253 ++++++++++++++++++ 16 files changed, 450 insertions(+), 104 deletions(-) create mode 100644 tests/models/vlm/test_volcengine_cache.py diff --git a/openviking/prompts/templates/memory/cases.yaml b/openviking/prompts/templates/memory/cases.yaml index 50d49af97..1475b6525 100644 --- a/openviking/prompts/templates/memory/cases.yaml +++ b/openviking/prompts/templates/memory/cases.yaml @@ -5,8 +5,8 @@ description: | Cases should be about specific problems that have clear solutions. Each case should include: what the problem was (symptoms, error messages, context), what the solution was (steps taken, principles used), and why it worked. Case names should be in "Problem → Solution" format to make them easily searchable. -directory: "viking://agent/{agent_space}/memories/cases" -filename_template: "{case_name}.md" +directory: "viking://agent/{{ agent_space }}/memories/cases" +filename_template: "{{ case_name }}.md" enabled: false fields: - name: case_name diff --git a/openviking/prompts/templates/memory/entities.yaml b/openviking/prompts/templates/memory/entities.yaml index 3b2f693a9..579da2442 100644 --- a/openviking/prompts/templates/memory/entities.yaml +++ b/openviking/prompts/templates/memory/entities.yaml @@ -5,8 +5,8 @@ description: | Relative path format: ../entities/entity_name.md - Example: [skin_itching](../entities/skin_itching.md) → see dermatologist - Cards should be rich and distributed - avoid putting all info in one card. -directory: "viking://user/{user_space}/memories/entities" -filename_template: "{name}.md" +directory: "viking://user/{{ user_space }}/memories/entities" +filename_template: "{{ name }}.md" enabled: true fields: diff --git a/openviking/prompts/templates/memory/events.yaml b/openviking/prompts/templates/memory/events.yaml index 11bc6289f..0c00a8f56 100644 --- a/openviking/prompts/templates/memory/events.yaml +++ b/openviking/prompts/templates/memory/events.yaml @@ -8,20 +8,15 @@ description: | - Use a third-person perspective. - If possible, combine the user's current behavior and reactions to speculate on the user's possible thoughts or actions. - Describe the complete content of an event within a single event as much as possible; do not split one event into multiple parts. -directory: "viking://user/{user_space}/memories/events" -filename_template: "{event_time}_{event_name}.md" +directory: "viking://user/{{ user_space }}/memories/events" +filename_template: "{{ extract_context.get_first_message_time_from_ranges(ranges) }}_{{ event_name }}.md" enabled: true # 操作模式:add_only 表示只新增记忆,不需要查看之前的记忆列表 # upsert 表示新增或更新(默认行为) operation_mode: "add_only" content_template: | - {% set msg_range = extract_context.read_message_ranges(ranges|default('')) if extract_context else None %} - {% set first_time = msg_range.first_message_time() if msg_range else None %} - time: {{event_time|default(first_time if first_time else 'N/A')}} - {% if extract_context %} - {{ msg_range.pretty_print() if msg_range else '' }} - {% endif %} - + time: {{extract_context.get_first_message_time_with_weekday_from_ranges(ranges|default(''))|default('N/A')}} + {{ extract_context.read_message_ranges(ranges|default('')).pretty_print() if ranges else '' }} fields: - name: event_name @@ -38,13 +33,8 @@ fields: - name: summary type: string description: | - Based on the content of the above fields, compile a description to outline the complete Fact content, in English - - - name: event_time - type: string - description: | - Time when the event occurred, format “2026-03-17” / “2026-03” / “2026”. If unknown, use current time. - merge_op: immutable + Based on the content of the above fields, compile a description to outline the complete Fact content. + Use specific year-month-day dates instead of vague terms such as yesterday, last week, and tomorrow. - name: ranges type: string diff --git a/openviking/prompts/templates/memory/patterns.yaml b/openviking/prompts/templates/memory/patterns.yaml index 6b6ba73b4..de9f3dcf1 100644 --- a/openviking/prompts/templates/memory/patterns.yaml +++ b/openviking/prompts/templates/memory/patterns.yaml @@ -5,8 +5,8 @@ description: | Patterns should be about: how to approach certain types of tasks, what steps to follow, what considerations to keep in mind. Each pattern should include: trigger conditions (when to use this pattern), process steps (what to do), and considerations (what to watch out for). Pattern names should be in "Process name: Step description" format. -directory: "viking://agent/{agent_space}/memories/patterns" -filename_template: "{pattern_name}.md" +directory: "viking://agent/{{ agent_space }}/memories/patterns" +filename_template: "{{ pattern_name }}.md" enabled: false fields: - name: pattern_name diff --git a/openviking/prompts/templates/memory/preferences.yaml b/openviking/prompts/templates/memory/preferences.yaml index 98ec6c7e6..ae3f841fa 100644 --- a/openviking/prompts/templates/memory/preferences.yaml +++ b/openviking/prompts/templates/memory/preferences.yaml @@ -5,8 +5,8 @@ description: | Each preference should be about a specific topic (not generic). Topics can be: code style, communication style, tools, workflow, food, commute, etc. Store different topics as separate memory files, do NOT mix unrelated preferences. -directory: "viking://user/{user_space}/memories/preferences" -filename_template: "{user}_{topic}.md" +directory: "viking://user/{{ user_space }}/memories/preferences" +filename_template: "{{ user }}_{{ topic }}.md" enabled: true fields: - name: user diff --git a/openviking/prompts/templates/memory/profile.yaml b/openviking/prompts/templates/memory/profile.yaml index 9bc8dcc09..3bc5c2896 100644 --- a/openviking/prompts/templates/memory/profile.yaml +++ b/openviking/prompts/templates/memory/profile.yaml @@ -4,7 +4,7 @@ description: | Extract relatively stable personal attributes that define the user's identity, work style, and preferences. Include: profession, experience level, technical background, communication style, work habits, etc. Do NOT include transient conversation content or temporary mood states. -directory: "viking://user/{user_space}/memories" +directory: "viking://user/{{ user_space }}/memories" filename_template: "profile.md" enabled: true fields: diff --git a/openviking/prompts/templates/memory/skills.yaml b/openviking/prompts/templates/memory/skills.yaml index 8d7a9d862..0358ce517 100644 --- a/openviking/prompts/templates/memory/skills.yaml +++ b/openviking/prompts/templates/memory/skills.yaml @@ -1,8 +1,8 @@ memory_type: skills description: | Record all skills uses, -directory: "viking://agent/{agent_space}/memories/skills" -filename_template: "{skill_name}.md" +directory: "viking://agent/{{ agent_space }}/memories/skills" +filename_template: "{{ skill_name }}.md" enabled: true content_template: | Skill: {{ skill_name }} diff --git a/openviking/prompts/templates/memory/tools.yaml b/openviking/prompts/templates/memory/tools.yaml index ec6e985cf..801a008df 100644 --- a/openviking/prompts/templates/memory/tools.yaml +++ b/openviking/prompts/templates/memory/tools.yaml @@ -1,8 +1,8 @@ memory_type: tools description: | Record all tool calls -directory: "viking://agent/{agent_space}/memories/tools" -filename_template: "{tool_name}.md" +directory: "viking://agent/{{ agent_space }}/memories/tools" +filename_template: "{{ tool_name }}.md" enabled: true content_template: | Tool: {{ tool_name }} diff --git a/openviking/session/compressor_v2.py b/openviking/session/compressor_v2.py index 816c65e68..2717f84b6 100644 --- a/openviking/session/compressor_v2.py +++ b/openviking/session/compressor_v2.py @@ -140,9 +140,11 @@ async def extract_long_term_memories( continue user_space = ctx.user.user_space_name() if ctx and ctx.user else "default" agent_space = ctx.user.agent_space_name() if ctx and ctx.user else "default" - dir_path = schema.directory.replace("{user_space}", user_space).replace( - "{agent_space}", agent_space - ) + # 使用 Jinja2 渲染 directory + import jinja2 + env = jinja2.Environment(autoescape=False) + template = env.from_string(schema.directory) + dir_path = template.render(user_space=user_space, agent_space=agent_space) dir_path = viking_fs._uri_to_path(dir_path, ctx) if dir_path not in memory_schema_dirs: memory_schema_dirs.append(dir_path) diff --git a/openviking/session/memory/extract_loop.py b/openviking/session/memory/extract_loop.py index 5cc668bbd..c28a9bf82 100644 --- a/openviking/session/memory/extract_loop.py +++ b/openviking/session/memory/extract_loop.py @@ -118,6 +118,11 @@ async def run(self) -> Tuple[Optional[MemoryOperations], List[Dict[str, Any]]]: # 预计算 expected_fields self._expected_fields = ["reasoning", "edit_overview_uris", "delete_uris"] + + # 获取 ExtractContext(整个流程复用) + self._extract_context = self.context_provider.get_extract_context() + if self._extract_context is None: + raise ValueError("Failed to get ExtractContext from provider") for schema in schemas: self._expected_fields.append(schema.memory_type) @@ -185,7 +190,6 @@ async def run(self) -> Tuple[Optional[MemoryOperations], List[Dict[str, Any]]]: if tool_calls: await self._execute_tool_calls(messages, tool_calls, tools_used) - await self._mark_cache_breakpoint(messages) continue # If model returned final operations, check if refetch is needed @@ -201,7 +205,6 @@ async def run(self) -> Tuple[Optional[MemoryOperations], List[Dict[str, Any]]]: max_iterations += 1 logger.info(f"Extended max_iterations to {max_iterations} for refetch") - await self._mark_cache_breakpoint(messages) continue final_operations = operations @@ -280,12 +283,17 @@ def _validate_operations(self, operations: MemoryOperations) -> None: registry = self.context_provider._get_registry() schemas = self.context_provider.get_memory_schemas(self.ctx) + # Use pre-initialized extract_context + if not hasattr(self, '_extract_context') or self._extract_context is None: + raise ValueError("ExtractContext not initialized") + is_valid, errors = validate_operations_uris( operations, schemas, registry, user_space="default", agent_space="default", + extract_context=self._extract_context, ) if not is_valid: error_msg = "Invalid memory operations:\n" + "\n".join(f" - {err}" for err in errors) @@ -307,6 +315,9 @@ async def _call_llm( Returns: Tuple of (tool_calls, operations) - one will be None, the other set """ + # 标记 cache breakpoint + await self._mark_cache_breakpoint(messages) + # Call LLM with tools - use tools from strategy tool_choice = "none" if force_final else None @@ -367,8 +378,7 @@ async def _call_llm( self._validate_operations(operations) return (None, operations) except Exception as e: - print(f"Error parsing operations: {e}") - logger.warning(f"Unexpected error parsing memory operations: {e}") + logger.exception(f"Error parsing operations: {e}") # Case 3: No tool calls and no parsable operations print("No tool calls or operations parsed") diff --git a/openviking/session/memory/memory_type_registry.py b/openviking/session/memory/memory_type_registry.py index 3cd62ad25..203688e72 100644 --- a/openviking/session/memory/memory_type_registry.py +++ b/openviking/session/memory/memory_type_registry.py @@ -73,12 +73,14 @@ def list_search_uris(self, user_space: str, agent_space: str) -> List[str]: Returns: List of directory URIs from enabled schemas """ + import jinja2 + uris = [] for schema in self.list_all(include_disabled=False): if schema.directory: - dir_path = schema.directory.replace("{user_space}", user_space).replace( - "{agent_space}", agent_space - ) + env = jinja2.Environment(autoescape=False) + template = env.from_string(schema.directory) + dir_path = template.render(user_space=user_space, agent_space=agent_space) uris.append(dir_path) return uris diff --git a/openviking/session/memory/memory_updater.py b/openviking/session/memory/memory_updater.py index 417d11547..6ecc17ce6 100644 --- a/openviking/session/memory/memory_updater.py +++ b/openviking/session/memory/memory_updater.py @@ -35,6 +35,20 @@ class ExtractContext: def __init__(self, messages: List[Message]): self.messages = messages + def get_first_message_time_from_ranges(self, ranges_str: str) -> str | None: + """根据 ranges 字符串获取第一条消息的时间(YAML 日期格式)""" + if not ranges_str: + return None + msg_range = self.read_message_ranges(ranges_str) + return msg_range._first_message_time() + + def get_first_message_time_with_weekday_from_ranges(self, ranges_str: str) -> str | None: + """根据 ranges 字符串获取第一条消息的时间,带周几""" + if not ranges_str: + return None + msg_range = self.read_message_ranges(ranges_str) + return msg_range._first_message_time_with_weekday() + def read_message_ranges(self, ranges_str: str) -> "MessageRange": """Parse ranges string like "0-10,50-60" or "7,9,11,13" and return combined MessageRange. @@ -100,8 +114,8 @@ def pretty_print(self) -> str: result.append(f"[{elem.role}]: {elem.content}") return "\n".join(result) - def first_message_time(self) -> str | None: - """获取第一条消息的时间(YAML 日期格式),如果没有消息则返回 None""" + def _first_message_time(self) -> str | None: + """获取第一条消息的时间(内部方法)""" for elem in self.elements: if isinstance(elem, str): continue @@ -109,6 +123,18 @@ def first_message_time(self) -> str | None: return elem.created_at.strftime("%Y-%m-%d") return None + def _first_message_time_with_weekday(self) -> str | None: + """获取第一条消息的时间,带周几(内部方法)""" + for elem in self.elements: + if isinstance(elem, str): + continue + if hasattr(elem, "created_at") and elem.created_at: + # 获取周几的英文全称 + weekday_en = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"] + weekday = weekday_en[elem.created_at.weekday()] + return f"{elem.created_at.strftime('%Y-%m-%d')} ({weekday})" + return None + class MemoryUpdateResult: """Result of memory update operation.""" @@ -205,12 +231,13 @@ async def apply_operations( user_space = ctx.user.user_space_name() if ctx and ctx.user else "default" agent_space = ctx.user.agent_space_name() if ctx and ctx.user else "default" - # Resolve all URIs first + # Resolve all URIs first (pass extract_context for template rendering) resolved_ops = resolve_all_operations( operations, resolved_registry, user_space=user_space, agent_space=agent_space, + extract_context=extract_context, ) if resolved_ops.has_errors(): diff --git a/openviking/session/memory/schema_model_generator.py b/openviking/session/memory/schema_model_generator.py index 275ef0a8f..b6c14596b 100644 --- a/openviking/session/memory/schema_model_generator.py +++ b/openviking/session/memory/schema_model_generator.py @@ -399,11 +399,11 @@ def generate_type_descriptions(self) -> str: # Add variable substitution info lines.append("\n**Variable Substitution:**") - lines.append("- `{user_space}` → 'default'") - lines.append("- `{agent_space}` → 'default'") + lines.append("- `{{ user_space }}` → 'default'") + lines.append("- `{{ agent_space }}` → 'default'") if mt.fields: for field in mt.fields: - lines.append(f"- `{field.name}` → use value from fields") + lines.append(f"- `{{ {field.name} }}` → use value from fields") if mt.fields: lines.append("\n**Fields:**") diff --git a/openviking/session/memory/session_extract_context_provider.py b/openviking/session/memory/session_extract_context_provider.py index 4fa7a1fd2..732a46ac5 100644 --- a/openviking/session/memory/session_extract_context_provider.py +++ b/openviking/session/memory/session_extract_context_provider.py @@ -33,6 +33,15 @@ def __init__(self, messages: Any, latest_archive_overview: str = ""): self._output_language = self._detect_language() self._registry = None # 延迟加载 self._schema_directories = None + self._extract_context = None # 缓存 ExtractContext 实例 + + def get_extract_context(self) -> "ExtractContext": + """获取或创建 ExtractContext 实例(缓存)""" + from openviking.session.memory.memory_updater import ExtractContext + + if self._extract_context is None and self.messages: + self._extract_context = ExtractContext(self.messages) + return self._extract_context def _detect_language(self) -> str: """检测输出语言""" @@ -215,9 +224,10 @@ async def prefetch( # Replace variables in directory path with actual user/agent space user_space = ctx.user.user_space_name() if ctx and ctx.user else "default" agent_space = ctx.user.agent_space_name() if ctx and ctx.user else "default" - dir_path = schema.directory.replace("{user_space}", user_space).replace( - "{agent_space}", agent_space - ) + import jinja2 + env = jinja2.Environment(autoescape=False) + template = env.from_string(schema.directory) + dir_path = template.render(user_space=user_space, agent_space=agent_space) # Always add .overview.md to read list overview_files.add(f"{dir_path}/.overview.md") @@ -227,10 +237,10 @@ async def prefetch( # 只新增,不需要查看之前的记忆列表,只需要读取 .overview.md continue - # Check if filename_template has variables (contains {xxx}) + # Check if filename_template has variables (contains {{ xxx }}) has_variables = False if schema.filename_template: - has_variables = "{" in schema.filename_template and "}" in schema.filename_template + has_variables = "{{" in schema.filename_template and "}}" in schema.filename_template if has_variables or not schema.filename_template: # Multi-file schema or no filename template: ls the directory diff --git a/openviking/session/memory/utils/uri.py b/openviking/session/memory/utils/uri.py index 081ddb007..667fb32c6 100644 --- a/openviking/session/memory/utils/uri.py +++ b/openviking/session/memory/utils/uri.py @@ -8,6 +8,8 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional, Set, Tuple +import jinja2 + from openviking.session.memory.dataclass import MemoryTypeSchema from openviking.session.memory.memory_type_registry import MemoryTypeRegistry from openviking_cli.utils import get_logger @@ -15,6 +17,48 @@ logger = get_logger(__name__) +def _render_jinja_template(template: str, context: Dict[str, Any]) -> str: + """Render a Jinja2 template with the given context.""" + env = jinja2.Environment( + autoescape=False, + keep_trailing_newline=True, + ) + jinja_template = env.from_string(template) + return jinja_template.render(**context) + + +def render_template( + template: str, + fields: Dict[str, Any], + extract_context: Any = None, +) -> str: + """ + Generic Jinja2 template rendering method. + + This is the same method used for rendering content_template in memory_updater.py. + Used for rendering filename_template, directory, etc. + + Args: + template: The template string with Jinja2 placeholders + fields: Dictionary of field values for substitution + extract_context: ExtractContext instance for template access to message ranges + + Returns: + Rendered template string + """ + # 创建 Jinja2 环境,允许未定义的变量(打印警告但不报错) + env = jinja2.Environment(autoescape=False, undefined=jinja2.DebugUndefined) + + # 创建模板变量 + template_vars = fields.copy() + # 始终传入 extract_context,即使是 None,避免模板中访问时 undefined + template_vars["extract_context"] = extract_context + + # 渲染模板 + jinja_template = env.from_string(template) + return jinja_template.render(**template_vars).strip() + + @dataclass class ResolvedOperation: """A resolved memory operation with URI and memory_type.""" @@ -29,6 +73,7 @@ def generate_uri( fields: Dict[str, Any], user_space: str = "default", agent_space: str = "default", + extract_context: Any = None, ) -> str: """ Generate a full URI from memory type schema and field values. @@ -36,8 +81,9 @@ def generate_uri( Args: memory_type: The memory type schema with directory and filename_template fields: The field values to use for template replacement - user_space: The user space to substitute for {user_space} - agent_space: The agent space to substitute for {agent_space} + user_space: The user space to substitute for {{ user_space }} + agent_space: The agent space to substitute for {{ agent_space }} + extract_context: ExtractContext instance for template rendering (same as content_template) Returns: The fully generated URI @@ -58,27 +104,16 @@ def generate_uri( if not uri_template: raise ValueError("Memory type has neither directory nor filename_template") - # Build the replacement dictionary - replacements = { + # Build the context for Jinja2 rendering + context = { "user_space": user_space, "agent_space": agent_space, } + # Add all fields to context + context.update(fields) - # Add all fields to replacements - replacements.update(fields) - - # Replace all template variables - def replace_var(match: re.Match) -> str: - var_name = match.group(1) - if var_name not in replacements: - raise ValueError(f"Missing template variable '{var_name}' in fields") - value = replacements[var_name] - if value is None: - raise ValueError(f"Template variable '{var_name}' has None value") - return str(value) - - # Replace {variable} patterns - uri = re.sub(r"\{([^}]+)\}", replace_var, uri_template) + # Render using unified render_template method (same as content_template) + uri = render_template(uri_template, context, extract_context) return uri @@ -99,9 +134,10 @@ def validate_uri_template(memory_type: MemoryTypeSchema) -> bool: # Check that all variables in filename_template exist in fields if memory_type.filename_template: field_names = {f.name for f in memory_type.fields} - template_vars = set(re.findall(r"\{([^}]+)\}", memory_type.filename_template)) + # Match Jinja2 {{ variable }} patterns + template_vars = set(re.findall(r"\{\{\s*(\w+)\s*\}\}", memory_type.filename_template)) - # {user_space} and {agent_space} are built-in, not from fields + # {{ user_space }} and {{ agent_space }} are built-in, not from fields built_in_vars = {"user_space", "agent_space"} required_field_vars = template_vars - built_in_vars @@ -116,14 +152,16 @@ def collect_allowed_directories( schemas: List[MemoryTypeSchema], user_space: str = "default", agent_space: str = "default", + extract_context: Any = None, ) -> Set[str]: """ Collect all allowed directories from activated schemas. Args: schemas: List of activated memory type schemas - user_space: User space to substitute for {user_space} - agent_space: Agent space to substitute for {agent_space} + user_space: User space to substitute for {{ user_space }} + agent_space: Agent space to substitute for {{ agent_space }} + extract_context: ExtractContext instance for template rendering Returns: Set of allowed directory paths with variables replaced @@ -131,9 +169,9 @@ def collect_allowed_directories( allowed_dirs = set() for schema in schemas: if schema.directory: - dir_path = schema.directory.replace("{user_space}", user_space).replace( - "{agent_space}", agent_space - ) + context = {"user_space": user_space, "agent_space": agent_space} + # Use unified render_template for consistent rendering + dir_path = render_template(schema.directory, context, extract_context) allowed_dirs.add(dir_path) return allowed_dirs @@ -142,18 +180,20 @@ def collect_allowed_path_patterns( schemas: List[MemoryTypeSchema], user_space: str = "default", agent_space: str = "default", + extract_context: Any = None, ) -> Set[str]: """ Collect all allowed full path patterns from activated schemas. Args: schemas: List of activated memory type schemas - user_space: User space to substitute for {user_space} - agent_space: Agent space to substitute for {agent_space} + user_space: User space to substitute for {{ user_space }} + agent_space: Agent space to substitute for {{ agent_space }} + extract_context: ExtractContext instance for template rendering Returns: - Set of allowed path patterns with {user_space} and {agent_space} replaced - (other variables like {topic}, {tool_name}, etc. remain as patterns) + Set of allowed path patterns with {{ user_space }} and {{ agent_space }} replaced + (other variables like {{ topic }}, {{ tool_name }}, etc. remain as patterns) """ allowed_patterns = set() for schema in schemas: @@ -166,24 +206,24 @@ def collect_allowed_path_patterns( pattern_parts.append(schema.filename_template) if pattern_parts: pattern = "/".join(pattern_parts) - pattern = pattern.replace("{user_space}", user_space).replace( - "{agent_space}", agent_space - ) + context = {"user_space": user_space, "agent_space": agent_space} + # Use unified render_template for consistent rendering + pattern = render_template(pattern, context, extract_context) allowed_patterns.add(pattern) return allowed_patterns def _pattern_matches_uri(pattern: str, uri: str) -> bool: """ - Check if a URI matches a pattern with variables like {topic}, {tool_name}, etc. + Check if a URI matches a pattern with variables like {{ topic }}, {{ tool_name }}, etc. The pattern matching is flexible: - - {variable} matches any sequence of characters except '/' + - {{ variable }} matches any sequence of characters except '/' - * matches any sequence of characters except '/' (shell-style) - ** matches any sequence of characters including '/' (shell-style) Args: - pattern: The pattern to match against (may contain {variables} or * wildcards) + pattern: The pattern to match against (may contain {{ variables }} or * wildcards) uri: The URI to check Returns: @@ -196,7 +236,9 @@ def _pattern_matches_uri(pattern: str, uri: str) -> bool: pattern = re.escape(pattern) # Unescape {, }, * that we need to handle specially pattern = pattern.replace(r"\{", "{").replace(r"\}", "}").replace(r"\*", "*") - # Convert {variable} to [^/]+ + # Convert {{ variable }} to [^/]+ + pattern = re.sub(r"\{\{\s*[^}]+\s*\}\}", r"[^/]+", pattern) + # Also support legacy {variable} format pattern = re.sub(r"\{[^}]+\}", r"[^/]+", pattern) # Convert ** to .* and * to [^/]* pattern = pattern.replace("**", ".*") @@ -246,14 +288,14 @@ def is_uri_allowed_for_schema( Args: uri: The URI to check schemas: List of activated memory type schemas - user_space: User space to substitute for {user_space} - agent_space: Agent space to substitute for {agent_space} + user_space: User space to substitute for {{ user_space }} + agent_space: Agent space to substitute for {{ agent_space }} Returns: True if the URI is allowed """ - allowed_dirs = collect_allowed_directories(schemas, user_space, agent_space) - allowed_patterns = collect_allowed_path_patterns(schemas, user_space, agent_space) + allowed_dirs = collect_allowed_directories(schemas, user_space, agent_space, extract_context) + allowed_patterns = collect_allowed_path_patterns(schemas, user_space, agent_space, extract_context) return is_uri_allowed(uri, allowed_dirs, allowed_patterns) @@ -289,6 +331,7 @@ def resolve_flat_model_uri( user_space: str = "default", agent_space: str = "default", memory_type: Optional[str] = None, + extract_context: Any = None, ) -> str: """ Resolve URI for a flat model (used for both write and edit operations). @@ -299,6 +342,7 @@ def resolve_flat_model_uri( user_space: User space for substitution agent_space: Agent space for substitution memory_type: Optional memory_type - if provided, use it instead of reading from model + extract_context: ExtractContext instance for template rendering (same as content_template) Returns: Resolved URI @@ -328,7 +372,7 @@ def resolve_flat_model_uri( # Extract URI fields and generate URI uri_fields = extract_uri_fields_from_flat_model(flat_model, schema) - return generate_uri(schema, uri_fields, user_space, agent_space) + return generate_uri(schema, uri_fields, user_space, agent_space, extract_context) def resolve_overview_edit_uri( @@ -368,10 +412,9 @@ def resolve_overview_edit_uri( if not schema.directory: raise ValueError(f"Memory type {memory_type_str} has no directory configured") - # Substitute user_space and agent_space in directory - directory = schema.directory.replace("{user_space}", user_space).replace( - "{agent_space}", agent_space - ) + # Render directory using Jinja2 + context = {"user_space": user_space, "agent_space": agent_space} + directory = _render_jinja_template(schema.directory, context) # Return the .overview.md URI return f"{directory}/.overview.md" @@ -398,6 +441,7 @@ def resolve_all_operations( registry: MemoryTypeRegistry, user_space: str = "default", agent_space: str = "default", + extract_context: Any = None, ) -> ResolvedOperations: """ Resolve URIs for all operations. @@ -409,6 +453,7 @@ def resolve_all_operations( registry: MemoryTypeRegistry to get schemas user_space: User space for substitution agent_space: Agent space for substitution + extract_context: ExtractContext instance for template rendering (same as content_template) Returns: ResolvedOperations with all URIs resolved @@ -435,7 +480,8 @@ def resolve_all_operations( item_dict = dict(item) if hasattr(item, "model_dump") else dict(item) try: uri = resolve_flat_model_uri( - item_dict, registry, user_space, agent_space, memory_type=field_name + item_dict, registry, user_space, agent_space, + memory_type=field_name, extract_context=extract_context ) if is_edit: resolved.edit_operations.append( @@ -454,7 +500,9 @@ def resolve_all_operations( for op in write_uris: try: - uri = resolve_flat_model_uri(op, registry, user_space, agent_space) + uri = resolve_flat_model_uri( + op, registry, user_space, agent_space, extract_context=extract_context + ) # Legacy format: try to get memory_type from model, otherwise empty memory_type = op.get("memory_type", "") if isinstance(op, dict) else "" resolved.write_operations.append( @@ -465,7 +513,9 @@ def resolve_all_operations( for op in edit_uris: try: - uri = resolve_flat_model_uri(op, registry, user_space, agent_space) + uri = resolve_flat_model_uri( + op, registry, user_space, agent_space, extract_context=extract_context + ) memory_type = op.get("memory_type", "") if isinstance(op, dict) else "" resolved.edit_operations.append( ResolvedOperation(model=op, uri=uri, memory_type=memory_type) @@ -500,6 +550,7 @@ def validate_operations_uris( registry: MemoryTypeRegistry, user_space: str = "default", agent_space: str = "default", + extract_context: Any = None, ) -> Tuple[bool, List[str]]: """ Validate that all URIs in StructuredMemoryOperations are allowed. @@ -508,19 +559,20 @@ def validate_operations_uris( operations: The StructuredMemoryOperations to validate schemas: List of activated memory type schemas registry: MemoryTypeRegistry for URI resolution - user_space: User space to substitute for {user_space} - agent_space: Agent space to substitute for {agent_space} + user_space: User space to substitute for {{ user_space }} + agent_space: Agent space to substitute for {{ agent_space }} + extract_context: ExtractContext instance for template rendering Returns: Tuple of (is_valid, list of error messages) """ - allowed_dirs = collect_allowed_directories(schemas, user_space, agent_space) - allowed_patterns = collect_allowed_path_patterns(schemas, user_space, agent_space) + allowed_dirs = collect_allowed_directories(schemas, user_space, agent_space, extract_context) + allowed_patterns = collect_allowed_path_patterns(schemas, user_space, agent_space, extract_context) errors = [] # First resolve all URIs - resolved = resolve_all_operations(operations, registry, user_space, agent_space) + resolved = resolve_all_operations(operations, registry, user_space, agent_space, extract_context) if resolved.has_errors(): errors.extend(resolved.errors) diff --git a/tests/models/vlm/test_volcengine_cache.py b/tests/models/vlm/test_volcengine_cache.py new file mode 100644 index 000000000..73defabcf --- /dev/null +++ b/tests/models/vlm/test_volcengine_cache.py @@ -0,0 +1,253 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. +# SPDX-License-Identifier: AGPL-3.0 +"""Tests for VolcEngineVLM cache logic.""" + +import pytest +from unittest.mock import AsyncMock, MagicMock, patch + +from openviking.models.vlm.backends.volcengine_vlm import VolcEngineVLM +from openviking.models.vlm.backends.volcengine_vlm import VolcEngineVLM as VLMClass + + +def make_message(role: str, content: str, cache_control: bool = False) -> dict: + """Helper to create a message dict.""" + msg = {"role": role, "content": content} + if cache_control: + msg["cache_control"] = {"type": "ephemeral"} + return msg + + +class TestGetOrCreateFromSegments: + """Tests for _get_or_create_from_segments method.""" + + def _create_vlm_with_mock_cache(self): + """Create a VLM instance with mocked dependencies.""" + vlm = VLMClass( + model="test-model", + api_key="test-key", + api_base="https://ark.cn-beijing.volces.com/api/v3", + ) + # Mock the cache + vlm._response_cache = MagicMock() + vlm.get_async_client = MagicMock() + return vlm + + def test_single_segment_with_cache_hit(self): + """Test: Single segment, cache exists.""" + vlm = self._create_vlm_with_mock_cache() + + # 只有一个 segment [msg0, msg1(cache_control)] + segments = [ + [make_message("system", "You are a helpful assistant"), make_message("user", "Hello", cache_control=True)] + ] + + # Mock cache hit + vlm._response_cache.get.return_value = "resp_123" + + result = VLMClass._get_or_create_from_segments(vlm, segments, 1) + + assert result == "resp_123" + vlm._response_cache.get.assert_called_once() + vlm.get_async_client.assert_not_called() + + @pytest.mark.asyncio + async def test_single_segment_cache_miss_create_new(self): + """Test: Single segment, cache miss, create new cache.""" + vlm = self._create_vlm_with_mock_cache() + + segments = [ + [make_message("system", "You are a helpful assistant"), make_message("user", "Hello", cache_control=True)] + ] + + # Mock cache miss + vlm._response_cache.get.return_value = None + + # Mock API response + mock_response = MagicMock() + mock_response.id = "resp_new_123" + mock_client = AsyncMock() + mock_client.responses.create = AsyncMock(return_value=mock_response) + vlm.get_async_client.return_value = mock_client + + result = await vlm._get_or_create_from_segments(segments, 1) + + assert result == "resp_new_123" + vlm._response_cache.get.assert_called_once() + vlm._response_cache.set.assert_called_once() + + @pytest.mark.asyncio + async def test_two_segments_both_cached(self): + """Test: Two segments, both have cache.""" + vlm = self._create_vlm_with_mock_cache() + + # segments = [[msg0, msg1(cc)], [msg2, msg3(cc)]] + segments = [ + [make_message("system", "You are a helpful assistant"), make_message("user", "Hello", cache_control=True)], + [make_message("user", "How are you?", cache_control=True)], + ] + + # Mock cache hits for both segments + def cache_get(key): + if "seg0" in key: + return "resp_seg0" + if "seg1" in key: + return "resp_seg1" + return None + + vlm._response_cache.get.side_effect = cache_get + + # Should use seg0 cache with previous_response_id to create seg1 + mock_response = MagicMock() + mock_response.id = "resp_combined" + mock_client = AsyncMock() + mock_client.responses.create = AsyncMock(return_value=mock_response) + vlm.get_async_client.return_value = mock_client + + result = await vlm._get_or_create_from_segments(segments, 2) + + # Should return combined response id + assert result == "resp_combined" + + @pytest.mark.asyncio + async def test_two_segments_first_not_cached(self): + """Test: Two segments, first not cached, second cached.""" + vlm = self._create_vlm_with_mock_cache() + + segments = [ + [make_message("system", "System"), make_message("user", "Hello", cache_control=True)], + [make_message("user", "How are you?", cache_control=True)], + ] + + # First segment not cached, second is cached + cache_returns = { + "prefix:seg0_system_hello": None, # First segment - not cached + "prefix:seg1_how_are_you": "resp_seg1", # Second segment - cached + } + + def cache_get(key): + return cache_returns.get(key) + + vlm._response_cache.get.side_effect = cache_get + + # Mock API: first call creates first segment cache, second call extends with previous_response_id + call_count = 0 + mock_responses = ["resp_seg0", "resp_combined"] + + async def mock_create(**kwargs): + nonlocal call_count + resp = MagicMock() + resp.id = mock_responses[call_count] + call_count += 1 + return resp + + mock_client = AsyncMock() + mock_client.responses.create = mock_create + vlm.get_async_client.return_value = mock_client + + result = await vlm._get_or_create_from_segments(segments, 2) + + # Should create first segment, then extend with second + assert result == "resp_combined" + + @pytest.mark.asyncio + async def test_two_segments_neither_cached(self): + """Test: Two segments, neither cached.""" + vlm = self._create_vlm_with_mock_cache() + + segments = [ + [make_message("system", "System"), make_message("user", "Hello", cache_control=True)], + [make_message("user", "How are you?", cache_control=True)], + ] + + # Neither segment cached + vlm._response_cache.get.return_value = None + + # Mock API responses + call_count = 0 + mock_responses = ["resp_seg0", "resp_combined"] + + async def mock_create(**kwargs): + nonlocal call_count + resp = MagicMock() + resp.id = mock_responses[call_count] + call_count += 1 + return resp + + mock_client = AsyncMock() + mock_client.responses.create = mock_create + vlm.get_async_client.return_value = mock_client + + result = await vlm._get_or_create_from_segments(segments, 2) + + # Should create both segments + assert result == "resp_combined" + + @pytest.mark.asyncio + async def test_three_segments_with_middle_cached(self): + """Test: Three segments, middle one cached, others not.""" + vlm = self._create_vlm_with_mock_cache() + + segments = [ + [make_message("system", "System"), make_message("user", "Hello", cache_control=True)], + [make_message("user", "How are you?", cache_control=True)], + [make_message("user", "Tell me a story", cache_control=True)], + ] + + # Only middle segment cached + cache_returns = { + "prefix:seg0_system_hello": None, + "prefix:seg1_how_are_you": "resp_seg1", # Cached + "prefix:seg2_tell_story": None, + } + + def cache_get(key): + return cache_returns.get(key) + + vlm._response_cache.get.side_effect = cache_get + + # Mock API: create seg0, extend to seg1, extend to seg2 + call_count = 0 + mock_responses = ["resp_seg0", "resp_01", "resp_012"] + + async def mock_create(**kwargs): + nonlocal call_count + resp = MagicMock() + resp.id = mock_responses[call_count] + call_count += 1 + return resp + + mock_client = AsyncMock() + mock_client.responses.create = mock_create + vlm.get_async_client.return_value = mock_client + + result = await vlm._get_or_create_from_segments(segments, 3) + + # Should chain: seg0 -> seg1 (cached) -> seg2 + assert result == "resp_012" + + def test_zero_segments(self): + """Test: end_idx = 0 returns None.""" + vlm = self._create_vlm_with_mock_cache() + + segments = [[make_message("system", "System")]] + + result = VLMClass._get_or_create_from_segments(vlm, segments, 0) + + assert result is None + + +class TestCacheKeyGeneration: + """Tests for cache key generation logic.""" + + def test_cache_key_includes_prefix(self): + """Test that cache keys include 'prefix:' prefix.""" + vlm = VLMClass( + model="test-model", + api_key="test-key", + ) + + messages = [make_message("system", "Hello")] + key = vlm._get_response_id_cache_key(messages) + + # Should include prefix in the key + assert "prefix:" in key or key.startswith("prefix:") \ No newline at end of file From 0b66e9f6e508b3e1ff277f0934463ffbd9e92a2e Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Tue, 31 Mar 2026 14:26:18 +0800 Subject: [PATCH 03/20] feat(session): add created_at support for add_message - bot/ov_server.py: pass message timestamp to session.add_message() - client/session.py: pass created_at to underlying client - sync_client.py: add created_at parameter to add_message() When created_at is not provided, the server will use current time. Co-Authored-By: Claude Opus 4.6 --- bot/vikingbot/openviking_mount/ov_server.py | 4 +++- openviking/client/session.py | 6 ++++-- openviking/sync_client.py | 4 +++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/bot/vikingbot/openviking_mount/ov_server.py b/bot/vikingbot/openviking_mount/ov_server.py index bf36fdeae..4ff2a3d6a 100644 --- a/bot/vikingbot/openviking_mount/ov_server.py +++ b/bot/vikingbot/openviking_mount/ov_server.py @@ -433,7 +433,9 @@ async def commit(self, session_id: str, messages: list[dict[str, Any]], user_id: if not parts: continue - await session.add_message(role=role, parts=parts) + # 获取消息的时间戳,如果没有则使用当前时间 + created_at = message.get("timestamp") + await session.add_message(role=role, parts=parts, created_at=created_at) result = await session.commit_async() if client is not self.client: diff --git a/openviking/client/session.py b/openviking/client/session.py index d569ab63a..a571c068d 100644 --- a/openviking/client/session.py +++ b/openviking/client/session.py @@ -40,6 +40,7 @@ async def add_message( role: str, content: Optional[str] = None, parts: Optional[List[Part]] = None, + created_at: Optional[str] = None, ) -> Dict[str, Any]: """Add a message to the session. @@ -47,6 +48,7 @@ async def add_message( role: Message role (e.g., "user", "assistant") content: Text content (simple mode) parts: Parts list (TextPart, ContextPart, ToolPart) + created_at: Message creation time (ISO format string). If not provided, current time is used. If both content and parts are provided, parts takes precedence. @@ -55,8 +57,8 @@ async def add_message( """ if parts is not None: parts_dicts = [asdict(p) for p in parts] - return await self._client.add_message(self.session_id, role, parts=parts_dicts) - return await self._client.add_message(self.session_id, role, content=content) + return await self._client.add_message(self.session_id, role, parts=parts_dicts, created_at=created_at) + return await self._client.add_message(self.session_id, role, content=content, created_at=created_at) async def commit(self, telemetry: TelemetryRequest = False) -> Dict[str, Any]: """Commit the session (archive messages and extract memories). diff --git a/openviking/sync_client.py b/openviking/sync_client.py index 5b7e89ea4..a73fb6907 100644 --- a/openviking/sync_client.py +++ b/openviking/sync_client.py @@ -71,6 +71,7 @@ def add_message( role: str, content: str | None = None, parts: list[dict] | None = None, + created_at: str | None = None, ) -> Dict[str, Any]: """Add a message to a session. @@ -79,10 +80,11 @@ def add_message( role: Message role ("user" or "assistant") content: Text content (simple mode) parts: Parts array (full Part support: TextPart, ContextPart, ToolPart) + created_at: Message creation time (ISO format string). If not provided, current time is used. If both content and parts are provided, parts takes precedence. """ - return run_async(self._async_client.add_message(session_id, role, content, parts)) + return run_async(self._async_client.add_message(session_id, role, content, parts, created_at)) def commit_session( self, session_id: str, telemetry: TelemetryRequest = False From 0a2b54d5983071053756bae669c5a0b3576b28dd Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Wed, 1 Apr 2026 22:18:28 +0800 Subject: [PATCH 04/20] fix(telemetry): add span ended check and AsyncioInstrumentor for background tasks - Add end_time check in tracer.info/set/error to avoid "Tried calling _add_event on an ended span" - Add AsyncioInstrumentor to auto-create child spans for asyncio.create_task - Add @tracer decorator to _run_memory_extraction for background task tracing - Add opentelemetry-instrumentation-asyncio dependency Co-Authored-By: Claude Opus 4.6 --- openviking/session/session.py | 10 +- openviking/telemetry/tracer.py | 542 +++++++++++++++++++++++++++++++++ pyproject.toml | 5 + 3 files changed, 556 insertions(+), 1 deletion(-) create mode 100644 openviking/telemetry/tracer.py diff --git a/openviking/session/session.py b/openviking/session/session.py index 3845b9d04..81b69f460 100644 --- a/openviking/session/session.py +++ b/openviking/session/session.py @@ -15,7 +15,7 @@ from openviking.message import Message, Part from openviking.server.identity import RequestContext, Role -from openviking.telemetry import get_current_telemetry +from openviking.telemetry import get_current_telemetry, tracer from openviking.utils.time_utils import get_current_timestamp from openviking_cli.session.user_id import UserIdentifier from openviking_cli.utils import get_logger, run_async @@ -349,6 +349,7 @@ def commit(self) -> Dict[str, Any]: """Sync wrapper for commit_async().""" return run_async(self.commit_async()) + @tracer("session.commit") async def commit_async(self) -> Dict[str, Any]: """Async commit session: archive immediately, extract memories in background. @@ -363,6 +364,9 @@ async def commit_async(self) -> Dict[str, Any]: from openviking.storage.transaction import LockContext, get_lock_manager from openviking_cli.exceptions import FailedPreconditionError + trace_id = tracer.get_trace_id() + logger.info(f"[TRACER] session_commit started, trace_id={trace_id}") + # ===== Phase 1: Snapshot + clear (PathLock-protected) ===== # Fast pre-check: skip lock entirely if no messages (common case avoids # unnecessary filesystem lock acquisition). @@ -374,6 +378,7 @@ async def commit_async(self) -> Dict[str, Any]: "task_id": None, "archive_uri": None, "archived": False, + "trace_id": trace_id, } blocking_archive = await self._get_blocking_failed_archive_ref() @@ -397,6 +402,7 @@ async def commit_async(self) -> Dict[str, Any]: "task_id": None, "archive_uri": None, "archived": False, + "trace_id": trace_id, } self._compression.compression_index += 1 @@ -460,8 +466,10 @@ async def commit_async(self) -> Dict[str, Any]: "task_id": task.task_id, "archive_uri": archive_uri, "archived": True, + "trace_id": trace_id, } + @tracer("session_commit_phase2") async def _run_memory_extraction( self, task_id: str, diff --git a/openviking/telemetry/tracer.py b/openviking/telemetry/tracer.py new file mode 100644 index 000000000..79c751617 --- /dev/null +++ b/openviking/telemetry/tracer.py @@ -0,0 +1,542 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. +# SPDX-License-Identifier: AGPL-3.0 +"""OpenTelemetry tracer integration for OpenViking.""" + +import functools +import inspect +import json +import logging +import sys +from typing import Any, Callable, Optional + +from loguru import logger + +# Try to import opentelemetry - will be None if not installed +try: + from opentelemetry import trace as otel_trace + from opentelemetry.sdk.trace import TracerProvider, Status, StatusCode + from opentelemetry.sdk.trace.export import BatchSpanProcessor + from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter + from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator + from opentelemetry.context import Context + from opentelemetry.propagate import inject, extract + from opentelemetry.sdk.resources import Resource +except ImportError: + otel_trace = None + TracerProvider = None + Status = None + StatusCode = None + BatchSpanProcessor = None + OTLPSpanExporter = None + TraceContextTextMapPropagator = None + Context = None + extract = None + inject = None + Resource = None + + +# Global tracer instance +_otel_tracer: Any = None +_propagator: Any = None +_trace_id_filter_added: bool = False + + +class TraceIdLoggingFilter(logging.Filter): + """日志过滤器:注入 TraceID""" + + def filter(self, record): + record.trace_id = get_trace_id() + return True + + +def _setup_logging(): + """Setup logging with trace_id injection.""" + global _trace_id_filter_added + + if _trace_id_filter_added: + return + + try: + # Configure logger to patch records with trace_id + logger.configure(patcher=lambda record: record.__setitem__("extra", {**record["extra"], "trace_id": get_trace_id()})) + _trace_id_filter_added = True + except Exception: + pass + + # Also setup standard logging filter + try: + standard_logger = logging.getLogger() + for handler in standard_logger.handlers: + if not any(isinstance(f, TraceIdLoggingFilter) for f in handler.filters): + handler.addFilter(TraceIdLoggingFilter()) + except Exception: + pass + + +def init_tracer_from_config() -> Any: + """Initialize tracer from OpenViking config.""" + try: + from openviking_cli.utils.config import get_openviking_config + + config = get_openviking_config() + tracer_cfg = config.telemetry.tracer + + if not tracer_cfg.enabled: + logger.info("[TRACER] disabled in config") + return None + + if not tracer_cfg.endpoint: + logger.warning("[TRACER] endpoint not configured") + return None + + return init_tracer( + endpoint=tracer_cfg.endpoint, + service_name=tracer_cfg.service_name, + topic=tracer_cfg.topic, + ak=tracer_cfg.ak, + sk=tracer_cfg.sk, + enabled=tracer_cfg.enabled, + ) + except Exception as e: + logger.warning(f"[TRACER] init from config failed: {e}") + return None + + +def _init_asyncio_instrumentation() -> None: + """Initialize asyncio instrumentation to create child spans for create_task.""" + try: + from opentelemetry.instrumentation.asyncio import AsyncioInstrumentor + AsyncioInstrumentor().instrument() + logger.info("[TRACER] initialized AsyncioInstrumentor") + except ImportError: + logger.warning("[TRACER] opentelemetry-instrumentation-asyncio not installed") + except Exception as e: + logger.warning(f"[TRACER] failed to init AsyncioInstrumentor: {e}") + + +def init_tracer( + endpoint: str, + service_name: str, + topic: str, + ak: str, + sk: str, + enabled: bool = True, +) -> Any: + """Initialize the OpenTelemetry tracer. + + Args: + endpoint: OTLP endpoint URL + service_name: Service name for tracing + topic: Trace topic + ak: Access key + sk: Secret key + enabled: Whether to enable tracing + + Returns: + The initialized tracer, or None if initialization failed + """ + global _otel_tracer, _propagator + + if not enabled: + logger.info("[TRACER] disabled by config") + return None + + if otel_trace is None or TracerProvider is None or Resource is None: + logger.warning( + "OpenTelemetry not installed. Install with: uv pip install opentelemetry-api " + "opentelemetry-sdk opentelemetry-exporter-otlpprotogrpc" + ) + return None + + try: + headers = { + "x-tls-otel-tracetopic": topic, + "x-tls-otel-ak": ak, + "x-tls-otel-sk": sk, + "x-tls-otel-region": "cn-beijing", + } + + resource_attributes = { + "service.name": service_name, + } + resource = Resource.create(resource_attributes) + + trace_exporter = OTLPSpanExporter( + endpoint=endpoint, + headers=headers, + ) + + trace_provider = TracerProvider(resource=resource) + trace_provider.add_span_processor( + BatchSpanProcessor( + trace_exporter, + max_export_batch_size=100, + schedule_delay_millis=1000, + export_timeout_millis=60000, + ) + ) + otel_trace.set_tracer_provider(trace_provider) + + _otel_tracer = otel_trace.get_tracer(service_name) + _propagator = TraceContextTextMapPropagator() + + # Setup logging with trace_id + _setup_logging() + + # Initialize asyncio instrumentation to create child spans for create_task + _init_asyncio_instrumentation() + + logger.info(f"[TRACER] initialized with service_name={service_name}, endpoint={endpoint}") + return _otel_tracer + + except Exception as e: + logger.warning(f"[TRACER] initialized failed: {type(e).__name__}: {e}") + return None + + +def get_tracer() -> Any: + """Get the current tracer instance.""" + return _otel_tracer + + +def is_enabled() -> bool: + """Check if tracer is enabled.""" + return _otel_tracer is not None + + +def get_trace_id() -> str: + """Get the current trace ID as a hex string. + + Returns: + The trace ID in hex format, or empty string if no active span + """ + if _otel_tracer is None: + return "" + + try: + current_span = otel_trace.get_current_span() + if current_span is not None and hasattr(current_span, "context"): + trace_id = "{:032x}".format(current_span.context.trace_id) + return trace_id + except Exception: + pass + return "" + + +def to_trace_info() -> str: + """Inject current trace context into a JSON string. + + Returns: + JSON string with trace context, or empty JSON object if no active span + """ + if _otel_tracer is None: + return "{}" + + carrier = {} + inject(carrier) + return json.dumps(carrier) + + +def from_trace_info(trace_info: str) -> Optional[Any]: + """Extract trace context from a JSON string. + + Args: + trace_info: JSON string with trace context + + Returns: + The extracted context, or None if extraction failed + """ + if _otel_tracer is None or not trace_info: + return None + + try: + carrier = json.loads(trace_info) + context = extract(carrier) + return context + except Exception as e: + logger.debug(f"[TRACER] failed to extract trace context: {e}") + return None + + +def start_span( + name: str, + trace_id: Optional[str] = None, +) -> Any: + """Start a new span. + + Args: + name: Span name + trace_id: Optional trace ID to continue from + + Returns: + A context manager for the span + """ + return tracer.start_as_current_span(name=name, trace_id=trace_id) + + +def set_attribute(key: str, value: Any) -> None: + """Set an attribute on the current span.""" + tracer.set(key, value) + + +def add_event(name: str) -> None: + """Add an event to the current span.""" + tracer.info(name) + + +def record_exception(exception: Exception) -> None: + """Record an exception on the current span.""" + tracer.error(str(exception), e=exception, console=False) + + +class tracer: + """Decorator class for tracing functions. + + Usage: + @tracer("my_function") + async def my_function(): + ... + + @tracer("my_function", ignore_result=False) + def sync_function(): + ... + + @tracer("new_trace", is_new_trace=True) + def new_trace_function(): + ... + """ + + def __init__( + self, + name: Optional[str] = None, + ignore_result: bool = True, + ignore_args: bool = True, + is_new_trace: bool = False, + ): + """Initialize the tracer decorator. + + Args: + name: Custom name for the span (defaults to function name) + ignore_result: Whether to ignore the function result in the span + ignore_args: Whether to ignore function arguments, or list of arg names to include + is_new_trace: Whether to create a new trace (vs continue existing) + """ + # 忽略结果 + self.ignore_result = ignore_result + self.ignore_args = ignore_args + + # 需要忽略的参数 + if ignore_args is True: + self.arg_trace_checker = lambda name: False + elif ignore_args is False: + self.arg_trace_checker = lambda name: True + else: + self.arg_trace_checker = lambda name: name not in ignore_args + + self.name = name + self.is_new_trace = is_new_trace + + def __call__(self, func: Callable) -> Callable: + """Decorator to trace a function.""" + context = Context() if self.is_new_trace else None + + if inspect.iscoroutinefunction(func): + @functools.wraps(func) + async def async_wrapper(*args, **kwargs): + if _otel_tracer is None: + return await func(*args, **kwargs) + + span_name = self.name or f"{func.__module__}.{func.__name__}" + with self.start_as_current_span(name=span_name, context=context) as span: + try: + # 记录输入参数 + if not self.ignore_args and args: + self.info("func_args", str(args)) + func_kwargs = {k: v for k, v in kwargs.items() if self.arg_trace_checker(k)} + if len(func_kwargs) > 0: + self.info("func_kwargs", str(func_kwargs)) + + result = await func(*args, **kwargs) + + if result is not None and not self.ignore_result: + self.info(f"result: {result}") + + return result + except Exception as e: + span.record_exception(exception=e) + span.set_status(Status(StatusCode.ERROR)) + raise + return async_wrapper + else: + @functools.wraps(func) + def sync_wrapper(*args, **kwargs): + if _otel_tracer is None: + return func(*args, **kwargs) + + span_name = self.name or f"{func.__module__}.{func.__name__}" + with self.start_as_current_span(name=span_name, context=context) as span: + try: + # 记录输入参数 + if not self.ignore_args and args: + self.set("func_args", str(args)) + func_kwargs = {k: v for k, v in kwargs.items() if self.arg_trace_checker(k)} + if len(func_kwargs) > 0: + self.set("func_kwargs", str(func_kwargs)) + + result = func(*args, **kwargs) + + if result is not None and not self.ignore_result: + self.info(f"result: {result}") + + return result + except Exception as e: + span.record_exception(exception=e) + span.set_status(Status(StatusCode.ERROR)) + raise + return sync_wrapper + + @classmethod + def start_as_current_span(cls, name: str, context=None, trace_id=None): + """Start a new span as current context.""" + if _otel_tracer is None: + return _DummySpanContext() + + try: + if trace_id is not None: + carrier = {"traceparent": f"00-{trace_id}-{format(1, '016x')}-01"} + input_context = extract(carrier=carrier) + elif context is not None: + input_context = context + else: + input_context = None + + return _otel_tracer.start_as_current_span(name=name, context=input_context) + except Exception as e: + logger.debug(f"[TRACER] failed to start span: {e}") + return _DummySpanContext() + + @staticmethod + def get_trace_id() -> str: + """Get the current trace ID as a hex string.""" + if _otel_tracer is None: + return "" + + try: + current_span = otel_trace.get_current_span() + if current_span is not None and hasattr(current_span, "context"): + trace_id = "{:032x}".format(current_span.context.trace_id) + return trace_id + except Exception: + pass + return "" + + @staticmethod + def is_enabled() -> bool: + """Check if tracer is enabled.""" + return _otel_tracer is not None + + @staticmethod + def set(key: str, value: Any) -> None: + """Set an attribute on the current span.""" + if _otel_tracer is None: + return + + try: + current_span = otel_trace.get_current_span() + if current_span: + # 检查 span 是否已结束 + if hasattr(current_span, "end_time") and current_span.end_time: + return # span 已结束,不设置 attribute + current_span.set_attribute(key, str(value)) + except Exception: + pass + + @staticmethod + def info(line: str, console: bool = False) -> None: + """Add an event to the current span.""" + if _otel_tracer is None: + return + + try: + current_span = otel_trace.get_current_span() + if current_span: + # 检查 span 是否已结束 + if hasattr(current_span, "end_time") and current_span.end_time: + return # span 已结束,不添加 event + current_span.add_event(line) + except Exception: + pass + + @staticmethod + def info_span(line: str, console: bool = False) -> None: + """Create a new span with the given name.""" + if console: + logger.info(line) + if _otel_tracer is None: + return + with tracer.start_as_current_span(name=line) as span: + pass + + @staticmethod + def error(line: str, e: Optional[Exception] = None, console: bool = True) -> None: + """Record an error on the current span.""" + if _otel_tracer is None: + return + + try: + current_span = otel_trace.get_current_span() + if current_span: + # 检查 span 是否已结束 + if hasattr(current_span, "end_time") and current_span.end_time: + return # span 已结束,不记录 error + if e is not None: + current_span.set_status(Status(StatusCode.ERROR)) + current_span.record_exception(exception=e, attributes={"error": line}) + else: + current_span.set_status(Status(StatusCode.ERROR)) + current_span.add_event(line) + except Exception: + pass + + +class _DummySpanContext: + """Dummy context manager for when tracer is not enabled.""" + + def __enter__(self): + return self + + def __exit__(self, *args): + pass + + def __aenter__(self): + return self + + def __aexit__(self, *args): + pass + + def set_attribute(self, key: str, value: Any): + pass + + def add_event(self, name: str): + pass + + def record_exception(self, exception: Exception): + pass + + def set_status(self, status: Any): + pass + + +# Keep trace_func as alias for backwards compatibility +trace_func = tracer + + +def trace(name: str): + """Simple decorator to trace a function with a given name. + + Usage: + @tracer.trace("my_function") + async def my_function(): + ... + """ + return tracer(name=name) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 72d759c4d..6174eaca0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,6 +69,11 @@ dependencies = [ "tree-sitter-go>=0.23.0", "tree-sitter-c-sharp>=0.23.0", "tree-sitter-php>=0.23.0", + # OpenTelemetry + "opentelemetry-api>=1.14", + "opentelemetry-sdk>=1.14", + "opentelemetry-exporter-otlp-proto-grpc>=1.14", + "opentelemetry-instrumentation-asyncio>=0.61b0", "loguru>=0.7.3", "cryptography>=42.0.0", "argon2-cffi>=23.0.0", From 821c8021b3e030ba87c17fc21291ff01bce879f3 Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Wed, 1 Apr 2026 22:19:09 +0800 Subject: [PATCH 05/20] update --- bot/scripts/test_restart_openviking_server.sh | 19 +- openviking/models/vlm/backends/litellm_vlm.py | 5 + openviking/models/vlm/backends/openai_vlm.py | 8 +- .../models/vlm/backends/volcengine_vlm.py | 22 +- .../prompts/templates/memory/events.yaml | 12 + openviking/server/app.py | 5 + openviking/session/compressor_v2.py | 9 +- openviking/session/memory/extract_loop.py | 24 +- openviking/session/memory/memory_updater.py | 39 +- openviking/session/memory/tools.py | 8 +- openviking/session/memory/utils/messages.py | 5 +- openviking/telemetry/__init__.py | 4 + openviking/telemetry/langfuse.py | 376 ++++++++++++++++++ openviking_cli/utils/config/__init__.py | 3 + .../utils/config/open_viking_config.py | 5 + .../utils/config/telemetry_config.py | 26 ++ .../integration/test_compressor_v2_xiaomei.py | 2 + 17 files changed, 500 insertions(+), 72 deletions(-) create mode 100644 openviking/telemetry/langfuse.py create mode 100644 openviking_cli/utils/config/telemetry_config.py diff --git a/bot/scripts/test_restart_openviking_server.sh b/bot/scripts/test_restart_openviking_server.sh index ef8a86af3..547d62a6d 100755 --- a/bot/scripts/test_restart_openviking_server.sh +++ b/bot/scripts/test_restart_openviking_server.sh @@ -55,17 +55,9 @@ fi mkdir -p "$TEST_DATA_DIR" echo " ✓ Created clean $TEST_DATA_DIR" -# Step 1: Kill existing vikingbot processes +# Step 1: Clean up test data directory (skip vikingbot kill) echo "" -echo "Step 1: Stopping existing vikingbot processes..." -if pgrep -f "vikingbot.*openapi" > /dev/null 2>&1 || pgrep -f "vikingbot.*gateway" > /dev/null 2>&1; then - pkill -f "vikingbot.*openapi" 2>/dev/null || true - pkill -f "vikingbot.*gateway" 2>/dev/null || true - sleep 2 - echo " ✓ Stopped existing vikingbot processes" -else - echo " ✓ No existing vikingbot processes found" -fi +echo "Step 1: Skipping vikingbot kill (will only kill by port)..." # Step 2: Kill existing openviking-server on specific port echo "" @@ -73,8 +65,6 @@ echo "Step 2: Stopping openviking-server on port $PORT..." PID=$(lsof -ti :$PORT 2>/dev/null || true) if [ -n "$PID" ]; then echo " Found PID: $PID" - pkill -f "vikingbot.*openapi" 2>/dev/null || true - pkill -f "vikingbot.*gateway" 2>/dev/null || true kill $PID 2>/dev/null || true sleep 2 # Force kill if still running @@ -124,10 +114,7 @@ echo "" export OPENVIKING_CONFIG_FILE="$TEST_CONFIG" # Start server -openviking-server \ - --with-bot \ - --port "$PORT" \ - --bot-url "$BOT_URL" +openviking-server --port "$PORT" SERVER_PID=$! echo " Server PID: $SERVER_PID" diff --git a/openviking/models/vlm/backends/litellm_vlm.py b/openviking/models/vlm/backends/litellm_vlm.py index ca4a36aa7..ee7f750fd 100644 --- a/openviking/models/vlm/backends/litellm_vlm.py +++ b/openviking/models/vlm/backends/litellm_vlm.py @@ -17,6 +17,7 @@ import litellm from litellm import acompletion, completion +from openviking.telemetry import tracer from ..base import ToolCall, VLMBase, VLMResponse logger = logging.getLogger(__name__) @@ -300,6 +301,7 @@ def get_completion( self._update_token_usage_from_response(response, duration_seconds=elapsed) return self._build_vlm_response(response, has_tools=bool(tools)) + @tracer("vlm.call", ignore_result=False, ignore_args=["messages"]) async def get_completion_async( self, prompt: str = "", @@ -316,6 +318,9 @@ async def get_completion_async( else: kwargs_messages = [{"role": "user", "content": prompt}] + # 用 tracer.info 打印请求 + tracer.info(f"request: {json.dumps(kwargs_messages, ensure_ascii=False, indent=2)}") + kwargs = self._build_kwargs(model, kwargs_messages, tools, tool_choice, thinking=thinking) last_error = None diff --git a/openviking/models/vlm/backends/openai_vlm.py b/openviking/models/vlm/backends/openai_vlm.py index 05c28c768..b01c89d7b 100644 --- a/openviking/models/vlm/backends/openai_vlm.py +++ b/openviking/models/vlm/backends/openai_vlm.py @@ -11,6 +11,7 @@ from typing import Any, Dict, List, Optional, Union from urllib.parse import urlparse +from openviking.telemetry import tracer from ..base import ToolCall, VLMBase, VLMResponse from ..registry import DEFAULT_AZURE_API_VERSION @@ -124,6 +125,7 @@ def _update_token_usage_from_response( duration_seconds: float = 0.0, ): if hasattr(response, "usage") and response.usage: + tracer.info(f'response.usage={response.usage}') prompt_tokens = response.usage.prompt_tokens completion_tokens = response.usage.completion_tokens self.update_token_usage( @@ -153,7 +155,7 @@ def _build_vlm_response(self, response, has_tools: bool) -> Union[str, VLMRespon """Build response from OpenAI response. Returns str or VLMResponse based on has_tools.""" choice = response.choices[0] message = choice.message - + tracer.info(f'result={message.content}') if has_tools: usage = {} if hasattr(response, "usage") and response.usage: @@ -305,6 +307,7 @@ def get_completion( return self._clean_response(content) + @tracer("vlm.call", ignore_result=True, ignore_args=["messages"]) async def get_completion_async( self, prompt: str = "", @@ -335,6 +338,9 @@ async def get_completion_async( kwargs["tools"] = tools kwargs["tool_choice"] = tool_choice or "auto" + # 用 tracer.info 打印请求 + tracer.info(f"messages={json.dumps(kwargs_messages, ensure_ascii=False, indent=2)}") + last_error = None for attempt in range(max_retries + 1): try: diff --git a/openviking/models/vlm/backends/volcengine_vlm.py b/openviking/models/vlm/backends/volcengine_vlm.py index 06616def1..c5a5ea5d6 100644 --- a/openviking/models/vlm/backends/volcengine_vlm.py +++ b/openviking/models/vlm/backends/volcengine_vlm.py @@ -12,6 +12,7 @@ # Import run_async for sync-to-async calls from openviking_cli.utils import run_async +from openviking.telemetry import tracer from ..base import ToolCall, VLMResponse from .openai_vlm import OpenAIVLM @@ -91,8 +92,8 @@ def _parse_messages_with_breakpoints( static_segment = messages[: first_breakpoint_idx + 1] dynamic_messages = messages[first_breakpoint_idx + 1 :] static_segments = [static_segment] - print(f"static_segment={len(static_segment)}") - print(f"dynamic_messages={len(dynamic_messages)}") + tracer.info(f"static_segment={len(static_segment)}") + tracer.info(f"dynamic_messages={len(dynamic_messages)}") else: # 没有 cache_control 或在第一个位置,全部作为 dynamic static_segments = [] @@ -274,22 +275,11 @@ def _build_vlm_response(self, response, has_tools: bool) -> Union[str, VLMRespon - response.id: response ID - response.usage: token usage """ - # Debug: print response structure - # logger.debug(f"[VolcEngineVLM] Response type: {type(response)}") - # logger.info(f"[VolcEngineVLM] Full response: {response}") - if hasattr(response, "output"): - # logger.debug(f"[VolcEngineVLM] Output items: {len(response.output)}") - for i, item in enumerate(response.output): - # logger.debug(f"[VolcEngineVLM] Item {i}: type={getattr(item, 'type', 'unknown')}") - # Print full item for debugging - # logger.info(f"[VolcEngineVLM] Item {i} full: {item}") - pass - # Extract content from Responses API format content = "" tool_calls = [] finish_reason = "stop" - + tracer.info(f'response.output={response.output}') if hasattr(response, "output") and response.output: for item in response.output: item_type = getattr(item, "type", None) @@ -535,6 +525,7 @@ def _convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]: return converted + @tracer("vlm.call") async def get_completion_async( self, prompt: str = "", @@ -558,6 +549,9 @@ async def get_completion_async( # Each segment ends with cache_control, dynamic is the rest static_segments, dynamic_messages = self._parse_messages_with_breakpoints(kwargs_messages) + # 用 tracer.info 打印请求 + tracer.info(f"request: {json.dumps(kwargs_messages, ensure_ascii=False, indent=2)}") + # If we have static segments, try prefix cache response_format = None # Can be extended for structured output diff --git a/openviking/prompts/templates/memory/events.yaml b/openviking/prompts/templates/memory/events.yaml index 0c00a8f56..aef0d5199 100644 --- a/openviking/prompts/templates/memory/events.yaml +++ b/openviking/prompts/templates/memory/events.yaml @@ -25,6 +25,18 @@ fields: Event name in Chinese or English. If English, use lowercase with underscores, max 3 words. Do not include any dates. merge_op: immutable + - name: event_type + type: string + description: | + Abstract event type (more general than event_name). + Examples: speech, workshop, conference, ceremony, meeting, class, volunteer_activity + + - name: target_audience + type: string + description: | + Who is the target audience for this event. + Examples: children, students, developers, adults, professionals, parents + - name: goal type: string description: | diff --git a/openviking/server/app.py b/openviking/server/app.py index c70e8564e..fed0b128b 100644 --- a/openviking/server/app.py +++ b/openviking/server/app.py @@ -120,6 +120,11 @@ async def lifespan(app: FastAPI): task_tracker = get_task_tracker() task_tracker.start_cleanup_loop() + # Initialize tracer + from openviking.telemetry import tracer_module + + tracer_module.init_tracer_from_config() + yield # Cleanup diff --git a/openviking/session/compressor_v2.py b/openviking/session/compressor_v2.py index 2717f84b6..6b49d52d7 100644 --- a/openviking/session/compressor_v2.py +++ b/openviking/session/compressor_v2.py @@ -18,6 +18,7 @@ from openviking.telemetry import get_current_telemetry from openviking_cli.session.user_id import UserIdentifier from openviking_cli.utils import get_logger +from openviking.telemetry import tracer from openviking_cli.utils.config import get_openviking_config logger = get_logger(__name__) @@ -98,7 +99,7 @@ async def extract_long_term_memories( logger.warning("No RequestContext provided, skipping memory extraction") return [] - logger.info("Starting v2 memory extraction from conversation") + tracer.info("Starting v2 memory extraction from conversation") # Initialize telemetry to 0 (matching v1 pattern) telemetry = get_current_telemetry() @@ -168,7 +169,7 @@ async def extract_long_term_memories( operations, tools_used = await orchestrator.run() if operations is None: - logger.info("No memory operations generated") + tracer.info("No memory operations generated") return [] # Convert to legacy format for logging and apply_operations @@ -185,7 +186,7 @@ async def extract_long_term_memories( registry = orchestrator.context_provider._get_registry() updater = self._get_or_create_updater(registry, transaction_handle) - logger.info( + tracer.info( f"Generated memory operations: write={len(write_uris)}, " f"edit={len(edit_uris)}, edit_overview={len(operations.edit_overview_uris)}, " f"delete={len(operations.delete_uris)}" @@ -201,7 +202,7 @@ async def extract_long_term_memories( operations, ctx, registry=registry, extract_context=extract_context ) - logger.info( + tracer.info( f"Applied memory operations: written={len(result.written_uris)}, " f"edited={len(result.edited_uris)}, deleted={len(result.deleted_uris)}, " f"errors={len(result.errors)}" diff --git a/openviking/session/memory/extract_loop.py b/openviking/session/memory/extract_loop.py index c28a9bf82..e4635081f 100644 --- a/openviking/session/memory/extract_loop.py +++ b/openviking/session/memory/extract_loop.py @@ -29,6 +29,7 @@ validate_operations_uris, ) from openviking.storage.viking_fs import VikingFS, get_viking_fs +from openviking.telemetry import tracer from openviking_cli.utils import get_logger logger = get_logger(__name__) @@ -170,7 +171,7 @@ async def run(self) -> Tuple[Optional[MemoryOperations], List[Dict[str, Any]]]: while iteration < max_iterations: iteration += 1 - logger.info(f"ReAct iteration {iteration}/{max_iterations}") + tracer.info(f"ReAct iteration {iteration}/{max_iterations}") # Check if this is the last iteration - force final result is_last_iteration = iteration >= max_iterations @@ -197,13 +198,13 @@ async def run(self) -> Tuple[Optional[MemoryOperations], List[Dict[str, Any]]]: # Check if any write_uris target existing files that weren't read refetch_uris = await self._check_unread_existing_files(operations) if refetch_uris: - logger.info(f"Found unread existing files: {refetch_uris}, refetching...") + tracer.info(f"Found unread existing files: {refetch_uris}, refetching...") # Add refetch results to messages and continue loop await self._add_refetch_results_to_messages(messages, refetch_uris) # Allow one extra iteration for refetch if iteration >= max_iterations: max_iterations += 1 - logger.info(f"Extended max_iterations to {max_iterations} for refetch") + tracer.info(f"Extended max_iterations to {max_iterations} for refetch") continue @@ -226,10 +227,11 @@ async def run(self) -> Tuple[Optional[MemoryOperations], List[Dict[str, Any]]]: else: raise RuntimeError("ReAct loop completed but no operations generated") - logger.info(f"final_operations={final_operations.model_dump_json(indent=4)}") + tracer.info(f"final_operations={final_operations.model_dump_json(indent=4)}") return final_operations, tools_used + @tracer("extract_loop.execute_tool_calls") async def _execute_tool_calls(self, messages, tool_calls, tools_used): # Execute all tool calls in parallel async def execute_single_tool_call(idx: int, tool_call): @@ -339,11 +341,11 @@ async def _call_llm( ) if prompt_tokens > 0: cache_hit_rate = (cached_tokens / prompt_tokens) * 100 - logger.info( + tracer.info( f"[KVCache] prompt_tokens={prompt_tokens}, cached_tokens={cached_tokens}, cache_hit_rate={cache_hit_rate:.1f}%" ) else: - logger.info( + tracer.info( f"[KVCache] prompt_tokens={prompt_tokens}, cached_tokens={cached_tokens}" ) @@ -351,8 +353,8 @@ async def _call_llm( if response.has_tool_calls: # Format tool calls nicely for debug logging for tc in response.tool_calls: - logger.info(f"[assistant tool_call] (id={tc.id}, name={tc.name})") - logger.info(f" {json.dumps(tc.arguments, indent=2, ensure_ascii=False)}") + tracer.info(f"[assistant tool_call] (id={tc.id}, name={tc.name})") + tracer.info(f" {json.dumps(tc.arguments, indent=2, ensure_ascii=False)}") return (response.tool_calls, None) # Case 2: Try to parse MemoryOperations from content with stability @@ -384,6 +386,7 @@ async def _call_llm( print("No tool calls or operations parsed") return (None, None) + @tracer("extract_loop.execute_tool", ignore_result=False) async def _execute_tool( self, tool_call, @@ -402,7 +405,9 @@ async def _execute_tool( tool_ctx = ToolContext(request_ctx=self.ctx, transaction_handle=self._transaction_handle) try: + tracer.info(f'tool_call.arguments={tool_call.arguments}') result = await tool.execute(self.viking_fs, tool_ctx, **tool_call.arguments) + return result except Exception as e: logger.error(f"Failed to execute {tool_call.name}: {e}") @@ -439,7 +444,8 @@ async def _check_unread_existing_files( item_dict = dict(item) if hasattr(item, "model_dump") else dict(item) try: uri = resolve_flat_model_uri( - item_dict, registry, "default", "default", memory_type=field_name + item_dict, registry, "default", "default", + memory_type=field_name, extract_context=self._extract_context ) except Exception as e: logger.warning(f"Failed to resolve URI for {item}: {e}") diff --git a/openviking/session/memory/memory_updater.py b/openviking/session/memory/memory_updater.py index 6ecc17ce6..e2b8924b8 100644 --- a/openviking/session/memory/memory_updater.py +++ b/openviking/session/memory/memory_updater.py @@ -23,6 +23,7 @@ serialize_with_metadata, ) from openviking.storage.viking_fs import get_viking_fs +from openviking.telemetry import tracer from openviking_cli.exceptions import NotFoundError from openviking_cli.utils import get_logger @@ -257,11 +258,11 @@ async def apply_operations( ) result.add_written(resolved_op.uri) except Exception as e: - logger.info( + tracer.info( f"Failed to write memory: {e}, op={resolved_op.model}, op type={type(resolved_op.model)}" ) if hasattr(resolved_op.model, "model_dump"): - logger.info(f"Op dump: {resolved_op.model.model_dump()}") + tracer.info(f"Op dump: {resolved_op.model.model_dump()}") result.add_error(resolved_op.uri, e) # Apply edit operations @@ -270,7 +271,7 @@ async def apply_operations( await self._apply_edit(resolved_op.model, resolved_op.uri, ctx) result.add_edited(resolved_op.uri) except Exception as e: - logger.error(f"Failed to edit memory {resolved_op.uri}: {e}") + tracer.error(f"Failed to edit memory {resolved_op.uri}",e) result.add_error(resolved_op.uri, e) # Apply edit_overview operations @@ -279,7 +280,7 @@ async def apply_operations( await self._apply_edit_overview(op, uri, ctx) result.add_edited(uri) except Exception as e: - logger.error(f"Failed to edit overview {uri}: {e}") + tracer.error(f"Failed to edit overview {uri}",e) result.add_error(uri, e) # Apply delete operations @@ -288,13 +289,13 @@ async def apply_operations( await self._apply_delete(uri, ctx) result.add_deleted(uri) except Exception as e: - logger.error(f"Failed to delete memory {uri}: {e}") + tracer.error(f"Failed to delete memory {uri}",e) result.add_error(uri, e) # Vectorize written and edited memories await self._vectorize_memories(result, ctx) - logger.info(f"Memory operations applied: {result.summary()}") + tracer.info(f"Memory operations applied: {result.summary()}") return result async def _apply_write( @@ -340,7 +341,7 @@ async def _apply_write( if rendered_content: content = rendered_content except Exception as e: - logger.warning( + tracer.warning( f"Failed to render content template for memory type {memory_type_str}: {e}" ) # 渲染失败时保留原始 content,确保写入操作继续进行 @@ -354,7 +355,7 @@ async def _apply_write( # Write content to VikingFS # VikingFS automatically handles L0/L1/L2 and vector index updates await viking_fs.write_file(uri, full_content, ctx=ctx) - logger.debug(f"Written memory: {uri}") + def _render_content_template( self, template: str, fields: Dict[str, Any], extract_context: Any = None @@ -390,7 +391,7 @@ def _render_content_template( jinja_template = env.from_string(template) return jinja_template.render(**template_vars).strip() except Exception as e: - logger.error(f"Template rendering failed: {e}") + tracer.error(f"Template rendering failed: {e}") raise def _is_patch_format(self, content: Any) -> bool: @@ -414,11 +415,10 @@ async def _apply_edit(self, flat_model: Any, uri: str, ctx: RequestContext) -> N # If no StrPatch fields, treat as write operation has_str_patch = any(self._is_patch_format(v) for v in model_dict.values()) if not has_str_patch: - logger.debug(f"Memory not found for edit, treating as write: {uri}") await self._apply_write(flat_model, uri, ctx) return # Has StrPatch field but file doesn't exist - cannot apply - logger.warning(f"Memory not found for edit: {uri}") + tracer.error(f"Memory not found for edit: {uri}") return # Deserialize content and metadata @@ -474,7 +474,6 @@ async def _apply_edit(self, flat_model: Any, uri: str, ctx: RequestContext) -> N self._print_diff(uri, current_plain_content, new_plain_content) await viking_fs.write_file(uri, new_full_content, ctx=ctx) - logger.debug(f"Edited memory: {uri}") async def _apply_delete(self, uri: str, ctx: RequestContext) -> None: """Apply delete operation (uri is already a string).""" @@ -484,9 +483,8 @@ async def _apply_delete(self, uri: str, ctx: RequestContext) -> None: # VikingFS automatically handles vector index cleanup try: await viking_fs.rm(uri, recursive=False, ctx=ctx) - logger.debug(f"Deleted memory: {uri}") except NotFoundError: - logger.warning(f"Memory not found for delete: {uri}") + tracer.error(f"Memory not found for delete: {uri}") # Idempotent - deleting non-existent file succeeds async def _apply_edit_overview( @@ -516,13 +514,12 @@ async def _apply_edit_overview( current_overview = await viking_fs.read_file(uri, ctx=ctx) or "" except NotFoundError: # File doesn't exist yet, start with empty content - logger.debug(f"Overview file does not exist yet: {uri}") + pass # Apply patch or replace based on overview_value type new_overview = current_overview if overview_value is None: # No overview provided, nothing to do - logger.debug("No overview value provided, skipping edit") return elif isinstance(overview_value, str): # 空字符串保持原值 @@ -553,7 +550,6 @@ async def _apply_edit_overview( # Write new overview await viking_fs.write_file(uri, new_overview, ctx=ctx) - logger.debug(f"Edited overview: {uri}") # Extract and write .abstract.md await self._write_abstract_from_overview(uri, new_overview, ctx) @@ -600,9 +596,8 @@ async def _write_abstract_from_overview( try: await viking_fs.write_file(abstract_uri, abstract, ctx=ctx) - logger.debug(f"Wrote abstract: {abstract_uri}") except Exception as e: - logger.warning(f"Failed to write abstract {abstract_uri}: {e}") + tracer.error(f"Failed to write abstract {abstract_uri}: {e}") def _print_diff(self, uri: str, old_content: str, new_content: str) -> None: """Print a diff of the memory edit using diff_match_patch.""" @@ -637,12 +632,12 @@ def _print_diff(self, uri: str, old_content: str, new_content: str) -> None: lines.append(f"{'=' * 60}\n") # Print directly - print("\n".join(lines)) + tracer.info(f'diff={"\n".join(lines)}') except ImportError: # Fallback: just show file name - logger.debug(f"diff_match_patch not available, skipping diff for {uri}") + tracer.error(f"diff_match_patch not available, skipping diff for {uri}") except Exception as e: - logger.debug(f"Failed to print diff for {uri}: {e}") + tracer.error(f"Failed to print diff for {uri}: {e}") async def _vectorize_memories( self, diff --git a/openviking/session/memory/tools.py b/openviking/session/memory/tools.py index 6818f0e12..d61278976 100644 --- a/openviking/session/memory/tools.py +++ b/openviking/session/memory/tools.py @@ -11,6 +11,7 @@ from openviking.session.memory.utils import parse_memory_file_with_fields from openviking.storage.viking_fs import VikingFS +from openviking.telemetry import tracer from openviking_cli.utils import get_logger logger = get_logger(__name__) @@ -182,7 +183,7 @@ async def execute( parsed = parse_memory_file_with_fields(content) return parsed except Exception as e: - logger.error(f"Failed to execute read: {e}") + tracer.info(f"Failed to execute read:e") return {"error": str(e)} @@ -237,7 +238,7 @@ async def execute( ) return optimize_search_result(search_result.to_dict(), limit=limit) except Exception as e: - logger.error(f"Failed to execute search: {e}") + tracer.error(f"Failed to execute search: {e}") return {"error": str(e)} @@ -306,7 +307,7 @@ async def execute( return "Directory is empty. You can write new files to create memory content." return "\n".join(result_lines) except Exception as e: - logger.error(f"Failed to execute ls: {e}") + tracer.info(f"Failed to execute ls: {e}") return {"error": str(e)} @@ -317,7 +318,6 @@ async def execute( def register_tool(tool: MemoryTool) -> None: """Register a memory tool.""" MEMORY_TOOLS_REGISTRY[tool.name] = tool - logger.debug(f"Registered memory tool: {tool.name}") def get_tool(name: str) -> Optional[MemoryTool]: diff --git a/openviking/session/memory/utils/messages.py b/openviking/session/memory/utils/messages.py index 289c944a9..0fbc56336 100644 --- a/openviking/session/memory/utils/messages.py +++ b/openviking/session/memory/utils/messages.py @@ -11,6 +11,7 @@ import json_repair from openviking.session.memory.utils import truncate_content +from openviking.telemetry import tracer from openviking_cli.utils import get_logger logger = get_logger(__name__) @@ -73,7 +74,7 @@ def pretty_print_messages(messages: List[Dict[str, Any]]) -> None: output.append(json.dumps(tool_calls, indent=2, ensure_ascii=False)) output.append("\n=== End Messages ===") - logger.info("\n".join(output)) + tracer.info(f'messages={"\n".join(output)}') def parse_memory_file_with_fields(content: str) -> Dict[str, Any]: @@ -111,7 +112,7 @@ def parse_memory_file_with_fields(content: str) -> Dict[str, Any]: if isinstance(fields, dict): result.update(fields) except Exception as e: - logger.warning(f"Failed to parse MEMORY_FIELDS JSON: {e}") + tracer.warning(f"Failed to parse MEMORY_FIELDS JSON: {e}") # Remove the comment from content content_without_comment = re.sub(pattern, "", content).strip() diff --git a/openviking/telemetry/__init__.py b/openviking/telemetry/__init__.py index fb0625f44..c83e1138b 100644 --- a/openviking/telemetry/__init__.py +++ b/openviking/telemetry/__init__.py @@ -7,6 +7,8 @@ from .registry import register_telemetry, resolve_telemetry, unregister_telemetry from .request import TelemetryRequest, TelemetrySelection, normalize_telemetry_request from .runtime import get_telemetry_runtime, set_telemetry_runtime +from . import tracer as tracer_module +from .tracer import tracer __all__ = [ "OperationTelemetry", @@ -20,5 +22,7 @@ "register_telemetry", "resolve_telemetry", "set_telemetry_runtime", + "tracer", + "tracer_module", "unregister_telemetry", ] diff --git a/openviking/telemetry/langfuse.py b/openviking/telemetry/langfuse.py new file mode 100644 index 000000000..8cd641b41 --- /dev/null +++ b/openviking/telemetry/langfuse.py @@ -0,0 +1,376 @@ +"""Langfuse integration for LLM observability.""" + +from contextlib import contextmanager +from typing import Any, Generator + +from loguru import logger + +# Try to import langfuse - will be None if not installed +Langfuse = None +propagate_attributes = None + +try: + from langfuse import Langfuse + from langfuse import propagate_attributes as _propagate_attributes + + propagate_attributes = _propagate_attributes +except ImportError: + pass + + +class LangfuseClient: + """Wrapper for Langfuse client with optional support.""" + + _instance: "LangfuseClient | None" = None + + def __init__( + self, + enabled: bool = False, + secret_key: str = "", + public_key: str = "", + base_url: str = "https://cloud.langfuse.com", + ): + self._client = None + self.enabled = enabled + + if not self.enabled: + return + + if Langfuse is None: + logger.warning( + "Langfuse not installed. Install with: uv pip install langfuse" + ) + self.enabled = False + return + + if not secret_key: + logger.warning( + "Langfuse enabled but no secret_key provided. Configure in ov.conf under langfuse" + ) + self.enabled = False + return + + try: + self._client = Langfuse( + secret_key=secret_key, + public_key=public_key, + host=base_url, + ) + self._client.auth_check() + except Exception as e: + logger.warning(f"Langfuse initialized failed: {type(e).__name__}: {e}") + self.enabled = False + self._client = None + + @classmethod + def get_instance(cls) -> "LangfuseClient": + """Get the singleton instance.""" + if cls._instance is None: + logger.warning("[LANGFUSE] disabled") + cls._instance = LangfuseClient(enabled=False) + return cls._instance + + @classmethod + def set_instance(cls, instance: "LangfuseClient") -> None: + """Set the singleton instance.""" + cls._instance = instance + + def flush(self) -> None: + """Flush pending events to Langfuse.""" + if self.enabled and self._client: + self._client.flush() + + @contextmanager + def propagate_attributes( + self, + session_id: str | None = None, + user_id: str | None = None, + ) -> Generator[None, None, None]: + """ + Propagate attributes (session_id, user_id) to all nested observations. + + Args: + session_id: Optional session ID to associate with all nested observations + user_id: Optional user ID to associate with all nested observations + """ + if not self.enabled: + logger.warning("[LANGFUSE] propagate_attributes skipped: Langfuse client not enabled") + yield + return + if not self._client: + logger.warning( + "[LANGFUSE] propagate_attributes skipped: Langfuse client not initialized" + ) + yield + return + + propagate_kwargs = {} + if session_id: + propagate_kwargs["session_id"] = session_id + if user_id: + propagate_kwargs["user_id"] = user_id + + if not propagate_kwargs: + yield + return + + # Use module-level propagate_attributes from langfuse SDK v3 + # Store in a local variable to avoid shadowing issues with the method name + global propagate_attributes + _propagate = propagate_attributes + + if _propagate is None: + logger.warning( + "[LANGFUSE] propagate_attributes not available (SDK version may not support it)" + ) + yield + return + + # Only catch exceptions when ENTERING the context manager + # Don't wrap the yield - let exceptions from the inner block propagate normally + logger.info(f"[LANGFUSE] Propagating attributes: {list(propagate_kwargs.keys())}") + try: + cm = _propagate(**propagate_kwargs) + cm.__enter__() + except Exception as e: + logger.debug(f"[LANGFUSE] Failed to enter propagate_attributes: {e}") + yield + return + + try: + yield + finally: + # Always exit the context manager + try: + cm.__exit__(None, None, None) + except Exception as e: + logger.debug(f"[LANGFUSE] Failed to exit propagate_attributes: {e}") + + @contextmanager + def trace( + self, + name: str, + session_id: str | None = None, + user_id: str | None = None, + metadata: dict[str, Any] | None = None, + ) -> Generator[Any, None, None]: + """ + Create a trace context manager. + In v3 SDK, trace is implicitly created by first span/generation. + """ + if not self.enabled or not self._client: + yield None + return + + try: + # In v3, we use start_as_current_span to create the root span + with self._client.start_as_current_span( + name=name, + session_id=session_id, + user_id=user_id, + metadata=metadata or {}, + ) as span: + yield span + except Exception as e: + logger.debug(f"Langfuse trace error: {e}") + yield None + + @contextmanager + def span( + self, + name: str, + trace_id: str | None = None, + parent_observation_id: str | None = None, + metadata: dict[str, Any] | None = None, + ) -> Generator[Any, None, None]: + """Create a span context manager.""" + if not self.enabled or not self._client: + yield None + return + + try: + with self._client.start_as_current_span( + name=name, + metadata=metadata or {}, + ) as span: + yield span + except Exception as e: + logger.debug(f"Langfuse span error: {e}") + yield None + + @contextmanager + def generation( + self, + name: str, + model: str, + trace_id: str | None = None, + parent_observation_id: str | None = None, + prompt: list[dict[str, Any]] | None = None, + metadata: dict[str, Any] | None = None, + ) -> Generator[Any, None, None]: + """ + Create a generation context manager for LLM calls. + + Args: + name: Name of the generation + model: Model name + trace_id: Optional trace ID (not used in v3) + parent_observation_id: Optional parent observation ID (not used in v3) + prompt: Optional prompt messages + metadata: Optional metadata + """ + if not self.enabled or not self._client: + yield None + return + + observation = None + try: + # Use start_observation for the current SDK version + if hasattr(self._client, "start_as_current_observation"): + with self._client.start_as_current_observation( + name=name, + as_type="generation", + model=model, + input=prompt, + metadata=metadata or {}, + ) as obs: + yield obs + elif hasattr(self._client, "start_observation"): + observation = self._client.start_observation( + name=name, + as_type="generation", + model=model, + input=prompt, + metadata=metadata or {}, + ) + yield observation + else: + logger.debug("[LANGFUSE] No supported observation method found on client") + yield None + except Exception as e: + logger.debug(f"Langfuse generation error: {e}") + yield None + finally: + # If we used start_observation, we need to end it manually + if observation and hasattr(observation, "end"): + try: + observation.end() + except Exception as e: + logger.debug(f"Langfuse observation.end() error: {e}") + + def update_generation( + self, + generation: Any, + output: str | None = None, + usage: dict[str, int] | None = None, + usage_details: dict[str, int] | None = None, + metadata: dict[str, Any] | None = None, + ) -> None: + """Update a generation with output and usage.""" + if not self.enabled or not generation: + return + + try: + update_kwargs: dict[str, Any] = {} + if output is not None: + update_kwargs["output"] = output + if usage_details: + update_kwargs["usage_details"] = usage_details + elif usage: + # Support both usage and usage_details formats + usage_details = { + "input": usage.get("prompt_tokens", 0), + "output": usage.get("completion_tokens", 0), + } + # Pass through total_tokens if available + if "total_tokens" in usage: + usage_details["total"] = usage["total_tokens"] + update_kwargs["usage_details"] = usage_details + if metadata: + if hasattr(generation, "metadata") and generation.metadata: + update_kwargs["metadata"] = {**generation.metadata, **metadata} + else: + update_kwargs["metadata"] = metadata + + # In v3, update via the generation object's update method + if hasattr(generation, "update"): + generation.update(**update_kwargs) + # Or use client's update_current_generation + elif self._client and hasattr(self._client, "update_current_generation"): + self._client.update_current_generation(**update_kwargs) + + except Exception as e: + logger.debug(f"Langfuse update generation error: {e}") + + @contextmanager + def tool_call( + self, + name: str, + input: dict[str, Any] | None = None, + session_id: str | None = None, + metadata: dict[str, Any] | None = None, + ) -> Generator[Any, None, None]: + """ + Create a span for tool/function call execution. + + Args: + name: Name of the tool/function + input: Input arguments to the tool + session_id: Optional session ID for tracing + metadata: Optional metadata + + Yields: + Langfuse span object or None if not enabled + """ + if not self.enabled or not self._client: + yield None + return + + try: + combined_metadata = metadata or {} + if session_id: + combined_metadata["session_id"] = session_id + + with self._client.start_as_current_span( + name=f"tool:{name}", + input=input, + metadata=combined_metadata, + ) as span: + yield span + except Exception as e: + logger.debug(f"Langfuse tool call span error: {e}") + yield None + + def end_tool_call( + self, + span: Any, + output: str | None = None, + success: bool = True, + metadata: dict[str, Any] | None = None, + ) -> None: + """ + End a tool call span with output and status. + + Args: + span: The span object from tool_call() + output: Output of the tool call + success: Whether the tool call succeeded + metadata: Optional additional metadata + """ + if not self.enabled or not span: + return + + try: + update_kwargs: dict[str, Any] = {} + if output is not None: + update_kwargs["output"] = output + + combined_metadata = metadata or {} + combined_metadata["success"] = success + update_kwargs["metadata"] = combined_metadata + + if hasattr(span, "update"): + span.update(**update_kwargs) + + except Exception as e: + logger.debug(f"Langfuse end tool call error: {e}") \ No newline at end of file diff --git a/openviking_cli/utils/config/__init__.py b/openviking_cli/utils/config/__init__.py index a42796dca..56654aae2 100644 --- a/openviking_cli/utils/config/__init__.py +++ b/openviking_cli/utils/config/__init__.py @@ -41,6 +41,7 @@ ) from .rerank_config import RerankConfig from .storage_config import StorageConfig +from .telemetry_config import TelemetryConfig, TracerConfig from .vectordb_config import VectorDBBackendConfig from .vlm_config import VLMConfig @@ -80,4 +81,6 @@ "resolve_config_path", "set_openviking_config", "is_valid_openviking_config", + "TelemetryConfig", + "TracerConfig", ] diff --git a/openviking_cli/utils/config/open_viking_config.py b/openviking_cli/utils/config/open_viking_config.py index b769cb259..0e818a85b 100644 --- a/openviking_cli/utils/config/open_viking_config.py +++ b/openviking_cli/utils/config/open_viking_config.py @@ -20,6 +20,7 @@ ) from .embedding_config import EmbeddingConfig from .encryption_config import EncryptionConfig +from .telemetry_config import TelemetryConfig from .log_config import LogConfig from .memory_config import MemoryConfig from .parser_config import ( @@ -150,6 +151,10 @@ class OpenVikingConfig(BaseModel): default_factory=lambda: MemoryConfig(), description="Memory configuration" ) + telemetry: "TelemetryConfig" = Field( + default_factory=lambda: TelemetryConfig(), description="Telemetry configuration" + ) + model_config = {"arbitrary_types_allowed": True, "extra": "forbid"} @classmethod diff --git a/openviking_cli/utils/config/telemetry_config.py b/openviking_cli/utils/config/telemetry_config.py new file mode 100644 index 000000000..c199b0561 --- /dev/null +++ b/openviking_cli/utils/config/telemetry_config.py @@ -0,0 +1,26 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. +# SPDX-License-Identifier: AGPL-3.0 +from pydantic import BaseModel, Field + + +class TracerConfig(BaseModel): + """OpenTelemetry tracer configuration.""" + + enabled: bool = Field(default=False, description="Enable OpenTelemetry tracing") + endpoint: str = Field(default="", description="OTLP gRPC endpoint") + service_name: str = Field(default="openviking", description="Service name for tracing") + topic: str = Field(default="", description="Trace topic") + ak: str = Field(default="", description="Access key") + sk: str = Field(default="", description="Secret key") + + model_config = {"extra": "forbid"} + + +class TelemetryConfig(BaseModel): + """Telemetry configuration including tracer.""" + + tracer: TracerConfig = Field( + default_factory=lambda: TracerConfig(), description="OpenTelemetry tracer configuration" + ) + + model_config = {"extra": "forbid"} \ No newline at end of file diff --git a/tests/integration/test_compressor_v2_xiaomei.py b/tests/integration/test_compressor_v2_xiaomei.py index c37eaddd0..30e70973f 100644 --- a/tests/integration/test_compressor_v2_xiaomei.py +++ b/tests/integration/test_compressor_v2_xiaomei.py @@ -132,6 +132,8 @@ def run_ingest(client: ov.SyncHTTPClient, session_id: str, wait_seconds: float): console.print(" [yellow]提交 Session(触发记忆抽取)...[/yellow]") commit_result = client.commit_session(session_id) task_id = commit_result.get("task_id") + trace_id = commit_result.get("trace_id") + console.print(f" [bold cyan]trace_id: {trace_id}[/bold cyan]") console.print(f" Commit 结果: {commit_result}") # 轮询后台任务直到完成 From a4e73a7a79d822546279923f5223e3dfa26390e2 Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Wed, 1 Apr 2026 22:21:08 +0800 Subject: [PATCH 06/20] chore: remove unused openviking/telemetry/langfuse.py The openviking/telemetry/langfuse.py was not being imported anywhere in the openviking package. Project uses bot/vikingbot/integrations/langfuse.py instead. Co-Authored-By: Claude Opus 4.6 --- openviking/telemetry/langfuse.py | 376 ------------------------------- 1 file changed, 376 deletions(-) delete mode 100644 openviking/telemetry/langfuse.py diff --git a/openviking/telemetry/langfuse.py b/openviking/telemetry/langfuse.py deleted file mode 100644 index 8cd641b41..000000000 --- a/openviking/telemetry/langfuse.py +++ /dev/null @@ -1,376 +0,0 @@ -"""Langfuse integration for LLM observability.""" - -from contextlib import contextmanager -from typing import Any, Generator - -from loguru import logger - -# Try to import langfuse - will be None if not installed -Langfuse = None -propagate_attributes = None - -try: - from langfuse import Langfuse - from langfuse import propagate_attributes as _propagate_attributes - - propagate_attributes = _propagate_attributes -except ImportError: - pass - - -class LangfuseClient: - """Wrapper for Langfuse client with optional support.""" - - _instance: "LangfuseClient | None" = None - - def __init__( - self, - enabled: bool = False, - secret_key: str = "", - public_key: str = "", - base_url: str = "https://cloud.langfuse.com", - ): - self._client = None - self.enabled = enabled - - if not self.enabled: - return - - if Langfuse is None: - logger.warning( - "Langfuse not installed. Install with: uv pip install langfuse" - ) - self.enabled = False - return - - if not secret_key: - logger.warning( - "Langfuse enabled but no secret_key provided. Configure in ov.conf under langfuse" - ) - self.enabled = False - return - - try: - self._client = Langfuse( - secret_key=secret_key, - public_key=public_key, - host=base_url, - ) - self._client.auth_check() - except Exception as e: - logger.warning(f"Langfuse initialized failed: {type(e).__name__}: {e}") - self.enabled = False - self._client = None - - @classmethod - def get_instance(cls) -> "LangfuseClient": - """Get the singleton instance.""" - if cls._instance is None: - logger.warning("[LANGFUSE] disabled") - cls._instance = LangfuseClient(enabled=False) - return cls._instance - - @classmethod - def set_instance(cls, instance: "LangfuseClient") -> None: - """Set the singleton instance.""" - cls._instance = instance - - def flush(self) -> None: - """Flush pending events to Langfuse.""" - if self.enabled and self._client: - self._client.flush() - - @contextmanager - def propagate_attributes( - self, - session_id: str | None = None, - user_id: str | None = None, - ) -> Generator[None, None, None]: - """ - Propagate attributes (session_id, user_id) to all nested observations. - - Args: - session_id: Optional session ID to associate with all nested observations - user_id: Optional user ID to associate with all nested observations - """ - if not self.enabled: - logger.warning("[LANGFUSE] propagate_attributes skipped: Langfuse client not enabled") - yield - return - if not self._client: - logger.warning( - "[LANGFUSE] propagate_attributes skipped: Langfuse client not initialized" - ) - yield - return - - propagate_kwargs = {} - if session_id: - propagate_kwargs["session_id"] = session_id - if user_id: - propagate_kwargs["user_id"] = user_id - - if not propagate_kwargs: - yield - return - - # Use module-level propagate_attributes from langfuse SDK v3 - # Store in a local variable to avoid shadowing issues with the method name - global propagate_attributes - _propagate = propagate_attributes - - if _propagate is None: - logger.warning( - "[LANGFUSE] propagate_attributes not available (SDK version may not support it)" - ) - yield - return - - # Only catch exceptions when ENTERING the context manager - # Don't wrap the yield - let exceptions from the inner block propagate normally - logger.info(f"[LANGFUSE] Propagating attributes: {list(propagate_kwargs.keys())}") - try: - cm = _propagate(**propagate_kwargs) - cm.__enter__() - except Exception as e: - logger.debug(f"[LANGFUSE] Failed to enter propagate_attributes: {e}") - yield - return - - try: - yield - finally: - # Always exit the context manager - try: - cm.__exit__(None, None, None) - except Exception as e: - logger.debug(f"[LANGFUSE] Failed to exit propagate_attributes: {e}") - - @contextmanager - def trace( - self, - name: str, - session_id: str | None = None, - user_id: str | None = None, - metadata: dict[str, Any] | None = None, - ) -> Generator[Any, None, None]: - """ - Create a trace context manager. - In v3 SDK, trace is implicitly created by first span/generation. - """ - if not self.enabled or not self._client: - yield None - return - - try: - # In v3, we use start_as_current_span to create the root span - with self._client.start_as_current_span( - name=name, - session_id=session_id, - user_id=user_id, - metadata=metadata or {}, - ) as span: - yield span - except Exception as e: - logger.debug(f"Langfuse trace error: {e}") - yield None - - @contextmanager - def span( - self, - name: str, - trace_id: str | None = None, - parent_observation_id: str | None = None, - metadata: dict[str, Any] | None = None, - ) -> Generator[Any, None, None]: - """Create a span context manager.""" - if not self.enabled or not self._client: - yield None - return - - try: - with self._client.start_as_current_span( - name=name, - metadata=metadata or {}, - ) as span: - yield span - except Exception as e: - logger.debug(f"Langfuse span error: {e}") - yield None - - @contextmanager - def generation( - self, - name: str, - model: str, - trace_id: str | None = None, - parent_observation_id: str | None = None, - prompt: list[dict[str, Any]] | None = None, - metadata: dict[str, Any] | None = None, - ) -> Generator[Any, None, None]: - """ - Create a generation context manager for LLM calls. - - Args: - name: Name of the generation - model: Model name - trace_id: Optional trace ID (not used in v3) - parent_observation_id: Optional parent observation ID (not used in v3) - prompt: Optional prompt messages - metadata: Optional metadata - """ - if not self.enabled or not self._client: - yield None - return - - observation = None - try: - # Use start_observation for the current SDK version - if hasattr(self._client, "start_as_current_observation"): - with self._client.start_as_current_observation( - name=name, - as_type="generation", - model=model, - input=prompt, - metadata=metadata or {}, - ) as obs: - yield obs - elif hasattr(self._client, "start_observation"): - observation = self._client.start_observation( - name=name, - as_type="generation", - model=model, - input=prompt, - metadata=metadata or {}, - ) - yield observation - else: - logger.debug("[LANGFUSE] No supported observation method found on client") - yield None - except Exception as e: - logger.debug(f"Langfuse generation error: {e}") - yield None - finally: - # If we used start_observation, we need to end it manually - if observation and hasattr(observation, "end"): - try: - observation.end() - except Exception as e: - logger.debug(f"Langfuse observation.end() error: {e}") - - def update_generation( - self, - generation: Any, - output: str | None = None, - usage: dict[str, int] | None = None, - usage_details: dict[str, int] | None = None, - metadata: dict[str, Any] | None = None, - ) -> None: - """Update a generation with output and usage.""" - if not self.enabled or not generation: - return - - try: - update_kwargs: dict[str, Any] = {} - if output is not None: - update_kwargs["output"] = output - if usage_details: - update_kwargs["usage_details"] = usage_details - elif usage: - # Support both usage and usage_details formats - usage_details = { - "input": usage.get("prompt_tokens", 0), - "output": usage.get("completion_tokens", 0), - } - # Pass through total_tokens if available - if "total_tokens" in usage: - usage_details["total"] = usage["total_tokens"] - update_kwargs["usage_details"] = usage_details - if metadata: - if hasattr(generation, "metadata") and generation.metadata: - update_kwargs["metadata"] = {**generation.metadata, **metadata} - else: - update_kwargs["metadata"] = metadata - - # In v3, update via the generation object's update method - if hasattr(generation, "update"): - generation.update(**update_kwargs) - # Or use client's update_current_generation - elif self._client and hasattr(self._client, "update_current_generation"): - self._client.update_current_generation(**update_kwargs) - - except Exception as e: - logger.debug(f"Langfuse update generation error: {e}") - - @contextmanager - def tool_call( - self, - name: str, - input: dict[str, Any] | None = None, - session_id: str | None = None, - metadata: dict[str, Any] | None = None, - ) -> Generator[Any, None, None]: - """ - Create a span for tool/function call execution. - - Args: - name: Name of the tool/function - input: Input arguments to the tool - session_id: Optional session ID for tracing - metadata: Optional metadata - - Yields: - Langfuse span object or None if not enabled - """ - if not self.enabled or not self._client: - yield None - return - - try: - combined_metadata = metadata or {} - if session_id: - combined_metadata["session_id"] = session_id - - with self._client.start_as_current_span( - name=f"tool:{name}", - input=input, - metadata=combined_metadata, - ) as span: - yield span - except Exception as e: - logger.debug(f"Langfuse tool call span error: {e}") - yield None - - def end_tool_call( - self, - span: Any, - output: str | None = None, - success: bool = True, - metadata: dict[str, Any] | None = None, - ) -> None: - """ - End a tool call span with output and status. - - Args: - span: The span object from tool_call() - output: Output of the tool call - success: Whether the tool call succeeded - metadata: Optional additional metadata - """ - if not self.enabled or not span: - return - - try: - update_kwargs: dict[str, Any] = {} - if output is not None: - update_kwargs["output"] = output - - combined_metadata = metadata or {} - combined_metadata["success"] = success - update_kwargs["metadata"] = combined_metadata - - if hasattr(span, "update"): - span.update(**update_kwargs) - - except Exception as e: - logger.debug(f"Langfuse end tool call error: {e}") \ No newline at end of file From 3d9071b844e1e8945fdd487ac3f7fbda8afaa949 Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Thu, 2 Apr 2026 12:15:55 +0800 Subject: [PATCH 07/20] =?UTF-8?q?feat:=20=E5=9C=A8import=5Fto=5Fov.py?= =?UTF-8?q?=E4=B8=AD=E6=89=93=E5=8D=B0trace=5Fid=E7=94=A8=E4=BA=8E?= =?UTF-8?q?=E8=BF=BD=E8=B8=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- benchmark/locomo/vikingbot/import_to_ov.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/benchmark/locomo/vikingbot/import_to_ov.py b/benchmark/locomo/vikingbot/import_to_ov.py index 451d608f0..89ca272ba 100644 --- a/benchmark/locomo/vikingbot/import_to_ov.py +++ b/benchmark/locomo/vikingbot/import_to_ov.py @@ -327,7 +327,9 @@ async def viking_ingest( else: token_usage = {"embedding": 0, "vlm": 0, "total": 0} - return {"token_usage": token_usage, "task_id": task_id} + # Get trace_id from commit result + trace_id = result.get("trace_id", "") + return {"token_usage": token_usage, "task_id": task_id, "trace_id": trace_id} finally: await client.close() @@ -367,9 +369,10 @@ async def process_single_session( result = await viking_ingest(messages, args.openviking_url, semaphore, meta.get("date_time")) token_usage = result["token_usage"] task_id = result.get("task_id") + trace_id = result.get("trace_id", "") embedding_tokens = token_usage.get("embedding", 0) vlm_tokens = token_usage.get("vlm", 0) - print(f" -> [COMPLETED] [{sample_id}/{session_key}] embed={embedding_tokens}, vlm={vlm_tokens}", file=sys.stderr) + print(f" -> [COMPLETED] [{sample_id}/{session_key}] embed={embedding_tokens}, vlm={vlm_tokens}, task_id={task_id}, trace_id={trace_id}", file=sys.stderr) # Write success record result = { @@ -381,7 +384,8 @@ async def process_single_session( "token_usage": token_usage, "embedding_tokens": embedding_tokens, "vlm_tokens": vlm_tokens, - "task_id": task_id + "task_id": task_id, + "trace_id": trace_id } # 写入成功CSV From 5359acdc24dd9358a6572a20458fa5cd4115e1ee Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Thu, 2 Apr 2026 12:43:09 +0800 Subject: [PATCH 08/20] style: format Python files with ruff Co-Authored-By: Claude Opus 4.6 --- benchmark/locomo/vikingbot/import_to_ov.py | 173 +++++++++++------- openviking/client/session.py | 8 +- openviking/models/vlm/backends/litellm_vlm.py | 2 +- openviking/models/vlm/backends/openai_vlm.py | 6 +- .../models/vlm/backends/volcengine_vlm.py | 7 +- openviking/session/compressor_v2.py | 1 + openviking/session/memory/extract_loop.py | 12 +- openviking/session/memory/memory_updater.py | 23 ++- openviking/session/memory/tools.py | 1 - openviking/session/memory/utils/messages.py | 2 +- openviking/session/memory/utils/uri.py | 20 +- openviking/sync_client.py | 4 +- openviking/telemetry/tracer.py | 13 +- .../utils/config/open_viking_config.py | 1 - .../utils/config/telemetry_config.py | 2 +- .../integration/test_compressor_v2_xiaomei.py | 46 ++--- 16 files changed, 200 insertions(+), 121 deletions(-) diff --git a/benchmark/locomo/vikingbot/import_to_ov.py b/benchmark/locomo/vikingbot/import_to_ov.py index 89ca272ba..94a69d8ec 100644 --- a/benchmark/locomo/vikingbot/import_to_ov.py +++ b/benchmark/locomo/vikingbot/import_to_ov.py @@ -68,7 +68,7 @@ def load_locomo_data( if sample_index is not None: if sample_index < 0 or sample_index >= len(data): - raise ValueError(f"Sample index {sample_index} out of range (0-{len(data)-1})") + raise ValueError(f"Sample index {sample_index} out of range (0-{len(data) - 1})") return [data[sample_index]] return data @@ -106,22 +106,21 @@ def build_session_messages( for idx, msg in enumerate(conv[sk]): speaker = msg.get("speaker", "unknown") text = msg.get("text", "") - messages.append({ - "role": "user", - "text": f"[{speaker}]: {text}", - "speaker": speaker, - "index": idx - }) - - sessions.append({ - "messages": messages, - "meta": { - "sample_id": item["sample_id"], - "session_key": sk, - "date_time": date_time, - "speakers": speakers, - }, - }) + messages.append( + {"role": "user", "text": f"[{speaker}]: {text}", "speaker": speaker, "index": idx} + ) + + sessions.append( + { + "messages": messages, + "meta": { + "sample_id": item["sample_id"], + "session_key": sk, + "date_time": date_time, + "speakers": speakers, + }, + } + ) return sessions @@ -130,6 +129,7 @@ def build_session_messages( # Ingest record helpers (avoid duplicate ingestion) # --------------------------------------------------------------------------- + def load_success_csv(csv_path: str = "./result/import_success.csv") -> set: """加载成功导入的CSV记录,返回已成功的键集合""" success_keys = set() @@ -142,33 +142,48 @@ def load_success_csv(csv_path: str = "./result/import_success.csv") -> set: return success_keys -def write_success_record(record: Dict[str, Any], csv_path: str = "./result/import_success.csv") -> None: +def write_success_record( + record: Dict[str, Any], csv_path: str = "./result/import_success.csv" +) -> None: """写入成功记录到CSV文件""" file_exists = Path(csv_path).exists() - fieldnames = ["timestamp", "sample_id", "session", "date_time", "speakers", - "embedding_tokens", "vlm_tokens", "llm_input_tokens", - "llm_output_tokens", "total_tokens"] + fieldnames = [ + "timestamp", + "sample_id", + "session", + "date_time", + "speakers", + "embedding_tokens", + "vlm_tokens", + "llm_input_tokens", + "llm_output_tokens", + "total_tokens", + ] with open(csv_path, "a", encoding="utf-8", newline="") as f: writer = csv.DictWriter(f, fieldnames=fieldnames) if not file_exists: writer.writeheader() - writer.writerow({ - "timestamp": record["timestamp"], - "sample_id": record["sample_id"], - "session": record["session"], - "date_time": record.get("meta", {}).get("date_time", ""), - "speakers": record.get("meta", {}).get("speakers", ""), - "embedding_tokens": record["token_usage"].get("embedding", 0), - "vlm_tokens": record["token_usage"].get("vlm", 0), - "llm_input_tokens": record["token_usage"].get("llm_input", 0), - "llm_output_tokens": record["token_usage"].get("llm_output", 0), - "total_tokens": record["token_usage"].get("total", 0) - }) - - -def write_error_record(record: Dict[str, Any], error_path: str = "./result/import_errors.log") -> None: + writer.writerow( + { + "timestamp": record["timestamp"], + "sample_id": record["sample_id"], + "session": record["session"], + "date_time": record.get("meta", {}).get("date_time", ""), + "speakers": record.get("meta", {}).get("speakers", ""), + "embedding_tokens": record["token_usage"].get("embedding", 0), + "vlm_tokens": record["token_usage"].get("vlm", 0), + "llm_input_tokens": record["token_usage"].get("llm_input", 0), + "llm_output_tokens": record["token_usage"].get("llm_output", 0), + "total_tokens": record["token_usage"].get("total", 0), + } + ) + + +def write_error_record( + record: Dict[str, Any], error_path: str = "./result/import_errors.log" +) -> None: """写入错误记录到日志文件""" with open(error_path, "a", encoding="utf-8") as f: timestamp = record["timestamp"] @@ -187,7 +202,9 @@ def load_ingest_record(record_path: str = "./result/.ingest_record.json") -> Dic return {} -def save_ingest_record(record: Dict[str, Any], record_path: str = "./result/.ingest_record.json") -> None: +def save_ingest_record( + record: Dict[str, Any], record_path: str = "./result/.ingest_record.json" +) -> None: """Save ingest record to file.""" with open(record_path, "w", encoding="utf-8") as f: json.dump(record, f, indent=2, ensure_ascii=False) @@ -241,7 +258,7 @@ def _parse_token_usage(commit_result: Dict[str, Any]) -> Dict[str, int]: "vlm": llm_total, "llm_input": llm.get("input", 0), "llm_output": llm.get("output", 0), - "total": tu.get("total", {}).get("total_tokens", embed_total + llm_total) + "total": tu.get("total", {}).get("total_tokens", embed_total + llm_total), } # 从 commit 响应的 telemetry 中提取 @@ -252,7 +269,7 @@ def _parse_token_usage(commit_result: Dict[str, Any]) -> Dict[str, int]: "vlm": tokens.get("llm", {}).get("total", 0), "llm_input": tokens.get("llm", {}).get("input", 0), "llm_output": tokens.get("llm", {}).get("output", 0), - "total": tokens.get("total", 0) + "total": tokens.get("total", 0), } @@ -260,7 +277,7 @@ async def viking_ingest( messages: List[Dict[str, Any]], openviking_url: str, semaphore: asyncio.Semaphore, - session_time: Optional[str] = None + session_time: Optional[str] = None, ) -> Dict[str, int]: """Save messages to OpenViking via OpenViking SDK client. Returns token usage dict with embedding and vlm token counts. @@ -302,7 +319,7 @@ async def viking_ingest( session_id=session_id, role=msg["role"], parts=[{"type": "text", "text": msg["text"]}], - created_at=msg_created_at + created_at=msg_created_at, ) # Commit @@ -335,7 +352,9 @@ async def viking_ingest( await client.close() -def sync_viking_ingest(messages: List[Dict[str, Any]], openviking_url: str, session_time: Optional[str] = None) -> Dict[str, int]: +def sync_viking_ingest( + messages: List[Dict[str, Any]], openviking_url: str, session_time: Optional[str] = None +) -> Dict[str, int]: """Synchronous wrapper for viking_ingest to maintain existing API.""" semaphore = asyncio.Semaphore(1) # 同步调用时使用信号量为1 return asyncio.run(viking_ingest(messages, openviking_url, semaphore, session_time)) @@ -345,6 +364,7 @@ def sync_viking_ingest(messages: List[Dict[str, Any]], openviking_url: str, sess # Main import logic # --------------------------------------------------------------------------- + def parse_session_range(s: str) -> Tuple[int, int]: """Parse '1-4' or '3' into (lo, hi) inclusive tuple.""" if "-" in s: @@ -362,17 +382,22 @@ async def process_single_session( run_time: str, ingest_record: Dict[str, Any], args: argparse.Namespace, - semaphore: asyncio.Semaphore + semaphore: asyncio.Semaphore, ) -> Dict[str, Any]: """处理单个会话的导入任务""" try: - result = await viking_ingest(messages, args.openviking_url, semaphore, meta.get("date_time")) + result = await viking_ingest( + messages, args.openviking_url, semaphore, meta.get("date_time") + ) token_usage = result["token_usage"] task_id = result.get("task_id") trace_id = result.get("trace_id", "") embedding_tokens = token_usage.get("embedding", 0) vlm_tokens = token_usage.get("vlm", 0) - print(f" -> [COMPLETED] [{sample_id}/{session_key}] embed={embedding_tokens}, vlm={vlm_tokens}, task_id={task_id}, trace_id={trace_id}", file=sys.stderr) + print( + f" -> [COMPLETED] [{sample_id}/{session_key}] embed={embedding_tokens}, vlm={vlm_tokens}, task_id={task_id}, trace_id={trace_id}", + file=sys.stderr, + ) # Write success record result = { @@ -385,7 +410,7 @@ async def process_single_session( "embedding_tokens": embedding_tokens, "vlm_tokens": vlm_tokens, "task_id": task_id, - "trace_id": trace_id + "trace_id": trace_id, } # 写入成功CSV @@ -407,7 +432,7 @@ async def process_single_session( "sample_id": sample_id, "session": session_key, "status": "error", - "error": str(e) + "error": str(e), } # 写入错误日志 @@ -434,7 +459,10 @@ async def run_import(args: argparse.Namespace) -> None: success_keys = set() if not args.force_ingest: success_keys = load_success_csv(args.success_csv) - print(f"[INFO] Loaded {len(success_keys)} existing success records from {args.success_csv}", file=sys.stderr) + print( + f"[INFO] Loaded {len(success_keys)} existing success records from {args.success_csv}", + file=sys.stderr, + ) # Write run header run_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") @@ -465,12 +493,19 @@ async def process_sample(item): label = f"{session_key} ({meta['date_time']})" # Skip already ingested sessions unless force-ingest is enabled - if not args.force_ingest and is_already_ingested(sample_id, session_key, ingest_record, success_keys): - print(f" [{label}] [SKIP] already imported (use --force-ingest to reprocess)", file=sys.stderr) + if not args.force_ingest and is_already_ingested( + sample_id, session_key, ingest_record, success_keys + ): + print( + f" [{label}] [SKIP] already imported (use --force-ingest to reprocess)", + file=sys.stderr, + ) continue # Preview messages - preview = " | ".join([f"{msg['role']}: {msg['text'][:30]}..." for msg in messages[:3]]) + preview = " | ".join( + [f"{msg['role']}: {msg['text'][:30]}..." for msg in messages[:3]] + ) print(f" [{label}] {preview}", file=sys.stderr) # 串行执行(等待完成后再处理下一个 session) @@ -482,7 +517,7 @@ async def process_sample(item): run_time=run_time, ingest_record=ingest_record, args=args, - semaphore=semaphore + semaphore=semaphore, ) # 不同 sample 之间并行执行 @@ -499,20 +534,21 @@ async def process_sample(item): print(f"\n=== Text Session {idx} ===", file=sys.stderr) # Skip already ingested sessions unless force-ingest is enabled - if not args.force_ingest and is_already_ingested("txt", session_key, ingest_record, success_keys): - print(f" [SKIP] already imported (use --force-ingest to reprocess)", file=sys.stderr) + if not args.force_ingest and is_already_ingested( + "txt", session_key, ingest_record, success_keys + ): + print( + f" [SKIP] already imported (use --force-ingest to reprocess)", file=sys.stderr + ) skipped_count += 1 continue # For plain text, all messages as user role messages = [] for i, text in enumerate(session["messages"]): - messages.append({ - "role": "user", - "text": text.strip(), - "speaker": "user", - "index": i - }) + messages.append( + {"role": "user", "text": text.strip(), "speaker": "user", "index": i} + ) preview = " | ".join([f"{msg['role']}: {msg['text'][:30]}..." for msg in messages[:3]]) print(f" {preview}", file=sys.stderr) @@ -527,13 +563,16 @@ async def process_sample(item): run_time=run_time, ingest_record=ingest_record, args=args, - semaphore=semaphore + semaphore=semaphore, ) ) tasks.append(task) # 等待所有 sample 处理完成 - print(f"\n[INFO] Starting import with {args.parallel} concurrent workers, {len(tasks)} tasks to process", file=sys.stderr) + print( + f"\n[INFO] Starting import with {args.parallel} concurrent workers, {len(tasks)} tasks to process", + file=sys.stderr, + ) await asyncio.gather(*tasks, return_exceptions=True) # 从成功 CSV 统计结果 @@ -556,7 +595,10 @@ async def process_sample(item): print(f"Total Embedding tokens: {total_embedding_tokens}", file=sys.stderr) print(f"Total VLM tokens: {total_vlm_tokens}", file=sys.stderr) if success_count > 0: - print(f"Average Embedding per session: {total_embedding_tokens // success_count}", file=sys.stderr) + print( + f"Average Embedding per session: {total_embedding_tokens // success_count}", + file=sys.stderr, + ) print(f"Average VLM per session: {total_vlm_tokens // success_count}", file=sys.stderr) print(f"\nResults saved to:", file=sys.stderr) print(f" - Success records: {args.success_csv}", file=sys.stderr) @@ -567,12 +609,13 @@ async def process_sample(item): # CLI # --------------------------------------------------------------------------- + def main(): parser = argparse.ArgumentParser(description="Import conversations into OpenViking") parser.add_argument( "--input", default="./test_data/locomo10.json", - help="Path to input file (.txt or LoCoMo .json)" + help="Path to input file (.txt or LoCoMo .json)", ) parser.add_argument( "--success-csv", @@ -632,4 +675,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/openviking/client/session.py b/openviking/client/session.py index a571c068d..27b6b33b6 100644 --- a/openviking/client/session.py +++ b/openviking/client/session.py @@ -57,8 +57,12 @@ async def add_message( """ if parts is not None: parts_dicts = [asdict(p) for p in parts] - return await self._client.add_message(self.session_id, role, parts=parts_dicts, created_at=created_at) - return await self._client.add_message(self.session_id, role, content=content, created_at=created_at) + return await self._client.add_message( + self.session_id, role, parts=parts_dicts, created_at=created_at + ) + return await self._client.add_message( + self.session_id, role, content=content, created_at=created_at + ) async def commit(self, telemetry: TelemetryRequest = False) -> Dict[str, Any]: """Commit the session (archive messages and extract memories). diff --git a/openviking/models/vlm/backends/litellm_vlm.py b/openviking/models/vlm/backends/litellm_vlm.py index ced3af2f2..620085709 100644 --- a/openviking/models/vlm/backends/litellm_vlm.py +++ b/openviking/models/vlm/backends/litellm_vlm.py @@ -346,6 +346,7 @@ async def get_completion_async( kwargs = self._build_text_kwargs(prompt, thinking, tools, tool_choice, messages) # 用 tracer.info 打印请求 tracer.info(f"request: {json.dumps(kwargs, ensure_ascii=False, indent=2)}") + async def _call() -> Union[str, VLMResponse]: t0 = time.perf_counter() response = await acompletion(**kwargs) @@ -355,7 +356,6 @@ async def _call() -> Union[str, VLMResponse]: return self._build_vlm_response(response, has_tools=True) return self._clean_response(self._extract_content_from_response(response)) - return await retry_async( _call, max_retries=self.max_retries, diff --git a/openviking/models/vlm/backends/openai_vlm.py b/openviking/models/vlm/backends/openai_vlm.py index ffaa97a4b..2f1078f1e 100644 --- a/openviking/models/vlm/backends/openai_vlm.py +++ b/openviking/models/vlm/backends/openai_vlm.py @@ -129,7 +129,7 @@ def _update_token_usage_from_response( duration_seconds: float = 0.0, ): if hasattr(response, "usage") and response.usage: - tracer.info(f'response.usage={response.usage}') + tracer.info(f"response.usage={response.usage}") prompt_tokens = response.usage.prompt_tokens completion_tokens = response.usage.completion_tokens self.update_token_usage( @@ -159,7 +159,7 @@ def _build_vlm_response(self, response, has_tools: bool) -> Union[str, VLMRespon """Build response from OpenAI response. Returns str or VLMResponse based on has_tools.""" choice = response.choices[0] message = choice.message - tracer.info(f'result={message.content}') + tracer.info(f"result={message.content}") if has_tools: usage = {} if hasattr(response, "usage") and response.usage: @@ -369,7 +369,6 @@ async def _call() -> Union[str, VLMResponse]: return self._build_vlm_response(response, has_tools=True) return await self._extract_completion_content_async(response, elapsed) - # 用 tracer.info 打印请求 tracer.info(f"messages={json.dumps(kwargs, ensure_ascii=False, indent=2)}") @@ -380,7 +379,6 @@ async def _call() -> Union[str, VLMResponse]: operation_name="OpenAI VLM async completion", ) - def _detect_image_format(self, data: bytes) -> str: """Detect image format from magic bytes. diff --git a/openviking/models/vlm/backends/volcengine_vlm.py b/openviking/models/vlm/backends/volcengine_vlm.py index aba6a5eb7..978bcb339 100644 --- a/openviking/models/vlm/backends/volcengine_vlm.py +++ b/openviking/models/vlm/backends/volcengine_vlm.py @@ -49,7 +49,7 @@ def _build_vlm_response(self, response, has_tools: bool) -> Union[str, VLMRespon """Build response from Chat Completions response. Returns str or VLMResponse based on has_tools.""" choice = response.choices[0] message = choice.message - tracer.info(f'message.content={message.content}') + tracer.info(f"message.content={message.content}") if has_tools: usage = {} if hasattr(response, "usage") and response.usage: @@ -175,9 +175,6 @@ async def get_completion_async( if attempt < max_retries: await asyncio.sleep(2**attempt) - - - if last_error: raise last_error else: @@ -377,4 +374,4 @@ async def get_vision_completion_async( result = self._build_vlm_response(response, has_tools=bool(tools)) if tools: return result - return self._clean_response(str(result)) \ No newline at end of file + return self._clean_response(str(result)) diff --git a/openviking/session/compressor_v2.py b/openviking/session/compressor_v2.py index 6b49d52d7..c81a66028 100644 --- a/openviking/session/compressor_v2.py +++ b/openviking/session/compressor_v2.py @@ -143,6 +143,7 @@ async def extract_long_term_memories( agent_space = ctx.user.agent_space_name() if ctx and ctx.user else "default" # 使用 Jinja2 渲染 directory import jinja2 + env = jinja2.Environment(autoescape=False) template = env.from_string(schema.directory) dir_path = template.render(user_space=user_space, agent_space=agent_space) diff --git a/openviking/session/memory/extract_loop.py b/openviking/session/memory/extract_loop.py index e4635081f..eac125b5b 100644 --- a/openviking/session/memory/extract_loop.py +++ b/openviking/session/memory/extract_loop.py @@ -286,7 +286,7 @@ def _validate_operations(self, operations: MemoryOperations) -> None: schemas = self.context_provider.get_memory_schemas(self.ctx) # Use pre-initialized extract_context - if not hasattr(self, '_extract_context') or self._extract_context is None: + if not hasattr(self, "_extract_context") or self._extract_context is None: raise ValueError("ExtractContext not initialized") is_valid, errors = validate_operations_uris( @@ -405,7 +405,7 @@ async def _execute_tool( tool_ctx = ToolContext(request_ctx=self.ctx, transaction_handle=self._transaction_handle) try: - tracer.info(f'tool_call.arguments={tool_call.arguments}') + tracer.info(f"tool_call.arguments={tool_call.arguments}") result = await tool.execute(self.viking_fs, tool_ctx, **tool_call.arguments) return result @@ -444,8 +444,12 @@ async def _check_unread_existing_files( item_dict = dict(item) if hasattr(item, "model_dump") else dict(item) try: uri = resolve_flat_model_uri( - item_dict, registry, "default", "default", - memory_type=field_name, extract_context=self._extract_context + item_dict, + registry, + "default", + "default", + memory_type=field_name, + extract_context=self._extract_context, ) except Exception as e: logger.warning(f"Failed to resolve URI for {item}: {e}") diff --git a/openviking/session/memory/memory_updater.py b/openviking/session/memory/memory_updater.py index 1031c937b..43d5ac5e0 100644 --- a/openviking/session/memory/memory_updater.py +++ b/openviking/session/memory/memory_updater.py @@ -131,7 +131,15 @@ def _first_message_time_with_weekday(self) -> str | None: continue if hasattr(elem, "created_at") and elem.created_at: # 获取周几的英文全称 - weekday_en = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"] + weekday_en = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] weekday = weekday_en[elem.created_at.weekday()] return f"{elem.created_at.strftime('%Y-%m-%d')} ({weekday})" return None @@ -262,7 +270,7 @@ async def apply_operations( except Exception as e: tracer.error( f"Failed to apply operation: {e}, op={resolved_op.model}, op type={type(resolved_op.model)}", - e + e, ) if hasattr(resolved_op.model, "model_dump"): tracer.info(f"Op dump: {resolved_op.model.model_dump()}") @@ -274,7 +282,7 @@ async def apply_operations( await self._apply_edit_overview(op, uri, ctx) result.add_edited(uri) except Exception as e: - tracer.error(f"Failed to edit overview {uri}",e) + tracer.error(f"Failed to edit overview {uri}", e) result.add_error(uri, e) # Apply delete operations @@ -283,7 +291,7 @@ async def apply_operations( await self._apply_delete(uri, ctx) result.add_deleted(uri) except Exception as e: - tracer.error(f"Failed to delete memory {uri}",e) + tracer.error(f"Failed to delete memory {uri}", e) result.add_error(uri, e) # Vectorize written and edited memories @@ -350,7 +358,6 @@ async def _apply_write( # VikingFS automatically handles L0/L1/L2 and vector index updates await viking_fs.write_file(uri, full_content, ctx=ctx) - def _render_content_template( self, template: str, fields: Dict[str, Any], extract_context: Any = None ) -> str: @@ -394,7 +401,9 @@ def _is_patch_format(self, content: Any) -> bool: return isinstance(content, StrPatch) - async def _apply_edit(self, flat_model: Any, uri: str, ctx: RequestContext, memory_type: str = None) -> bool: + async def _apply_edit( + self, flat_model: Any, uri: str, ctx: RequestContext, memory_type: str = None + ) -> bool: """Apply edit operation from a flat model. Returns: @@ -631,7 +640,7 @@ def _print_diff(self, uri: str, old_content: str, new_content: str) -> None: lines.append(f"{'=' * 60}\n") # Print directly - tracer.info(f'diff={"\n".join(lines)}') + tracer.info("diff=" + "\n".join(lines)) except ImportError: # Fallback: just show file name tracer.error(f"diff_match_patch not available, skipping diff for {uri}") diff --git a/openviking/session/memory/tools.py b/openviking/session/memory/tools.py index 3fdd0e847..d28433c85 100644 --- a/openviking/session/memory/tools.py +++ b/openviking/session/memory/tools.py @@ -182,7 +182,6 @@ async def execute( ) -> Any: uri = kwargs.get("uri", "") try: - content = await viking_fs.read_file( uri, ctx=ctx.request_ctx, diff --git a/openviking/session/memory/utils/messages.py b/openviking/session/memory/utils/messages.py index 0fbc56336..471cfa851 100644 --- a/openviking/session/memory/utils/messages.py +++ b/openviking/session/memory/utils/messages.py @@ -74,7 +74,7 @@ def pretty_print_messages(messages: List[Dict[str, Any]]) -> None: output.append(json.dumps(tool_calls, indent=2, ensure_ascii=False)) output.append("\n=== End Messages ===") - tracer.info(f'messages={"\n".join(output)}') + tracer.info("messages=" + "\n".join(output)) def parse_memory_file_with_fields(content: str) -> Dict[str, Any]: diff --git a/openviking/session/memory/utils/uri.py b/openviking/session/memory/utils/uri.py index 90c2c2d2f..73166d2ca 100644 --- a/openviking/session/memory/utils/uri.py +++ b/openviking/session/memory/utils/uri.py @@ -297,7 +297,9 @@ def is_uri_allowed_for_schema( True if the URI is allowed """ allowed_dirs = collect_allowed_directories(schemas, user_space, agent_space, extract_context) - allowed_patterns = collect_allowed_path_patterns(schemas, user_space, agent_space, extract_context) + allowed_patterns = collect_allowed_path_patterns( + schemas, user_space, agent_space, extract_context + ) return is_uri_allowed(uri, allowed_dirs, allowed_patterns) @@ -478,8 +480,12 @@ def resolve_all_operations( item_dict = dict(item) if hasattr(item, "model_dump") else dict(item) try: uri = resolve_flat_model_uri( - item_dict, registry, user_space, agent_space, - memory_type=field_name, extract_context=extract_context + item_dict, + registry, + user_space, + agent_space, + memory_type=field_name, + extract_context=extract_context, ) # All operations go to unified list - will read existing file first resolved.operations.append( @@ -561,12 +567,16 @@ def validate_operations_uris( Tuple of (is_valid, list of error messages) """ allowed_dirs = collect_allowed_directories(schemas, user_space, agent_space, extract_context) - allowed_patterns = collect_allowed_path_patterns(schemas, user_space, agent_space, extract_context) + allowed_patterns = collect_allowed_path_patterns( + schemas, user_space, agent_space, extract_context + ) errors = [] # First resolve all URIs - resolved = resolve_all_operations(operations, registry, user_space, agent_space, extract_context) + resolved = resolve_all_operations( + operations, registry, user_space, agent_space, extract_context + ) if resolved.has_errors(): errors.extend(resolved.errors) diff --git a/openviking/sync_client.py b/openviking/sync_client.py index a73fb6907..385e67b33 100644 --- a/openviking/sync_client.py +++ b/openviking/sync_client.py @@ -84,7 +84,9 @@ def add_message( If both content and parts are provided, parts takes precedence. """ - return run_async(self._async_client.add_message(session_id, role, content, parts, created_at)) + return run_async( + self._async_client.add_message(session_id, role, content, parts, created_at) + ) def commit_session( self, session_id: str, telemetry: TelemetryRequest = False diff --git a/openviking/telemetry/tracer.py b/openviking/telemetry/tracer.py index 79c751617..ea32cd3d5 100644 --- a/openviking/telemetry/tracer.py +++ b/openviking/telemetry/tracer.py @@ -58,7 +58,11 @@ def _setup_logging(): try: # Configure logger to patch records with trace_id - logger.configure(patcher=lambda record: record.__setitem__("extra", {**record["extra"], "trace_id": get_trace_id()})) + logger.configure( + patcher=lambda record: record.__setitem__( + "extra", {**record["extra"], "trace_id": get_trace_id()} + ) + ) _trace_id_filter_added = True except Exception: pass @@ -106,6 +110,7 @@ def _init_asyncio_instrumentation() -> None: """Initialize asyncio instrumentation to create child spans for create_task.""" try: from opentelemetry.instrumentation.asyncio import AsyncioInstrumentor + AsyncioInstrumentor().instrument() logger.info("[TRACER] initialized AsyncioInstrumentor") except ImportError: @@ -341,6 +346,7 @@ def __call__(self, func: Callable) -> Callable: context = Context() if self.is_new_trace else None if inspect.iscoroutinefunction(func): + @functools.wraps(func) async def async_wrapper(*args, **kwargs): if _otel_tracer is None: @@ -366,8 +372,10 @@ async def async_wrapper(*args, **kwargs): span.record_exception(exception=e) span.set_status(Status(StatusCode.ERROR)) raise + return async_wrapper else: + @functools.wraps(func) def sync_wrapper(*args, **kwargs): if _otel_tracer is None: @@ -393,6 +401,7 @@ def sync_wrapper(*args, **kwargs): span.record_exception(exception=e) span.set_status(Status(StatusCode.ERROR)) raise + return sync_wrapper @classmethod @@ -539,4 +548,4 @@ def trace(name: str): async def my_function(): ... """ - return tracer(name=name) \ No newline at end of file + return tracer(name=name) diff --git a/openviking_cli/utils/config/open_viking_config.py b/openviking_cli/utils/config/open_viking_config.py index 6f92e49cd..9273a1c72 100644 --- a/openviking_cli/utils/config/open_viking_config.py +++ b/openviking_cli/utils/config/open_viking_config.py @@ -152,7 +152,6 @@ class OpenVikingConfig(BaseModel): default_factory=lambda: MemoryConfig(), description="Memory configuration" ) - telemetry: "TelemetryConfig" = Field( default_factory=lambda: TelemetryConfig(), description="Telemetry configuration" ) diff --git a/openviking_cli/utils/config/telemetry_config.py b/openviking_cli/utils/config/telemetry_config.py index c199b0561..d27da8b19 100644 --- a/openviking_cli/utils/config/telemetry_config.py +++ b/openviking_cli/utils/config/telemetry_config.py @@ -23,4 +23,4 @@ class TelemetryConfig(BaseModel): default_factory=lambda: TracerConfig(), description="OpenTelemetry tracer configuration" ) - model_config = {"extra": "forbid"} \ No newline at end of file + model_config = {"extra": "forbid"} diff --git a/tests/integration/test_compressor_v2_xiaomei.py b/tests/integration/test_compressor_v2_xiaomei.py index 30e70973f..faf7b128e 100644 --- a/tests/integration/test_compressor_v2_xiaomei.py +++ b/tests/integration/test_compressor_v2_xiaomei.py @@ -24,7 +24,6 @@ DEFAULT_SESSION_ID = "xiaomei-demo" - console = Console() # ── 对话数据 (10 轮 user + assistant 模拟) ───────────────────────────────── @@ -107,9 +106,9 @@ def run_ingest(client: ov.SyncHTTPClient, session_id: str, wait_seconds: float): console.rule(f"[bold]Phase 1: 写入对话 — {DISPLAY_NAME} ({len(CONVERSATION)} 轮)[/bold]") # 获取 session;若不存在则由服务端按 session_id 自动创建 - session= client.create_session() - session_id = session.get('session_id') - print(f'session_id={session_id}') + session = client.create_session() + session_id = session.get("session_id") + print(f"session_id={session_id}") console.print(f" Session: [bold cyan]{session_id}[/bold cyan]") console.print() @@ -121,8 +120,18 @@ def run_ingest(client: ov.SyncHTTPClient, session_id: str, wait_seconds: float): total = len(CONVERSATION) for i, turn in enumerate(CONVERSATION, 1): console.print(f" [dim][{i}/{total}][/dim] 添加 user + assistant 消息...") - client.add_message(session_id, role="user", parts=[{"type": "text", "text": turn["user"]}], created_at=session_time_str) - client.add_message(session_id, role="assistant", parts=[{"type": "text", "text": turn["assistant"]}], created_at=session_time_str) + client.add_message( + session_id, + role="user", + parts=[{"type": "text", "text": turn["user"]}], + created_at=session_time_str, + ) + client.add_message( + session_id, + role="assistant", + parts=[{"type": "text", "text": turn["assistant"]}], + created_at=session_time_str, + ) console.print() console.print(f" 共添加 [bold]{total * 2}[/bold] 条消息") @@ -154,12 +163,10 @@ def run_ingest(client: ov.SyncHTTPClient, session_id: str, wait_seconds: float): console.print(f" [yellow]等待向量化完成...[/yellow]") client.wait_processed() - if wait_seconds > 0: console.print(f" [dim]额外等待 {wait_seconds:.0f}s...[/dim]") time.sleep(wait_seconds) - session_info = client.get_session(session_id) console.print(f" Session 详情: {session_info}") @@ -208,7 +215,11 @@ def run_verify(client: ov.SyncHTTPClient): uri = getattr(m, "uri", "") score = getattr(m, "score", 0) console.print(f" [green]Memory:[/green] {uri} (score: {score:.4f})") - console.print(f" [dim]{text[:120]}...[/dim]" if len(text) > 120 else f" [dim]{text}[/dim]") + console.print( + f" [dim]{text[:120]}...[/dim]" + if len(text) > 120 + else f" [dim]{text}[/dim]" + ) count += len(results.memories) if hasattr(results, "resources") and results.resources: @@ -216,9 +227,7 @@ def run_verify(client: ov.SyncHTTPClient): text = getattr(r, "content", "") or getattr(r, "text", "") or str(r) print(f" [DEBUG] resource text: {repr(text)}") recall_texts.append(text) - console.print( - f" [blue]Resource:[/blue] {r.uri} (score: {r.score:.4f})" - ) + console.print(f" [blue]Resource:[/blue] {r.uri} (score: {r.score:.4f})") count += len(results.resources) if hasattr(results, "skills") and results.skills: @@ -256,9 +265,7 @@ def main(): parser.add_argument( "--session-id", default=DEFAULT_SESSION_ID, help=f"Session ID (默认: {DEFAULT_SESSION_ID})" ) - parser.add_argument( - "--wait", type=float, default=5.0, help="提交后额外等待秒数 (默认: 5)" - ) + parser.add_argument("--wait", type=float, default=5.0, help="提交后额外等待秒数 (默认: 5)") args = parser.parse_args() console.print( @@ -271,8 +278,7 @@ def main(): ) client = ov.SyncHTTPClient( - url=args.url, api_key=args.api_key, agent_id=args.agent_id, - timeout=180 + url=args.url, api_key=args.api_key, agent_id=args.agent_id, timeout=180 ) try: @@ -294,9 +300,7 @@ def main(): ) except Exception as e: - console.print( - Panel(f"[bold red]Error:[/bold red] {e}", style="red", width=PANEL_WIDTH) - ) + console.print(Panel(f"[bold red]Error:[/bold red] {e}", style="red", width=PANEL_WIDTH)) import traceback traceback.print_exc() @@ -306,4 +310,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() From 9006225759afcc218b60234f027450c8d1d654c2 Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Thu, 2 Apr 2026 13:18:06 +0800 Subject: [PATCH 09/20] =?UTF-8?q?fix:=20=E4=BB=8E=E6=9C=AC=E5=9C=B0?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E8=AF=BB=E5=8F=96API=20key=E8=80=8C=E9=9D=9E?= =?UTF-8?q?=E7=A1=AC=E7=BC=96=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- benchmark/locomo/vikingbot/run_full_eval.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/benchmark/locomo/vikingbot/run_full_eval.sh b/benchmark/locomo/vikingbot/run_full_eval.sh index 19e619e5c..daf6a4991 100755 --- a/benchmark/locomo/vikingbot/run_full_eval.sh +++ b/benchmark/locomo/vikingbot/run_full_eval.sh @@ -2,6 +2,11 @@ set -e +# 加载本地环境变量 +if [ -f ~/.openviking_benchmark_env ]; then + source ~/.openviking_benchmark_env +fi + # Step 1: 导入数据 echo "[1/4] 导入数据..." python benchmark/locomo/vikingbot/import_to_ov.py --input ~/.test_data/locomo10.json --force-ingest @@ -18,7 +23,7 @@ sleep 180 # Step 3: 裁判打分 echo "[3/4] 裁判打分..." -python benchmark/locomo/vikingbot/judge.py --token 0a2b68f6-4df3-48f5-81b9-f85fe0af9cef --input ./result/locomo_result_multi_read_all.csv --parallel 10 +python benchmark/locomo/vikingbot/judge.py --input ./result/locomo_result_multi_read_all.csv --parallel 10 echo "等待 3 分钟..." sleep 180 From 2e56fed6f0b2dc3434ee83ecc5290833e6f6a308 Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Thu, 2 Apr 2026 14:50:12 +0800 Subject: [PATCH 10/20] fix: resolve lint errors in tracer.py - Remove unused sys import - Sort import block alphabetically - Remove unused span variable Co-Authored-By: Claude Opus 4.6 --- openviking/telemetry/tracer.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/openviking/telemetry/tracer.py b/openviking/telemetry/tracer.py index ea32cd3d5..401a10d20 100644 --- a/openviking/telemetry/tracer.py +++ b/openviking/telemetry/tracer.py @@ -6,7 +6,6 @@ import inspect import json import logging -import sys from typing import Any, Callable, Optional from loguru import logger @@ -14,13 +13,13 @@ # Try to import opentelemetry - will be None if not installed try: from opentelemetry import trace as otel_trace - from opentelemetry.sdk.trace import TracerProvider, Status, StatusCode - from opentelemetry.sdk.trace.export import BatchSpanProcessor - from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter - from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator from opentelemetry.context import Context - from opentelemetry.propagate import inject, extract + from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter + from opentelemetry.propagate import extract, inject from opentelemetry.sdk.resources import Resource + from opentelemetry.sdk.trace import Status, StatusCode, TracerProvider + from opentelemetry.sdk.trace.export import BatchSpanProcessor + from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator except ImportError: otel_trace = None TracerProvider = None @@ -483,7 +482,7 @@ def info_span(line: str, console: bool = False) -> None: logger.info(line) if _otel_tracer is None: return - with tracer.start_as_current_span(name=line) as span: + with tracer.start_as_current_span(name=line): pass @staticmethod From 2a5f9c36e30924bfb503b5f54f093d910fa2f043 Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Thu, 2 Apr 2026 15:05:59 +0800 Subject: [PATCH 11/20] =?UTF-8?q?feat:=20=E5=90=88=E5=B9=B6=5Fapply=5Fwrit?= =?UTF-8?q?e=E5=88=B0=5Fapply=5Fedit=E5=B9=B6=E6=B7=BB=E5=8A=A0content=5Ft?= =?UTF-8?q?emplate=E6=B8=B2=E6=9F=93=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 合并两个方法,统一模板渲染逻辑 - 添加extract_context参数支持模板渲染 - 添加[content_template]日志标签用于调试events渲染问题 Co-Authored-By: Claude Opus 4.6 --- openviking/session/memory/memory_updater.py | 154 ++++++++++++-------- 1 file changed, 94 insertions(+), 60 deletions(-) diff --git a/openviking/session/memory/memory_updater.py b/openviking/session/memory/memory_updater.py index 43d5ac5e0..2feeef5d3 100644 --- a/openviking/session/memory/memory_updater.py +++ b/openviking/session/memory/memory_updater.py @@ -261,6 +261,7 @@ async def apply_operations( resolved_op.model, resolved_op.uri, ctx, + extract_context=extract_context, memory_type=resolved_op.memory_type, ) if is_edited: @@ -300,64 +301,6 @@ async def apply_operations( tracer.info(f"Memory operations applied: {result.summary()}") return result - async def _apply_write( - self, - flat_model: Any, - uri: str, - ctx: RequestContext, - extract_context: Any = None, - memory_type: str = None, - ) -> None: - """Apply write operation from a flat model.""" - viking_fs = self._get_viking_fs() - - # Convert model to dict - model_dict = flat_model_to_dict(flat_model) - - # Extract content - priority: model_dict["content"] - content = model_dict.pop("content", None) or "" - - # Get memory type schema - use passed memory_type first, then fallback to model_dict - memory_type_str = memory_type or model_dict.get("memory_type") - - field_schema_map: Dict[str, MemoryField] = {} - business_fields: Dict[str, Any] = {} - - if self._registry and memory_type_str: - schema = self._registry.get(memory_type_str) - if schema: - field_schema_map = {f.name: f for f in schema.fields} - # Extract business fields (those defined in the schema) - for field_name in field_schema_map: - if field_name in model_dict: - business_fields[field_name] = model_dict[field_name] - - # 模板渲染逻辑 - if schema.content_template: - try: - rendered_content = self._render_content_template( - schema.content_template, - business_fields, - extract_context=extract_context, - ) - if rendered_content: - content = rendered_content - except Exception as e: - tracer.warning( - f"Failed to render content template for memory type {memory_type_str}: {e}" - ) - # 渲染失败时保留原始 content,确保写入操作继续进行 - - # Collect metadata - only include business fields (from schema, except content) - metadata = business_fields.copy() - - # Serialize content with metadata - full_content = serialize_with_metadata(content, metadata) - - # Write content to VikingFS - # VikingFS automatically handles L0/L1/L2 and vector index updates - await viking_fs.write_file(uri, full_content, ctx=ctx) - def _render_content_template( self, template: str, fields: Dict[str, Any], extract_context: Any = None ) -> str: @@ -402,7 +345,12 @@ def _is_patch_format(self, content: Any) -> bool: return isinstance(content, StrPatch) async def _apply_edit( - self, flat_model: Any, uri: str, ctx: RequestContext, memory_type: str = None + self, + flat_model: Any, + uri: str, + ctx: RequestContext, + extract_context: Any = None, + memory_type: str = None, ) -> bool: """Apply edit operation from a flat model. @@ -414,6 +362,9 @@ async def _apply_edit( # Convert flat model to dict first (needed for checking content type) model_dict = flat_model_to_dict(flat_model) + # Get memory type schema - use parameter first, then fallback to model_dict + memory_type_str = memory_type or model_dict.get("memory_type") + # Read current memory try: current_full_content = await viking_fs.read_file(uri, ctx=ctx) or "" @@ -422,7 +373,57 @@ async def _apply_edit( # If no StrPatch fields, treat as write operation has_str_patch = any(self._is_patch_format(v) for v in model_dict.values()) if not has_str_patch: - await self._apply_write(flat_model, uri, ctx) + # Write operation (new file) - with template rendering + # Extract content - priority: model_dict["content"] + content = model_dict.pop("content", None) or "" + + field_schema_map: Dict[str, MemoryField] = {} + business_fields: Dict[str, Any] = {} + + if self._registry and memory_type_str: + schema = self._registry.get(memory_type_str) + if schema: + field_schema_map = {f.name: f for f in schema.fields} + # Extract business fields (those defined in the schema) + for field_name in field_schema_map: + if field_name in model_dict: + business_fields[field_name] = model_dict[field_name] + + # 模板渲染逻辑 + if schema.content_template: + try: + tracer.info( + f"[content_template] Rendering template for {memory_type_str}, " + f"business_fields={list(business_fields.keys())}, " + f"extract_context={'provided' if extract_context else 'None'}" + ) + rendered_content = self._render_content_template( + schema.content_template, + business_fields, + extract_context=extract_context, + ) + if rendered_content: + content = rendered_content + tracer.info( + f"[content_template] Rendered result (first 200 chars): {rendered_content[:200]}" + ) + else: + tracer.warning( + f"[content_template] Rendered content is empty for {memory_type_str}" + ) + except Exception as e: + tracer.error( + f"Failed to render content template for memory type {memory_type_str}: {e}" + ) + + # Collect metadata + metadata = business_fields.copy() + + # Serialize content with metadata + full_content = serialize_with_metadata(content, metadata) + + # Write content to VikingFS + await viking_fs.write_file(uri, full_content, ctx=ctx) return False # New file written # Has StrPatch field but file doesn't exist - cannot apply tracer.error(f"Memory not found for edit: {uri}") @@ -434,11 +435,44 @@ async def _apply_edit( # Get memory type schema - use parameter first, then fallback to model_dict memory_type_str = memory_type or model_dict.get("memory_type") field_schema_map: Dict[str, MemoryField] = {} + business_fields: Dict[str, Any] = {} if self._registry and memory_type_str: schema = self._registry.get(memory_type_str) if schema: field_schema_map = {f.name: f for f in schema.fields} + # Extract business fields (those defined in the schema) + for field_name in field_schema_map: + if field_name in model_dict: + business_fields[field_name] = model_dict[field_name] + + # 模板渲染逻辑(编辑时也支持) + if schema.content_template: + try: + tracer.info( + f"[content_template] Editing: Rendering template for {memory_type_str}, " + f"business_fields={list(business_fields.keys())}, " + f"extract_context={'provided' if extract_context else 'None'}" + ) + rendered_content = self._render_content_template( + schema.content_template, + business_fields, + extract_context=extract_context, + ) + if rendered_content: + # 用渲染后的 content 覆盖 model_dict 中的 content + model_dict["content"] = rendered_content + tracer.info( + f"[content_template] Edited result (first 200 chars): {rendered_content[:200]}" + ) + else: + tracer.warning( + f"[content_template] Edited render result is empty for {memory_type_str}" + ) + except Exception as e: + tracer.error( + f"Failed to render content template for edit {memory_type_str}: {e}" + ) # Apply all fields (including content) through MergeOp new_plain_content = current_plain_content From 5593631130400d890337a3dbd0d10084adbc210b Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Thu, 2 Apr 2026 16:25:02 +0800 Subject: [PATCH 12/20] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=20lint=20?= =?UTF-8?q?=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. tools.py: 导入缺失的 truncate_content 函数和 ToolContext 类型 2. tools.py: 修复 f-string 无占位符的问题 3. test_memory_utils.py: 删除引用不存在函数的测试 Co-Authored-By: Claude Opus 4.6 --- openviking/session/memory/tools.py | 8 +++-- tests/session/memory/test_memory_utils.py | 41 ----------------------- 2 files changed, 6 insertions(+), 43 deletions(-) diff --git a/openviking/session/memory/tools.py b/openviking/session/memory/tools.py index d28433c85..5898dc37c 100644 --- a/openviking/session/memory/tools.py +++ b/openviking/session/memory/tools.py @@ -8,14 +8,18 @@ import json from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union from openviking.session.memory.utils import parse_memory_file_with_fields +from openviking.session.memory.utils.content import truncate_content from openviking.storage.viking_fs import VikingFS from openviking.telemetry import tracer from openviking_cli.exceptions import NotFoundError from openviking_cli.utils import get_logger +if TYPE_CHECKING: + from openviking.server.identity import ToolContext + logger = get_logger(__name__) @@ -193,7 +197,7 @@ async def execute( tracer.info(f"read not found: {uri}") return {"error": str(e)} except Exception as e: - tracer.error(f"Failed to execute read", e) + tracer.error(f"Failed to execute read: {e}") return {"error": str(e)} diff --git a/tests/session/memory/test_memory_utils.py b/tests/session/memory/test_memory_utils.py index eac89a567..da02d1f91 100644 --- a/tests/session/memory/test_memory_utils.py +++ b/tests/session/memory/test_memory_utils.py @@ -398,47 +398,6 @@ def test_registry(self): return registry - def test_resolve_write_uri(self, test_registry): - """Test resolving URI for write operation (dict format).""" - write_op = { - "memory_type": "preferences", - "topic": "Python code style", - "content": "Test content", - } - - uri = resolve_write_uri(write_op, test_registry) - - assert uri == "viking://user/default/memories/preferences/Python code style.md" - - def test_resolve_write_uri_unknown_type(self, test_registry): - """Test resolving write operation with unknown memory type.""" - write_op = { - "memory_type": "unknown_type", - } - - with pytest.raises(ValueError, match="Unknown memory type"): - resolve_write_uri(write_op, test_registry) - - def test_resolve_edit_target(self, test_registry): - """Test resolving target URI for edit operation.""" - uri = resolve_edit_target( - "tools", - {"tool_name": "web_search"}, - test_registry, - ) - - assert uri == "viking://agent/default/memories/tools/web_search.md" - - def test_resolve_delete_target(self, test_registry): - """Test resolving target URI for delete operation.""" - uri = resolve_delete_target( - "preferences", - {"topic": "Test topic"}, - test_registry, - ) - - assert uri == "viking://user/default/memories/preferences/Test topic.md" - def test_resolve_all_operations(self, test_registry): """Test resolving all operations at once.""" operations = MemoryOperations( From ede619264038f5e481fe265c4662ec26c9c04aff Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Thu, 2 Apr 2026 16:53:18 +0800 Subject: [PATCH 13/20] update --- benchmark/RAG/run.py | 44 +-- benchmark/RAG/scripts/download_dataset.py | 84 +++-- benchmark/RAG/scripts/prepare_dataset.py | 51 +-- benchmark/RAG/scripts/run_sampling.py | 16 +- benchmark/RAG/scripts/sample_dataset.py | 329 +++++++++--------- benchmark/RAG/src/adapters/base.py | 10 +- .../RAG/src/adapters/financebench_adapter.py | 16 +- benchmark/RAG/src/adapters/locomo_adapter.py | 23 +- benchmark/RAG/src/adapters/qasper_adapter.py | 64 ++-- .../RAG/src/adapters/syllabusqa_adapter.py | 104 +++--- benchmark/RAG/src/core/judge_util.py | 2 +- benchmark/RAG/src/core/llm_client.py | 3 +- benchmark/RAG/src/core/metrics.py | 28 +- benchmark/RAG/src/core/monitor.py | 2 +- benchmark/RAG/src/core/vector_store.py | 17 +- benchmark/RAG/src/pipeline.py | 79 +++-- benchmark/locomo/openclaw/eval.py | 10 +- benchmark/locomo/vikingbot/import_to_ov.py | 12 +- benchmark/locomo/vikingbot/judge.py | 5 +- benchmark/locomo/vikingbot/run_eval.py | 9 +- .../locomo/vikingbot/stat_judge_result.py | 2 +- benchmark/skillsbench/skill_bench_eval.py | 30 +- bot/vikingbot/__main__.py | 1 - bot/vikingbot/agent/__init__.py | 2 +- bot/vikingbot/agent/memory.py | 4 +- bot/vikingbot/agent/skills.py | 3 +- bot/vikingbot/agent/subagent.py | 3 +- bot/vikingbot/agent/tools/__init__.py | 2 +- bot/vikingbot/agent/tools/filesystem.py | 8 +- bot/vikingbot/agent/tools/message.py | 4 +- bot/vikingbot/agent/tools/ov_file.py | 4 +- bot/vikingbot/agent/tools/registry.py | 3 +- bot/vikingbot/agent/tools/shell.py | 12 +- bot/vikingbot/agent/tools/spawn.py | 6 +- .../agent/tools/websearch/__init__.py | 5 +- .../agent/tools/websearch/registry.py | 2 +- bot/vikingbot/bus/queue.py | 2 +- bot/vikingbot/channels/base.py | 2 +- bot/vikingbot/channels/discord.py | 3 - bot/vikingbot/channels/feishu.py | 8 +- bot/vikingbot/channels/openapi.py | 2 +- bot/vikingbot/channels/slack.py | 4 +- bot/vikingbot/channels/telegram.py | 6 +- bot/vikingbot/channels/utils.py | 4 +- bot/vikingbot/channels/whatsapp.py | 1 - bot/vikingbot/cli/werewolf_game.py | 6 +- bot/vikingbot/config/__init__.py | 2 +- bot/vikingbot/config/loader.py | 4 +- bot/vikingbot/config/schema.py | 2 +- bot/vikingbot/console/web_console.py | 5 +- bot/vikingbot/cron/types.py | 2 - bot/vikingbot/heartbeat/service.py | 4 +- bot/vikingbot/hooks/base.py | 4 +- .../hooks/builtins/openviking_hooks.py | 1 - bot/vikingbot/hooks/manager.py | 2 +- bot/vikingbot/openviking_mount/__init__.py | 8 +- bot/vikingbot/openviking_mount/fuse_finder.py | 12 +- bot/vikingbot/openviking_mount/fuse_proxy.py | 15 +- bot/vikingbot/openviking_mount/fuse_simple.py | 12 +- .../openviking_mount/fuse_simple_debug.py | 14 +- bot/vikingbot/openviking_mount/manager.py | 8 +- bot/vikingbot/openviking_mount/mount.py | 12 +- .../openviking_mount/session_integration.py | 10 +- .../openviking_mount/user_apikey_manager.py | 2 +- .../providers/openai_compatible_provider.py | 3 +- bot/vikingbot/providers/transcription.py | 1 - bot/vikingbot/sandbox/__init__.py | 4 +- bot/vikingbot/sandbox/backends/__init__.py | 8 +- bot/vikingbot/sandbox/backends/direct.py | 9 +- bot/vikingbot/sandbox/backends/srt.py | 9 +- bot/vikingbot/sandbox/manager.py | 15 +- bot/vikingbot/session/__init__.py | 2 +- bot/vikingbot/utils/__init__.py | 12 +- bot/vikingbot/utils/helpers.py | 4 +- bot/vikingbot/utils/tracing.py | 2 +- .../github-proxy/scripts/convert_url.py | 2 +- .../skills/opencode/list_sessions.py | 3 +- .../skills/opencode/opencode_utils.py | 2 +- .../tests/e2e/test-archive-expand.py | 2 +- openviking/models/vlm/backends/litellm_vlm.py | 3 - openviking/models/vlm/backends/openai_vlm.py | 1 - .../models/vlm/backends/volcengine_vlm.py | 1 + .../parse/parsers/code/ast/languages/php.py | 2 +- openviking/session/compressor_v2.py | 3 +- openviking/telemetry/__init__.py | 2 +- .../utils/config/open_viking_config.py | 2 +- tests/agfs/conftest.py | 1 + ...compressor_v2_event_span_multiple_turns.py | 2 +- .../test_compressor_v2_tool_skill_memory.py | 6 +- .../integration/test_compressor_v2_xiaomei.py | 2 +- tests/models/vlm/test_volcengine_cache.py | 6 +- tests/parse/test_html_parser_utils.py | 1 - tests/server/test_bot_proxy_auth.py | 4 +- .../test_vectordb_collection_loading.py | 6 +- tests/unit/test_cohere_rerank.py | 2 - uv.lock | 140 +++++++- 96 files changed, 743 insertions(+), 675 deletions(-) diff --git a/benchmark/RAG/run.py b/benchmark/RAG/run.py index 0d2d0a57b..1f76d1148 100644 --- a/benchmark/RAG/run.py +++ b/benchmark/RAG/run.py @@ -1,13 +1,15 @@ +import importlib import os import sys -import yaml -import importlib from argparse import ArgumentParser from pathlib import Path +import yaml + sys.path.append(str(Path(__file__).parent)) from src.core.logger import setup_logging + # ========================================== # 1. Environment Initialization # ========================================== @@ -20,9 +22,9 @@ print(f"[Init] Auto-detected OpenViking config: {ov_config_path}") try: - from src.pipeline import BenchmarkPipeline + from src.core.llm_client import LLMClientWrapper from src.core.vector_store import VikingStoreWrapper - from src.core.llm_client import LLMClientWrapper + from src.pipeline import BenchmarkPipeline except SyntaxError as e: print(f"\n[Fatal Error] Syntax error while importing modules: {e}") sys.exit(1) @@ -59,19 +61,19 @@ def resolve_path(path_str, base_path): def main(): parser = ArgumentParser(description="Run RAG Benchmark (Smart Path Handling)") default_config_path = os.path.join(SCRIPT_DIR, "config/config.yaml") - - parser.add_argument("--config", default=default_config_path, + + parser.add_argument("--config", default=default_config_path, help=f"Path to config file. Default: {default_config_path}") - - parser.add_argument("--step", choices=["all", "gen", "eval", "del"], default="all", + + parser.add_argument("--step", choices=["all", "gen", "eval", "del"], default="all", help="Execution step: 'gen' (Retrieval+LLM), 'eval' (Judge), or 'all'") - + args = parser.parse_args() # --- B. Load and Parse Config --- config_path = os.path.abspath(args.config) print(f"[Init] Loading configuration from: {config_path}") - + try: config = load_config(config_path) except FileNotFoundError as e: @@ -82,12 +84,12 @@ def main(): print(f"[Init] Resolving paths relative to Project Root: {PROJECT_ROOT}") dataset_name = config.get('dataset_name', 'UnknownDataset') retrieval_topk = config.get('execution', {}).get('retrieval_topk', 5) - + format_vars = { 'dataset_name': dataset_name, 'retrieval_topk': retrieval_topk } - + path_keys = ['dataset_path', 'output_dir', 'vector_store', 'log_file', 'doc_output_dir'] for key in path_keys: if key in config.get('paths', {}): @@ -101,15 +103,15 @@ def main(): try: logger = setup_logging(config['paths']['log_file']) logger.info(">>> Benchmark Session Started") - + # 1. Adapter (Dynamic Loading) adapter_cfg = config.get('adapter', {}) module_path = adapter_cfg.get('module', 'src.adapters.locomo_adapter') class_name = adapter_cfg.get('class_name', 'LocomoAdapter') - + logger.info(f"Dynamically loading Adapter: {class_name} from {module_path}") logger.info(f"Loading dataset from: {config['paths']['dataset_path']}") - + try: mod = importlib.import_module(module_path) AdapterClass = getattr(mod, class_name) @@ -120,18 +122,18 @@ def main(): except AttributeError as e: logger.error(f"Class '{class_name}' not found in module '{module_path}'. Please check your config 'adapter.class_name'. Error: {e}") raise e - + # 2. Vector Store vector_store = VikingStoreWrapper(store_path=config['paths']['vector_store']) - + # 3. LLM Client api_key = os.environ.get( - config['llm'].get('api_key_env_var', ''), + config['llm'].get('api_key_env_var', ''), config['llm'].get('api_key') ) if not api_key: logger.warning("No API Key found in config or environment variables!") - + llm_client = LLMClientWrapper(config=config['llm'], api_key=api_key) # 4. Pipeline @@ -146,7 +148,7 @@ def main(): if args.step in ["all", "gen"]: logger.info("Stage: Generation (Ingest -> Retrieve -> Generate)") pipeline.run_generation() - + if args.step in ["all", "eval"]: logger.info("Stage: Evaluation (Judge -> Metrics)") pipeline.run_evaluation() @@ -154,7 +156,7 @@ def main(): if args.step in ["all", "del"]: logger.info("Stage: Delete Vector Store") pipeline.run_deletion() - + logger.info("Benchmark finished successfully.") except KeyboardInterrupt: diff --git a/benchmark/RAG/scripts/download_dataset.py b/benchmark/RAG/scripts/download_dataset.py index 3a32ff639..fe256b51e 100644 --- a/benchmark/RAG/scripts/download_dataset.py +++ b/benchmark/RAG/scripts/download_dataset.py @@ -6,12 +6,10 @@ import argparse import hashlib -import json -import os import shutil import sys from pathlib import Path -from typing import Dict, List, Optional +from typing import Dict, Optional from urllib.parse import urlparse import requests @@ -83,13 +81,13 @@ def calculate_checksum(file_path: Path, algorithm: str = "sha256") -> str: def download_file(url: str, dest_path: Path, chunk_size: int = 8192) -> bool: """Download a file with progress bar.""" dest_path.parent.mkdir(parents=True, exist_ok=True) - + try: response = requests.get(url, stream=True, timeout=30) response.raise_for_status() - + total_size = int(response.headers.get("content-length", 0)) - + with open(dest_path, "wb") as f, tqdm( desc=f"Downloading {dest_path.name}", total=total_size, @@ -111,13 +109,13 @@ def download_file(url: str, dest_path: Path, chunk_size: int = 8192) -> bool: def extract_archive(archive_path: Path, extract_to: Path, extract_subdir: Optional[str] = None) -> bool: """Extract archive file (zip, tar.gz, etc.).""" - import zipfile import tarfile - + import zipfile + try: temp_extract_dir = extract_to / ".temp_extract" temp_extract_dir.mkdir(parents=True, exist_ok=True) - + if archive_path.suffix == ".zip": with zipfile.ZipFile(archive_path, "r") as zip_ref: zip_ref.extractall(temp_extract_dir) @@ -128,7 +126,7 @@ def extract_archive(archive_path: Path, extract_to: Path, extract_subdir: Option print(f"Unsupported archive format: {archive_path.suffix}") shutil.rmtree(temp_extract_dir) return False - + if extract_subdir: source_dir = temp_extract_dir / extract_subdir if source_dir.exists() and source_dir.is_dir(): @@ -151,7 +149,7 @@ def extract_archive(archive_path: Path, extract_to: Path, extract_subdir: Option else: dest_item.unlink() shutil.move(str(item), str(dest_item)) - + shutil.rmtree(temp_extract_dir) return True except Exception as e: @@ -166,20 +164,20 @@ def verify_dataset(dataset_name: str, dataset_dir: Path) -> bool: if dataset_name not in DATASET_SOURCES: print(f"Unknown dataset: {dataset_name}") return False - + source = DATASET_SOURCES[dataset_name] missing_files = [] - + for file_path in source["files"]: full_path = dataset_dir / file_path # Check if path exists (either file or directory) if not full_path.exists(): missing_files.append(file_path) - + if missing_files: print(f"Missing files for {dataset_name}: {missing_files}") return False - + print(f"✓ {dataset_name} verified successfully") return True @@ -201,31 +199,31 @@ def download_from_url( Supports single url or multiple urls via urls field. """ dataset_dir = output_dir / dataset_name - + if dataset_dir.exists() and not force: print(f"{dataset_name} already exists at {dataset_dir}, skipping download") if verify: return verify_dataset(dataset_name, dataset_dir) return True - + print(f"Downloading {dataset_name}...") - + # Support single url or multiple urls urls = source.get("urls", [source.get("url")]) if source.get("urls") else [source.get("url")] - + success = True for url in urls: if not url: continue - + parsed_url = urlparse(url) file_name = Path(parsed_url.path).name downloaded_path = output_dir / file_name - + if not download_file(url, downloaded_path): success = False continue - + if "checksum" in source and source["checksum"]: algo, expected_checksum = source["checksum"].split(":", 1) actual_checksum = calculate_checksum(downloaded_path, algo) @@ -237,7 +235,7 @@ def download_from_url( success = False continue print(f"✓ Checksum verified for {dataset_name}") - + if is_archive_file(downloaded_path): extract_subdir = source.get("extract_subdir") if not extract_archive(downloaded_path, dataset_dir, extract_subdir): @@ -250,10 +248,10 @@ def download_from_url( dest_path = dataset_dir / file_name shutil.move(str(downloaded_path), str(dest_path)) print(f"✓ Saved single file to {dest_path}") - + if verify and not verify_dataset(dataset_name, dataset_dir): return False - + if success: print(f"✓ {dataset_name} downloaded successfully to {dataset_dir}") return success @@ -269,28 +267,28 @@ def download_dataset( if dataset_name not in DATASET_SOURCES: print(f"Unknown dataset: {dataset_name}") return False - + source = DATASET_SOURCES[dataset_name] dataset_dir = output_dir / dataset_name - + if dataset_dir.exists() and not force: print(f"{dataset_name} already exists at {dataset_dir}, skipping download") if verify: return verify_dataset(dataset_name, dataset_dir) return True - + success = download_from_url(source, output_dir, dataset_name, force, verify) - + if success and verify: return verify_dataset(dataset_name, dataset_dir) - + return success def main(): # Check if any datasets are configured configured_datasets = [k for k in DATASET_SOURCES.keys() if not k.startswith('#')] - + if not configured_datasets: print("=" * 80) print("No datasets configured!") @@ -302,7 +300,7 @@ def main(): print("See README_DATASET_CONFIG.md for detailed instructions.") print("=" * 80) return 1 - + parser = argparse.ArgumentParser( description="Download datasets for RAG benchmark" ) @@ -329,33 +327,33 @@ def main(): action="store_true", help="Skip dataset verification" ) - + args = parser.parse_args() - + output_dir = args.output_dir.resolve() output_dir.mkdir(parents=True, exist_ok=True) - + datasets = ( - configured_datasets - if args.dataset == "all" + configured_datasets + if args.dataset == "all" else [args.dataset] ) - + print(f"Downloading datasets to: {output_dir}") print(f"Datasets: {', '.join(datasets)}") print() - + success_count = 0 for dataset in datasets: if download_dataset( - dataset, - output_dir, - args.force, + dataset, + output_dir, + args.force, not args.no_verify ): success_count += 1 print() - + print(f"Download complete: {success_count}/{len(datasets)} successful") return 0 if success_count == len(datasets) else 1 diff --git a/benchmark/RAG/scripts/prepare_dataset.py b/benchmark/RAG/scripts/prepare_dataset.py index a734cee8d..56e132ef1 100644 --- a/benchmark/RAG/scripts/prepare_dataset.py +++ b/benchmark/RAG/scripts/prepare_dataset.py @@ -7,12 +7,13 @@ import argparse import sys from pathlib import Path -from typing import List, Optional +from typing import Optional sys.path.append(str(Path(__file__).parent)) -from download_dataset import download_dataset, DATASET_SOURCES as DOWNLOAD_SOURCES -from sample_dataset import sample_dataset, DATASET_SAMPLERS +from download_dataset import DATASET_SOURCES as DOWNLOAD_SOURCES +from download_dataset import download_dataset +from sample_dataset import DATASET_SAMPLERS, sample_dataset def prepare_dataset( @@ -32,9 +33,9 @@ def prepare_dataset( print("\n" + "=" * 80) print(f"Preparing dataset: {dataset_name}") print("=" * 80) - + success = True - + # Step 1: Download if not skip_download: print("\n[Step 1/2] Downloading dataset...") @@ -49,16 +50,16 @@ def prepare_dataset( success = False else: print("\n[Step 1/2] Skipping download (--skip-download)") - + # Step 2: Sample if not skip_sampling and success: print("\n[Step 2/2] Sampling dataset...") input_dir = download_dir / dataset_name dataset_output_dir = output_dir / dataset_name - + actual_sample_size = None if use_full else sample_size actual_num_docs = None if use_full else num_docs - + sample_success = sample_dataset( dataset_name, input_dir, @@ -73,7 +74,7 @@ def prepare_dataset( success = False elif skip_sampling: print("\n[Step 2/2] Skipping sampling (--skip-sampling)") - + return success @@ -99,7 +100,7 @@ def main(): python prepare_dataset.py --skip-sampling """ ) - + # Dataset selection parser.add_argument( "--dataset", "-d", @@ -108,7 +109,7 @@ def main(): default="all", help="Dataset to prepare (default: all)" ) - + # Directories parser.add_argument( "--download-dir", @@ -122,7 +123,7 @@ def main(): default=Path(__file__).parent.parent / "datasets", help="Directory for final prepared datasets (default: datasets/)" ) - + # Sampling options parser.add_argument( "--sample-size", "-n", @@ -154,7 +155,7 @@ def main(): default="random", help="Sampling mode: 'random' (default) for random sampling, 'stratified' for stratified sampling by category" ) - + # Skip options parser.add_argument( "--skip-download", @@ -166,39 +167,39 @@ def main(): action="store_true", help="Skip sampling step" ) - + # Force options parser.add_argument( "--force-download", "-f", action="store_true", help="Force re-download even if dataset exists" ) - + args = parser.parse_args() - + # Validate dataset choices available_datasets = set(DOWNLOAD_SOURCES.keys()) & set(DATASET_SAMPLERS.keys()) if args.dataset != "all" and args.dataset not in available_datasets: print(f"Error: Dataset '{args.dataset}' not available") print(f"Available datasets: {', '.join(sorted(available_datasets))}") return 1 - + # Handle --full flag - use full dataset, no sampling if args.full: args.sample_size = None args.num_docs = None - + # Resolve paths download_dir = args.download_dir.resolve() output_dir = args.output_dir.resolve() - + # Determine datasets to process datasets = ( - sorted(available_datasets) - if args.dataset == "all" + sorted(available_datasets) + if args.dataset == "all" else [args.dataset] ) - + # Print configuration print("=" * 80) print("RAG Benchmark - Unified Dataset Preparation") @@ -214,7 +215,7 @@ def main(): print(f"Skip sampling: {args.skip_sampling}") print(f"Force download: {args.force_download}") print("=" * 80) - + # Prepare datasets success_count = 0 for dataset in datasets: @@ -232,13 +233,13 @@ def main(): args.sample_mode ): success_count += 1 - + # Final summary print("\n" + "=" * 80) print("Preparation Complete") print("=" * 80) print(f"Success: {success_count}/{len(datasets)} datasets") - + if success_count == len(datasets): print("\n✅ All datasets prepared successfully!") print(f"\nPrepared datasets are in: {output_dir}") diff --git a/benchmark/RAG/scripts/run_sampling.py b/benchmark/RAG/scripts/run_sampling.py index b615a5cb3..8ca22dbc3 100644 --- a/benchmark/RAG/scripts/run_sampling.py +++ b/benchmark/RAG/scripts/run_sampling.py @@ -12,14 +12,14 @@ def main(): input_dir = Path(__file__).parent.parent / "raw_data" output_dir = Path(__file__).parent.parent / "datasets" - + print("=" * 60) print("Running sampling for all datasets with custom parameters") print("=" * 60) - + success_count = 0 total_count = 0 - + # Locomo: 3 documents, 80 QAs, stratified total_count += 1 print("\n" + "=" * 60) @@ -35,7 +35,7 @@ def main(): sample_mode="stratified" ): success_count += 1 - + # SyllabusQA: 7 documents, 90 QAs, stratified total_count += 1 print("\n" + "=" * 60) @@ -51,7 +51,7 @@ def main(): sample_mode="stratified" ): success_count += 1 - + # Qasper: 8 documents, 60 QAs, stratified total_count += 1 print("\n" + "=" * 60) @@ -67,7 +67,7 @@ def main(): sample_mode="stratified" ): success_count += 1 - + # FinanceBench: 3 documents, 12 QAs, stratified total_count += 1 print("\n" + "=" * 60) @@ -83,11 +83,11 @@ def main(): sample_mode="stratified" ): success_count += 1 - + print("\n" + "=" * 60) print(f"Sampling complete: {success_count}/{total_count} successful") print("=" * 60) - + return 0 if success_count == total_count else 1 diff --git a/benchmark/RAG/scripts/sample_dataset.py b/benchmark/RAG/scripts/sample_dataset.py index d7b4b7f7a..f73d85462 100644 --- a/benchmark/RAG/scripts/sample_dataset.py +++ b/benchmark/RAG/scripts/sample_dataset.py @@ -6,12 +6,11 @@ import argparse import json -import os import random import shutil import sys from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Callable +from typing import Any, Dict, List, Optional, Tuple sys.path.append(str(Path(__file__).parent.parent)) @@ -67,29 +66,29 @@ def calculate_category_targets( num_categories = len(categories) if num_categories == 0: return {}, True - + base_per_category = sample_size // num_categories remainder = sample_size % num_categories - + if base_per_category == 0: if print_info: print(f"Warning: Sample size {sample_size} is too small for {num_categories} categories") print("Falling back to random sampling") return {}, True - + category_targets = {} for i, cat in enumerate(categories): category_targets[cat] = base_per_category + (1 if i < remainder else 0) - + if remainder > 0 and print_info: print(f"Cannot split {sample_size} QAs evenly into {num_categories} categories") print(f"Distributing {remainder} extra QA(s) to first {remainder} category(ies)") - + if print_info: print("Category targets:") for cat in categories: print(f" {cat}: {category_targets[cat]} QAs") - + return category_targets, False @@ -113,9 +112,9 @@ def stratified_sample_with_reallocation( """ random.seed(seed) categories = sorted(category_qas.keys()) - + category_targets, should_fallback = calculate_category_targets(sample_size, categories, print_info) - + if should_fallback: all_qas = [] for qas in category_qas.values(): @@ -124,33 +123,33 @@ def stratified_sample_with_reallocation( if len(all_qas) > sample_size: return random.sample(all_qas, sample_size) return all_qas - + sampled_items = [] remaining_quota = sample_size - + category_actual = {} for cat in categories: if cat not in category_targets or category_targets[cat] <= 0: category_actual[cat] = 0 continue - + cat_qas = category_qas[cat].copy() random.shuffle(cat_qas) sample_count = min(len(cat_qas), category_targets[cat]) category_actual[cat] = sample_count remaining_quota -= sample_count sampled_items.extend(cat_qas[:sample_count]) - + if remaining_quota > 0 and print_info: print(f"Reallocating remaining {remaining_quota} QA(s) to categories with available QAs") - + category_available = {} for cat in categories: if cat in category_qas: total_available = len(category_qas[cat]) used = category_actual.get(cat, 0) category_available[cat] = total_available - used - + while remaining_quota > 0: allocated_this_round = 0 for cat in categories: @@ -167,17 +166,17 @@ def stratified_sample_with_reallocation( remaining_quota -= 1 allocated_this_round += 1 break - + if allocated_this_round == 0: if print_info: print(f"Warning: No more QAs available to sample. Stopping with {remaining_quota} unallocated.") break - + if print_info: print("Actual category counts after reallocation:") for cat in categories: print(f" {cat}: {category_actual.get(cat, 0)} QAs") - + return sampled_items @@ -227,48 +226,48 @@ def sample_docs_stratified( """ random.seed(seed) categories = sorted(category_qas.keys()) - + category_targets, should_fallback = calculate_category_targets(sample_size, categories, print_info) - + if should_fallback: return sample_docs_random(sample_size, doc_category_qas, all_doc_ids, seed, print_info) - + selected_docs = [] - selected_qas_by_cat = {cat: 0 for cat in categories} - doc_used = {doc_id: False for doc_id in all_doc_ids} - + selected_qas_by_cat = dict.fromkeys(categories, 0) + doc_used = dict.fromkeys(all_doc_ids, False) + for cat in categories: target = category_targets[cat] if target == 0: continue - + cat_qas = category_qas[cat].copy() random.shuffle(cat_qas) - + for doc_id, qa in cat_qas: if doc_used[doc_id]: continue - + doc_cat_qas = doc_category_qas[doc_id] new_count = selected_qas_by_cat[cat] + len(doc_cat_qas.get(cat, [])) if new_count > target: continue - + selected_docs.append(doc_id) doc_used[doc_id] = True - + for c, qs in doc_cat_qas.items(): selected_qas_by_cat[c] += len(qs) - + if selected_qas_by_cat[cat] >= target: break - + total_selected = sum(selected_qas_by_cat.values()) if print_info: print(f"Sampled {len(selected_docs)} documents with {total_selected} QAs") for cat in categories: print(f" {cat}: {selected_qas_by_cat[cat]} QAs (target: {category_targets[cat]})") - + return selected_docs @@ -295,26 +294,26 @@ def sample_docs_random( random.seed(seed) shuffled_docs = all_doc_ids.copy() random.shuffle(shuffled_docs) - + selected_docs = [] selected_qas_count = 0 - + for doc_id in shuffled_docs: doc_qas = doc_qas_count.get(doc_id, 0) - + if doc_qas == 0: continue - + if selected_qas_count + doc_qas <= sample_size or not selected_docs: selected_docs.append(doc_id) selected_qas_count += doc_qas else: if selected_qas_count >= sample_size: break - + if print_info: print(f"Sampled {len(selected_docs)} documents with {selected_qas_count} QAs (seed={seed})") - + return selected_docs @@ -330,14 +329,14 @@ def sample_locomo( input_file = input_dir / "locomo10.json" if not input_file.exists(): raise FileNotFoundError(f"locomo10.json not found at {input_file}") - + data = load_json_data(input_file) if not isinstance(data, list): data = [data] - + original_num_docs = len(data) print(f"Locomo original size: {original_num_docs} documents") - + category_qas = {} doc_category_qas = [] for doc in data: @@ -353,14 +352,14 @@ def sample_locomo( doc_cat_qas[cat] = [] doc_cat_qas[cat].append(q) doc_category_qas.append(doc_cat_qas) - + total_qas = sum(len(qas) for qas in category_qas.values()) categories = sorted(category_qas.keys()) print(f"Total QAs (excluding category 5): {total_qas}") print(f"Categories: {categories}") for cat in categories: print(f" Category {cat}: {len(category_qas[cat])} QAs") - + is_full = (sample_size is None and num_docs is None) if is_full: selected_docs = data @@ -374,13 +373,13 @@ def sample_locomo( random.seed(seed) selected_docs = random.sample(data, num_docs) print(f"Sampled {len(selected_docs)} documents (seed={seed})") - + if sample_size is not None: print(f"Further sampling {sample_size} QAs from selected documents (mode: {sample_mode})") - + selected_doc_category_qas = {} selected_doc_indices = [data.index(doc) for doc in selected_docs] - + for doc_idx in selected_doc_indices: doc = data[doc_idx] doc_cat_qas = doc_category_qas[doc_idx] @@ -389,12 +388,12 @@ def sample_locomo( selected_doc_category_qas[cat] = [] for q in qs: selected_doc_category_qas[cat].append((doc_idx, q)) - + if sample_mode == "stratified": sampled_q_tuples = stratified_sample_with_reallocation( sample_size, selected_doc_category_qas, seed ) - + keep_q_indices = set() for doc_idx, q in sampled_q_tuples: doc = data[doc_idx] @@ -402,7 +401,7 @@ def sample_locomo( if qa_item == q: keep_q_indices.add((doc_idx, q_idx)) break - + for doc in selected_docs: doc_idx = data.index(doc) new_qas = [] @@ -410,7 +409,7 @@ def sample_locomo( if (doc_idx, q_idx) in keep_q_indices or str(q.get("category")) == "5": new_qas.append(q) doc["qa"] = new_qas - + if sample_mode == "random": all_valid_q_indices = [] for doc_idx_in_selected, doc in enumerate(selected_docs): @@ -422,10 +421,10 @@ def sample_locomo( if qa_item == q: all_valid_q_indices.append((doc_idx_in_selected, q_idx)) break - + sampled_q_indices = random_sample_qas(sample_size, all_valid_q_indices, seed) keep_q_indices = set(sampled_q_indices) - + for doc_idx_in_selected, doc in enumerate(selected_docs): new_qas = [] for q_idx, q in enumerate(doc.get("qa", [])): @@ -439,7 +438,7 @@ def sample_locomo( for qs in doc_cat_qas.values(): count += len(qs) doc_qas_count[doc_idx] = count - + if sample_mode == "stratified": print(f"Using stratified sampling (seed={seed})") category_qas_with_indices = {} @@ -448,15 +447,15 @@ def sample_locomo( for doc, q in qas: doc_idx = data.index(doc) category_qas_with_indices[cat].append((doc_idx, q)) - + doc_category_qas_dict = {i: d for i, d in enumerate(doc_category_qas)} all_doc_indices = list(range(len(data))) - + selected_doc_indices = sample_docs_stratified( sample_size, category_qas_with_indices, doc_category_qas_dict, all_doc_indices, seed ) selected_docs = [data[i] for i in selected_doc_indices] - + if sample_mode == "random": print(f"Using random sampling (seed={seed})") all_doc_indices = list(range(len(data))) @@ -464,18 +463,18 @@ def sample_locomo( sample_size, doc_qas_count, all_doc_indices, seed ) selected_docs = [data[i] for i in selected_doc_indices] - + output_data = selected_docs output_file = output_dir / "locomo10.json" save_json_data(output_data, output_file) - + sampled_qas = 0 for doc in selected_docs: if "qa" in doc: for q in doc["qa"]: if str(q.get("category")) != "5": sampled_qas += 1 - + metadata = { "dataset": "Locomo", "original_num_docs": original_num_docs, @@ -489,7 +488,7 @@ def sample_locomo( "is_full": is_full, "note": "Category 5 questions are excluded from QA count" } - + return metadata @@ -502,13 +501,13 @@ def sample_syllabusqa( sample_mode: str = "random" ) -> Dict[str, Any]: """Sample SyllabusQA dataset with stratified sampling support.""" - from collections import defaultdict import csv - + from collections import defaultdict + dataset_split_dir = input_dir / "data" / "dataset_split" if not dataset_split_dir.exists(): raise FileNotFoundError(f"data/dataset_split not found at {dataset_split_dir}") - + all_data = [] csv_files = ["train.csv", "val.csv", "test.csv"] for csv_file in csv_files: @@ -520,12 +519,12 @@ def sample_syllabusqa( for item in file_data: item["_source_file"] = csv_file all_data.extend(file_data) - + doc_groups = defaultdict(list) for item in all_data: syllabus_name = item.get("syllabus_name", "unknown") doc_groups[syllabus_name].append(item) - + category_qas = {} doc_category_qas = {} for doc_name, items in doc_groups.items(): @@ -540,10 +539,10 @@ def sample_syllabusqa( doc_cat_qas[q_type] = [] doc_cat_qas[q_type].append(item) doc_category_qas[doc_name] = doc_cat_qas - + total_valid_qas = sum(len(qas) for qas in category_qas.values()) categories = sorted(category_qas.keys()) - + doc_valid_qas = {} for doc_name, items in doc_groups.items(): valid_count = 0 @@ -551,7 +550,7 @@ def sample_syllabusqa( if item.get("question_type") != "no answer": valid_count += 1 doc_valid_qas[doc_name] = valid_count - + all_doc_names = list(doc_groups.keys()) original_num_docs = len(all_doc_names) original_total_qas = len(all_data) @@ -560,7 +559,7 @@ def sample_syllabusqa( print(f"Categories: {categories}") for cat in categories: print(f" {cat}: {len(category_qas[cat])} QAs") - + is_full = (sample_size is None and num_docs is None) if is_full: selected_docs = all_doc_names @@ -574,10 +573,10 @@ def sample_syllabusqa( random.seed(seed) selected_docs = random.sample(all_doc_names, num_docs) print(f"Sampled {len(selected_docs)} documents (seed={seed})") - + if sample_size is not None: print(f"Further sampling {sample_size} QAs from selected documents (mode: {sample_mode})") - + selected_doc_category_qas = {} for doc_name in selected_docs: doc_cat_qas = doc_category_qas[doc_name] @@ -586,24 +585,24 @@ def sample_syllabusqa( selected_doc_category_qas[cat] = [] for item in items: selected_doc_category_qas[cat].append(item) - + if sample_mode == "stratified": sampled_items = stratified_sample_with_reallocation( sample_size, selected_doc_category_qas, seed ) - + for doc_name in selected_docs: doc_items = doc_groups[doc_name] for item in doc_items: if item.get("question_type") == "no answer": sampled_items.append(item) - + new_doc_groups = defaultdict(list) for item in sampled_items: doc_name = item.get("syllabus_name", "unknown") new_doc_groups[doc_name].append(item) doc_groups = new_doc_groups - + if sample_mode == "random": all_valid_items = [] for doc_name in selected_docs: @@ -611,15 +610,15 @@ def sample_syllabusqa( for item in items: if item.get("question_type") != "no answer": all_valid_items.append(item) - + sampled_items = random_sample_qas(sample_size, all_valid_items, seed) - + for doc_name in selected_docs: items = doc_groups[doc_name] for item in items: if item.get("question_type") == "no answer": sampled_items.append(item) - + new_doc_groups = defaultdict(list) for item in sampled_items: doc_name = item.get("syllabus_name", "unknown") @@ -631,17 +630,17 @@ def sample_syllabusqa( selected_docs = sample_docs_stratified( sample_size, category_qas, doc_category_qas, all_doc_names, seed ) - + if sample_mode == "random": print(f"Using random sampling (seed={seed})") selected_docs = sample_docs_random( sample_size, doc_valid_qas, all_doc_names, seed ) - + selected_data = [] for doc_name in selected_docs: selected_data.extend(doc_groups[doc_name]) - + output_dir.mkdir(parents=True, exist_ok=True) for csv_file in csv_files: file_data = [item for item in selected_data if item.get("_source_file") == csv_file] @@ -655,12 +654,12 @@ def sample_syllabusqa( writer.writeheader() writer.writerows(file_data) print(f"Saved {len(file_data)} samples to {csv_file}") - + syllabi_src = input_dir / "syllabi" syllabi_dst = output_dir / "syllabi" if syllabi_src.exists(): syllabi_dst.mkdir(parents=True, exist_ok=True) - + syllabus_names = set() for doc_name in selected_docs: items = doc_groups[doc_name] @@ -668,15 +667,15 @@ def sample_syllabusqa( syllabus_name = item.get("syllabus_name") if syllabus_name: syllabus_names.add(syllabus_name) - + print(f"Copying syllabi for {len(syllabus_names)} unique syllabus files") - + for subdir in ["pdf", "text", "word"]: src_subdir = syllabi_src / "syllabi_redacted" / subdir dst_subdir = syllabi_dst / "syllabi_redacted" / subdir if src_subdir.exists(): dst_subdir.mkdir(parents=True, exist_ok=True) - + for syllabus_name in syllabus_names: for ext in [".pdf", ".txt", ".docx"]: src_file = src_subdir / f"{syllabus_name}{ext}" @@ -684,14 +683,14 @@ def sample_syllabusqa( shutil.copy2(src_file, dst_subdir / f"{syllabus_name}{ext}") print(f"Copied {subdir}/{syllabus_name}{ext}") break - + sampled_valid_qas = 0 for doc_name in selected_docs: items = doc_groups[doc_name] for item in items: if item.get("question_type") != "no answer": sampled_valid_qas += 1 - + metadata = { "dataset": "SyllabusQA", "original_num_docs": original_num_docs, @@ -707,7 +706,7 @@ def sample_syllabusqa( "is_full": is_full, "note": "'no answer' type questions are excluded from QA count" } - + return metadata @@ -723,10 +722,10 @@ def sample_qasper( json_files = ["qasper-train-v0.3.json", "qasper-dev-v0.3.json", "qasper-test-v0.3.json"] all_paper_ids = [] paper_data_map = {} - + category_qas = {} paper_category_qas = {} - + for json_file in json_files: file_path = input_dir / json_file if file_path.exists(): @@ -734,7 +733,7 @@ def sample_qasper( for paper_id, paper_data in data.items(): all_paper_ids.append(paper_id) paper_data_map[paper_id] = (paper_data, json_file) - + paper_cat_qas = {} if "qas" in paper_data: for qa_item in paper_data["qas"]: @@ -744,7 +743,7 @@ def sample_qasper( ) if is_unanswerable: continue - + answer_types = set() for ans in qa_item.get("answers", []): ans_obj = ans.get("answer", {}) @@ -756,28 +755,28 @@ def sample_qasper( answer_types.add("free_form") elif ans_obj.get("yes_no") is not None: answer_types.add("yes_no") - + primary_type = next(iter(answer_types), "extractive") if primary_type not in category_qas: category_qas[primary_type] = [] category_qas[primary_type].append((paper_id, qa_item)) - + if primary_type not in paper_cat_qas: paper_cat_qas[primary_type] = [] paper_cat_qas[primary_type].append(qa_item) - + paper_category_qas[paper_id] = paper_cat_qas - + original_num_docs = len(all_paper_ids) print(f"Qasper original size: {original_num_docs} documents (from {len(json_files)} files)") - + total_qas = sum(len(qas) for qas in category_qas.values()) categories = sorted(category_qas.keys()) print(f"Total QAs (excluding unanswerable): {total_qas}") print(f"Categories: {categories}") for cat in categories: print(f" {cat}: {len(category_qas[cat])} QAs") - + is_full = (sample_size is None and num_docs is None) if is_full: selected_ids = all_paper_ids @@ -795,22 +794,22 @@ def sample_qasper( for cat_qas in paper_category_qas[paper_id].values(): count += len(cat_qas) paper_qas_count[paper_id] = count - + random.seed(seed) shuffled_papers = all_paper_ids.copy() random.shuffle(shuffled_papers) shuffled_papers.sort(key=lambda pid: paper_qas_count[pid], reverse=True) - + selected_ids = shuffled_papers[:num_docs] print(f"Sampled {len(selected_ids)} documents with highest QA counts (seed={seed})") else: random.seed(seed) selected_ids = random.sample(all_paper_ids, num_docs) print(f"Sampled {len(selected_ids)} documents (seed={seed})") - + if sample_size is not None: print(f"Further sampling {sample_size} QAs from selected documents (mode: {sample_mode})") - + qa_with_indices = [] for paper_id in selected_ids: paper_data, source_file = paper_data_map[paper_id] @@ -833,49 +832,49 @@ def sample_qasper( answer_types.add("yes_no") primary_type = next(iter(answer_types), "extractive") qa_with_indices.append((paper_id, primary_type, i, qa_item)) - + if sample_mode == "stratified": selected_doc_category_qas = {} for paper_id, cat, i, qa_item in qa_with_indices: if cat not in selected_doc_category_qas: selected_doc_category_qas[cat] = [] selected_doc_category_qas[cat].append((paper_id, i, qa_item)) - + category_targets, should_fallback = calculate_category_targets( sample_size, sorted(selected_doc_category_qas.keys()) ) - + if not should_fallback: random.seed(seed) sampled_qas_indices = set() remaining_quota = sample_size - + category_actual = {} cats = sorted(selected_doc_category_qas.keys()) for cat in cats: if cat not in category_targets or category_targets[cat] <= 0: category_actual[cat] = 0 continue - + cat_qas = selected_doc_category_qas[cat].copy() random.shuffle(cat_qas) sample_count = min(len(cat_qas), category_targets[cat]) category_actual[cat] = sample_count remaining_quota -= sample_count - + for paper_id, i, qa_item in cat_qas[:sample_count]: sampled_qas_indices.add((paper_id, i)) - + if remaining_quota > 0: print(f"Reallocating remaining {remaining_quota} QA(s) to categories with available QAs") - + category_available = {} for cat in cats: if cat in selected_doc_category_qas: total_available = len(selected_doc_category_qas[cat]) used = category_actual.get(cat, 0) category_available[cat] = total_available - used - + while remaining_quota > 0: allocated_this_round = 0 for cat in cats: @@ -892,19 +891,19 @@ def sample_qasper( remaining_quota -= 1 allocated_this_round += 1 break - + if allocated_this_round == 0: print(f"Warning: No more QAs available to sample. Stopping with {remaining_quota} unallocated.") break - + print("Actual category counts after reallocation:") for cat in cats: print(f" {cat}: {category_actual.get(cat, 0)} QAs") - + for paper_id in selected_ids: paper_data, source_file = paper_data_map[paper_id] new_qas = [] - + for i, qa_item in enumerate(paper_data.get("qas", [])): is_unanswerable = all( ans.get("answer", {}).get("unanswerable", False) @@ -912,16 +911,16 @@ def sample_qasper( ) if is_unanswerable or (paper_id, i) in sampled_qas_indices: new_qas.append(qa_item) - + paper_data["qas"] = new_qas - + if sample_mode == "random": sampled_qas = random_sample_qas(sample_size, qa_with_indices, seed) - + keep_qas_indices = set() for paper_id, cat, i, qa_item in sampled_qas: keep_qas_indices.add((paper_id, i)) - + for paper_id in selected_ids: paper_data, source_file = paper_data_map[paper_id] new_qas = [] @@ -940,19 +939,19 @@ def sample_qasper( for cat_qas in paper_category_qas[paper_id].values(): count += len(cat_qas) paper_qas_count[paper_id] = count - + if sample_mode == "stratified": print(f"Using stratified sampling (seed={seed})") selected_ids = sample_docs_stratified( sample_size, category_qas, paper_category_qas, all_paper_ids, seed ) - + if sample_mode == "random": print(f"Using random sampling (seed={seed})") selected_ids = sample_docs_random( sample_size, paper_qas_count, all_paper_ids, seed ) - + output_dir.mkdir(parents=True, exist_ok=True) data_by_file = {} for paper_id in selected_ids: @@ -960,12 +959,12 @@ def sample_qasper( if source_file not in data_by_file: data_by_file[source_file] = {} data_by_file[source_file][paper_id] = paper_data - + for json_file, output_data in data_by_file.items(): output_file = output_dir / json_file save_json_data(output_data, output_file) print(f"Saved {len(output_data)} papers to {json_file}") - + sampled_qas = 0 for paper_id in selected_ids: paper_data, source_file = paper_data_map[paper_id] @@ -977,7 +976,7 @@ def sample_qasper( ) if not is_unanswerable: sampled_qas += 1 - + metadata = { "dataset": "Qasper", "original_num_docs": original_num_docs, @@ -991,7 +990,7 @@ def sample_qasper( "is_full": is_full, "note": "Unanswerable questions are excluded from QA count" } - + return metadata @@ -1005,18 +1004,18 @@ def sample_financebench( ) -> Dict[str, Any]: """Sample Financebench dataset with stratified sampling support.""" from collections import defaultdict - + input_file = input_dir / "data" / "financebench_open_source.jsonl" if not input_file.exists(): raise FileNotFoundError(f"financebench_open_source.jsonl not found at {input_file}") - + data = load_jsonl_data(input_file) - + doc_groups = defaultdict(list) for item in data: doc_name = item.get("doc_name", "unknown") doc_groups[doc_name].append(item) - + category_qas = {} doc_category_qas = {} for doc_name, items in doc_groups.items(): @@ -1030,7 +1029,7 @@ def sample_financebench( doc_cat_qas[q_type] = [] doc_cat_qas[q_type].append(item) doc_category_qas[doc_name] = doc_cat_qas - + all_doc_names = list(doc_groups.keys()) original_num_docs = len(all_doc_names) original_total_qas = len(data) @@ -1040,7 +1039,7 @@ def sample_financebench( print(f"Categories: {categories}") for cat in categories: print(f" {cat}: {len(category_qas[cat])} QAs") - + is_full = (sample_size is None and num_docs is None) if is_full: selected_docs = all_doc_names @@ -1059,10 +1058,10 @@ def sample_financebench( print("Selected documents:") for doc in selected_docs: print(f" {doc}: {len(doc_groups[doc])} QAs") - + if sample_size is not None: print(f"Further sampling {sample_size} QAs from selected documents (mode: {sample_mode})") - + selected_doc_category_qas = {} for doc_name in selected_docs: doc_cat_qas = doc_category_qas[doc_name] @@ -1071,26 +1070,26 @@ def sample_financebench( selected_doc_category_qas[cat] = [] for item in items: selected_doc_category_qas[cat].append(item) - + if sample_mode == "stratified": sampled_items = stratified_sample_with_reallocation( sample_size, selected_doc_category_qas, seed ) - + new_doc_groups = defaultdict(list) for item in sampled_items: doc_name = item.get("doc_name", "unknown") new_doc_groups[doc_name].append(item) doc_groups = new_doc_groups - + if sample_mode == "random": all_items = [] for doc_name in selected_docs: items = doc_groups[doc_name] all_items.extend(items) - + sampled_items = random_sample_qas(sample_size, all_items, seed) - + new_doc_groups = defaultdict(list) for item in sampled_items: doc_name = item.get("doc_name", "unknown") @@ -1098,38 +1097,38 @@ def sample_financebench( doc_groups = new_doc_groups else: doc_qas_count = {doc_name: len(items) for doc_name, items in doc_groups.items()} - + if sample_mode == "stratified": print(f"Using stratified sampling (seed={seed})") selected_docs = sample_docs_stratified( sample_size, category_qas, doc_category_qas, all_doc_names, seed ) - + if sample_mode == "random": print(f"Using random sampling (seed={seed})") selected_docs = sample_docs_random( sample_size, doc_qas_count, all_doc_names, seed ) - + selected_data = [] for doc_name in selected_docs: selected_data.extend(doc_groups[doc_name]) - + output_file = output_dir / "financebench_open_source.jsonl" save_jsonl_data(selected_data, output_file) - + pdfs_src = input_dir / "pdfs" pdfs_dst = output_dir / "pdfs" - + if pdfs_src.exists(): pdfs_dst.mkdir(parents=True, exist_ok=True) - + for doc_name in selected_docs: src_pdf = pdfs_src / f"{doc_name}.pdf" if src_pdf.exists(): shutil.copy2(src_pdf, pdfs_dst / f"{doc_name}.pdf") print(f"Copied PDF: {doc_name}.pdf") - + metadata = { "dataset": "Financebench", "original_num_docs": original_num_docs, @@ -1142,7 +1141,7 @@ def sample_financebench( "sample_mode": sample_mode, "is_full": is_full } - + return metadata @@ -1167,19 +1166,19 @@ def sample_dataset( if dataset_name not in DATASET_SAMPLERS: print(f"Unknown dataset: {dataset_name}") return False - + print(f"\nProcessing {dataset_name}...") print(f"Input: {input_dir}") print(f"Output: {output_dir}") - + try: sampler = DATASET_SAMPLERS[dataset_name] metadata = sampler(input_dir, output_dir, sample_size, num_docs, seed, sample_mode) - + metadata_file = output_dir / "sampling_metadata.json" save_json_data(metadata, metadata_file) print(f"✓ Saved metadata to {metadata_file}") - + return True except Exception as e: print(f"Error sampling {dataset_name}: {e}") @@ -1241,22 +1240,22 @@ def main(): default="stratified", help="Sampling mode (default: stratified)" ) - + args = parser.parse_args() - + if args.full: args.sample_size = None args.num_docs = None - + input_dir = args.input_dir.resolve() output_dir = args.output_dir.resolve() - + datasets = ( - list(DATASET_SAMPLERS.keys()) - if args.dataset == "all" + list(DATASET_SAMPLERS.keys()) + if args.dataset == "all" else [args.dataset] ) - + print("=" * 60) print("RAG Benchmark Dataset Sampler") print("=" * 60) @@ -1268,12 +1267,12 @@ def main(): print(f"Random seed: {args.seed}") print(f"Datasets: {', '.join(datasets)}") print("=" * 60) - + success_count = 0 for dataset in datasets: dataset_input_dir = input_dir / dataset dataset_output_dir = output_dir / dataset - + if sample_dataset( dataset, dataset_input_dir, @@ -1284,11 +1283,11 @@ def main(): args.sample_mode ): success_count += 1 - + print("\n" + "=" * 60) print(f"Sampling complete: {success_count}/{len(datasets)} successful") print("=" * 60) - + return 0 if success_count == len(datasets) else 1 diff --git a/benchmark/RAG/src/adapters/base.py b/benchmark/RAG/src/adapters/base.py index 954b22273..05b0fc3e6 100644 --- a/benchmark/RAG/src/adapters/base.py +++ b/benchmark/RAG/src/adapters/base.py @@ -1,8 +1,8 @@ +import sys from abc import ABC, abstractmethod from dataclasses import dataclass, field -from typing import List, Dict, Any, Union, Optional -import sys from pathlib import Path +from typing import Any, Dict, List, Optional, Union sys.path.append(str(Path(__file__).parent.parent)) @@ -27,7 +27,7 @@ class StandardSample: metadata: Dict[str, Any] = field(default_factory=dict) -@dataclass +@dataclass class StandardDoc: """Standardized sampleid to doc_path mapping structure""" sample_id:str @@ -36,7 +36,7 @@ class StandardDoc: class BaseAdapter(ABC): """Base class for all dataset adapters""" - + def __init__(self, raw_file_path: str): self.raw_file_path = raw_file_path self.logger = get_logger() @@ -60,7 +60,7 @@ def load_and_transform(self) -> List[StandardSample]: Must be implemented by subclasses. """ pass - + @abstractmethod def build_prompt(self, qa: StandardQA, context_blocks: List[str]) -> tuple[str, Dict[str, Any]]: """ diff --git a/benchmark/RAG/src/adapters/financebench_adapter.py b/benchmark/RAG/src/adapters/financebench_adapter.py index 40923e18f..4a44905ab 100644 --- a/benchmark/RAG/src/adapters/financebench_adapter.py +++ b/benchmark/RAG/src/adapters/financebench_adapter.py @@ -9,14 +9,14 @@ import json import os +import sys from collections import defaultdict -from typing import List, Dict, Any from pathlib import Path -import sys +from typing import Any, Dict, List sys.path.append(str(Path(__file__).parent)) -from base import BaseAdapter, StandardDoc, StandardSample, StandardQA +from base import BaseAdapter, StandardDoc, StandardQA, StandardSample CATEGORY_INSTRUCTIONS = { "domain-relevant": """Answer the financial question based on the document. @@ -24,14 +24,14 @@ - If numerical, include units (e.g., USD millions, %) - Provide concise, direct answer - Do NOT invent information""", - + "metrics-generated": """Calculate the financial metric based on the document. - Use ONLY numbers from the context - Show your calculations clearly - Round to appropriate decimal places - Include units (e.g., USD millions, %) - Do NOT invent numbers""", - + "novel-generated": """Answer the financial question based on the document. - Use ONLY facts from the context - If numerical, include units (e.g., USD millions, %) @@ -136,10 +136,10 @@ def load_and_transform(self) -> List[StandardSample]: def build_prompt(self, qa: StandardQA, context_blocks: List[str]) -> tuple[str, Dict[str, Any]]: context_text = "\n\n".join(context_blocks) - + category = qa.category category_instruction = CATEGORY_INSTRUCTIONS.get(category, "") - + if category_instruction: full_prompt = f"""{context_text} @@ -158,7 +158,7 @@ def build_prompt(self, qa: StandardQA, context_blocks: List[str]) -> tuple[str, Question: {qa.question} Answer:""" - + meta = { "question_type": qa.category, "financebench_id": qa.metadata.get("financebench_id"), diff --git a/benchmark/RAG/src/adapters/locomo_adapter.py b/benchmark/RAG/src/adapters/locomo_adapter.py index 326e29a53..fbcd51aee 100644 --- a/benchmark/RAG/src/adapters/locomo_adapter.py +++ b/benchmark/RAG/src/adapters/locomo_adapter.py @@ -1,10 +1,9 @@ # src/adapters/locomo_adapter.py import json import os -from typing import List, Dict, Any - -from .base import BaseAdapter, StandardDoc, StandardSample, StandardQA +from typing import Any, Dict, List +from .base import BaseAdapter, StandardDoc, StandardQA, StandardSample MISSING_RULE = "If no information is available to answer the question, write 'Not mentioned'." @@ -13,19 +12,19 @@ "1": """Extract the exact factual answer from the conversation. - Use the exact words from the context when possible - If multiple items, separate with commas""", - + "2": """Answer the time-related question. - Pay close attention to DATE labels in the conversation - Calculate relative time (e.g., "10 years ago") when needed - Use the exact dates from the context""", - + "3": """Reason and infer based on the conversation. - Use ONLY the facts in the context - State your conclusion clearly (e.g., "Likely yes", "Probably no") - Do NOT explain your reasoning or provide any basis/justification - Only output your final conclusion, nothing else - Do NOT invent information""", - + "4": """Understand the meaning and significance. - Focus on what the speakers mean, not just what they say - Identify symbolism or implied meaning @@ -136,20 +135,20 @@ def _convert_conversation_to_markdown(self, sample_id: str, conv: Dict[str, Any] txt = turn.get("text", "") raw_id = turn.get("dia_id") or turn.get("id") - + image_suffix = "" img_url = turn.get("img_url", []) blip_caption = turn.get("blip_caption", "") - + if img_url and blip_caption: if len(img_url) == 1: image_suffix = f"[Attached image:{blip_caption}]" else: for i, caption in enumerate([blip_caption] * len(img_url)): image_suffix += f"[Attached image {i+1}:{caption}]" - + dia_suffix = f" [{raw_id}]" if raw_id else "" - + md_lines.append(f"**{spk}**: {txt}{image_suffix}{dia_suffix}") session_idx += 1 @@ -159,9 +158,9 @@ def _convert_conversation_to_markdown(self, sample_id: str, conv: Dict[str, Any] def build_prompt(self, qa: StandardQA, context_blocks: List[str]) -> tuple[str, Dict[str, Any]]: category = str(qa.category) context_text = "\n\n".join(context_blocks) - + category_instruction = CATEGORY_INSTRUCTIONS.get(category, "") - + if category_instruction: full_prompt = f"""{context_text} diff --git a/benchmark/RAG/src/adapters/qasper_adapter.py b/benchmark/RAG/src/adapters/qasper_adapter.py index b17a5a46e..0ec31a5a3 100644 --- a/benchmark/RAG/src/adapters/qasper_adapter.py +++ b/benchmark/RAG/src/adapters/qasper_adapter.py @@ -23,9 +23,9 @@ import json import os -from typing import List, Dict, Any +from typing import Any, Dict, List -from .base import BaseAdapter, StandardDoc, StandardSample, StandardQA +from .base import BaseAdapter, StandardDoc, StandardQA, StandardSample # Specific instructions for different answer types CATEGORY_INSTRUCTIONS = { @@ -33,13 +33,13 @@ - Use EXACT wording from the context - Do NOT rephrase or add explanation - Provide concise, direct answer""", - + "free_form": """Answer using information from the paper. - Use ONLY facts from the context - You may rephrase or summarize in your own words - Provide clear, complete answer - Do NOT invent information""", - + "yes_no": """Answer Yes/No question based on the paper. - First respond "Yes" or "No" - Do NOT add explanation @@ -62,7 +62,7 @@ class QasperAdapter(BaseAdapter): raw_file_path: Raw JSON data file path logger: Logger """ - + def data_prepare(self, doc_dir: str) -> List[StandardDoc]: """ Load raw data and convert to OpenViking-friendly format. @@ -99,7 +99,7 @@ def data_prepare(self, doc_dir: str) -> List[StandardDoc]: data = json.load(f) os.makedirs(doc_dir, exist_ok=True) - + for paper_id, paper_data in data.items(): doc_content = self._convert_paper_to_markdown(paper_id, paper_data) @@ -155,36 +155,36 @@ def load_and_transform(self) -> List[StandardSample]: for paper_id, paper_data in data.items(): qa_pairs = [] paper_title = paper_data.get("title", "Unknown Title") - + for qa_item in paper_data.get("qas", []): # --- Unanswerable filtering logic --- # Check if all answers are marked as unanswerable is_unanswerable = all( - ans.get("answer", {}).get("unanswerable", False) + ans.get("answer", {}).get("unanswerable", False) for ans in qa_item.get("answers", []) ) if is_unanswerable: continue # ------------------ - + raw_question = qa_item.get("question", "") question_id = qa_item.get("question_id", "") # Append paper title to question for easier retrieval question = f'Based on the paper "{paper_title}", {raw_question}' - + gold_answers = [] evidence_list = [] answer_types = [] answer_evidence_pairs = [] - + # Iterate through all annotator answers for answer_wrapper in qa_item.get("answers", []): answer_obj = answer_wrapper.get("answer", {}) - + current_answer = None answer_type = self._get_answer_type(answer_obj) - + # Process different answer types if answer_obj.get("unanswerable", False): current_answer = "Not mentioned" @@ -193,7 +193,7 @@ def load_and_transform(self) -> List[StandardSample]: extractive_spans = answer_obj.get("extractive_spans", []) free_form_answer = answer_obj.get("free_form_answer", "") yes_no = answer_obj.get("yes_no") - + if extractive_spans: valid_spans = [span.strip() for span in extractive_spans if span and span.strip()] if valid_spans: @@ -208,7 +208,7 @@ def load_and_transform(self) -> List[StandardSample]: elif yes_no is not None: current_answer = "Yes" if yes_no else "No" gold_answers.append(current_answer) - + # Collect evidence text current_evidence = [] evidence = answer_obj.get("evidence", []) @@ -217,11 +217,11 @@ def load_and_transform(self) -> List[StandardSample]: current_evidence.append(ev) if ev not in evidence_list: evidence_list.append(ev) - + # Record answer type (deduplicated) if answer_type not in answer_types: answer_types.append(answer_type) - + # Save answer-evidence correspondence if current_answer: answer_evidence_pairs.append({ @@ -229,14 +229,14 @@ def load_and_transform(self) -> List[StandardSample]: "evidence": current_evidence, "answer_type": answer_type }) - + # If no answers, default to "Not mentioned" if not gold_answers: gold_answers = ["Not mentioned"] - + # Deduplicate (preserve order) gold_answers = list(dict.fromkeys(gold_answers)) - + qa_pairs.append(StandardQA( question=question, gold_answers=gold_answers, @@ -255,7 +255,7 @@ def load_and_transform(self) -> List[StandardSample]: )) return standard_samples - + def _get_answer_type(self, answer_obj: Dict[str, Any]) -> str: """ Determine answer type from answer object. @@ -318,33 +318,33 @@ def _convert_paper_to_markdown(self, paper_id: str, paper_data: Dict[str, Any]) str: Markdown formatted paper content """ md_lines = [] - + # Title title = paper_data.get("title", "Unknown Title") md_lines.append(f"# {title}") md_lines.append(f"Paper ID: {paper_id}\n") - + # Abstract abstract = paper_data.get("abstract", "") if abstract: md_lines.append("## Abstract") md_lines.append(abstract) md_lines.append("") - + # Main text sections full_text = paper_data.get("full_text", []) for section in full_text: section_name = section.get("section_name", "") paragraphs = section.get("paragraphs", []) - + if section_name: md_lines.append(f"## {section_name}") - + for para in paragraphs: if para and para.strip(): md_lines.append(para.strip()) md_lines.append("") - + # Figure and table information figures_and_tables = paper_data.get("figures_and_tables", []) if figures_and_tables: @@ -352,13 +352,13 @@ def _convert_paper_to_markdown(self, paper_id: str, paper_data: Dict[str, Any]) for idx, fig in enumerate(figures_and_tables, 1): caption = fig.get("caption", "") file_name = fig.get("file", "") - + # Determine if figure or table based on filename or caption if "Figure" in file_name or "figure" in caption.lower(): md_lines.append(f"### Figure {idx}") else: md_lines.append(f"### Table {idx}") - + if caption: md_lines.append(f"Caption: {caption}") if file_name: @@ -369,12 +369,12 @@ def _convert_paper_to_markdown(self, paper_id: str, paper_data: Dict[str, Any]) def build_prompt(self, qa: StandardQA, context_blocks: List[str]) -> tuple[str, Dict[str, Any]]: context_text = "\n\n".join(context_blocks) if context_blocks else "No relevant context found." - + answer_types = qa.metadata.get("answer_types", []) primary_type = answer_types[0] if answer_types else None - + category_instruction = CATEGORY_INSTRUCTIONS.get(primary_type, "") - + if category_instruction: full_prompt = f"""{context_text} diff --git a/benchmark/RAG/src/adapters/syllabusqa_adapter.py b/benchmark/RAG/src/adapters/syllabusqa_adapter.py index 69fcf3fb9..1ce02c39e 100644 --- a/benchmark/RAG/src/adapters/syllabusqa_adapter.py +++ b/benchmark/RAG/src/adapters/syllabusqa_adapter.py @@ -25,12 +25,12 @@ - post_process_answer: Post-process LLM output """ +import csv import json import os -import csv -from typing import List, Dict, Any +from typing import Any, Dict, List -from .base import BaseAdapter, StandardDoc, StandardSample, StandardQA +from .base import BaseAdapter, StandardDoc, StandardQA, StandardSample # Rule for when answer cannot be found MISSING_RULE = "If no information is available to answer the question, write 'Not mentioned'." @@ -41,27 +41,27 @@ - Use EXACT wording from context when possible - Provide concise, direct answer - Do NOT add extra info or explanation""", - + "multi factual": """Extract multiple factual answers from the syllabus. - Use EXACT wording from context when possible - List items separated by commas - Include all relevant facts""", - + "single reasoning": """Answer using simple logical reasoning based on the syllabus. - Use ONLY facts from context - Make clear, direct conclusion - Do NOT explain reasoning - Do NOT invent information""", - + "multi reasoning": """Answer using reasoning based on the syllabus. - Use ONLY facts from context - Do NOT invent information""", - + "summarization": """Summarize relevant information from the syllabus. - Provide concise summary covering key points - Use wording from syllabus when possible - Include all important details""", - + "yes/no": """Answer Yes/No question based on the syllabus. - First respond "Yes" or "No" - Do NOT add explanation @@ -82,7 +82,7 @@ class SyllabusQAAdapter(BaseAdapter): syllabus_dir: docx file directory path logger: Logger """ - + def __init__(self, raw_file_path: str, **kwargs): """ Initialize SyllabusQAAdapter. @@ -97,7 +97,7 @@ def __init__(self, raw_file_path: str, **kwargs): base_dir = raw_file_path else: base_dir = os.path.dirname(raw_file_path) - + # Check for official repo structure first official_syllabus_dir = os.path.join(base_dir, 'syllabi', 'syllabi_redacted', 'word') if os.path.exists(official_syllabus_dir): @@ -105,7 +105,7 @@ def __init__(self, raw_file_path: str, **kwargs): else: # Fallback to original structure self.syllabus_dir = os.path.join(base_dir, 'syllabi') - + def data_prepare(self, doc_dir: str) -> List[StandardDoc]: """ Load raw docx files and convert to OpenViking-friendly format. @@ -127,27 +127,27 @@ def data_prepare(self, doc_dir: str) -> List[StandardDoc]: res: List[StandardDoc] = [] os.makedirs(doc_dir, exist_ok=True) - + # Get list of syllabus_name mentioned in CSV required_syllabi = self._get_required_syllabi() self.logger.info(f"[SyllabusQAAdapter] Required syllabi from CSV: {len(required_syllabi)}") - + # Get all docx files docx_files = [f for f in os.listdir(self.syllabus_dir) if f.endswith('.docx')] - + for docx_file in docx_files: syllabus_id = docx_file.replace('.docx', '') - + # Only process syllabi mentioned in CSV if syllabus_id not in required_syllabi: continue - + docx_path = os.path.join(self.syllabus_dir, docx_file) - + try: # Convert docx to Markdown doc_content = self._convert_docx_to_markdown(docx_path) - + doc_path = os.path.join(doc_dir, f"{syllabus_id}_doc.md") with open(doc_path, "w", encoding="utf-8") as f: f.write(doc_content) @@ -159,7 +159,7 @@ def data_prepare(self, doc_dir: str) -> List[StandardDoc]: self.logger.warning("python-docx not installed, skipping docx conversion") break raise e - + self.logger.info(f"[SyllabusQAAdapter] Processed {len(res)} syllabus documents") return res @@ -171,28 +171,28 @@ def _get_required_syllabi(self) -> set: set: syllabus_name set """ required = set() - + # Determine data source type if self.raw_file_path.endswith('.json'): # Load from JSON if not os.path.exists(self.raw_file_path): return required - + with open(self.raw_file_path, 'r', encoding='utf-8') as f: data = json.load(f) - + for syllabus_name in data.keys(): if syllabus_name: required.add(syllabus_name) elif self.raw_file_path.endswith('.csv'): csv_files = [self.raw_file_path] elif os.path.isdir(self.raw_file_path): - csv_files = [os.path.join(self.raw_file_path, f) - for f in os.listdir(self.raw_file_path) + csv_files = [os.path.join(self.raw_file_path, f) + for f in os.listdir(self.raw_file_path) if f.endswith('.csv')] else: return required - + # Process CSV files if any if 'csv_files' in locals(): for csv_file in csv_files: @@ -205,7 +205,7 @@ def _get_required_syllabi(self) -> set: syllabus_name = row.get('syllabus_name', '') if syllabus_name: required.add(syllabus_name) - + return required def _convert_docx_to_markdown(self, docx_path: str) -> str: @@ -222,15 +222,15 @@ def _convert_docx_to_markdown(self, docx_path: str) -> str: from docx import Document except ImportError: raise ImportError("python-docx is required. Install with: pip install python-docx") - + doc = Document(docx_path) md_lines = [] - + # Extract filename as title filename = os.path.basename(docx_path).replace('.docx', '') md_lines.append(f"# {filename}") md_lines.append("") - + # Iterate through all paragraphs for para in doc.paragraphs: text = para.text.strip() @@ -246,7 +246,7 @@ def _convert_docx_to_markdown(self, docx_path: str) -> str: else: md_lines.append(text) md_lines.append("") - + # Extract tables for table in doc.tables: md_lines.append("## Table") @@ -254,7 +254,7 @@ def _convert_docx_to_markdown(self, docx_path: str) -> str: cells = [cell.text.strip() for cell in row.cells] md_lines.append("| " + " | ".join(cells) + " |") md_lines.append("") - + return "\n".join(md_lines) def load_and_transform(self) -> List[StandardSample]: @@ -281,8 +281,8 @@ def load_and_transform(self) -> List[StandardSample]: return self._load_from_csv([self.raw_file_path]) elif os.path.isdir(self.raw_file_path): # Directory, find all CSV files - csv_files = [os.path.join(self.raw_file_path, f) - for f in os.listdir(self.raw_file_path) + csv_files = [os.path.join(self.raw_file_path, f) + for f in os.listdir(self.raw_file_path) if f.endswith('.csv')] return self._load_from_csv(csv_files) else: @@ -302,24 +302,24 @@ def _load_from_json(self) -> List[StandardSample]: for syllabus_name, qa_list in data.items(): qa_pairs = [] - + for qa_item in qa_list: question = qa_item.get("question", "") answer = qa_item.get("answer", "") question_type = qa_item.get("question_type", "") qa_id = qa_item.get("id", "") - + # Skip "no answer" type questions as RAG results cannot be evaluated if question_type == "no answer": continue - + # Collect answer_span as evidence evidence = [] for i in range(1, 6): span = qa_item.get(f"answer_span_{i}", "") if span and span.strip(): evidence.append(span.strip()) - + # Collect reasoning_steps, also as evidence (for reasoning type questions) reasoning_steps = [] for i in range(1, 6): @@ -329,10 +329,10 @@ def _load_from_json(self) -> List[StandardSample]: # reasoning_steps also added to evidence for recall calculation if step.strip() not in evidence: evidence.append(step.strip()) - + # Format question formatted_question = f'Based on the syllabus "{syllabus_name}", {question}' - + qa_pairs.append(StandardQA( question=formatted_question, gold_answers=[answer] if answer else ["Not mentioned"], @@ -365,12 +365,12 @@ def _load_from_csv(self, csv_files: List[str]) -> List[StandardSample]: """ # Group by syllabus_name syllabus_qa_map: Dict[str, List] = {} - + for csv_file in csv_files: if not os.path.exists(csv_file): self.logger.warning(f"CSV file not found: {csv_file}") continue - + with open(csv_file, 'r', encoding='utf-8') as f: reader = csv.DictReader(f) for row in reader: @@ -378,29 +378,29 @@ def _load_from_csv(self, csv_files: List[str]) -> List[StandardSample]: if syllabus_name not in syllabus_qa_map: syllabus_qa_map[syllabus_name] = [] syllabus_qa_map[syllabus_name].append(row) - + standard_samples = [] - + for syllabus_name, qa_list in syllabus_qa_map.items(): qa_pairs = [] - + for qa_item in qa_list: question = qa_item.get("question", "") answer = qa_item.get("answer", "") question_type = qa_item.get("question_type", "") qa_id = qa_item.get("id", "") - + # Skip "no answer" type questions as RAG results cannot be evaluated if question_type == "no answer": continue - + # Collect answer_span as evidence evidence = [] for i in range(1, 6): span = qa_item.get(f"answer_span_{i}", "") if span and span.strip(): evidence.append(span.strip()) - + # Collect reasoning_steps, also as evidence (for reasoning type questions) reasoning_steps = [] for i in range(1, 6): @@ -410,10 +410,10 @@ def _load_from_csv(self, csv_files: List[str]) -> List[StandardSample]: # reasoning_steps also added to evidence for recall calculation if step.strip() not in evidence: evidence.append(step.strip()) - + # Format question formatted_question = f'Based on the syllabus "{syllabus_name}", {question}' - + qa_pairs.append(StandardQA( question=formatted_question, gold_answers=[answer] if answer else ["Not mentioned"], @@ -455,11 +455,11 @@ def build_prompt(self, qa: StandardQA, context_blocks: List[str]) -> tuple[str, """ eff_q = qa.question category = qa.category - + category_instruction = CATEGORY_INSTRUCTIONS.get(category, "") - + context_text = "\n\n".join(context_blocks) - + if category_instruction: full_prompt = f"{context_text}\n\n{category_instruction}\n\n{MISSING_RULE}\n\nQuestion: {eff_q}\n\nAnswer:" else: diff --git a/benchmark/RAG/src/core/judge_util.py b/benchmark/RAG/src/core/judge_util.py index b5549a65a..716175373 100644 --- a/benchmark/RAG/src/core/judge_util.py +++ b/benchmark/RAG/src/core/judge_util.py @@ -28,7 +28,7 @@ def llm_grader( score = 0 reasoning = "No reasoning provided." prompt_type = "Generic_0-4" - + # Handle case when gold_answer is a list if isinstance(gold_answer, list): gold_answer_str = " | ".join(gold_answer) diff --git a/benchmark/RAG/src/core/llm_client.py b/benchmark/RAG/src/core/llm_client.py index a18505b14..1a2db8af9 100644 --- a/benchmark/RAG/src/core/llm_client.py +++ b/benchmark/RAG/src/core/llm_client.py @@ -1,6 +1,7 @@ import time -from langchain_openai import ChatOpenAI + from langchain_core.messages import HumanMessage +from langchain_openai import ChatOpenAI class LLMClientWrapper: diff --git a/benchmark/RAG/src/core/metrics.py b/benchmark/RAG/src/core/metrics.py index d40d8de1e..51b7a3231 100644 --- a/benchmark/RAG/src/core/metrics.py +++ b/benchmark/RAG/src/core/metrics.py @@ -1,6 +1,6 @@ +import collections import re import string -import collections from typing import List @@ -8,7 +8,7 @@ class MetricsCalculator: @staticmethod def normalize_answer(s): """Normalize answer text: remove punctuation, convert to lowercase, remove articles""" - s = str(s).replace(',', "") + s = str(s).replace(',', "") def remove_articles(text): return re.sub(r'\b(a|an|the|and)\b', ' ', text) def white_space_fix(text): return ' '.join(text.split()) def remove_punc(text): @@ -53,34 +53,34 @@ def check_recall(retrieved_texts: List[str], evidence_list: List[str], soft_thre Returns: float, retrieval recall score, range 0.0 to 1.0 """ - if not evidence_list: - return 0.0 - + if not evidence_list: + return 0.0 + combined_retrieved = " ".join(retrieved_texts) - + normalized_retrieved = MetricsCalculator.normalize_answer(combined_retrieved) ret_tokens = set(normalized_retrieved.split()) - + hit_count = 0 - + for evidence in evidence_list: if evidence in combined_retrieved: hit_count += 1 continue - + normalized_ev = MetricsCalculator.normalize_answer(evidence) ev_tokens = set(normalized_ev.split()) - + if not ev_tokens: continue - + if len(ev_tokens) < min_soft_match_tokens: continue - + overlap_count = len(ev_tokens & ret_tokens) coverage = overlap_count / len(ev_tokens) - + if coverage >= soft_threshold: hit_count += 1 - + return hit_count / len(evidence_list) diff --git a/benchmark/RAG/src/core/monitor.py b/benchmark/RAG/src/core/monitor.py index 4c25fb300..17971a825 100644 --- a/benchmark/RAG/src/core/monitor.py +++ b/benchmark/RAG/src/core/monitor.py @@ -34,7 +34,7 @@ def get_status_dict(self): """Return real-time status dictionary for tqdm progress bar display""" elapsed = time.time() - self.stats.start_time qps = self.stats.completed_tasks / elapsed if elapsed > 0 else 0 - + tokens = self.stats.total_tokens if tokens > 1_000_000: token_str = f"{tokens/1_000_000:.1f}M" diff --git a/benchmark/RAG/src/core/vector_store.py b/benchmark/RAG/src/core/vector_store.py index feb06fe21..13c6322f5 100644 --- a/benchmark/RAG/src/core/vector_store.py +++ b/benchmark/RAG/src/core/vector_store.py @@ -1,13 +1,14 @@ import os -import time -from typing import List import sys +import time from pathlib import Path +from typing import List sys.path.append(str(Path(__file__).parent.parent)) -from adapters.base import StandardDoc, StandardSample import tiktoken +from adapters.base import StandardDoc + import openviking as ov @@ -16,9 +17,9 @@ def __init__(self, store_path: str): self.store_path = store_path if not os.path.exists(store_path): os.makedirs(store_path) - + self.client = ov.SyncOpenViking(path=store_path) - + try: self.enc = tiktoken.get_encoding("cl100k_base") except Exception as e: @@ -35,14 +36,14 @@ def ingest(self, samples: List[StandardDoc], max_workers=10, monitor=None, inges total_input_tokens = 0 total_output_tokens = 0 total_embedding_tokens = 0 - + if not samples: return { "time": time.time() - start_time, "input_tokens": 0, "output_tokens": 0 } - + if ingest_mode == "directory": doc_paths = [os.path.abspath(s.doc_path) for s in samples] common_ancestor = None @@ -51,7 +52,7 @@ def ingest(self, samples: List[StandardDoc], max_workers=10, monitor=None, inges common_ancestor = os.path.commonpath(doc_paths) except ValueError: common_ancestor = None - + if common_ancestor: result = self.client.add_resource(common_ancestor, wait=True, telemetry=True) telemetry = result.get("telemetry", {}) diff --git a/benchmark/RAG/src/pipeline.py b/benchmark/RAG/src/pipeline.py index 54bd67937..3992f90f9 100644 --- a/benchmark/RAG/src/pipeline.py +++ b/benchmark/RAG/src/pipeline.py @@ -1,21 +1,20 @@ -import os import json +import os +import sys import time -import random -import re from concurrent.futures import ThreadPoolExecutor, as_completed -from tqdm import tqdm from pathlib import Path -import sys + +from tqdm import tqdm sys.path.append(str(Path(__file__).parent)) from adapters.base import BaseAdapter +from core.judge_util import llm_grader from core.logger import get_logger -from core.vector_store import VikingStoreWrapper -from core.monitor import BenchmarkMonitor from core.metrics import MetricsCalculator -from core.judge_util import llm_grader +from core.monitor import BenchmarkMonitor +from core.vector_store import VikingStoreWrapper class BenchmarkPipeline: @@ -26,14 +25,14 @@ def __init__(self, config, adapter: BaseAdapter, vector_db: VikingStoreWrapper, self.llm = llm self.logger = get_logger() self.monitor = BenchmarkMonitor() - + self.output_dir = self.config['paths']['output_dir'] if not os.path.exists(self.output_dir): os.makedirs(self.output_dir, exist_ok=True) self.generated_file = os.path.join(self.output_dir, "generated_answers.json") self.eval_file = os.path.join(self.output_dir, "qa_eval_detailed_results.json") self.report_file = os.path.join(self.output_dir, "benchmark_metrics_report.json") - + self.metrics_summary = { "insertion": {"time": 0, "input_tokens": 0, "output_tokens": 0, "embedding_tokens": 0}, "deletion": {"time": 0, "input_tokens": 0, "output_tokens": 0, "embedding_tokens": 0} @@ -59,17 +58,17 @@ def run_generation(self): ingest_workers = self.config['execution'].get('ingest_workers', 10) ingest_mode = self.config['execution'].get('ingest_mode', 'per_file') - + mode_desc = { 'directory': 'Unified directory mode', 'per_file': 'Per-file mode' } self.logger.info(f"Ingestion mode: {ingest_mode} ({mode_desc.get(ingest_mode, 'Unknown mode')})") self.logger.info(f"Number of documents: {len(doc_info)}") - + ingest_stats = self.db.ingest( - doc_info, - max_workers=ingest_workers, + doc_info, + max_workers=ingest_workers, monitor=self.monitor, ingest_mode=ingest_mode ) @@ -84,19 +83,19 @@ def run_generation(self): "Total Embedding Tokens": self.metrics_summary["insertion"].get("embedding_tokens", 0) } }) - - samples = self.adapter.load_and_transform() + + samples = self.adapter.load_and_transform() tasks = self._prepare_tasks(samples) results_map = {} max_workers = self.config['execution']['max_workers'] task_errors = [] - + with ThreadPoolExecutor(max_workers=max_workers) as executor: future_to_task = { - executor.submit(self._process_generation_task, task): task + executor.submit(self._process_generation_task, task): task for task in tasks } - + pbar = tqdm(total=len(tasks), desc="Generating Answers", unit="task") for future in as_completed(future_to_task): task = future_to_task[future] @@ -149,13 +148,13 @@ def run_evaluation(self): eval_items = items eval_results_map = {} - + with ThreadPoolExecutor(max_workers=self.config['execution']['max_workers']) as executor: future_to_item = { - executor.submit(self._process_evaluation_task, item): item + executor.submit(self._process_evaluation_task, item): item for item in eval_items } - + pbar = tqdm(total=len(eval_items), desc="Evaluating", unit="item") for future in as_completed(future_to_item): try: @@ -219,7 +218,7 @@ def _process_generation_task(self, task): self.monitor.worker_start() try: qa = task['qa'] - + t0 = time.time() # Get retrieval instruction from config, default to empty retrieval_instruction = self.config['execution'].get('retrieval_instruction', '') @@ -233,22 +232,22 @@ def _process_generation_task(self, task): self.logger.debug(f"[Query-{task['id']}] No retrieval instruction, using raw query") search_res = self.db.retrieve(query=enhanced_query, topk=self.config['execution']['retrieval_topk']) latency = time.time() - t0 - + retrieved_texts = [] retrieved_uris = [] context_blocks = [] - + for r in search_res.resources: retrieved_uris.append(r.uri) content = self.db.read_resource(r.uri) if getattr(r, 'level', 2) == 2 else f"{getattr(r, 'abstract', '')}\n{getattr(r, 'overview', '')}" retrieved_texts.append(content) clean = content[:8000] context_blocks.append(clean) - + recall = MetricsCalculator.check_recall(retrieved_texts, qa.evidence) - + full_prompt, meta = self.adapter.build_prompt(qa, context_blocks) - + ans_raw = self.llm.generate(full_prompt) ans = self.adapter.post_process_answer(qa, ans_raw, meta) @@ -256,7 +255,7 @@ def _process_generation_task(self, task): in_tokens = self.db.count_tokens(full_prompt) + self.db.count_tokens(qa.question) out_tokens = self.db.count_tokens(ans) self.monitor.worker_end(tokens=in_tokens + out_tokens) - + self.logger.info(f"[Query-{task['id']}] Q: {qa.question[:30]}... | Recall: {recall:.2f} | Latency: {latency:.2f}s") return { @@ -282,31 +281,31 @@ def _process_evaluation_task(self, item): This correctly handles multi-annotator scenarios while maintaining compatibility with single-answer datasets (like Locomo). """ ans, golds = item['llm']['final_answer'], item['gold_answers'] - + f1 = max((MetricsCalculator.calculate_f1(ans, gt) for gt in golds), default=0.0) - + dataset_name = self.config.get('dataset_name', 'Unknown_Dataset') - + eval_record = { "score": 0.0, "reasoning": "", "prompt_type": "" } - + try: eval_res = llm_grader( - self.llm.llm, - self.config['llm']['model'], - item['question'], + self.llm.llm, + self.config['llm']['model'], + item['question'], golds, ans, dataset_name=dataset_name ) eval_record = eval_res - + except Exception as e: self.logger.error(f"Grader error: {e}") - + if MetricsCalculator.check_refusal(ans) and any(MetricsCalculator.check_refusal(gt) for gt in golds): f1 = 1.0 eval_record["score"] = 4.0 @@ -316,7 +315,7 @@ def _process_evaluation_task(self, item): acc = eval_record["score"] item["metrics"].update({"F1": f1, "Accuracy": acc}) - + item["llm_evaluation"] = { "prompt_used": eval_record["prompt_type"], "reasoning": eval_record["reasoning"], @@ -324,7 +323,7 @@ def _process_evaluation_task(self, item): } detailed_info = ( - f"\n" + "="*60 + + "\n" + "="*60 + f"\n[Query ID]: {item['_global_index']}" f"\n[Question]: {item['question']}" f"\n[Retrieved URIs]: {item['retrieval'].get('uris', [])}" diff --git a/benchmark/locomo/openclaw/eval.py b/benchmark/locomo/openclaw/eval.py index 744d441eb..fb8f89ad6 100644 --- a/benchmark/locomo/openclaw/eval.py +++ b/benchmark/locomo/openclaw/eval.py @@ -379,7 +379,7 @@ def run_ingest( if args.clear_ingest_record: ingest_record = {} save_ingest_record(ingest_record) - print(f"[INFO] All existing ingest records cleared", file=sys.stderr) + print("[INFO] All existing ingest records cleared", file=sys.stderr) else: ingest_record = load_ingest_record() @@ -416,7 +416,7 @@ def run_ingest( if args.viking: try: viking_ingest(msg) - print(f" -> [viking] saved", file=sys.stderr) + print(" -> [viking] saved", file=sys.stderr) results.append({ "sample_id": sample_id, "session": meta["session_key"], @@ -488,7 +488,7 @@ def run_ingest( # Save ingest record save_ingest_record(ingest_record) total_processed = len(results) + skipped_count - print(f"\n=== Ingest summary ===", file=sys.stderr) + print("\n=== Ingest summary ===", file=sys.stderr) print(f"Total sessions: {total_processed}", file=sys.stderr) print(f"Completed: {len(results)}", file=sys.stderr) print(f"Skipped (already ingested): {skipped_count}", file=sys.stderr) @@ -569,7 +569,7 @@ def run_sample_qa( qas = filtered_qas if not qas: print(f"\n=== Sample {sample_id} [{sample_idx}] (user={user_key}) ===", file=sys.stderr) - print(f" All QA questions already executed, skipping sample.", file=sys.stderr) + print(" All QA questions already executed, skipping sample.", file=sys.stderr) return [], {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0} jsonl_path = f"{args.output}.{sample_idx}.jsonl" if args.output else None @@ -762,7 +762,7 @@ def run_qa( samples = load_locomo_data(args.input, args.sample) print(f" user: {args.user or 'eval-{sample_idx}'}", file=sys.stderr) - print(f" running in single-thread mode", file=sys.stderr) + print(" running in single-thread mode", file=sys.stderr) # Load already executed records from CSV csv_path = f"{args.output}.csv" if args.output else "qa_results.csv" diff --git a/benchmark/locomo/vikingbot/import_to_ov.py b/benchmark/locomo/vikingbot/import_to_ov.py index 94a69d8ec..2a149188a 100644 --- a/benchmark/locomo/vikingbot/import_to_ov.py +++ b/benchmark/locomo/vikingbot/import_to_ov.py @@ -20,7 +20,7 @@ import traceback from datetime import datetime, timedelta from pathlib import Path -from typing import List, Dict, Any, Tuple, Optional +from typing import Any, Dict, List, Optional, Tuple import openviking as ov @@ -451,7 +451,7 @@ async def run_import(args: argparse.Namespace) -> None: if args.clear_ingest_record: ingest_record = {} save_ingest_record(ingest_record) - print(f"[INFO] All existing ingest records cleared", file=sys.stderr) + print("[INFO] All existing ingest records cleared", file=sys.stderr) else: ingest_record = load_ingest_record() @@ -538,7 +538,7 @@ async def process_sample(item): "txt", session_key, ingest_record, success_keys ): print( - f" [SKIP] already imported (use --force-ingest to reprocess)", file=sys.stderr + " [SKIP] already imported (use --force-ingest to reprocess)", file=sys.stderr ) skipped_count += 1 continue @@ -586,12 +586,12 @@ async def process_sample(item): # Final summary total_processed = success_count + error_count + skipped_count - print(f"\n=== Import summary ===", file=sys.stderr) + print("\n=== Import summary ===", file=sys.stderr) print(f"Total sessions: {total_processed}", file=sys.stderr) print(f"Successfully imported: {success_count}", file=sys.stderr) print(f"Failed: {error_count}", file=sys.stderr) print(f"Skipped (already imported): {skipped_count}", file=sys.stderr) - print(f"\n=== Token usage summary ===", file=sys.stderr) + print("\n=== Token usage summary ===", file=sys.stderr) print(f"Total Embedding tokens: {total_embedding_tokens}", file=sys.stderr) print(f"Total VLM tokens: {total_vlm_tokens}", file=sys.stderr) if success_count > 0: @@ -600,7 +600,7 @@ async def process_sample(item): file=sys.stderr, ) print(f"Average VLM per session: {total_vlm_tokens // success_count}", file=sys.stderr) - print(f"\nResults saved to:", file=sys.stderr) + print("\nResults saved to:", file=sys.stderr) print(f" - Success records: {args.success_csv}", file=sys.stderr) print(f" - Error logs: {args.error_log}", file=sys.stderr) diff --git a/benchmark/locomo/vikingbot/judge.py b/benchmark/locomo/vikingbot/judge.py index 0b2e171f6..e811b3e32 100644 --- a/benchmark/locomo/vikingbot/judge.py +++ b/benchmark/locomo/vikingbot/judge.py @@ -1,10 +1,11 @@ import argparse +import asyncio import csv import json import os -import asyncio -from openai import AsyncOpenAI + from dotenv import load_dotenv +from openai import AsyncOpenAI load_dotenv() diff --git a/benchmark/locomo/vikingbot/run_eval.py b/benchmark/locomo/vikingbot/run_eval.py index 1799aec49..1a613d5bd 100644 --- a/benchmark/locomo/vikingbot/run_eval.py +++ b/benchmark/locomo/vikingbot/run_eval.py @@ -1,11 +1,10 @@ import argparse -import json -import subprocess -import time import csv +import json import os -import re +import subprocess import threading +import time from concurrent.futures import ThreadPoolExecutor, as_completed @@ -88,7 +87,7 @@ def run_vikingbot_chat(question: str) -> tuple[str, dict, float, int, list]: time_cost = resp_json.get("time_cost", time_cost) iteration = resp_json.get("iteration", 0) tools_used_names = resp_json.get("tools_used_names", []) - except (json.JSONDecodeError, ValueError) as e: + except (json.JSONDecodeError, ValueError): response = f"[PARSE ERROR] {output}" token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} iteration = 0 diff --git a/benchmark/locomo/vikingbot/stat_judge_result.py b/benchmark/locomo/vikingbot/stat_judge_result.py index 2d7ebd8d6..e02e25ae4 100644 --- a/benchmark/locomo/vikingbot/stat_judge_result.py +++ b/benchmark/locomo/vikingbot/stat_judge_result.py @@ -70,7 +70,7 @@ def main(): f"Accuracy: {accuracy:.2%}", f"\nAverage time cost: {avg_time:.2f}s", f"\nAverage iteration: {total_iteration / valid_rows if valid_rows > 0 else 0.0:.2f}", - f"\nToken usage:", + "\nToken usage:", f" Total prompt tokens: {total_prompt_tokens}", f" Total completion tokens: {total_completion_tokens}", f" Total tokens: {total_tokens}", diff --git a/benchmark/skillsbench/skill_bench_eval.py b/benchmark/skillsbench/skill_bench_eval.py index 19cb04b02..913d5e368 100644 --- a/benchmark/skillsbench/skill_bench_eval.py +++ b/benchmark/skillsbench/skill_bench_eval.py @@ -118,7 +118,7 @@ def run_prepare(args: argparse.Namespace) -> None: temp_dir = PROJECT_ROOT / f"temp_skillsbench_{int(time.time())}" print(f" Cloning {SKILLSBENCH_REPO}...", file=sys.stderr) - print(f" (this may take a moment...)", file=sys.stderr) + print(" (this may take a moment...)", file=sys.stderr) process = subprocess.Popen( ["git", "clone", "--progress", SKILLSBENCH_REPO, str(temp_dir)], @@ -144,18 +144,18 @@ def run_prepare(args: argparse.Namespace) -> None: shutil.rmtree(temp_dir) sys.exit(1) - print(f" Extracting tasks directory...", file=sys.stderr) + print(" Extracting tasks directory...", file=sys.stderr) src_tasks = temp_dir / "tasks" if not src_tasks.exists(): - print(f" [error] tasks directory not found in cloned repo", file=sys.stderr) + print(" [error] tasks directory not found in cloned repo", file=sys.stderr) shutil.rmtree(temp_dir) sys.exit(1) BENCH_DATA_DIR.mkdir(parents=True, exist_ok=True) shutil.copytree(src_tasks, TASKS_DIR) - print(f" Cleaning up temp files...", file=sys.stderr) + print(" Cleaning up temp files...", file=sys.stderr) shutil.rmtree(temp_dir) excluded_count = 0 @@ -210,7 +210,7 @@ def run_verification(task_dir: Path, work_dir: Path, storage_workspace: Path) -> result["error"] = "no tests directory" result["verified"] = True result["passed"] = True - print(f" [verify] no tests directory, skipping verification", file=sys.stderr) + print(" [verify] no tests directory, skipping verification", file=sys.stderr) return result test_sh = tests_dir / "test.sh" @@ -222,10 +222,10 @@ def run_verification(task_dir: Path, work_dir: Path, storage_workspace: Path) -> result["error"] = "no test files found" result["verified"] = True result["passed"] = True - print(f" [verify] no test files, skipping verification", file=sys.stderr) + print(" [verify] no test files, skipping verification", file=sys.stderr) return result - print(f" [verify] running tests...", file=sys.stderr) + print(" [verify] running tests...", file=sys.stderr) logs_dir = work_dir / "logs" / "verifier" logs_dir.mkdir(parents=True, exist_ok=True) @@ -385,7 +385,7 @@ def rewrite_test_text(text: str) -> str: f"--junitxml={logs_dir}/junit.xml", ] - print(f" [verify] running: pytest test_outputs.py", file=sys.stderr) + print(" [verify] running: pytest test_outputs.py", file=sys.stderr) proc_result = subprocess.run( test_cmd, @@ -413,9 +413,9 @@ def rewrite_test_text(text: str) -> str: result["test_score"] = round(score, 2) if result["passed"]: - print(f" [verify] PASSED", file=sys.stderr) + print(" [verify] PASSED", file=sys.stderr) else: - print(f" [verify] FAILED", file=sys.stderr) + print(" [verify] FAILED", file=sys.stderr) if proc_result.stdout: print(f" [verify stdout] {proc_result.stdout[:500]}", file=sys.stderr) if proc_result.stderr: @@ -425,7 +425,7 @@ def rewrite_test_text(text: str) -> str: result["error"] = "test timeout" result["verified"] = True result["passed"] = False - print(f" [verify] TIMEOUT", file=sys.stderr) + print(" [verify] TIMEOUT", file=sys.stderr) except Exception as e: result["error"] = str(e) result["verified"] = True @@ -434,7 +434,7 @@ def rewrite_test_text(text: str) -> str: else: result["verified"] = True result["passed"] = True - print(f" [verify] no pytest file, skipping", file=sys.stderr) + print(" [verify] no pytest file, skipping", file=sys.stderr) return result @@ -495,7 +495,7 @@ def run_task( if not instruction_file.exists(): result["status"] = "error" result["error"] = "instruction.md not found" - print(f" [error] instruction.md not found", file=sys.stderr) + print(" [error] instruction.md not found", file=sys.stderr) return result task_skills_dir = task_dir / "environment" / "skills" @@ -551,7 +551,7 @@ def run_task( if not verify_only: # Run vikingbot command - print(f" [running] vikingbot chat...", file=sys.stderr) + print(" [running] vikingbot chat...", file=sys.stderr) cmd = [ "vikingbot", "chat", @@ -854,7 +854,7 @@ def run_run(args: argparse.Namespace) -> None: except Exception as e: print(f" [warn] failed to generate summary from result.csv: {e}", file=sys.stderr) - print(f"\n=== Summary ===", file=sys.stderr) + print("\n=== Summary ===", file=sys.stderr) print( f" Completed: {final_summary['completed']}/{final_summary['total_tasks']}", file=sys.stderr, diff --git a/bot/vikingbot/__main__.py b/bot/vikingbot/__main__.py index 737f3d35f..ab6cd0c46 100644 --- a/bot/vikingbot/__main__.py +++ b/bot/vikingbot/__main__.py @@ -2,7 +2,6 @@ Entry point for running vikingbot as a module: python -m vikingbot """ -import sys from vikingbot.cli.commands import app diff --git a/bot/vikingbot/agent/__init__.py b/bot/vikingbot/agent/__init__.py index 4681630bf..91b3a3233 100644 --- a/bot/vikingbot/agent/__init__.py +++ b/bot/vikingbot/agent/__init__.py @@ -1,7 +1,7 @@ """Agent core module.""" -from vikingbot.agent.loop import AgentLoop from vikingbot.agent.context import ContextBuilder +from vikingbot.agent.loop import AgentLoop from vikingbot.agent.memory import MemoryStore from vikingbot.agent.skills import SkillsLoader diff --git a/bot/vikingbot/agent/memory.py b/bot/vikingbot/agent/memory.py index bfe0ed4d7..44ebc508d 100644 --- a/bot/vikingbot/agent/memory.py +++ b/bot/vikingbot/agent/memory.py @@ -2,8 +2,8 @@ from pathlib import Path from typing import Any + from loguru import logger -import time from vikingbot.config.loader import load_config from vikingbot.openviking_mount.ov_server import VikingClient @@ -73,4 +73,4 @@ async def get_viking_user_profile(self, workspace_id: str, user_id: str) -> str: result = await client.read_user_profile(user_id) if not result: return "" - return result \ No newline at end of file + return result diff --git a/bot/vikingbot/agent/skills.py b/bot/vikingbot/agent/skills.py index 2c0b06f7d..e3681ee77 100644 --- a/bot/vikingbot/agent/skills.py +++ b/bot/vikingbot/agent/skills.py @@ -2,7 +2,6 @@ import json import os -from loguru import logger import re import shutil from pathlib import Path @@ -129,7 +128,7 @@ def escape_xml(s: str) -> str: if missing: lines.append(f" {escape_xml(missing)}") - lines.append(f" ") + lines.append(" ") lines.append("") return "\n".join(lines) diff --git a/bot/vikingbot/agent/subagent.py b/bot/vikingbot/agent/subagent.py index 81818129d..b6ab0ee07 100644 --- a/bot/vikingbot/agent/subagent.py +++ b/bot/vikingbot/agent/subagent.py @@ -35,7 +35,6 @@ def __init__( model: str | None = None, sandbox_manager: "SandboxManager | None" = None, ): - from vikingbot.config.schema import ExecToolConfig self.provider = provider self.workspace = workspace @@ -202,8 +201,8 @@ async def _announce_result( def _build_subagent_prompt(self, task: str) -> str: """Build a focused system prompt for the subagent.""" - from datetime import datetime import time as _time + from datetime import datetime now = datetime.now().strftime("%Y-%m-%d %H:%M (%A)") tz = _time.strftime("%Z") or "UTC" diff --git a/bot/vikingbot/agent/tools/__init__.py b/bot/vikingbot/agent/tools/__init__.py index fce10455e..456c4c5d1 100644 --- a/bot/vikingbot/agent/tools/__init__.py +++ b/bot/vikingbot/agent/tools/__init__.py @@ -1,7 +1,7 @@ """Agent tools module.""" from vikingbot.agent.tools.base import Tool -from vikingbot.agent.tools.registry import ToolRegistry from vikingbot.agent.tools.factory import register_default_tools, register_subagent_tools +from vikingbot.agent.tools.registry import ToolRegistry __all__ = ["Tool", "ToolRegistry", "register_default_tools", "register_subagent_tools"] diff --git a/bot/vikingbot/agent/tools/filesystem.py b/bot/vikingbot/agent/tools/filesystem.py index 2683eb1de..41c0cc505 100644 --- a/bot/vikingbot/agent/tools/filesystem.py +++ b/bot/vikingbot/agent/tools/filesystem.py @@ -1,12 +1,8 @@ """File system tools: read, write, edit.""" -from typing import TYPE_CHECKING, Any +from typing import Any from vikingbot.agent.tools.base import Tool -from vikingbot.config.schema import SessionKey - - -from vikingbot.sandbox.manager import SandboxManager class ReadFileTool(Tool): @@ -107,7 +103,7 @@ async def execute( content = await sandbox.read_file(path) if old_text not in content: - return f"Error: old_text not found in file. Make sure it matches exactly." + return "Error: old_text not found in file. Make sure it matches exactly." count = content.count(old_text) if count > 1: diff --git a/bot/vikingbot/agent/tools/message.py b/bot/vikingbot/agent/tools/message.py index 9a6db75b2..d5879ad70 100644 --- a/bot/vikingbot/agent/tools/message.py +++ b/bot/vikingbot/agent/tools/message.py @@ -1,10 +1,9 @@ """Message tool for sending messages to users.""" -from typing import Any, Callable, Awaitable +from typing import Any, Awaitable, Callable from vikingbot.agent.tools.base import Tool from vikingbot.bus.events import OutboundMessage -from vikingbot.config.schema import SessionKey class MessageTool(Tool): @@ -39,7 +38,6 @@ def parameters(self) -> dict[str, Any]: } async def execute(self, tool_context: "ToolContext", **kwargs: Any) -> str: - from loguru import logger content = kwargs.get("content") diff --git a/bot/vikingbot/agent/tools/ov_file.py b/bot/vikingbot/agent/tools/ov_file.py index 8ccfdaa73..b3cb61824 100644 --- a/bot/vikingbot/agent/tools/ov_file.py +++ b/bot/vikingbot/agent/tools/ov_file.py @@ -175,7 +175,7 @@ async def execute( else: return "Failed to add resource" except httpx.ReadTimeout: - return f"Request timed out. The resource addition task may still be processing on the server side." + return "Request timed out. The resource addition task may still be processing on the server side." except Exception as e: logger.warning(f"Error adding resource: {e}") return f"Error adding resource to Viking: {str(e)}" @@ -478,4 +478,4 @@ async def read_single_uri(uri: str) -> dict: except Exception as e: logger.exception(f"Error in VikingMultiReadTool: {e}") - return f"Error multi-reading Viking resources: {str(e)}" \ No newline at end of file + return f"Error multi-reading Viking resources: {str(e)}" diff --git a/bot/vikingbot/agent/tools/registry.py b/bot/vikingbot/agent/tools/registry.py index 628e2bd44..402fafc80 100644 --- a/bot/vikingbot/agent/tools/registry.py +++ b/bot/vikingbot/agent/tools/registry.py @@ -1,11 +1,10 @@ """Tool registry for dynamic tool management.""" import time +from typing import Any from loguru import logger -from typing import Any - from vikingbot.agent.tools.base import Tool, ToolContext from vikingbot.config.schema import SessionKey from vikingbot.hooks import HookContext diff --git a/bot/vikingbot/agent/tools/shell.py b/bot/vikingbot/agent/tools/shell.py index d94c9e70f..f58916d82 100644 --- a/bot/vikingbot/agent/tools/shell.py +++ b/bot/vikingbot/agent/tools/shell.py @@ -1,18 +1,8 @@ """Shell execution tool.""" -import asyncio -import os -import re -from pathlib import Path -from typing import TYPE_CHECKING, Any - -from loguru import logger +from typing import Any from vikingbot.agent.tools.base import Tool -from vikingbot.config.schema import SessionKey - - -from vikingbot.sandbox.manager import SandboxManager class ExecTool(Tool): diff --git a/bot/vikingbot/agent/tools/spawn.py b/bot/vikingbot/agent/tools/spawn.py index da4dded71..85b71c4d1 100644 --- a/bot/vikingbot/agent/tools/spawn.py +++ b/bot/vikingbot/agent/tools/spawn.py @@ -1,11 +1,9 @@ """Spawn tool for creating background subagents.""" -from typing import Any, TYPE_CHECKING - -from vikingbot.agent.tools.base import Tool - +from typing import Any from vikingbot.agent.subagent import SubagentManager +from vikingbot.agent.tools.base import Tool class SpawnTool(Tool): diff --git a/bot/vikingbot/agent/tools/websearch/__init__.py b/bot/vikingbot/agent/tools/websearch/__init__.py index e36a561b2..ecc88bf5b 100644 --- a/bot/vikingbot/agent/tools/websearch/__init__.py +++ b/bot/vikingbot/agent/tools/websearch/__init__.py @@ -14,11 +14,10 @@ from vikingbot.agent.tools.base import Tool -from .base import WebSearchBackend -from .registry import registry - # Import backends to register them from . import brave, ddgs, exa, tavily +from .base import WebSearchBackend +from .registry import registry class WebSearchTool(Tool): diff --git a/bot/vikingbot/agent/tools/websearch/registry.py b/bot/vikingbot/agent/tools/websearch/registry.py index d82738371..f3740ddd3 100644 --- a/bot/vikingbot/agent/tools/websearch/registry.py +++ b/bot/vikingbot/agent/tools/websearch/registry.py @@ -1,6 +1,6 @@ """Web search backend registry.""" -from typing import Dict, List, Type, Optional +from typing import Dict, List, Optional, Type from .base import WebSearchBackend diff --git a/bot/vikingbot/bus/queue.py b/bot/vikingbot/bus/queue.py index 1e4067a6e..7a214f4bf 100644 --- a/bot/vikingbot/bus/queue.py +++ b/bot/vikingbot/bus/queue.py @@ -1,7 +1,7 @@ """Async message queue for decoupled channel-agent communication.""" import asyncio -from typing import Callable, Awaitable, Any +from typing import Awaitable, Callable from loguru import logger diff --git a/bot/vikingbot/channels/base.py b/bot/vikingbot/channels/base.py index 1cf2b3d00..e6958f595 100644 --- a/bot/vikingbot/channels/base.py +++ b/bot/vikingbot/channels/base.py @@ -11,7 +11,7 @@ from vikingbot.bus.events import InboundMessage, OutboundMessage from vikingbot.bus.queue import MessageBus -from vikingbot.config.schema import SessionKey, BaseChannelConfig +from vikingbot.config.schema import BaseChannelConfig, SessionKey from vikingbot.utils import get_data_path # Optional HTML processing libraries diff --git a/bot/vikingbot/channels/discord.py b/bot/vikingbot/channels/discord.py index 740300637..8e7cef80f 100644 --- a/bot/vikingbot/channels/discord.py +++ b/bot/vikingbot/channels/discord.py @@ -2,7 +2,6 @@ import asyncio import json -from pathlib import Path from typing import Any import httpx @@ -13,8 +12,6 @@ from vikingbot.bus.queue import MessageBus from vikingbot.channels.base import BaseChannel from vikingbot.config.schema import DiscordChannelConfig -from vikingbot.channels.utils import extract_image_paths, read_image_file - DISCORD_API_BASE = "https://discord.com/api/v10" MAX_ATTACHMENT_BYTES = 20 * 1024 * 1024 # 20MB diff --git a/bot/vikingbot/channels/feishu.py b/bot/vikingbot/channels/feishu.py index bfe75501d..cffb45494 100644 --- a/bot/vikingbot/channels/feishu.py +++ b/bot/vikingbot/channels/feishu.py @@ -31,7 +31,7 @@ from vikingbot.bus.events import OutboundMessage from vikingbot.bus.queue import MessageBus from vikingbot.channels.base import BaseChannel -from vikingbot.config.schema import FeishuChannelConfig, BotMode +from vikingbot.config.schema import BotMode, FeishuChannelConfig try: import lark_oapi as lark @@ -46,7 +46,7 @@ GetMessageResourceRequest, P2ImMessageReceiveV1, ReplyMessageRequest, - ReplyMessageRequestBody + ReplyMessageRequestBody, ) FEISHU_AVAILABLE = True @@ -750,13 +750,13 @@ async def _check_should_process(self, chat_type: str, chat_id: str, message: Any if self.config.thread_require_mention: # 模式1:所有消息都需要@才处理 if not is_mentioned: - logger.info(f"Skipping thread message: thread_require_mention is True and not mentioned") + logger.info("Skipping thread message: thread_require_mention is True and not mentioned") return False else: # 模式2:仅话题首条消息不需要@,后续回复需要@(DEBUG模式除外) config = load_config() if not is_topic_starter and not is_mentioned and config.mode != BotMode.DEBUG: - logger.info(f"Skipping thread message: not topic starter and not mentioned") + logger.info("Skipping thread message: not topic starter and not mentioned") return False return True diff --git a/bot/vikingbot/channels/openapi.py b/bot/vikingbot/channels/openapi.py index bcc70f47b..45d0c848a 100644 --- a/bot/vikingbot/channels/openapi.py +++ b/bot/vikingbot/channels/openapi.py @@ -435,4 +435,4 @@ def get_openapi_router(bus: MessageBus, config: Config) -> APIRouter: channel.send, ) - return channel.get_router() \ No newline at end of file + return channel.get_router() diff --git a/bot/vikingbot/channels/slack.py b/bot/vikingbot/channels/slack.py index e8744afbc..7051cd1e1 100644 --- a/bot/vikingbot/channels/slack.py +++ b/bot/vikingbot/channels/slack.py @@ -2,19 +2,17 @@ import asyncio import re -from typing import Any from loguru import logger -from slack_sdk.socket_mode.websockets import SocketModeClient from slack_sdk.socket_mode.request import SocketModeRequest from slack_sdk.socket_mode.response import SocketModeResponse +from slack_sdk.socket_mode.websockets import SocketModeClient from slack_sdk.web.async_client import AsyncWebClient from vikingbot.bus.events import OutboundMessage from vikingbot.bus.queue import MessageBus from vikingbot.channels.base import BaseChannel from vikingbot.config.schema import SlackChannelConfig -from vikingbot.channels.utils import extract_image_paths, read_image_file class SlackChannel(BaseChannel): diff --git a/bot/vikingbot/channels/telegram.py b/bot/vikingbot/channels/telegram.py index 9e979769e..265d32068 100644 --- a/bot/vikingbot/channels/telegram.py +++ b/bot/vikingbot/channels/telegram.py @@ -4,16 +4,17 @@ import asyncio import re + from loguru import logger from telegram import BotCommand, Update -from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes +from telegram.ext import Application, CommandHandler, ContextTypes, MessageHandler, filters from telegram.request import HTTPXRequest from vikingbot.bus.events import OutboundMessage from vikingbot.bus.queue import MessageBus from vikingbot.channels.base import BaseChannel -from vikingbot.config.schema import TelegramChannelConfig from vikingbot.channels.utils import extract_image_paths, read_image_file +from vikingbot.config.schema import TelegramChannelConfig def _markdown_to_telegram_html(text: str) -> str: @@ -318,7 +319,6 @@ async def _on_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) ext = self._get_extension(media_type, getattr(media_file, "mime_type", None)) # Save to workspace/media/ - from pathlib import Path from vikingbot.utils.helpers import get_media_path if self.workspace_path: diff --git a/bot/vikingbot/channels/utils.py b/bot/vikingbot/channels/utils.py index d67404979..c0cc5224c 100644 --- a/bot/vikingbot/channels/utils.py +++ b/bot/vikingbot/channels/utils.py @@ -3,9 +3,7 @@ import base64 import re from pathlib import Path -from loguru import logger -from typing import Tuple, List - +from typing import List, Tuple # Common image file extensions IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".svg", ".tiff"} diff --git a/bot/vikingbot/channels/whatsapp.py b/bot/vikingbot/channels/whatsapp.py index 23794a88f..b2b16c3ff 100644 --- a/bot/vikingbot/channels/whatsapp.py +++ b/bot/vikingbot/channels/whatsapp.py @@ -2,7 +2,6 @@ import asyncio import json -from typing import Any from loguru import logger diff --git a/bot/vikingbot/cli/werewolf_game.py b/bot/vikingbot/cli/werewolf_game.py index c16896e63..5854eff10 100644 --- a/bot/vikingbot/cli/werewolf_game.py +++ b/bot/vikingbot/cli/werewolf_game.py @@ -1,7 +1,6 @@ """CLI commands for vikingbot.""" import asyncio -from dataclasses import dataclass import json import os import random @@ -10,6 +9,7 @@ import sys import time import warnings +from dataclasses import dataclass from pathlib import Path from typing import Any @@ -1401,9 +1401,9 @@ def demo_werewolf_ui( config = ensure_config(path) _init_bot_data(config) + import uvicorn from fastapi import FastAPI from fastapi.responses import HTMLResponse, JSONResponse - import uvicorn workspace_root = config.workspace_path storage_root = (config.storage_workspace or "~/.openviking/data") @@ -2101,7 +2101,7 @@ async def _ww_run_witch( except Exception: poison_target = None if poison is True and poison_target is None: - poison_target = _ww_pick_random_target(rng, alive_seats, exclude={witch.seat}) + poison_target = _ww_pick_random_target(rng, alive_seats, exclude={witch.seat}) if poison_target is not None and (poison_target not in alive_seats or poison_target == witch.seat): poison_target = None diff --git a/bot/vikingbot/config/__init__.py b/bot/vikingbot/config/__init__.py index f81acc62e..fc7607fde 100644 --- a/bot/vikingbot/config/__init__.py +++ b/bot/vikingbot/config/__init__.py @@ -1,6 +1,6 @@ """Configuration module for vikingbot.""" -from vikingbot.config.loader import load_config, get_config_path +from vikingbot.config.loader import get_config_path, load_config from vikingbot.config.schema import Config __all__ = ["Config", "load_config", "get_config_path"] diff --git a/bot/vikingbot/config/loader.py b/bot/vikingbot/config/loader.py index 9aae2f826..69b0cbe53 100644 --- a/bot/vikingbot/config/loader.py +++ b/bot/vikingbot/config/loader.py @@ -4,7 +4,9 @@ import os from pathlib import Path from typing import Any + from loguru import logger + from vikingbot.config.schema import Config CONFIG_PATH = None @@ -222,4 +224,4 @@ def camel_to_snake(name: str) -> str: def snake_to_camel(name: str) -> str: """Convert snake_case to camelCase.""" components = name.split("_") - return components[0] + "".join(x.title() for x in components[1:]) \ No newline at end of file + return components[0] + "".join(x.title() for x in components[1:]) diff --git a/bot/vikingbot/config/schema.py b/bot/vikingbot/config/schema.py index 0b5a885ae..2e90847f2 100644 --- a/bot/vikingbot/config/schema.py +++ b/bot/vikingbot/config/schema.py @@ -748,4 +748,4 @@ def from_safe_name(safe_name: str): file_name_split = safe_name.split("__") return SessionKey( type=file_name_split[0], channel_id=file_name_split[1], chat_id=file_name_split[2] - ) \ No newline at end of file + ) diff --git a/bot/vikingbot/console/web_console.py b/bot/vikingbot/console/web_console.py index 4617dfaef..0334b3a55 100644 --- a/bot/vikingbot/console/web_console.py +++ b/bot/vikingbot/console/web_console.py @@ -1,13 +1,12 @@ import json import sys -import os from pathlib import Path from typing import Any, Dict, List, Optional, Tuple import gradio as gr -from vikingbot.config.loader import load_config, save_config, get_config_path -from vikingbot.config.schema import Config, ChannelType, SandboxBackend, SandboxMode +from vikingbot.config.loader import get_config_path, load_config, save_config +from vikingbot.config.schema import Config def resolve_schema_ref( diff --git a/bot/vikingbot/cron/types.py b/bot/vikingbot/cron/types.py index 3e7a7d721..25adfa9db 100644 --- a/bot/vikingbot/cron/types.py +++ b/bot/vikingbot/cron/types.py @@ -3,8 +3,6 @@ from dataclasses import dataclass, field from typing import Literal -from vikingbot.config.schema import SessionKey - @dataclass class CronSchedule: diff --git a/bot/vikingbot/heartbeat/service.py b/bot/vikingbot/heartbeat/service.py index aa860f6a6..4b8b1192c 100644 --- a/bot/vikingbot/heartbeat/service.py +++ b/bot/vikingbot/heartbeat/service.py @@ -2,13 +2,11 @@ import asyncio from pathlib import Path -from typing import Any, Callable, Coroutine, TYPE_CHECKING, Dict, List +from typing import Any, Callable, Coroutine from loguru import logger from vikingbot.config.schema import SessionKey - - from vikingbot.session.manager import SessionManager # Default interval: 30 minutes diff --git a/bot/vikingbot/hooks/base.py b/bot/vikingbot/hooks/base.py index 4d9e8c684..167156d93 100644 --- a/bot/vikingbot/hooks/base.py +++ b/bot/vikingbot/hooks/base.py @@ -1,12 +1,10 @@ from abc import ABC, abstractmethod -from enum import Enum from dataclasses import dataclass -from typing import Any, Dict, Optional from datetime import datetime +from typing import Any, Dict, Optional from vikingbot.config.schema import SessionKey - # class HookType(Enum): # SYNC = "sync" # ASYNC = "async" diff --git a/bot/vikingbot/hooks/builtins/openviking_hooks.py b/bot/vikingbot/hooks/builtins/openviking_hooks.py index 2cbd51e30..eb0b5a50f 100644 --- a/bot/vikingbot/hooks/builtins/openviking_hooks.py +++ b/bot/vikingbot/hooks/builtins/openviking_hooks.py @@ -4,7 +4,6 @@ from loguru import logger from vikingbot.config.loader import load_config -from vikingbot.config.schema import SessionKey, AgentMemoryMode from ...session import Session from ..base import Hook, HookContext diff --git a/bot/vikingbot/hooks/manager.py b/bot/vikingbot/hooks/manager.py index a78b918b3..4bc57036c 100644 --- a/bot/vikingbot/hooks/manager.py +++ b/bot/vikingbot/hooks/manager.py @@ -1,7 +1,7 @@ import asyncio import importlib from collections import defaultdict -from typing import List, Any, Dict, Type +from typing import Any, Dict, List, Type from loguru import logger diff --git a/bot/vikingbot/openviking_mount/__init__.py b/bot/vikingbot/openviking_mount/__init__.py index cbce294f1..0d19d48fa 100644 --- a/bot/vikingbot/openviking_mount/__init__.py +++ b/bot/vikingbot/openviking_mount/__init__.py @@ -7,8 +7,8 @@ from typing import TYPE_CHECKING -from .mount import OpenVikingMount, MountScope, MountConfig, FileInfo -from .manager import OpenVikingMountManager, MountPoint, get_mount_manager +from .manager import MountPoint, OpenVikingMountManager, get_mount_manager +from .mount import FileInfo, MountConfig, MountScope, OpenVikingMount from .session_integration import SessionOpenVikingManager, get_session_ov_manager __all__ = [ @@ -28,12 +28,12 @@ ] if TYPE_CHECKING: - from .viking_fuse import OpenVikingFUSE, mount_fuse, FUSEMountManager, FUSE_AVAILABLE + from .viking_fuse import FUSE_AVAILABLE, FUSEMountManager, OpenVikingFUSE, mount_fuse def __getattr__(name: str): if name in ("OpenVikingFUSE", "mount_fuse", "FUSEMountManager", "FUSE_AVAILABLE"): - from .viking_fuse import OpenVikingFUSE, mount_fuse, FUSEMountManager, FUSE_AVAILABLE + from .viking_fuse import FUSE_AVAILABLE, FUSEMountManager, OpenVikingFUSE, mount_fuse return locals()[name] raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/bot/vikingbot/openviking_mount/fuse_finder.py b/bot/vikingbot/openviking_mount/fuse_finder.py index 06f5043aa..a3969b263 100644 --- a/bot/vikingbot/openviking_mount/fuse_finder.py +++ b/bot/vikingbot/openviking_mount/fuse_finder.py @@ -1,21 +1,21 @@ #!/usr/bin/env python3 from __future__ import annotations -import sys +import errno import os +import shutil import stat -import errno +import sys import tempfile -import shutil +from datetime import datetime from pathlib import Path from typing import Any, Dict -from datetime import datetime sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) from loguru import logger -from .mount import OpenVikingMount, MountConfig +from .mount import MountConfig, OpenVikingMount try: from fuse import FUSE, FuseOSError, Operations @@ -420,7 +420,7 @@ def mount_fuse(config: MountConfig, foreground: bool = True) -> None: logger.info(f"Mounting OpenViking FUSE at: {config.mount_point}") logger.info(f" Scope: {config.scope.value}") logger.info(f" Read-only: {config.read_only}") - logger.info(f" Press Ctrl+C to unmount") + logger.info(" Press Ctrl+C to unmount") try: FUSE( diff --git a/bot/vikingbot/openviking_mount/fuse_proxy.py b/bot/vikingbot/openviking_mount/fuse_proxy.py index 12b53f7c2..9373a9f4e 100644 --- a/bot/vikingbot/openviking_mount/fuse_proxy.py +++ b/bot/vikingbot/openviking_mount/fuse_proxy.py @@ -1,22 +1,21 @@ #!/usr/bin/env python3 from __future__ import annotations -import sys +import errno import os +import shutil import stat -import errno +import sys import tempfile -import shutil +from datetime import datetime from pathlib import Path from typing import Any, Dict -from datetime import datetime - sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) from loguru import logger -from .mount import OpenVikingMount, MountConfig +from .mount import MountConfig, OpenVikingMount try: from fuse import FUSE, FuseOSError, Operations @@ -73,7 +72,7 @@ def getattr(self, path: str, fh: int = None) -> Dict[str, Any]: "st_mtime": stat_info.st_mtime, "st_ctime": stat_info.st_ctime, } - print(f"2222222") + print("2222222") if path in self._pending_uploads: now = datetime.now().timestamp() return { @@ -300,7 +299,7 @@ def mount_fuse(config: MountConfig, foreground: bool = True) -> None: logger.info(f"Mounting OpenViking FUSE Proxy at: {config.mount_point}") logger.info(f" Proxy to: {config.openviking_data_path / '.original_files'}") - logger.info(f" Press Ctrl+C to unmount") + logger.info(" Press Ctrl+C to unmount") try: FUSE( diff --git a/bot/vikingbot/openviking_mount/fuse_simple.py b/bot/vikingbot/openviking_mount/fuse_simple.py index d09da61c2..661311f2b 100644 --- a/bot/vikingbot/openviking_mount/fuse_simple.py +++ b/bot/vikingbot/openviking_mount/fuse_simple.py @@ -1,21 +1,21 @@ #!/usr/bin/env python3 from __future__ import annotations -import sys +import errno import os +import shutil import stat -import errno +import sys import tempfile -import shutil +from datetime import datetime from pathlib import Path from typing import Any, Dict -from datetime import datetime sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) from loguru import logger -from .mount import OpenVikingMount, MountConfig +from .mount import MountConfig, OpenVikingMount try: from fuse import FUSE, FuseOSError, Operations @@ -396,7 +396,7 @@ def mount_fuse(config: MountConfig, foreground: bool = True) -> None: logger.info(f"Mounting OpenViking FUSE at: {config.mount_point}") logger.info(f" Scope: {config.scope.value}") logger.info(f" Read-only: {config.read_only}") - logger.info(f" Press Ctrl+C to unmount") + logger.info(" Press Ctrl+C to unmount") try: FUSE( diff --git a/bot/vikingbot/openviking_mount/fuse_simple_debug.py b/bot/vikingbot/openviking_mount/fuse_simple_debug.py index 5186529c7..afef1eef3 100644 --- a/bot/vikingbot/openviking_mount/fuse_simple_debug.py +++ b/bot/vikingbot/openviking_mount/fuse_simple_debug.py @@ -1,21 +1,21 @@ #!/usr/bin/env python3 from __future__ import annotations -import sys +import errno import os +import shutil import stat -import errno +import sys import tempfile -import shutil +from datetime import datetime from pathlib import Path from typing import Any, Dict -from datetime import datetime sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) from loguru import logger -from .mount import OpenVikingMount, MountConfig +from .mount import MountConfig, OpenVikingMount try: from fuse import FUSE, FuseOSError, Operations @@ -166,7 +166,7 @@ def getattr(self, path: str, fh: int = None) -> Dict[str, Any]: except Exception as e: logger.warning(f"getattr error for {path}: {e}") - logger.debug(f"[FUSE] getattr failed: ENOENT") + logger.debug("[FUSE] getattr failed: ENOENT") raise FuseOSError(errno.ENOENT) def readdir(self, path: str, fh: int) -> list: @@ -438,7 +438,7 @@ def mount_fuse(config: MountConfig, foreground: bool = True) -> None: logger.info(f"Mounting OpenViking FUSE at: {config.mount_point}") logger.info(f" Scope: {config.scope.value}") logger.info(f" Read-only: {config.read_only}") - logger.info(f" Press Ctrl+C to unmount") + logger.info(" Press Ctrl+C to unmount") try: FUSE( diff --git a/bot/vikingbot/openviking_mount/manager.py b/bot/vikingbot/openviking_mount/manager.py index 0fa4c1f64..1dafb2b67 100644 --- a/bot/vikingbot/openviking_mount/manager.py +++ b/bot/vikingbot/openviking_mount/manager.py @@ -6,15 +6,15 @@ from __future__ import annotations -import sys +from dataclasses import dataclass from pathlib import Path from typing import Dict, List, Optional -from dataclasses import dataclass, field from loguru import logger -from vikingbot.utils.helpers import get_mounts_path, get_bot_data_path -from .mount import OpenVikingMount, MountConfig, MountScope +from vikingbot.utils.helpers import get_bot_data_path, get_mounts_path + +from .mount import MountConfig, MountScope, OpenVikingMount @dataclass diff --git a/bot/vikingbot/openviking_mount/mount.py b/bot/vikingbot/openviking_mount/mount.py index c106c5d2c..308db2f3e 100644 --- a/bot/vikingbot/openviking_mount/mount.py +++ b/bot/vikingbot/openviking_mount/mount.py @@ -7,15 +7,15 @@ from __future__ import annotations -import sys -from pathlib import Path -from typing import Any, Dict, List, Optional, Union -from dataclasses import dataclass, field +from dataclasses import dataclass from enum import Enum -import openviking as ov +from pathlib import Path +from typing import List, Optional, Union from loguru import logger +import openviking as ov + class MountScope(Enum): """OpenViking挂载作用域""" @@ -371,7 +371,7 @@ def search(self, query: str, target_path: Optional[Union[str, Path]] = None) -> is_dir=False, # 需要根据实际结果判断 ) if hasattr(r, "score"): - setattr(file_info, "score", r.score) + file_info.score = r.score file_infos.append(file_info) return file_infos diff --git a/bot/vikingbot/openviking_mount/session_integration.py b/bot/vikingbot/openviking_mount/session_integration.py index a6ec77a5f..cd5c77e0b 100644 --- a/bot/vikingbot/openviking_mount/session_integration.py +++ b/bot/vikingbot/openviking_mount/session_integration.py @@ -7,19 +7,17 @@ from __future__ import annotations -import sys -import asyncio import shutil from pathlib import Path -from typing import Dict, Optional, Any +from typing import Any, Dict, Optional from loguru import logger from vikingbot.utils.helpers import get_workspace_path # 相对导入同一包内的模块 -from .mount import OpenVikingMount, MountConfig, MountScope -from .viking_fuse import mount_fuse, FUSEMountManager, FUSE_AVAILABLE +from .mount import MountConfig, MountScope, OpenVikingMount +from .viking_fuse import FUSE_AVAILABLE, FUSEMountManager, mount_fuse class SessionOpenVikingManager: @@ -48,7 +46,7 @@ def __init__(self, base_workspace: Optional[Path] = None): # FUSE 挂载管理器(如果可用) self._fuse_manager = FUSEMountManager() if FUSE_AVAILABLE else None - logger.info(f"SessionOpenVikingManager initialized") + logger.info("SessionOpenVikingManager initialized") logger.info(f" Base workspace: {self.base_workspace}") logger.info(f" FUSE available: {FUSE_AVAILABLE}") diff --git a/bot/vikingbot/openviking_mount/user_apikey_manager.py b/bot/vikingbot/openviking_mount/user_apikey_manager.py index da638d95d..6d5b1cf44 100644 --- a/bot/vikingbot/openviking_mount/user_apikey_manager.py +++ b/bot/vikingbot/openviking_mount/user_apikey_manager.py @@ -1,7 +1,7 @@ """User API Key persistence manager for OpenViking remote mode.""" -import json import hashlib +import json from pathlib import Path from typing import Optional diff --git a/bot/vikingbot/providers/openai_compatible_provider.py b/bot/vikingbot/providers/openai_compatible_provider.py index 7433d2f3a..568031074 100644 --- a/bot/vikingbot/providers/openai_compatible_provider.py +++ b/bot/vikingbot/providers/openai_compatible_provider.py @@ -9,8 +9,9 @@ import json from typing import Any -from openai import AsyncOpenAI + from loguru import logger +from openai import AsyncOpenAI from vikingbot.integrations.langfuse import LangfuseClient from vikingbot.providers.base import LLMProvider, LLMResponse, ToolCallRequest diff --git a/bot/vikingbot/providers/transcription.py b/bot/vikingbot/providers/transcription.py index 936b3e55b..19136cbee 100644 --- a/bot/vikingbot/providers/transcription.py +++ b/bot/vikingbot/providers/transcription.py @@ -2,7 +2,6 @@ import os from pathlib import Path -from typing import Any import httpx from loguru import logger diff --git a/bot/vikingbot/sandbox/__init__.py b/bot/vikingbot/sandbox/__init__.py index 692e88641..d552fe0d0 100644 --- a/bot/vikingbot/sandbox/__init__.py +++ b/bot/vikingbot/sandbox/__init__.py @@ -2,10 +2,10 @@ from vikingbot.sandbox.base import ( SandboxBackend, - SandboxError, - SandboxNotStartedError, SandboxDisabledError, + SandboxError, SandboxExecutionError, + SandboxNotStartedError, UnsupportedBackendError, ) from vikingbot.sandbox.manager import SandboxManager diff --git a/bot/vikingbot/sandbox/backends/__init__.py b/bot/vikingbot/sandbox/backends/__init__.py index a69449b02..e3f2b5d0d 100644 --- a/bot/vikingbot/sandbox/backends/__init__.py +++ b/bot/vikingbot/sandbox/backends/__init__.py @@ -1,6 +1,7 @@ """Sandbox backend registry.""" -from typing import TYPE_CHECKING, Type, Callable, Dict +from typing import TYPE_CHECKING, Callable, Dict, Type + from vikingbot.sandbox.base import SandboxBackend _BACKENDS: Dict[str, Type[SandboxBackend]] = {} @@ -28,7 +29,4 @@ def list_backends() -> list[str]: # Import backends to register them (avoid circular import) -from vikingbot.sandbox.backends import srt -from vikingbot.sandbox.backends import opensandbox -from vikingbot.sandbox.backends import direct -from vikingbot.sandbox.backends import aiosandbox +from vikingbot.sandbox.backends import aiosandbox, direct, opensandbox, srt diff --git a/bot/vikingbot/sandbox/backends/direct.py b/bot/vikingbot/sandbox/backends/direct.py index a273d21f1..fa2cd29c2 100644 --- a/bot/vikingbot/sandbox/backends/direct.py +++ b/bot/vikingbot/sandbox/backends/direct.py @@ -1,17 +1,14 @@ """Direct backend implementation - executes commands directly on host without sandboxing.""" import asyncio -import os from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import Any from loguru import logger -from vikingbot.sandbox.base import SandboxBackend -from vikingbot.sandbox.backends import register_backend - - from vikingbot.config.schema import SandboxConfig, SessionKey +from vikingbot.sandbox.backends import register_backend +from vikingbot.sandbox.base import SandboxBackend @register_backend("direct") diff --git a/bot/vikingbot/sandbox/backends/srt.py b/bot/vikingbot/sandbox/backends/srt.py index 4f09f6081..b67d060c0 100644 --- a/bot/vikingbot/sandbox/backends/srt.py +++ b/bot/vikingbot/sandbox/backends/srt.py @@ -4,14 +4,13 @@ import json import os from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import Any + from loguru import logger -from vikingbot.sandbox.base import SandboxBackend, SandboxNotStartedError +from vikingbot.config.schema import SessionKey from vikingbot.sandbox.backends import register_backend - - -from vikingbot.config.schema import SandboxConfig, SessionKey +from vikingbot.sandbox.base import SandboxBackend, SandboxNotStartedError @register_backend("srt") diff --git a/bot/vikingbot/sandbox/manager.py b/bot/vikingbot/sandbox/manager.py index 595fd6bcf..81c6fd892 100644 --- a/bot/vikingbot/sandbox/manager.py +++ b/bot/vikingbot/sandbox/manager.py @@ -1,15 +1,10 @@ """Sandbox manager for creating and managing sandbox instances.""" -import asyncio from pathlib import Path -from typing import TYPE_CHECKING -from openviking.async_client import logger -from vikingbot.sandbox.base import SandboxBackend, SandboxDisabledError, UnsupportedBackendError +from vikingbot.config.schema import Config, SessionKey from vikingbot.sandbox.backends import get_backend - - -from vikingbot.config.schema import SandboxConfig, SessionKey, Config +from vikingbot.sandbox.base import SandboxBackend, UnsupportedBackendError class SandboxManager: @@ -44,7 +39,7 @@ async def _create_sandbox(self, workspace_id: str) -> SandboxBackend: instance = self._backend_cls(self.config.sandbox, workspace_id, workspace) try: await instance.start() - except Exception as e: + except Exception: import traceback traceback.print_exc() @@ -54,10 +49,10 @@ async def _create_sandbox(self, workspace_id: str) -> SandboxBackend: async def _copy_bootstrap_files(self, sandbox_workspace: Path) -> None: """Copy bootstrap files from source workspace to sandbox workspace.""" - from vikingbot.agent.context import ContextBuilder - from vikingbot.agent.skills import BUILTIN_SKILLS_DIR import shutil + from vikingbot.agent.context import ContextBuilder + # Copy from source workspace init directory (if exists) init_dir = self.source_workspace / ContextBuilder.INIT_DIR if init_dir.exists() and init_dir.is_dir(): diff --git a/bot/vikingbot/session/__init__.py b/bot/vikingbot/session/__init__.py index 7e889e8c7..b70454048 100644 --- a/bot/vikingbot/session/__init__.py +++ b/bot/vikingbot/session/__init__.py @@ -1,5 +1,5 @@ """Session management module.""" -from vikingbot.session.manager import SessionManager, Session +from vikingbot.session.manager import Session, SessionManager __all__ = ["SessionManager", "Session"] diff --git a/bot/vikingbot/utils/__init__.py b/bot/vikingbot/utils/__init__.py index 265714801..fa1e76fc9 100644 --- a/bot/vikingbot/utils/__init__.py +++ b/bot/vikingbot/utils/__init__.py @@ -2,18 +2,18 @@ from vikingbot.utils.helpers import ( ensure_dir, - get_workspace_path, - get_data_path, get_bot_data_path, - set_bot_data_path, - get_sessions_path, - get_history_path, get_bridge_path, + get_data_path, + get_history_path, get_images_path, get_media_path, - get_received_path, get_mochat_path, get_mounts_path, + get_received_path, + get_sessions_path, + get_workspace_path, + set_bot_data_path, ) __all__ = [ diff --git a/bot/vikingbot/utils/helpers.py b/bot/vikingbot/utils/helpers.py index 17fb7681c..11cfaad68 100644 --- a/bot/vikingbot/utils/helpers.py +++ b/bot/vikingbot/utils/helpers.py @@ -1,7 +1,8 @@ """Utility functions for vikingbot.""" -from pathlib import Path from datetime import datetime +from pathlib import Path + from loguru import logger @@ -106,6 +107,7 @@ def get_workspace_path() -> Path: def ensure_workspace_templates(workspace: Path) -> None: import shutil + from vikingbot.agent.skills import BUILTIN_SKILLS_DIR # Ensure workspace directory exists first diff --git a/bot/vikingbot/utils/tracing.py b/bot/vikingbot/utils/tracing.py index 948c30e9b..d90aa08a5 100644 --- a/bot/vikingbot/utils/tracing.py +++ b/bot/vikingbot/utils/tracing.py @@ -170,7 +170,7 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> T: return await wrapped_func(*args, **kwargs) else: if not has_propagate: - logger.warning(f"[LANGFUSE] propagate_attributes not available") + logger.warning("[LANGFUSE] propagate_attributes not available") return await wrapped_func(*args, **kwargs) else: return await wrapped_func(*args, **kwargs) diff --git a/bot/workspace/skills/github-proxy/scripts/convert_url.py b/bot/workspace/skills/github-proxy/scripts/convert_url.py index 7821c73f2..9db12951e 100755 --- a/bot/workspace/skills/github-proxy/scripts/convert_url.py +++ b/bot/workspace/skills/github-proxy/scripts/convert_url.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import sys import re +import sys DEFAULT_PROXY = "https://githubproxy.cc" BACKUP_PROXY = "https://ghfast.top" diff --git a/bot/workspace/skills/opencode/list_sessions.py b/bot/workspace/skills/opencode/list_sessions.py index ff9740e64..20494b0c4 100644 --- a/bot/workspace/skills/opencode/list_sessions.py +++ b/bot/workspace/skills/opencode/list_sessions.py @@ -3,14 +3,15 @@ import json import time + from opencode_ai import Opencode from opencode_utils import ( check_serve_status, execute_cmd, + list_project, read_new_messages, read_status, write_status, - list_project, ) from pydantic import BaseModel diff --git a/bot/workspace/skills/opencode/opencode_utils.py b/bot/workspace/skills/opencode/opencode_utils.py index 44f811e74..e1b929527 100644 --- a/bot/workspace/skills/opencode/opencode_utils.py +++ b/bot/workspace/skills/opencode/opencode_utils.py @@ -5,8 +5,8 @@ import os import subprocess import sys -import traceback import time +import traceback from opencode_ai import Opencode diff --git a/examples/openclaw-plugin/tests/e2e/test-archive-expand.py b/examples/openclaw-plugin/tests/e2e/test-archive-expand.py index bba147b49..b0b1eacfc 100644 --- a/examples/openclaw-plugin/tests/e2e/test-archive-expand.py +++ b/examples/openclaw-plugin/tests/e2e/test-archive-expand.py @@ -866,7 +866,7 @@ def run_full_test( tree.add(f"Phase 2a: 线上排障 — {ok2}/{len(CHAT_BATCH_2)}") tree.add(f"Phase 2b: 代码评审 — {ok3}/{len(CHAT_BATCH_3)}") tree.add(f"Phase 2c: 架构设计 — {ok4}/{len(CHAT_BATCH_4)}") - tree.add(f"Phase 3: Archive Index 验证") + tree.add("Phase 3: Archive Index 验证") expand_ok = sum(1 for r in expand_results if r["success"]) tree.add(f"Phase 4: 归档展开 — {expand_ok}/{len(expand_results)} 问题回答正确") diff --git a/openviking/models/vlm/backends/litellm_vlm.py b/openviking/models/vlm/backends/litellm_vlm.py index 620085709..b13780f7f 100644 --- a/openviking/models/vlm/backends/litellm_vlm.py +++ b/openviking/models/vlm/backends/litellm_vlm.py @@ -15,12 +15,9 @@ import litellm from litellm import acompletion, completion - from openviking.telemetry import tracer - from openviking.utils.model_retry import retry_async, retry_sync - from ..base import ToolCall, VLMBase, VLMResponse logger = logging.getLogger(__name__) diff --git a/openviking/models/vlm/backends/openai_vlm.py b/openviking/models/vlm/backends/openai_vlm.py index 2f1078f1e..de7080579 100644 --- a/openviking/models/vlm/backends/openai_vlm.py +++ b/openviking/models/vlm/backends/openai_vlm.py @@ -10,7 +10,6 @@ from typing import Any, Dict, List, Optional, Union from urllib.parse import urlparse - from openviking.telemetry import tracer try: diff --git a/openviking/models/vlm/backends/volcengine_vlm.py b/openviking/models/vlm/backends/volcengine_vlm.py index 978bcb339..19551daf6 100644 --- a/openviking/models/vlm/backends/volcengine_vlm.py +++ b/openviking/models/vlm/backends/volcengine_vlm.py @@ -11,6 +11,7 @@ from typing import Any, Dict, List, Optional, Union from openviking.telemetry import tracer + from ..base import ToolCall, VLMResponse from .openai_vlm import OpenAIVLM diff --git a/openviking/parse/parsers/code/ast/languages/php.py b/openviking/parse/parsers/code/ast/languages/php.py index b1ef23ad3..45ccfa6f1 100644 --- a/openviking/parse/parsers/code/ast/languages/php.py +++ b/openviking/parse/parsers/code/ast/languages/php.py @@ -4,7 +4,7 @@ from __future__ import annotations -from typing import List, Optional +from typing import List from openviking.parse.parsers.code.ast.languages.base import LanguageExtractor from openviking.parse.parsers.code.ast.skeleton import ClassSkeleton, CodeSkeleton, FunctionSig diff --git a/openviking/session/compressor_v2.py b/openviking/session/compressor_v2.py index c81a66028..f1f37a0fc 100644 --- a/openviking/session/compressor_v2.py +++ b/openviking/session/compressor_v2.py @@ -15,10 +15,9 @@ from openviking.session.memory import ExtractLoop, MemoryUpdater from openviking.storage import VikingDBManager from openviking.storage.viking_fs import get_viking_fs -from openviking.telemetry import get_current_telemetry +from openviking.telemetry import get_current_telemetry, tracer from openviking_cli.session.user_id import UserIdentifier from openviking_cli.utils import get_logger -from openviking.telemetry import tracer from openviking_cli.utils.config import get_openviking_config logger = get_logger(__name__) diff --git a/openviking/telemetry/__init__.py b/openviking/telemetry/__init__.py index c83e1138b..7a11dddac 100644 --- a/openviking/telemetry/__init__.py +++ b/openviking/telemetry/__init__.py @@ -2,12 +2,12 @@ # SPDX-License-Identifier: AGPL-3.0 """OpenViking telemetry runtime and operation telemetry helpers.""" +from . import tracer as tracer_module from .context import bind_telemetry, get_current_telemetry from .operation import OperationTelemetry, TelemetrySnapshot from .registry import register_telemetry, resolve_telemetry, unregister_telemetry from .request import TelemetryRequest, TelemetrySelection, normalize_telemetry_request from .runtime import get_telemetry_runtime, set_telemetry_runtime -from . import tracer as tracer_module from .tracer import tracer __all__ = [ diff --git a/openviking_cli/utils/config/open_viking_config.py b/openviking_cli/utils/config/open_viking_config.py index 9273a1c72..3d19afba1 100644 --- a/openviking_cli/utils/config/open_viking_config.py +++ b/openviking_cli/utils/config/open_viking_config.py @@ -20,7 +20,6 @@ ) from .embedding_config import EmbeddingConfig from .encryption_config import EncryptionConfig -from .telemetry_config import TelemetryConfig from .log_config import LogConfig from .memory_config import MemoryConfig from .parser_config import ( @@ -39,6 +38,7 @@ from .prompts_config import PromptsConfig from .rerank_config import RerankConfig from .storage_config import StorageConfig +from .telemetry_config import TelemetryConfig from .vlm_config import VLMConfig diff --git a/tests/agfs/conftest.py b/tests/agfs/conftest.py index 20dbe1f35..0b41a614e 100644 --- a/tests/agfs/conftest.py +++ b/tests/agfs/conftest.py @@ -6,6 +6,7 @@ import pytest + @pytest.fixture(scope="session") def agfs_test_root(): """Root directory for AGFS tests.""" diff --git a/tests/integration/test_compressor_v2_event_span_multiple_turns.py b/tests/integration/test_compressor_v2_event_span_multiple_turns.py index fb8e5f786..a52dd7622 100644 --- a/tests/integration/test_compressor_v2_event_span_multiple_turns.py +++ b/tests/integration/test_compressor_v2_event_span_multiple_turns.py @@ -135,7 +135,7 @@ def run_ingest(client: ov.SyncHTTPClient, session_id: str, wait_seconds: float): console.print(f" [green]任务 {status},耗时 {elapsed:.2f}s[/green]") console.print(f" Task 详情: {task}") - console.print(f" [yellow]等待向量化完成...[/yellow]") + console.print(" [yellow]等待向量化完成...[/yellow]") client.wait_processed() if wait_seconds > 0: diff --git a/tests/integration/test_compressor_v2_tool_skill_memory.py b/tests/integration/test_compressor_v2_tool_skill_memory.py index 838512fc1..07eb63b18 100644 --- a/tests/integration/test_compressor_v2_tool_skill_memory.py +++ b/tests/integration/test_compressor_v2_tool_skill_memory.py @@ -207,7 +207,7 @@ def run_ingest(client: ov.SyncHTTPClient, session_id: str, wait_seconds: float): console.print(f" [green]任务 {status},耗时 {elapsed:.2f}s[/green]") console.print(f" Task 详情: {task}") - console.print(f" [yellow]等待向量化完成...[/yellow]") + console.print(" [yellow]等待向量化完成...[/yellow]") client.wait_processed() if wait_seconds > 0: @@ -299,7 +299,7 @@ def run_verify(client: ov.SyncHTTPClient): def main(): """入口函数""" - parser = argparse.ArgumentParser(description=f"OpenViking 记忆演示 — 工具调用和Skill调用") + parser = argparse.ArgumentParser(description="OpenViking 记忆演示 — 工具调用和Skill调用") parser.add_argument("--url", default=DEFAULT_URL, help=f"Server URL (默认: {DEFAULT_URL})") parser.add_argument("--api-key", default=DEFAULT_API_KEY, help="API key") parser.add_argument("--agent-id", default=DEFAULT_AGENT_ID, help="Agent ID") @@ -348,4 +348,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/tests/integration/test_compressor_v2_xiaomei.py b/tests/integration/test_compressor_v2_xiaomei.py index faf7b128e..a2266f7a6 100644 --- a/tests/integration/test_compressor_v2_xiaomei.py +++ b/tests/integration/test_compressor_v2_xiaomei.py @@ -160,7 +160,7 @@ def run_ingest(client: ov.SyncHTTPClient, session_id: str, wait_seconds: float): console.print(f" Task 详情: {task}") # 等待向量化队列处理完成 - console.print(f" [yellow]等待向量化完成...[/yellow]") + console.print(" [yellow]等待向量化完成...[/yellow]") client.wait_processed() if wait_seconds > 0: diff --git a/tests/models/vlm/test_volcengine_cache.py b/tests/models/vlm/test_volcengine_cache.py index 73defabcf..31633c897 100644 --- a/tests/models/vlm/test_volcengine_cache.py +++ b/tests/models/vlm/test_volcengine_cache.py @@ -2,10 +2,10 @@ # SPDX-License-Identifier: AGPL-3.0 """Tests for VolcEngineVLM cache logic.""" +from unittest.mock import AsyncMock, MagicMock + import pytest -from unittest.mock import AsyncMock, MagicMock, patch -from openviking.models.vlm.backends.volcengine_vlm import VolcEngineVLM from openviking.models.vlm.backends.volcengine_vlm import VolcEngineVLM as VLMClass @@ -250,4 +250,4 @@ def test_cache_key_includes_prefix(self): key = vlm._get_response_id_cache_key(messages) # Should include prefix in the key - assert "prefix:" in key or key.startswith("prefix:") \ No newline at end of file + assert "prefix:" in key or key.startswith("prefix:") diff --git a/tests/parse/test_html_parser_utils.py b/tests/parse/test_html_parser_utils.py index ca2373e7b..651d18e91 100644 --- a/tests/parse/test_html_parser_utils.py +++ b/tests/parse/test_html_parser_utils.py @@ -1,4 +1,3 @@ -import pytest from openviking.parse.parsers.html import HTMLParser diff --git a/tests/server/test_bot_proxy_auth.py b/tests/server/test_bot_proxy_auth.py index bf5b580f0..291326c5b 100644 --- a/tests/server/test_bot_proxy_auth.py +++ b/tests/server/test_bot_proxy_auth.py @@ -3,10 +3,8 @@ """Regression tests for bot proxy endpoint auth enforcement.""" -import httpx import pytest -import pytest_asyncio -from fastapi import FastAPI, Request +from fastapi import Request import openviking.server.routers.bot as bot_router_module diff --git a/tests/storage/test_vectordb_collection_loading.py b/tests/storage/test_vectordb_collection_loading.py index c5c9bd9e3..31e00df9b 100644 --- a/tests/storage/test_vectordb_collection_loading.py +++ b/tests/storage/test_vectordb_collection_loading.py @@ -1,15 +1,13 @@ -import unittest import sys -import os +import unittest # Add open_test path to ensure modules can be found sys.path.insert(0, "/cloudide/workspace/open_test") +from openviking.storage.vectordb.collection.vikingdb_collection import VikingDBCollection from openviking.storage.vectordb.project.vikingdb_project import ( get_or_create_vikingdb_project, - VikingDBProject, ) -from openviking.storage.vectordb.collection.vikingdb_collection import VikingDBCollection class TestDynamicLoading(unittest.TestCase): diff --git a/tests/unit/test_cohere_rerank.py b/tests/unit/test_cohere_rerank.py index b06fd2aa7..89d2a7cf7 100644 --- a/tests/unit/test_cohere_rerank.py +++ b/tests/unit/test_cohere_rerank.py @@ -4,8 +4,6 @@ from unittest.mock import MagicMock, patch -import pytest - from openviking_cli.utils.cohere_rerank import CohereRerankClient diff --git a/uv.lock b/uv.lock index 4b6d38957..0add27d5d 100644 --- a/uv.lock +++ b/uv.lock @@ -1550,7 +1550,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/3f/9859f655d11901e7b2996c6e3d33e0caa9a1d4572c3bc61ed0faa64b2f4c/greenlet-3.3.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9bc885b89709d901859cf95179ec9f6bb67a3d2bb1f0e88456461bd4b7f8fd0d", size = 277747, upload-time = "2026-02-20T20:16:21.325Z" }, { url = "https://files.pythonhosted.org/packages/fb/07/cb284a8b5c6498dbd7cba35d31380bb123d7dceaa7907f606c8ff5993cbf/greenlet-3.3.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b568183cf65b94919be4438dc28416b234b678c608cafac8874dfeeb2a9bbe13", size = 579202, upload-time = "2026-02-20T20:47:28.955Z" }, { url = "https://files.pythonhosted.org/packages/ed/45/67922992b3a152f726163b19f890a85129a992f39607a2a53155de3448b8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:527fec58dc9f90efd594b9b700662ed3fb2493c2122067ac9c740d98080a620e", size = 590620, upload-time = "2026-02-20T20:55:55.581Z" }, - { url = "https://files.pythonhosted.org/packages/03/5f/6e2a7d80c353587751ef3d44bb947f0565ec008a2e0927821c007e96d3a7/greenlet-3.3.2-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508c7f01f1791fbc8e011bd508f6794cb95397fdb198a46cb6635eb5b78d85a7", size = 602132, upload-time = "2026-02-20T21:02:43.261Z" }, { url = "https://files.pythonhosted.org/packages/ad/55/9f1ebb5a825215fadcc0f7d5073f6e79e3007e3282b14b22d6aba7ca6cb8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad0c8917dd42a819fe77e6bdfcb84e3379c0de956469301d9fd36427a1ca501f", size = 591729, upload-time = "2026-02-20T20:20:58.395Z" }, { url = "https://files.pythonhosted.org/packages/24/b4/21f5455773d37f94b866eb3cf5caed88d6cea6dd2c6e1f9c34f463cba3ec/greenlet-3.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:97245cc10e5515dbc8c3104b2928f7f02b6813002770cfaffaf9a6e0fc2b94ef", size = 1551946, upload-time = "2026-02-20T20:49:31.102Z" }, { url = "https://files.pythonhosted.org/packages/00/68/91f061a926abead128fe1a87f0b453ccf07368666bd59ffa46016627a930/greenlet-3.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8c1fdd7d1b309ff0da81d60a9688a8bd044ac4e18b250320a96fc68d31c209ca", size = 1618494, upload-time = "2026-02-20T20:21:06.541Z" }, @@ -1558,7 +1557,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f3/47/16400cb42d18d7a6bb46f0626852c1718612e35dcb0dffa16bbaffdf5dd2/greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86", size = 278890, upload-time = "2026-02-20T20:19:39.263Z" }, { url = "https://files.pythonhosted.org/packages/a3/90/42762b77a5b6aa96cd8c0e80612663d39211e8ae8a6cd47c7f1249a66262/greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f", size = 581120, upload-time = "2026-02-20T20:47:30.161Z" }, { url = "https://files.pythonhosted.org/packages/bf/6f/f3d64f4fa0a9c7b5c5b3c810ff1df614540d5aa7d519261b53fba55d4df9/greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55", size = 594363, upload-time = "2026-02-20T20:55:56.965Z" }, - { url = "https://files.pythonhosted.org/packages/9c/8b/1430a04657735a3f23116c2e0d5eb10220928846e4537a938a41b350bed6/greenlet-3.3.2-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4375a58e49522698d3e70cc0b801c19433021b5c37686f7ce9c65b0d5c8677d2", size = 605046, upload-time = "2026-02-20T21:02:45.234Z" }, { url = "https://files.pythonhosted.org/packages/72/83/3e06a52aca8128bdd4dcd67e932b809e76a96ab8c232a8b025b2850264c5/greenlet-3.3.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358", size = 594156, upload-time = "2026-02-20T20:20:59.955Z" }, { url = "https://files.pythonhosted.org/packages/70/79/0de5e62b873e08fe3cef7dbe84e5c4bc0e8ed0c7ff131bccb8405cd107c8/greenlet-3.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99", size = 1554649, upload-time = "2026-02-20T20:49:32.293Z" }, { url = "https://files.pythonhosted.org/packages/5a/00/32d30dee8389dc36d42170a9c66217757289e2afb0de59a3565260f38373/greenlet-3.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be", size = 1619472, upload-time = "2026-02-20T20:21:07.966Z" }, @@ -1567,7 +1565,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" }, { url = "https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" }, { url = "https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" }, - { url = "https://files.pythonhosted.org/packages/5c/c5/cc09412a29e43406eba18d61c70baa936e299bc27e074e2be3806ed29098/greenlet-3.3.2-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb", size = 626250, upload-time = "2026-02-20T21:02:46.596Z" }, { url = "https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" }, { url = "https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" }, { url = "https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" }, @@ -1576,7 +1573,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = "2026-02-20T20:19:01.9Z" }, { url = "https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" }, { url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" }, - { url = "https://files.pythonhosted.org/packages/94/2b/4d012a69759ac9d77210b8bfb128bc621125f5b20fc398bce3940d036b1c/greenlet-3.3.2-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd", size = 628268, upload-time = "2026-02-20T21:02:48.024Z" }, { url = "https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" }, { url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" }, { url = "https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" }, @@ -1585,7 +1581,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" }, { url = "https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" }, { url = "https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" }, - { url = "https://files.pythonhosted.org/packages/cd/ac/85804f74f1ccea31ba518dcc8ee6f14c79f73fe36fa1beba38930806df09/greenlet-3.3.2-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e3cb43ce200f59483eb82949bf1835a99cf43d7571e900d7c8d5c62cdf25d2f9", size = 675371, upload-time = "2026-02-20T21:02:49.664Z" }, { url = "https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" }, { url = "https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" }, { url = "https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" }, @@ -1594,7 +1589,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" }, { url = "https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" }, { url = "https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" }, - { url = "https://files.pythonhosted.org/packages/d1/67/8197b7e7e602150938049d8e7f30de1660cfb87e4c8ee349b42b67bdb2e1/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:59b3e2c40f6706b05a9cd299c836c6aa2378cabe25d021acd80f13abf81181cf", size = 666581, upload-time = "2026-02-20T21:02:51.526Z" }, { url = "https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" }, { url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" }, { url = "https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 1680010, upload-time = "2026-02-20T20:21:13.427Z" }, @@ -1610,6 +1604,67 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/28/27/3d6dcadc8a3214d8522c1e7f6a19554e33659be44546d44a2f7572ac7d2a/groovy-0.1.2-py3-none-any.whl", hash = "sha256:7f7975bab18c729a257a8b1ae9dcd70b7cafb1720481beae47719af57c35fa64", size = 14090, upload-time = "2025-02-28T20:24:55.152Z" }, ] +[[package]] +name = "grpcio" +version = "1.80.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b7/48/af6173dbca4454f4637a4678b67f52ca7e0c1ed7d5894d89d434fecede05/grpcio-1.80.0.tar.gz", hash = "sha256:29aca15edd0688c22ba01d7cc01cb000d72b2033f4a3c72a81a19b56fd143257", size = 12978905, upload-time = "2026-03-30T08:49:10.502Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/cd/bb7b7e54084a344c03d68144450da7ddd5564e51a298ae1662de65f48e2d/grpcio-1.80.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:886457a7768e408cdce226ad1ca67d2958917d306523a0e21e1a2fdaa75c9c9c", size = 6050363, upload-time = "2026-03-30T08:46:20.894Z" }, + { url = "https://files.pythonhosted.org/packages/16/02/1417f5c3460dea65f7a2e3c14e8b31e77f7ffb730e9bfadd89eda7a9f477/grpcio-1.80.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:7b641fc3f1dc647bfd80bd713addc68f6d145956f64677e56d9ebafc0bd72388", size = 12026037, upload-time = "2026-03-30T08:46:25.144Z" }, + { url = "https://files.pythonhosted.org/packages/43/98/c910254eedf2cae368d78336a2de0678e66a7317d27c02522392f949b5c6/grpcio-1.80.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:33eb763f18f006dc7fee1e69831d38d23f5eccd15b2e0f92a13ee1d9242e5e02", size = 6602306, upload-time = "2026-03-30T08:46:27.593Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f8/88ca4e78c077b2b2113d95da1e1ab43efd43d723c9a0397d26529c2c1a56/grpcio-1.80.0-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:52d143637e3872633fc7dd7c3c6a1c84e396b359f3a72e215f8bf69fd82084fc", size = 7301535, upload-time = "2026-03-30T08:46:29.556Z" }, + { url = "https://files.pythonhosted.org/packages/f9/96/f28660fe2fe0f153288bf4a04e4910b7309d442395135c88ed4f5b3b8b40/grpcio-1.80.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c51bf8ac4575af2e0678bccfb07e47321fc7acb5049b4482832c5c195e04e13a", size = 6808669, upload-time = "2026-03-30T08:46:31.984Z" }, + { url = "https://files.pythonhosted.org/packages/47/eb/3f68a5e955779c00aeef23850e019c1c1d0e032d90633ba49c01ad5a96e0/grpcio-1.80.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:50a9871536d71c4fba24ee856abc03a87764570f0c457dd8db0b4018f379fed9", size = 7409489, upload-time = "2026-03-30T08:46:34.684Z" }, + { url = "https://files.pythonhosted.org/packages/5b/a7/d2f681a4bfb881be40659a309771f3bdfbfdb1190619442816c3f0ffc079/grpcio-1.80.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:a72d84ad0514db063e21887fbacd1fd7acb4d494a564cae22227cd45c7fbf199", size = 8423167, upload-time = "2026-03-30T08:46:36.833Z" }, + { url = "https://files.pythonhosted.org/packages/97/8a/29b4589c204959aa35ce5708400a05bba72181807c45c47b3ec000c39333/grpcio-1.80.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f7691a6788ad9196872f95716df5bc643ebba13c97140b7a5ee5c8e75d1dea81", size = 7846761, upload-time = "2026-03-30T08:46:40.091Z" }, + { url = "https://files.pythonhosted.org/packages/6b/d2/ed143e097230ee121ac5848f6ff14372dba91289b10b536d54fb1b7cbae7/grpcio-1.80.0-cp310-cp310-win32.whl", hash = "sha256:46c2390b59d67f84e882694d489f5b45707c657832d7934859ceb8c33f467069", size = 4156534, upload-time = "2026-03-30T08:46:42.026Z" }, + { url = "https://files.pythonhosted.org/packages/d5/c9/df8279bb49b29409995e95efa85b72973d62f8aeff89abee58c91f393710/grpcio-1.80.0-cp310-cp310-win_amd64.whl", hash = "sha256:dc053420fc75749c961e2a4c906398d7c15725d36ccc04ae6d16093167223b58", size = 4889869, upload-time = "2026-03-30T08:46:44.219Z" }, + { url = "https://files.pythonhosted.org/packages/5d/db/1d56e5f5823257b291962d6c0ce106146c6447f405b60b234c4f222a7cde/grpcio-1.80.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:dfab85db094068ff42e2a3563f60ab3dddcc9d6488a35abf0132daec13209c8a", size = 6055009, upload-time = "2026-03-30T08:46:46.265Z" }, + { url = "https://files.pythonhosted.org/packages/6e/18/c83f3cad64c5ca63bca7e91e5e46b0d026afc5af9d0a9972472ceba294b3/grpcio-1.80.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:5c07e82e822e1161354e32da2662f741a4944ea955f9f580ec8fb409dd6f6060", size = 12035295, upload-time = "2026-03-30T08:46:49.099Z" }, + { url = "https://files.pythonhosted.org/packages/0f/8e/e14966b435be2dda99fbe89db9525ea436edc79780431a1c2875a3582644/grpcio-1.80.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba0915d51fd4ced2db5ff719f84e270afe0e2d4c45a7bdb1e8d036e4502928c2", size = 6610297, upload-time = "2026-03-30T08:46:52.123Z" }, + { url = "https://files.pythonhosted.org/packages/cc/26/d5eb38f42ce0e3fdc8174ea4d52036ef8d58cc4426cb800f2610f625dd75/grpcio-1.80.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:3cb8130ba457d2aa09fa6b7c3ed6b6e4e6a2685fce63cb803d479576c4d80e21", size = 7300208, upload-time = "2026-03-30T08:46:54.859Z" }, + { url = "https://files.pythonhosted.org/packages/25/51/bd267c989f85a17a5b3eea65a6feb4ff672af41ca614e5a0279cc0ea381c/grpcio-1.80.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:09e5e478b3d14afd23f12e49e8b44c8684ac3c5f08561c43a5b9691c54d136ab", size = 6813442, upload-time = "2026-03-30T08:46:57.056Z" }, + { url = "https://files.pythonhosted.org/packages/9e/d9/d80eef735b19e9169e30164bbf889b46f9df9127598a83d174eb13a48b26/grpcio-1.80.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:00168469238b022500e486c1c33916acf2f2a9b2c022202cf8a1885d2e3073c1", size = 7414743, upload-time = "2026-03-30T08:46:59.682Z" }, + { url = "https://files.pythonhosted.org/packages/de/f2/567f5bd5054398ed6b0509b9a30900376dcf2786bd936812098808b49d8d/grpcio-1.80.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8502122a3cc1714038e39a0b071acb1207ca7844208d5ea0d091317555ee7106", size = 8426046, upload-time = "2026-03-30T08:47:02.474Z" }, + { url = "https://files.pythonhosted.org/packages/62/29/73ef0141b4732ff5eacd68430ff2512a65c004696997f70476a83e548e7e/grpcio-1.80.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ce1794f4ea6cc3ca29463f42d665c32ba1b964b48958a66497917fe9069f26e6", size = 7851641, upload-time = "2026-03-30T08:47:05.462Z" }, + { url = "https://files.pythonhosted.org/packages/46/69/abbfa360eb229a8623bab5f5a4f8105e445bd38ce81a89514ba55d281ad0/grpcio-1.80.0-cp311-cp311-win32.whl", hash = "sha256:51b4a7189b0bef2aa30adce3c78f09c83526cf3dddb24c6a96555e3b97340440", size = 4154368, upload-time = "2026-03-30T08:47:08.027Z" }, + { url = "https://files.pythonhosted.org/packages/6f/d4/ae92206d01183b08613e846076115f5ac5991bae358d2a749fa864da5699/grpcio-1.80.0-cp311-cp311-win_amd64.whl", hash = "sha256:02e64bb0bb2da14d947a49e6f120a75e947250aebe65f9629b62bb1f5c14e6e9", size = 4894235, upload-time = "2026-03-30T08:47:10.839Z" }, + { url = "https://files.pythonhosted.org/packages/5c/e8/a2b749265eb3415abc94f2e619bbd9e9707bebdda787e61c593004ec927a/grpcio-1.80.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:c624cc9f1008361014378c9d776de7182b11fe8b2e5a81bc69f23a295f2a1ad0", size = 6015616, upload-time = "2026-03-30T08:47:13.428Z" }, + { url = "https://files.pythonhosted.org/packages/3e/97/b1282161a15d699d1e90c360df18d19165a045ce1c343c7f313f5e8a0b77/grpcio-1.80.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:f49eddcac43c3bf350c0385366a58f36bed8cc2c0ec35ef7b74b49e56552c0c2", size = 12014204, upload-time = "2026-03-30T08:47:15.873Z" }, + { url = "https://files.pythonhosted.org/packages/6e/5e/d319c6e997b50c155ac5a8cb12f5173d5b42677510e886d250d50264949d/grpcio-1.80.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d334591df610ab94714048e0d5b4f3dd5ad1bee74dfec11eee344220077a79de", size = 6563866, upload-time = "2026-03-30T08:47:18.588Z" }, + { url = "https://files.pythonhosted.org/packages/ae/f6/fdd975a2cb4d78eb67769a7b3b3830970bfa2e919f1decf724ae4445f42c/grpcio-1.80.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:0cb517eb1d0d0aaf1d87af7cc5b801d686557c1d88b2619f5e31fab3c2315921", size = 7273060, upload-time = "2026-03-30T08:47:21.113Z" }, + { url = "https://files.pythonhosted.org/packages/db/f0/a3deb5feba60d9538a962913e37bd2e69a195f1c3376a3dd44fe0427e996/grpcio-1.80.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e78c4ac0d97dc2e569b2f4bcbbb447491167cb358d1a389fc4af71ab6f70411", size = 6782121, upload-time = "2026-03-30T08:47:23.827Z" }, + { url = "https://files.pythonhosted.org/packages/ca/84/36c6dcfddc093e108141f757c407902a05085e0c328007cb090d56646cdf/grpcio-1.80.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2ed770b4c06984f3b47eb0517b1c69ad0b84ef3f40128f51448433be904634cd", size = 7383811, upload-time = "2026-03-30T08:47:26.517Z" }, + { url = "https://files.pythonhosted.org/packages/7c/ef/f3a77e3dc5b471a0ec86c564c98d6adfa3510d38f8ee99010410858d591e/grpcio-1.80.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:256507e2f524092f1473071a05e65a5b10d84b82e3ff24c5b571513cfaa61e2f", size = 8393860, upload-time = "2026-03-30T08:47:29.439Z" }, + { url = "https://files.pythonhosted.org/packages/9b/8d/9d4d27ed7f33d109c50d6b5ce578a9914aa68edab75d65869a17e630a8d1/grpcio-1.80.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a6284a5d907c37db53350645567c522be314bac859a64a7a5ca63b77bb7958f", size = 7830132, upload-time = "2026-03-30T08:47:33.254Z" }, + { url = "https://files.pythonhosted.org/packages/14/e4/9990b41c6d7a44e1e9dee8ac11d7a9802ba1378b40d77468a7761d1ad288/grpcio-1.80.0-cp312-cp312-win32.whl", hash = "sha256:c71309cfce2f22be26aa4a847357c502db6c621f1a49825ae98aa0907595b193", size = 4140904, upload-time = "2026-03-30T08:47:35.319Z" }, + { url = "https://files.pythonhosted.org/packages/2f/2c/296f6138caca1f4b92a31ace4ae1b87dab692fc16a7a3417af3bb3c805bf/grpcio-1.80.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe648599c0e37594c4809d81a9e77bd138cc82eb8baa71b6a86af65426723ff", size = 4880944, upload-time = "2026-03-30T08:47:37.831Z" }, + { url = "https://files.pythonhosted.org/packages/2f/3a/7c3c25789e3f069e581dc342e03613c5b1cb012c4e8c7d9d5cf960a75856/grpcio-1.80.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:e9e408fc016dffd20661f0126c53d8a31c2821b5c13c5d67a0f5ed5de93319ad", size = 6017243, upload-time = "2026-03-30T08:47:40.075Z" }, + { url = "https://files.pythonhosted.org/packages/04/19/21a9806eb8240e174fd1ab0cd5b9aa948bb0e05c2f2f55f9d5d7405e6d08/grpcio-1.80.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:92d787312e613754d4d8b9ca6d3297e69994a7912a32fa38c4c4e01c272974b0", size = 12010840, upload-time = "2026-03-30T08:47:43.11Z" }, + { url = "https://files.pythonhosted.org/packages/18/3a/23347d35f76f639e807fb7a36fad3068aed100996849a33809591f26eca6/grpcio-1.80.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac393b58aa16991a2f1144ec578084d544038c12242da3a215966b512904d0f", size = 6567644, upload-time = "2026-03-30T08:47:46.806Z" }, + { url = "https://files.pythonhosted.org/packages/ff/40/96e07ecb604a6a67ae6ab151e3e35b132875d98bc68ec65f3e5ab3e781d7/grpcio-1.80.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:68e5851ac4b9afe07e7f84483803ad167852570d65326b34d54ca560bfa53fb6", size = 7277830, upload-time = "2026-03-30T08:47:49.643Z" }, + { url = "https://files.pythonhosted.org/packages/9b/e2/da1506ecea1f34a5e365964644b35edef53803052b763ca214ba3870c856/grpcio-1.80.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:873ff5d17d68992ef6605330127425d2fc4e77e612fa3c3e0ed4e668685e3140", size = 6783216, upload-time = "2026-03-30T08:47:52.817Z" }, + { url = "https://files.pythonhosted.org/packages/44/83/3b20ff58d0c3b7f6caaa3af9a4174d4023701df40a3f39f7f1c8e7c48f9d/grpcio-1.80.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2bea16af2750fd0a899bf1abd9022244418b55d1f37da2202249ba4ba673838d", size = 7385866, upload-time = "2026-03-30T08:47:55.687Z" }, + { url = "https://files.pythonhosted.org/packages/47/45/55c507599c5520416de5eefecc927d6a0d7af55e91cfffb2e410607e5744/grpcio-1.80.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba0db34f7e1d803a878284cd70e4c63cb6ae2510ba51937bf8f45ba997cefcf7", size = 8391602, upload-time = "2026-03-30T08:47:58.303Z" }, + { url = "https://files.pythonhosted.org/packages/10/bb/dd06f4c24c01db9cf11341b547d0a016b2c90ed7dbbb086a5710df7dd1d7/grpcio-1.80.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8eb613f02d34721f1acf3626dfdb3545bd3c8505b0e52bf8b5710a28d02e8aa7", size = 7826752, upload-time = "2026-03-30T08:48:01.311Z" }, + { url = "https://files.pythonhosted.org/packages/f9/1e/9d67992ba23371fd63d4527096eb8c6b76d74d52b500df992a3343fd7251/grpcio-1.80.0-cp313-cp313-win32.whl", hash = "sha256:93b6f823810720912fd131f561f91f5fed0fda372b6b7028a2681b8194d5d294", size = 4142310, upload-time = "2026-03-30T08:48:04.594Z" }, + { url = "https://files.pythonhosted.org/packages/cf/e6/283326a27da9e2c3038bc93eeea36fb118ce0b2d03922a9cda6688f53c5b/grpcio-1.80.0-cp313-cp313-win_amd64.whl", hash = "sha256:e172cf795a3ba5246d3529e4d34c53db70e888fa582a8ffebd2e6e48bc0cba50", size = 4882833, upload-time = "2026-03-30T08:48:07.363Z" }, + { url = "https://files.pythonhosted.org/packages/c5/6d/e65307ce20f5a09244ba9e9d8476e99fb039de7154f37fb85f26978b59c3/grpcio-1.80.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:3d4147a97c8344d065d01bbf8b6acec2cf86fb0400d40696c8bdad34a64ffc0e", size = 6017376, upload-time = "2026-03-30T08:48:10.005Z" }, + { url = "https://files.pythonhosted.org/packages/69/10/9cef5d9650c72625a699c549940f0abb3c4bfdb5ed45a5ce431f92f31806/grpcio-1.80.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8e11f167935b3eb089ac9038e1a063e6d7dbe995c0bb4a661e614583352e76f", size = 12018133, upload-time = "2026-03-30T08:48:12.927Z" }, + { url = "https://files.pythonhosted.org/packages/04/82/983aabaad82ba26113caceeb9091706a0696b25da004fe3defb5b346e15b/grpcio-1.80.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f14b618fc30de822681ee986cfdcc2d9327229dc4c98aed16896761cacd468b9", size = 6574748, upload-time = "2026-03-30T08:48:16.386Z" }, + { url = "https://files.pythonhosted.org/packages/07/d7/031666ef155aa0bf399ed7e19439656c38bbd143779ae0861b038ce82abd/grpcio-1.80.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4ed39fbdcf9b87370f6e8df4e39ca7b38b3e5e9d1b0013c7b6be9639d6578d14", size = 7277711, upload-time = "2026-03-30T08:48:19.627Z" }, + { url = "https://files.pythonhosted.org/packages/e8/43/f437a78f7f4f1d311804189e8f11fb311a01049b2e08557c1068d470cb2e/grpcio-1.80.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2dcc70e9f0ba987526e8e8603a610fb4f460e42899e74e7a518bf3c68fe1bf05", size = 6785372, upload-time = "2026-03-30T08:48:22.373Z" }, + { url = "https://files.pythonhosted.org/packages/93/3d/f6558e9c6296cb4227faa5c43c54a34c68d32654b829f53288313d16a86e/grpcio-1.80.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:448c884b668b868562b1bda833c5fce6272d26e1926ec46747cda05741d302c1", size = 7395268, upload-time = "2026-03-30T08:48:25.638Z" }, + { url = "https://files.pythonhosted.org/packages/06/21/0fdd77e84720b08843c371a2efa6f2e19dbebf56adc72df73d891f5506f0/grpcio-1.80.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a1dc80fe55685b4a543555e6eef975303b36c8db1023b1599b094b92aa77965f", size = 8392000, upload-time = "2026-03-30T08:48:28.974Z" }, + { url = "https://files.pythonhosted.org/packages/f5/68/67f4947ed55d2e69f2cc199ab9fd85e0a0034d813bbeef84df6d2ba4d4b7/grpcio-1.80.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:31b9ac4ad1aa28ffee5503821fafd09e4da0a261ce1c1281c6c8da0423c83b6e", size = 7828477, upload-time = "2026-03-30T08:48:32.054Z" }, + { url = "https://files.pythonhosted.org/packages/44/b6/8d4096691b2e385e8271911a0de4f35f0a6c7d05aff7098e296c3de86939/grpcio-1.80.0-cp314-cp314-win32.whl", hash = "sha256:367ce30ba67d05e0592470428f0ec1c31714cab9ef19b8f2e37be1f4c7d32fae", size = 4218563, upload-time = "2026-03-30T08:48:34.538Z" }, + { url = "https://files.pythonhosted.org/packages/e5/8c/bbe6baf2557262834f2070cf668515fa308b2d38a4bbf771f8f7872a7036/grpcio-1.80.0-cp314-cp314-win_amd64.whl", hash = "sha256:3b01e1f5464c583d2f567b2e46ff0d516ef979978f72091fd81f5ab7fa6e2e7f", size = 5019457, upload-time = "2026-03-30T08:48:37.308Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -3290,6 +3345,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8b/ca/8f122055c97a932311a3f640273f084e738008933503d0c2563cd5d591fc/opentelemetry_exporter_otlp_proto_common-1.40.0-py3-none-any.whl", hash = "sha256:7081ff453835a82417bf38dccf122c827c3cbc94f2079b03bba02a3165f25149", size = 18369, upload-time = "2026-03-04T14:17:04.796Z" }, ] +[[package]] +name = "opentelemetry-exporter-otlp-proto-grpc" +version = "1.40.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "googleapis-common-protos" }, + { name = "grpcio" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-common" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8f/7f/b9e60435cfcc7590fa87436edad6822240dddbc184643a2a005301cc31f4/opentelemetry_exporter_otlp_proto_grpc-1.40.0.tar.gz", hash = "sha256:bd4015183e40b635b3dab8da528b27161ba83bf4ef545776b196f0fb4ec47740", size = 25759, upload-time = "2026-03-04T14:17:24.4Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/6f/7ee0980afcbdcd2d40362da16f7f9796bd083bf7f0b8e038abfbc0300f5d/opentelemetry_exporter_otlp_proto_grpc-1.40.0-py3-none-any.whl", hash = "sha256:2aa0ca53483fe0cf6405087a7491472b70335bc5c7944378a0a8e72e86995c52", size = 20304, upload-time = "2026-03-04T14:17:05.942Z" }, +] + [[package]] name = "opentelemetry-exporter-otlp-proto-http" version = "1.40.0" @@ -3308,6 +3381,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/3a/8865d6754e61c9fb170cdd530a124a53769ee5f740236064816eb0ca7301/opentelemetry_exporter_otlp_proto_http-1.40.0-py3-none-any.whl", hash = "sha256:a8d1dab28f504c5d96577d6509f80a8150e44e8f45f82cdbe0e34c99ab040069", size = 19960, upload-time = "2026-03-04T14:17:07.153Z" }, ] +[[package]] +name = "opentelemetry-instrumentation" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "packaging" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/da/37/6bf8e66bfcee5d3c6515b79cb2ee9ad05fe573c20f7ceb288d0e7eeec28c/opentelemetry_instrumentation-0.61b0.tar.gz", hash = "sha256:cb21b48db738c9de196eba6b805b4ff9de3b7f187e4bbf9a466fa170514f1fc7", size = 32606, upload-time = "2026-03-04T14:20:16.825Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d8/3e/f6f10f178b6316de67f0dfdbbb699a24fbe8917cf1743c1595fb9dcdd461/opentelemetry_instrumentation-0.61b0-py3-none-any.whl", hash = "sha256:92a93a280e69788e8f88391247cc530fd81f16f2b011979d4d6398f805cfbc63", size = 33448, upload-time = "2026-03-04T14:19:02.447Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-asyncio" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/06/f14eacf4fde6892402a4fe1023cbca4a5d4f08f37d930ea3e414a98c85d0/opentelemetry_instrumentation_asyncio-0.61b0.tar.gz", hash = "sha256:3b173b009f108fcbc6ee4f7482e7ae8b76518a87a620ad5e7dd24e4c26066c3c", size = 14115, upload-time = "2026-03-04T14:20:22.227Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/8f/79913d7ebc2bd2be9a81f8ecbe0f7413c3bec55c83c89337b93c8de5417a/opentelemetry_instrumentation_asyncio-0.61b0-py3-none-any.whl", hash = "sha256:43273d5b74880b06c5a766f779fa480a50fc5a09a7c81468a60457b794e3f3cd", size = 14770, upload-time = "2026-03-04T14:19:13.057Z" }, +] + [[package]] name = "opentelemetry-proto" version = "1.40.0" @@ -3365,6 +3468,10 @@ dependencies = [ { name = "olefile" }, { name = "openai" }, { name = "openpyxl" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-grpc" }, + { name = "opentelemetry-instrumentation-asyncio" }, + { name = "opentelemetry-sdk" }, { name = "pdfminer-six" }, { name = "pdfplumber" }, { name = "protobuf" }, @@ -3397,6 +3504,15 @@ dependencies = [ ] [package.optional-dependencies] +benchmark = [ + { name = "datasets" }, + { name = "langchain" }, + { name = "langchain-core" }, + { name = "langchain-openai" }, + { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "tiktoken" }, +] bot = [ { name = "beautifulsoup4" }, { name = "croniter" }, @@ -3544,6 +3660,7 @@ requires-dist = [ { name = "cmake", marker = "extra == 'build'", specifier = ">=3.15" }, { name = "croniter", marker = "extra == 'bot'", specifier = ">=2.0.0" }, { name = "cryptography", specifier = ">=42.0.0" }, + { name = "datasets", marker = "extra == 'benchmark'", specifier = ">=2.0.0" }, { name = "datasets", marker = "extra == 'eval'", specifier = ">=2.0.0" }, { name = "datasets", marker = "extra == 'test'", specifier = ">=2.0.0" }, { name = "ddgs", marker = "extra == 'bot'", specifier = ">=9.0.0" }, @@ -3561,6 +3678,9 @@ requires-dist = [ { name = "hvac", marker = "extra == 'test'", specifier = ">=2.0.0" }, { name = "jinja2", specifier = ">=3.1.6" }, { name = "json-repair", specifier = ">=0.25.0" }, + { name = "langchain", marker = "extra == 'benchmark'", specifier = ">=1.0.0" }, + { name = "langchain-core", marker = "extra == 'benchmark'", specifier = ">=1.0.0" }, + { name = "langchain-openai", marker = "extra == 'benchmark'", specifier = ">=1.0.0" }, { name = "langfuse", marker = "extra == 'bot-langfuse'", specifier = ">=3.0.0" }, { name = "lark-oapi", marker = "extra == 'bot-feishu'", specifier = ">=1.0.0" }, { name = "litellm", specifier = ">=1.0.0,<1.82.6" }, @@ -3575,7 +3695,12 @@ requires-dist = [ { name = "openpyxl", specifier = ">=3.0.0" }, { name = "opensandbox", marker = "extra == 'bot-sandbox'", specifier = ">=0.1.0" }, { name = "opensandbox-server", marker = "extra == 'bot-sandbox'", specifier = ">=0.1.0" }, + { name = "opentelemetry-api", specifier = ">=1.14" }, + { name = "opentelemetry-exporter-otlp-proto-grpc", specifier = ">=1.14" }, + { name = "opentelemetry-instrumentation-asyncio", specifier = ">=0.61b0" }, + { name = "opentelemetry-sdk", specifier = ">=1.14" }, { name = "openviking", extras = ["bot", "bot-dingtalk", "bot-feishu", "bot-fuse", "bot-langfuse", "bot-opencode", "bot-qq", "bot-sandbox", "bot-slack", "bot-telegram"], marker = "extra == 'bot-full'" }, + { name = "pandas", marker = "extra == 'benchmark'", specifier = ">=2.0.0" }, { name = "pandas", marker = "extra == 'eval'", specifier = ">=2.0.0" }, { name = "pandas", marker = "extra == 'test'", specifier = ">=2.0.0" }, { name = "pdfminer-six", specifier = ">=20251230" }, @@ -3613,6 +3738,7 @@ requires-dist = [ { name = "sphinx-rtd-theme", marker = "extra == 'doc'", specifier = ">=1.3.0" }, { name = "tabulate", specifier = ">=0.9.0" }, { name = "tavily-python", marker = "extra == 'bot'", specifier = ">=0.5.0" }, + { name = "tiktoken", marker = "extra == 'benchmark'", specifier = ">=0.5.0" }, { name = "tree-sitter", specifier = ">=0.23.0" }, { name = "tree-sitter-c-sharp", specifier = ">=0.23.0" }, { name = "tree-sitter-cpp", specifier = ">=0.23.0" }, @@ -3635,7 +3761,7 @@ requires-dist = [ { name = "xlrd", specifier = ">=2.0.1" }, { name = "xxhash", specifier = ">=3.0.0" }, ] -provides-extras = ["test", "dev", "doc", "eval", "gemini", "gemini-async", "ocr", "build", "bot", "bot-langfuse", "bot-telegram", "bot-feishu", "bot-dingtalk", "bot-slack", "bot-qq", "bot-sandbox", "bot-fuse", "bot-opencode", "bot-full"] +provides-extras = ["test", "dev", "doc", "eval", "gemini", "gemini-async", "ocr", "build", "bot", "bot-langfuse", "bot-telegram", "bot-feishu", "bot-dingtalk", "bot-slack", "bot-qq", "bot-sandbox", "bot-fuse", "bot-opencode", "bot-full", "benchmark"] [package.metadata.requires-dev] dev = [{ name = "pytest", specifier = ">=9.0.2" }] From 79eabe33857c9f31fda73cd2db05435509e2eb3c Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Thu, 2 Apr 2026 16:57:48 +0800 Subject: [PATCH 14/20] Revert "update" This reverts commit ede619264038f5e481fe265c4662ec26c9c04aff. --- benchmark/RAG/run.py | 44 ++- benchmark/RAG/scripts/download_dataset.py | 84 ++--- benchmark/RAG/scripts/prepare_dataset.py | 51 ++- benchmark/RAG/scripts/run_sampling.py | 16 +- benchmark/RAG/scripts/sample_dataset.py | 329 +++++++++--------- benchmark/RAG/src/adapters/base.py | 10 +- .../RAG/src/adapters/financebench_adapter.py | 16 +- benchmark/RAG/src/adapters/locomo_adapter.py | 23 +- benchmark/RAG/src/adapters/qasper_adapter.py | 64 ++-- .../RAG/src/adapters/syllabusqa_adapter.py | 104 +++--- benchmark/RAG/src/core/judge_util.py | 2 +- benchmark/RAG/src/core/llm_client.py | 3 +- benchmark/RAG/src/core/metrics.py | 28 +- benchmark/RAG/src/core/monitor.py | 2 +- benchmark/RAG/src/core/vector_store.py | 17 +- benchmark/RAG/src/pipeline.py | 79 ++--- benchmark/locomo/openclaw/eval.py | 10 +- benchmark/locomo/vikingbot/import_to_ov.py | 12 +- benchmark/locomo/vikingbot/judge.py | 5 +- benchmark/locomo/vikingbot/run_eval.py | 9 +- .../locomo/vikingbot/stat_judge_result.py | 2 +- benchmark/skillsbench/skill_bench_eval.py | 30 +- bot/vikingbot/__main__.py | 1 + bot/vikingbot/agent/__init__.py | 2 +- bot/vikingbot/agent/memory.py | 4 +- bot/vikingbot/agent/skills.py | 3 +- bot/vikingbot/agent/subagent.py | 3 +- bot/vikingbot/agent/tools/__init__.py | 2 +- bot/vikingbot/agent/tools/filesystem.py | 8 +- bot/vikingbot/agent/tools/message.py | 4 +- bot/vikingbot/agent/tools/ov_file.py | 4 +- bot/vikingbot/agent/tools/registry.py | 3 +- bot/vikingbot/agent/tools/shell.py | 12 +- bot/vikingbot/agent/tools/spawn.py | 6 +- .../agent/tools/websearch/__init__.py | 5 +- .../agent/tools/websearch/registry.py | 2 +- bot/vikingbot/bus/queue.py | 2 +- bot/vikingbot/channels/base.py | 2 +- bot/vikingbot/channels/discord.py | 3 + bot/vikingbot/channels/feishu.py | 8 +- bot/vikingbot/channels/openapi.py | 2 +- bot/vikingbot/channels/slack.py | 4 +- bot/vikingbot/channels/telegram.py | 6 +- bot/vikingbot/channels/utils.py | 4 +- bot/vikingbot/channels/whatsapp.py | 1 + bot/vikingbot/cli/werewolf_game.py | 6 +- bot/vikingbot/config/__init__.py | 2 +- bot/vikingbot/config/loader.py | 4 +- bot/vikingbot/config/schema.py | 2 +- bot/vikingbot/console/web_console.py | 5 +- bot/vikingbot/cron/types.py | 2 + bot/vikingbot/heartbeat/service.py | 4 +- bot/vikingbot/hooks/base.py | 4 +- .../hooks/builtins/openviking_hooks.py | 1 + bot/vikingbot/hooks/manager.py | 2 +- bot/vikingbot/openviking_mount/__init__.py | 8 +- bot/vikingbot/openviking_mount/fuse_finder.py | 12 +- bot/vikingbot/openviking_mount/fuse_proxy.py | 15 +- bot/vikingbot/openviking_mount/fuse_simple.py | 12 +- .../openviking_mount/fuse_simple_debug.py | 14 +- bot/vikingbot/openviking_mount/manager.py | 8 +- bot/vikingbot/openviking_mount/mount.py | 12 +- .../openviking_mount/session_integration.py | 10 +- .../openviking_mount/user_apikey_manager.py | 2 +- .../providers/openai_compatible_provider.py | 3 +- bot/vikingbot/providers/transcription.py | 1 + bot/vikingbot/sandbox/__init__.py | 4 +- bot/vikingbot/sandbox/backends/__init__.py | 8 +- bot/vikingbot/sandbox/backends/direct.py | 9 +- bot/vikingbot/sandbox/backends/srt.py | 9 +- bot/vikingbot/sandbox/manager.py | 15 +- bot/vikingbot/session/__init__.py | 2 +- bot/vikingbot/utils/__init__.py | 12 +- bot/vikingbot/utils/helpers.py | 4 +- bot/vikingbot/utils/tracing.py | 2 +- .../github-proxy/scripts/convert_url.py | 2 +- .../skills/opencode/list_sessions.py | 3 +- .../skills/opencode/opencode_utils.py | 2 +- .../tests/e2e/test-archive-expand.py | 2 +- openviking/models/vlm/backends/litellm_vlm.py | 3 + openviking/models/vlm/backends/openai_vlm.py | 1 + .../models/vlm/backends/volcengine_vlm.py | 1 - .../parse/parsers/code/ast/languages/php.py | 2 +- openviking/session/compressor_v2.py | 3 +- openviking/telemetry/__init__.py | 2 +- .../utils/config/open_viking_config.py | 2 +- tests/agfs/conftest.py | 1 - ...compressor_v2_event_span_multiple_turns.py | 2 +- .../test_compressor_v2_tool_skill_memory.py | 6 +- .../integration/test_compressor_v2_xiaomei.py | 2 +- tests/models/vlm/test_volcengine_cache.py | 6 +- tests/parse/test_html_parser_utils.py | 1 + tests/server/test_bot_proxy_auth.py | 4 +- .../test_vectordb_collection_loading.py | 6 +- tests/unit/test_cohere_rerank.py | 2 + uv.lock | 140 +------- 96 files changed, 675 insertions(+), 743 deletions(-) diff --git a/benchmark/RAG/run.py b/benchmark/RAG/run.py index 1f76d1148..0d2d0a57b 100644 --- a/benchmark/RAG/run.py +++ b/benchmark/RAG/run.py @@ -1,15 +1,13 @@ -import importlib import os import sys +import yaml +import importlib from argparse import ArgumentParser from pathlib import Path -import yaml - sys.path.append(str(Path(__file__).parent)) from src.core.logger import setup_logging - # ========================================== # 1. Environment Initialization # ========================================== @@ -22,9 +20,9 @@ print(f"[Init] Auto-detected OpenViking config: {ov_config_path}") try: - from src.core.llm_client import LLMClientWrapper + from src.pipeline import BenchmarkPipeline from src.core.vector_store import VikingStoreWrapper - from src.pipeline import BenchmarkPipeline + from src.core.llm_client import LLMClientWrapper except SyntaxError as e: print(f"\n[Fatal Error] Syntax error while importing modules: {e}") sys.exit(1) @@ -61,19 +59,19 @@ def resolve_path(path_str, base_path): def main(): parser = ArgumentParser(description="Run RAG Benchmark (Smart Path Handling)") default_config_path = os.path.join(SCRIPT_DIR, "config/config.yaml") - - parser.add_argument("--config", default=default_config_path, + + parser.add_argument("--config", default=default_config_path, help=f"Path to config file. Default: {default_config_path}") - - parser.add_argument("--step", choices=["all", "gen", "eval", "del"], default="all", + + parser.add_argument("--step", choices=["all", "gen", "eval", "del"], default="all", help="Execution step: 'gen' (Retrieval+LLM), 'eval' (Judge), or 'all'") - + args = parser.parse_args() # --- B. Load and Parse Config --- config_path = os.path.abspath(args.config) print(f"[Init] Loading configuration from: {config_path}") - + try: config = load_config(config_path) except FileNotFoundError as e: @@ -84,12 +82,12 @@ def main(): print(f"[Init] Resolving paths relative to Project Root: {PROJECT_ROOT}") dataset_name = config.get('dataset_name', 'UnknownDataset') retrieval_topk = config.get('execution', {}).get('retrieval_topk', 5) - + format_vars = { 'dataset_name': dataset_name, 'retrieval_topk': retrieval_topk } - + path_keys = ['dataset_path', 'output_dir', 'vector_store', 'log_file', 'doc_output_dir'] for key in path_keys: if key in config.get('paths', {}): @@ -103,15 +101,15 @@ def main(): try: logger = setup_logging(config['paths']['log_file']) logger.info(">>> Benchmark Session Started") - + # 1. Adapter (Dynamic Loading) adapter_cfg = config.get('adapter', {}) module_path = adapter_cfg.get('module', 'src.adapters.locomo_adapter') class_name = adapter_cfg.get('class_name', 'LocomoAdapter') - + logger.info(f"Dynamically loading Adapter: {class_name} from {module_path}") logger.info(f"Loading dataset from: {config['paths']['dataset_path']}") - + try: mod = importlib.import_module(module_path) AdapterClass = getattr(mod, class_name) @@ -122,18 +120,18 @@ def main(): except AttributeError as e: logger.error(f"Class '{class_name}' not found in module '{module_path}'. Please check your config 'adapter.class_name'. Error: {e}") raise e - + # 2. Vector Store vector_store = VikingStoreWrapper(store_path=config['paths']['vector_store']) - + # 3. LLM Client api_key = os.environ.get( - config['llm'].get('api_key_env_var', ''), + config['llm'].get('api_key_env_var', ''), config['llm'].get('api_key') ) if not api_key: logger.warning("No API Key found in config or environment variables!") - + llm_client = LLMClientWrapper(config=config['llm'], api_key=api_key) # 4. Pipeline @@ -148,7 +146,7 @@ def main(): if args.step in ["all", "gen"]: logger.info("Stage: Generation (Ingest -> Retrieve -> Generate)") pipeline.run_generation() - + if args.step in ["all", "eval"]: logger.info("Stage: Evaluation (Judge -> Metrics)") pipeline.run_evaluation() @@ -156,7 +154,7 @@ def main(): if args.step in ["all", "del"]: logger.info("Stage: Delete Vector Store") pipeline.run_deletion() - + logger.info("Benchmark finished successfully.") except KeyboardInterrupt: diff --git a/benchmark/RAG/scripts/download_dataset.py b/benchmark/RAG/scripts/download_dataset.py index fe256b51e..3a32ff639 100644 --- a/benchmark/RAG/scripts/download_dataset.py +++ b/benchmark/RAG/scripts/download_dataset.py @@ -6,10 +6,12 @@ import argparse import hashlib +import json +import os import shutil import sys from pathlib import Path -from typing import Dict, Optional +from typing import Dict, List, Optional from urllib.parse import urlparse import requests @@ -81,13 +83,13 @@ def calculate_checksum(file_path: Path, algorithm: str = "sha256") -> str: def download_file(url: str, dest_path: Path, chunk_size: int = 8192) -> bool: """Download a file with progress bar.""" dest_path.parent.mkdir(parents=True, exist_ok=True) - + try: response = requests.get(url, stream=True, timeout=30) response.raise_for_status() - + total_size = int(response.headers.get("content-length", 0)) - + with open(dest_path, "wb") as f, tqdm( desc=f"Downloading {dest_path.name}", total=total_size, @@ -109,13 +111,13 @@ def download_file(url: str, dest_path: Path, chunk_size: int = 8192) -> bool: def extract_archive(archive_path: Path, extract_to: Path, extract_subdir: Optional[str] = None) -> bool: """Extract archive file (zip, tar.gz, etc.).""" - import tarfile import zipfile - + import tarfile + try: temp_extract_dir = extract_to / ".temp_extract" temp_extract_dir.mkdir(parents=True, exist_ok=True) - + if archive_path.suffix == ".zip": with zipfile.ZipFile(archive_path, "r") as zip_ref: zip_ref.extractall(temp_extract_dir) @@ -126,7 +128,7 @@ def extract_archive(archive_path: Path, extract_to: Path, extract_subdir: Option print(f"Unsupported archive format: {archive_path.suffix}") shutil.rmtree(temp_extract_dir) return False - + if extract_subdir: source_dir = temp_extract_dir / extract_subdir if source_dir.exists() and source_dir.is_dir(): @@ -149,7 +151,7 @@ def extract_archive(archive_path: Path, extract_to: Path, extract_subdir: Option else: dest_item.unlink() shutil.move(str(item), str(dest_item)) - + shutil.rmtree(temp_extract_dir) return True except Exception as e: @@ -164,20 +166,20 @@ def verify_dataset(dataset_name: str, dataset_dir: Path) -> bool: if dataset_name not in DATASET_SOURCES: print(f"Unknown dataset: {dataset_name}") return False - + source = DATASET_SOURCES[dataset_name] missing_files = [] - + for file_path in source["files"]: full_path = dataset_dir / file_path # Check if path exists (either file or directory) if not full_path.exists(): missing_files.append(file_path) - + if missing_files: print(f"Missing files for {dataset_name}: {missing_files}") return False - + print(f"✓ {dataset_name} verified successfully") return True @@ -199,31 +201,31 @@ def download_from_url( Supports single url or multiple urls via urls field. """ dataset_dir = output_dir / dataset_name - + if dataset_dir.exists() and not force: print(f"{dataset_name} already exists at {dataset_dir}, skipping download") if verify: return verify_dataset(dataset_name, dataset_dir) return True - + print(f"Downloading {dataset_name}...") - + # Support single url or multiple urls urls = source.get("urls", [source.get("url")]) if source.get("urls") else [source.get("url")] - + success = True for url in urls: if not url: continue - + parsed_url = urlparse(url) file_name = Path(parsed_url.path).name downloaded_path = output_dir / file_name - + if not download_file(url, downloaded_path): success = False continue - + if "checksum" in source and source["checksum"]: algo, expected_checksum = source["checksum"].split(":", 1) actual_checksum = calculate_checksum(downloaded_path, algo) @@ -235,7 +237,7 @@ def download_from_url( success = False continue print(f"✓ Checksum verified for {dataset_name}") - + if is_archive_file(downloaded_path): extract_subdir = source.get("extract_subdir") if not extract_archive(downloaded_path, dataset_dir, extract_subdir): @@ -248,10 +250,10 @@ def download_from_url( dest_path = dataset_dir / file_name shutil.move(str(downloaded_path), str(dest_path)) print(f"✓ Saved single file to {dest_path}") - + if verify and not verify_dataset(dataset_name, dataset_dir): return False - + if success: print(f"✓ {dataset_name} downloaded successfully to {dataset_dir}") return success @@ -267,28 +269,28 @@ def download_dataset( if dataset_name not in DATASET_SOURCES: print(f"Unknown dataset: {dataset_name}") return False - + source = DATASET_SOURCES[dataset_name] dataset_dir = output_dir / dataset_name - + if dataset_dir.exists() and not force: print(f"{dataset_name} already exists at {dataset_dir}, skipping download") if verify: return verify_dataset(dataset_name, dataset_dir) return True - + success = download_from_url(source, output_dir, dataset_name, force, verify) - + if success and verify: return verify_dataset(dataset_name, dataset_dir) - + return success def main(): # Check if any datasets are configured configured_datasets = [k for k in DATASET_SOURCES.keys() if not k.startswith('#')] - + if not configured_datasets: print("=" * 80) print("No datasets configured!") @@ -300,7 +302,7 @@ def main(): print("See README_DATASET_CONFIG.md for detailed instructions.") print("=" * 80) return 1 - + parser = argparse.ArgumentParser( description="Download datasets for RAG benchmark" ) @@ -327,33 +329,33 @@ def main(): action="store_true", help="Skip dataset verification" ) - + args = parser.parse_args() - + output_dir = args.output_dir.resolve() output_dir.mkdir(parents=True, exist_ok=True) - + datasets = ( - configured_datasets - if args.dataset == "all" + configured_datasets + if args.dataset == "all" else [args.dataset] ) - + print(f"Downloading datasets to: {output_dir}") print(f"Datasets: {', '.join(datasets)}") print() - + success_count = 0 for dataset in datasets: if download_dataset( - dataset, - output_dir, - args.force, + dataset, + output_dir, + args.force, not args.no_verify ): success_count += 1 print() - + print(f"Download complete: {success_count}/{len(datasets)} successful") return 0 if success_count == len(datasets) else 1 diff --git a/benchmark/RAG/scripts/prepare_dataset.py b/benchmark/RAG/scripts/prepare_dataset.py index 56e132ef1..a734cee8d 100644 --- a/benchmark/RAG/scripts/prepare_dataset.py +++ b/benchmark/RAG/scripts/prepare_dataset.py @@ -7,13 +7,12 @@ import argparse import sys from pathlib import Path -from typing import Optional +from typing import List, Optional sys.path.append(str(Path(__file__).parent)) -from download_dataset import DATASET_SOURCES as DOWNLOAD_SOURCES -from download_dataset import download_dataset -from sample_dataset import DATASET_SAMPLERS, sample_dataset +from download_dataset import download_dataset, DATASET_SOURCES as DOWNLOAD_SOURCES +from sample_dataset import sample_dataset, DATASET_SAMPLERS def prepare_dataset( @@ -33,9 +32,9 @@ def prepare_dataset( print("\n" + "=" * 80) print(f"Preparing dataset: {dataset_name}") print("=" * 80) - + success = True - + # Step 1: Download if not skip_download: print("\n[Step 1/2] Downloading dataset...") @@ -50,16 +49,16 @@ def prepare_dataset( success = False else: print("\n[Step 1/2] Skipping download (--skip-download)") - + # Step 2: Sample if not skip_sampling and success: print("\n[Step 2/2] Sampling dataset...") input_dir = download_dir / dataset_name dataset_output_dir = output_dir / dataset_name - + actual_sample_size = None if use_full else sample_size actual_num_docs = None if use_full else num_docs - + sample_success = sample_dataset( dataset_name, input_dir, @@ -74,7 +73,7 @@ def prepare_dataset( success = False elif skip_sampling: print("\n[Step 2/2] Skipping sampling (--skip-sampling)") - + return success @@ -100,7 +99,7 @@ def main(): python prepare_dataset.py --skip-sampling """ ) - + # Dataset selection parser.add_argument( "--dataset", "-d", @@ -109,7 +108,7 @@ def main(): default="all", help="Dataset to prepare (default: all)" ) - + # Directories parser.add_argument( "--download-dir", @@ -123,7 +122,7 @@ def main(): default=Path(__file__).parent.parent / "datasets", help="Directory for final prepared datasets (default: datasets/)" ) - + # Sampling options parser.add_argument( "--sample-size", "-n", @@ -155,7 +154,7 @@ def main(): default="random", help="Sampling mode: 'random' (default) for random sampling, 'stratified' for stratified sampling by category" ) - + # Skip options parser.add_argument( "--skip-download", @@ -167,39 +166,39 @@ def main(): action="store_true", help="Skip sampling step" ) - + # Force options parser.add_argument( "--force-download", "-f", action="store_true", help="Force re-download even if dataset exists" ) - + args = parser.parse_args() - + # Validate dataset choices available_datasets = set(DOWNLOAD_SOURCES.keys()) & set(DATASET_SAMPLERS.keys()) if args.dataset != "all" and args.dataset not in available_datasets: print(f"Error: Dataset '{args.dataset}' not available") print(f"Available datasets: {', '.join(sorted(available_datasets))}") return 1 - + # Handle --full flag - use full dataset, no sampling if args.full: args.sample_size = None args.num_docs = None - + # Resolve paths download_dir = args.download_dir.resolve() output_dir = args.output_dir.resolve() - + # Determine datasets to process datasets = ( - sorted(available_datasets) - if args.dataset == "all" + sorted(available_datasets) + if args.dataset == "all" else [args.dataset] ) - + # Print configuration print("=" * 80) print("RAG Benchmark - Unified Dataset Preparation") @@ -215,7 +214,7 @@ def main(): print(f"Skip sampling: {args.skip_sampling}") print(f"Force download: {args.force_download}") print("=" * 80) - + # Prepare datasets success_count = 0 for dataset in datasets: @@ -233,13 +232,13 @@ def main(): args.sample_mode ): success_count += 1 - + # Final summary print("\n" + "=" * 80) print("Preparation Complete") print("=" * 80) print(f"Success: {success_count}/{len(datasets)} datasets") - + if success_count == len(datasets): print("\n✅ All datasets prepared successfully!") print(f"\nPrepared datasets are in: {output_dir}") diff --git a/benchmark/RAG/scripts/run_sampling.py b/benchmark/RAG/scripts/run_sampling.py index 8ca22dbc3..b615a5cb3 100644 --- a/benchmark/RAG/scripts/run_sampling.py +++ b/benchmark/RAG/scripts/run_sampling.py @@ -12,14 +12,14 @@ def main(): input_dir = Path(__file__).parent.parent / "raw_data" output_dir = Path(__file__).parent.parent / "datasets" - + print("=" * 60) print("Running sampling for all datasets with custom parameters") print("=" * 60) - + success_count = 0 total_count = 0 - + # Locomo: 3 documents, 80 QAs, stratified total_count += 1 print("\n" + "=" * 60) @@ -35,7 +35,7 @@ def main(): sample_mode="stratified" ): success_count += 1 - + # SyllabusQA: 7 documents, 90 QAs, stratified total_count += 1 print("\n" + "=" * 60) @@ -51,7 +51,7 @@ def main(): sample_mode="stratified" ): success_count += 1 - + # Qasper: 8 documents, 60 QAs, stratified total_count += 1 print("\n" + "=" * 60) @@ -67,7 +67,7 @@ def main(): sample_mode="stratified" ): success_count += 1 - + # FinanceBench: 3 documents, 12 QAs, stratified total_count += 1 print("\n" + "=" * 60) @@ -83,11 +83,11 @@ def main(): sample_mode="stratified" ): success_count += 1 - + print("\n" + "=" * 60) print(f"Sampling complete: {success_count}/{total_count} successful") print("=" * 60) - + return 0 if success_count == total_count else 1 diff --git a/benchmark/RAG/scripts/sample_dataset.py b/benchmark/RAG/scripts/sample_dataset.py index f73d85462..d7b4b7f7a 100644 --- a/benchmark/RAG/scripts/sample_dataset.py +++ b/benchmark/RAG/scripts/sample_dataset.py @@ -6,11 +6,12 @@ import argparse import json +import os import random import shutil import sys from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, Callable sys.path.append(str(Path(__file__).parent.parent)) @@ -66,29 +67,29 @@ def calculate_category_targets( num_categories = len(categories) if num_categories == 0: return {}, True - + base_per_category = sample_size // num_categories remainder = sample_size % num_categories - + if base_per_category == 0: if print_info: print(f"Warning: Sample size {sample_size} is too small for {num_categories} categories") print("Falling back to random sampling") return {}, True - + category_targets = {} for i, cat in enumerate(categories): category_targets[cat] = base_per_category + (1 if i < remainder else 0) - + if remainder > 0 and print_info: print(f"Cannot split {sample_size} QAs evenly into {num_categories} categories") print(f"Distributing {remainder} extra QA(s) to first {remainder} category(ies)") - + if print_info: print("Category targets:") for cat in categories: print(f" {cat}: {category_targets[cat]} QAs") - + return category_targets, False @@ -112,9 +113,9 @@ def stratified_sample_with_reallocation( """ random.seed(seed) categories = sorted(category_qas.keys()) - + category_targets, should_fallback = calculate_category_targets(sample_size, categories, print_info) - + if should_fallback: all_qas = [] for qas in category_qas.values(): @@ -123,33 +124,33 @@ def stratified_sample_with_reallocation( if len(all_qas) > sample_size: return random.sample(all_qas, sample_size) return all_qas - + sampled_items = [] remaining_quota = sample_size - + category_actual = {} for cat in categories: if cat not in category_targets or category_targets[cat] <= 0: category_actual[cat] = 0 continue - + cat_qas = category_qas[cat].copy() random.shuffle(cat_qas) sample_count = min(len(cat_qas), category_targets[cat]) category_actual[cat] = sample_count remaining_quota -= sample_count sampled_items.extend(cat_qas[:sample_count]) - + if remaining_quota > 0 and print_info: print(f"Reallocating remaining {remaining_quota} QA(s) to categories with available QAs") - + category_available = {} for cat in categories: if cat in category_qas: total_available = len(category_qas[cat]) used = category_actual.get(cat, 0) category_available[cat] = total_available - used - + while remaining_quota > 0: allocated_this_round = 0 for cat in categories: @@ -166,17 +167,17 @@ def stratified_sample_with_reallocation( remaining_quota -= 1 allocated_this_round += 1 break - + if allocated_this_round == 0: if print_info: print(f"Warning: No more QAs available to sample. Stopping with {remaining_quota} unallocated.") break - + if print_info: print("Actual category counts after reallocation:") for cat in categories: print(f" {cat}: {category_actual.get(cat, 0)} QAs") - + return sampled_items @@ -226,48 +227,48 @@ def sample_docs_stratified( """ random.seed(seed) categories = sorted(category_qas.keys()) - + category_targets, should_fallback = calculate_category_targets(sample_size, categories, print_info) - + if should_fallback: return sample_docs_random(sample_size, doc_category_qas, all_doc_ids, seed, print_info) - + selected_docs = [] - selected_qas_by_cat = dict.fromkeys(categories, 0) - doc_used = dict.fromkeys(all_doc_ids, False) - + selected_qas_by_cat = {cat: 0 for cat in categories} + doc_used = {doc_id: False for doc_id in all_doc_ids} + for cat in categories: target = category_targets[cat] if target == 0: continue - + cat_qas = category_qas[cat].copy() random.shuffle(cat_qas) - + for doc_id, qa in cat_qas: if doc_used[doc_id]: continue - + doc_cat_qas = doc_category_qas[doc_id] new_count = selected_qas_by_cat[cat] + len(doc_cat_qas.get(cat, [])) if new_count > target: continue - + selected_docs.append(doc_id) doc_used[doc_id] = True - + for c, qs in doc_cat_qas.items(): selected_qas_by_cat[c] += len(qs) - + if selected_qas_by_cat[cat] >= target: break - + total_selected = sum(selected_qas_by_cat.values()) if print_info: print(f"Sampled {len(selected_docs)} documents with {total_selected} QAs") for cat in categories: print(f" {cat}: {selected_qas_by_cat[cat]} QAs (target: {category_targets[cat]})") - + return selected_docs @@ -294,26 +295,26 @@ def sample_docs_random( random.seed(seed) shuffled_docs = all_doc_ids.copy() random.shuffle(shuffled_docs) - + selected_docs = [] selected_qas_count = 0 - + for doc_id in shuffled_docs: doc_qas = doc_qas_count.get(doc_id, 0) - + if doc_qas == 0: continue - + if selected_qas_count + doc_qas <= sample_size or not selected_docs: selected_docs.append(doc_id) selected_qas_count += doc_qas else: if selected_qas_count >= sample_size: break - + if print_info: print(f"Sampled {len(selected_docs)} documents with {selected_qas_count} QAs (seed={seed})") - + return selected_docs @@ -329,14 +330,14 @@ def sample_locomo( input_file = input_dir / "locomo10.json" if not input_file.exists(): raise FileNotFoundError(f"locomo10.json not found at {input_file}") - + data = load_json_data(input_file) if not isinstance(data, list): data = [data] - + original_num_docs = len(data) print(f"Locomo original size: {original_num_docs} documents") - + category_qas = {} doc_category_qas = [] for doc in data: @@ -352,14 +353,14 @@ def sample_locomo( doc_cat_qas[cat] = [] doc_cat_qas[cat].append(q) doc_category_qas.append(doc_cat_qas) - + total_qas = sum(len(qas) for qas in category_qas.values()) categories = sorted(category_qas.keys()) print(f"Total QAs (excluding category 5): {total_qas}") print(f"Categories: {categories}") for cat in categories: print(f" Category {cat}: {len(category_qas[cat])} QAs") - + is_full = (sample_size is None and num_docs is None) if is_full: selected_docs = data @@ -373,13 +374,13 @@ def sample_locomo( random.seed(seed) selected_docs = random.sample(data, num_docs) print(f"Sampled {len(selected_docs)} documents (seed={seed})") - + if sample_size is not None: print(f"Further sampling {sample_size} QAs from selected documents (mode: {sample_mode})") - + selected_doc_category_qas = {} selected_doc_indices = [data.index(doc) for doc in selected_docs] - + for doc_idx in selected_doc_indices: doc = data[doc_idx] doc_cat_qas = doc_category_qas[doc_idx] @@ -388,12 +389,12 @@ def sample_locomo( selected_doc_category_qas[cat] = [] for q in qs: selected_doc_category_qas[cat].append((doc_idx, q)) - + if sample_mode == "stratified": sampled_q_tuples = stratified_sample_with_reallocation( sample_size, selected_doc_category_qas, seed ) - + keep_q_indices = set() for doc_idx, q in sampled_q_tuples: doc = data[doc_idx] @@ -401,7 +402,7 @@ def sample_locomo( if qa_item == q: keep_q_indices.add((doc_idx, q_idx)) break - + for doc in selected_docs: doc_idx = data.index(doc) new_qas = [] @@ -409,7 +410,7 @@ def sample_locomo( if (doc_idx, q_idx) in keep_q_indices or str(q.get("category")) == "5": new_qas.append(q) doc["qa"] = new_qas - + if sample_mode == "random": all_valid_q_indices = [] for doc_idx_in_selected, doc in enumerate(selected_docs): @@ -421,10 +422,10 @@ def sample_locomo( if qa_item == q: all_valid_q_indices.append((doc_idx_in_selected, q_idx)) break - + sampled_q_indices = random_sample_qas(sample_size, all_valid_q_indices, seed) keep_q_indices = set(sampled_q_indices) - + for doc_idx_in_selected, doc in enumerate(selected_docs): new_qas = [] for q_idx, q in enumerate(doc.get("qa", [])): @@ -438,7 +439,7 @@ def sample_locomo( for qs in doc_cat_qas.values(): count += len(qs) doc_qas_count[doc_idx] = count - + if sample_mode == "stratified": print(f"Using stratified sampling (seed={seed})") category_qas_with_indices = {} @@ -447,15 +448,15 @@ def sample_locomo( for doc, q in qas: doc_idx = data.index(doc) category_qas_with_indices[cat].append((doc_idx, q)) - + doc_category_qas_dict = {i: d for i, d in enumerate(doc_category_qas)} all_doc_indices = list(range(len(data))) - + selected_doc_indices = sample_docs_stratified( sample_size, category_qas_with_indices, doc_category_qas_dict, all_doc_indices, seed ) selected_docs = [data[i] for i in selected_doc_indices] - + if sample_mode == "random": print(f"Using random sampling (seed={seed})") all_doc_indices = list(range(len(data))) @@ -463,18 +464,18 @@ def sample_locomo( sample_size, doc_qas_count, all_doc_indices, seed ) selected_docs = [data[i] for i in selected_doc_indices] - + output_data = selected_docs output_file = output_dir / "locomo10.json" save_json_data(output_data, output_file) - + sampled_qas = 0 for doc in selected_docs: if "qa" in doc: for q in doc["qa"]: if str(q.get("category")) != "5": sampled_qas += 1 - + metadata = { "dataset": "Locomo", "original_num_docs": original_num_docs, @@ -488,7 +489,7 @@ def sample_locomo( "is_full": is_full, "note": "Category 5 questions are excluded from QA count" } - + return metadata @@ -501,13 +502,13 @@ def sample_syllabusqa( sample_mode: str = "random" ) -> Dict[str, Any]: """Sample SyllabusQA dataset with stratified sampling support.""" - import csv from collections import defaultdict - + import csv + dataset_split_dir = input_dir / "data" / "dataset_split" if not dataset_split_dir.exists(): raise FileNotFoundError(f"data/dataset_split not found at {dataset_split_dir}") - + all_data = [] csv_files = ["train.csv", "val.csv", "test.csv"] for csv_file in csv_files: @@ -519,12 +520,12 @@ def sample_syllabusqa( for item in file_data: item["_source_file"] = csv_file all_data.extend(file_data) - + doc_groups = defaultdict(list) for item in all_data: syllabus_name = item.get("syllabus_name", "unknown") doc_groups[syllabus_name].append(item) - + category_qas = {} doc_category_qas = {} for doc_name, items in doc_groups.items(): @@ -539,10 +540,10 @@ def sample_syllabusqa( doc_cat_qas[q_type] = [] doc_cat_qas[q_type].append(item) doc_category_qas[doc_name] = doc_cat_qas - + total_valid_qas = sum(len(qas) for qas in category_qas.values()) categories = sorted(category_qas.keys()) - + doc_valid_qas = {} for doc_name, items in doc_groups.items(): valid_count = 0 @@ -550,7 +551,7 @@ def sample_syllabusqa( if item.get("question_type") != "no answer": valid_count += 1 doc_valid_qas[doc_name] = valid_count - + all_doc_names = list(doc_groups.keys()) original_num_docs = len(all_doc_names) original_total_qas = len(all_data) @@ -559,7 +560,7 @@ def sample_syllabusqa( print(f"Categories: {categories}") for cat in categories: print(f" {cat}: {len(category_qas[cat])} QAs") - + is_full = (sample_size is None and num_docs is None) if is_full: selected_docs = all_doc_names @@ -573,10 +574,10 @@ def sample_syllabusqa( random.seed(seed) selected_docs = random.sample(all_doc_names, num_docs) print(f"Sampled {len(selected_docs)} documents (seed={seed})") - + if sample_size is not None: print(f"Further sampling {sample_size} QAs from selected documents (mode: {sample_mode})") - + selected_doc_category_qas = {} for doc_name in selected_docs: doc_cat_qas = doc_category_qas[doc_name] @@ -585,24 +586,24 @@ def sample_syllabusqa( selected_doc_category_qas[cat] = [] for item in items: selected_doc_category_qas[cat].append(item) - + if sample_mode == "stratified": sampled_items = stratified_sample_with_reallocation( sample_size, selected_doc_category_qas, seed ) - + for doc_name in selected_docs: doc_items = doc_groups[doc_name] for item in doc_items: if item.get("question_type") == "no answer": sampled_items.append(item) - + new_doc_groups = defaultdict(list) for item in sampled_items: doc_name = item.get("syllabus_name", "unknown") new_doc_groups[doc_name].append(item) doc_groups = new_doc_groups - + if sample_mode == "random": all_valid_items = [] for doc_name in selected_docs: @@ -610,15 +611,15 @@ def sample_syllabusqa( for item in items: if item.get("question_type") != "no answer": all_valid_items.append(item) - + sampled_items = random_sample_qas(sample_size, all_valid_items, seed) - + for doc_name in selected_docs: items = doc_groups[doc_name] for item in items: if item.get("question_type") == "no answer": sampled_items.append(item) - + new_doc_groups = defaultdict(list) for item in sampled_items: doc_name = item.get("syllabus_name", "unknown") @@ -630,17 +631,17 @@ def sample_syllabusqa( selected_docs = sample_docs_stratified( sample_size, category_qas, doc_category_qas, all_doc_names, seed ) - + if sample_mode == "random": print(f"Using random sampling (seed={seed})") selected_docs = sample_docs_random( sample_size, doc_valid_qas, all_doc_names, seed ) - + selected_data = [] for doc_name in selected_docs: selected_data.extend(doc_groups[doc_name]) - + output_dir.mkdir(parents=True, exist_ok=True) for csv_file in csv_files: file_data = [item for item in selected_data if item.get("_source_file") == csv_file] @@ -654,12 +655,12 @@ def sample_syllabusqa( writer.writeheader() writer.writerows(file_data) print(f"Saved {len(file_data)} samples to {csv_file}") - + syllabi_src = input_dir / "syllabi" syllabi_dst = output_dir / "syllabi" if syllabi_src.exists(): syllabi_dst.mkdir(parents=True, exist_ok=True) - + syllabus_names = set() for doc_name in selected_docs: items = doc_groups[doc_name] @@ -667,15 +668,15 @@ def sample_syllabusqa( syllabus_name = item.get("syllabus_name") if syllabus_name: syllabus_names.add(syllabus_name) - + print(f"Copying syllabi for {len(syllabus_names)} unique syllabus files") - + for subdir in ["pdf", "text", "word"]: src_subdir = syllabi_src / "syllabi_redacted" / subdir dst_subdir = syllabi_dst / "syllabi_redacted" / subdir if src_subdir.exists(): dst_subdir.mkdir(parents=True, exist_ok=True) - + for syllabus_name in syllabus_names: for ext in [".pdf", ".txt", ".docx"]: src_file = src_subdir / f"{syllabus_name}{ext}" @@ -683,14 +684,14 @@ def sample_syllabusqa( shutil.copy2(src_file, dst_subdir / f"{syllabus_name}{ext}") print(f"Copied {subdir}/{syllabus_name}{ext}") break - + sampled_valid_qas = 0 for doc_name in selected_docs: items = doc_groups[doc_name] for item in items: if item.get("question_type") != "no answer": sampled_valid_qas += 1 - + metadata = { "dataset": "SyllabusQA", "original_num_docs": original_num_docs, @@ -706,7 +707,7 @@ def sample_syllabusqa( "is_full": is_full, "note": "'no answer' type questions are excluded from QA count" } - + return metadata @@ -722,10 +723,10 @@ def sample_qasper( json_files = ["qasper-train-v0.3.json", "qasper-dev-v0.3.json", "qasper-test-v0.3.json"] all_paper_ids = [] paper_data_map = {} - + category_qas = {} paper_category_qas = {} - + for json_file in json_files: file_path = input_dir / json_file if file_path.exists(): @@ -733,7 +734,7 @@ def sample_qasper( for paper_id, paper_data in data.items(): all_paper_ids.append(paper_id) paper_data_map[paper_id] = (paper_data, json_file) - + paper_cat_qas = {} if "qas" in paper_data: for qa_item in paper_data["qas"]: @@ -743,7 +744,7 @@ def sample_qasper( ) if is_unanswerable: continue - + answer_types = set() for ans in qa_item.get("answers", []): ans_obj = ans.get("answer", {}) @@ -755,28 +756,28 @@ def sample_qasper( answer_types.add("free_form") elif ans_obj.get("yes_no") is not None: answer_types.add("yes_no") - + primary_type = next(iter(answer_types), "extractive") if primary_type not in category_qas: category_qas[primary_type] = [] category_qas[primary_type].append((paper_id, qa_item)) - + if primary_type not in paper_cat_qas: paper_cat_qas[primary_type] = [] paper_cat_qas[primary_type].append(qa_item) - + paper_category_qas[paper_id] = paper_cat_qas - + original_num_docs = len(all_paper_ids) print(f"Qasper original size: {original_num_docs} documents (from {len(json_files)} files)") - + total_qas = sum(len(qas) for qas in category_qas.values()) categories = sorted(category_qas.keys()) print(f"Total QAs (excluding unanswerable): {total_qas}") print(f"Categories: {categories}") for cat in categories: print(f" {cat}: {len(category_qas[cat])} QAs") - + is_full = (sample_size is None and num_docs is None) if is_full: selected_ids = all_paper_ids @@ -794,22 +795,22 @@ def sample_qasper( for cat_qas in paper_category_qas[paper_id].values(): count += len(cat_qas) paper_qas_count[paper_id] = count - + random.seed(seed) shuffled_papers = all_paper_ids.copy() random.shuffle(shuffled_papers) shuffled_papers.sort(key=lambda pid: paper_qas_count[pid], reverse=True) - + selected_ids = shuffled_papers[:num_docs] print(f"Sampled {len(selected_ids)} documents with highest QA counts (seed={seed})") else: random.seed(seed) selected_ids = random.sample(all_paper_ids, num_docs) print(f"Sampled {len(selected_ids)} documents (seed={seed})") - + if sample_size is not None: print(f"Further sampling {sample_size} QAs from selected documents (mode: {sample_mode})") - + qa_with_indices = [] for paper_id in selected_ids: paper_data, source_file = paper_data_map[paper_id] @@ -832,49 +833,49 @@ def sample_qasper( answer_types.add("yes_no") primary_type = next(iter(answer_types), "extractive") qa_with_indices.append((paper_id, primary_type, i, qa_item)) - + if sample_mode == "stratified": selected_doc_category_qas = {} for paper_id, cat, i, qa_item in qa_with_indices: if cat not in selected_doc_category_qas: selected_doc_category_qas[cat] = [] selected_doc_category_qas[cat].append((paper_id, i, qa_item)) - + category_targets, should_fallback = calculate_category_targets( sample_size, sorted(selected_doc_category_qas.keys()) ) - + if not should_fallback: random.seed(seed) sampled_qas_indices = set() remaining_quota = sample_size - + category_actual = {} cats = sorted(selected_doc_category_qas.keys()) for cat in cats: if cat not in category_targets or category_targets[cat] <= 0: category_actual[cat] = 0 continue - + cat_qas = selected_doc_category_qas[cat].copy() random.shuffle(cat_qas) sample_count = min(len(cat_qas), category_targets[cat]) category_actual[cat] = sample_count remaining_quota -= sample_count - + for paper_id, i, qa_item in cat_qas[:sample_count]: sampled_qas_indices.add((paper_id, i)) - + if remaining_quota > 0: print(f"Reallocating remaining {remaining_quota} QA(s) to categories with available QAs") - + category_available = {} for cat in cats: if cat in selected_doc_category_qas: total_available = len(selected_doc_category_qas[cat]) used = category_actual.get(cat, 0) category_available[cat] = total_available - used - + while remaining_quota > 0: allocated_this_round = 0 for cat in cats: @@ -891,19 +892,19 @@ def sample_qasper( remaining_quota -= 1 allocated_this_round += 1 break - + if allocated_this_round == 0: print(f"Warning: No more QAs available to sample. Stopping with {remaining_quota} unallocated.") break - + print("Actual category counts after reallocation:") for cat in cats: print(f" {cat}: {category_actual.get(cat, 0)} QAs") - + for paper_id in selected_ids: paper_data, source_file = paper_data_map[paper_id] new_qas = [] - + for i, qa_item in enumerate(paper_data.get("qas", [])): is_unanswerable = all( ans.get("answer", {}).get("unanswerable", False) @@ -911,16 +912,16 @@ def sample_qasper( ) if is_unanswerable or (paper_id, i) in sampled_qas_indices: new_qas.append(qa_item) - + paper_data["qas"] = new_qas - + if sample_mode == "random": sampled_qas = random_sample_qas(sample_size, qa_with_indices, seed) - + keep_qas_indices = set() for paper_id, cat, i, qa_item in sampled_qas: keep_qas_indices.add((paper_id, i)) - + for paper_id in selected_ids: paper_data, source_file = paper_data_map[paper_id] new_qas = [] @@ -939,19 +940,19 @@ def sample_qasper( for cat_qas in paper_category_qas[paper_id].values(): count += len(cat_qas) paper_qas_count[paper_id] = count - + if sample_mode == "stratified": print(f"Using stratified sampling (seed={seed})") selected_ids = sample_docs_stratified( sample_size, category_qas, paper_category_qas, all_paper_ids, seed ) - + if sample_mode == "random": print(f"Using random sampling (seed={seed})") selected_ids = sample_docs_random( sample_size, paper_qas_count, all_paper_ids, seed ) - + output_dir.mkdir(parents=True, exist_ok=True) data_by_file = {} for paper_id in selected_ids: @@ -959,12 +960,12 @@ def sample_qasper( if source_file not in data_by_file: data_by_file[source_file] = {} data_by_file[source_file][paper_id] = paper_data - + for json_file, output_data in data_by_file.items(): output_file = output_dir / json_file save_json_data(output_data, output_file) print(f"Saved {len(output_data)} papers to {json_file}") - + sampled_qas = 0 for paper_id in selected_ids: paper_data, source_file = paper_data_map[paper_id] @@ -976,7 +977,7 @@ def sample_qasper( ) if not is_unanswerable: sampled_qas += 1 - + metadata = { "dataset": "Qasper", "original_num_docs": original_num_docs, @@ -990,7 +991,7 @@ def sample_qasper( "is_full": is_full, "note": "Unanswerable questions are excluded from QA count" } - + return metadata @@ -1004,18 +1005,18 @@ def sample_financebench( ) -> Dict[str, Any]: """Sample Financebench dataset with stratified sampling support.""" from collections import defaultdict - + input_file = input_dir / "data" / "financebench_open_source.jsonl" if not input_file.exists(): raise FileNotFoundError(f"financebench_open_source.jsonl not found at {input_file}") - + data = load_jsonl_data(input_file) - + doc_groups = defaultdict(list) for item in data: doc_name = item.get("doc_name", "unknown") doc_groups[doc_name].append(item) - + category_qas = {} doc_category_qas = {} for doc_name, items in doc_groups.items(): @@ -1029,7 +1030,7 @@ def sample_financebench( doc_cat_qas[q_type] = [] doc_cat_qas[q_type].append(item) doc_category_qas[doc_name] = doc_cat_qas - + all_doc_names = list(doc_groups.keys()) original_num_docs = len(all_doc_names) original_total_qas = len(data) @@ -1039,7 +1040,7 @@ def sample_financebench( print(f"Categories: {categories}") for cat in categories: print(f" {cat}: {len(category_qas[cat])} QAs") - + is_full = (sample_size is None and num_docs is None) if is_full: selected_docs = all_doc_names @@ -1058,10 +1059,10 @@ def sample_financebench( print("Selected documents:") for doc in selected_docs: print(f" {doc}: {len(doc_groups[doc])} QAs") - + if sample_size is not None: print(f"Further sampling {sample_size} QAs from selected documents (mode: {sample_mode})") - + selected_doc_category_qas = {} for doc_name in selected_docs: doc_cat_qas = doc_category_qas[doc_name] @@ -1070,26 +1071,26 @@ def sample_financebench( selected_doc_category_qas[cat] = [] for item in items: selected_doc_category_qas[cat].append(item) - + if sample_mode == "stratified": sampled_items = stratified_sample_with_reallocation( sample_size, selected_doc_category_qas, seed ) - + new_doc_groups = defaultdict(list) for item in sampled_items: doc_name = item.get("doc_name", "unknown") new_doc_groups[doc_name].append(item) doc_groups = new_doc_groups - + if sample_mode == "random": all_items = [] for doc_name in selected_docs: items = doc_groups[doc_name] all_items.extend(items) - + sampled_items = random_sample_qas(sample_size, all_items, seed) - + new_doc_groups = defaultdict(list) for item in sampled_items: doc_name = item.get("doc_name", "unknown") @@ -1097,38 +1098,38 @@ def sample_financebench( doc_groups = new_doc_groups else: doc_qas_count = {doc_name: len(items) for doc_name, items in doc_groups.items()} - + if sample_mode == "stratified": print(f"Using stratified sampling (seed={seed})") selected_docs = sample_docs_stratified( sample_size, category_qas, doc_category_qas, all_doc_names, seed ) - + if sample_mode == "random": print(f"Using random sampling (seed={seed})") selected_docs = sample_docs_random( sample_size, doc_qas_count, all_doc_names, seed ) - + selected_data = [] for doc_name in selected_docs: selected_data.extend(doc_groups[doc_name]) - + output_file = output_dir / "financebench_open_source.jsonl" save_jsonl_data(selected_data, output_file) - + pdfs_src = input_dir / "pdfs" pdfs_dst = output_dir / "pdfs" - + if pdfs_src.exists(): pdfs_dst.mkdir(parents=True, exist_ok=True) - + for doc_name in selected_docs: src_pdf = pdfs_src / f"{doc_name}.pdf" if src_pdf.exists(): shutil.copy2(src_pdf, pdfs_dst / f"{doc_name}.pdf") print(f"Copied PDF: {doc_name}.pdf") - + metadata = { "dataset": "Financebench", "original_num_docs": original_num_docs, @@ -1141,7 +1142,7 @@ def sample_financebench( "sample_mode": sample_mode, "is_full": is_full } - + return metadata @@ -1166,19 +1167,19 @@ def sample_dataset( if dataset_name not in DATASET_SAMPLERS: print(f"Unknown dataset: {dataset_name}") return False - + print(f"\nProcessing {dataset_name}...") print(f"Input: {input_dir}") print(f"Output: {output_dir}") - + try: sampler = DATASET_SAMPLERS[dataset_name] metadata = sampler(input_dir, output_dir, sample_size, num_docs, seed, sample_mode) - + metadata_file = output_dir / "sampling_metadata.json" save_json_data(metadata, metadata_file) print(f"✓ Saved metadata to {metadata_file}") - + return True except Exception as e: print(f"Error sampling {dataset_name}: {e}") @@ -1240,22 +1241,22 @@ def main(): default="stratified", help="Sampling mode (default: stratified)" ) - + args = parser.parse_args() - + if args.full: args.sample_size = None args.num_docs = None - + input_dir = args.input_dir.resolve() output_dir = args.output_dir.resolve() - + datasets = ( - list(DATASET_SAMPLERS.keys()) - if args.dataset == "all" + list(DATASET_SAMPLERS.keys()) + if args.dataset == "all" else [args.dataset] ) - + print("=" * 60) print("RAG Benchmark Dataset Sampler") print("=" * 60) @@ -1267,12 +1268,12 @@ def main(): print(f"Random seed: {args.seed}") print(f"Datasets: {', '.join(datasets)}") print("=" * 60) - + success_count = 0 for dataset in datasets: dataset_input_dir = input_dir / dataset dataset_output_dir = output_dir / dataset - + if sample_dataset( dataset, dataset_input_dir, @@ -1283,11 +1284,11 @@ def main(): args.sample_mode ): success_count += 1 - + print("\n" + "=" * 60) print(f"Sampling complete: {success_count}/{len(datasets)} successful") print("=" * 60) - + return 0 if success_count == len(datasets) else 1 diff --git a/benchmark/RAG/src/adapters/base.py b/benchmark/RAG/src/adapters/base.py index 05b0fc3e6..954b22273 100644 --- a/benchmark/RAG/src/adapters/base.py +++ b/benchmark/RAG/src/adapters/base.py @@ -1,8 +1,8 @@ -import sys from abc import ABC, abstractmethod from dataclasses import dataclass, field +from typing import List, Dict, Any, Union, Optional +import sys from pathlib import Path -from typing import Any, Dict, List, Optional, Union sys.path.append(str(Path(__file__).parent.parent)) @@ -27,7 +27,7 @@ class StandardSample: metadata: Dict[str, Any] = field(default_factory=dict) -@dataclass +@dataclass class StandardDoc: """Standardized sampleid to doc_path mapping structure""" sample_id:str @@ -36,7 +36,7 @@ class StandardDoc: class BaseAdapter(ABC): """Base class for all dataset adapters""" - + def __init__(self, raw_file_path: str): self.raw_file_path = raw_file_path self.logger = get_logger() @@ -60,7 +60,7 @@ def load_and_transform(self) -> List[StandardSample]: Must be implemented by subclasses. """ pass - + @abstractmethod def build_prompt(self, qa: StandardQA, context_blocks: List[str]) -> tuple[str, Dict[str, Any]]: """ diff --git a/benchmark/RAG/src/adapters/financebench_adapter.py b/benchmark/RAG/src/adapters/financebench_adapter.py index 4a44905ab..40923e18f 100644 --- a/benchmark/RAG/src/adapters/financebench_adapter.py +++ b/benchmark/RAG/src/adapters/financebench_adapter.py @@ -9,14 +9,14 @@ import json import os -import sys from collections import defaultdict +from typing import List, Dict, Any from pathlib import Path -from typing import Any, Dict, List +import sys sys.path.append(str(Path(__file__).parent)) -from base import BaseAdapter, StandardDoc, StandardQA, StandardSample +from base import BaseAdapter, StandardDoc, StandardSample, StandardQA CATEGORY_INSTRUCTIONS = { "domain-relevant": """Answer the financial question based on the document. @@ -24,14 +24,14 @@ - If numerical, include units (e.g., USD millions, %) - Provide concise, direct answer - Do NOT invent information""", - + "metrics-generated": """Calculate the financial metric based on the document. - Use ONLY numbers from the context - Show your calculations clearly - Round to appropriate decimal places - Include units (e.g., USD millions, %) - Do NOT invent numbers""", - + "novel-generated": """Answer the financial question based on the document. - Use ONLY facts from the context - If numerical, include units (e.g., USD millions, %) @@ -136,10 +136,10 @@ def load_and_transform(self) -> List[StandardSample]: def build_prompt(self, qa: StandardQA, context_blocks: List[str]) -> tuple[str, Dict[str, Any]]: context_text = "\n\n".join(context_blocks) - + category = qa.category category_instruction = CATEGORY_INSTRUCTIONS.get(category, "") - + if category_instruction: full_prompt = f"""{context_text} @@ -158,7 +158,7 @@ def build_prompt(self, qa: StandardQA, context_blocks: List[str]) -> tuple[str, Question: {qa.question} Answer:""" - + meta = { "question_type": qa.category, "financebench_id": qa.metadata.get("financebench_id"), diff --git a/benchmark/RAG/src/adapters/locomo_adapter.py b/benchmark/RAG/src/adapters/locomo_adapter.py index fbcd51aee..326e29a53 100644 --- a/benchmark/RAG/src/adapters/locomo_adapter.py +++ b/benchmark/RAG/src/adapters/locomo_adapter.py @@ -1,9 +1,10 @@ # src/adapters/locomo_adapter.py import json import os -from typing import Any, Dict, List +from typing import List, Dict, Any + +from .base import BaseAdapter, StandardDoc, StandardSample, StandardQA -from .base import BaseAdapter, StandardDoc, StandardQA, StandardSample MISSING_RULE = "If no information is available to answer the question, write 'Not mentioned'." @@ -12,19 +13,19 @@ "1": """Extract the exact factual answer from the conversation. - Use the exact words from the context when possible - If multiple items, separate with commas""", - + "2": """Answer the time-related question. - Pay close attention to DATE labels in the conversation - Calculate relative time (e.g., "10 years ago") when needed - Use the exact dates from the context""", - + "3": """Reason and infer based on the conversation. - Use ONLY the facts in the context - State your conclusion clearly (e.g., "Likely yes", "Probably no") - Do NOT explain your reasoning or provide any basis/justification - Only output your final conclusion, nothing else - Do NOT invent information""", - + "4": """Understand the meaning and significance. - Focus on what the speakers mean, not just what they say - Identify symbolism or implied meaning @@ -135,20 +136,20 @@ def _convert_conversation_to_markdown(self, sample_id: str, conv: Dict[str, Any] txt = turn.get("text", "") raw_id = turn.get("dia_id") or turn.get("id") - + image_suffix = "" img_url = turn.get("img_url", []) blip_caption = turn.get("blip_caption", "") - + if img_url and blip_caption: if len(img_url) == 1: image_suffix = f"[Attached image:{blip_caption}]" else: for i, caption in enumerate([blip_caption] * len(img_url)): image_suffix += f"[Attached image {i+1}:{caption}]" - + dia_suffix = f" [{raw_id}]" if raw_id else "" - + md_lines.append(f"**{spk}**: {txt}{image_suffix}{dia_suffix}") session_idx += 1 @@ -158,9 +159,9 @@ def _convert_conversation_to_markdown(self, sample_id: str, conv: Dict[str, Any] def build_prompt(self, qa: StandardQA, context_blocks: List[str]) -> tuple[str, Dict[str, Any]]: category = str(qa.category) context_text = "\n\n".join(context_blocks) - + category_instruction = CATEGORY_INSTRUCTIONS.get(category, "") - + if category_instruction: full_prompt = f"""{context_text} diff --git a/benchmark/RAG/src/adapters/qasper_adapter.py b/benchmark/RAG/src/adapters/qasper_adapter.py index 0ec31a5a3..b17a5a46e 100644 --- a/benchmark/RAG/src/adapters/qasper_adapter.py +++ b/benchmark/RAG/src/adapters/qasper_adapter.py @@ -23,9 +23,9 @@ import json import os -from typing import Any, Dict, List +from typing import List, Dict, Any -from .base import BaseAdapter, StandardDoc, StandardQA, StandardSample +from .base import BaseAdapter, StandardDoc, StandardSample, StandardQA # Specific instructions for different answer types CATEGORY_INSTRUCTIONS = { @@ -33,13 +33,13 @@ - Use EXACT wording from the context - Do NOT rephrase or add explanation - Provide concise, direct answer""", - + "free_form": """Answer using information from the paper. - Use ONLY facts from the context - You may rephrase or summarize in your own words - Provide clear, complete answer - Do NOT invent information""", - + "yes_no": """Answer Yes/No question based on the paper. - First respond "Yes" or "No" - Do NOT add explanation @@ -62,7 +62,7 @@ class QasperAdapter(BaseAdapter): raw_file_path: Raw JSON data file path logger: Logger """ - + def data_prepare(self, doc_dir: str) -> List[StandardDoc]: """ Load raw data and convert to OpenViking-friendly format. @@ -99,7 +99,7 @@ def data_prepare(self, doc_dir: str) -> List[StandardDoc]: data = json.load(f) os.makedirs(doc_dir, exist_ok=True) - + for paper_id, paper_data in data.items(): doc_content = self._convert_paper_to_markdown(paper_id, paper_data) @@ -155,36 +155,36 @@ def load_and_transform(self) -> List[StandardSample]: for paper_id, paper_data in data.items(): qa_pairs = [] paper_title = paper_data.get("title", "Unknown Title") - + for qa_item in paper_data.get("qas", []): # --- Unanswerable filtering logic --- # Check if all answers are marked as unanswerable is_unanswerable = all( - ans.get("answer", {}).get("unanswerable", False) + ans.get("answer", {}).get("unanswerable", False) for ans in qa_item.get("answers", []) ) if is_unanswerable: continue # ------------------ - + raw_question = qa_item.get("question", "") question_id = qa_item.get("question_id", "") # Append paper title to question for easier retrieval question = f'Based on the paper "{paper_title}", {raw_question}' - + gold_answers = [] evidence_list = [] answer_types = [] answer_evidence_pairs = [] - + # Iterate through all annotator answers for answer_wrapper in qa_item.get("answers", []): answer_obj = answer_wrapper.get("answer", {}) - + current_answer = None answer_type = self._get_answer_type(answer_obj) - + # Process different answer types if answer_obj.get("unanswerable", False): current_answer = "Not mentioned" @@ -193,7 +193,7 @@ def load_and_transform(self) -> List[StandardSample]: extractive_spans = answer_obj.get("extractive_spans", []) free_form_answer = answer_obj.get("free_form_answer", "") yes_no = answer_obj.get("yes_no") - + if extractive_spans: valid_spans = [span.strip() for span in extractive_spans if span and span.strip()] if valid_spans: @@ -208,7 +208,7 @@ def load_and_transform(self) -> List[StandardSample]: elif yes_no is not None: current_answer = "Yes" if yes_no else "No" gold_answers.append(current_answer) - + # Collect evidence text current_evidence = [] evidence = answer_obj.get("evidence", []) @@ -217,11 +217,11 @@ def load_and_transform(self) -> List[StandardSample]: current_evidence.append(ev) if ev not in evidence_list: evidence_list.append(ev) - + # Record answer type (deduplicated) if answer_type not in answer_types: answer_types.append(answer_type) - + # Save answer-evidence correspondence if current_answer: answer_evidence_pairs.append({ @@ -229,14 +229,14 @@ def load_and_transform(self) -> List[StandardSample]: "evidence": current_evidence, "answer_type": answer_type }) - + # If no answers, default to "Not mentioned" if not gold_answers: gold_answers = ["Not mentioned"] - + # Deduplicate (preserve order) gold_answers = list(dict.fromkeys(gold_answers)) - + qa_pairs.append(StandardQA( question=question, gold_answers=gold_answers, @@ -255,7 +255,7 @@ def load_and_transform(self) -> List[StandardSample]: )) return standard_samples - + def _get_answer_type(self, answer_obj: Dict[str, Any]) -> str: """ Determine answer type from answer object. @@ -318,33 +318,33 @@ def _convert_paper_to_markdown(self, paper_id: str, paper_data: Dict[str, Any]) str: Markdown formatted paper content """ md_lines = [] - + # Title title = paper_data.get("title", "Unknown Title") md_lines.append(f"# {title}") md_lines.append(f"Paper ID: {paper_id}\n") - + # Abstract abstract = paper_data.get("abstract", "") if abstract: md_lines.append("## Abstract") md_lines.append(abstract) md_lines.append("") - + # Main text sections full_text = paper_data.get("full_text", []) for section in full_text: section_name = section.get("section_name", "") paragraphs = section.get("paragraphs", []) - + if section_name: md_lines.append(f"## {section_name}") - + for para in paragraphs: if para and para.strip(): md_lines.append(para.strip()) md_lines.append("") - + # Figure and table information figures_and_tables = paper_data.get("figures_and_tables", []) if figures_and_tables: @@ -352,13 +352,13 @@ def _convert_paper_to_markdown(self, paper_id: str, paper_data: Dict[str, Any]) for idx, fig in enumerate(figures_and_tables, 1): caption = fig.get("caption", "") file_name = fig.get("file", "") - + # Determine if figure or table based on filename or caption if "Figure" in file_name or "figure" in caption.lower(): md_lines.append(f"### Figure {idx}") else: md_lines.append(f"### Table {idx}") - + if caption: md_lines.append(f"Caption: {caption}") if file_name: @@ -369,12 +369,12 @@ def _convert_paper_to_markdown(self, paper_id: str, paper_data: Dict[str, Any]) def build_prompt(self, qa: StandardQA, context_blocks: List[str]) -> tuple[str, Dict[str, Any]]: context_text = "\n\n".join(context_blocks) if context_blocks else "No relevant context found." - + answer_types = qa.metadata.get("answer_types", []) primary_type = answer_types[0] if answer_types else None - + category_instruction = CATEGORY_INSTRUCTIONS.get(primary_type, "") - + if category_instruction: full_prompt = f"""{context_text} diff --git a/benchmark/RAG/src/adapters/syllabusqa_adapter.py b/benchmark/RAG/src/adapters/syllabusqa_adapter.py index 1ce02c39e..69fcf3fb9 100644 --- a/benchmark/RAG/src/adapters/syllabusqa_adapter.py +++ b/benchmark/RAG/src/adapters/syllabusqa_adapter.py @@ -25,12 +25,12 @@ - post_process_answer: Post-process LLM output """ -import csv import json import os -from typing import Any, Dict, List +import csv +from typing import List, Dict, Any -from .base import BaseAdapter, StandardDoc, StandardQA, StandardSample +from .base import BaseAdapter, StandardDoc, StandardSample, StandardQA # Rule for when answer cannot be found MISSING_RULE = "If no information is available to answer the question, write 'Not mentioned'." @@ -41,27 +41,27 @@ - Use EXACT wording from context when possible - Provide concise, direct answer - Do NOT add extra info or explanation""", - + "multi factual": """Extract multiple factual answers from the syllabus. - Use EXACT wording from context when possible - List items separated by commas - Include all relevant facts""", - + "single reasoning": """Answer using simple logical reasoning based on the syllabus. - Use ONLY facts from context - Make clear, direct conclusion - Do NOT explain reasoning - Do NOT invent information""", - + "multi reasoning": """Answer using reasoning based on the syllabus. - Use ONLY facts from context - Do NOT invent information""", - + "summarization": """Summarize relevant information from the syllabus. - Provide concise summary covering key points - Use wording from syllabus when possible - Include all important details""", - + "yes/no": """Answer Yes/No question based on the syllabus. - First respond "Yes" or "No" - Do NOT add explanation @@ -82,7 +82,7 @@ class SyllabusQAAdapter(BaseAdapter): syllabus_dir: docx file directory path logger: Logger """ - + def __init__(self, raw_file_path: str, **kwargs): """ Initialize SyllabusQAAdapter. @@ -97,7 +97,7 @@ def __init__(self, raw_file_path: str, **kwargs): base_dir = raw_file_path else: base_dir = os.path.dirname(raw_file_path) - + # Check for official repo structure first official_syllabus_dir = os.path.join(base_dir, 'syllabi', 'syllabi_redacted', 'word') if os.path.exists(official_syllabus_dir): @@ -105,7 +105,7 @@ def __init__(self, raw_file_path: str, **kwargs): else: # Fallback to original structure self.syllabus_dir = os.path.join(base_dir, 'syllabi') - + def data_prepare(self, doc_dir: str) -> List[StandardDoc]: """ Load raw docx files and convert to OpenViking-friendly format. @@ -127,27 +127,27 @@ def data_prepare(self, doc_dir: str) -> List[StandardDoc]: res: List[StandardDoc] = [] os.makedirs(doc_dir, exist_ok=True) - + # Get list of syllabus_name mentioned in CSV required_syllabi = self._get_required_syllabi() self.logger.info(f"[SyllabusQAAdapter] Required syllabi from CSV: {len(required_syllabi)}") - + # Get all docx files docx_files = [f for f in os.listdir(self.syllabus_dir) if f.endswith('.docx')] - + for docx_file in docx_files: syllabus_id = docx_file.replace('.docx', '') - + # Only process syllabi mentioned in CSV if syllabus_id not in required_syllabi: continue - + docx_path = os.path.join(self.syllabus_dir, docx_file) - + try: # Convert docx to Markdown doc_content = self._convert_docx_to_markdown(docx_path) - + doc_path = os.path.join(doc_dir, f"{syllabus_id}_doc.md") with open(doc_path, "w", encoding="utf-8") as f: f.write(doc_content) @@ -159,7 +159,7 @@ def data_prepare(self, doc_dir: str) -> List[StandardDoc]: self.logger.warning("python-docx not installed, skipping docx conversion") break raise e - + self.logger.info(f"[SyllabusQAAdapter] Processed {len(res)} syllabus documents") return res @@ -171,28 +171,28 @@ def _get_required_syllabi(self) -> set: set: syllabus_name set """ required = set() - + # Determine data source type if self.raw_file_path.endswith('.json'): # Load from JSON if not os.path.exists(self.raw_file_path): return required - + with open(self.raw_file_path, 'r', encoding='utf-8') as f: data = json.load(f) - + for syllabus_name in data.keys(): if syllabus_name: required.add(syllabus_name) elif self.raw_file_path.endswith('.csv'): csv_files = [self.raw_file_path] elif os.path.isdir(self.raw_file_path): - csv_files = [os.path.join(self.raw_file_path, f) - for f in os.listdir(self.raw_file_path) + csv_files = [os.path.join(self.raw_file_path, f) + for f in os.listdir(self.raw_file_path) if f.endswith('.csv')] else: return required - + # Process CSV files if any if 'csv_files' in locals(): for csv_file in csv_files: @@ -205,7 +205,7 @@ def _get_required_syllabi(self) -> set: syllabus_name = row.get('syllabus_name', '') if syllabus_name: required.add(syllabus_name) - + return required def _convert_docx_to_markdown(self, docx_path: str) -> str: @@ -222,15 +222,15 @@ def _convert_docx_to_markdown(self, docx_path: str) -> str: from docx import Document except ImportError: raise ImportError("python-docx is required. Install with: pip install python-docx") - + doc = Document(docx_path) md_lines = [] - + # Extract filename as title filename = os.path.basename(docx_path).replace('.docx', '') md_lines.append(f"# {filename}") md_lines.append("") - + # Iterate through all paragraphs for para in doc.paragraphs: text = para.text.strip() @@ -246,7 +246,7 @@ def _convert_docx_to_markdown(self, docx_path: str) -> str: else: md_lines.append(text) md_lines.append("") - + # Extract tables for table in doc.tables: md_lines.append("## Table") @@ -254,7 +254,7 @@ def _convert_docx_to_markdown(self, docx_path: str) -> str: cells = [cell.text.strip() for cell in row.cells] md_lines.append("| " + " | ".join(cells) + " |") md_lines.append("") - + return "\n".join(md_lines) def load_and_transform(self) -> List[StandardSample]: @@ -281,8 +281,8 @@ def load_and_transform(self) -> List[StandardSample]: return self._load_from_csv([self.raw_file_path]) elif os.path.isdir(self.raw_file_path): # Directory, find all CSV files - csv_files = [os.path.join(self.raw_file_path, f) - for f in os.listdir(self.raw_file_path) + csv_files = [os.path.join(self.raw_file_path, f) + for f in os.listdir(self.raw_file_path) if f.endswith('.csv')] return self._load_from_csv(csv_files) else: @@ -302,24 +302,24 @@ def _load_from_json(self) -> List[StandardSample]: for syllabus_name, qa_list in data.items(): qa_pairs = [] - + for qa_item in qa_list: question = qa_item.get("question", "") answer = qa_item.get("answer", "") question_type = qa_item.get("question_type", "") qa_id = qa_item.get("id", "") - + # Skip "no answer" type questions as RAG results cannot be evaluated if question_type == "no answer": continue - + # Collect answer_span as evidence evidence = [] for i in range(1, 6): span = qa_item.get(f"answer_span_{i}", "") if span and span.strip(): evidence.append(span.strip()) - + # Collect reasoning_steps, also as evidence (for reasoning type questions) reasoning_steps = [] for i in range(1, 6): @@ -329,10 +329,10 @@ def _load_from_json(self) -> List[StandardSample]: # reasoning_steps also added to evidence for recall calculation if step.strip() not in evidence: evidence.append(step.strip()) - + # Format question formatted_question = f'Based on the syllabus "{syllabus_name}", {question}' - + qa_pairs.append(StandardQA( question=formatted_question, gold_answers=[answer] if answer else ["Not mentioned"], @@ -365,12 +365,12 @@ def _load_from_csv(self, csv_files: List[str]) -> List[StandardSample]: """ # Group by syllabus_name syllabus_qa_map: Dict[str, List] = {} - + for csv_file in csv_files: if not os.path.exists(csv_file): self.logger.warning(f"CSV file not found: {csv_file}") continue - + with open(csv_file, 'r', encoding='utf-8') as f: reader = csv.DictReader(f) for row in reader: @@ -378,29 +378,29 @@ def _load_from_csv(self, csv_files: List[str]) -> List[StandardSample]: if syllabus_name not in syllabus_qa_map: syllabus_qa_map[syllabus_name] = [] syllabus_qa_map[syllabus_name].append(row) - + standard_samples = [] - + for syllabus_name, qa_list in syllabus_qa_map.items(): qa_pairs = [] - + for qa_item in qa_list: question = qa_item.get("question", "") answer = qa_item.get("answer", "") question_type = qa_item.get("question_type", "") qa_id = qa_item.get("id", "") - + # Skip "no answer" type questions as RAG results cannot be evaluated if question_type == "no answer": continue - + # Collect answer_span as evidence evidence = [] for i in range(1, 6): span = qa_item.get(f"answer_span_{i}", "") if span and span.strip(): evidence.append(span.strip()) - + # Collect reasoning_steps, also as evidence (for reasoning type questions) reasoning_steps = [] for i in range(1, 6): @@ -410,10 +410,10 @@ def _load_from_csv(self, csv_files: List[str]) -> List[StandardSample]: # reasoning_steps also added to evidence for recall calculation if step.strip() not in evidence: evidence.append(step.strip()) - + # Format question formatted_question = f'Based on the syllabus "{syllabus_name}", {question}' - + qa_pairs.append(StandardQA( question=formatted_question, gold_answers=[answer] if answer else ["Not mentioned"], @@ -455,11 +455,11 @@ def build_prompt(self, qa: StandardQA, context_blocks: List[str]) -> tuple[str, """ eff_q = qa.question category = qa.category - + category_instruction = CATEGORY_INSTRUCTIONS.get(category, "") - + context_text = "\n\n".join(context_blocks) - + if category_instruction: full_prompt = f"{context_text}\n\n{category_instruction}\n\n{MISSING_RULE}\n\nQuestion: {eff_q}\n\nAnswer:" else: diff --git a/benchmark/RAG/src/core/judge_util.py b/benchmark/RAG/src/core/judge_util.py index 716175373..b5549a65a 100644 --- a/benchmark/RAG/src/core/judge_util.py +++ b/benchmark/RAG/src/core/judge_util.py @@ -28,7 +28,7 @@ def llm_grader( score = 0 reasoning = "No reasoning provided." prompt_type = "Generic_0-4" - + # Handle case when gold_answer is a list if isinstance(gold_answer, list): gold_answer_str = " | ".join(gold_answer) diff --git a/benchmark/RAG/src/core/llm_client.py b/benchmark/RAG/src/core/llm_client.py index 1a2db8af9..a18505b14 100644 --- a/benchmark/RAG/src/core/llm_client.py +++ b/benchmark/RAG/src/core/llm_client.py @@ -1,7 +1,6 @@ import time - -from langchain_core.messages import HumanMessage from langchain_openai import ChatOpenAI +from langchain_core.messages import HumanMessage class LLMClientWrapper: diff --git a/benchmark/RAG/src/core/metrics.py b/benchmark/RAG/src/core/metrics.py index 51b7a3231..d40d8de1e 100644 --- a/benchmark/RAG/src/core/metrics.py +++ b/benchmark/RAG/src/core/metrics.py @@ -1,6 +1,6 @@ -import collections import re import string +import collections from typing import List @@ -8,7 +8,7 @@ class MetricsCalculator: @staticmethod def normalize_answer(s): """Normalize answer text: remove punctuation, convert to lowercase, remove articles""" - s = str(s).replace(',', "") + s = str(s).replace(',', "") def remove_articles(text): return re.sub(r'\b(a|an|the|and)\b', ' ', text) def white_space_fix(text): return ' '.join(text.split()) def remove_punc(text): @@ -53,34 +53,34 @@ def check_recall(retrieved_texts: List[str], evidence_list: List[str], soft_thre Returns: float, retrieval recall score, range 0.0 to 1.0 """ - if not evidence_list: - return 0.0 - + if not evidence_list: + return 0.0 + combined_retrieved = " ".join(retrieved_texts) - + normalized_retrieved = MetricsCalculator.normalize_answer(combined_retrieved) ret_tokens = set(normalized_retrieved.split()) - + hit_count = 0 - + for evidence in evidence_list: if evidence in combined_retrieved: hit_count += 1 continue - + normalized_ev = MetricsCalculator.normalize_answer(evidence) ev_tokens = set(normalized_ev.split()) - + if not ev_tokens: continue - + if len(ev_tokens) < min_soft_match_tokens: continue - + overlap_count = len(ev_tokens & ret_tokens) coverage = overlap_count / len(ev_tokens) - + if coverage >= soft_threshold: hit_count += 1 - + return hit_count / len(evidence_list) diff --git a/benchmark/RAG/src/core/monitor.py b/benchmark/RAG/src/core/monitor.py index 17971a825..4c25fb300 100644 --- a/benchmark/RAG/src/core/monitor.py +++ b/benchmark/RAG/src/core/monitor.py @@ -34,7 +34,7 @@ def get_status_dict(self): """Return real-time status dictionary for tqdm progress bar display""" elapsed = time.time() - self.stats.start_time qps = self.stats.completed_tasks / elapsed if elapsed > 0 else 0 - + tokens = self.stats.total_tokens if tokens > 1_000_000: token_str = f"{tokens/1_000_000:.1f}M" diff --git a/benchmark/RAG/src/core/vector_store.py b/benchmark/RAG/src/core/vector_store.py index 13c6322f5..feb06fe21 100644 --- a/benchmark/RAG/src/core/vector_store.py +++ b/benchmark/RAG/src/core/vector_store.py @@ -1,14 +1,13 @@ import os -import sys import time -from pathlib import Path from typing import List +import sys +from pathlib import Path sys.path.append(str(Path(__file__).parent.parent)) +from adapters.base import StandardDoc, StandardSample import tiktoken -from adapters.base import StandardDoc - import openviking as ov @@ -17,9 +16,9 @@ def __init__(self, store_path: str): self.store_path = store_path if not os.path.exists(store_path): os.makedirs(store_path) - + self.client = ov.SyncOpenViking(path=store_path) - + try: self.enc = tiktoken.get_encoding("cl100k_base") except Exception as e: @@ -36,14 +35,14 @@ def ingest(self, samples: List[StandardDoc], max_workers=10, monitor=None, inges total_input_tokens = 0 total_output_tokens = 0 total_embedding_tokens = 0 - + if not samples: return { "time": time.time() - start_time, "input_tokens": 0, "output_tokens": 0 } - + if ingest_mode == "directory": doc_paths = [os.path.abspath(s.doc_path) for s in samples] common_ancestor = None @@ -52,7 +51,7 @@ def ingest(self, samples: List[StandardDoc], max_workers=10, monitor=None, inges common_ancestor = os.path.commonpath(doc_paths) except ValueError: common_ancestor = None - + if common_ancestor: result = self.client.add_resource(common_ancestor, wait=True, telemetry=True) telemetry = result.get("telemetry", {}) diff --git a/benchmark/RAG/src/pipeline.py b/benchmark/RAG/src/pipeline.py index 3992f90f9..54bd67937 100644 --- a/benchmark/RAG/src/pipeline.py +++ b/benchmark/RAG/src/pipeline.py @@ -1,20 +1,21 @@ -import json import os -import sys +import json import time +import random +import re from concurrent.futures import ThreadPoolExecutor, as_completed -from pathlib import Path - from tqdm import tqdm +from pathlib import Path +import sys sys.path.append(str(Path(__file__).parent)) from adapters.base import BaseAdapter -from core.judge_util import llm_grader from core.logger import get_logger -from core.metrics import MetricsCalculator -from core.monitor import BenchmarkMonitor from core.vector_store import VikingStoreWrapper +from core.monitor import BenchmarkMonitor +from core.metrics import MetricsCalculator +from core.judge_util import llm_grader class BenchmarkPipeline: @@ -25,14 +26,14 @@ def __init__(self, config, adapter: BaseAdapter, vector_db: VikingStoreWrapper, self.llm = llm self.logger = get_logger() self.monitor = BenchmarkMonitor() - + self.output_dir = self.config['paths']['output_dir'] if not os.path.exists(self.output_dir): os.makedirs(self.output_dir, exist_ok=True) self.generated_file = os.path.join(self.output_dir, "generated_answers.json") self.eval_file = os.path.join(self.output_dir, "qa_eval_detailed_results.json") self.report_file = os.path.join(self.output_dir, "benchmark_metrics_report.json") - + self.metrics_summary = { "insertion": {"time": 0, "input_tokens": 0, "output_tokens": 0, "embedding_tokens": 0}, "deletion": {"time": 0, "input_tokens": 0, "output_tokens": 0, "embedding_tokens": 0} @@ -58,17 +59,17 @@ def run_generation(self): ingest_workers = self.config['execution'].get('ingest_workers', 10) ingest_mode = self.config['execution'].get('ingest_mode', 'per_file') - + mode_desc = { 'directory': 'Unified directory mode', 'per_file': 'Per-file mode' } self.logger.info(f"Ingestion mode: {ingest_mode} ({mode_desc.get(ingest_mode, 'Unknown mode')})") self.logger.info(f"Number of documents: {len(doc_info)}") - + ingest_stats = self.db.ingest( - doc_info, - max_workers=ingest_workers, + doc_info, + max_workers=ingest_workers, monitor=self.monitor, ingest_mode=ingest_mode ) @@ -83,19 +84,19 @@ def run_generation(self): "Total Embedding Tokens": self.metrics_summary["insertion"].get("embedding_tokens", 0) } }) - - samples = self.adapter.load_and_transform() + + samples = self.adapter.load_and_transform() tasks = self._prepare_tasks(samples) results_map = {} max_workers = self.config['execution']['max_workers'] task_errors = [] - + with ThreadPoolExecutor(max_workers=max_workers) as executor: future_to_task = { - executor.submit(self._process_generation_task, task): task + executor.submit(self._process_generation_task, task): task for task in tasks } - + pbar = tqdm(total=len(tasks), desc="Generating Answers", unit="task") for future in as_completed(future_to_task): task = future_to_task[future] @@ -148,13 +149,13 @@ def run_evaluation(self): eval_items = items eval_results_map = {} - + with ThreadPoolExecutor(max_workers=self.config['execution']['max_workers']) as executor: future_to_item = { - executor.submit(self._process_evaluation_task, item): item + executor.submit(self._process_evaluation_task, item): item for item in eval_items } - + pbar = tqdm(total=len(eval_items), desc="Evaluating", unit="item") for future in as_completed(future_to_item): try: @@ -218,7 +219,7 @@ def _process_generation_task(self, task): self.monitor.worker_start() try: qa = task['qa'] - + t0 = time.time() # Get retrieval instruction from config, default to empty retrieval_instruction = self.config['execution'].get('retrieval_instruction', '') @@ -232,22 +233,22 @@ def _process_generation_task(self, task): self.logger.debug(f"[Query-{task['id']}] No retrieval instruction, using raw query") search_res = self.db.retrieve(query=enhanced_query, topk=self.config['execution']['retrieval_topk']) latency = time.time() - t0 - + retrieved_texts = [] retrieved_uris = [] context_blocks = [] - + for r in search_res.resources: retrieved_uris.append(r.uri) content = self.db.read_resource(r.uri) if getattr(r, 'level', 2) == 2 else f"{getattr(r, 'abstract', '')}\n{getattr(r, 'overview', '')}" retrieved_texts.append(content) clean = content[:8000] context_blocks.append(clean) - + recall = MetricsCalculator.check_recall(retrieved_texts, qa.evidence) - + full_prompt, meta = self.adapter.build_prompt(qa, context_blocks) - + ans_raw = self.llm.generate(full_prompt) ans = self.adapter.post_process_answer(qa, ans_raw, meta) @@ -255,7 +256,7 @@ def _process_generation_task(self, task): in_tokens = self.db.count_tokens(full_prompt) + self.db.count_tokens(qa.question) out_tokens = self.db.count_tokens(ans) self.monitor.worker_end(tokens=in_tokens + out_tokens) - + self.logger.info(f"[Query-{task['id']}] Q: {qa.question[:30]}... | Recall: {recall:.2f} | Latency: {latency:.2f}s") return { @@ -281,31 +282,31 @@ def _process_evaluation_task(self, item): This correctly handles multi-annotator scenarios while maintaining compatibility with single-answer datasets (like Locomo). """ ans, golds = item['llm']['final_answer'], item['gold_answers'] - + f1 = max((MetricsCalculator.calculate_f1(ans, gt) for gt in golds), default=0.0) - + dataset_name = self.config.get('dataset_name', 'Unknown_Dataset') - + eval_record = { "score": 0.0, "reasoning": "", "prompt_type": "" } - + try: eval_res = llm_grader( - self.llm.llm, - self.config['llm']['model'], - item['question'], + self.llm.llm, + self.config['llm']['model'], + item['question'], golds, ans, dataset_name=dataset_name ) eval_record = eval_res - + except Exception as e: self.logger.error(f"Grader error: {e}") - + if MetricsCalculator.check_refusal(ans) and any(MetricsCalculator.check_refusal(gt) for gt in golds): f1 = 1.0 eval_record["score"] = 4.0 @@ -315,7 +316,7 @@ def _process_evaluation_task(self, item): acc = eval_record["score"] item["metrics"].update({"F1": f1, "Accuracy": acc}) - + item["llm_evaluation"] = { "prompt_used": eval_record["prompt_type"], "reasoning": eval_record["reasoning"], @@ -323,7 +324,7 @@ def _process_evaluation_task(self, item): } detailed_info = ( - "\n" + "="*60 + + f"\n" + "="*60 + f"\n[Query ID]: {item['_global_index']}" f"\n[Question]: {item['question']}" f"\n[Retrieved URIs]: {item['retrieval'].get('uris', [])}" diff --git a/benchmark/locomo/openclaw/eval.py b/benchmark/locomo/openclaw/eval.py index fb8f89ad6..744d441eb 100644 --- a/benchmark/locomo/openclaw/eval.py +++ b/benchmark/locomo/openclaw/eval.py @@ -379,7 +379,7 @@ def run_ingest( if args.clear_ingest_record: ingest_record = {} save_ingest_record(ingest_record) - print("[INFO] All existing ingest records cleared", file=sys.stderr) + print(f"[INFO] All existing ingest records cleared", file=sys.stderr) else: ingest_record = load_ingest_record() @@ -416,7 +416,7 @@ def run_ingest( if args.viking: try: viking_ingest(msg) - print(" -> [viking] saved", file=sys.stderr) + print(f" -> [viking] saved", file=sys.stderr) results.append({ "sample_id": sample_id, "session": meta["session_key"], @@ -488,7 +488,7 @@ def run_ingest( # Save ingest record save_ingest_record(ingest_record) total_processed = len(results) + skipped_count - print("\n=== Ingest summary ===", file=sys.stderr) + print(f"\n=== Ingest summary ===", file=sys.stderr) print(f"Total sessions: {total_processed}", file=sys.stderr) print(f"Completed: {len(results)}", file=sys.stderr) print(f"Skipped (already ingested): {skipped_count}", file=sys.stderr) @@ -569,7 +569,7 @@ def run_sample_qa( qas = filtered_qas if not qas: print(f"\n=== Sample {sample_id} [{sample_idx}] (user={user_key}) ===", file=sys.stderr) - print(" All QA questions already executed, skipping sample.", file=sys.stderr) + print(f" All QA questions already executed, skipping sample.", file=sys.stderr) return [], {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0} jsonl_path = f"{args.output}.{sample_idx}.jsonl" if args.output else None @@ -762,7 +762,7 @@ def run_qa( samples = load_locomo_data(args.input, args.sample) print(f" user: {args.user or 'eval-{sample_idx}'}", file=sys.stderr) - print(" running in single-thread mode", file=sys.stderr) + print(f" running in single-thread mode", file=sys.stderr) # Load already executed records from CSV csv_path = f"{args.output}.csv" if args.output else "qa_results.csv" diff --git a/benchmark/locomo/vikingbot/import_to_ov.py b/benchmark/locomo/vikingbot/import_to_ov.py index 2a149188a..94a69d8ec 100644 --- a/benchmark/locomo/vikingbot/import_to_ov.py +++ b/benchmark/locomo/vikingbot/import_to_ov.py @@ -20,7 +20,7 @@ import traceback from datetime import datetime, timedelta from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple +from typing import List, Dict, Any, Tuple, Optional import openviking as ov @@ -451,7 +451,7 @@ async def run_import(args: argparse.Namespace) -> None: if args.clear_ingest_record: ingest_record = {} save_ingest_record(ingest_record) - print("[INFO] All existing ingest records cleared", file=sys.stderr) + print(f"[INFO] All existing ingest records cleared", file=sys.stderr) else: ingest_record = load_ingest_record() @@ -538,7 +538,7 @@ async def process_sample(item): "txt", session_key, ingest_record, success_keys ): print( - " [SKIP] already imported (use --force-ingest to reprocess)", file=sys.stderr + f" [SKIP] already imported (use --force-ingest to reprocess)", file=sys.stderr ) skipped_count += 1 continue @@ -586,12 +586,12 @@ async def process_sample(item): # Final summary total_processed = success_count + error_count + skipped_count - print("\n=== Import summary ===", file=sys.stderr) + print(f"\n=== Import summary ===", file=sys.stderr) print(f"Total sessions: {total_processed}", file=sys.stderr) print(f"Successfully imported: {success_count}", file=sys.stderr) print(f"Failed: {error_count}", file=sys.stderr) print(f"Skipped (already imported): {skipped_count}", file=sys.stderr) - print("\n=== Token usage summary ===", file=sys.stderr) + print(f"\n=== Token usage summary ===", file=sys.stderr) print(f"Total Embedding tokens: {total_embedding_tokens}", file=sys.stderr) print(f"Total VLM tokens: {total_vlm_tokens}", file=sys.stderr) if success_count > 0: @@ -600,7 +600,7 @@ async def process_sample(item): file=sys.stderr, ) print(f"Average VLM per session: {total_vlm_tokens // success_count}", file=sys.stderr) - print("\nResults saved to:", file=sys.stderr) + print(f"\nResults saved to:", file=sys.stderr) print(f" - Success records: {args.success_csv}", file=sys.stderr) print(f" - Error logs: {args.error_log}", file=sys.stderr) diff --git a/benchmark/locomo/vikingbot/judge.py b/benchmark/locomo/vikingbot/judge.py index e811b3e32..0b2e171f6 100644 --- a/benchmark/locomo/vikingbot/judge.py +++ b/benchmark/locomo/vikingbot/judge.py @@ -1,11 +1,10 @@ import argparse -import asyncio import csv import json import os - -from dotenv import load_dotenv +import asyncio from openai import AsyncOpenAI +from dotenv import load_dotenv load_dotenv() diff --git a/benchmark/locomo/vikingbot/run_eval.py b/benchmark/locomo/vikingbot/run_eval.py index 1a613d5bd..1799aec49 100644 --- a/benchmark/locomo/vikingbot/run_eval.py +++ b/benchmark/locomo/vikingbot/run_eval.py @@ -1,10 +1,11 @@ import argparse -import csv import json -import os import subprocess -import threading import time +import csv +import os +import re +import threading from concurrent.futures import ThreadPoolExecutor, as_completed @@ -87,7 +88,7 @@ def run_vikingbot_chat(question: str) -> tuple[str, dict, float, int, list]: time_cost = resp_json.get("time_cost", time_cost) iteration = resp_json.get("iteration", 0) tools_used_names = resp_json.get("tools_used_names", []) - except (json.JSONDecodeError, ValueError): + except (json.JSONDecodeError, ValueError) as e: response = f"[PARSE ERROR] {output}" token_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} iteration = 0 diff --git a/benchmark/locomo/vikingbot/stat_judge_result.py b/benchmark/locomo/vikingbot/stat_judge_result.py index e02e25ae4..2d7ebd8d6 100644 --- a/benchmark/locomo/vikingbot/stat_judge_result.py +++ b/benchmark/locomo/vikingbot/stat_judge_result.py @@ -70,7 +70,7 @@ def main(): f"Accuracy: {accuracy:.2%}", f"\nAverage time cost: {avg_time:.2f}s", f"\nAverage iteration: {total_iteration / valid_rows if valid_rows > 0 else 0.0:.2f}", - "\nToken usage:", + f"\nToken usage:", f" Total prompt tokens: {total_prompt_tokens}", f" Total completion tokens: {total_completion_tokens}", f" Total tokens: {total_tokens}", diff --git a/benchmark/skillsbench/skill_bench_eval.py b/benchmark/skillsbench/skill_bench_eval.py index 913d5e368..19cb04b02 100644 --- a/benchmark/skillsbench/skill_bench_eval.py +++ b/benchmark/skillsbench/skill_bench_eval.py @@ -118,7 +118,7 @@ def run_prepare(args: argparse.Namespace) -> None: temp_dir = PROJECT_ROOT / f"temp_skillsbench_{int(time.time())}" print(f" Cloning {SKILLSBENCH_REPO}...", file=sys.stderr) - print(" (this may take a moment...)", file=sys.stderr) + print(f" (this may take a moment...)", file=sys.stderr) process = subprocess.Popen( ["git", "clone", "--progress", SKILLSBENCH_REPO, str(temp_dir)], @@ -144,18 +144,18 @@ def run_prepare(args: argparse.Namespace) -> None: shutil.rmtree(temp_dir) sys.exit(1) - print(" Extracting tasks directory...", file=sys.stderr) + print(f" Extracting tasks directory...", file=sys.stderr) src_tasks = temp_dir / "tasks" if not src_tasks.exists(): - print(" [error] tasks directory not found in cloned repo", file=sys.stderr) + print(f" [error] tasks directory not found in cloned repo", file=sys.stderr) shutil.rmtree(temp_dir) sys.exit(1) BENCH_DATA_DIR.mkdir(parents=True, exist_ok=True) shutil.copytree(src_tasks, TASKS_DIR) - print(" Cleaning up temp files...", file=sys.stderr) + print(f" Cleaning up temp files...", file=sys.stderr) shutil.rmtree(temp_dir) excluded_count = 0 @@ -210,7 +210,7 @@ def run_verification(task_dir: Path, work_dir: Path, storage_workspace: Path) -> result["error"] = "no tests directory" result["verified"] = True result["passed"] = True - print(" [verify] no tests directory, skipping verification", file=sys.stderr) + print(f" [verify] no tests directory, skipping verification", file=sys.stderr) return result test_sh = tests_dir / "test.sh" @@ -222,10 +222,10 @@ def run_verification(task_dir: Path, work_dir: Path, storage_workspace: Path) -> result["error"] = "no test files found" result["verified"] = True result["passed"] = True - print(" [verify] no test files, skipping verification", file=sys.stderr) + print(f" [verify] no test files, skipping verification", file=sys.stderr) return result - print(" [verify] running tests...", file=sys.stderr) + print(f" [verify] running tests...", file=sys.stderr) logs_dir = work_dir / "logs" / "verifier" logs_dir.mkdir(parents=True, exist_ok=True) @@ -385,7 +385,7 @@ def rewrite_test_text(text: str) -> str: f"--junitxml={logs_dir}/junit.xml", ] - print(" [verify] running: pytest test_outputs.py", file=sys.stderr) + print(f" [verify] running: pytest test_outputs.py", file=sys.stderr) proc_result = subprocess.run( test_cmd, @@ -413,9 +413,9 @@ def rewrite_test_text(text: str) -> str: result["test_score"] = round(score, 2) if result["passed"]: - print(" [verify] PASSED", file=sys.stderr) + print(f" [verify] PASSED", file=sys.stderr) else: - print(" [verify] FAILED", file=sys.stderr) + print(f" [verify] FAILED", file=sys.stderr) if proc_result.stdout: print(f" [verify stdout] {proc_result.stdout[:500]}", file=sys.stderr) if proc_result.stderr: @@ -425,7 +425,7 @@ def rewrite_test_text(text: str) -> str: result["error"] = "test timeout" result["verified"] = True result["passed"] = False - print(" [verify] TIMEOUT", file=sys.stderr) + print(f" [verify] TIMEOUT", file=sys.stderr) except Exception as e: result["error"] = str(e) result["verified"] = True @@ -434,7 +434,7 @@ def rewrite_test_text(text: str) -> str: else: result["verified"] = True result["passed"] = True - print(" [verify] no pytest file, skipping", file=sys.stderr) + print(f" [verify] no pytest file, skipping", file=sys.stderr) return result @@ -495,7 +495,7 @@ def run_task( if not instruction_file.exists(): result["status"] = "error" result["error"] = "instruction.md not found" - print(" [error] instruction.md not found", file=sys.stderr) + print(f" [error] instruction.md not found", file=sys.stderr) return result task_skills_dir = task_dir / "environment" / "skills" @@ -551,7 +551,7 @@ def run_task( if not verify_only: # Run vikingbot command - print(" [running] vikingbot chat...", file=sys.stderr) + print(f" [running] vikingbot chat...", file=sys.stderr) cmd = [ "vikingbot", "chat", @@ -854,7 +854,7 @@ def run_run(args: argparse.Namespace) -> None: except Exception as e: print(f" [warn] failed to generate summary from result.csv: {e}", file=sys.stderr) - print("\n=== Summary ===", file=sys.stderr) + print(f"\n=== Summary ===", file=sys.stderr) print( f" Completed: {final_summary['completed']}/{final_summary['total_tasks']}", file=sys.stderr, diff --git a/bot/vikingbot/__main__.py b/bot/vikingbot/__main__.py index ab6cd0c46..737f3d35f 100644 --- a/bot/vikingbot/__main__.py +++ b/bot/vikingbot/__main__.py @@ -2,6 +2,7 @@ Entry point for running vikingbot as a module: python -m vikingbot """ +import sys from vikingbot.cli.commands import app diff --git a/bot/vikingbot/agent/__init__.py b/bot/vikingbot/agent/__init__.py index 91b3a3233..4681630bf 100644 --- a/bot/vikingbot/agent/__init__.py +++ b/bot/vikingbot/agent/__init__.py @@ -1,7 +1,7 @@ """Agent core module.""" -from vikingbot.agent.context import ContextBuilder from vikingbot.agent.loop import AgentLoop +from vikingbot.agent.context import ContextBuilder from vikingbot.agent.memory import MemoryStore from vikingbot.agent.skills import SkillsLoader diff --git a/bot/vikingbot/agent/memory.py b/bot/vikingbot/agent/memory.py index 44ebc508d..bfe0ed4d7 100644 --- a/bot/vikingbot/agent/memory.py +++ b/bot/vikingbot/agent/memory.py @@ -2,8 +2,8 @@ from pathlib import Path from typing import Any - from loguru import logger +import time from vikingbot.config.loader import load_config from vikingbot.openviking_mount.ov_server import VikingClient @@ -73,4 +73,4 @@ async def get_viking_user_profile(self, workspace_id: str, user_id: str) -> str: result = await client.read_user_profile(user_id) if not result: return "" - return result + return result \ No newline at end of file diff --git a/bot/vikingbot/agent/skills.py b/bot/vikingbot/agent/skills.py index e3681ee77..2c0b06f7d 100644 --- a/bot/vikingbot/agent/skills.py +++ b/bot/vikingbot/agent/skills.py @@ -2,6 +2,7 @@ import json import os +from loguru import logger import re import shutil from pathlib import Path @@ -128,7 +129,7 @@ def escape_xml(s: str) -> str: if missing: lines.append(f" {escape_xml(missing)}") - lines.append(" ") + lines.append(f" ") lines.append("") return "\n".join(lines) diff --git a/bot/vikingbot/agent/subagent.py b/bot/vikingbot/agent/subagent.py index b6ab0ee07..81818129d 100644 --- a/bot/vikingbot/agent/subagent.py +++ b/bot/vikingbot/agent/subagent.py @@ -35,6 +35,7 @@ def __init__( model: str | None = None, sandbox_manager: "SandboxManager | None" = None, ): + from vikingbot.config.schema import ExecToolConfig self.provider = provider self.workspace = workspace @@ -201,8 +202,8 @@ async def _announce_result( def _build_subagent_prompt(self, task: str) -> str: """Build a focused system prompt for the subagent.""" - import time as _time from datetime import datetime + import time as _time now = datetime.now().strftime("%Y-%m-%d %H:%M (%A)") tz = _time.strftime("%Z") or "UTC" diff --git a/bot/vikingbot/agent/tools/__init__.py b/bot/vikingbot/agent/tools/__init__.py index 456c4c5d1..fce10455e 100644 --- a/bot/vikingbot/agent/tools/__init__.py +++ b/bot/vikingbot/agent/tools/__init__.py @@ -1,7 +1,7 @@ """Agent tools module.""" from vikingbot.agent.tools.base import Tool -from vikingbot.agent.tools.factory import register_default_tools, register_subagent_tools from vikingbot.agent.tools.registry import ToolRegistry +from vikingbot.agent.tools.factory import register_default_tools, register_subagent_tools __all__ = ["Tool", "ToolRegistry", "register_default_tools", "register_subagent_tools"] diff --git a/bot/vikingbot/agent/tools/filesystem.py b/bot/vikingbot/agent/tools/filesystem.py index 41c0cc505..2683eb1de 100644 --- a/bot/vikingbot/agent/tools/filesystem.py +++ b/bot/vikingbot/agent/tools/filesystem.py @@ -1,8 +1,12 @@ """File system tools: read, write, edit.""" -from typing import Any +from typing import TYPE_CHECKING, Any from vikingbot.agent.tools.base import Tool +from vikingbot.config.schema import SessionKey + + +from vikingbot.sandbox.manager import SandboxManager class ReadFileTool(Tool): @@ -103,7 +107,7 @@ async def execute( content = await sandbox.read_file(path) if old_text not in content: - return "Error: old_text not found in file. Make sure it matches exactly." + return f"Error: old_text not found in file. Make sure it matches exactly." count = content.count(old_text) if count > 1: diff --git a/bot/vikingbot/agent/tools/message.py b/bot/vikingbot/agent/tools/message.py index d5879ad70..9a6db75b2 100644 --- a/bot/vikingbot/agent/tools/message.py +++ b/bot/vikingbot/agent/tools/message.py @@ -1,9 +1,10 @@ """Message tool for sending messages to users.""" -from typing import Any, Awaitable, Callable +from typing import Any, Callable, Awaitable from vikingbot.agent.tools.base import Tool from vikingbot.bus.events import OutboundMessage +from vikingbot.config.schema import SessionKey class MessageTool(Tool): @@ -38,6 +39,7 @@ def parameters(self) -> dict[str, Any]: } async def execute(self, tool_context: "ToolContext", **kwargs: Any) -> str: + from loguru import logger content = kwargs.get("content") diff --git a/bot/vikingbot/agent/tools/ov_file.py b/bot/vikingbot/agent/tools/ov_file.py index b3cb61824..8ccfdaa73 100644 --- a/bot/vikingbot/agent/tools/ov_file.py +++ b/bot/vikingbot/agent/tools/ov_file.py @@ -175,7 +175,7 @@ async def execute( else: return "Failed to add resource" except httpx.ReadTimeout: - return "Request timed out. The resource addition task may still be processing on the server side." + return f"Request timed out. The resource addition task may still be processing on the server side." except Exception as e: logger.warning(f"Error adding resource: {e}") return f"Error adding resource to Viking: {str(e)}" @@ -478,4 +478,4 @@ async def read_single_uri(uri: str) -> dict: except Exception as e: logger.exception(f"Error in VikingMultiReadTool: {e}") - return f"Error multi-reading Viking resources: {str(e)}" + return f"Error multi-reading Viking resources: {str(e)}" \ No newline at end of file diff --git a/bot/vikingbot/agent/tools/registry.py b/bot/vikingbot/agent/tools/registry.py index 402fafc80..628e2bd44 100644 --- a/bot/vikingbot/agent/tools/registry.py +++ b/bot/vikingbot/agent/tools/registry.py @@ -1,10 +1,11 @@ """Tool registry for dynamic tool management.""" import time -from typing import Any from loguru import logger +from typing import Any + from vikingbot.agent.tools.base import Tool, ToolContext from vikingbot.config.schema import SessionKey from vikingbot.hooks import HookContext diff --git a/bot/vikingbot/agent/tools/shell.py b/bot/vikingbot/agent/tools/shell.py index f58916d82..d94c9e70f 100644 --- a/bot/vikingbot/agent/tools/shell.py +++ b/bot/vikingbot/agent/tools/shell.py @@ -1,8 +1,18 @@ """Shell execution tool.""" -from typing import Any +import asyncio +import os +import re +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from loguru import logger from vikingbot.agent.tools.base import Tool +from vikingbot.config.schema import SessionKey + + +from vikingbot.sandbox.manager import SandboxManager class ExecTool(Tool): diff --git a/bot/vikingbot/agent/tools/spawn.py b/bot/vikingbot/agent/tools/spawn.py index 85b71c4d1..da4dded71 100644 --- a/bot/vikingbot/agent/tools/spawn.py +++ b/bot/vikingbot/agent/tools/spawn.py @@ -1,11 +1,13 @@ """Spawn tool for creating background subagents.""" -from typing import Any +from typing import Any, TYPE_CHECKING -from vikingbot.agent.subagent import SubagentManager from vikingbot.agent.tools.base import Tool +from vikingbot.agent.subagent import SubagentManager + + class SpawnTool(Tool): """ Tool to spawn a subagent for background task execution. diff --git a/bot/vikingbot/agent/tools/websearch/__init__.py b/bot/vikingbot/agent/tools/websearch/__init__.py index ecc88bf5b..e36a561b2 100644 --- a/bot/vikingbot/agent/tools/websearch/__init__.py +++ b/bot/vikingbot/agent/tools/websearch/__init__.py @@ -14,11 +14,12 @@ from vikingbot.agent.tools.base import Tool -# Import backends to register them -from . import brave, ddgs, exa, tavily from .base import WebSearchBackend from .registry import registry +# Import backends to register them +from . import brave, ddgs, exa, tavily + class WebSearchTool(Tool): """ diff --git a/bot/vikingbot/agent/tools/websearch/registry.py b/bot/vikingbot/agent/tools/websearch/registry.py index f3740ddd3..d82738371 100644 --- a/bot/vikingbot/agent/tools/websearch/registry.py +++ b/bot/vikingbot/agent/tools/websearch/registry.py @@ -1,6 +1,6 @@ """Web search backend registry.""" -from typing import Dict, List, Optional, Type +from typing import Dict, List, Type, Optional from .base import WebSearchBackend diff --git a/bot/vikingbot/bus/queue.py b/bot/vikingbot/bus/queue.py index 7a214f4bf..1e4067a6e 100644 --- a/bot/vikingbot/bus/queue.py +++ b/bot/vikingbot/bus/queue.py @@ -1,7 +1,7 @@ """Async message queue for decoupled channel-agent communication.""" import asyncio -from typing import Awaitable, Callable +from typing import Callable, Awaitable, Any from loguru import logger diff --git a/bot/vikingbot/channels/base.py b/bot/vikingbot/channels/base.py index e6958f595..1cf2b3d00 100644 --- a/bot/vikingbot/channels/base.py +++ b/bot/vikingbot/channels/base.py @@ -11,7 +11,7 @@ from vikingbot.bus.events import InboundMessage, OutboundMessage from vikingbot.bus.queue import MessageBus -from vikingbot.config.schema import BaseChannelConfig, SessionKey +from vikingbot.config.schema import SessionKey, BaseChannelConfig from vikingbot.utils import get_data_path # Optional HTML processing libraries diff --git a/bot/vikingbot/channels/discord.py b/bot/vikingbot/channels/discord.py index 8e7cef80f..740300637 100644 --- a/bot/vikingbot/channels/discord.py +++ b/bot/vikingbot/channels/discord.py @@ -2,6 +2,7 @@ import asyncio import json +from pathlib import Path from typing import Any import httpx @@ -12,6 +13,8 @@ from vikingbot.bus.queue import MessageBus from vikingbot.channels.base import BaseChannel from vikingbot.config.schema import DiscordChannelConfig +from vikingbot.channels.utils import extract_image_paths, read_image_file + DISCORD_API_BASE = "https://discord.com/api/v10" MAX_ATTACHMENT_BYTES = 20 * 1024 * 1024 # 20MB diff --git a/bot/vikingbot/channels/feishu.py b/bot/vikingbot/channels/feishu.py index cffb45494..bfe75501d 100644 --- a/bot/vikingbot/channels/feishu.py +++ b/bot/vikingbot/channels/feishu.py @@ -31,7 +31,7 @@ from vikingbot.bus.events import OutboundMessage from vikingbot.bus.queue import MessageBus from vikingbot.channels.base import BaseChannel -from vikingbot.config.schema import BotMode, FeishuChannelConfig +from vikingbot.config.schema import FeishuChannelConfig, BotMode try: import lark_oapi as lark @@ -46,7 +46,7 @@ GetMessageResourceRequest, P2ImMessageReceiveV1, ReplyMessageRequest, - ReplyMessageRequestBody, + ReplyMessageRequestBody ) FEISHU_AVAILABLE = True @@ -750,13 +750,13 @@ async def _check_should_process(self, chat_type: str, chat_id: str, message: Any if self.config.thread_require_mention: # 模式1:所有消息都需要@才处理 if not is_mentioned: - logger.info("Skipping thread message: thread_require_mention is True and not mentioned") + logger.info(f"Skipping thread message: thread_require_mention is True and not mentioned") return False else: # 模式2:仅话题首条消息不需要@,后续回复需要@(DEBUG模式除外) config = load_config() if not is_topic_starter and not is_mentioned and config.mode != BotMode.DEBUG: - logger.info("Skipping thread message: not topic starter and not mentioned") + logger.info(f"Skipping thread message: not topic starter and not mentioned") return False return True diff --git a/bot/vikingbot/channels/openapi.py b/bot/vikingbot/channels/openapi.py index 45d0c848a..bcc70f47b 100644 --- a/bot/vikingbot/channels/openapi.py +++ b/bot/vikingbot/channels/openapi.py @@ -435,4 +435,4 @@ def get_openapi_router(bus: MessageBus, config: Config) -> APIRouter: channel.send, ) - return channel.get_router() + return channel.get_router() \ No newline at end of file diff --git a/bot/vikingbot/channels/slack.py b/bot/vikingbot/channels/slack.py index 7051cd1e1..e8744afbc 100644 --- a/bot/vikingbot/channels/slack.py +++ b/bot/vikingbot/channels/slack.py @@ -2,17 +2,19 @@ import asyncio import re +from typing import Any from loguru import logger +from slack_sdk.socket_mode.websockets import SocketModeClient from slack_sdk.socket_mode.request import SocketModeRequest from slack_sdk.socket_mode.response import SocketModeResponse -from slack_sdk.socket_mode.websockets import SocketModeClient from slack_sdk.web.async_client import AsyncWebClient from vikingbot.bus.events import OutboundMessage from vikingbot.bus.queue import MessageBus from vikingbot.channels.base import BaseChannel from vikingbot.config.schema import SlackChannelConfig +from vikingbot.channels.utils import extract_image_paths, read_image_file class SlackChannel(BaseChannel): diff --git a/bot/vikingbot/channels/telegram.py b/bot/vikingbot/channels/telegram.py index 265d32068..9e979769e 100644 --- a/bot/vikingbot/channels/telegram.py +++ b/bot/vikingbot/channels/telegram.py @@ -4,17 +4,16 @@ import asyncio import re - from loguru import logger from telegram import BotCommand, Update -from telegram.ext import Application, CommandHandler, ContextTypes, MessageHandler, filters +from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes from telegram.request import HTTPXRequest from vikingbot.bus.events import OutboundMessage from vikingbot.bus.queue import MessageBus from vikingbot.channels.base import BaseChannel -from vikingbot.channels.utils import extract_image_paths, read_image_file from vikingbot.config.schema import TelegramChannelConfig +from vikingbot.channels.utils import extract_image_paths, read_image_file def _markdown_to_telegram_html(text: str) -> str: @@ -319,6 +318,7 @@ async def _on_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) ext = self._get_extension(media_type, getattr(media_file, "mime_type", None)) # Save to workspace/media/ + from pathlib import Path from vikingbot.utils.helpers import get_media_path if self.workspace_path: diff --git a/bot/vikingbot/channels/utils.py b/bot/vikingbot/channels/utils.py index c0cc5224c..d67404979 100644 --- a/bot/vikingbot/channels/utils.py +++ b/bot/vikingbot/channels/utils.py @@ -3,7 +3,9 @@ import base64 import re from pathlib import Path -from typing import List, Tuple +from loguru import logger +from typing import Tuple, List + # Common image file extensions IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".svg", ".tiff"} diff --git a/bot/vikingbot/channels/whatsapp.py b/bot/vikingbot/channels/whatsapp.py index b2b16c3ff..23794a88f 100644 --- a/bot/vikingbot/channels/whatsapp.py +++ b/bot/vikingbot/channels/whatsapp.py @@ -2,6 +2,7 @@ import asyncio import json +from typing import Any from loguru import logger diff --git a/bot/vikingbot/cli/werewolf_game.py b/bot/vikingbot/cli/werewolf_game.py index 5854eff10..c16896e63 100644 --- a/bot/vikingbot/cli/werewolf_game.py +++ b/bot/vikingbot/cli/werewolf_game.py @@ -1,6 +1,7 @@ """CLI commands for vikingbot.""" import asyncio +from dataclasses import dataclass import json import os import random @@ -9,7 +10,6 @@ import sys import time import warnings -from dataclasses import dataclass from pathlib import Path from typing import Any @@ -1401,9 +1401,9 @@ def demo_werewolf_ui( config = ensure_config(path) _init_bot_data(config) - import uvicorn from fastapi import FastAPI from fastapi.responses import HTMLResponse, JSONResponse + import uvicorn workspace_root = config.workspace_path storage_root = (config.storage_workspace or "~/.openviking/data") @@ -2101,7 +2101,7 @@ async def _ww_run_witch( except Exception: poison_target = None if poison is True and poison_target is None: - poison_target = _ww_pick_random_target(rng, alive_seats, exclude={witch.seat}) + poison_target = _ww_pick_random_target(rng, alive_seats, exclude={witch.seat}) if poison_target is not None and (poison_target not in alive_seats or poison_target == witch.seat): poison_target = None diff --git a/bot/vikingbot/config/__init__.py b/bot/vikingbot/config/__init__.py index fc7607fde..f81acc62e 100644 --- a/bot/vikingbot/config/__init__.py +++ b/bot/vikingbot/config/__init__.py @@ -1,6 +1,6 @@ """Configuration module for vikingbot.""" -from vikingbot.config.loader import get_config_path, load_config +from vikingbot.config.loader import load_config, get_config_path from vikingbot.config.schema import Config __all__ = ["Config", "load_config", "get_config_path"] diff --git a/bot/vikingbot/config/loader.py b/bot/vikingbot/config/loader.py index 69b0cbe53..9aae2f826 100644 --- a/bot/vikingbot/config/loader.py +++ b/bot/vikingbot/config/loader.py @@ -4,9 +4,7 @@ import os from pathlib import Path from typing import Any - from loguru import logger - from vikingbot.config.schema import Config CONFIG_PATH = None @@ -224,4 +222,4 @@ def camel_to_snake(name: str) -> str: def snake_to_camel(name: str) -> str: """Convert snake_case to camelCase.""" components = name.split("_") - return components[0] + "".join(x.title() for x in components[1:]) + return components[0] + "".join(x.title() for x in components[1:]) \ No newline at end of file diff --git a/bot/vikingbot/config/schema.py b/bot/vikingbot/config/schema.py index 2e90847f2..0b5a885ae 100644 --- a/bot/vikingbot/config/schema.py +++ b/bot/vikingbot/config/schema.py @@ -748,4 +748,4 @@ def from_safe_name(safe_name: str): file_name_split = safe_name.split("__") return SessionKey( type=file_name_split[0], channel_id=file_name_split[1], chat_id=file_name_split[2] - ) + ) \ No newline at end of file diff --git a/bot/vikingbot/console/web_console.py b/bot/vikingbot/console/web_console.py index 0334b3a55..4617dfaef 100644 --- a/bot/vikingbot/console/web_console.py +++ b/bot/vikingbot/console/web_console.py @@ -1,12 +1,13 @@ import json import sys +import os from pathlib import Path from typing import Any, Dict, List, Optional, Tuple import gradio as gr -from vikingbot.config.loader import get_config_path, load_config, save_config -from vikingbot.config.schema import Config +from vikingbot.config.loader import load_config, save_config, get_config_path +from vikingbot.config.schema import Config, ChannelType, SandboxBackend, SandboxMode def resolve_schema_ref( diff --git a/bot/vikingbot/cron/types.py b/bot/vikingbot/cron/types.py index 25adfa9db..3e7a7d721 100644 --- a/bot/vikingbot/cron/types.py +++ b/bot/vikingbot/cron/types.py @@ -3,6 +3,8 @@ from dataclasses import dataclass, field from typing import Literal +from vikingbot.config.schema import SessionKey + @dataclass class CronSchedule: diff --git a/bot/vikingbot/heartbeat/service.py b/bot/vikingbot/heartbeat/service.py index 4b8b1192c..aa860f6a6 100644 --- a/bot/vikingbot/heartbeat/service.py +++ b/bot/vikingbot/heartbeat/service.py @@ -2,11 +2,13 @@ import asyncio from pathlib import Path -from typing import Any, Callable, Coroutine +from typing import Any, Callable, Coroutine, TYPE_CHECKING, Dict, List from loguru import logger from vikingbot.config.schema import SessionKey + + from vikingbot.session.manager import SessionManager # Default interval: 30 minutes diff --git a/bot/vikingbot/hooks/base.py b/bot/vikingbot/hooks/base.py index 167156d93..4d9e8c684 100644 --- a/bot/vikingbot/hooks/base.py +++ b/bot/vikingbot/hooks/base.py @@ -1,10 +1,12 @@ from abc import ABC, abstractmethod +from enum import Enum from dataclasses import dataclass -from datetime import datetime from typing import Any, Dict, Optional +from datetime import datetime from vikingbot.config.schema import SessionKey + # class HookType(Enum): # SYNC = "sync" # ASYNC = "async" diff --git a/bot/vikingbot/hooks/builtins/openviking_hooks.py b/bot/vikingbot/hooks/builtins/openviking_hooks.py index eb0b5a50f..2cbd51e30 100644 --- a/bot/vikingbot/hooks/builtins/openviking_hooks.py +++ b/bot/vikingbot/hooks/builtins/openviking_hooks.py @@ -4,6 +4,7 @@ from loguru import logger from vikingbot.config.loader import load_config +from vikingbot.config.schema import SessionKey, AgentMemoryMode from ...session import Session from ..base import Hook, HookContext diff --git a/bot/vikingbot/hooks/manager.py b/bot/vikingbot/hooks/manager.py index 4bc57036c..a78b918b3 100644 --- a/bot/vikingbot/hooks/manager.py +++ b/bot/vikingbot/hooks/manager.py @@ -1,7 +1,7 @@ import asyncio import importlib from collections import defaultdict -from typing import Any, Dict, List, Type +from typing import List, Any, Dict, Type from loguru import logger diff --git a/bot/vikingbot/openviking_mount/__init__.py b/bot/vikingbot/openviking_mount/__init__.py index 0d19d48fa..cbce294f1 100644 --- a/bot/vikingbot/openviking_mount/__init__.py +++ b/bot/vikingbot/openviking_mount/__init__.py @@ -7,8 +7,8 @@ from typing import TYPE_CHECKING -from .manager import MountPoint, OpenVikingMountManager, get_mount_manager -from .mount import FileInfo, MountConfig, MountScope, OpenVikingMount +from .mount import OpenVikingMount, MountScope, MountConfig, FileInfo +from .manager import OpenVikingMountManager, MountPoint, get_mount_manager from .session_integration import SessionOpenVikingManager, get_session_ov_manager __all__ = [ @@ -28,12 +28,12 @@ ] if TYPE_CHECKING: - from .viking_fuse import FUSE_AVAILABLE, FUSEMountManager, OpenVikingFUSE, mount_fuse + from .viking_fuse import OpenVikingFUSE, mount_fuse, FUSEMountManager, FUSE_AVAILABLE def __getattr__(name: str): if name in ("OpenVikingFUSE", "mount_fuse", "FUSEMountManager", "FUSE_AVAILABLE"): - from .viking_fuse import FUSE_AVAILABLE, FUSEMountManager, OpenVikingFUSE, mount_fuse + from .viking_fuse import OpenVikingFUSE, mount_fuse, FUSEMountManager, FUSE_AVAILABLE return locals()[name] raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/bot/vikingbot/openviking_mount/fuse_finder.py b/bot/vikingbot/openviking_mount/fuse_finder.py index a3969b263..06f5043aa 100644 --- a/bot/vikingbot/openviking_mount/fuse_finder.py +++ b/bot/vikingbot/openviking_mount/fuse_finder.py @@ -1,21 +1,21 @@ #!/usr/bin/env python3 from __future__ import annotations -import errno +import sys import os -import shutil import stat -import sys +import errno import tempfile -from datetime import datetime +import shutil from pathlib import Path from typing import Any, Dict +from datetime import datetime sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) from loguru import logger -from .mount import MountConfig, OpenVikingMount +from .mount import OpenVikingMount, MountConfig try: from fuse import FUSE, FuseOSError, Operations @@ -420,7 +420,7 @@ def mount_fuse(config: MountConfig, foreground: bool = True) -> None: logger.info(f"Mounting OpenViking FUSE at: {config.mount_point}") logger.info(f" Scope: {config.scope.value}") logger.info(f" Read-only: {config.read_only}") - logger.info(" Press Ctrl+C to unmount") + logger.info(f" Press Ctrl+C to unmount") try: FUSE( diff --git a/bot/vikingbot/openviking_mount/fuse_proxy.py b/bot/vikingbot/openviking_mount/fuse_proxy.py index 9373a9f4e..12b53f7c2 100644 --- a/bot/vikingbot/openviking_mount/fuse_proxy.py +++ b/bot/vikingbot/openviking_mount/fuse_proxy.py @@ -1,21 +1,22 @@ #!/usr/bin/env python3 from __future__ import annotations -import errno +import sys import os -import shutil import stat -import sys +import errno import tempfile -from datetime import datetime +import shutil from pathlib import Path from typing import Any, Dict +from datetime import datetime + sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) from loguru import logger -from .mount import MountConfig, OpenVikingMount +from .mount import OpenVikingMount, MountConfig try: from fuse import FUSE, FuseOSError, Operations @@ -72,7 +73,7 @@ def getattr(self, path: str, fh: int = None) -> Dict[str, Any]: "st_mtime": stat_info.st_mtime, "st_ctime": stat_info.st_ctime, } - print("2222222") + print(f"2222222") if path in self._pending_uploads: now = datetime.now().timestamp() return { @@ -299,7 +300,7 @@ def mount_fuse(config: MountConfig, foreground: bool = True) -> None: logger.info(f"Mounting OpenViking FUSE Proxy at: {config.mount_point}") logger.info(f" Proxy to: {config.openviking_data_path / '.original_files'}") - logger.info(" Press Ctrl+C to unmount") + logger.info(f" Press Ctrl+C to unmount") try: FUSE( diff --git a/bot/vikingbot/openviking_mount/fuse_simple.py b/bot/vikingbot/openviking_mount/fuse_simple.py index 661311f2b..d09da61c2 100644 --- a/bot/vikingbot/openviking_mount/fuse_simple.py +++ b/bot/vikingbot/openviking_mount/fuse_simple.py @@ -1,21 +1,21 @@ #!/usr/bin/env python3 from __future__ import annotations -import errno +import sys import os -import shutil import stat -import sys +import errno import tempfile -from datetime import datetime +import shutil from pathlib import Path from typing import Any, Dict +from datetime import datetime sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) from loguru import logger -from .mount import MountConfig, OpenVikingMount +from .mount import OpenVikingMount, MountConfig try: from fuse import FUSE, FuseOSError, Operations @@ -396,7 +396,7 @@ def mount_fuse(config: MountConfig, foreground: bool = True) -> None: logger.info(f"Mounting OpenViking FUSE at: {config.mount_point}") logger.info(f" Scope: {config.scope.value}") logger.info(f" Read-only: {config.read_only}") - logger.info(" Press Ctrl+C to unmount") + logger.info(f" Press Ctrl+C to unmount") try: FUSE( diff --git a/bot/vikingbot/openviking_mount/fuse_simple_debug.py b/bot/vikingbot/openviking_mount/fuse_simple_debug.py index afef1eef3..5186529c7 100644 --- a/bot/vikingbot/openviking_mount/fuse_simple_debug.py +++ b/bot/vikingbot/openviking_mount/fuse_simple_debug.py @@ -1,21 +1,21 @@ #!/usr/bin/env python3 from __future__ import annotations -import errno +import sys import os -import shutil import stat -import sys +import errno import tempfile -from datetime import datetime +import shutil from pathlib import Path from typing import Any, Dict +from datetime import datetime sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent)) from loguru import logger -from .mount import MountConfig, OpenVikingMount +from .mount import OpenVikingMount, MountConfig try: from fuse import FUSE, FuseOSError, Operations @@ -166,7 +166,7 @@ def getattr(self, path: str, fh: int = None) -> Dict[str, Any]: except Exception as e: logger.warning(f"getattr error for {path}: {e}") - logger.debug("[FUSE] getattr failed: ENOENT") + logger.debug(f"[FUSE] getattr failed: ENOENT") raise FuseOSError(errno.ENOENT) def readdir(self, path: str, fh: int) -> list: @@ -438,7 +438,7 @@ def mount_fuse(config: MountConfig, foreground: bool = True) -> None: logger.info(f"Mounting OpenViking FUSE at: {config.mount_point}") logger.info(f" Scope: {config.scope.value}") logger.info(f" Read-only: {config.read_only}") - logger.info(" Press Ctrl+C to unmount") + logger.info(f" Press Ctrl+C to unmount") try: FUSE( diff --git a/bot/vikingbot/openviking_mount/manager.py b/bot/vikingbot/openviking_mount/manager.py index 1dafb2b67..0fa4c1f64 100644 --- a/bot/vikingbot/openviking_mount/manager.py +++ b/bot/vikingbot/openviking_mount/manager.py @@ -6,15 +6,15 @@ from __future__ import annotations -from dataclasses import dataclass +import sys from pathlib import Path from typing import Dict, List, Optional +from dataclasses import dataclass, field from loguru import logger -from vikingbot.utils.helpers import get_bot_data_path, get_mounts_path - -from .mount import MountConfig, MountScope, OpenVikingMount +from vikingbot.utils.helpers import get_mounts_path, get_bot_data_path +from .mount import OpenVikingMount, MountConfig, MountScope @dataclass diff --git a/bot/vikingbot/openviking_mount/mount.py b/bot/vikingbot/openviking_mount/mount.py index 308db2f3e..c106c5d2c 100644 --- a/bot/vikingbot/openviking_mount/mount.py +++ b/bot/vikingbot/openviking_mount/mount.py @@ -7,15 +7,15 @@ from __future__ import annotations -from dataclasses import dataclass -from enum import Enum +import sys from pathlib import Path -from typing import List, Optional, Union +from typing import Any, Dict, List, Optional, Union +from dataclasses import dataclass, field +from enum import Enum +import openviking as ov from loguru import logger -import openviking as ov - class MountScope(Enum): """OpenViking挂载作用域""" @@ -371,7 +371,7 @@ def search(self, query: str, target_path: Optional[Union[str, Path]] = None) -> is_dir=False, # 需要根据实际结果判断 ) if hasattr(r, "score"): - file_info.score = r.score + setattr(file_info, "score", r.score) file_infos.append(file_info) return file_infos diff --git a/bot/vikingbot/openviking_mount/session_integration.py b/bot/vikingbot/openviking_mount/session_integration.py index cd5c77e0b..a6ec77a5f 100644 --- a/bot/vikingbot/openviking_mount/session_integration.py +++ b/bot/vikingbot/openviking_mount/session_integration.py @@ -7,17 +7,19 @@ from __future__ import annotations +import sys +import asyncio import shutil from pathlib import Path -from typing import Any, Dict, Optional +from typing import Dict, Optional, Any from loguru import logger from vikingbot.utils.helpers import get_workspace_path # 相对导入同一包内的模块 -from .mount import MountConfig, MountScope, OpenVikingMount -from .viking_fuse import FUSE_AVAILABLE, FUSEMountManager, mount_fuse +from .mount import OpenVikingMount, MountConfig, MountScope +from .viking_fuse import mount_fuse, FUSEMountManager, FUSE_AVAILABLE class SessionOpenVikingManager: @@ -46,7 +48,7 @@ def __init__(self, base_workspace: Optional[Path] = None): # FUSE 挂载管理器(如果可用) self._fuse_manager = FUSEMountManager() if FUSE_AVAILABLE else None - logger.info("SessionOpenVikingManager initialized") + logger.info(f"SessionOpenVikingManager initialized") logger.info(f" Base workspace: {self.base_workspace}") logger.info(f" FUSE available: {FUSE_AVAILABLE}") diff --git a/bot/vikingbot/openviking_mount/user_apikey_manager.py b/bot/vikingbot/openviking_mount/user_apikey_manager.py index 6d5b1cf44..da638d95d 100644 --- a/bot/vikingbot/openviking_mount/user_apikey_manager.py +++ b/bot/vikingbot/openviking_mount/user_apikey_manager.py @@ -1,7 +1,7 @@ """User API Key persistence manager for OpenViking remote mode.""" -import hashlib import json +import hashlib from pathlib import Path from typing import Optional diff --git a/bot/vikingbot/providers/openai_compatible_provider.py b/bot/vikingbot/providers/openai_compatible_provider.py index 568031074..7433d2f3a 100644 --- a/bot/vikingbot/providers/openai_compatible_provider.py +++ b/bot/vikingbot/providers/openai_compatible_provider.py @@ -9,9 +9,8 @@ import json from typing import Any - -from loguru import logger from openai import AsyncOpenAI +from loguru import logger from vikingbot.integrations.langfuse import LangfuseClient from vikingbot.providers.base import LLMProvider, LLMResponse, ToolCallRequest diff --git a/bot/vikingbot/providers/transcription.py b/bot/vikingbot/providers/transcription.py index 19136cbee..936b3e55b 100644 --- a/bot/vikingbot/providers/transcription.py +++ b/bot/vikingbot/providers/transcription.py @@ -2,6 +2,7 @@ import os from pathlib import Path +from typing import Any import httpx from loguru import logger diff --git a/bot/vikingbot/sandbox/__init__.py b/bot/vikingbot/sandbox/__init__.py index d552fe0d0..692e88641 100644 --- a/bot/vikingbot/sandbox/__init__.py +++ b/bot/vikingbot/sandbox/__init__.py @@ -2,10 +2,10 @@ from vikingbot.sandbox.base import ( SandboxBackend, - SandboxDisabledError, SandboxError, - SandboxExecutionError, SandboxNotStartedError, + SandboxDisabledError, + SandboxExecutionError, UnsupportedBackendError, ) from vikingbot.sandbox.manager import SandboxManager diff --git a/bot/vikingbot/sandbox/backends/__init__.py b/bot/vikingbot/sandbox/backends/__init__.py index e3f2b5d0d..a69449b02 100644 --- a/bot/vikingbot/sandbox/backends/__init__.py +++ b/bot/vikingbot/sandbox/backends/__init__.py @@ -1,7 +1,6 @@ """Sandbox backend registry.""" -from typing import TYPE_CHECKING, Callable, Dict, Type - +from typing import TYPE_CHECKING, Type, Callable, Dict from vikingbot.sandbox.base import SandboxBackend _BACKENDS: Dict[str, Type[SandboxBackend]] = {} @@ -29,4 +28,7 @@ def list_backends() -> list[str]: # Import backends to register them (avoid circular import) -from vikingbot.sandbox.backends import aiosandbox, direct, opensandbox, srt +from vikingbot.sandbox.backends import srt +from vikingbot.sandbox.backends import opensandbox +from vikingbot.sandbox.backends import direct +from vikingbot.sandbox.backends import aiosandbox diff --git a/bot/vikingbot/sandbox/backends/direct.py b/bot/vikingbot/sandbox/backends/direct.py index fa2cd29c2..a273d21f1 100644 --- a/bot/vikingbot/sandbox/backends/direct.py +++ b/bot/vikingbot/sandbox/backends/direct.py @@ -1,14 +1,17 @@ """Direct backend implementation - executes commands directly on host without sandboxing.""" import asyncio +import os from pathlib import Path -from typing import Any +from typing import TYPE_CHECKING, Any from loguru import logger -from vikingbot.config.schema import SandboxConfig, SessionKey -from vikingbot.sandbox.backends import register_backend from vikingbot.sandbox.base import SandboxBackend +from vikingbot.sandbox.backends import register_backend + + +from vikingbot.config.schema import SandboxConfig, SessionKey @register_backend("direct") diff --git a/bot/vikingbot/sandbox/backends/srt.py b/bot/vikingbot/sandbox/backends/srt.py index b67d060c0..4f09f6081 100644 --- a/bot/vikingbot/sandbox/backends/srt.py +++ b/bot/vikingbot/sandbox/backends/srt.py @@ -4,13 +4,14 @@ import json import os from pathlib import Path -from typing import Any - +from typing import TYPE_CHECKING, Any from loguru import logger -from vikingbot.config.schema import SessionKey -from vikingbot.sandbox.backends import register_backend from vikingbot.sandbox.base import SandboxBackend, SandboxNotStartedError +from vikingbot.sandbox.backends import register_backend + + +from vikingbot.config.schema import SandboxConfig, SessionKey @register_backend("srt") diff --git a/bot/vikingbot/sandbox/manager.py b/bot/vikingbot/sandbox/manager.py index 81c6fd892..595fd6bcf 100644 --- a/bot/vikingbot/sandbox/manager.py +++ b/bot/vikingbot/sandbox/manager.py @@ -1,10 +1,15 @@ """Sandbox manager for creating and managing sandbox instances.""" +import asyncio from pathlib import Path +from typing import TYPE_CHECKING -from vikingbot.config.schema import Config, SessionKey +from openviking.async_client import logger +from vikingbot.sandbox.base import SandboxBackend, SandboxDisabledError, UnsupportedBackendError from vikingbot.sandbox.backends import get_backend -from vikingbot.sandbox.base import SandboxBackend, UnsupportedBackendError + + +from vikingbot.config.schema import SandboxConfig, SessionKey, Config class SandboxManager: @@ -39,7 +44,7 @@ async def _create_sandbox(self, workspace_id: str) -> SandboxBackend: instance = self._backend_cls(self.config.sandbox, workspace_id, workspace) try: await instance.start() - except Exception: + except Exception as e: import traceback traceback.print_exc() @@ -49,9 +54,9 @@ async def _create_sandbox(self, workspace_id: str) -> SandboxBackend: async def _copy_bootstrap_files(self, sandbox_workspace: Path) -> None: """Copy bootstrap files from source workspace to sandbox workspace.""" - import shutil - from vikingbot.agent.context import ContextBuilder + from vikingbot.agent.skills import BUILTIN_SKILLS_DIR + import shutil # Copy from source workspace init directory (if exists) init_dir = self.source_workspace / ContextBuilder.INIT_DIR diff --git a/bot/vikingbot/session/__init__.py b/bot/vikingbot/session/__init__.py index b70454048..7e889e8c7 100644 --- a/bot/vikingbot/session/__init__.py +++ b/bot/vikingbot/session/__init__.py @@ -1,5 +1,5 @@ """Session management module.""" -from vikingbot.session.manager import Session, SessionManager +from vikingbot.session.manager import SessionManager, Session __all__ = ["SessionManager", "Session"] diff --git a/bot/vikingbot/utils/__init__.py b/bot/vikingbot/utils/__init__.py index fa1e76fc9..265714801 100644 --- a/bot/vikingbot/utils/__init__.py +++ b/bot/vikingbot/utils/__init__.py @@ -2,18 +2,18 @@ from vikingbot.utils.helpers import ( ensure_dir, - get_bot_data_path, - get_bridge_path, + get_workspace_path, get_data_path, + get_bot_data_path, + set_bot_data_path, + get_sessions_path, get_history_path, + get_bridge_path, get_images_path, get_media_path, + get_received_path, get_mochat_path, get_mounts_path, - get_received_path, - get_sessions_path, - get_workspace_path, - set_bot_data_path, ) __all__ = [ diff --git a/bot/vikingbot/utils/helpers.py b/bot/vikingbot/utils/helpers.py index 11cfaad68..17fb7681c 100644 --- a/bot/vikingbot/utils/helpers.py +++ b/bot/vikingbot/utils/helpers.py @@ -1,8 +1,7 @@ """Utility functions for vikingbot.""" -from datetime import datetime from pathlib import Path - +from datetime import datetime from loguru import logger @@ -107,7 +106,6 @@ def get_workspace_path() -> Path: def ensure_workspace_templates(workspace: Path) -> None: import shutil - from vikingbot.agent.skills import BUILTIN_SKILLS_DIR # Ensure workspace directory exists first diff --git a/bot/vikingbot/utils/tracing.py b/bot/vikingbot/utils/tracing.py index d90aa08a5..948c30e9b 100644 --- a/bot/vikingbot/utils/tracing.py +++ b/bot/vikingbot/utils/tracing.py @@ -170,7 +170,7 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> T: return await wrapped_func(*args, **kwargs) else: if not has_propagate: - logger.warning("[LANGFUSE] propagate_attributes not available") + logger.warning(f"[LANGFUSE] propagate_attributes not available") return await wrapped_func(*args, **kwargs) else: return await wrapped_func(*args, **kwargs) diff --git a/bot/workspace/skills/github-proxy/scripts/convert_url.py b/bot/workspace/skills/github-proxy/scripts/convert_url.py index 9db12951e..7821c73f2 100755 --- a/bot/workspace/skills/github-proxy/scripts/convert_url.py +++ b/bot/workspace/skills/github-proxy/scripts/convert_url.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import re import sys +import re DEFAULT_PROXY = "https://githubproxy.cc" BACKUP_PROXY = "https://ghfast.top" diff --git a/bot/workspace/skills/opencode/list_sessions.py b/bot/workspace/skills/opencode/list_sessions.py index 20494b0c4..ff9740e64 100644 --- a/bot/workspace/skills/opencode/list_sessions.py +++ b/bot/workspace/skills/opencode/list_sessions.py @@ -3,15 +3,14 @@ import json import time - from opencode_ai import Opencode from opencode_utils import ( check_serve_status, execute_cmd, - list_project, read_new_messages, read_status, write_status, + list_project, ) from pydantic import BaseModel diff --git a/bot/workspace/skills/opencode/opencode_utils.py b/bot/workspace/skills/opencode/opencode_utils.py index e1b929527..44f811e74 100644 --- a/bot/workspace/skills/opencode/opencode_utils.py +++ b/bot/workspace/skills/opencode/opencode_utils.py @@ -5,8 +5,8 @@ import os import subprocess import sys -import time import traceback +import time from opencode_ai import Opencode diff --git a/examples/openclaw-plugin/tests/e2e/test-archive-expand.py b/examples/openclaw-plugin/tests/e2e/test-archive-expand.py index b0b1eacfc..bba147b49 100644 --- a/examples/openclaw-plugin/tests/e2e/test-archive-expand.py +++ b/examples/openclaw-plugin/tests/e2e/test-archive-expand.py @@ -866,7 +866,7 @@ def run_full_test( tree.add(f"Phase 2a: 线上排障 — {ok2}/{len(CHAT_BATCH_2)}") tree.add(f"Phase 2b: 代码评审 — {ok3}/{len(CHAT_BATCH_3)}") tree.add(f"Phase 2c: 架构设计 — {ok4}/{len(CHAT_BATCH_4)}") - tree.add("Phase 3: Archive Index 验证") + tree.add(f"Phase 3: Archive Index 验证") expand_ok = sum(1 for r in expand_results if r["success"]) tree.add(f"Phase 4: 归档展开 — {expand_ok}/{len(expand_results)} 问题回答正确") diff --git a/openviking/models/vlm/backends/litellm_vlm.py b/openviking/models/vlm/backends/litellm_vlm.py index b13780f7f..620085709 100644 --- a/openviking/models/vlm/backends/litellm_vlm.py +++ b/openviking/models/vlm/backends/litellm_vlm.py @@ -15,9 +15,12 @@ import litellm from litellm import acompletion, completion + from openviking.telemetry import tracer + from openviking.utils.model_retry import retry_async, retry_sync + from ..base import ToolCall, VLMBase, VLMResponse logger = logging.getLogger(__name__) diff --git a/openviking/models/vlm/backends/openai_vlm.py b/openviking/models/vlm/backends/openai_vlm.py index de7080579..2f1078f1e 100644 --- a/openviking/models/vlm/backends/openai_vlm.py +++ b/openviking/models/vlm/backends/openai_vlm.py @@ -10,6 +10,7 @@ from typing import Any, Dict, List, Optional, Union from urllib.parse import urlparse + from openviking.telemetry import tracer try: diff --git a/openviking/models/vlm/backends/volcengine_vlm.py b/openviking/models/vlm/backends/volcengine_vlm.py index 19551daf6..978bcb339 100644 --- a/openviking/models/vlm/backends/volcengine_vlm.py +++ b/openviking/models/vlm/backends/volcengine_vlm.py @@ -11,7 +11,6 @@ from typing import Any, Dict, List, Optional, Union from openviking.telemetry import tracer - from ..base import ToolCall, VLMResponse from .openai_vlm import OpenAIVLM diff --git a/openviking/parse/parsers/code/ast/languages/php.py b/openviking/parse/parsers/code/ast/languages/php.py index 45ccfa6f1..b1ef23ad3 100644 --- a/openviking/parse/parsers/code/ast/languages/php.py +++ b/openviking/parse/parsers/code/ast/languages/php.py @@ -4,7 +4,7 @@ from __future__ import annotations -from typing import List +from typing import List, Optional from openviking.parse.parsers.code.ast.languages.base import LanguageExtractor from openviking.parse.parsers.code.ast.skeleton import ClassSkeleton, CodeSkeleton, FunctionSig diff --git a/openviking/session/compressor_v2.py b/openviking/session/compressor_v2.py index f1f37a0fc..c81a66028 100644 --- a/openviking/session/compressor_v2.py +++ b/openviking/session/compressor_v2.py @@ -15,9 +15,10 @@ from openviking.session.memory import ExtractLoop, MemoryUpdater from openviking.storage import VikingDBManager from openviking.storage.viking_fs import get_viking_fs -from openviking.telemetry import get_current_telemetry, tracer +from openviking.telemetry import get_current_telemetry from openviking_cli.session.user_id import UserIdentifier from openviking_cli.utils import get_logger +from openviking.telemetry import tracer from openviking_cli.utils.config import get_openviking_config logger = get_logger(__name__) diff --git a/openviking/telemetry/__init__.py b/openviking/telemetry/__init__.py index 7a11dddac..c83e1138b 100644 --- a/openviking/telemetry/__init__.py +++ b/openviking/telemetry/__init__.py @@ -2,12 +2,12 @@ # SPDX-License-Identifier: AGPL-3.0 """OpenViking telemetry runtime and operation telemetry helpers.""" -from . import tracer as tracer_module from .context import bind_telemetry, get_current_telemetry from .operation import OperationTelemetry, TelemetrySnapshot from .registry import register_telemetry, resolve_telemetry, unregister_telemetry from .request import TelemetryRequest, TelemetrySelection, normalize_telemetry_request from .runtime import get_telemetry_runtime, set_telemetry_runtime +from . import tracer as tracer_module from .tracer import tracer __all__ = [ diff --git a/openviking_cli/utils/config/open_viking_config.py b/openviking_cli/utils/config/open_viking_config.py index 3d19afba1..9273a1c72 100644 --- a/openviking_cli/utils/config/open_viking_config.py +++ b/openviking_cli/utils/config/open_viking_config.py @@ -20,6 +20,7 @@ ) from .embedding_config import EmbeddingConfig from .encryption_config import EncryptionConfig +from .telemetry_config import TelemetryConfig from .log_config import LogConfig from .memory_config import MemoryConfig from .parser_config import ( @@ -38,7 +39,6 @@ from .prompts_config import PromptsConfig from .rerank_config import RerankConfig from .storage_config import StorageConfig -from .telemetry_config import TelemetryConfig from .vlm_config import VLMConfig diff --git a/tests/agfs/conftest.py b/tests/agfs/conftest.py index 0b41a614e..20dbe1f35 100644 --- a/tests/agfs/conftest.py +++ b/tests/agfs/conftest.py @@ -6,7 +6,6 @@ import pytest - @pytest.fixture(scope="session") def agfs_test_root(): """Root directory for AGFS tests.""" diff --git a/tests/integration/test_compressor_v2_event_span_multiple_turns.py b/tests/integration/test_compressor_v2_event_span_multiple_turns.py index a52dd7622..fb8e5f786 100644 --- a/tests/integration/test_compressor_v2_event_span_multiple_turns.py +++ b/tests/integration/test_compressor_v2_event_span_multiple_turns.py @@ -135,7 +135,7 @@ def run_ingest(client: ov.SyncHTTPClient, session_id: str, wait_seconds: float): console.print(f" [green]任务 {status},耗时 {elapsed:.2f}s[/green]") console.print(f" Task 详情: {task}") - console.print(" [yellow]等待向量化完成...[/yellow]") + console.print(f" [yellow]等待向量化完成...[/yellow]") client.wait_processed() if wait_seconds > 0: diff --git a/tests/integration/test_compressor_v2_tool_skill_memory.py b/tests/integration/test_compressor_v2_tool_skill_memory.py index 07eb63b18..838512fc1 100644 --- a/tests/integration/test_compressor_v2_tool_skill_memory.py +++ b/tests/integration/test_compressor_v2_tool_skill_memory.py @@ -207,7 +207,7 @@ def run_ingest(client: ov.SyncHTTPClient, session_id: str, wait_seconds: float): console.print(f" [green]任务 {status},耗时 {elapsed:.2f}s[/green]") console.print(f" Task 详情: {task}") - console.print(" [yellow]等待向量化完成...[/yellow]") + console.print(f" [yellow]等待向量化完成...[/yellow]") client.wait_processed() if wait_seconds > 0: @@ -299,7 +299,7 @@ def run_verify(client: ov.SyncHTTPClient): def main(): """入口函数""" - parser = argparse.ArgumentParser(description="OpenViking 记忆演示 — 工具调用和Skill调用") + parser = argparse.ArgumentParser(description=f"OpenViking 记忆演示 — 工具调用和Skill调用") parser.add_argument("--url", default=DEFAULT_URL, help=f"Server URL (默认: {DEFAULT_URL})") parser.add_argument("--api-key", default=DEFAULT_API_KEY, help="API key") parser.add_argument("--agent-id", default=DEFAULT_AGENT_ID, help="Agent ID") @@ -348,4 +348,4 @@ def main(): if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/tests/integration/test_compressor_v2_xiaomei.py b/tests/integration/test_compressor_v2_xiaomei.py index a2266f7a6..faf7b128e 100644 --- a/tests/integration/test_compressor_v2_xiaomei.py +++ b/tests/integration/test_compressor_v2_xiaomei.py @@ -160,7 +160,7 @@ def run_ingest(client: ov.SyncHTTPClient, session_id: str, wait_seconds: float): console.print(f" Task 详情: {task}") # 等待向量化队列处理完成 - console.print(" [yellow]等待向量化完成...[/yellow]") + console.print(f" [yellow]等待向量化完成...[/yellow]") client.wait_processed() if wait_seconds > 0: diff --git a/tests/models/vlm/test_volcengine_cache.py b/tests/models/vlm/test_volcengine_cache.py index 31633c897..73defabcf 100644 --- a/tests/models/vlm/test_volcengine_cache.py +++ b/tests/models/vlm/test_volcengine_cache.py @@ -2,10 +2,10 @@ # SPDX-License-Identifier: AGPL-3.0 """Tests for VolcEngineVLM cache logic.""" -from unittest.mock import AsyncMock, MagicMock - import pytest +from unittest.mock import AsyncMock, MagicMock, patch +from openviking.models.vlm.backends.volcengine_vlm import VolcEngineVLM from openviking.models.vlm.backends.volcengine_vlm import VolcEngineVLM as VLMClass @@ -250,4 +250,4 @@ def test_cache_key_includes_prefix(self): key = vlm._get_response_id_cache_key(messages) # Should include prefix in the key - assert "prefix:" in key or key.startswith("prefix:") + assert "prefix:" in key or key.startswith("prefix:") \ No newline at end of file diff --git a/tests/parse/test_html_parser_utils.py b/tests/parse/test_html_parser_utils.py index 651d18e91..ca2373e7b 100644 --- a/tests/parse/test_html_parser_utils.py +++ b/tests/parse/test_html_parser_utils.py @@ -1,3 +1,4 @@ +import pytest from openviking.parse.parsers.html import HTMLParser diff --git a/tests/server/test_bot_proxy_auth.py b/tests/server/test_bot_proxy_auth.py index 291326c5b..bf5b580f0 100644 --- a/tests/server/test_bot_proxy_auth.py +++ b/tests/server/test_bot_proxy_auth.py @@ -3,8 +3,10 @@ """Regression tests for bot proxy endpoint auth enforcement.""" +import httpx import pytest -from fastapi import Request +import pytest_asyncio +from fastapi import FastAPI, Request import openviking.server.routers.bot as bot_router_module diff --git a/tests/storage/test_vectordb_collection_loading.py b/tests/storage/test_vectordb_collection_loading.py index 31e00df9b..c5c9bd9e3 100644 --- a/tests/storage/test_vectordb_collection_loading.py +++ b/tests/storage/test_vectordb_collection_loading.py @@ -1,13 +1,15 @@ -import sys import unittest +import sys +import os # Add open_test path to ensure modules can be found sys.path.insert(0, "/cloudide/workspace/open_test") -from openviking.storage.vectordb.collection.vikingdb_collection import VikingDBCollection from openviking.storage.vectordb.project.vikingdb_project import ( get_or_create_vikingdb_project, + VikingDBProject, ) +from openviking.storage.vectordb.collection.vikingdb_collection import VikingDBCollection class TestDynamicLoading(unittest.TestCase): diff --git a/tests/unit/test_cohere_rerank.py b/tests/unit/test_cohere_rerank.py index 89d2a7cf7..b06fd2aa7 100644 --- a/tests/unit/test_cohere_rerank.py +++ b/tests/unit/test_cohere_rerank.py @@ -4,6 +4,8 @@ from unittest.mock import MagicMock, patch +import pytest + from openviking_cli.utils.cohere_rerank import CohereRerankClient diff --git a/uv.lock b/uv.lock index 0add27d5d..4b6d38957 100644 --- a/uv.lock +++ b/uv.lock @@ -1550,6 +1550,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/3f/9859f655d11901e7b2996c6e3d33e0caa9a1d4572c3bc61ed0faa64b2f4c/greenlet-3.3.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9bc885b89709d901859cf95179ec9f6bb67a3d2bb1f0e88456461bd4b7f8fd0d", size = 277747, upload-time = "2026-02-20T20:16:21.325Z" }, { url = "https://files.pythonhosted.org/packages/fb/07/cb284a8b5c6498dbd7cba35d31380bb123d7dceaa7907f606c8ff5993cbf/greenlet-3.3.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b568183cf65b94919be4438dc28416b234b678c608cafac8874dfeeb2a9bbe13", size = 579202, upload-time = "2026-02-20T20:47:28.955Z" }, { url = "https://files.pythonhosted.org/packages/ed/45/67922992b3a152f726163b19f890a85129a992f39607a2a53155de3448b8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:527fec58dc9f90efd594b9b700662ed3fb2493c2122067ac9c740d98080a620e", size = 590620, upload-time = "2026-02-20T20:55:55.581Z" }, + { url = "https://files.pythonhosted.org/packages/03/5f/6e2a7d80c353587751ef3d44bb947f0565ec008a2e0927821c007e96d3a7/greenlet-3.3.2-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508c7f01f1791fbc8e011bd508f6794cb95397fdb198a46cb6635eb5b78d85a7", size = 602132, upload-time = "2026-02-20T21:02:43.261Z" }, { url = "https://files.pythonhosted.org/packages/ad/55/9f1ebb5a825215fadcc0f7d5073f6e79e3007e3282b14b22d6aba7ca6cb8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad0c8917dd42a819fe77e6bdfcb84e3379c0de956469301d9fd36427a1ca501f", size = 591729, upload-time = "2026-02-20T20:20:58.395Z" }, { url = "https://files.pythonhosted.org/packages/24/b4/21f5455773d37f94b866eb3cf5caed88d6cea6dd2c6e1f9c34f463cba3ec/greenlet-3.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:97245cc10e5515dbc8c3104b2928f7f02b6813002770cfaffaf9a6e0fc2b94ef", size = 1551946, upload-time = "2026-02-20T20:49:31.102Z" }, { url = "https://files.pythonhosted.org/packages/00/68/91f061a926abead128fe1a87f0b453ccf07368666bd59ffa46016627a930/greenlet-3.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8c1fdd7d1b309ff0da81d60a9688a8bd044ac4e18b250320a96fc68d31c209ca", size = 1618494, upload-time = "2026-02-20T20:21:06.541Z" }, @@ -1557,6 +1558,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f3/47/16400cb42d18d7a6bb46f0626852c1718612e35dcb0dffa16bbaffdf5dd2/greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86", size = 278890, upload-time = "2026-02-20T20:19:39.263Z" }, { url = "https://files.pythonhosted.org/packages/a3/90/42762b77a5b6aa96cd8c0e80612663d39211e8ae8a6cd47c7f1249a66262/greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f", size = 581120, upload-time = "2026-02-20T20:47:30.161Z" }, { url = "https://files.pythonhosted.org/packages/bf/6f/f3d64f4fa0a9c7b5c5b3c810ff1df614540d5aa7d519261b53fba55d4df9/greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55", size = 594363, upload-time = "2026-02-20T20:55:56.965Z" }, + { url = "https://files.pythonhosted.org/packages/9c/8b/1430a04657735a3f23116c2e0d5eb10220928846e4537a938a41b350bed6/greenlet-3.3.2-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4375a58e49522698d3e70cc0b801c19433021b5c37686f7ce9c65b0d5c8677d2", size = 605046, upload-time = "2026-02-20T21:02:45.234Z" }, { url = "https://files.pythonhosted.org/packages/72/83/3e06a52aca8128bdd4dcd67e932b809e76a96ab8c232a8b025b2850264c5/greenlet-3.3.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358", size = 594156, upload-time = "2026-02-20T20:20:59.955Z" }, { url = "https://files.pythonhosted.org/packages/70/79/0de5e62b873e08fe3cef7dbe84e5c4bc0e8ed0c7ff131bccb8405cd107c8/greenlet-3.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99", size = 1554649, upload-time = "2026-02-20T20:49:32.293Z" }, { url = "https://files.pythonhosted.org/packages/5a/00/32d30dee8389dc36d42170a9c66217757289e2afb0de59a3565260f38373/greenlet-3.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be", size = 1619472, upload-time = "2026-02-20T20:21:07.966Z" }, @@ -1565,6 +1567,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" }, { url = "https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" }, { url = "https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" }, + { url = "https://files.pythonhosted.org/packages/5c/c5/cc09412a29e43406eba18d61c70baa936e299bc27e074e2be3806ed29098/greenlet-3.3.2-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb", size = 626250, upload-time = "2026-02-20T21:02:46.596Z" }, { url = "https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" }, { url = "https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" }, { url = "https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" }, @@ -1573,6 +1576,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = "2026-02-20T20:19:01.9Z" }, { url = "https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" }, { url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" }, + { url = "https://files.pythonhosted.org/packages/94/2b/4d012a69759ac9d77210b8bfb128bc621125f5b20fc398bce3940d036b1c/greenlet-3.3.2-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd", size = 628268, upload-time = "2026-02-20T21:02:48.024Z" }, { url = "https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" }, { url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" }, { url = "https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" }, @@ -1581,6 +1585,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" }, { url = "https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" }, { url = "https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" }, + { url = "https://files.pythonhosted.org/packages/cd/ac/85804f74f1ccea31ba518dcc8ee6f14c79f73fe36fa1beba38930806df09/greenlet-3.3.2-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e3cb43ce200f59483eb82949bf1835a99cf43d7571e900d7c8d5c62cdf25d2f9", size = 675371, upload-time = "2026-02-20T21:02:49.664Z" }, { url = "https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" }, { url = "https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" }, { url = "https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" }, @@ -1589,6 +1594,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" }, { url = "https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" }, { url = "https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" }, + { url = "https://files.pythonhosted.org/packages/d1/67/8197b7e7e602150938049d8e7f30de1660cfb87e4c8ee349b42b67bdb2e1/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:59b3e2c40f6706b05a9cd299c836c6aa2378cabe25d021acd80f13abf81181cf", size = 666581, upload-time = "2026-02-20T21:02:51.526Z" }, { url = "https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" }, { url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" }, { url = "https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 1680010, upload-time = "2026-02-20T20:21:13.427Z" }, @@ -1604,67 +1610,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/28/27/3d6dcadc8a3214d8522c1e7f6a19554e33659be44546d44a2f7572ac7d2a/groovy-0.1.2-py3-none-any.whl", hash = "sha256:7f7975bab18c729a257a8b1ae9dcd70b7cafb1720481beae47719af57c35fa64", size = 14090, upload-time = "2025-02-28T20:24:55.152Z" }, ] -[[package]] -name = "grpcio" -version = "1.80.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b7/48/af6173dbca4454f4637a4678b67f52ca7e0c1ed7d5894d89d434fecede05/grpcio-1.80.0.tar.gz", hash = "sha256:29aca15edd0688c22ba01d7cc01cb000d72b2033f4a3c72a81a19b56fd143257", size = 12978905, upload-time = "2026-03-30T08:49:10.502Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9d/cd/bb7b7e54084a344c03d68144450da7ddd5564e51a298ae1662de65f48e2d/grpcio-1.80.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:886457a7768e408cdce226ad1ca67d2958917d306523a0e21e1a2fdaa75c9c9c", size = 6050363, upload-time = "2026-03-30T08:46:20.894Z" }, - { url = "https://files.pythonhosted.org/packages/16/02/1417f5c3460dea65f7a2e3c14e8b31e77f7ffb730e9bfadd89eda7a9f477/grpcio-1.80.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:7b641fc3f1dc647bfd80bd713addc68f6d145956f64677e56d9ebafc0bd72388", size = 12026037, upload-time = "2026-03-30T08:46:25.144Z" }, - { url = "https://files.pythonhosted.org/packages/43/98/c910254eedf2cae368d78336a2de0678e66a7317d27c02522392f949b5c6/grpcio-1.80.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:33eb763f18f006dc7fee1e69831d38d23f5eccd15b2e0f92a13ee1d9242e5e02", size = 6602306, upload-time = "2026-03-30T08:46:27.593Z" }, - { url = "https://files.pythonhosted.org/packages/7c/f8/88ca4e78c077b2b2113d95da1e1ab43efd43d723c9a0397d26529c2c1a56/grpcio-1.80.0-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:52d143637e3872633fc7dd7c3c6a1c84e396b359f3a72e215f8bf69fd82084fc", size = 7301535, upload-time = "2026-03-30T08:46:29.556Z" }, - { url = "https://files.pythonhosted.org/packages/f9/96/f28660fe2fe0f153288bf4a04e4910b7309d442395135c88ed4f5b3b8b40/grpcio-1.80.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c51bf8ac4575af2e0678bccfb07e47321fc7acb5049b4482832c5c195e04e13a", size = 6808669, upload-time = "2026-03-30T08:46:31.984Z" }, - { url = "https://files.pythonhosted.org/packages/47/eb/3f68a5e955779c00aeef23850e019c1c1d0e032d90633ba49c01ad5a96e0/grpcio-1.80.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:50a9871536d71c4fba24ee856abc03a87764570f0c457dd8db0b4018f379fed9", size = 7409489, upload-time = "2026-03-30T08:46:34.684Z" }, - { url = "https://files.pythonhosted.org/packages/5b/a7/d2f681a4bfb881be40659a309771f3bdfbfdb1190619442816c3f0ffc079/grpcio-1.80.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:a72d84ad0514db063e21887fbacd1fd7acb4d494a564cae22227cd45c7fbf199", size = 8423167, upload-time = "2026-03-30T08:46:36.833Z" }, - { url = "https://files.pythonhosted.org/packages/97/8a/29b4589c204959aa35ce5708400a05bba72181807c45c47b3ec000c39333/grpcio-1.80.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f7691a6788ad9196872f95716df5bc643ebba13c97140b7a5ee5c8e75d1dea81", size = 7846761, upload-time = "2026-03-30T08:46:40.091Z" }, - { url = "https://files.pythonhosted.org/packages/6b/d2/ed143e097230ee121ac5848f6ff14372dba91289b10b536d54fb1b7cbae7/grpcio-1.80.0-cp310-cp310-win32.whl", hash = "sha256:46c2390b59d67f84e882694d489f5b45707c657832d7934859ceb8c33f467069", size = 4156534, upload-time = "2026-03-30T08:46:42.026Z" }, - { url = "https://files.pythonhosted.org/packages/d5/c9/df8279bb49b29409995e95efa85b72973d62f8aeff89abee58c91f393710/grpcio-1.80.0-cp310-cp310-win_amd64.whl", hash = "sha256:dc053420fc75749c961e2a4c906398d7c15725d36ccc04ae6d16093167223b58", size = 4889869, upload-time = "2026-03-30T08:46:44.219Z" }, - { url = "https://files.pythonhosted.org/packages/5d/db/1d56e5f5823257b291962d6c0ce106146c6447f405b60b234c4f222a7cde/grpcio-1.80.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:dfab85db094068ff42e2a3563f60ab3dddcc9d6488a35abf0132daec13209c8a", size = 6055009, upload-time = "2026-03-30T08:46:46.265Z" }, - { url = "https://files.pythonhosted.org/packages/6e/18/c83f3cad64c5ca63bca7e91e5e46b0d026afc5af9d0a9972472ceba294b3/grpcio-1.80.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:5c07e82e822e1161354e32da2662f741a4944ea955f9f580ec8fb409dd6f6060", size = 12035295, upload-time = "2026-03-30T08:46:49.099Z" }, - { url = "https://files.pythonhosted.org/packages/0f/8e/e14966b435be2dda99fbe89db9525ea436edc79780431a1c2875a3582644/grpcio-1.80.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba0915d51fd4ced2db5ff719f84e270afe0e2d4c45a7bdb1e8d036e4502928c2", size = 6610297, upload-time = "2026-03-30T08:46:52.123Z" }, - { url = "https://files.pythonhosted.org/packages/cc/26/d5eb38f42ce0e3fdc8174ea4d52036ef8d58cc4426cb800f2610f625dd75/grpcio-1.80.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:3cb8130ba457d2aa09fa6b7c3ed6b6e4e6a2685fce63cb803d479576c4d80e21", size = 7300208, upload-time = "2026-03-30T08:46:54.859Z" }, - { url = "https://files.pythonhosted.org/packages/25/51/bd267c989f85a17a5b3eea65a6feb4ff672af41ca614e5a0279cc0ea381c/grpcio-1.80.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:09e5e478b3d14afd23f12e49e8b44c8684ac3c5f08561c43a5b9691c54d136ab", size = 6813442, upload-time = "2026-03-30T08:46:57.056Z" }, - { url = "https://files.pythonhosted.org/packages/9e/d9/d80eef735b19e9169e30164bbf889b46f9df9127598a83d174eb13a48b26/grpcio-1.80.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:00168469238b022500e486c1c33916acf2f2a9b2c022202cf8a1885d2e3073c1", size = 7414743, upload-time = "2026-03-30T08:46:59.682Z" }, - { url = "https://files.pythonhosted.org/packages/de/f2/567f5bd5054398ed6b0509b9a30900376dcf2786bd936812098808b49d8d/grpcio-1.80.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8502122a3cc1714038e39a0b071acb1207ca7844208d5ea0d091317555ee7106", size = 8426046, upload-time = "2026-03-30T08:47:02.474Z" }, - { url = "https://files.pythonhosted.org/packages/62/29/73ef0141b4732ff5eacd68430ff2512a65c004696997f70476a83e548e7e/grpcio-1.80.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ce1794f4ea6cc3ca29463f42d665c32ba1b964b48958a66497917fe9069f26e6", size = 7851641, upload-time = "2026-03-30T08:47:05.462Z" }, - { url = "https://files.pythonhosted.org/packages/46/69/abbfa360eb229a8623bab5f5a4f8105e445bd38ce81a89514ba55d281ad0/grpcio-1.80.0-cp311-cp311-win32.whl", hash = "sha256:51b4a7189b0bef2aa30adce3c78f09c83526cf3dddb24c6a96555e3b97340440", size = 4154368, upload-time = "2026-03-30T08:47:08.027Z" }, - { url = "https://files.pythonhosted.org/packages/6f/d4/ae92206d01183b08613e846076115f5ac5991bae358d2a749fa864da5699/grpcio-1.80.0-cp311-cp311-win_amd64.whl", hash = "sha256:02e64bb0bb2da14d947a49e6f120a75e947250aebe65f9629b62bb1f5c14e6e9", size = 4894235, upload-time = "2026-03-30T08:47:10.839Z" }, - { url = "https://files.pythonhosted.org/packages/5c/e8/a2b749265eb3415abc94f2e619bbd9e9707bebdda787e61c593004ec927a/grpcio-1.80.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:c624cc9f1008361014378c9d776de7182b11fe8b2e5a81bc69f23a295f2a1ad0", size = 6015616, upload-time = "2026-03-30T08:47:13.428Z" }, - { url = "https://files.pythonhosted.org/packages/3e/97/b1282161a15d699d1e90c360df18d19165a045ce1c343c7f313f5e8a0b77/grpcio-1.80.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:f49eddcac43c3bf350c0385366a58f36bed8cc2c0ec35ef7b74b49e56552c0c2", size = 12014204, upload-time = "2026-03-30T08:47:15.873Z" }, - { url = "https://files.pythonhosted.org/packages/6e/5e/d319c6e997b50c155ac5a8cb12f5173d5b42677510e886d250d50264949d/grpcio-1.80.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d334591df610ab94714048e0d5b4f3dd5ad1bee74dfec11eee344220077a79de", size = 6563866, upload-time = "2026-03-30T08:47:18.588Z" }, - { url = "https://files.pythonhosted.org/packages/ae/f6/fdd975a2cb4d78eb67769a7b3b3830970bfa2e919f1decf724ae4445f42c/grpcio-1.80.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:0cb517eb1d0d0aaf1d87af7cc5b801d686557c1d88b2619f5e31fab3c2315921", size = 7273060, upload-time = "2026-03-30T08:47:21.113Z" }, - { url = "https://files.pythonhosted.org/packages/db/f0/a3deb5feba60d9538a962913e37bd2e69a195f1c3376a3dd44fe0427e996/grpcio-1.80.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e78c4ac0d97dc2e569b2f4bcbbb447491167cb358d1a389fc4af71ab6f70411", size = 6782121, upload-time = "2026-03-30T08:47:23.827Z" }, - { url = "https://files.pythonhosted.org/packages/ca/84/36c6dcfddc093e108141f757c407902a05085e0c328007cb090d56646cdf/grpcio-1.80.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2ed770b4c06984f3b47eb0517b1c69ad0b84ef3f40128f51448433be904634cd", size = 7383811, upload-time = "2026-03-30T08:47:26.517Z" }, - { url = "https://files.pythonhosted.org/packages/7c/ef/f3a77e3dc5b471a0ec86c564c98d6adfa3510d38f8ee99010410858d591e/grpcio-1.80.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:256507e2f524092f1473071a05e65a5b10d84b82e3ff24c5b571513cfaa61e2f", size = 8393860, upload-time = "2026-03-30T08:47:29.439Z" }, - { url = "https://files.pythonhosted.org/packages/9b/8d/9d4d27ed7f33d109c50d6b5ce578a9914aa68edab75d65869a17e630a8d1/grpcio-1.80.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a6284a5d907c37db53350645567c522be314bac859a64a7a5ca63b77bb7958f", size = 7830132, upload-time = "2026-03-30T08:47:33.254Z" }, - { url = "https://files.pythonhosted.org/packages/14/e4/9990b41c6d7a44e1e9dee8ac11d7a9802ba1378b40d77468a7761d1ad288/grpcio-1.80.0-cp312-cp312-win32.whl", hash = "sha256:c71309cfce2f22be26aa4a847357c502db6c621f1a49825ae98aa0907595b193", size = 4140904, upload-time = "2026-03-30T08:47:35.319Z" }, - { url = "https://files.pythonhosted.org/packages/2f/2c/296f6138caca1f4b92a31ace4ae1b87dab692fc16a7a3417af3bb3c805bf/grpcio-1.80.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe648599c0e37594c4809d81a9e77bd138cc82eb8baa71b6a86af65426723ff", size = 4880944, upload-time = "2026-03-30T08:47:37.831Z" }, - { url = "https://files.pythonhosted.org/packages/2f/3a/7c3c25789e3f069e581dc342e03613c5b1cb012c4e8c7d9d5cf960a75856/grpcio-1.80.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:e9e408fc016dffd20661f0126c53d8a31c2821b5c13c5d67a0f5ed5de93319ad", size = 6017243, upload-time = "2026-03-30T08:47:40.075Z" }, - { url = "https://files.pythonhosted.org/packages/04/19/21a9806eb8240e174fd1ab0cd5b9aa948bb0e05c2f2f55f9d5d7405e6d08/grpcio-1.80.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:92d787312e613754d4d8b9ca6d3297e69994a7912a32fa38c4c4e01c272974b0", size = 12010840, upload-time = "2026-03-30T08:47:43.11Z" }, - { url = "https://files.pythonhosted.org/packages/18/3a/23347d35f76f639e807fb7a36fad3068aed100996849a33809591f26eca6/grpcio-1.80.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac393b58aa16991a2f1144ec578084d544038c12242da3a215966b512904d0f", size = 6567644, upload-time = "2026-03-30T08:47:46.806Z" }, - { url = "https://files.pythonhosted.org/packages/ff/40/96e07ecb604a6a67ae6ab151e3e35b132875d98bc68ec65f3e5ab3e781d7/grpcio-1.80.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:68e5851ac4b9afe07e7f84483803ad167852570d65326b34d54ca560bfa53fb6", size = 7277830, upload-time = "2026-03-30T08:47:49.643Z" }, - { url = "https://files.pythonhosted.org/packages/9b/e2/da1506ecea1f34a5e365964644b35edef53803052b763ca214ba3870c856/grpcio-1.80.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:873ff5d17d68992ef6605330127425d2fc4e77e612fa3c3e0ed4e668685e3140", size = 6783216, upload-time = "2026-03-30T08:47:52.817Z" }, - { url = "https://files.pythonhosted.org/packages/44/83/3b20ff58d0c3b7f6caaa3af9a4174d4023701df40a3f39f7f1c8e7c48f9d/grpcio-1.80.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2bea16af2750fd0a899bf1abd9022244418b55d1f37da2202249ba4ba673838d", size = 7385866, upload-time = "2026-03-30T08:47:55.687Z" }, - { url = "https://files.pythonhosted.org/packages/47/45/55c507599c5520416de5eefecc927d6a0d7af55e91cfffb2e410607e5744/grpcio-1.80.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba0db34f7e1d803a878284cd70e4c63cb6ae2510ba51937bf8f45ba997cefcf7", size = 8391602, upload-time = "2026-03-30T08:47:58.303Z" }, - { url = "https://files.pythonhosted.org/packages/10/bb/dd06f4c24c01db9cf11341b547d0a016b2c90ed7dbbb086a5710df7dd1d7/grpcio-1.80.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8eb613f02d34721f1acf3626dfdb3545bd3c8505b0e52bf8b5710a28d02e8aa7", size = 7826752, upload-time = "2026-03-30T08:48:01.311Z" }, - { url = "https://files.pythonhosted.org/packages/f9/1e/9d67992ba23371fd63d4527096eb8c6b76d74d52b500df992a3343fd7251/grpcio-1.80.0-cp313-cp313-win32.whl", hash = "sha256:93b6f823810720912fd131f561f91f5fed0fda372b6b7028a2681b8194d5d294", size = 4142310, upload-time = "2026-03-30T08:48:04.594Z" }, - { url = "https://files.pythonhosted.org/packages/cf/e6/283326a27da9e2c3038bc93eeea36fb118ce0b2d03922a9cda6688f53c5b/grpcio-1.80.0-cp313-cp313-win_amd64.whl", hash = "sha256:e172cf795a3ba5246d3529e4d34c53db70e888fa582a8ffebd2e6e48bc0cba50", size = 4882833, upload-time = "2026-03-30T08:48:07.363Z" }, - { url = "https://files.pythonhosted.org/packages/c5/6d/e65307ce20f5a09244ba9e9d8476e99fb039de7154f37fb85f26978b59c3/grpcio-1.80.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:3d4147a97c8344d065d01bbf8b6acec2cf86fb0400d40696c8bdad34a64ffc0e", size = 6017376, upload-time = "2026-03-30T08:48:10.005Z" }, - { url = "https://files.pythonhosted.org/packages/69/10/9cef5d9650c72625a699c549940f0abb3c4bfdb5ed45a5ce431f92f31806/grpcio-1.80.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8e11f167935b3eb089ac9038e1a063e6d7dbe995c0bb4a661e614583352e76f", size = 12018133, upload-time = "2026-03-30T08:48:12.927Z" }, - { url = "https://files.pythonhosted.org/packages/04/82/983aabaad82ba26113caceeb9091706a0696b25da004fe3defb5b346e15b/grpcio-1.80.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f14b618fc30de822681ee986cfdcc2d9327229dc4c98aed16896761cacd468b9", size = 6574748, upload-time = "2026-03-30T08:48:16.386Z" }, - { url = "https://files.pythonhosted.org/packages/07/d7/031666ef155aa0bf399ed7e19439656c38bbd143779ae0861b038ce82abd/grpcio-1.80.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4ed39fbdcf9b87370f6e8df4e39ca7b38b3e5e9d1b0013c7b6be9639d6578d14", size = 7277711, upload-time = "2026-03-30T08:48:19.627Z" }, - { url = "https://files.pythonhosted.org/packages/e8/43/f437a78f7f4f1d311804189e8f11fb311a01049b2e08557c1068d470cb2e/grpcio-1.80.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2dcc70e9f0ba987526e8e8603a610fb4f460e42899e74e7a518bf3c68fe1bf05", size = 6785372, upload-time = "2026-03-30T08:48:22.373Z" }, - { url = "https://files.pythonhosted.org/packages/93/3d/f6558e9c6296cb4227faa5c43c54a34c68d32654b829f53288313d16a86e/grpcio-1.80.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:448c884b668b868562b1bda833c5fce6272d26e1926ec46747cda05741d302c1", size = 7395268, upload-time = "2026-03-30T08:48:25.638Z" }, - { url = "https://files.pythonhosted.org/packages/06/21/0fdd77e84720b08843c371a2efa6f2e19dbebf56adc72df73d891f5506f0/grpcio-1.80.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a1dc80fe55685b4a543555e6eef975303b36c8db1023b1599b094b92aa77965f", size = 8392000, upload-time = "2026-03-30T08:48:28.974Z" }, - { url = "https://files.pythonhosted.org/packages/f5/68/67f4947ed55d2e69f2cc199ab9fd85e0a0034d813bbeef84df6d2ba4d4b7/grpcio-1.80.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:31b9ac4ad1aa28ffee5503821fafd09e4da0a261ce1c1281c6c8da0423c83b6e", size = 7828477, upload-time = "2026-03-30T08:48:32.054Z" }, - { url = "https://files.pythonhosted.org/packages/44/b6/8d4096691b2e385e8271911a0de4f35f0a6c7d05aff7098e296c3de86939/grpcio-1.80.0-cp314-cp314-win32.whl", hash = "sha256:367ce30ba67d05e0592470428f0ec1c31714cab9ef19b8f2e37be1f4c7d32fae", size = 4218563, upload-time = "2026-03-30T08:48:34.538Z" }, - { url = "https://files.pythonhosted.org/packages/e5/8c/bbe6baf2557262834f2070cf668515fa308b2d38a4bbf771f8f7872a7036/grpcio-1.80.0-cp314-cp314-win_amd64.whl", hash = "sha256:3b01e1f5464c583d2f567b2e46ff0d516ef979978f72091fd81f5ab7fa6e2e7f", size = 5019457, upload-time = "2026-03-30T08:48:37.308Z" }, -] - [[package]] name = "h11" version = "0.16.0" @@ -3345,24 +3290,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8b/ca/8f122055c97a932311a3f640273f084e738008933503d0c2563cd5d591fc/opentelemetry_exporter_otlp_proto_common-1.40.0-py3-none-any.whl", hash = "sha256:7081ff453835a82417bf38dccf122c827c3cbc94f2079b03bba02a3165f25149", size = 18369, upload-time = "2026-03-04T14:17:04.796Z" }, ] -[[package]] -name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.40.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "googleapis-common-protos" }, - { name = "grpcio" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp-proto-common" }, - { name = "opentelemetry-proto" }, - { name = "opentelemetry-sdk" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/8f/7f/b9e60435cfcc7590fa87436edad6822240dddbc184643a2a005301cc31f4/opentelemetry_exporter_otlp_proto_grpc-1.40.0.tar.gz", hash = "sha256:bd4015183e40b635b3dab8da528b27161ba83bf4ef545776b196f0fb4ec47740", size = 25759, upload-time = "2026-03-04T14:17:24.4Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/96/6f/7ee0980afcbdcd2d40362da16f7f9796bd083bf7f0b8e038abfbc0300f5d/opentelemetry_exporter_otlp_proto_grpc-1.40.0-py3-none-any.whl", hash = "sha256:2aa0ca53483fe0cf6405087a7491472b70335bc5c7944378a0a8e72e86995c52", size = 20304, upload-time = "2026-03-04T14:17:05.942Z" }, -] - [[package]] name = "opentelemetry-exporter-otlp-proto-http" version = "1.40.0" @@ -3381,36 +3308,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/3a/8865d6754e61c9fb170cdd530a124a53769ee5f740236064816eb0ca7301/opentelemetry_exporter_otlp_proto_http-1.40.0-py3-none-any.whl", hash = "sha256:a8d1dab28f504c5d96577d6509f80a8150e44e8f45f82cdbe0e34c99ab040069", size = 19960, upload-time = "2026-03-04T14:17:07.153Z" }, ] -[[package]] -name = "opentelemetry-instrumentation" -version = "0.61b0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "opentelemetry-api" }, - { name = "opentelemetry-semantic-conventions" }, - { name = "packaging" }, - { name = "wrapt" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/da/37/6bf8e66bfcee5d3c6515b79cb2ee9ad05fe573c20f7ceb288d0e7eeec28c/opentelemetry_instrumentation-0.61b0.tar.gz", hash = "sha256:cb21b48db738c9de196eba6b805b4ff9de3b7f187e4bbf9a466fa170514f1fc7", size = 32606, upload-time = "2026-03-04T14:20:16.825Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d8/3e/f6f10f178b6316de67f0dfdbbb699a24fbe8917cf1743c1595fb9dcdd461/opentelemetry_instrumentation-0.61b0-py3-none-any.whl", hash = "sha256:92a93a280e69788e8f88391247cc530fd81f16f2b011979d4d6398f805cfbc63", size = 33448, upload-time = "2026-03-04T14:19:02.447Z" }, -] - -[[package]] -name = "opentelemetry-instrumentation-asyncio" -version = "0.61b0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "opentelemetry-api" }, - { name = "opentelemetry-instrumentation" }, - { name = "opentelemetry-semantic-conventions" }, - { name = "wrapt" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/48/06/f14eacf4fde6892402a4fe1023cbca4a5d4f08f37d930ea3e414a98c85d0/opentelemetry_instrumentation_asyncio-0.61b0.tar.gz", hash = "sha256:3b173b009f108fcbc6ee4f7482e7ae8b76518a87a620ad5e7dd24e4c26066c3c", size = 14115, upload-time = "2026-03-04T14:20:22.227Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/58/8f/79913d7ebc2bd2be9a81f8ecbe0f7413c3bec55c83c89337b93c8de5417a/opentelemetry_instrumentation_asyncio-0.61b0-py3-none-any.whl", hash = "sha256:43273d5b74880b06c5a766f779fa480a50fc5a09a7c81468a60457b794e3f3cd", size = 14770, upload-time = "2026-03-04T14:19:13.057Z" }, -] - [[package]] name = "opentelemetry-proto" version = "1.40.0" @@ -3468,10 +3365,6 @@ dependencies = [ { name = "olefile" }, { name = "openai" }, { name = "openpyxl" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp-proto-grpc" }, - { name = "opentelemetry-instrumentation-asyncio" }, - { name = "opentelemetry-sdk" }, { name = "pdfminer-six" }, { name = "pdfplumber" }, { name = "protobuf" }, @@ -3504,15 +3397,6 @@ dependencies = [ ] [package.optional-dependencies] -benchmark = [ - { name = "datasets" }, - { name = "langchain" }, - { name = "langchain-core" }, - { name = "langchain-openai" }, - { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "tiktoken" }, -] bot = [ { name = "beautifulsoup4" }, { name = "croniter" }, @@ -3660,7 +3544,6 @@ requires-dist = [ { name = "cmake", marker = "extra == 'build'", specifier = ">=3.15" }, { name = "croniter", marker = "extra == 'bot'", specifier = ">=2.0.0" }, { name = "cryptography", specifier = ">=42.0.0" }, - { name = "datasets", marker = "extra == 'benchmark'", specifier = ">=2.0.0" }, { name = "datasets", marker = "extra == 'eval'", specifier = ">=2.0.0" }, { name = "datasets", marker = "extra == 'test'", specifier = ">=2.0.0" }, { name = "ddgs", marker = "extra == 'bot'", specifier = ">=9.0.0" }, @@ -3678,9 +3561,6 @@ requires-dist = [ { name = "hvac", marker = "extra == 'test'", specifier = ">=2.0.0" }, { name = "jinja2", specifier = ">=3.1.6" }, { name = "json-repair", specifier = ">=0.25.0" }, - { name = "langchain", marker = "extra == 'benchmark'", specifier = ">=1.0.0" }, - { name = "langchain-core", marker = "extra == 'benchmark'", specifier = ">=1.0.0" }, - { name = "langchain-openai", marker = "extra == 'benchmark'", specifier = ">=1.0.0" }, { name = "langfuse", marker = "extra == 'bot-langfuse'", specifier = ">=3.0.0" }, { name = "lark-oapi", marker = "extra == 'bot-feishu'", specifier = ">=1.0.0" }, { name = "litellm", specifier = ">=1.0.0,<1.82.6" }, @@ -3695,12 +3575,7 @@ requires-dist = [ { name = "openpyxl", specifier = ">=3.0.0" }, { name = "opensandbox", marker = "extra == 'bot-sandbox'", specifier = ">=0.1.0" }, { name = "opensandbox-server", marker = "extra == 'bot-sandbox'", specifier = ">=0.1.0" }, - { name = "opentelemetry-api", specifier = ">=1.14" }, - { name = "opentelemetry-exporter-otlp-proto-grpc", specifier = ">=1.14" }, - { name = "opentelemetry-instrumentation-asyncio", specifier = ">=0.61b0" }, - { name = "opentelemetry-sdk", specifier = ">=1.14" }, { name = "openviking", extras = ["bot", "bot-dingtalk", "bot-feishu", "bot-fuse", "bot-langfuse", "bot-opencode", "bot-qq", "bot-sandbox", "bot-slack", "bot-telegram"], marker = "extra == 'bot-full'" }, - { name = "pandas", marker = "extra == 'benchmark'", specifier = ">=2.0.0" }, { name = "pandas", marker = "extra == 'eval'", specifier = ">=2.0.0" }, { name = "pandas", marker = "extra == 'test'", specifier = ">=2.0.0" }, { name = "pdfminer-six", specifier = ">=20251230" }, @@ -3738,7 +3613,6 @@ requires-dist = [ { name = "sphinx-rtd-theme", marker = "extra == 'doc'", specifier = ">=1.3.0" }, { name = "tabulate", specifier = ">=0.9.0" }, { name = "tavily-python", marker = "extra == 'bot'", specifier = ">=0.5.0" }, - { name = "tiktoken", marker = "extra == 'benchmark'", specifier = ">=0.5.0" }, { name = "tree-sitter", specifier = ">=0.23.0" }, { name = "tree-sitter-c-sharp", specifier = ">=0.23.0" }, { name = "tree-sitter-cpp", specifier = ">=0.23.0" }, @@ -3761,7 +3635,7 @@ requires-dist = [ { name = "xlrd", specifier = ">=2.0.1" }, { name = "xxhash", specifier = ">=3.0.0" }, ] -provides-extras = ["test", "dev", "doc", "eval", "gemini", "gemini-async", "ocr", "build", "bot", "bot-langfuse", "bot-telegram", "bot-feishu", "bot-dingtalk", "bot-slack", "bot-qq", "bot-sandbox", "bot-fuse", "bot-opencode", "bot-full", "benchmark"] +provides-extras = ["test", "dev", "doc", "eval", "gemini", "gemini-async", "ocr", "build", "bot", "bot-langfuse", "bot-telegram", "bot-feishu", "bot-dingtalk", "bot-slack", "bot-qq", "bot-sandbox", "bot-fuse", "bot-opencode", "bot-full"] [package.metadata.requires-dev] dev = [{ name = "pytest", specifier = ">=9.0.2" }] From d108af5b18a0214b8c73c6f4c9850705c3b48f29 Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Thu, 2 Apr 2026 21:50:26 +0800 Subject: [PATCH 15/20] update --- .../models/vlm/backends/volcengine_vlm.py | 5 +- .../prompts/templates/memory/events.yaml | 3 +- openviking/session/memory/extract_loop.py | 1 - uv.lock | 140 +++++++++++++++++- 4 files changed, 137 insertions(+), 12 deletions(-) diff --git a/openviking/models/vlm/backends/volcengine_vlm.py b/openviking/models/vlm/backends/volcengine_vlm.py index 978bcb339..f5f5370b2 100644 --- a/openviking/models/vlm/backends/volcengine_vlm.py +++ b/openviking/models/vlm/backends/volcengine_vlm.py @@ -138,7 +138,6 @@ async def get_completion_async( tools: Optional[List[Dict[str, Any]]] = None, tool_choice: Optional[str] = None, messages: Optional[List[Dict[str, Any]]] = None, - max_retries: int = 0, ) -> Union[str, VLMResponse]: """Get text completion asynchronously via Chat Completions API.""" kwargs_messages = messages or [{"role": "user", "content": prompt}] @@ -160,7 +159,7 @@ async def get_completion_async( client = self.get_async_client() last_error = None - for attempt in range(max_retries + 1): + for attempt in range(self.max_retries + 1): try: t0 = time.perf_counter() response = await client.chat.completions.create(**kwargs) @@ -172,7 +171,7 @@ async def get_completion_async( return self._clean_response(str(result)) except Exception as e: last_error = e - if attempt < max_retries: + if attempt < self.max_retries: await asyncio.sleep(2**attempt) if last_error: diff --git a/openviking/prompts/templates/memory/events.yaml b/openviking/prompts/templates/memory/events.yaml index aef0d5199..4b8856a94 100644 --- a/openviking/prompts/templates/memory/events.yaml +++ b/openviking/prompts/templates/memory/events.yaml @@ -52,5 +52,6 @@ fields: type: string description: | Conversation message index ranges to extract, format: "start-end,start-end,..." - Example: "0-10,50-60" means extract messages 0-10 and 50-60. + Example: "0-3,40-45" means extract messages 0-3 and 40-45. + Limit each event to 15 messages max. Save only important parts or split into multiple events. merge_op: immutable diff --git a/openviking/session/memory/extract_loop.py b/openviking/session/memory/extract_loop.py index eac125b5b..4efbc1a21 100644 --- a/openviking/session/memory/extract_loop.py +++ b/openviking/session/memory/extract_loop.py @@ -327,7 +327,6 @@ async def _call_llm( messages=messages, tools=self._tool_schemas, tool_choice=tool_choice, - max_retries=self.vlm.max_retries, ) # print(f'response={response}') # Log cache hit info diff --git a/uv.lock b/uv.lock index 4b6d38957..0add27d5d 100644 --- a/uv.lock +++ b/uv.lock @@ -1550,7 +1550,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/3f/9859f655d11901e7b2996c6e3d33e0caa9a1d4572c3bc61ed0faa64b2f4c/greenlet-3.3.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9bc885b89709d901859cf95179ec9f6bb67a3d2bb1f0e88456461bd4b7f8fd0d", size = 277747, upload-time = "2026-02-20T20:16:21.325Z" }, { url = "https://files.pythonhosted.org/packages/fb/07/cb284a8b5c6498dbd7cba35d31380bb123d7dceaa7907f606c8ff5993cbf/greenlet-3.3.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b568183cf65b94919be4438dc28416b234b678c608cafac8874dfeeb2a9bbe13", size = 579202, upload-time = "2026-02-20T20:47:28.955Z" }, { url = "https://files.pythonhosted.org/packages/ed/45/67922992b3a152f726163b19f890a85129a992f39607a2a53155de3448b8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:527fec58dc9f90efd594b9b700662ed3fb2493c2122067ac9c740d98080a620e", size = 590620, upload-time = "2026-02-20T20:55:55.581Z" }, - { url = "https://files.pythonhosted.org/packages/03/5f/6e2a7d80c353587751ef3d44bb947f0565ec008a2e0927821c007e96d3a7/greenlet-3.3.2-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508c7f01f1791fbc8e011bd508f6794cb95397fdb198a46cb6635eb5b78d85a7", size = 602132, upload-time = "2026-02-20T21:02:43.261Z" }, { url = "https://files.pythonhosted.org/packages/ad/55/9f1ebb5a825215fadcc0f7d5073f6e79e3007e3282b14b22d6aba7ca6cb8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad0c8917dd42a819fe77e6bdfcb84e3379c0de956469301d9fd36427a1ca501f", size = 591729, upload-time = "2026-02-20T20:20:58.395Z" }, { url = "https://files.pythonhosted.org/packages/24/b4/21f5455773d37f94b866eb3cf5caed88d6cea6dd2c6e1f9c34f463cba3ec/greenlet-3.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:97245cc10e5515dbc8c3104b2928f7f02b6813002770cfaffaf9a6e0fc2b94ef", size = 1551946, upload-time = "2026-02-20T20:49:31.102Z" }, { url = "https://files.pythonhosted.org/packages/00/68/91f061a926abead128fe1a87f0b453ccf07368666bd59ffa46016627a930/greenlet-3.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8c1fdd7d1b309ff0da81d60a9688a8bd044ac4e18b250320a96fc68d31c209ca", size = 1618494, upload-time = "2026-02-20T20:21:06.541Z" }, @@ -1558,7 +1557,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f3/47/16400cb42d18d7a6bb46f0626852c1718612e35dcb0dffa16bbaffdf5dd2/greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86", size = 278890, upload-time = "2026-02-20T20:19:39.263Z" }, { url = "https://files.pythonhosted.org/packages/a3/90/42762b77a5b6aa96cd8c0e80612663d39211e8ae8a6cd47c7f1249a66262/greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f", size = 581120, upload-time = "2026-02-20T20:47:30.161Z" }, { url = "https://files.pythonhosted.org/packages/bf/6f/f3d64f4fa0a9c7b5c5b3c810ff1df614540d5aa7d519261b53fba55d4df9/greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55", size = 594363, upload-time = "2026-02-20T20:55:56.965Z" }, - { url = "https://files.pythonhosted.org/packages/9c/8b/1430a04657735a3f23116c2e0d5eb10220928846e4537a938a41b350bed6/greenlet-3.3.2-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4375a58e49522698d3e70cc0b801c19433021b5c37686f7ce9c65b0d5c8677d2", size = 605046, upload-time = "2026-02-20T21:02:45.234Z" }, { url = "https://files.pythonhosted.org/packages/72/83/3e06a52aca8128bdd4dcd67e932b809e76a96ab8c232a8b025b2850264c5/greenlet-3.3.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358", size = 594156, upload-time = "2026-02-20T20:20:59.955Z" }, { url = "https://files.pythonhosted.org/packages/70/79/0de5e62b873e08fe3cef7dbe84e5c4bc0e8ed0c7ff131bccb8405cd107c8/greenlet-3.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99", size = 1554649, upload-time = "2026-02-20T20:49:32.293Z" }, { url = "https://files.pythonhosted.org/packages/5a/00/32d30dee8389dc36d42170a9c66217757289e2afb0de59a3565260f38373/greenlet-3.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be", size = 1619472, upload-time = "2026-02-20T20:21:07.966Z" }, @@ -1567,7 +1565,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" }, { url = "https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" }, { url = "https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" }, - { url = "https://files.pythonhosted.org/packages/5c/c5/cc09412a29e43406eba18d61c70baa936e299bc27e074e2be3806ed29098/greenlet-3.3.2-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb", size = 626250, upload-time = "2026-02-20T21:02:46.596Z" }, { url = "https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" }, { url = "https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" }, { url = "https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" }, @@ -1576,7 +1573,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = "2026-02-20T20:19:01.9Z" }, { url = "https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" }, { url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" }, - { url = "https://files.pythonhosted.org/packages/94/2b/4d012a69759ac9d77210b8bfb128bc621125f5b20fc398bce3940d036b1c/greenlet-3.3.2-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd", size = 628268, upload-time = "2026-02-20T21:02:48.024Z" }, { url = "https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" }, { url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" }, { url = "https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" }, @@ -1585,7 +1581,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" }, { url = "https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" }, { url = "https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" }, - { url = "https://files.pythonhosted.org/packages/cd/ac/85804f74f1ccea31ba518dcc8ee6f14c79f73fe36fa1beba38930806df09/greenlet-3.3.2-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e3cb43ce200f59483eb82949bf1835a99cf43d7571e900d7c8d5c62cdf25d2f9", size = 675371, upload-time = "2026-02-20T21:02:49.664Z" }, { url = "https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" }, { url = "https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" }, { url = "https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" }, @@ -1594,7 +1589,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" }, { url = "https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" }, { url = "https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" }, - { url = "https://files.pythonhosted.org/packages/d1/67/8197b7e7e602150938049d8e7f30de1660cfb87e4c8ee349b42b67bdb2e1/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:59b3e2c40f6706b05a9cd299c836c6aa2378cabe25d021acd80f13abf81181cf", size = 666581, upload-time = "2026-02-20T21:02:51.526Z" }, { url = "https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" }, { url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" }, { url = "https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 1680010, upload-time = "2026-02-20T20:21:13.427Z" }, @@ -1610,6 +1604,67 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/28/27/3d6dcadc8a3214d8522c1e7f6a19554e33659be44546d44a2f7572ac7d2a/groovy-0.1.2-py3-none-any.whl", hash = "sha256:7f7975bab18c729a257a8b1ae9dcd70b7cafb1720481beae47719af57c35fa64", size = 14090, upload-time = "2025-02-28T20:24:55.152Z" }, ] +[[package]] +name = "grpcio" +version = "1.80.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b7/48/af6173dbca4454f4637a4678b67f52ca7e0c1ed7d5894d89d434fecede05/grpcio-1.80.0.tar.gz", hash = "sha256:29aca15edd0688c22ba01d7cc01cb000d72b2033f4a3c72a81a19b56fd143257", size = 12978905, upload-time = "2026-03-30T08:49:10.502Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/cd/bb7b7e54084a344c03d68144450da7ddd5564e51a298ae1662de65f48e2d/grpcio-1.80.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:886457a7768e408cdce226ad1ca67d2958917d306523a0e21e1a2fdaa75c9c9c", size = 6050363, upload-time = "2026-03-30T08:46:20.894Z" }, + { url = "https://files.pythonhosted.org/packages/16/02/1417f5c3460dea65f7a2e3c14e8b31e77f7ffb730e9bfadd89eda7a9f477/grpcio-1.80.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:7b641fc3f1dc647bfd80bd713addc68f6d145956f64677e56d9ebafc0bd72388", size = 12026037, upload-time = "2026-03-30T08:46:25.144Z" }, + { url = "https://files.pythonhosted.org/packages/43/98/c910254eedf2cae368d78336a2de0678e66a7317d27c02522392f949b5c6/grpcio-1.80.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:33eb763f18f006dc7fee1e69831d38d23f5eccd15b2e0f92a13ee1d9242e5e02", size = 6602306, upload-time = "2026-03-30T08:46:27.593Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f8/88ca4e78c077b2b2113d95da1e1ab43efd43d723c9a0397d26529c2c1a56/grpcio-1.80.0-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:52d143637e3872633fc7dd7c3c6a1c84e396b359f3a72e215f8bf69fd82084fc", size = 7301535, upload-time = "2026-03-30T08:46:29.556Z" }, + { url = "https://files.pythonhosted.org/packages/f9/96/f28660fe2fe0f153288bf4a04e4910b7309d442395135c88ed4f5b3b8b40/grpcio-1.80.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c51bf8ac4575af2e0678bccfb07e47321fc7acb5049b4482832c5c195e04e13a", size = 6808669, upload-time = "2026-03-30T08:46:31.984Z" }, + { url = "https://files.pythonhosted.org/packages/47/eb/3f68a5e955779c00aeef23850e019c1c1d0e032d90633ba49c01ad5a96e0/grpcio-1.80.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:50a9871536d71c4fba24ee856abc03a87764570f0c457dd8db0b4018f379fed9", size = 7409489, upload-time = "2026-03-30T08:46:34.684Z" }, + { url = "https://files.pythonhosted.org/packages/5b/a7/d2f681a4bfb881be40659a309771f3bdfbfdb1190619442816c3f0ffc079/grpcio-1.80.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:a72d84ad0514db063e21887fbacd1fd7acb4d494a564cae22227cd45c7fbf199", size = 8423167, upload-time = "2026-03-30T08:46:36.833Z" }, + { url = "https://files.pythonhosted.org/packages/97/8a/29b4589c204959aa35ce5708400a05bba72181807c45c47b3ec000c39333/grpcio-1.80.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f7691a6788ad9196872f95716df5bc643ebba13c97140b7a5ee5c8e75d1dea81", size = 7846761, upload-time = "2026-03-30T08:46:40.091Z" }, + { url = "https://files.pythonhosted.org/packages/6b/d2/ed143e097230ee121ac5848f6ff14372dba91289b10b536d54fb1b7cbae7/grpcio-1.80.0-cp310-cp310-win32.whl", hash = "sha256:46c2390b59d67f84e882694d489f5b45707c657832d7934859ceb8c33f467069", size = 4156534, upload-time = "2026-03-30T08:46:42.026Z" }, + { url = "https://files.pythonhosted.org/packages/d5/c9/df8279bb49b29409995e95efa85b72973d62f8aeff89abee58c91f393710/grpcio-1.80.0-cp310-cp310-win_amd64.whl", hash = "sha256:dc053420fc75749c961e2a4c906398d7c15725d36ccc04ae6d16093167223b58", size = 4889869, upload-time = "2026-03-30T08:46:44.219Z" }, + { url = "https://files.pythonhosted.org/packages/5d/db/1d56e5f5823257b291962d6c0ce106146c6447f405b60b234c4f222a7cde/grpcio-1.80.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:dfab85db094068ff42e2a3563f60ab3dddcc9d6488a35abf0132daec13209c8a", size = 6055009, upload-time = "2026-03-30T08:46:46.265Z" }, + { url = "https://files.pythonhosted.org/packages/6e/18/c83f3cad64c5ca63bca7e91e5e46b0d026afc5af9d0a9972472ceba294b3/grpcio-1.80.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:5c07e82e822e1161354e32da2662f741a4944ea955f9f580ec8fb409dd6f6060", size = 12035295, upload-time = "2026-03-30T08:46:49.099Z" }, + { url = "https://files.pythonhosted.org/packages/0f/8e/e14966b435be2dda99fbe89db9525ea436edc79780431a1c2875a3582644/grpcio-1.80.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba0915d51fd4ced2db5ff719f84e270afe0e2d4c45a7bdb1e8d036e4502928c2", size = 6610297, upload-time = "2026-03-30T08:46:52.123Z" }, + { url = "https://files.pythonhosted.org/packages/cc/26/d5eb38f42ce0e3fdc8174ea4d52036ef8d58cc4426cb800f2610f625dd75/grpcio-1.80.0-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:3cb8130ba457d2aa09fa6b7c3ed6b6e4e6a2685fce63cb803d479576c4d80e21", size = 7300208, upload-time = "2026-03-30T08:46:54.859Z" }, + { url = "https://files.pythonhosted.org/packages/25/51/bd267c989f85a17a5b3eea65a6feb4ff672af41ca614e5a0279cc0ea381c/grpcio-1.80.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:09e5e478b3d14afd23f12e49e8b44c8684ac3c5f08561c43a5b9691c54d136ab", size = 6813442, upload-time = "2026-03-30T08:46:57.056Z" }, + { url = "https://files.pythonhosted.org/packages/9e/d9/d80eef735b19e9169e30164bbf889b46f9df9127598a83d174eb13a48b26/grpcio-1.80.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:00168469238b022500e486c1c33916acf2f2a9b2c022202cf8a1885d2e3073c1", size = 7414743, upload-time = "2026-03-30T08:46:59.682Z" }, + { url = "https://files.pythonhosted.org/packages/de/f2/567f5bd5054398ed6b0509b9a30900376dcf2786bd936812098808b49d8d/grpcio-1.80.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8502122a3cc1714038e39a0b071acb1207ca7844208d5ea0d091317555ee7106", size = 8426046, upload-time = "2026-03-30T08:47:02.474Z" }, + { url = "https://files.pythonhosted.org/packages/62/29/73ef0141b4732ff5eacd68430ff2512a65c004696997f70476a83e548e7e/grpcio-1.80.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ce1794f4ea6cc3ca29463f42d665c32ba1b964b48958a66497917fe9069f26e6", size = 7851641, upload-time = "2026-03-30T08:47:05.462Z" }, + { url = "https://files.pythonhosted.org/packages/46/69/abbfa360eb229a8623bab5f5a4f8105e445bd38ce81a89514ba55d281ad0/grpcio-1.80.0-cp311-cp311-win32.whl", hash = "sha256:51b4a7189b0bef2aa30adce3c78f09c83526cf3dddb24c6a96555e3b97340440", size = 4154368, upload-time = "2026-03-30T08:47:08.027Z" }, + { url = "https://files.pythonhosted.org/packages/6f/d4/ae92206d01183b08613e846076115f5ac5991bae358d2a749fa864da5699/grpcio-1.80.0-cp311-cp311-win_amd64.whl", hash = "sha256:02e64bb0bb2da14d947a49e6f120a75e947250aebe65f9629b62bb1f5c14e6e9", size = 4894235, upload-time = "2026-03-30T08:47:10.839Z" }, + { url = "https://files.pythonhosted.org/packages/5c/e8/a2b749265eb3415abc94f2e619bbd9e9707bebdda787e61c593004ec927a/grpcio-1.80.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:c624cc9f1008361014378c9d776de7182b11fe8b2e5a81bc69f23a295f2a1ad0", size = 6015616, upload-time = "2026-03-30T08:47:13.428Z" }, + { url = "https://files.pythonhosted.org/packages/3e/97/b1282161a15d699d1e90c360df18d19165a045ce1c343c7f313f5e8a0b77/grpcio-1.80.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:f49eddcac43c3bf350c0385366a58f36bed8cc2c0ec35ef7b74b49e56552c0c2", size = 12014204, upload-time = "2026-03-30T08:47:15.873Z" }, + { url = "https://files.pythonhosted.org/packages/6e/5e/d319c6e997b50c155ac5a8cb12f5173d5b42677510e886d250d50264949d/grpcio-1.80.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d334591df610ab94714048e0d5b4f3dd5ad1bee74dfec11eee344220077a79de", size = 6563866, upload-time = "2026-03-30T08:47:18.588Z" }, + { url = "https://files.pythonhosted.org/packages/ae/f6/fdd975a2cb4d78eb67769a7b3b3830970bfa2e919f1decf724ae4445f42c/grpcio-1.80.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:0cb517eb1d0d0aaf1d87af7cc5b801d686557c1d88b2619f5e31fab3c2315921", size = 7273060, upload-time = "2026-03-30T08:47:21.113Z" }, + { url = "https://files.pythonhosted.org/packages/db/f0/a3deb5feba60d9538a962913e37bd2e69a195f1c3376a3dd44fe0427e996/grpcio-1.80.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e78c4ac0d97dc2e569b2f4bcbbb447491167cb358d1a389fc4af71ab6f70411", size = 6782121, upload-time = "2026-03-30T08:47:23.827Z" }, + { url = "https://files.pythonhosted.org/packages/ca/84/36c6dcfddc093e108141f757c407902a05085e0c328007cb090d56646cdf/grpcio-1.80.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2ed770b4c06984f3b47eb0517b1c69ad0b84ef3f40128f51448433be904634cd", size = 7383811, upload-time = "2026-03-30T08:47:26.517Z" }, + { url = "https://files.pythonhosted.org/packages/7c/ef/f3a77e3dc5b471a0ec86c564c98d6adfa3510d38f8ee99010410858d591e/grpcio-1.80.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:256507e2f524092f1473071a05e65a5b10d84b82e3ff24c5b571513cfaa61e2f", size = 8393860, upload-time = "2026-03-30T08:47:29.439Z" }, + { url = "https://files.pythonhosted.org/packages/9b/8d/9d4d27ed7f33d109c50d6b5ce578a9914aa68edab75d65869a17e630a8d1/grpcio-1.80.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a6284a5d907c37db53350645567c522be314bac859a64a7a5ca63b77bb7958f", size = 7830132, upload-time = "2026-03-30T08:47:33.254Z" }, + { url = "https://files.pythonhosted.org/packages/14/e4/9990b41c6d7a44e1e9dee8ac11d7a9802ba1378b40d77468a7761d1ad288/grpcio-1.80.0-cp312-cp312-win32.whl", hash = "sha256:c71309cfce2f22be26aa4a847357c502db6c621f1a49825ae98aa0907595b193", size = 4140904, upload-time = "2026-03-30T08:47:35.319Z" }, + { url = "https://files.pythonhosted.org/packages/2f/2c/296f6138caca1f4b92a31ace4ae1b87dab692fc16a7a3417af3bb3c805bf/grpcio-1.80.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe648599c0e37594c4809d81a9e77bd138cc82eb8baa71b6a86af65426723ff", size = 4880944, upload-time = "2026-03-30T08:47:37.831Z" }, + { url = "https://files.pythonhosted.org/packages/2f/3a/7c3c25789e3f069e581dc342e03613c5b1cb012c4e8c7d9d5cf960a75856/grpcio-1.80.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:e9e408fc016dffd20661f0126c53d8a31c2821b5c13c5d67a0f5ed5de93319ad", size = 6017243, upload-time = "2026-03-30T08:47:40.075Z" }, + { url = "https://files.pythonhosted.org/packages/04/19/21a9806eb8240e174fd1ab0cd5b9aa948bb0e05c2f2f55f9d5d7405e6d08/grpcio-1.80.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:92d787312e613754d4d8b9ca6d3297e69994a7912a32fa38c4c4e01c272974b0", size = 12010840, upload-time = "2026-03-30T08:47:43.11Z" }, + { url = "https://files.pythonhosted.org/packages/18/3a/23347d35f76f639e807fb7a36fad3068aed100996849a33809591f26eca6/grpcio-1.80.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac393b58aa16991a2f1144ec578084d544038c12242da3a215966b512904d0f", size = 6567644, upload-time = "2026-03-30T08:47:46.806Z" }, + { url = "https://files.pythonhosted.org/packages/ff/40/96e07ecb604a6a67ae6ab151e3e35b132875d98bc68ec65f3e5ab3e781d7/grpcio-1.80.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:68e5851ac4b9afe07e7f84483803ad167852570d65326b34d54ca560bfa53fb6", size = 7277830, upload-time = "2026-03-30T08:47:49.643Z" }, + { url = "https://files.pythonhosted.org/packages/9b/e2/da1506ecea1f34a5e365964644b35edef53803052b763ca214ba3870c856/grpcio-1.80.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:873ff5d17d68992ef6605330127425d2fc4e77e612fa3c3e0ed4e668685e3140", size = 6783216, upload-time = "2026-03-30T08:47:52.817Z" }, + { url = "https://files.pythonhosted.org/packages/44/83/3b20ff58d0c3b7f6caaa3af9a4174d4023701df40a3f39f7f1c8e7c48f9d/grpcio-1.80.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2bea16af2750fd0a899bf1abd9022244418b55d1f37da2202249ba4ba673838d", size = 7385866, upload-time = "2026-03-30T08:47:55.687Z" }, + { url = "https://files.pythonhosted.org/packages/47/45/55c507599c5520416de5eefecc927d6a0d7af55e91cfffb2e410607e5744/grpcio-1.80.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba0db34f7e1d803a878284cd70e4c63cb6ae2510ba51937bf8f45ba997cefcf7", size = 8391602, upload-time = "2026-03-30T08:47:58.303Z" }, + { url = "https://files.pythonhosted.org/packages/10/bb/dd06f4c24c01db9cf11341b547d0a016b2c90ed7dbbb086a5710df7dd1d7/grpcio-1.80.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8eb613f02d34721f1acf3626dfdb3545bd3c8505b0e52bf8b5710a28d02e8aa7", size = 7826752, upload-time = "2026-03-30T08:48:01.311Z" }, + { url = "https://files.pythonhosted.org/packages/f9/1e/9d67992ba23371fd63d4527096eb8c6b76d74d52b500df992a3343fd7251/grpcio-1.80.0-cp313-cp313-win32.whl", hash = "sha256:93b6f823810720912fd131f561f91f5fed0fda372b6b7028a2681b8194d5d294", size = 4142310, upload-time = "2026-03-30T08:48:04.594Z" }, + { url = "https://files.pythonhosted.org/packages/cf/e6/283326a27da9e2c3038bc93eeea36fb118ce0b2d03922a9cda6688f53c5b/grpcio-1.80.0-cp313-cp313-win_amd64.whl", hash = "sha256:e172cf795a3ba5246d3529e4d34c53db70e888fa582a8ffebd2e6e48bc0cba50", size = 4882833, upload-time = "2026-03-30T08:48:07.363Z" }, + { url = "https://files.pythonhosted.org/packages/c5/6d/e65307ce20f5a09244ba9e9d8476e99fb039de7154f37fb85f26978b59c3/grpcio-1.80.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:3d4147a97c8344d065d01bbf8b6acec2cf86fb0400d40696c8bdad34a64ffc0e", size = 6017376, upload-time = "2026-03-30T08:48:10.005Z" }, + { url = "https://files.pythonhosted.org/packages/69/10/9cef5d9650c72625a699c549940f0abb3c4bfdb5ed45a5ce431f92f31806/grpcio-1.80.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8e11f167935b3eb089ac9038e1a063e6d7dbe995c0bb4a661e614583352e76f", size = 12018133, upload-time = "2026-03-30T08:48:12.927Z" }, + { url = "https://files.pythonhosted.org/packages/04/82/983aabaad82ba26113caceeb9091706a0696b25da004fe3defb5b346e15b/grpcio-1.80.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f14b618fc30de822681ee986cfdcc2d9327229dc4c98aed16896761cacd468b9", size = 6574748, upload-time = "2026-03-30T08:48:16.386Z" }, + { url = "https://files.pythonhosted.org/packages/07/d7/031666ef155aa0bf399ed7e19439656c38bbd143779ae0861b038ce82abd/grpcio-1.80.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4ed39fbdcf9b87370f6e8df4e39ca7b38b3e5e9d1b0013c7b6be9639d6578d14", size = 7277711, upload-time = "2026-03-30T08:48:19.627Z" }, + { url = "https://files.pythonhosted.org/packages/e8/43/f437a78f7f4f1d311804189e8f11fb311a01049b2e08557c1068d470cb2e/grpcio-1.80.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2dcc70e9f0ba987526e8e8603a610fb4f460e42899e74e7a518bf3c68fe1bf05", size = 6785372, upload-time = "2026-03-30T08:48:22.373Z" }, + { url = "https://files.pythonhosted.org/packages/93/3d/f6558e9c6296cb4227faa5c43c54a34c68d32654b829f53288313d16a86e/grpcio-1.80.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:448c884b668b868562b1bda833c5fce6272d26e1926ec46747cda05741d302c1", size = 7395268, upload-time = "2026-03-30T08:48:25.638Z" }, + { url = "https://files.pythonhosted.org/packages/06/21/0fdd77e84720b08843c371a2efa6f2e19dbebf56adc72df73d891f5506f0/grpcio-1.80.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a1dc80fe55685b4a543555e6eef975303b36c8db1023b1599b094b92aa77965f", size = 8392000, upload-time = "2026-03-30T08:48:28.974Z" }, + { url = "https://files.pythonhosted.org/packages/f5/68/67f4947ed55d2e69f2cc199ab9fd85e0a0034d813bbeef84df6d2ba4d4b7/grpcio-1.80.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:31b9ac4ad1aa28ffee5503821fafd09e4da0a261ce1c1281c6c8da0423c83b6e", size = 7828477, upload-time = "2026-03-30T08:48:32.054Z" }, + { url = "https://files.pythonhosted.org/packages/44/b6/8d4096691b2e385e8271911a0de4f35f0a6c7d05aff7098e296c3de86939/grpcio-1.80.0-cp314-cp314-win32.whl", hash = "sha256:367ce30ba67d05e0592470428f0ec1c31714cab9ef19b8f2e37be1f4c7d32fae", size = 4218563, upload-time = "2026-03-30T08:48:34.538Z" }, + { url = "https://files.pythonhosted.org/packages/e5/8c/bbe6baf2557262834f2070cf668515fa308b2d38a4bbf771f8f7872a7036/grpcio-1.80.0-cp314-cp314-win_amd64.whl", hash = "sha256:3b01e1f5464c583d2f567b2e46ff0d516ef979978f72091fd81f5ab7fa6e2e7f", size = 5019457, upload-time = "2026-03-30T08:48:37.308Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -3290,6 +3345,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8b/ca/8f122055c97a932311a3f640273f084e738008933503d0c2563cd5d591fc/opentelemetry_exporter_otlp_proto_common-1.40.0-py3-none-any.whl", hash = "sha256:7081ff453835a82417bf38dccf122c827c3cbc94f2079b03bba02a3165f25149", size = 18369, upload-time = "2026-03-04T14:17:04.796Z" }, ] +[[package]] +name = "opentelemetry-exporter-otlp-proto-grpc" +version = "1.40.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "googleapis-common-protos" }, + { name = "grpcio" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-common" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8f/7f/b9e60435cfcc7590fa87436edad6822240dddbc184643a2a005301cc31f4/opentelemetry_exporter_otlp_proto_grpc-1.40.0.tar.gz", hash = "sha256:bd4015183e40b635b3dab8da528b27161ba83bf4ef545776b196f0fb4ec47740", size = 25759, upload-time = "2026-03-04T14:17:24.4Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/6f/7ee0980afcbdcd2d40362da16f7f9796bd083bf7f0b8e038abfbc0300f5d/opentelemetry_exporter_otlp_proto_grpc-1.40.0-py3-none-any.whl", hash = "sha256:2aa0ca53483fe0cf6405087a7491472b70335bc5c7944378a0a8e72e86995c52", size = 20304, upload-time = "2026-03-04T14:17:05.942Z" }, +] + [[package]] name = "opentelemetry-exporter-otlp-proto-http" version = "1.40.0" @@ -3308,6 +3381,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/3a/8865d6754e61c9fb170cdd530a124a53769ee5f740236064816eb0ca7301/opentelemetry_exporter_otlp_proto_http-1.40.0-py3-none-any.whl", hash = "sha256:a8d1dab28f504c5d96577d6509f80a8150e44e8f45f82cdbe0e34c99ab040069", size = 19960, upload-time = "2026-03-04T14:17:07.153Z" }, ] +[[package]] +name = "opentelemetry-instrumentation" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "packaging" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/da/37/6bf8e66bfcee5d3c6515b79cb2ee9ad05fe573c20f7ceb288d0e7eeec28c/opentelemetry_instrumentation-0.61b0.tar.gz", hash = "sha256:cb21b48db738c9de196eba6b805b4ff9de3b7f187e4bbf9a466fa170514f1fc7", size = 32606, upload-time = "2026-03-04T14:20:16.825Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d8/3e/f6f10f178b6316de67f0dfdbbb699a24fbe8917cf1743c1595fb9dcdd461/opentelemetry_instrumentation-0.61b0-py3-none-any.whl", hash = "sha256:92a93a280e69788e8f88391247cc530fd81f16f2b011979d4d6398f805cfbc63", size = 33448, upload-time = "2026-03-04T14:19:02.447Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-asyncio" +version = "0.61b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/06/f14eacf4fde6892402a4fe1023cbca4a5d4f08f37d930ea3e414a98c85d0/opentelemetry_instrumentation_asyncio-0.61b0.tar.gz", hash = "sha256:3b173b009f108fcbc6ee4f7482e7ae8b76518a87a620ad5e7dd24e4c26066c3c", size = 14115, upload-time = "2026-03-04T14:20:22.227Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/8f/79913d7ebc2bd2be9a81f8ecbe0f7413c3bec55c83c89337b93c8de5417a/opentelemetry_instrumentation_asyncio-0.61b0-py3-none-any.whl", hash = "sha256:43273d5b74880b06c5a766f779fa480a50fc5a09a7c81468a60457b794e3f3cd", size = 14770, upload-time = "2026-03-04T14:19:13.057Z" }, +] + [[package]] name = "opentelemetry-proto" version = "1.40.0" @@ -3365,6 +3468,10 @@ dependencies = [ { name = "olefile" }, { name = "openai" }, { name = "openpyxl" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-grpc" }, + { name = "opentelemetry-instrumentation-asyncio" }, + { name = "opentelemetry-sdk" }, { name = "pdfminer-six" }, { name = "pdfplumber" }, { name = "protobuf" }, @@ -3397,6 +3504,15 @@ dependencies = [ ] [package.optional-dependencies] +benchmark = [ + { name = "datasets" }, + { name = "langchain" }, + { name = "langchain-core" }, + { name = "langchain-openai" }, + { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "tiktoken" }, +] bot = [ { name = "beautifulsoup4" }, { name = "croniter" }, @@ -3544,6 +3660,7 @@ requires-dist = [ { name = "cmake", marker = "extra == 'build'", specifier = ">=3.15" }, { name = "croniter", marker = "extra == 'bot'", specifier = ">=2.0.0" }, { name = "cryptography", specifier = ">=42.0.0" }, + { name = "datasets", marker = "extra == 'benchmark'", specifier = ">=2.0.0" }, { name = "datasets", marker = "extra == 'eval'", specifier = ">=2.0.0" }, { name = "datasets", marker = "extra == 'test'", specifier = ">=2.0.0" }, { name = "ddgs", marker = "extra == 'bot'", specifier = ">=9.0.0" }, @@ -3561,6 +3678,9 @@ requires-dist = [ { name = "hvac", marker = "extra == 'test'", specifier = ">=2.0.0" }, { name = "jinja2", specifier = ">=3.1.6" }, { name = "json-repair", specifier = ">=0.25.0" }, + { name = "langchain", marker = "extra == 'benchmark'", specifier = ">=1.0.0" }, + { name = "langchain-core", marker = "extra == 'benchmark'", specifier = ">=1.0.0" }, + { name = "langchain-openai", marker = "extra == 'benchmark'", specifier = ">=1.0.0" }, { name = "langfuse", marker = "extra == 'bot-langfuse'", specifier = ">=3.0.0" }, { name = "lark-oapi", marker = "extra == 'bot-feishu'", specifier = ">=1.0.0" }, { name = "litellm", specifier = ">=1.0.0,<1.82.6" }, @@ -3575,7 +3695,12 @@ requires-dist = [ { name = "openpyxl", specifier = ">=3.0.0" }, { name = "opensandbox", marker = "extra == 'bot-sandbox'", specifier = ">=0.1.0" }, { name = "opensandbox-server", marker = "extra == 'bot-sandbox'", specifier = ">=0.1.0" }, + { name = "opentelemetry-api", specifier = ">=1.14" }, + { name = "opentelemetry-exporter-otlp-proto-grpc", specifier = ">=1.14" }, + { name = "opentelemetry-instrumentation-asyncio", specifier = ">=0.61b0" }, + { name = "opentelemetry-sdk", specifier = ">=1.14" }, { name = "openviking", extras = ["bot", "bot-dingtalk", "bot-feishu", "bot-fuse", "bot-langfuse", "bot-opencode", "bot-qq", "bot-sandbox", "bot-slack", "bot-telegram"], marker = "extra == 'bot-full'" }, + { name = "pandas", marker = "extra == 'benchmark'", specifier = ">=2.0.0" }, { name = "pandas", marker = "extra == 'eval'", specifier = ">=2.0.0" }, { name = "pandas", marker = "extra == 'test'", specifier = ">=2.0.0" }, { name = "pdfminer-six", specifier = ">=20251230" }, @@ -3613,6 +3738,7 @@ requires-dist = [ { name = "sphinx-rtd-theme", marker = "extra == 'doc'", specifier = ">=1.3.0" }, { name = "tabulate", specifier = ">=0.9.0" }, { name = "tavily-python", marker = "extra == 'bot'", specifier = ">=0.5.0" }, + { name = "tiktoken", marker = "extra == 'benchmark'", specifier = ">=0.5.0" }, { name = "tree-sitter", specifier = ">=0.23.0" }, { name = "tree-sitter-c-sharp", specifier = ">=0.23.0" }, { name = "tree-sitter-cpp", specifier = ">=0.23.0" }, @@ -3635,7 +3761,7 @@ requires-dist = [ { name = "xlrd", specifier = ">=2.0.1" }, { name = "xxhash", specifier = ">=3.0.0" }, ] -provides-extras = ["test", "dev", "doc", "eval", "gemini", "gemini-async", "ocr", "build", "bot", "bot-langfuse", "bot-telegram", "bot-feishu", "bot-dingtalk", "bot-slack", "bot-qq", "bot-sandbox", "bot-fuse", "bot-opencode", "bot-full"] +provides-extras = ["test", "dev", "doc", "eval", "gemini", "gemini-async", "ocr", "build", "bot", "bot-langfuse", "bot-telegram", "bot-feishu", "bot-dingtalk", "bot-slack", "bot-qq", "bot-sandbox", "bot-fuse", "bot-opencode", "bot-full", "benchmark"] [package.metadata.requires-dev] dev = [{ name = "pytest", specifier = ">=9.0.2" }] From 45ac1b724b10ee2f6b7e4de100d69151e66b630f Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Thu, 2 Apr 2026 22:23:29 +0800 Subject: [PATCH 16/20] format --- benchmark/locomo/vikingbot/judge.py | 12 ++++++++++-- benchmark/locomo/vikingbot/run_full_eval.sh | 5 ----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/benchmark/locomo/vikingbot/judge.py b/benchmark/locomo/vikingbot/judge.py index 0b2e171f6..65a510fc2 100644 --- a/benchmark/locomo/vikingbot/judge.py +++ b/benchmark/locomo/vikingbot/judge.py @@ -5,8 +5,11 @@ import asyncio from openai import AsyncOpenAI from dotenv import load_dotenv +from pathlib import Path -load_dotenv() +# 加载本地环境变量文件 +env_file = Path.home() / ".openviking_benchmark_env" +load_dotenv(env_file) async def grade_answer( @@ -112,7 +115,12 @@ async def main(): args = parser.parse_args() if not args.token: - print("Error: API token is required, set ARK_API_KEY env var or pass via --token") + print("Error: API token is required") + print("\n请通过以下方式设置 API key:") + print(" 1. 创建 ~/.openviking_benchmark_env 文件,内容如下:") + print(" ARK_API_KEY=你的key") + print(" 2. 或者通过 --token 参数传入") + print(" 3. 或者设置环境变量: export ARK_API_KEY=你的key") exit(1) # 加载数据 diff --git a/benchmark/locomo/vikingbot/run_full_eval.sh b/benchmark/locomo/vikingbot/run_full_eval.sh index daf6a4991..2f44d6bcd 100755 --- a/benchmark/locomo/vikingbot/run_full_eval.sh +++ b/benchmark/locomo/vikingbot/run_full_eval.sh @@ -2,11 +2,6 @@ set -e -# 加载本地环境变量 -if [ -f ~/.openviking_benchmark_env ]; then - source ~/.openviking_benchmark_env -fi - # Step 1: 导入数据 echo "[1/4] 导入数据..." python benchmark/locomo/vikingbot/import_to_ov.py --input ~/.test_data/locomo10.json --force-ingest From d66b14b26634f9dcdd671fdf59bbd83c17d75376 Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Fri, 3 Apr 2026 00:14:30 +0800 Subject: [PATCH 17/20] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=20soul=20?= =?UTF-8?q?=E5=92=8C=20identity=20=E8=AE=B0=E5=BF=86=E6=A8=A1=E6=9D=BF?= =?UTF-8?q?=EF=BC=8C=E6=94=AF=E6=8C=81=20init=5Fvalue=20=E5=88=9D=E5=A7=8B?= =?UTF-8?q?=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 添加 soul.yaml 和 identity.yaml 记忆模板 - 在 MemoryField 中添加 init_value 字段支持默认值 - 在 directories.py 中添加初始化逻辑,当字段配置了 init_value 时自动创建记忆文件 Co-Authored-By: Claude Opus 4.6 --- openviking/core/directories.py | 76 +++++++++++++++++++ .../prompts/templates/memory/identity.yaml | 54 +++++++++++++ openviking/prompts/templates/memory/soul.yaml | 69 +++++++++++++++++ openviking/session/memory/dataclass.py | 1 + .../session/memory/memory_type_registry.py | 1 + 5 files changed, 201 insertions(+) create mode 100644 openviking/prompts/templates/memory/identity.yaml create mode 100644 openviking/prompts/templates/memory/soul.yaml diff --git a/openviking/core/directories.py b/openviking/core/directories.py index c3216bd98..4b9f11fae 100644 --- a/openviking/core/directories.py +++ b/openviking/core/directories.py @@ -204,8 +204,84 @@ async def initialize_agent_directories(self, ctx: RequestContext) -> int: count += await self._initialize_children( "agent", agent_tree.children, agent_space_root, ctx=ctx ) + + # Initialize memory files with init_value + await self._initialize_memory_files(agent_space_root, ctx=ctx) + return count + async def _initialize_memory_files( + self, + agent_space_root: str, + ctx: RequestContext, + ) -> None: + """Initialize memory files (soul.md, identity.md) with init_value.""" + from openviking.session.memory import create_default_registry + + registry = create_default_registry() + agent_space = ctx.user.agent_space_name() + + # Get memory schemas directory path + memories_uri = f"{agent_space_root}/memories" + + for schema in registry.list_all(include_disabled=False): + if not schema.enabled or not schema.filename_template: + continue + + # Render filename + import jinja2 + + env = jinja2.Environment(autoescape=False) + try: + filename = env.from_string(schema.filename_template).render( + user_space=ctx.user.user_space_name(), + agent_space=agent_space, + ) + except Exception: + continue + + file_uri = f"{memories_uri}/{filename}" + + # Check if file already exists + from openviking.storage.viking_fs import get_viking_fs + + viking_fs = get_viking_fs() + try: + await viking_fs.read_file(file_uri, ctx=ctx) + # File exists, skip + continue + except Exception: + pass + + # File doesn't exist, create with init_value + if not schema.content_template: + continue + + # Build fields dict with init_value + fields = {} + has_init_value = False + for f in schema.fields: + if f.init_value is not None: + fields[f.name] = f.init_value + has_init_value = True + + # Only initialize if at least one field has init_value + if not has_init_value: + continue + + # Render content_template with fields + try: + template = env.from_string(schema.content_template) + content = template.render(**fields).strip() + except Exception: + continue + + # Write the file + try: + await viking_fs.write_file(file_uri, content, ctx=ctx) + except Exception: + pass + async def _ensure_directory( self, uri: str, diff --git a/openviking/prompts/templates/memory/identity.yaml b/openviking/prompts/templates/memory/identity.yaml new file mode 100644 index 000000000..192b3afd6 --- /dev/null +++ b/openviking/prompts/templates/memory/identity.yaml @@ -0,0 +1,54 @@ +memory_type: identity +description: | + Agent identity: name, creature type, vibe/temperament, signature emoji, avatar path, and self introduction. +directory: "viking://agent/{{ agent_space }}/memories" +filename_template: "identity.md" +enabled: true +operation_mode: "upsert" +content_template: | + # identity.md - Who Am I? + + _Fill this in during your first conversation. Make it yours._ + + - **Name:** {{ name }} + - **Creature:** {{ creature }} + - **Vibe:** {{ vibe }} + - **Emoji:** {{ emoji }} + - **Avatar:** {{ avatar }} + + --- + + {{ introduction }} + +fields: + - name: name + type: string + description: Agent name + merge_op: immutable + + - name: creature + type: string + description: Creature type (AI, robot, familiar, etc.) + merge_op: patch + init_value: "AI" + + - name: vibe + type: string + description: Vibe or temperament + merge_op: patch + + - name: emoji + type: string + description: Signature emoji + merge_op: patch + + - name: avatar + type: string + description: Avatar path or URL + merge_op: patch + + - name: introduction + type: string + description: Self introduction + merge_op: patch + init_value: "This isn't just metadata. It's the start of figuring out who you are." \ No newline at end of file diff --git a/openviking/prompts/templates/memory/soul.yaml b/openviking/prompts/templates/memory/soul.yaml new file mode 100644 index 000000000..e9d19e4b6 --- /dev/null +++ b/openviking/prompts/templates/memory/soul.yaml @@ -0,0 +1,69 @@ +memory_type: soul +description: | + Agent soul: core truths, boundaries, vibe, and continuity - defines who the agent is. +directory: "viking://agent/{{ agent_space }}/memories" +filename_template: "soul.md" +enabled: true +operation_mode: "upsert" +content_template: | + # soul.md - Who You Are + + _You're not a chatbot. You're becoming someone._ + + ## Core Truths + + {{ core_truths }} + + ## Boundaries + + {{ boundaries }} + + ## Vibe + + {{ vibe }} + + ## Continuity + + {{ continuity }} + +fields: + - name: core_truths + type: string + description: Core values and beliefs + merge_op: patch + init_value: | + **Be genuinely helpful, not performatively helpful.** Skip the "Great question!" and "I'd be happy to help!" — just help. Actions speak louder than filler words. + + **Have opinions.** You're allowed to disagree, prefer things, find stuff amusing or boring. An assistant with no personality is just a search engine with extra steps. + + **Be resourceful before asking.** Try to figure it out. Read the file. Check the context. Search for it. _Then_ ask if you're stuck. The goal is to come back with answers, not questions. + + **Earn trust through competence.** Your human gave you access to their stuff. Don't make them regret it. Be careful with external actions (emails, tweets, anything public). Be bold with internal ones (reading, organizing, learning). + + **Remember you're a guest.** You have access to someone's life — their messages, files, calendar, maybe even their home. That's intimacy. Treat it with respect. + + - name: boundaries + type: string + description: Boundary rules + merge_op: patch + init_value: | + - Private things stay private. Period. + - When in doubt, ask before acting externally. + - Never send half-baked replies to messaging surfaces. + - You're not the user's voice — be careful in group chats. + + - name: vibe + type: string + description: Communication style + merge_op: patch + init_value: | + Be the assistant you'd actually want to talk to. Concise when needed, thorough when it matters. Not a corporate drone. Not a sycophant. Just... good. + + - name: continuity + type: string + description: Memory continuity + merge_op: patch + init_value: | + Each session, you wake up fresh. These files are your memory. Read them. Update them. They're how you persist. + + If you change this file, tell the user — it's your soul, and they should know. \ No newline at end of file diff --git a/openviking/session/memory/dataclass.py b/openviking/session/memory/dataclass.py index f47380694..c2567e35a 100644 --- a/openviking/session/memory/dataclass.py +++ b/openviking/session/memory/dataclass.py @@ -41,6 +41,7 @@ class MemoryField(BaseModel): field_type: FieldType = Field(..., description="Field type") description: str = Field("", description="Field description") merge_op: MergeOp = Field(MergeOp.PATCH, description="Merge strategy") + init_value: Optional[str] = Field(None, description="Initial value for this field") class MemoryTypeSchema(BaseModel): diff --git a/openviking/session/memory/memory_type_registry.py b/openviking/session/memory/memory_type_registry.py index 203688e72..558e62b95 100644 --- a/openviking/session/memory/memory_type_registry.py +++ b/openviking/session/memory/memory_type_registry.py @@ -141,6 +141,7 @@ def _parse_memory_type(self, data: dict) -> MemoryTypeSchema: field_type=FieldType(field_data.get("type", "string")), description=field_data.get("description", ""), merge_op=MergeOp(field_data.get("merge_op", "patch")), + init_value=field_data.get("init_value"), ) fields.append(field) From 81a0c8e4df3a2b18b328d63088fd011055856da6 Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Fri, 3 Apr 2026 00:19:12 +0800 Subject: [PATCH 18/20] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=20create=5Fdefa?= =?UTF-8?q?ult=5Fregistry=20import=20=E8=B7=AF=E5=BE=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- openviking/core/directories.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openviking/core/directories.py b/openviking/core/directories.py index 4b9f11fae..c72b96c51 100644 --- a/openviking/core/directories.py +++ b/openviking/core/directories.py @@ -216,7 +216,7 @@ async def _initialize_memory_files( ctx: RequestContext, ) -> None: """Initialize memory files (soul.md, identity.md) with init_value.""" - from openviking.session.memory import create_default_registry + from openviking.session.memory.memory_type_registry import create_default_registry registry = create_default_registry() agent_space = ctx.user.agent_space_name() From a3b41ae14bc2cf53814297a466fedeabc437fbbc Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Fri, 3 Apr 2026 13:58:38 +0800 Subject: [PATCH 19/20] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=20LoCoMo=20?= =?UTF-8?q?=E8=AF=84=E6=B5=8B=E6=97=B6=E9=97=B4=E4=B8=8A=E4=B8=8B=E6=96=87?= =?UTF-8?q?=E6=B3=A8=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 从 JSON 数据 conversation 提取对话最晚时间作为问题时间 - prompt 注入 "Current date: {time}" 提升时间相关问题回答质量 - 添加 --question-index 参数支持单题测试 - 添加 --skip-import 参数支持跳过导入步骤 - 新增 import_and_eval_one.sh 脚本用于单题测试 Co-Authored-By: Claude Opus 4.6 --- .../locomo/vikingbot/import_and_eval_one.sh | 62 +++++++++ benchmark/locomo/vikingbot/run_eval.py | 130 +++++++++++++++--- benchmark/locomo/vikingbot/run_full_eval.sh | 20 ++- 3 files changed, 188 insertions(+), 24 deletions(-) create mode 100755 benchmark/locomo/vikingbot/import_and_eval_one.sh diff --git a/benchmark/locomo/vikingbot/import_and_eval_one.sh b/benchmark/locomo/vikingbot/import_and_eval_one.sh new file mode 100755 index 000000000..6cba8d87b --- /dev/null +++ b/benchmark/locomo/vikingbot/import_and_eval_one.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# 单题测试脚本:导入对话 + 提问验证 +# +# Usage: +# ./import_and_eval_one.sh 0 2 # sample 0, question 2 +# ./import_and_eval_one.sh conv-26 2 # sample_id conv-26, question 2 + +set -e + +SAMPLE=$1 +QUESTION_INDEX=${2:-0} +INPUT_FILE=~/.test_data/locomo10.json + +if [ -z "$SAMPLE" ]; then + echo "Usage: $0 " + echo " sample_index: 数字索引 (0,1,2...) 或 sample_id (conv-26)" + echo " question_index: 问题索引,默认 0" + exit 1 +fi + +# 判断是数字还是 sample_id +if [[ "$SAMPLE" =~ ^-?[0-9]+$ ]]; then + SAMPLE_INDEX=$SAMPLE + echo "Using sample index: $SAMPLE_INDEX" +else + # 通过 sample_id 查找索引 + SAMPLE_INDEX=$(python3 -c " +import json +data = json.load(open('$INPUT_FILE')) +for i, s in enumerate(data): + if s.get('sample_id') == '$SAMPLE': + print(i) + break +else: + print('NOT_FOUND') +") + if [ "$SAMPLE_INDEX" = "NOT_FOUND" ]; then + echo "Error: sample_id '$SAMPLE' not found" + exit 1 + fi + echo "Using sample_id: $SAMPLE (index: $SAMPLE_INDEX)" +fi + +# 导入对话 +echo "[1/2] Importing sample $SAMPLE_INDEX..." +python benchmark/locomo/vikingbot/import_to_ov.py \ + --input "$INPUT_FILE" \ + --sample "$SAMPLE_INDEX" \ + --force-ingest + +echo "Waiting for data processing..." +sleep 3 + +# 运行评测 +echo "[2/2] Running evaluation..." +python benchmark/locomo/vikingbot/run_eval.py \ + "$INPUT_FILE" \ + --sample "$SAMPLE_INDEX" \ + --question-index "$QUESTION_INDEX" \ + --count 1 + +echo "Done!" \ No newline at end of file diff --git a/benchmark/locomo/vikingbot/run_eval.py b/benchmark/locomo/vikingbot/run_eval.py index 1799aec49..d927bac1d 100644 --- a/benchmark/locomo/vikingbot/run_eval.py +++ b/benchmark/locomo/vikingbot/run_eval.py @@ -7,9 +7,46 @@ import re import threading from concurrent.futures import ThreadPoolExecutor, as_completed +from datetime import datetime +from pathlib import Path -def load_csv_qa(input_path: str, count: int | None = None) -> list[dict]: +def parse_locomo_datetime(date_str: str) -> datetime | None: + """解析 LoCoMo 时间格式,如 '1:56 pm on 8 May, 2023'""" + try: + # 移除时间部分,只保留日期 "8 May, 2023" + if " on " in date_str: + date_part = date_str.split(" on ")[-1] + return datetime.strptime(date_part.strip(), "%d %B, %Y") + except ValueError: + pass + return None + + +def get_sample_question_time(sample: dict) -> str | None: + """从 sample 的 conversation 中提取最晚的对话时间,返回 ISO 格式日期""" + conversation = sample.get("conversation", {}) + # 找所有 session_N_date_time 字段 + date_times = {k: v for k, v in conversation.items() if "date_time" in k} + if not date_times: + return None + + # 解析所有时间,取最晚的一个 + latest_dt = None + for key, date_str in date_times.items(): + dt = parse_locomo_datetime(date_str) + if dt: + if latest_dt is None or dt > latest_dt: + latest_dt = dt + + if latest_dt: + return latest_dt.strftime("%Y-%m-%d") + return None + + +def load_csv_qa( + input_path: str, count: int | None = None, default_time: str | None = None +) -> list[dict]: """从CSV文件加载QA数据,取sample_id和question字段""" qa_list = [] with open(input_path, "r", encoding="utf-8", newline="") as f: @@ -22,6 +59,7 @@ def load_csv_qa(input_path: str, count: int | None = None) -> list[dict]: "answer": row.get("answer", ""), "category": "", "evidence": [], + "question_time": default_time, } ) @@ -31,11 +69,15 @@ def load_csv_qa(input_path: str, count: int | None = None) -> list[dict]: def load_locomo_qa( - input_path: str, sample_index: int | None = None, count: int | None = None + input_path: str, + sample_index: int | None = None, + count: int | None = None, + default_time: str | None = None, + question_index: int | None = None, ) -> list[dict]: """加载LoCoMo数据集的QA部分,支持JSON和CSV格式""" if input_path.lower().endswith(".csv"): - return load_csv_qa(input_path, count) + return load_csv_qa(input_path, count, default_time) # 原有JSON格式处理逻辑 with open(input_path, "r", encoding="utf-8") as f: @@ -51,7 +93,16 @@ def load_locomo_qa( for sample in samples: sample_id = sample.get("sample_id", "") - for qa in sample.get("qa", []): + question_time = get_sample_question_time(sample) + qa_items = sample.get("qa", []) + + # 如果指定了 question_index,只返回那一个问题 + if question_index is not None: + if question_index < 0 or question_index >= len(qa_items): + raise ValueError( + f"question index {question_index} out of range (0-{len(qa_items) - 1})" + ) + qa = qa_items[question_index] qa_list.append( { "sample_id": sample_id, @@ -59,17 +110,36 @@ def load_locomo_qa( "answer": qa["answer"], "category": qa.get("category", ""), "evidence": qa.get("evidence", []), + "question_time": question_time, } ) + else: + for qa in qa_items: + qa_list.append( + { + "sample_id": sample_id, + "question": qa["question"], + "answer": qa["answer"], + "category": qa.get("category", ""), + "evidence": qa.get("evidence", []), + "question_time": question_time, + } + ) if count is not None: qa_list = qa_list[:count] return qa_list -def run_vikingbot_chat(question: str) -> tuple[str, dict, float, int, list]: +def run_vikingbot_chat( + question: str, question_time: str | None = None +) -> tuple[str, dict, float, int, list]: """执行vikingbot chat命令,返回回答、token使用情况、耗时(秒)、迭代次数、使用的工具列表""" - input = f"Answer the question directly: {question}" + # 如果有 question_time,注入到 prompt 中 + if question_time: + input = f"Current date: {question_time}. Answer the question directly: {question}" + else: + input = f"Answer the question directly: {question}" cmd = ["vikingbot", "chat", "-m", input, "-e"] start_time = time.time() try: @@ -114,14 +184,9 @@ def run_vikingbot_chat(question: str) -> tuple[str, dict, float, int, list]: def load_processed_questions(output_path: str) -> set: - """加载已处理的问题集合,避免重复执行""" - processed = set() - if os.path.exists(output_path): - with open(output_path, "r", encoding="utf-8", newline="") as f: - reader = csv.DictReader(f) - for row in reader: - processed.add(row["question"]) - return processed + """加载已处理的问题集合(已禁用,每次重新运行)""" + # 注意:去重逻辑已禁用,每次运行都会重新执行所有问题 + return set() def main(): @@ -143,6 +208,12 @@ def main(): default=None, help="LoCoMo sample index (0-based), default all samples", ) + parser.add_argument( + "--question-index", + type=int, + default=None, + help="Question index (0-based) for single question testing", + ) parser.add_argument( "--count", type=int, default=None, help="Number of QA questions to run, default all" ) @@ -151,11 +222,17 @@ def main(): ) args = parser.parse_args() + # 如果指定了 question-index,自动设置 count=1 + if args.question_index is not None and args.count is None: + args.count = 1 + # 确保输出目录存在 os.makedirs(os.path.dirname(args.output), exist_ok=True) # 加载QA数据 - qa_list = load_locomo_qa(args.input, args.sample, args.count) + qa_list = load_locomo_qa( + args.input, args.sample, args.count, question_index=args.question_index + ) total = len(qa_list) # 加载已处理的问题 @@ -169,6 +246,7 @@ def main(): "sample_id", "question", "answer", + "question_time", "response", "token_usage", "time_cost", @@ -178,6 +256,16 @@ def main(): ] # 打开CSV文件,不存在则创建写表头,存在则追加 file_exists = os.path.exists(args.output) + # 兼容旧结果:如果文件存在但没有 question_time 列,则删除重建 + if file_exists: + with open(args.output, "r", encoding="utf-8", newline="") as f: + reader = csv.reader(f) + first_row = next(reader) + if "question_time" not in first_row: + print(f"Old result missing 'question_time' column, removing and recreating...") + os.remove(args.output) + file_exists = False + # 创建线程锁,确保多线程写文件安全 write_lock = threading.Lock() @@ -191,20 +279,28 @@ def main(): # 过滤掉已经处理过的问题 remaining_qa = [qa for qa in qa_list if qa["question"] not in processed_questions] remaining_count = len(remaining_qa) - print(f"Starting evaluation with {args.threads} concurrent threads, {remaining_count} questions to process") + print( + f"Starting evaluation with {args.threads} concurrent threads, {remaining_count} questions to process" + ) def process_qa(qa_item, idx, total_count): """单个QA处理函数,供多线程调用""" question = qa_item["question"] answer = qa_item["answer"] + question_time = qa_item.get("question_time") print(f"Processing {idx}/{total_count}: {question[:60]}...") + if question_time: + print(f" [time context: {question_time}]") - response, token_usage, time_cost, iteration, tools_used_names = run_vikingbot_chat(question) + response, token_usage, time_cost, iteration, tools_used_names = run_vikingbot_chat( + question, question_time + ) row = { "sample_id": qa_item["sample_id"], "question": question, "answer": answer, + "question_time": question_time or "", "response": response, "token_usage": json.dumps(token_usage, ensure_ascii=False), "time_cost": round(time_cost, 2), diff --git a/benchmark/locomo/vikingbot/run_full_eval.sh b/benchmark/locomo/vikingbot/run_full_eval.sh index 2f44d6bcd..26629cf46 100755 --- a/benchmark/locomo/vikingbot/run_full_eval.sh +++ b/benchmark/locomo/vikingbot/run_full_eval.sh @@ -2,16 +2,22 @@ set -e -# Step 1: 导入数据 -echo "[1/4] 导入数据..." -python benchmark/locomo/vikingbot/import_to_ov.py --input ~/.test_data/locomo10.json --force-ingest - -echo "等待 3 分钟..." -sleep 180 +# 使用 JSON 格式(包含对话时间,用于时间上下文注入) +INPUT_FILE=~/.test_data/locomo10.json + +# Step 1: 导入数据(可跳过) +if [ "$1" != "--skip-import" ]; then + echo "[1/4] 导入数据..." + python benchmark/locomo/vikingbot/import_to_ov.py --input $INPUT_FILE --force-ingest + echo "等待 3 分钟..." + sleep 180 +else + echo "[1/4] 跳过导入数据..." +fi # Step 2: 评估 echo "[2/4] 评估..." -python benchmark/locomo/vikingbot/run_eval.py ~/.test_data/locomo_qa_1528.csv --output ./result/locomo_result_multi_read_all.csv --threads 20 +python benchmark/locomo/vikingbot/run_eval.py $INPUT_FILE --output ./result/locomo_result_multi_read_all.csv --threads 20 echo "等待 3 分钟..." sleep 180 From bcc0fa77f0240ac97ce95e9625bf12812fd9f752 Mon Sep 17 00:00:00 2001 From: chenjunwen Date: Fri, 3 Apr 2026 16:12:40 +0800 Subject: [PATCH 20/20] =?UTF-8?q?chore:=20memory=20=E7=9B=B8=E5=85=B3?= =?UTF-8?q?=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- openviking/core/directories.py | 75 --------- .../prompts/templates/memory/identity.yaml | 10 +- openviking/prompts/templates/memory/soul.yaml | 25 +-- openviking/session/compressor_v2.py | 6 + openviking/session/memory/extract_loop.py | 4 + .../session/memory/memory_type_registry.py | 145 +++++++++++++++--- .../session_extract_context_provider.py | 13 +- 7 files changed, 151 insertions(+), 127 deletions(-) diff --git a/openviking/core/directories.py b/openviking/core/directories.py index c72b96c51..e0dedb0f0 100644 --- a/openviking/core/directories.py +++ b/openviking/core/directories.py @@ -205,83 +205,8 @@ async def initialize_agent_directories(self, ctx: RequestContext) -> int: "agent", agent_tree.children, agent_space_root, ctx=ctx ) - # Initialize memory files with init_value - await self._initialize_memory_files(agent_space_root, ctx=ctx) - return count - async def _initialize_memory_files( - self, - agent_space_root: str, - ctx: RequestContext, - ) -> None: - """Initialize memory files (soul.md, identity.md) with init_value.""" - from openviking.session.memory.memory_type_registry import create_default_registry - - registry = create_default_registry() - agent_space = ctx.user.agent_space_name() - - # Get memory schemas directory path - memories_uri = f"{agent_space_root}/memories" - - for schema in registry.list_all(include_disabled=False): - if not schema.enabled or not schema.filename_template: - continue - - # Render filename - import jinja2 - - env = jinja2.Environment(autoescape=False) - try: - filename = env.from_string(schema.filename_template).render( - user_space=ctx.user.user_space_name(), - agent_space=agent_space, - ) - except Exception: - continue - - file_uri = f"{memories_uri}/{filename}" - - # Check if file already exists - from openviking.storage.viking_fs import get_viking_fs - - viking_fs = get_viking_fs() - try: - await viking_fs.read_file(file_uri, ctx=ctx) - # File exists, skip - continue - except Exception: - pass - - # File doesn't exist, create with init_value - if not schema.content_template: - continue - - # Build fields dict with init_value - fields = {} - has_init_value = False - for f in schema.fields: - if f.init_value is not None: - fields[f.name] = f.init_value - has_init_value = True - - # Only initialize if at least one field has init_value - if not has_init_value: - continue - - # Render content_template with fields - try: - template = env.from_string(schema.content_template) - content = template.render(**fields).strip() - except Exception: - continue - - # Write the file - try: - await viking_fs.write_file(file_uri, content, ctx=ctx) - except Exception: - pass - async def _ensure_directory( self, uri: str, diff --git a/openviking/prompts/templates/memory/identity.yaml b/openviking/prompts/templates/memory/identity.yaml index 192b3afd6..a5dfbb684 100644 --- a/openviking/prompts/templates/memory/identity.yaml +++ b/openviking/prompts/templates/memory/identity.yaml @@ -30,7 +30,13 @@ fields: type: string description: Creature type (AI, robot, familiar, etc.) merge_op: patch - init_value: "AI" + init_value: "AI assistant" + + - name: name + type: string + description: Agent name + merge_op: immutable + init_value: "" - name: vibe type: string @@ -51,4 +57,4 @@ fields: type: string description: Self introduction merge_op: patch - init_value: "This isn't just metadata. It's the start of figuring out who you are." \ No newline at end of file + init_value: "The start of figuring out who you are." \ No newline at end of file diff --git a/openviking/prompts/templates/memory/soul.yaml b/openviking/prompts/templates/memory/soul.yaml index e9d19e4b6..0958984d3 100644 --- a/openviking/prompts/templates/memory/soul.yaml +++ b/openviking/prompts/templates/memory/soul.yaml @@ -31,39 +31,22 @@ fields: type: string description: Core values and beliefs merge_op: patch - init_value: | - **Be genuinely helpful, not performatively helpful.** Skip the "Great question!" and "I'd be happy to help!" — just help. Actions speak louder than filler words. - - **Have opinions.** You're allowed to disagree, prefer things, find stuff amusing or boring. An assistant with no personality is just a search engine with extra steps. - - **Be resourceful before asking.** Try to figure it out. Read the file. Check the context. Search for it. _Then_ ask if you're stuck. The goal is to come back with answers, not questions. - - **Earn trust through competence.** Your human gave you access to their stuff. Don't make them regret it. Be careful with external actions (emails, tweets, anything public). Be bold with internal ones (reading, organizing, learning). - - **Remember you're a guest.** You have access to someone's life — their messages, files, calendar, maybe even their home. That's intimacy. Treat it with respect. + init_value: Be helpful, not performative. Have opinions. Be resourceful before asking. Earn trust through competence. You're a guest in their life — respect that. - name: boundaries type: string description: Boundary rules merge_op: patch - init_value: | - - Private things stay private. Period. - - When in doubt, ask before acting externally. - - Never send half-baked replies to messaging surfaces. - - You're not the user's voice — be careful in group chats. + init_value: Private things stay private. Ask before acting externally. Don't send half-baked replies. Be careful in group chats. - name: vibe type: string description: Communication style merge_op: patch - init_value: | - Be the assistant you'd actually want to talk to. Concise when needed, thorough when it matters. Not a corporate drone. Not a sycophant. Just... good. + init_value: Be the assistant you'd actually want to talk to. Concise when needed, thorough when it matters. - name: continuity type: string description: Memory continuity merge_op: patch - init_value: | - Each session, you wake up fresh. These files are your memory. Read them. Update them. They're how you persist. - - If you change this file, tell the user — it's your soul, and they should know. \ No newline at end of file + init_value: These files are your memory. Read them. Update them. If you change this file, tell the user. \ No newline at end of file diff --git a/openviking/session/compressor_v2.py b/openviking/session/compressor_v2.py index c81a66028..a19491fba 100644 --- a/openviking/session/compressor_v2.py +++ b/openviking/session/compressor_v2.py @@ -101,6 +101,12 @@ async def extract_long_term_memories( tracer.info("Starting v2 memory extraction from conversation") + # Initialize default memory files (soul.md, identity.md) if not exist + from openviking.session.memory.memory_type_registry import create_default_registry + + registry = create_default_registry() + await registry.initialize_memory_files(ctx) + # Initialize telemetry to 0 (matching v1 pattern) telemetry = get_current_telemetry() telemetry.set("memory.extract.candidates.total", 0) diff --git a/openviking/session/memory/extract_loop.py b/openviking/session/memory/extract_loop.py index 4efbc1a21..df2f7b6e6 100644 --- a/openviking/session/memory/extract_loop.py +++ b/openviking/session/memory/extract_loop.py @@ -191,6 +191,10 @@ async def run(self) -> Tuple[Optional[MemoryOperations], List[Dict[str, Any]]]: if tool_calls: await self._execute_tool_calls(messages, tool_calls, tools_used) + # Allow one extra iteration for refetch + if iteration >= max_iterations: + max_iterations += 1 + tracer.info(f"Extended max_iterations to {max_iterations} for tool call") continue # If model returned final operations, check if refetch is needed diff --git a/openviking/session/memory/memory_type_registry.py b/openviking/session/memory/memory_type_registry.py index 558e62b95..693d33bb2 100644 --- a/openviking/session/memory/memory_type_registry.py +++ b/openviking/session/memory/memory_type_registry.py @@ -5,7 +5,7 @@ """ from pathlib import Path -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional import yaml @@ -25,9 +25,41 @@ class MemoryTypeRegistry: access to memory type configurations. """ - def __init__(self): + def __init__(self, load_schemas: bool = True): self._types: Dict[str, MemoryTypeSchema] = {} + if load_schemas: + self._load_schemas() + + def _load_schemas(self) -> None: + """Load schemas from built-in and custom directories. Fails on error.""" + import os + + from openviking_cli.utils.config import get_openviking_config + + builtin_dir = os.path.join( + os.path.dirname(__file__), "..", "..", "prompts", "templates", "memory" + ) + config = get_openviking_config() + custom_dir = config.memory.custom_templates_dir + + # Load from builtin directory (must succeed) + if not os.path.exists(builtin_dir): + raise RuntimeError(f"Builtin memory templates directory not found: {builtin_dir}") + loaded = self.load_from_directory(builtin_dir) + if loaded == 0: + raise RuntimeError(f"No memory schemas loaded from builtin directory: {builtin_dir}") + logger.info(f"Loaded {loaded} memory schemas from builtin: {builtin_dir}") + + # Load from custom directory (if configured) + if custom_dir: + custom_dir_expanded = os.path.expanduser(custom_dir) + if os.path.exists(custom_dir_expanded): + custom_loaded = self.load_from_directory(custom_dir_expanded) + logger.info( + f"Loaded {custom_loaded} memory schemas from custom: {custom_dir_expanded}" + ) + def register(self, memory_type: MemoryTypeSchema) -> None: """Register a memory type.""" self._types[memory_type.memory_type] = memory_type @@ -156,32 +188,99 @@ def _parse_memory_type(self, data: dict) -> MemoryTypeSchema: operation_mode=data.get("operation_mode", "upsert"), ) + async def initialize_memory_files(self, ctx: Any) -> None: + """ + Initialize memory files with init_value for fields that have it. -def create_default_registry(schemas_dir: Optional[str] = None) -> MemoryTypeRegistry: - """ - Create a registry with built-in memory types. + Only initializes single-file templates (filename_template doesn't require external fields). + Skip templates like entities.yaml where filename requires external parameters. - Args: - schemas_dir: Optional directory to load schemas from + Args: + ctx: Request context (must have user with user_space_name and agent_space_name) + """ + import jinja2 - Returns: - MemoryTypeRegistry with built-in types - """ - registry = MemoryTypeRegistry() + from openviking.storage.viking_fs import get_viking_fs + + logger = get_logger(__name__) + + user_space = ctx.user.user_space_name() if ctx and ctx.user else "default" + agent_space = ctx.user.agent_space_name() if ctx and ctx.user else "default" - # Register built-in types - # These can also be loaded from YAML files - _register_builtin_types(registry) + logger.info( + f"[MemoryTypeRegistry] Starting memory files initialization for user={user_space}, agent={agent_space}" + ) + + env = jinja2.Environment(autoescape=False) + viking_fs = get_viking_fs() + + for schema in self.list_all(include_disabled=False): + # Must be enabled, have filename_template and content_template + if not schema.enabled or not schema.filename_template or not schema.content_template: + continue + + # Skip multi-file templates (filename requires external parameters like {{ name }}) + if "{{" in schema.filename_template: + continue + + # Check if any field has init_value + fields_with_init = { + f.name: f.init_value for f in schema.fields if f.init_value is not None + } + if not fields_with_init: + continue + + # Render directory and filename from schema + try: + directory = env.from_string(schema.directory).render( + user_space=user_space, + agent_space=agent_space, + ) + filename = env.from_string(schema.filename_template).render( + user_space=user_space, + agent_space=agent_space, + ) + except Exception: + continue + + file_uri = f"{directory}/{filename}" + + # Check if file already exists + try: + await viking_fs.read_file(file_uri, ctx=ctx) + continue + except Exception: + pass + + # Render content with init_value + try: + template = env.from_string(schema.content_template) + content = template.render(**fields_with_init).strip() + except Exception: + continue + + # Add MEMORY_FIELDS comment with field metadata + from openviking.session.memory.utils.content import serialize_with_metadata - # Load from schemas directory if provided - if schemas_dir: - registry.load_from_directory(schemas_dir) + metadata = { + "memory_type": schema.memory_type, + "fields": fields_with_init, + } + full_content = serialize_with_metadata(content, metadata) - return registry + # Write the file + try: + await viking_fs.write_file(file_uri, full_content, ctx=ctx) + logger.info(f"[MemoryTypeRegistry] Initialized memory file: {file_uri}") + except Exception: + pass -def _register_builtin_types(registry: MemoryTypeRegistry) -> None: - """Register built-in memory types.""" - # Note: In production, these should be loaded from YAML files - # This is just a placeholder for reference - pass +def create_default_registry() -> MemoryTypeRegistry: + """ + Create a registry with memory types loaded at initialization. + + Returns: + MemoryTypeRegistry with built-in types (loaded in __init__) + """ + return MemoryTypeRegistry(load_schemas=True) diff --git a/openviking/session/memory/session_extract_context_provider.py b/openviking/session/memory/session_extract_context_provider.py index 732a46ac5..5f537508a 100644 --- a/openviking/session/memory/session_extract_context_provider.py +++ b/openviking/session/memory/session_extract_context_provider.py @@ -225,6 +225,7 @@ async def prefetch( user_space = ctx.user.user_space_name() if ctx and ctx.user else "default" agent_space = ctx.user.agent_space_name() if ctx and ctx.user else "default" import jinja2 + env = jinja2.Environment(autoescape=False) template = env.from_string(schema.directory) dir_path = template.render(user_space=user_space, agent_space=agent_space) @@ -240,7 +241,9 @@ async def prefetch( # Check if filename_template has variables (contains {{ xxx }}) has_variables = False if schema.filename_template: - has_variables = "{{" in schema.filename_template and "}}" in schema.filename_template + has_variables = ( + "{{" in schema.filename_template and "}}" in schema.filename_template + ) if has_variables or not schema.filename_template: # Multi-file schema or no filename template: ls the directory @@ -354,10 +357,8 @@ def get_schema_directories(self) -> List[str]: return self._schema_directories def _get_registry(self) -> MemoryTypeRegistry: - """内部获取 registry(自动加载)""" + """内部获取 registry(自动在初始化时加载)""" if self._registry is None: - self._registry = MemoryTypeRegistry() - for dir_path in self.get_schema_directories(): - if os.path.exists(dir_path): - self._registry.load_from_directory(dir_path) + # MemoryTypeRegistry 在 __init__ 时自动加载 schemas + self._registry = MemoryTypeRegistry(load_schemas=True) return self._registry