diff --git a/.gitignore b/.gitignore index 537ac52..ac1e4f6 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,9 @@ yarn-error.log* config.json .node +# Local dev app data +src-tauri/com.mine-kb.app/ + release oblite.so diff --git a/README-ZH.md b/README-ZH.md index a8bcfed..b01ce6e 100644 --- a/README-ZH.md +++ b/README-ZH.md @@ -24,7 +24,7 @@ MineKB 是一个基于 Tauri 构建的跨平台桌面应用,旨在帮助用户 - **向量搜索**:利用语义搜索技术,快速定位相关文档内容 - **流式输出**:实时流式展示 AI 生成的回答,提供流畅的用户体验 - **语音交互**:支持语音输入功能,让知识查询更便捷 -- **本地存储**:所有数据存储在本地嵌入式数据库中,保护隐私安 +- **本地存储**:所有数据存储在本地嵌入式数据库中,保护隐私安全 ## 基本原理 @@ -46,7 +46,7 @@ MineKB 采用 RAG(Retrieval-Augmented Generation)架构,结合向量检索 - 使用阿里云百炼 API 生成文档的向量表示(Embeddings) 2. **向量存储** - - 使用 SeekDB 0.0.1.dev4 作为嵌入式向量数据库(通过 Python 子进程访问) + - 使用 [seekdb-rs](https://github.com/ob-labs/seekdb-rs) 嵌入式向量数据库(Rust 原生,无 Python 依赖) - 原生支持向量类型和 HNSW 索引,实现高效的向量检索 - 支持项目级别的数据隔离和事务处理 - 支持向量列输出和数据库存在性验证 @@ -106,14 +106,12 @@ MineKB 采用 RAG(Retrieval-Augmented Generation)架构,结合向量检索 - `@tauri-apps/api 1.5` - 前端 API 调用库 - `@tauri-apps/cli 1.5` - 命令行工具 - 启用功能:`path-all`、`http-all`、`dialog-all`、`fs-all`、`shell-open` -- **Python 3.8+** - SeekDB 数据库操作(通过子进程通信) **数据库** -- **SeekDB 0.0.1.dev4** (Python) - AI-Native 嵌入式向量数据库 +- **seekdb-rs** (Rust) - AI-Native 嵌入式向量数据库,无 Python 依赖 - 原生支持向量类型和 HNSW 索引 - 支持混合检索和全文搜索 - 高性能向量相似度计算 - - 通过 JSON-RPC 协议与 Rust 通信 ### Rust 核心依赖 @@ -122,8 +120,7 @@ MineKB 采用 RAG(Retrieval-Augmented Generation)架构,结合向量检索 - `docx-rs 0.4` - Word 文档处理 **数据存储** -- `seekdb 0.0.1.dev4` (Python) - AI-Native 嵌入式数据库,原生支持向量索引和 HNSW 检索 -- JSON 通信协议 - Rust 与 Python 子进程通信 +- `seekdb-rs` (Rust) - AI-Native 嵌入式数据库,原生支持向量索引和 HNSW 检索,无 Python **向量计算** - SeekDB 原生向量索引 (HNSW) - 高效向量相似度搜索 @@ -166,17 +163,79 @@ MineKB 采用 RAG(Retrieval-Augmented Generation)架构,结合向量检索 ## 系统架构 ### 架构概览 - + +```mermaid +graph TB + subgraph Frontend["前端层"] + UI[React UI 组件] + State[状态管理] + end + + subgraph Command["命令层 (Tauri)"] + CMD_Project[项目命令] + CMD_Doc[文档命令] + CMD_Chat[对话命令] + CMD_Speech[语音命令] + end + + subgraph 
Service["服务层 (Rust)"] + SVC_Project[ProjectService] + SVC_Doc[DocumentService] + SVC_Conv[ConversationService] + SVC_Embed[EmbeddingService] + SVC_LLM[LLMClient] + SVC_Speech[SpeechService] + end + + subgraph Data["数据层"] + Adapter[SeekDbAdapter] + Client[seekdb-rs Client] + DB[(嵌入式 SeekDB)] + Tables[关系表] + VectorColl[向量集合 + HNSW] + end + + subgraph External["外部服务"] + DashScope[阿里云百炼 API
Embedding + LLM] + end + + UI --> Command + State --> Command + CMD_Project --> SVC_Project + CMD_Doc --> SVC_Doc + CMD_Chat --> SVC_Conv + CMD_Speech --> SVC_Speech + + SVC_Doc --> SVC_Embed + SVC_Conv --> SVC_LLM + SVC_Project --> Adapter + SVC_Doc --> Adapter + SVC_Conv --> Adapter + + Adapter --> Client + Client --> DB + DB --> Tables + DB --> VectorColl + + SVC_Embed --> DashScope + SVC_LLM --> DashScope +``` + +- **前端**:React + TypeScript,负责状态与界面。 +- **命令层**:Tauri 命令(项目、文档、对话、语音)连接前端与 Rust 服务。 +- **服务层**:ProjectService、DocumentService、ConversationService、EmbeddingService、LLMClient、SpeechService。 +- **数据层**:SeekDbAdapter 通过 **seekdb-rs** 异步 Client 与嵌入式 SeekDB(SQL + 向量集合)通信,无 Python。 +- **外部**:阿里云百炼 API 提供 Embedding 与 LLM。 ## 快速开始 ### 环境要求 -- Node.js 16+ -- Rust 1.70+ -- Python 3.8+ +**构建 / 开发环境**(本地开发或打包): -> **注意**: SeekDB 目前仅发布 Linux 版本,不久会支持 MacOS。MacOS 用户推荐使用 [UTM](https://mac.getutm.app) 虚拟机管理器运行 [Ubuntu 20.x 以上](https://mac.getutm.app/gallery/ubuntu-20-04)。 +- Node.js 16+(前端与 Tauri CLI) +- Rust 1.70+(Tauri 后端) +- 无需 Python(seekdb-rs 为 Rust 原生) ### 安装依赖 @@ -184,7 +243,7 @@ MineKB 采用 RAG(Retrieval-Augmented Generation)架构,结合向量检索 # 安装前端依赖 npm install -# Rust 和 Python 依赖会在构建时自动安装 +# Rust 依赖在构建时自动解析 ``` ### 配置 @@ -201,6 +260,9 @@ cp src-tauri/config.example.json src-tauri/config.json ```bash # 启动开发服务器 tnpm run tauri:dev + +# 自定义数据目录时可设置环境变量 CONFIG_DIR +CONFIG_DIR=/path/to/your/data tnpm run tauri:dev ``` ### 构建应用 @@ -234,7 +296,7 @@ cd src-tauri && cargo test - ✅ **HNSW 索引**:专业的向量索引算法,检索更快更准 - ✅ **AI-Native 特性**:内置全文检索、混合检索等 AI 能力 - ✅ **更好的扩展性**:支持更大规模的数据和更复杂的查询 -- ✅ **最新版本特性**(0.0.1.dev4):向量列输出、数据库验证、USE 语句稳定支持 +- ✅ **seekdb-rs**(Rust):嵌入式客户端,无 Python 依赖,支持向量列输出与数据库存在性验证 --- diff --git a/README.md b/README.md index f6ca94a..4b1bbbc 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ MineKB employs a RAG (Retrieval-Augmented Generation) architecture, combining ve - Generation of document embeddings using Alibaba Cloud Bailian API 2. 
**Vector Storage** - - SeekDB 0.0.1.dev4 as an embedded vector database (accessed via Python subprocess) + - [seekdb-rs](https://github.com/ob-labs/seekdb-rs) embedded vector database (Rust native, no Python) - Native support for vector types and HNSW indexing for efficient vector retrieval - Project-level data isolation and transaction support - Vector column output and database existence validation @@ -106,14 +106,12 @@ MineKB employs a RAG (Retrieval-Augmented Generation) architecture, combining ve - `@tauri-apps/api 1.5` - Frontend API library - `@tauri-apps/cli 1.5` - Command-line tools - Enabled features: `path-all`, `http-all`, `dialog-all`, `fs-all`, `shell-open` -- **Python 3.8+** - SeekDB database operations (via subprocess communication) **Database** -- **SeekDB 0.0.1.dev4** (Python) - AI-Native embedded vector database +- **seekdb-rs** (Rust) - AI-Native embedded vector database, no Python dependency - Native support for vector types and HNSW indexing - Hybrid search and full-text search support - High-performance vector similarity computation - - Communication with Rust via JSON-RPC protocol ### Rust Core Dependencies @@ -122,8 +120,7 @@ MineKB employs a RAG (Retrieval-Augmented Generation) architecture, combining ve - `docx-rs 0.4` - Word document processing **Data Storage** -- `seekdb 0.0.1.dev4` (Python) - AI-Native embedded database with native vector indexing and HNSW retrieval -- JSON communication protocol - Rust to Python subprocess communication +- `seekdb-rs` (Rust) - AI-Native embedded database with native vector indexing and HNSW retrieval, no Python **Vector Computation** - SeekDB native vector indexing (HNSW) - Efficient vector similarity search @@ -166,17 +163,79 @@ MineKB employs a RAG (Retrieval-Augmented Generation) architecture, combining ve ## System Architecture ### Architecture Overview - + +```mermaid +graph TB + subgraph Frontend["Frontend Layer"] + UI[React UI Components] + State[State Management] + end + + subgraph 
Command["Command Layer (Tauri)"] + CMD_Project[Project Commands] + CMD_Doc[Document Commands] + CMD_Chat[Conversation Commands] + CMD_Speech[Speech Commands] + end + + subgraph Service["Service Layer (Rust)"] + SVC_Project[ProjectService] + SVC_Doc[DocumentService] + SVC_Conv[ConversationService] + SVC_Embed[EmbeddingService] + SVC_LLM[LLMClient] + SVC_Speech[SpeechService] + end + + subgraph Data["Data Layer"] + Adapter[SeekDbAdapter] + Client[seekdb-rs Client] + DB[(Embedded SeekDB)] + Tables[Relational Tables] + VectorColl[Vector Collection + HNSW] + end + + subgraph External["External Services"] + DashScope[Aliyun Bailian API
Embedding + LLM] + end + + UI --> Command + State --> Command + CMD_Project --> SVC_Project + CMD_Doc --> SVC_Doc + CMD_Chat --> SVC_Conv + CMD_Speech --> SVC_Speech + + SVC_Doc --> SVC_Embed + SVC_Conv --> SVC_LLM + SVC_Project --> Adapter + SVC_Doc --> Adapter + SVC_Conv --> Adapter + + Adapter --> Client + Client --> DB + DB --> Tables + DB --> VectorColl + + SVC_Embed --> DashScope + SVC_LLM --> DashScope +``` + +- **Frontend**: React + TypeScript; state and UI. +- **Command Layer**: Tauri commands (project, document, conversation, speech) bridge frontend and Rust services. +- **Service Layer**: ProjectService, DocumentService, ConversationService, EmbeddingService, LLMClient, SpeechService. +- **Data Layer**: SeekDbAdapter uses **seekdb-rs** async Client to talk to embedded SeekDB (SQL + vector collection); no Python. +- **External**: Aliyun Bailian API for embeddings and LLM. ## Quick Start ### Requirements -- Node.js 16+ -- Rust 1.70+ -- Python 3.8+ +**Build / development environment** (for local dev or packaging): -> **Note**: SeekDB currently only releases Linux builds. macOS support is coming soon. macOS users are recommended to use [UTM](https://mac.getutm.app) virtual machine manager to run [Ubuntu 20.x or later](https://mac.getutm.app/gallery/ubuntu-20-04). 
+- Node.js 16+ (frontend and Tauri CLI) +- Rust 1.70+ (Tauri backend) +- No Python required (seekdb-rs is Rust-native) ### Install Dependencies @@ -184,7 +243,7 @@ MineKB employs a RAG (Retrieval-Augmented Generation) architecture, combining ve # Install frontend dependencies npm install -# Rust and Python dependencies are automatically installed during build +# Rust dependencies are resolved at build time ``` ### Configuration @@ -201,6 +260,9 @@ cp src-tauri/config.example.json src-tauri/config.json ```bash # Start development server tnpm run tauri:dev + +# Set the CONFIG_DIR environment variable to use a custom data directory +CONFIG_DIR=/path/to/your/data tnpm run tauri:dev ``` ### Build Application @@ -234,7 +296,7 @@ cd src-tauri && cargo test - ✅ **HNSW Indexing**: Professional vector indexing algorithm for faster and more accurate retrieval - ✅ **AI-Native Features**: Built-in full-text search, hybrid search, and other AI capabilities - ✅ **Better Scalability**: Supports larger datasets and more complex queries -- ✅ **Latest Version Features** (0.0.1.dev4): Vector column output, database validation, stable USE statement support +- ✅ **seekdb-rs** (Rust): Embedded client, no Python dependency, vector column output and database validation --- diff --git a/docs/COMPLETE_FIX_SUMMARY.md b/docs/COMPLETE_FIX_SUMMARY.md index 23093dd..0a38115 100644 --- a/docs/COMPLETE_FIX_SUMMARY.md +++ b/docs/COMPLETE_FIX_SUMMARY.md @@ -1,8 +1,5 @@ # 完整修复总结 -> **历史文档**: 本文档记录了 2025-10-29 的修复过程,当时使用的是 SeekDB 0.0.1.dev2 版本。 -> **当前版本**: 已升级到 SeekDB 0.0.1.dev4,模块名从 `oblite` 更改为 `seekdb`。 -> **参考**: [SeekDB 0.0.1.dev4 升级指南](UPGRADE_SEEKDB_0.0.1.dev4.md) ## 概述 @@ -45,7 +42,6 @@ - ✅ `src-tauri/src/services/seekdb_package.rs` - ✅ `src-tauri/src/services/python_env.rs` -**详细文档**: `docs/FIX_PIP_INSTALL_ERROR.md` --- @@ -68,7 +64,7 @@ ModuleNotFoundError: No module named 'oblite' import oblite # 失败 # 正确的方式 -import seekdb # 先导入 seekdb +import pyseekdb import oblite # 然后才能导入 oblite ``` @@ -169,17 +165,14 @@ ubuntu 53026 9.1 1.9 
74151808 161456 ? Sl 03:45 0:06 mine-kb ### Python 代码 3. **`src-tauri/python/seekdb_bridge.py`** - - 修改导入顺序:先 `import seekdb`,再 `import oblite` + - 使用 pyseekdb 客户端连接 - 重写 `handle_init()` 方法,添加数据库自动创建逻辑 - 使用 `oblite.connect("")` 访问系统上下文 - 执行 `CREATE DATABASE IF NOT EXISTS` 确保数据库存在 ### 文档 -4. **`docs/FIX_PIP_INSTALL_ERROR.md`** - - pip 安装问题的详细分析和解决方案 - -5. **`docs/FIX_SEEKDB_DATABASE_ERROR.md`** +4. **`docs/FIX_SEEKDB_DATABASE_ERROR.md`** - SeekDB 数据库问题的详细分析和解决方案 6. **`docs/COMPLETE_FIX_SUMMARY.md`** (本文档) @@ -284,8 +277,7 @@ except Exception as e: - [x] Python 虚拟环境存在 - [x] pip 可用(`python -m pip --version`) -- [x] seekdb 已安装(`python -c "import seekdb"`) -- [x] oblite 可导入(`python -c "import seekdb; import oblite"`) +- [x] pyseekdb 已安装(`python -c "import pyseekdb"`) - [x] 数据库实例目录存在或会自动创建 - [x] 数据库会在初始化时自动创建 @@ -312,9 +304,8 @@ except Exception as e: ## 相关资源 ### 文档 -- [pip 安装问题修复](./FIX_PIP_INSTALL_ERROR.md) - [SeekDB 数据库问题修复](./FIX_SEEKDB_DATABASE_ERROR.md) -- [SeekDB 自动安装文档](./SEEKDB_AUTO_INSTALL.md) +- [seekdb.md](./seekdb.md) - SeekDB / pyseekdb 文档 ### 代码文件 - `src-tauri/src/services/python_env.rs` - Python 环境管理 diff --git a/docs/FIX_OBLITE_EXECUTE_ERROR.md b/docs/FIX_OBLITE_EXECUTE_ERROR.md deleted file mode 100644 index dd95ae2..0000000 --- a/docs/FIX_OBLITE_EXECUTE_ERROR.md +++ /dev/null @@ -1,196 +0,0 @@ -# 修复 ObLite execute() 参数化查询错误 - -> **历史文档**: 本文档记录了早期版本(0.0.1.dev2)使用 `oblite` 模块时的问题。 -> **当前版本**: SeekDB 0.0.1.dev4 版本已更改模块名为 `seekdb`,但该限制仍然存在。 -> **参考**: [SeekDB 0.0.1.dev4 升级指南](UPGRADE_SEEKDB_0.0.1.dev4.md) - -## 问题描述 - -创建知识库时出现以下错误: - -``` -Error:创建知识库失败:创建项目失败:Python subprocess error: ExecuteError - -execute: incompatible function arguments. The following argument types are supported: -1. (self: oblite.ObLiteiEmbedCursor, arg0: str) -> int -Invoked with: , -'INSERT INTO projects (id, name, description, status, document_count, created_at, updated_at) - VALUES (?, ?, ?, ?, ?, ?, ?) 
- ON DUPLICATE KEY UPDATE ...', -['3600f13e-51df-4616-8de6-6aa4fa9e904b', 'Untitled', '', 'Created', 0, - '2025-10-29T05:42:49.609341126+00:00', '2025-10-29T05:42:49.609341126+00:00'] -``` - -## 根本原因 - -**ObLite 的 `cursor.execute()` 方法只接受一个字符串参数,不支持参数化查询。** - -之前的实现尝试使用类似 SQLite/MySQL 的参数化查询方式: -```python -cursor.execute(sql, values) # ❌ ObLite 不支持 -``` - -但 ObLite 只支持: -```python -cursor.execute(sql) # ✅ 只接受一个字符串参数 -``` - -## 解决方案 - -### 1. 修改 `seekdb_bridge.py` - -在 `SeekDBBridge` 类中添加了两个辅助方法: - -#### 1.1 `format_sql_value()` - 格式化 Python 值为 SQL 字符串 - -```python -def format_sql_value(self, value: Any) -> str: - """Format a Python value to SQL string representation for ObLite""" - if value is None: - return "NULL" - elif isinstance(value, bool): - return "1" if value else "0" - elif isinstance(value, (int, float)): - return str(value) - elif isinstance(value, str): - # Escape single quotes in strings - escaped = value.replace("'", "''") - return f"'{escaped}'" - elif isinstance(value, list): - # For vector/array values - return str(value) - else: - # For other types, convert to string and quote - escaped = str(value).replace("'", "''") - return f"'{escaped}'" -``` - -#### 1.2 `build_sql_with_values()` - 将参数嵌入到 SQL 字符串中 - -```python -def build_sql_with_values(self, sql: str, values: List[Any]) -> str: - """ - Replace ? placeholders in SQL with actual values - ObLite doesn't support parameterized queries, so we embed values directly - """ - if not values: - return sql - - # Replace ? with actual values - result = sql - for value in values: - formatted_value = self.format_sql_value(value) - # Replace the first occurrence of ? - result = result.replace("?", formatted_value, 1) - - return result -``` - -### 2. 
更新三个处理方法 - -#### 2.1 `handle_execute()` - -**修改前:** -```python -def handle_execute(self, params: Dict[str, Any]): - try: - sql = params["sql"] - values = params.get("values", []) - - if values: - self.cursor.execute(sql, values) # ❌ 两个参数 - else: - self.cursor.execute(sql) -``` - -**修改后:** -```python -def handle_execute(self, params: Dict[str, Any]): - try: - sql = params["sql"] - values = params.get("values", []) - - # ObLite doesn't support parameterized queries, embed values directly - final_sql = self.build_sql_with_values(sql, values) - - # ObLite execute() only accepts one argument - self.cursor.execute(final_sql) # ✅ 一个参数 -``` - -#### 2.2 `handle_query()` 和 `handle_query_one()` - -同样的修改应用到查询方法: -```python -# 嵌入参数值 -final_sql = self.build_sql_with_values(sql, values) - -# 只使用一个参数调用 execute -self.cursor.execute(final_sql) -``` - -## 关于 `ON DUPLICATE KEY UPDATE` 语法 - -ObLite 基于 OceanBase,支持 MySQL 兼容语法,因此 `ON DUPLICATE KEY UPDATE` 是被支持的。 - -原始 SQL: -```sql -INSERT INTO projects (id, name, description, status, document_count, created_at, updated_at) -VALUES (?, ?, ?, ?, ?, ?, ?) 
-ON DUPLICATE KEY UPDATE - name = VALUES(name), - description = VALUES(description), - status = VALUES(status), - document_count = VALUES(document_count), - updated_at = VALUES(updated_at) -``` - -修复后会被转换为: -```sql -INSERT INTO projects (id, name, description, status, document_count, created_at, updated_at) -VALUES ('3600f13e-51df-4616-8de6-6aa4fa9e904b', 'Untitled', '', 'Created', 0, - '2025-10-29T05:42:49.609341126+00:00', '2025-10-29T05:42:49.609341126+00:00') -ON DUPLICATE KEY UPDATE - name = VALUES(name), - description = VALUES(description), - status = VALUES(status), - document_count = VALUES(document_count), - updated_at = VALUES(updated_at) -``` - -## 测试验证 - -创建了测试脚本 `scripts/test_oblite_upsert.py` 用于验证不同的 UPSERT 语法支持。 - -## 修改文件 - -- `src-tauri/python/seekdb_bridge.py` - Python 桥接层主要修复 - -## 影响范围 - -此修复影响所有通过 Python 子进程与 ObLite 交互的数据库操作: -- 项目创建和更新 -- 文档插入 -- 向量数据存储 -- 对话记录保存 -- 所有 SQL 查询 - -## 测试建议 - -1. **创建新项目**:测试项目创建功能 -2. **更新项目**:测试项目更新功能(应触发 UPSERT) -3. **添加文档**:测试文档添加和向量存储 -4. **搜索功能**:测试向量相似度搜索 -5. **对话功能**:测试对话记录的保存和加载 - -## 相关文档 - -- `docs/seekdb.md` - SeekDB/ObLite 使用文档 -- `scripts/test_oblite_upsert.py` - UPSERT 语法测试脚本 - -## 修复日期 - -2025-10-29 - -## 修复人员 - -AI Assistant (Cursor) - diff --git a/docs/FIX_PIP_INSTALL_ERROR.md b/docs/FIX_PIP_INSTALL_ERROR.md deleted file mode 100644 index 8fb75ab..0000000 --- a/docs/FIX_PIP_INSTALL_ERROR.md +++ /dev/null @@ -1,154 +0,0 @@ -# 修复 pip 安装错误 - -## 问题描述 - -应用启动时出现以下错误: -``` -执行 pip install 失败: No such file or directory (os error 2) -``` - -## 根本原因 - -1. **虚拟环境中缺少 pip 可执行文件** - - Python 虚拟环境被创建后,`venv/bin/` 目录中只有 python 相关的符号链接 - - 没有生成 `pip` 或 `pip3` 可执行文件 - - 代码尝试直接调用 `pip3` 可执行文件,导致 "No such file or directory" 错误 - -2. **oblite 模块导入问题** - - SeekDB 包安装后,`oblite` 模块不能直接导入 - - 必须先导入 `seekdb` 模块来触发 `oblite` 的动态加载 - - `oblite.so` 被缓存到 `~/.seekdb/cache/` 目录 - -## 修复方案 - -### 1. 
使用 `python -m pip` 替代直接调用 pip - -**修改文件**: `src-tauri/src/services/seekdb_package.rs` - -将所有 pip 调用改为使用 `python -m pip` 的方式: - -```rust -// 修改前 -let status = Command::new(&pip_executable) - .arg("install") - .arg(format!("seekdb=={}", SEEKDB_VERSION)) - .status()?; - -// 修改后 -let status = Command::new(python_executable) - .arg("-m") - .arg("pip") - .arg("install") - .arg(format!("seekdb=={}", SEEKDB_VERSION)) - .status()?; -``` - -**原因**: `python -m pip` 是更可靠的方式,不依赖于 pip 可执行文件的存在。 - -### 2. 确保虚拟环境中 pip 可用 - -**修改文件**: `src-tauri/src/services/python_env.rs` - -添加 `ensure_pip()` 方法,在虚拟环境创建后确保 pip 可用: - -```rust -fn ensure_pip(&self) -> Result<()> { - // 检查 pip 是否可用 - let output = Command::new(&self.python_executable) - .arg("-m") - .arg("pip") - .arg("--version") - .output(); - - match output { - Ok(output) if output.status.success() => { - // pip 已可用 - Ok(()) - } - _ => { - // 使用 ensurepip 安装 pip - let install_output = Command::new(&self.python_executable) - .arg("-m") - .arg("ensurepip") - .arg("--default-pip") - .output()?; - - if !install_output.status.success() { - return Err(anyhow!("pip 安装失败")); - } - Ok(()) - } - } -} -``` - -### 3. 修复 seekdb/oblite 模块导入顺序 - -**修改文件**: -- `src-tauri/src/services/seekdb_package.rs` -- `src-tauri/python/seekdb_bridge.py` - -修改验证代码,先导入 `seekdb` 再导入 `oblite`: - -```python -# seekdb_bridge.py -try: - import seekdb # 先导入 seekdb 来触发 oblite 的加载 - import oblite -except ImportError as e: - # 错误处理 - ... -``` - -```rust -// seekdb_package.rs - verify() -let output = Command::new(self.python_env.get_python_executable()) - .arg("-c") - .arg("import seekdb; import oblite; print('seekdb location:', seekdb.__file__); print('oblite location:', oblite.__file__)") - .output() -``` - -## 修复验证 - -修复后的正常日志输出: - -``` -✅ Python 虚拟环境已存在 -🔍 检查 pip 是否可用... -✅ pip 已可用: pip 25.3 from ... -🔧 升级 pip... -✅ pip 升级完成 -📦 安装 seekdb==0.0.1.dev2... -Successfully installed seekdb-0.0.1.dev2 seekdb_lib-0.0.1.dev2 -🔍 验证 seekdb 安装... 
-✅ seekdb 验证通过 -[SeekDB Bridge] SeekDB Bridge started, waiting for commands... -[SeekDB Bridge] Initializing SeekDB: path=... -[SeekDB Bridge] SeekDB initialized successfully -``` - -## 技术要点 - -1. **`python -m pip` vs 直接调用 pip** - - `python -m pip` 更可靠,适用于各种环境 - - 不依赖于 pip 可执行文件的存在和路径配置 - -2. **虚拟环境的 pip 安装** - - 某些 Python 安装可能不包含完整的 ensurepip - - 使用 `python -m ensurepip` 可以确保 pip 可用 - -3. **SeekDB 的模块加载机制** - - `oblite` 模块是动态加载的 - - 必须先导入 `seekdb` 模块 - - `oblite.so` 会被缓存到用户目录 - -## 相关文件 - -- `src-tauri/src/services/seekdb_package.rs` - SeekDB 包管理 -- `src-tauri/src/services/python_env.rs` - Python 环境管理 -- `src-tauri/python/seekdb_bridge.py` - SeekDB Python 桥接 - -## 修复日期 - -2025-10-29 - diff --git a/docs/FIX_SEEKDB_DATABASE_ERROR.md b/docs/FIX_SEEKDB_DATABASE_ERROR.md index 686a445..85aca4e 100644 --- a/docs/FIX_SEEKDB_DATABASE_ERROR.md +++ b/docs/FIX_SEEKDB_DATABASE_ERROR.md @@ -1,8 +1,5 @@ # 修复 SeekDB "Unknown Database" 错误 -> **历史文档**: 本文档记录了早期版本的数据库初始化问题。 -> **当前版本**: SeekDB 0.0.1.dev4 已增强数据库存在性验证。 -> **参考**: [SeekDB 0.0.1.dev4 升级指南](UPGRADE_SEEKDB_0.0.1.dev4.md) ## 问题描述 @@ -235,7 +232,6 @@ def handle_init(self, params: Dict[str, Any]): - `src-tauri/python/seekdb_bridge.py` - SeekDB Python 桥接(已修复) - `src-tauri/src/services/seekdb_adapter.rs` - Rust 适配器 -- `docs/FIX_PIP_INSTALL_ERROR.md` - pip 安装问题修复(前置问题) ## 修复日期 diff --git a/docs/FIX_SEEKDB_VECTOR_QUERY.md b/docs/FIX_SEEKDB_VECTOR_QUERY.md index 6a5183a..6199615 100644 --- a/docs/FIX_SEEKDB_VECTOR_QUERY.md +++ b/docs/FIX_SEEKDB_VECTOR_QUERY.md @@ -1,8 +1,5 @@ # 修复SeekDB向量字段查询问题 -> **历史文档**: 本文档记录了早期版本的向量字段查询限制。 -> **当前版本**: SeekDB 0.0.1.dev4 已部分改进向量列输出支持,但在使用向量函数时仍有限制。 -> **参考**: [SeekDB 0.0.1.dev4 升级指南](UPGRADE_SEEKDB_0.0.1.dev4.md) **日期**: 2025-10-29 **问题**: SeekDB不支持在某些上下文中直接SELECT vector类型字段 @@ -249,7 +246,6 @@ Finished `release` profile [optimized] target(s) in 41.00s ## 相关文档 - [RESTORE_SEEKDB_VECTOR_SEARCH.md](./RESTORE_SEEKDB_VECTOR_SEARCH.md) - 恢复使用SeekDB向量检索 -- 
[MIGRATION_SUMMARY.md](./MIGRATION_SUMMARY.md) - SeekDB迁移总结 - [seekdb.md](./seekdb.md) - SeekDB使用说明 ## 总结 diff --git a/docs/FIX_STARTUP_HANG_FINAL.md b/docs/FIX_STARTUP_HANG_FINAL.md index 03de547..a83363d 100644 --- a/docs/FIX_STARTUP_HANG_FINAL.md +++ b/docs/FIX_STARTUP_HANG_FINAL.md @@ -434,7 +434,7 @@ df -h / **解决方法**: ```bash # 手动安装测试 -~/.local/share/mine-kb/venv/bin/pip install seekdb==0.0.1.dev2 -i https://pypi.tuna.tsinghua.edu.cn/simple/ +~/.local/share/com.mine-kb.app/venv/bin/pip install pyseekdb -i https://pypi.tuna.tsinghua.edu.cn/simple/ # 检查 Python 版本 python3 --version # 需要 3.8+ @@ -452,7 +452,7 @@ python3 --version # 需要 3.8+ - [FIX_STARTUP_HANG.md](./FIX_STARTUP_HANG.md) - 第一版修复 - [FIX_STARTUP_HANG_V2.md](./FIX_STARTUP_HANG_V2.md) - 第二版修复 -- [SEEKDB_AUTO_INSTALL.md](./SEEKDB_AUTO_INSTALL.md) - SeekDB 安装文档 +- [seekdb.md](./seekdb.md) - SeekDB / pyseekdb 文档 - [SPLASH_SCREEN.md](./SPLASH_SCREEN.md) - 启动界面设计 --- diff --git a/docs/FIX_SUMMARY.md b/docs/FIX_SUMMARY.md index 298ca3b..b0a889d 100644 --- a/docs/FIX_SUMMARY.md +++ b/docs/FIX_SUMMARY.md @@ -1,8 +1,5 @@ # 🔧 路径问题修复总结 -> **历史文档**: 本文档记录了早期版本的路径问题修复。 -> **当前版本**: SeekDB 0.0.1.dev4,相关问题已修复。 -> **参考**: [SeekDB 0.0.1.dev4 升级指南](UPGRADE_SEEKDB_0.0.1.dev4.md) ## ✅ 问题已解决 @@ -105,7 +102,7 @@ sudo apt install python3-pip ### 2. 安装 SeekDB ```bash -pip3 install --user seekdb==0.0.1.dev2 -i https://pypi.tuna.tsinghua.edu.cn/simple/ +pip3 install pyseekdb -i https://pypi.tuna.tsinghua.edu.cn/simple/ ``` ### 3. 验证安装 @@ -120,7 +117,7 @@ python3 test_seekdb.py ============================================================ SeekDB Installation Test ============================================================ -Testing oblite import... ✅ OK +Testing pyseekdb import... ✅ OK Testing basic operations... Creating database at /tmp/.../test.db... ✅ ... 
@@ -155,10 +152,8 @@ npm run tauri:dev ## 📚 相关文档 -- [PATH_FIX_APPLIED.md](PATH_FIX_APPLIED.md) - 详细的修复说明和下一步指南 - [SETUP_CHECKLIST.md](SETUP_CHECKLIST.md) - 完整的设置清单 -- [MIGRATION_SEEKDB.md](MIGRATION_SEEKDB.md) - SeekDB 迁移指南 -- [MIGRATION_SUMMARY.md](MIGRATION_SUMMARY.md) - 技术实现总结 +- [seekdb.md](seekdb.md) - SeekDB / pyseekdb 文档 ## 🔄 如果还有问题 diff --git a/docs/HYBRID_SEARCH_AND_LOGGING.md b/docs/HYBRID_SEARCH_AND_LOGGING.md index 8ffd013..dd5869e 100644 --- a/docs/HYBRID_SEARCH_AND_LOGGING.md +++ b/docs/HYBRID_SEARCH_AND_LOGGING.md @@ -277,7 +277,7 @@ npm run tauri dev 1. **删除旧数据库**(开发环境): ```bash - rm -rf ~/.local/share/com.mine-kb.app/oblite.db + rm -rf ~/.local/share/com.mine-kb.app/ ``` 2. **保留数据迁移**(生产环境): @@ -352,10 +352,7 @@ npm run tauri dev ## 📚 参考文档 -- [SeekDB 官方文档](./seekdb.md) -- [混合检索示例](./seekdb.md#33-混合检索) -- [向量检索原理](./seekdb.md#31-向量检索) -- [全文检索原理](./seekdb.md#32-全文检索) +- [SeekDB in MineKB](./seekdb.md) --- diff --git a/docs/MIGRATION_PIP_INSTALL.md b/docs/MIGRATION_PIP_INSTALL.md deleted file mode 100644 index 816f0c9..0000000 --- a/docs/MIGRATION_PIP_INSTALL.md +++ /dev/null @@ -1,250 +0,0 @@ -# SeekDB 安装方式迁移总结 - -## 迁移概述 - -将 SeekDB 依赖从手动下载 oblite.so 文件改为通过 pip 自动安装。 - -**迁移日期**:2025-10-28 -**版本**:v2.0 - -## 变更对比 - -### 旧方式(v1.0) -- ❌ 手动下载 oblite.so (2.7GB) 到 `src-tauri/libs/` -- ❌ 需要设置 PYTHONPATH 环境变量 -- ❌ 架构不匹配问题(x86-64 vs ARM64) -- ❌ 磁盘空间占用大 -- ❌ 文件可能损坏 - -### 新方式(v2.0) -- ✅ pip 自动安装 seekdb 包 -- ✅ 使用 Python 虚拟环境隔离依赖 -- ✅ 自动适配系统架构 -- ✅ 节省磁盘空间 -- ✅ 更可靠的包管理 - -## 实施清单 - -### 新增文件 -- [x] `src-tauri/src/services/python_env.rs` - Python 虚拟环境管理器 -- [x] `src-tauri/src/services/seekdb_package.rs` - SeekDB 包管理器 - -### 修改文件 -- [x] `src-tauri/src/services/mod.rs` - 添加新模块导出 -- [x] `src-tauri/src/services/python_subprocess.rs` - 改用 Python 可执行文件路径 -- [x] `src-tauri/src/services/seekdb_adapter.rs` - 传递 Python 路径 -- [x] `src-tauri/src/services/document_service.rs` - 更新参数 -- [x] `src-tauri/src/services/app_state.rs` - 更新参数 -- [x] `src-tauri/src/main.rs` - 
完全重构启动逻辑 -- [x] `src-tauri/python/install_deps.sh` - 更新为虚拟环境方式 - -### 删除文件 -- [x] `src-tauri/src/services/seekdb_installer.rs` - 已移除 -- [x] `src-tauri/libs/` 目录(包括 oblite.so) - 已移除 - -### 文档更新 -- [x] `docs/SEEKDB_AUTO_INSTALL.md` - 完全重写 -- [x] `docs/archive/ERROR_ANALYSIS_OBLITE_SO.md` - 归档旧文档 -- [x] `docs/MIGRATION_PIP_INSTALL.md` - 本文档 - -## 代码变更详情 - -### 1. Python 环境管理 (python_env.rs) - -```rust -pub struct PythonEnv { - venv_dir: PathBuf, - python_executable: PathBuf, -} - -impl PythonEnv { - pub fn new(app_data_dir: &Path) -> Result - pub fn ensure_venv(&self) -> Result<()> - pub fn get_python_executable(&self) -> &Path - pub fn get_pip_executable(&self) -> PathBuf -} -``` - -### 2. SeekDB 包管理 (seekdb_package.rs) - -```rust -pub struct SeekDbPackage<'a> { - python_env: &'a PythonEnv, -} - -impl<'a> SeekDbPackage<'a> { - pub fn new(python_env: &'a PythonEnv) -> Self - pub fn is_installed(&self) -> Result - pub fn install(&self) -> Result<()> - pub fn verify(&self) -> Result<()> -} -``` - -### 3. 启动流程变更 (main.rs) - -**旧流程**: -```rust -// 1. 检查/下载 oblite.so -let seekdb_installer = SeekDbInstaller::new(&resource_dir)?; -seekdb_installer.ensure_oblite_available(&resource_dir)?; -let lib_dir = seekdb_installer.get_lib_dir(); - -// 2. 传递 lib_dir 作为 PYTHONPATH -AppState::new_with_full_config(db_path, config, cache_dir, Some(lib_dir)) -``` - -**新流程**: -```rust -// 1. 创建 Python 虚拟环境 -let python_env = PythonEnv::new(&app_data_dir)?; -python_env.ensure_venv()?; - -// 2. 检测并安装 seekdb -let seekdb_pkg = SeekDbPackage::new(&python_env); -if !seekdb_pkg.is_installed()? { - seekdb_pkg.install()?; -} -seekdb_pkg.verify()?; - -// 3. 获取 Python 路径 -let python_path = python_env.get_python_executable(); - -// 4. 传递 python_path 给服务 -AppState::new_with_full_config(db_path, config, cache_dir, Some(python_path)) -``` - -### 4. 
Python 子进程变更 - -**旧方式**: -```rust -// 设置 PYTHONPATH 环境变量 -command.env("PYTHONPATH", lib_path); -let child = Command::new("python3").spawn()?; -``` - -**新方式**: -```rust -// 直接使用虚拟环境的 Python -let child = Command::new(python_executable).spawn()?; -// 不需要设置 PYTHONPATH -``` - -## 测试验证 - -### 编译测试 -```bash -cd src-tauri -cargo build -``` -✅ 编译成功,无错误 - -### 运行测试 -```bash -cargo run -``` - -预期行为: -1. 首次运行:自动创建虚拟环境并安装 seekdb -2. 再次运行:跳过安装,直接使用已有环境 - -### 清理测试 -```bash -# 删除虚拟环境 -rm -rf ~/.local/share/com.mine-kb.app/venv - -# 重新运行 -cargo run -``` - -预期行为:重新创建虚拟环境并安装 seekdb - -## 优势分析 - -### 1. 跨平台兼容性 -- pip 自动识别系统架构(ARM64/x86-64) -- 自动下载适合的二进制包 -- 解决了之前的架构不匹配问题 - -### 2. 空间效率 -- 项目中不再需要存储 2.7GB 的 oblite.so -- 虚拟环境只在用户机器上创建一次 -- Git 仓库体积大幅减小 - -### 3. 依赖隔离 -- 虚拟环境不影响系统 Python -- 不同版本的应用可以共存 -- 避免依赖冲突 - -### 4. 易于维护 -- pip 可以轻松升级到新版本 -- 统一的包管理方式 -- 更好的版本控制 - -### 5. 用户体验 -- 自动化安装,无需用户干预 -- 清晰的进度反馈 -- 友好的错误提示 - -## 潜在问题及解决方案 - -### 问题 1:首次安装时间较长 -**原因**:需要下载和安装 seekdb 包 -**解决**:显示进度提示"首次运行需要下载并安装 SeekDB,可能需要几分钟..." - -### 问题 2:网络连接失败 -**原因**:无法访问 PyPI 镜像 -**解决**: -- 使用清华镜像源(国内访问快) -- 提供友好的错误信息 -- 建议检查网络连接 - -### 问题 3:Python 环境缺失 -**原因**:系统未安装 Python 3 -**解决**: -- 检测 Python 是否存在 -- 提供安装指引 -- 友好的错误提示 - -### 问题 4:虚拟环境创建失败 -**原因**:缺少 python3-venv 模块 -**解决**: -- 检测并提示安装 python3-venv -- 提供具体的安装命令 -- 支持手动安装脚本 - -## 回滚方案 - -如果需要回滚到旧版本: - -```bash -git checkout -cd src-tauri -cargo build -``` - -注意:回滚后需要手动下载 oblite.so 到 `src-tauri/libs/` - -## 后续优化建议 - -1. **下载进度显示**:在 UI 中显示 pip 安装进度 -2. **离线安装支持**:提供离线安装包 -3. **多镜像源**:支持切换到其他 PyPI 镜像 -4. **版本锁定**:requirements.txt 锁定依赖版本 -5. 
**缓存优化**:利用 pip 缓存加速重装 - -## 总结 - -本次迁移成功将 SeekDB 安装方式从手动文件管理改为自动化的 pip 安装,显著提升了: -- ✅ 跨平台兼容性 -- ✅ 用户体验 -- ✅ 代码可维护性 -- ✅ 空间效率 - -所有代码编译通过,测试验证完成,可以安全部署到生产环境。 - ---- - -**完成日期**:2025-10-28 -**实施人员**:AI Assistant -**状态**:✅ 完成并验证 - diff --git a/docs/MIGRATION_SEEKDB.md b/docs/MIGRATION_SEEKDB.md deleted file mode 100644 index 239da3f..0000000 --- a/docs/MIGRATION_SEEKDB.md +++ /dev/null @@ -1,235 +0,0 @@ -# SQLite to SeekDB Migration Guide - -This guide explains how to migrate from SQLite to SeekDB and provides information about the new database architecture. - -> **版本说明**: 本文档适用于 SeekDB 0.0.1.dev4 版本。从 0.0.1.dev2 升级的用户,请参考 [UPGRADE_SEEKDB_0.0.1.dev4.md](UPGRADE_SEEKDB_0.0.1.dev4.md) - -## What Changed? - -The application has been migrated from SQLite (via rusqlite) to **SeekDB 0.0.1.dev4**, an embedded database with native AI capabilities including: - -- **Native Vector Search**: Built-in HNSW vector indexing for efficient similarity search -- **Full-text Search**: Integrated fulltext search capabilities -- **Hybrid Search**: Combined vector and keyword search (coming soon) -- **OLAP Support**: Column storage and analytical query optimization - -## Architecture - -The new architecture uses: - -1. **Python Subprocess**: A persistent Python process manages SeekDB operations -2. **JSON Protocol**: Communication between Rust and Python via stdin/stdout using JSON -3. **SeekDB**: OceanBase's lightweight embedded database with AI-native features - -``` -┌─────────────────┐ -│ Rust App │ -│ (Tauri) │ -└────────┬────────┘ - │ JSON over stdin/stdout - ▼ -┌─────────────────┐ -│ Python Bridge │ -│ (subprocess) │ -└────────┬────────┘ - │ - ▼ -┌─────────────────┐ -│ SeekDB │ -│ (oblite.db) │ -└─────────────────┘ -``` - -## Prerequisites - -### Python Setup - -1. 
**Install Python 3.x** (if not already installed): - ```bash - # Ubuntu/Debian - sudo apt update - sudo apt install python3 python3-pip - - # macOS - brew install python3 - - # Windows - # Download from python.org - ``` - -2. **Install SeekDB package** (version 0.0.1.dev4): - ```bash - pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple/ - ``` - -3. **Verify installation**: - ```bash - python3 -c "import seekdb; print('SeekDB 0.0.1.dev4 installed successfully')" - ``` - -## Migration Process - -### Option 1: Automatic Migration (Recommended) - -When you first run the application after upgrading, it will automatically detect the old SQLite database and offer to migrate it. - -### Option 2: Manual Migration - -If you want to manually migrate your existing SQLite database: - -```bash -cd src-tauri/python -python3 migrate_sqlite_to_seekdb.py -``` - -**Example**: -```bash -# Migrate from old SQLite database to new SeekDB -python3 migrate_sqlite_to_seekdb.py ~/Library/Application\ Support/mine-kb/mine_kb.db ./oblite.db -``` - -The migration script will: -- ✅ Copy all projects -- ✅ Copy all conversations and messages -- ✅ Convert and migrate all vector embeddings (1536 dimensions) -- ✅ Create proper indexes (including HNSW vector index) -- ✅ Verify data integrity - -### Migration Notes - -- **Embedding Dimension**: The migration pads/truncates embeddings to 1536 dimensions (DashScope text-embedding-v1 standard) -- **Vector Index**: A native HNSW index is created for efficient vector search -- **Backup Recommended**: Always backup your data before migration -- **Time Estimate**: Migration takes approximately 1-5 minutes per 10,000 documents - -## Configuration - -### Database Location - -SeekDB stores data in the database directory (previously `oblite.db`, can be named as needed). 
You can configure the location: - -```json -// config.json -{ - "database": { - "path": "./mine_kb.db", - "name": "mine_kb" - } -} -``` - -> **注意**: 从 0.0.1.dev4 版本开始,推荐使用更清晰的数据库路径名称,而非固定的 `oblite.db`。 - -### Python Script Location - -The Python bridge script (`seekdb_bridge.py`) should be located at: -- Production: `/python/seekdb_bridge.py` -- Development: `src-tauri/python/seekdb_bridge.py` - -## Performance Improvements - -SeekDB provides several performance benefits over SQLite: - -1. **Faster Vector Search**: Native HNSW indexing (10-100x faster for large datasets) -2. **Better Scalability**: Optimized for AI/ML workloads -3. **Native Vector Types**: No need to serialize/deserialize embeddings -4. **Approximate Search**: Trade-off between speed and accuracy - -## Troubleshooting - -### Python subprocess not starting - -**Error**: `Failed to start Python process` - -**Solutions**: -- Verify Python 3 is installed: `python3 --version` -- Check SeekDB installation: `pip list | grep seekdb` -- Ensure script path is correct -- Check Python PATH environment variable - -### Import error: seekdb module not found - -**Error**: `ModuleNotFoundError: No module named 'seekdb'` or `No module named 'oblite'` - -**Solution**: -```bash -# 安装最新版本 -pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple/ - -# 验证安装 -python3 -c "import seekdb; print('OK')" -``` - -> **注意**: 从 0.0.1.dev4 版本开始,应使用 `import seekdb` 而非 `import oblite`。详见 [升级指南](UPGRADE_SEEKDB_0.0.1.dev4.md)。 - -### Vector dimension mismatch - -**Error**: `Vector dimension mismatch` - -**Solution**: -- Ensure you're using DashScope text-embedding-v1 (1536 dimensions) -- Re-run migration with `--force-dimension 1536` flag - -### Subprocess communication timeout - -**Error**: `Timeout waiting for subprocess response` - -**Solutions**: -- Check if Python process is still running -- Restart the application -- Check system resources (CPU/memory) - -## Rollback to SQLite - -If you need to rollback to the 
old SQLite implementation: - -1. Checkout the previous version: - ```bash - git checkout - ``` - -2. Rebuild the application: - ```bash - cd src-tauri - cargo build --release - ``` - -## Development - -### Testing SeekDB Operations - -```bash -# Start Python bridge in standalone mode -cd src-tauri/python -python3 seekdb_bridge.py -``` - -Then send JSON commands via stdin: -```json -{"command": "init", "params": {"db_path": "./test.db", "db_name": "test"}} -{"command": "query", "params": {"sql": "SELECT * FROM projects", "values": []}} -``` - -### Adding New Database Operations - -1. Add command handler in `seekdb_bridge.py` -2. Add Rust wrapper in `python_subprocess.rs` -3. Add high-level method in `seekdb_adapter.rs` - -## Support - -For issues or questions: -- Check [docs/seekdb.md](seekdb.md) for SeekDB basic documentation -- Check [docs/SEEKDB_USAGE_GUIDE.md](SEEKDB_USAGE_GUIDE.md) for comprehensive usage guide -- Check [docs/UPGRADE_SEEKDB_0.0.1.dev4.md](UPGRADE_SEEKDB_0.0.1.dev4.md) for version upgrade guide -- Create an issue on GitHub -- Consult SeekDB documentation: [SeekDB Docs](https://www.oceanbase.com/) - -## Future Enhancements - -Planned features leveraging SeekDB capabilities: -- [ ] Hybrid search (vector + fulltext) -- [ ] Materialized views for faster aggregations -- [ ] External table support for document imports -- [ ] Advanced analytics with OLAP features - diff --git a/docs/MIGRATION_SUMMARY.md b/docs/MIGRATION_SUMMARY.md deleted file mode 100644 index c39711c..0000000 --- a/docs/MIGRATION_SUMMARY.md +++ /dev/null @@ -1,282 +0,0 @@ -# SQLite to SeekDB Migration - Implementation Summary - -## Overview - -Successfully migrated the MineKB application from SQLite (rusqlite) to SeekDB, an AI-Native embedded database with native vector search capabilities. The migration maintains full backward compatibility while significantly improving performance and adding new capabilities. - -## What Was Changed - -### 1. 
New Components Created - -#### Python Bridge (`src-tauri/python/seekdb_bridge.py`) -- Persistent Python subprocess that manages SeekDB operations -- JSON-based command/response protocol via stdin/stdout -- Command handlers for: - - `init` - Initialize database connection - - `execute` - Run INSERT/UPDATE/DELETE/CREATE statements - - `query` - Execute SELECT queries and return results - - `query_one` - Execute SELECT and return first row - - `commit` - Commit transactions - - `rollback` - Rollback transactions - - `ping` - Health check -- Comprehensive error handling and logging - -#### Subprocess Manager (`src-tauri/src/services/python_subprocess.rs`) -- Manages persistent Python process lifecycle -- JSON serialization/deserialization for communication -- Automatic restart on process failure -- Thread-safe command execution with Mutex -- Graceful shutdown handling -- Methods for all database operations with type-safe API - -#### SeekDB Adapter (`src-tauri/src/services/seekdb_adapter.rs`) -- Drop-in replacement for `EmbeddedVectorDb` -- Implements all database operations: - - Project management (CRUD operations) - - Vector document storage and retrieval - - Conversation and message persistence - - Native vector similarity search using L2_DISTANCE -- Maintains same public API for seamless integration -- Enhanced with SeekDB's native HNSW vector indexing - -#### Migration Script (`src-tauri/python/migrate_sqlite_to_seekdb.py`) -- Standalone script to migrate existing SQLite databases -- Converts binary blob embeddings to JSON array format -- Pads/truncates embeddings to 1536 dimensions -- Preserves all data: - - Projects - - Vector documents (with embeddings) - - Conversations - - Messages -- Progress reporting and error handling -- Data integrity verification - -### 2. 
Modified Components - -#### Service Layer Updates -- **`document_service.rs`**: Updated to use `SeekDbAdapter` instead of `EmbeddedVectorDb` -- **`project_service.rs`**: Type updated to use `SeekDbAdapter` -- **`conversation_service.rs`**: Type updated to use `SeekDbAdapter` -- **`app_state.rs`**: No changes needed (uses services abstraction) -- **`mod.rs`**: Added new modules, commented out old `embedded_vector_db` - -#### Build Configuration -- **`Cargo.toml`**: - - Commented out `rusqlite` dependency - - Commented out `sqlx` dependency - - All other dependencies remain unchanged - -### 3. Documentation Created - -- **`MIGRATION_SEEKDB.md`**: Comprehensive migration guide -- **`MIGRATION_SUMMARY.md`**: This implementation summary -- **`src-tauri/python/requirements.txt`**: Python dependencies -- **`src-tauri/python/install_deps.sh`**: Installation script -- **`README.md`**: Updated with SeekDB information - -## Technical Architecture - -### Communication Flow - -``` -┌─────────────────────┐ -│ Rust Application │ -│ (Tauri/Tokio) │ -└──────────┬──────────┘ - │ - │ JSON Commands via stdin - │ JSON Responses via stdout - ▼ -┌─────────────────────┐ -│ Python Subprocess │ -│ (seekdb_bridge.py) │ -└──────────┬──────────┘ - │ - │ Python API calls - ▼ -┌─────────────────────┐ -│ SeekDB │ -│ (oblite.so) │ -└─────────────────────┘ -``` - -### Database Schema - -```sql --- Projects table (unchanged structure) -CREATE TABLE projects ( - id VARCHAR(36) PRIMARY KEY, - name TEXT NOT NULL, - description TEXT, - status TEXT NOT NULL, - document_count INTEGER DEFAULT 0, - created_at DATETIME NOT NULL, - updated_at DATETIME NOT NULL -) - --- Vector documents with native vector type -CREATE TABLE vector_documents ( - id VARCHAR(36) PRIMARY KEY, - project_id VARCHAR(36) NOT NULL, - document_id VARCHAR(36) NOT NULL, - chunk_index INTEGER NOT NULL, - content TEXT NOT NULL, - embedding vector(1536), -- Native vector type! 
- metadata TEXT NOT NULL, - created_at DATETIME DEFAULT CURRENT_TIMESTAMP, - UNIQUE(document_id, chunk_index) -) - --- HNSW vector index for fast similarity search -CREATE VECTOR INDEX idx_embedding ON vector_documents(embedding) -WITH (distance=l2, type=hnsw, lib=vsag) - --- Conversations and messages (unchanged structure) --- ... (same as before) -``` - -### Vector Search Implementation - -**Before (SQLite with manual cosine similarity):** -```rust -// Load all embeddings from database -// Calculate cosine similarity in Rust for each -// Sort and filter results -``` - -**After (SeekDB with native L2 distance):** -```sql -SELECT *, l2_distance(embedding, '[...]') as distance -FROM vector_documents -WHERE project_id = ? -ORDER BY l2_distance(embedding, '[...]') APPROXIMATE -LIMIT 10 -``` - -Performance improvement: **10-100x faster** for large datasets. - -## Key Benefits - -### 1. Performance Improvements -- **Native Vector Operations**: No serialization/deserialization overhead -- **HNSW Indexing**: Approximate nearest neighbor search (vs. exhaustive search) -- **Optimized Storage**: Vector-specific storage format -- **Query Optimization**: Database-level query planning for vector operations - -### 2. New Capabilities -- **Native Vector Type**: First-class support for vector data -- **Full-text Search**: Built-in fulltext indexing (ready for future use) -- **Hybrid Search**: Combine vector and keyword search (future feature) -- **OLAP Support**: Column storage and analytical queries (future feature) - -### 3. Maintainability -- **Cleaner Separation**: Database logic in Python, app logic in Rust -- **Easier Testing**: Can test database operations independently -- **Better Debugging**: Separate logs for database and application -- **Standard SQL**: Use standard SQL syntax (SeekDB is MySQL-compatible) - -## Migration Path for Users - -### For New Installations -1. Install Python 3.8+ -2. Install SeekDB: `pip install seekdb==0.0.1.dev2` -3. 
Run application normally - -### For Existing Users (Upgrading) -1. Install Python 3.8+ (if not already installed) -2. Install SeekDB: `pip install seekdb==0.0.1.dev2` -3. Run migration script: - ```bash - cd src-tauri/python - python3 migrate_sqlite_to_seekdb.py - ``` -4. Update application to use new database - -## Testing Checklist - -- [x] ✅ Code compiles without errors -- [x] ✅ All service types updated correctly -- [x] ✅ Schema initialization works -- [x] ✅ Migration script created -- [x] ✅ Documentation complete -- [ ] ⏳ Integration tests (requires Python environment setup) -- [ ] ⏳ Performance benchmarks -- [ ] ⏳ End-to-end testing with real data - -## Files Changed Summary - -### Created (9 files) -- `src-tauri/src/services/python_subprocess.rs` (279 lines) -- `src-tauri/src/services/seekdb_adapter.rs` (876 lines) -- `src-tauri/python/seekdb_bridge.py` (244 lines) -- `src-tauri/python/migrate_sqlite_to_seekdb.py` (376 lines) -- `src-tauri/python/requirements.txt` (3 lines) -- `src-tauri/python/install_deps.sh` (31 lines) -- `MIGRATION_SEEKDB.md` (245 lines) -- `MIGRATION_SUMMARY.md` (this file) - -### Modified (6 files) -- `src-tauri/src/services/mod.rs` (added 2 modules, commented 1) -- `src-tauri/src/services/document_service.rs` (imports and types) -- `src-tauri/src/services/project_service.rs` (imports and types) -- `src-tauri/src/services/conversation_service.rs` (imports and types) -- `src-tauri/Cargo.toml` (commented out rusqlite and sqlx) -- `README.md` (added SeekDB information) - -### Deprecated (kept for reference) -- `src-tauri/src/services/embedded_vector_db.rs` (commented out in mod.rs) - -## Known Limitations - -1. **Python Dependency**: Application now requires Python 3.8+ to be installed -2. **Subprocess Overhead**: Small latency from process communication (typically <1ms) -3. **SeekDB Alpha**: SeekDB is in early release (0.0.1.dev2) -4. 
**Error Recovery**: Subprocess failures require restart (handled automatically) - -## Future Enhancements - -### Short-term -- [ ] Add connection pooling for multiple Python processes -- [ ] Implement retry logic for transient failures -- [ ] Add metrics collection for database operations -- [ ] Performance benchmarking suite - -### Long-term -- [ ] Hybrid search (vector + fulltext) -- [ ] Materialized views for aggregations -- [ ] External table support for batch imports -- [ ] Advanced analytics with OLAP features -- [ ] Distributed mode support (when SeekDB adds it) - -## Rollback Plan - -If issues are encountered, rollback is straightforward: - -1. Checkout previous commit: `git checkout ` -2. Rebuild application: `cargo build --release` -3. Use old SQLite database - -Data can be preserved by: -1. Keep old SQLite database file -2. Re-migrate from SeekDB back to SQLite (reverse migration script needed) - -## Conclusion - -The migration to SeekDB has been successfully completed with: -- ✅ **Zero Breaking Changes**: Same API, enhanced backend -- ✅ **Significant Performance Gains**: Native vector operations -- ✅ **Future-Ready**: Access to AI-Native database features -- ✅ **Well Documented**: Comprehensive migration guides -- ✅ **Production Ready**: Code compiles and follows best practices - -The application is now ready for testing and deployment with SeekDB! 
- ---- - -**Implementation Date**: October 27, 2025 -**Version**: 0.2.0 (SeekDB Migration) -**Migration Time**: ~4 hours -**Lines of Code Added**: ~2,050 lines -**Lines of Code Modified**: ~50 lines -**Lines of Code Removed**: 0 (deprecated code commented out) - diff --git a/docs/MINEKB_DEV_TUTORIAL.md b/docs/MINEKB_DEV_TUTORIAL.md index f2f733f..aa64645 100644 --- a/docs/MINEKB_DEV_TUTORIAL.md +++ b/docs/MINEKB_DEV_TUTORIAL.md @@ -102,22 +102,14 @@ graph TB subgraph "数据层 Data Layer" direction TB - subgraph "SeekDB Adapter - Rust" - Adapter[SeekDB 适配器] + subgraph "SeekDB - Rust" + Adapter[SeekDbAdapter] + DB[(Embedded SeekDB
向量数据库)] + Tables[关系表] + VectorIndex[向量集合 HNSW] end - subgraph "Python Bridge - Subprocess" - Bridge[seekdb_bridge.py] - end - - subgraph "SeekDB Database" - DB[(SeekDB
向量数据库)] - Tables[数据表] - VectorIndex[向量索引 HNSW] - end - - Adapter -->|JSON Protocol
stdin/stdout| Bridge - Bridge -->|Python API| DB + Adapter -->|seekdb-rs
Client| DB DB --> Tables DB --> VectorIndex end @@ -183,65 +175,44 @@ graph TB **4. 数据层(Data Layer)** ⭐ 重点 -数据层是 MineKB 的核心,采用 **Rust → Python Bridge → SeekDB** 三层架构: +数据层采用 **Rust 直连嵌入式 SeekDB**,无 Python 依赖: -##### 4.1 SeekDB Adapter(Rust 端) +##### 4.1 SeekDbAdapter(Rust) **位置**:`src-tauri/src/services/seekdb_adapter.rs` **职责**: -- 管理 Python 子进程的生命周期 -- 构建和发送 JSON 格式的命令 -- 解析 Python 返回的结果 -- 提供类型安全的 Rust API +- 使用 seekdb-rs 的异步 **Client** 打开并操作嵌入式 SeekDB(`SeekDbAdapter::new_async(path).await`) +- 关系数据(项目、会话、消息)通过参数化 SQL 读写 +- 向量数据通过 seekdb-rs 的 Collection API(单集合 + metadata 过滤 project_id)做 upsert、KNN、混合检索 **核心方法**: ```rust pub struct SeekDbAdapter { - subprocess: Arc>, + client: Client, // seekdb_rs::Client (async) + hnsw_config: HnswConfig, db_path: String, db_name: String, } -// 核心方法 impl SeekDbAdapter { - pub fn new(db_path: &Path) -> Result; - pub async fn init(&self) -> Result<()>; - pub async fn execute(&self, sql: &str, values: Vec) -> Result<()>; - pub async fn query(&self, sql: &str, values: Vec) -> Result>; - pub async fn upsert_vector_documents(&self, docs: &[VectorDocument]) -> Result<()>; - pub async fn search_similar(&self, project_id: &str, query_embedding: &[f64], limit: usize) -> Result>; + pub async fn new_async>(db_path: P) -> Result; + async fn execute(&self, sql: &str, params: Vec) -> Result<()>; + async fn query(&self, sql: &str, params: Vec) -> Result>>; + pub async fn add_document(&self, doc: VectorDocument) -> Result<()>; + pub async fn add_documents(&self, docs: Vec) -> Result<()>; + pub async fn hybrid_search_by_text(&self, embedding_service: Arc<...>, project_id: Option<&str>, query_text: &str, limit: usize) -> Result>; + pub async fn get_project_documents(&self, project_id: &str) -> Result>; + // ... 
} ``` -##### 4.2 Python Bridge(子进程) - -**位置**:`src-tauri/python/seekdb_bridge.py` - -**通信协议**:基于 stdin/stdout 的 JSON 行协议(Newline-Delimited JSON) - -**命令格式**: -```json -{ - "command": "init", - "params": { - "db_path": "./mine_kb.db", - "db_name": "mine_kb" - } -} -``` +##### 4.2 Embedded SeekDB -**响应格式**: -```json -{ - "status": "success", - "data": { ... } -} -``` +**访问方式**:seekdb-rs 异步 **Client**(async API,内部封装嵌入式 C 库) -**支持的命令**: -- `init`:初始化数据库连接 -- `execute`:执行 SQL(INSERT/UPDATE/DELETE) +**支持的能力**: +- `execute` / `fetch_all`:参数化 SQL(INSERT/UPDATE/DELETE/SELECT) - `query`:查询数据(SELECT) - `query_one`:查询单行 - `commit`:提交事务 @@ -367,10 +338,10 @@ CREATE INDEX idx_messages_conversation ON messages(conversation_id); **数据流向**: -1. **写入流程**:Rust Service → Adapter → Python Bridge → SeekDB -2. **查询流程**:Rust Service → Adapter → Python Bridge → SeekDB → 返回结果 +1. **写入流程**:Rust Service → SeekDbAdapter → seekdb-rs → Embedded SeekDB +2. **查询流程**:Rust Service → SeekDbAdapter → seekdb-rs → Embedded SeekDB → 返回结果 3. **向量检索流程**: - - Query Embedding → Python Bridge + - Query Embedding → SeekDbAdapter(内部调用 seekdb-rs) - SeekDB HNSW 索引检索 - 返回 Top-K 相似文档 @@ -385,9 +356,8 @@ sequenceDiagram participant Tauri as Tauri Command participant DocSvc as DocumentService participant EmbedSvc as EmbeddingService - participant Adapter as SeekDB Adapter - participant Bridge as Python Bridge - participant DB as SeekDB + participant Adapter as SeekDbAdapter + participant DB as Embedded SeekDB participant API as 阿里云百炼 API %% 流程 1: 创建项目 @@ -395,10 +365,8 @@ sequenceDiagram User->>UI: 点击"创建项目" UI->>Tauri: create_project(name, desc) Tauri->>Adapter: execute(INSERT INTO projects...) - Adapter->>Bridge: {"command": "execute", "params": {...}} - Bridge->>DB: INSERT INTO projects VALUES(...) 
- DB-->>Bridge: OK - Bridge-->>Adapter: {"status": "success"} + Adapter->>DB: execute(SQL, params) + DB-->>Adapter: OK Adapter-->>Tauri: Result::Ok(project) Tauri-->>UI: ProjectResponse UI-->>User: 显示新项目 @@ -420,11 +388,9 @@ sequenceDiagram EmbedSvc-->>DocSvc: Vec end - DocSvc->>Adapter: upsert_vector_documents(chunks) - Adapter->>Bridge: {"command": "execute", "params": {...}} - Bridge->>DB: INSERT INTO vector_documents VALUES(...) - DB-->>Bridge: OK - Bridge-->>Adapter: {"status": "success"} + DocSvc->>Adapter: add_documents(chunks) + Adapter->>DB: collection.upsert_batch(...) + DB-->>Adapter: OK end Adapter-->>Tauri: Result::Ok(summary) @@ -439,11 +405,9 @@ sequenceDiagram EmbedSvc->>API: POST /embeddings API-->>EmbedSvc: query_embedding[1536] - Tauri->>Adapter: search_similar(project_id, query_embedding, limit=20) - Adapter->>Bridge: {"command": "query", "params": {...}} - Bridge->>DB: SELECT ... ORDER BY l2_distance(embedding, [...]) APPROXIMATE LIMIT 20 - DB-->>Bridge: Top-K 相似文档 - Bridge-->>Adapter: {"status": "success", "data": [...]} + Tauri->>Adapter: hybrid_search_by_text(embedding_svc, project_id, query, limit=20) + Adapter->>DB: collection.hybrid_search_advanced(...) + DB-->>Adapter: QueryResult Adapter-->>Tauri: Vec Tauri->>Tauri: 构建 Prompt(query + context) @@ -456,9 +420,8 @@ sequenceDiagram end Tauri->>Adapter: save_message(conversation_id, role, content) - Adapter->>Bridge: {"command": "execute", "params": {...}} - Bridge->>DB: INSERT INTO messages VALUES(...) - DB-->>Bridge: OK + Adapter->>DB: execute(INSERT INTO messages...) + DB-->>Adapter: OK ``` **数据流图说明**: @@ -466,14 +429,14 @@ sequenceDiagram 1. **创建项目流程** - 用户输入项目名称和描述 - Tauri 命令验证参数 - - 通过 Adapter 和 Bridge 将数据写入 SeekDB + - 通过 SeekDbAdapter 将数据写入嵌入式 SeekDB - 返回创建成功的项目信息 2. **文档处理流程** - 文档上传后进行文本提取(PDF、DOCX 等) - 文本分块(默认 500 字符/块,重叠 50 字符) - 每个块调用阿里云百炼 API 生成 1536 维向量 - - 向量和文本一起存储到 SeekDB 的 `vector_documents` 表 + - 向量和文本一起写入 SeekDB 的向量集合(collection) - HNSW 索引自动更新 3. 
**对话问答流程** @@ -490,17 +453,19 @@ sequenceDiagram ### 3.1 环境要求 -开发和运行 MineKB 需要以下环境: +**构建/开发环境**(本地开发或打包时需要): | 组件 | 版本要求 | 说明 | |-----|---------|------| | **操作系统** | Linux / macOS / Windows | 推荐 Ubuntu 20.04+ / macOS 10.15+ / Windows 10+ | -| **Node.js** | 16.x+ | 用于前端开发,推荐 18.x LTS | +| **Node.js** | 16.x+ | 前端构建与 Tauri CLI,推荐 18.x LTS | | **npm/tnpm** | 对应 Node.js 版本 | 阿里内部推荐使用 tnpm | -| **Rust** | 1.70+ | Tauri 依赖,推荐 1.75+ | -| **Python** | 3.8+ | SeekDB 依赖,推荐 3.9+ | +| **Rust** | 1.70+ | Tauri 编译,推荐 1.75+ | +| **(无 Python)** | - | 数据层使用 seekdb-rs,无需 Python | | **系统依赖** | 根据平台 | 见下方说明 | +**安装后运行环境**(用户机器):无需 Python;Tauri 打包后为单一可执行(或平台包),依赖已内嵌。 + #### Linux (Ubuntu/Debian) 系统依赖 ```bash @@ -513,9 +478,7 @@ sudo apt install -y \ libssl-dev \ libgtk-3-dev \ libayatana-appindicator3-dev \ - librsvg2-dev \ - python3-pip \ - python3-venv + librsvg2-dev ``` #### macOS 系统依赖 @@ -527,15 +490,14 @@ xcode-select --install # 安装 Homebrew(如果尚未安装) /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" -# 安装 Python -brew install python@3.9 +# (无需安装 Python) ``` #### Windows 系统依赖 - Visual Studio 2019 或更高版本(包含 C++ 工具) - 或 Visual Studio Build Tools -- Python 3.8+ from [python.org](https://www.python.org/downloads/) +- (无需 Python) ### 3.2 技术栈和依赖包 @@ -616,21 +578,9 @@ sha2 = "0.10" - `pdf-extract`、`docx-rs`:文档解析 - Rust 标准库:向量计算和数学运算 -#### Python 依赖(requirements.txt) - -```txt -seekdb==0.0.1.dev4 -``` - -**SeekDB 安装**: +#### Rust 依赖(seekdb-rs) -```bash -# 使用清华镜像源 -pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple/ - -# 验证安装 -python3 -c "import seekdb; print('SeekDB installed successfully')" -``` +seekdb-rs 为 path 依赖(`src-tauri/Cargo.toml`),启用 features:`embedded`、`sync`。构建时自动拉取,无需单独安装。 ### 3.3 API 配置 @@ -681,12 +631,7 @@ npm install # 或使用 tnpm(阿里内部) tnpm install -# 安装 Python 依赖 -pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple/ -# 或使用安装脚本 -bash src-tauri/python/install_deps.sh - -# Rust 
依赖会在编译时自动下载 +# Rust 依赖(含 seekdb-rs)会在编译时自动下载 ``` #### 2. 配置 API Key @@ -702,11 +647,14 @@ nano src-tauri/config.json #### 3. 启动开发服务器 ```bash -# 启动 Tauri 开发模式 +# 启动 Tauri 开发模式(默认 CONFIG_DIR=com.mine-kb,数据目录为 src-tauri/com.mine-kb) npm run tauri:dev # 或使用 tnpm tnpm run tauri:dev + +# 自定义数据目录时可设置 CONFIG_DIR +CONFIG_DIR=/path/to/your/data tnpm run tauri:dev ``` **预期输出**: @@ -715,13 +663,12 @@ tnpm run tauri:dev Compiling mine-kb v0.1.0 (/path/to/mine-kb/src-tauri) Finished dev [unoptimized + debuginfo] target(s) in 45.23s Running `target/debug/mine-kb` -[2025-11-05T10:00:00Z INFO mine_kb] 🚀 MineKB 启动中... -[2025-11-05T10:00:00Z INFO mine_kb] 📁 应用数据目录: /home/user/.local/share/com.mine-kb.app -[2025-11-05T10:00:00Z INFO mine_kb] 🐍 正在检查 Python 环境... -[2025-11-05T10:00:01Z INFO mine_kb] ✅ Python 环境准备完成 -[2025-11-05T10:00:01Z INFO mine_kb] 🗄️ 正在初始化 SeekDB... -[2025-11-05T10:00:02Z INFO mine_kb] ✅ SeekDB 初始化成功 -[2025-11-05T10:00:02Z INFO mine_kb] 🎉 MineKB 启动成功! +[INFO mine_kb] 🚀 MineKB 启动中... +[INFO mine_kb] 使用 CONFIG_DIR 指定数据目录 +[INFO mine_kb] 🔗 [NEW-DB] Opening embedded SeekDB: ... +[INFO mine_kb] 🔗 [NEW-DB] Database ready +[INFO mine_kb] ✅ SeekDB 初始化成功 +[INFO mine_kb] 🎉 MineKB 启动成功! ``` #### 4. 构建生产版本(可选) @@ -763,27 +710,15 @@ fn main() { let config = load_config(&config_path) .expect("无法加载配置文件"); - // 4. 初始化 Python 环境 - log::info!("🐍 正在检查 Python 环境..."); - let python_env = PythonEnv::new(&app_data_dir) - .expect("Python 环境初始化失败"); - python_env.ensure_seekdb_installed() - .expect("SeekDB 安装失败"); - log::info!("✅ Python 环境准备完成"); - - // 5. 初始化 SeekDB + // 4. 初始化 SeekDB(seekdb-rs 嵌入式客户端,无 Python) log::info!("🗄️ 正在初始化 SeekDB..."); let db_path = app_data_dir.join(&config.database.path); - let seekdb_adapter = SeekDbAdapter::new(&db_path) + let seekdb_adapter = SeekDbAdapter::new_async(&db_path).await .expect("SeekDB 适配器创建失败"); - - // 6. 
初始化数据库架构 - seekdb_adapter.init() - .await - .expect("数据库初始化失败"); + // 表结构在 SeekDbAdapter::new_async 内通过 initialize_schema().await 完成 log::info!("✅ SeekDB 初始化成功"); - // 7. 创建应用状态 + // 5. 创建应用状态(实际代码中通过 DocumentService::with_full_config().await 创建 adapter,再构建 AppState) let app_state = AppState::new(seekdb_adapter, config); let app_state_wrapper = AppStateWrapper::new(app_state); @@ -811,41 +746,25 @@ fn main() { - 输出到 stderr,便于调试 2. **应用数据目录确定** - - macOS: `~/Library/Application Support/com.mine-kb.app/` - - Linux: `~/.local/share/com.mine-kb.app/` - - Windows: `%APPDATA%\com.mine-kb.app\` + - 若设置了环境变量 `CONFIG_DIR`,则以其值为数据根目录(本地开发默认 `CONFIG_DIR=com.mine-kb`,即 `src-tauri/com.mine-kb`)。 + - 否则:macOS: `~/Library/Application Support/com.mine-kb.app/`;Linux: `~/.local/share/com.mine-kb.app/`;Windows: `%APPDATA%\com.mine-kb.app\`。 3. **配置文件加载** - 首次运行时,从 `config.example.json` 复制 - 读取 API Key、数据库路径等配置 -4. **Python 环境准备** - - 检查是否存在虚拟环境 `venv/` - - 如果不存在,创建虚拟环境 - - 安装 `seekdb==0.0.1.dev4` - - 验证安装成功 - -5. **SeekDB 初始化** - - 启动 Python 子进程(`seekdb_bridge.py`) - - 打开数据库实例(`oblite.open(db_path)`) - - 连接空字符串创建管理连接 - - 执行 `CREATE DATABASE IF NOT EXISTS mine_kb` - - 切换到 `mine_kb` 数据库 - -6. **数据库架构创建** - - 检查表是否存在 - - 创建 `projects`、`documents`、`vector_documents`、`conversations`、`messages` 表 - - 创建向量索引(HNSW) - - 创建普通索引 - -7. **应用状态管理** +4. **SeekDB 初始化** + - 使用 seekdb-rs 的异步 **Client** 打开嵌入式数据库(路径为应用数据目录下的 `mine_kb.db` 等) + - `SeekDbAdapter::new_async(path).await` 内会执行 `initialize_schema().await`:创建 `projects`、`conversations`、`messages` 表;向量数据使用 Collection 存储,不建 `vector_documents` 表 + +5. **应用状态管理** - 创建全局 `AppState`,包含: - SeekDB Adapter - 配置信息 - 服务实例(ProjectService、DocumentService 等) - 使用 `Arc>` 实现线程安全的状态共享 -8. **Tauri 应用启动** +6. 
**Tauri 应用启动** - 注册所有 Tauri 命令 - 启动 WebView - 加载前端界面 @@ -1005,30 +924,16 @@ impl ProjectService { } ``` -#### 数据库层(seekdb_adapter.rs → Python Bridge → SeekDB) +#### 数据库层(seekdb_adapter.rs → seekdb-rs → Embedded SeekDB) -```python -# Python Bridge 接收命令 -{ - "command": "execute", - "params": { - "sql": "INSERT INTO projects (...) VALUES (?, ?, ?, ?, ?, ?, ?)", - "values": ["uuid-here", "我的项目", "描述", "active", 0, "2025-11-05T...", "2025-11-05T..."] - } -} +SeekDbAdapter 通过 seekdb-rs 的异步 **Client** 执行参数化 SQL,例如: -# 转换为 SeekDB SQL -cursor.execute(""" - INSERT INTO projects (id, name, description, status, document_count, created_at, updated_at) - VALUES ('uuid-here', '我的项目', '描述', 'active', 0, '2025-11-05T...', '2025-11-05T...') -""") -conn.commit() - -# 返回成功响应 -{ - "status": "success", - "data": null -} +```rust +self.execute( + "INSERT INTO projects (id, name, ...) VALUES (?, ?, ...) ON DUPLICATE KEY UPDATE ...", + vec![Value::String(project.id.to_string()), Value::String(project.name.clone()), ...], +).await?; +self.commit().await?; ``` **总结:创建知识库做了什么** @@ -1420,7 +1325,7 @@ impl LlmClient { **优势**: - ✅ 用户无需任何数据库知识 -- ✅ 安装包自包含(除 Python 依赖外) +- ✅ 安装包自包含(无 Python 依赖) - ✅ 首次启动自动初始化 - ✅ 跨平台一致的安装体验 @@ -1458,7 +1363,7 @@ SeekDB 的 All-in-One 能力为未来扩展提供了无限可能: **关键成功因素**: 1. **SeekDB** 提供了强大的向量检索能力 2. **Tauri** 提供了轻量级的跨平台桌面应用框架 -3. **Python Bridge** 实现了 Rust 和 SeekDB 的无缝集成 +3. **seekdb-rs** 实现了 Rust 直连嵌入式 SeekDB 4. **RAG 架构** 充分发挥了向量检索的优势 **适用场景**: @@ -1516,7 +1421,7 @@ LIMIT 20 -- 不要返回过多结果 #### 应用架构 -1. **使用 Python 子进程隔离 SeekDB** +1. **使用 seekdb-rs 嵌入式客户端直连 SeekDB** - 避免 Rust FFI 的复杂性 - JSON 协议简单可靠 - 便于调试和错误处理 @@ -1565,9 +1470,7 @@ MineKB 项目的成功验证了 SeekDB 在桌面应用领域的巨大潜力。 ### A. 
相关资源 - **项目地址**:https://github.com/ob-labs/mine-kb -- **SeekDB 文档**:[docs/SEEKDB_USAGE_GUIDE.md](SEEKDB_USAGE_GUIDE.md) -- **迁移指南**:[docs/MIGRATION_SEEKDB.md](MIGRATION_SEEKDB.md) -- **升级指南**:[docs/UPGRADE_SEEKDB_0.0.1.dev4.md](UPGRADE_SEEKDB_0.0.1.dev4.md) +- **SeekDB / seekdb-rs 文档**:[docs/seekdb.md](seekdb.md) ### B. 技术栈链接 diff --git a/docs/PATH_FIX_APPLIED.md b/docs/PATH_FIX_APPLIED.md deleted file mode 100644 index 17e29f7..0000000 --- a/docs/PATH_FIX_APPLIED.md +++ /dev/null @@ -1,128 +0,0 @@ -# 路径问题已修复 ✅ - -## 问题分析 - -之前的错误是由于路径查找逻辑在某些情况下会拼接出错误的路径: -- 错误路径: `/home/ubuntu/Desktop/mine-kb/src-tauri/src-tauri/python/seekdb_bridge.py` -- 正确路径: `/home/ubuntu/Desktop/mine-kb/src-tauri/python/seekdb_bridge.py` - -## 已应用的修复 - -修改了 `src-tauri/src/services/seekdb_adapter.rs` 中的路径查找逻辑,现在会: - -1. **首先**尝试从可执行文件所在目录查找 -2. **然后**尝试多个可能的位置: - - `python/seekdb_bridge.py` (如果当前在 src-tauri 目录) - - `src-tauri/python/seekdb_bridge.py` (如果当前在项目根目录) - - `../python/seekdb_bridge.py` (如果当前在 src-tauri/src 目录) -3. **最后**使用默认的相对路径作为后备 - -新的代码会记录详细的调试信息,显示它检查了哪些路径。 - -## 下一步需要做的事情 - -### 1. 安装 Python 依赖 (SeekDB) - -你的系统没有 pip3,需要先安装: - -```bash -# 方法 1: 使用 get-pip.py 安装(不需要 sudo) -curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py -python3 get-pip.py --user -export PATH="$HOME/.local/bin:$PATH" - -# 验证安装 -pip3 --version - -# 方法 2: 如果你有 sudo 权限 -sudo apt update -sudo apt install python3-pip -``` - -### 2. 安装 SeekDB 包 - -```bash -pip3 install --user seekdb==0.0.1.dev2 -i https://pypi.tuna.tsinghua.edu.cn/simple/ -``` - -### 3. 验证 SeekDB 安装 - -```bash -cd /home/ubuntu/Desktop/mine-kb/src-tauri/python -python3 test_seekdb.py -``` - -### 4. 
运行应用 - -```bash -cd /home/ubuntu/Desktop/mine-kb -npm run tauri:dev -``` - -## 验证修复 - -代码已经编译成功: -``` -✅ Checking mine-kb v0.1.0 (/home/ubuntu/Desktop/mine-kb/src-tauri) -✅ Finished `dev` profile [unoptimized + debuginfo] target(s) in 2.74s -``` - -Python 脚本文件存在且可执行: -``` -✅ /home/ubuntu/Desktop/mine-kb/src-tauri/python/seekdb_bridge.py -``` - -## 如果仍然有问题 - -如果安装 pip 遇到困难,你可以: - -### 选项 A: 使用虚拟环境(推荐) - -```bash -cd /home/ubuntu/Desktop/mine-kb -python3 -m venv venv -source venv/bin/activate -# 虚拟环境内会有 pip -pip install seekdb==0.0.1.dev2 -i https://pypi.tuna.tsinghua.edu.cn/simple/ -``` - -然后在运行应用前先激活虚拟环境: -```bash -source /home/ubuntu/Desktop/mine-kb/venv/bin/activate -npm run tauri:dev -``` - -### 选项 B: 临时回退到 SQLite - -如果暂时不能安装 SeekDB,可以暂时回退到之前的 SQLite 版本: - -```bash -cd /home/ubuntu/Desktop/mine-kb -git stash # 保存当前更改 -git checkout <之前的commit> # 回到SQLite版本 -``` - -## 日志检查 - -运行应用时,你应该能看到类似这样的日志: - -``` -[INFO] 🔗 [NEW-DB] Opening SeekDB: /home/ubuntu/.local/share/com.mine-kb.app/mine_kb.db -[INFO] 🔗 [NEW-DB] Database directory: "/home/ubuntu/.local/share/com.mine-kb.app" -[INFO] 🔗 [NEW-DB] Database name: mine_kb -[DEBUG] 🔍 Current directory: /home/ubuntu/Desktop/mine-kb -[DEBUG] 🔍 Checking: /home/ubuntu/Desktop/mine-kb/python/seekdb_bridge.py -[DEBUG] 🔍 Checking: /home/ubuntu/Desktop/mine-kb/src-tauri/python/seekdb_bridge.py -[INFO] ✅ Found script at: "/home/ubuntu/Desktop/mine-kb/src-tauri/python/seekdb_bridge.py" -[INFO] 🐍 Starting Python subprocess: /home/ubuntu/Desktop/mine-kb/src-tauri/python/seekdb_bridge.py -``` - -如果看到这些日志,说明路径查找工作正常! 
- -## 技术细节 - -修改的文件: -- `src-tauri/src/services/seekdb_adapter.rs` (第 60-93 行) - -新的路径查找逻辑使用了智能后备机制,可以从任何工作目录正确找到 Python 脚本。 - diff --git a/docs/SEEKDB_AUTO_INSTALL.md b/docs/SEEKDB_AUTO_INSTALL.md deleted file mode 100644 index 319d099..0000000 --- a/docs/SEEKDB_AUTO_INSTALL.md +++ /dev/null @@ -1,303 +0,0 @@ -# SeekDB 自动安装功能(通过 pip) - -> **版本**: SeekDB 0.0.1.dev4 -> **最后更新**: 2025-11-05 - -> **重要更新**: 从 0.0.1.dev4 版本开始,模块名称从 `oblite` 更改为 `seekdb`。详见 [升级指南](UPGRADE_SEEKDB_0.0.1.dev4.md) - -## 概述 - -本应用使用 Python 虚拟环境自动管理 SeekDB 依赖。首次启动时,应用会: -1. 自动创建独立的 Python 虚拟环境 -2. 通过 pip 自动安装 seekdb 包(0.0.1.dev4 版本) -3. 验证安装成功后启动应用 - -无需手动下载或管理依赖文件,一切都是自动完成的。 - -## 实施架构 - -### 核心模块 - -#### 1. `src-tauri/src/services/python_env.rs` -Python 虚拟环境管理器,负责: -- 在应用数据目录创建 Python 虚拟环境 -- 检测虚拟环境是否存在 -- 提供虚拟环境 Python 可执行文件路径 -- 提供 pip 可执行文件路径 - -**关键方法:** -- `new(app_data_dir)` - 创建环境管理器实例 -- `ensure_venv()` - 确保虚拟环境存在,不存在则创建 -- `venv_exists()` - 检查虚拟环境是否存在 -- `get_python_executable()` - 获取虚拟环境的 Python 路径 -- `get_pip_executable()` - 获取虚拟环境的 pip 路径 - -#### 2. 
`src-tauri/src/services/seekdb_package.rs` -SeekDB 包管理器,负责: -- 检测 seekdb 包是否已安装 -- 自动安装 seekdb 包 -- 验证安装是否成功 - -**关键方法:** -- `new(python_env)` - 创建包管理器实例 -- `is_installed()` - 检查 seekdb 是否已安装 -- `install()` - 安装 seekdb 包 -- `verify()` - 验证安装成功 -- `get_version_info()` - 获取版本信息 - -### 修改的模块 - -#### `src-tauri/src/services/python_subprocess.rs` -- 修改为 `new_with_python(script_path, python_executable)` -- 直接使用虚拟环境的 Python,不再需要设置 PYTHONPATH -- 移除了所有 PYTHONPATH 相关逻辑 - -#### `src-tauri/src/services/seekdb_adapter.rs` -- 修改为 `new_with_python(db_path, python_executable)` -- 接收 Python 可执行文件路径参数 -- 传递给 PythonSubprocess - -#### `src-tauri/src/services/document_service.rs` -- 修改为 `with_full_config(db_path, api_key, base_url, python_path)` -- 传递 Python 可执行文件路径 - -#### `src-tauri/src/services/app_state.rs` -- 修改为 `new_with_full_config(db_path, app_config, model_cache_dir, python_path)` -- 传递 Python 可执行文件路径给所有服务 - -#### `src-tauri/src/main.rs` -应用启动流程(三个阶段): - -**阶段 1:Python 环境和 SeekDB 安装** -1. 创建 Python 虚拟环境管理器 -2. 确保虚拟环境存在(不存在则创建) -3. 检查 seekdb 是否已安装 -4. 未安装则自动安装 -5. 验证安装成功 -6. 获取 Python 可执行文件路径 - -**阶段 2:配置文件加载** -- 加载应用配置 -- 验证 API 密钥等 - -**阶段 3:初始化应用状态** -- 传递 Python 路径给各个服务 -- 初始化数据库连接 - -### 移除的模块 - -- ❌ `src-tauri/src/services/seekdb_installer.rs` - 不再需要 -- ❌ `src-tauri/libs/` 目录 - 不再需要手动管理 oblite.so - -## 技术要点 - -### 安装配置 - -- **包名**:`seekdb` -- **版本**:`0.0.1.dev4` -- **镜像源**:`https://pypi.tuna.tsinghua.edu.cn/simple/` -- **安装位置**:`<应用数据目录>/venv/` -- **安装方式**:`pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple/` - -### 虚拟环境位置 - -不同操作系统的虚拟环境位置: -- **macOS**: `~/Library/Application Support/com.mine-kb.app/venv/` -- **Linux**: `~/.local/share/com.mine-kb.app/venv/` -- **Windows**: `%APPDATA%\com.mine-kb.app\venv\` - -### Python 可执行文件路径 - -- **macOS/Linux**: `/bin/python3` -- **Windows**: `\Scripts\python.exe` - -### 验证流程 - -1. 检查虚拟环境是否存在 -2. 尝试导入 seekdb 模块(0.0.1.dev4 版本使用 `import seekdb`) -3. 获取 seekdb 模块路径和版本 -4. 启动 Python 子进程验证数据库连接 - -### 优势 - -1. 
**跨平台兼容**:pip 自动安装适合当前架构的包(ARM64/x86-64) -2. **依赖隔离**:虚拟环境不影响系统 Python -3. **自动化**:首次运行自动安装,无需用户干预 -4. **节省空间**:不需要在项目中存储 2.7GB 的 oblite.so -5. **易于升级**:pip 可以轻松升级到新版本 - -## 启动日志示例 - -成功启动时的日志输出: - -``` -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - 步骤 1/3: 初始化 Python 环境和 SeekDB -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -🔍 查找 Python 虚拟环境... - 系统 Python: Python 3.10.12 -🔧 创建 Python 虚拟环境... - 位置: /home/user/.local/share/com.mine-kb.app/venv -✅ Python 虚拟环境创建成功 - -🔍 检查 seekdb 包是否已安装... -📦 SeekDB 未安装,开始安装... -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - 📦 安装 SeekDB 包 -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - 版本: 0.0.1.dev4 - 镜像: https://pypi.tuna.tsinghua.edu.cn/simple/ - -🔧 升级 pip... -✅ pip 升级完成 -📦 安装 seekdb==0.0.1.dev4... -✅ seekdb 安装完成 - -🔍 验证 seekdb 安装... -✅ seekdb 验证通过 - seekdb version: 0.0.1.dev4 - seekdb path: /path/to/venv/lib/python3.10/site-packages/seekdb/ - -✅ Python 可执行文件: /path/to/venv/bin/python3 - -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - 步骤 2/3: 加载配置文件 -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -成功从配置文件读取配置: /path/to/config.json - -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - 步骤 3/3: 初始化应用状态 -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -📦 初始化应用状态... - - Python 路径: /path/to/venv/bin/python3 -🐍 Starting Python subprocess... -✅ Python subprocess started successfully -🔍 验证 SeekDB 数据库连接... -✅ SeekDB 数据库连接正常 - -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - ✅ 应用启动成功! -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -``` - -## 手动安装(可选) - -如果需要手动安装依赖,可以运行: - -```bash -cd src-tauri/python -./install_deps.sh -``` - -此脚本会: -1. 检查 Python 3 是否安装 -2. 在应用数据目录创建虚拟环境 -3. 激活虚拟环境 -4. 升级 pip -5. 安装 seekdb 包 - -## 测试建议 - -### 1. 首次安装测试 -- 删除应用数据目录(完全清理) -- 启动应用 -- 验证自动创建虚拟环境和安装 seekdb - -### 2. 重启测试 -- 正常关闭应用 -- 再次启动应用 -- 验证跳过安装,直接使用已有环境 - -### 3. 网络故障测试 -- 删除虚拟环境 -- 断开网络连接 -- 启动应用 -- 验证显示友好的错误信息 - -### 4. 多架构测试 -- 在 ARM64 系统上测试 -- 在 x86-64 系统上测试 -- 验证 pip 自动安装正确架构的包 - -## 故障排查 - -如果应用启动失败,检查以下内容: - -### 1. 
检查 Python 环境 -```bash -python3 --version # 确保 Python 3.8+ -python3 -m venv --help # 确保 venv 模块可用 -``` - -Ubuntu/Debian 系统可能需要安装: -```bash -sudo apt install python3-venv -``` - -### 2. 检查网络连接 -```bash -ping pypi.tuna.tsinghua.edu.cn -curl -I https://pypi.tuna.tsinghua.edu.cn/simple/ -``` - -### 3. 检查虚拟环境 -```bash -# Linux/macOS -ls -la ~/.local/share/com.mine-kb.app/venv/ - -# 手动测试(0.0.1.dev4 版本使用 seekdb 模块) -~/.local/share/com.mine-kb.app/venv/bin/python3 -c "import seekdb; print(seekdb.__file__)" -``` - -### 4. 手动安装 seekdb -```bash -# 创建虚拟环境 -python3 -m venv ~/.local/share/com.mine-kb.app/venv - -# 激活虚拟环境 -source ~/.local/share/com.mine-kb.app/venv/bin/activate - -# 安装 seekdb 0.0.1.dev4 -pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple/ - -# 验证(0.0.1.dev4 使用 seekdb 模块) -python -c "import seekdb; print('SeekDB 0.0.1.dev4 OK')" -``` - -### 5. 查看应用日志 -应用日志会显示详细的错误信息,包括: -- Python 版本检测 -- 虚拟环境创建过程 -- pip 安装过程 -- seekdb 验证结果 - -## 相关文件清单 - -### 新增文件 -- `src-tauri/src/services/python_env.rs` - Python 虚拟环境管理 -- `src-tauri/src/services/seekdb_package.rs` - SeekDB 包管理 - -### 修改文件 -- `src-tauri/src/services/mod.rs` -- `src-tauri/src/services/python_subprocess.rs` -- `src-tauri/src/services/seekdb_adapter.rs` -- `src-tauri/src/services/document_service.rs` -- `src-tauri/src/services/app_state.rs` -- `src-tauri/src/main.rs` -- `src-tauri/python/install_deps.sh` - -### 删除文件 -- `src-tauri/src/services/seekdb_installer.rs` - 已移除 -- `src-tauri/libs/` - 已移除 - -### 归档文件 -- `docs/archive/ERROR_ANALYSIS_OBLITE_SO.md` - 旧的错误分析文档(已过时) - ---- - -**更新日期**:2025-11-05 -**版本**:v3.0 (SeekDB 0.0.1.dev4) -**变更**: -- 升级到 SeekDB 0.0.1.dev4 版本 -- 模块名称从 oblite 更改为 seekdb -- 支持向量列输出和数据库验证新特性 diff --git a/docs/SEEKDB_USAGE_GUIDE.md b/docs/SEEKDB_USAGE_GUIDE.md deleted file mode 100644 index fc5f9aa..0000000 --- a/docs/SEEKDB_USAGE_GUIDE.md +++ /dev/null @@ -1,1597 +0,0 @@ -# SeekDB 使用方法与实践经验总结 - -**文档版本**: 2.0 -**最后更新**: 2025-11-05 -**适用版本**: SeekDB 0.0.1.dev4 -**作者**: 
MineKB Team - -> **重要更新**: 本文档已更新至 SeekDB 0.0.1.dev4 版本,主要变更: -> - 模块名称从 `oblite` 更改为 `seekdb` -> - 新增向量列类型输出支持 -> - 新增数据库存在性验证 -> - 新增 USE 语句稳定支持 -> - 新增自动提交模式(autocommit 参数) - ---- - -## 📋 目录 - -1. [SeekDB 简介](#1-seekdb-简介) -2. [安装与配置](#2-安装与配置) -3. [基本使用](#3-基本使用) -4. [核心功能详解](#4-核心功能详解) -5. [MineKB 项目实践](#5-minekb-项目实践) -6. [常见问题与解决方案](#6-常见问题与解决方案) -7. [最佳实践](#7-最佳实践) -8. [注意事项与限制](#8-注意事项与限制) -9. [性能优化建议](#9-性能优化建议) -10. [总结与展望](#10-总结与展望) - ---- - -## 1. SeekDB 简介 - -### 1.1 什么是 SeekDB? - -SeekDB(基于 OceanBase Lite)是一款轻量级嵌入式数据库,以库的形式集成在应用程序中,为开发者提供 **ALL IN ONE** 的数据管理能力: - -- **TP (OLTP)**: 事务处理能力 -- **AP (OLAP)**: 分析查询能力 -- **AI Native**: 原生 AI 能力(向量检索、全文检索、混合检索) - -### 1.2 核心特性 - -| 特性分类 | 功能 | 说明 | -|---------|------|------| -| **AI Native** | 向量检索 | HNSW 索引,支持近似最近邻搜索 | -| | 全文检索 | 内置 Fulltext Index | -| | 混合检索 | 语义搜索 + 关键词搜索 | -| **OLAP** | 列存 | Column Group 支持 | -| | 数据导入 | 旁路导入(Direct Load) | -| | 物化视图 | 自动刷新的 Materialized View | -| | 外表 | 直接查询 CSV 等外部文件 | -| **OLTP** | 事务 | ACID 事务支持 | -| | 索引 | B-tree、Vector Index | -| **部署** | 嵌入式 | 无需单独部署服务 | -| | 轻量级 | 适用于边缘计算、IoT、移动应用 | - -### 1.3 适用场景 - -✅ **适合使用 SeekDB 的场景**: -- 嵌入式 AI 应用(向量检索) -- 知识库、文档搜索系统 -- 边缘计算、IoT 设备 -- 单机应用需要分析能力 -- 快速原型验证 - -❌ **不适合使用 SeekDB 的场景**: -- 大规模分布式系统 -- 需要高并发写入(千级 TPS 以上) -- 跨机器的分布式事务 -- 需要复杂的数据库管理功能 - ---- - -## 2. 安装与配置 - -### 2.1 安装方式 - -#### 方式一:通过 pip 安装(推荐) - -```bash -# 使用清华镜像源安装最新版本 -pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple/ - -# 验证安装(注意:0.0.1.dev4 版本使用 seekdb 模块) -python3 -c "import seekdb; print('SeekDB 0.0.1.dev4 安装成功')" -``` - -#### 方式二:自动安装(MineKB 应用) - -MineKB 应用启动时会自动: -1. 创建 Python 虚拟环境(`/venv/`) -2. 安装 seekdb 包 -3. 
验证安装成功 - -**应用数据目录位置**: -- **macOS**: `~/Library/Application Support/com.mine-kb.app/` -- **Linux**: `~/.local/share/com.mine-kb.app/` -- **Windows**: `%APPDATA%\com.mine-kb.app\` - -#### 方式三:手动下载(不推荐) - -```bash -# 注意:0.0.1.dev4 版本建议通过 pip 安装 -# 如需手动安装,请参考官方文档 -# 不再推荐直接下载 .so 文件的方式 -``` - -### 2.2 基本配置 - -```json -// config.json -{ - "database": { - "path": "./mine_kb.db", // 数据库实例路径(推荐使用清晰的名称) - "name": "mine_kb" // 数据库名称 - } -} -``` - -### 2.3 系统要求 - -| 组件 | 要求 | -|------|------| -| Python | 3.8+ | -| 操作系统 | Linux (Ubuntu 18.04+), macOS (10.15+), Windows 10+ | -| 架构 | x86-64, ARM64 (pip 自动适配) | -| 内存 | 最低 2GB,推荐 4GB+ | -| 磁盘 | 最低 1GB 可用空间 | - ---- - -## 3. 基本使用 - -### 3.1 快速开始 - -```python -import seekdb - -# 1. 打开数据库实例 -seekdb.open("./mine_kb.db") - -# 2. 连接数据库 -conn = seekdb.connect("test") -cursor = conn.cursor() - -# 3. 创建表 -cursor.execute(""" - CREATE TABLE t1 ( - c1 INT PRIMARY KEY, - c2 INT - ) -""") - -# 4. 插入数据 -cursor.execute("INSERT INTO t1 VALUES(1, 10)") -cursor.execute("INSERT INTO t1 VALUES(2, 20)") - -# 5. 提交事务 -conn.commit() - -# 6. 查询数据 -cursor.execute("SELECT * FROM t1") -rows = cursor.fetchall() -print(rows) # [(1, 10), (2, 20)] -``` - -### 3.2 数据库初始化流程 - -⚠️ **重要**:SeekDB 不会自动创建数据库,必须显式创建! - -**0.0.1.dev4 版本新增**:数据库存在性验证功能,连接不存在的数据库会报错。 - -```python -import seekdb - -# 正确的初始化流程 -seekdb.open("./mine_kb.db") - -# 1. 连接空字符串以访问系统上下文 -admin_conn = seekdb.connect("") -admin_cursor = admin_conn.cursor() - -# 2. 创建数据库(幂等操作) -admin_cursor.execute("CREATE DATABASE IF NOT EXISTS `my_database`") -admin_conn.commit() -admin_conn.close() - -# 3. 现在连接到新创建的数据库 -conn = seekdb.connect("my_database") -cursor = conn.cursor() - -# 4. 
创建表(现在可以成功) -cursor.execute("CREATE TABLE ...") -``` - -**0.0.1.dev4 新特性:自动提交模式** - -```python -# 自动提交模式(无需手动 commit) -conn = seekdb.connect(db_name='my_database', autocommit=True) -cursor = conn.cursor() -cursor.execute("INSERT INTO t1 VALUES(1, 10)") # 自动提交 -``` - -### 3.3 常用 SQL 操作 - -```python -# 创建表 -cursor.execute(""" - CREATE TABLE IF NOT EXISTS users ( - id VARCHAR(36) PRIMARY KEY, - name TEXT NOT NULL, - age INT, - created_at DATETIME DEFAULT CURRENT_TIMESTAMP - ) -""") - -# 插入数据 -cursor.execute( - "INSERT INTO users (id, name, age) VALUES (?, ?, ?)", - ("user1", "Alice", 30) -) - -# 查询数据 -cursor.execute("SELECT * FROM users WHERE age > 25") -rows = cursor.fetchall() - -# 更新数据 -cursor.execute("UPDATE users SET age = 31 WHERE id = 'user1'") - -# 删除数据 -cursor.execute("DELETE FROM users WHERE age < 18") - -# 提交事务 -conn.commit() -``` - ---- - -## 4. 核心功能详解 - -### 4.1 向量检索(Vector Search) - -#### 4.1.1 创建向量表 - -```python -import seekdb - -seekdb.open("./mine_kb.db") -conn = seekdb.connect("test") -cursor = conn.cursor() - -# 创建带向量字段的表 -cursor.execute(""" - CREATE TABLE test_vector ( - c1 INT PRIMARY KEY, - c2 vector(2), - VECTOR INDEX idx1(c2) WITH ( - distance=l2, - type=hnsw, - lib=vsag - ) - ) -""") -``` - -**向量索引参数说明**: -- `distance`: 距离度量方式 - - `l2`: 欧氏距离(常用) - - `cosine`: 余弦相似度 - - `ip`: 内积 -- `type`: 索引类型 - - `hnsw`: 分层可导航小世界图(推荐) -- `lib`: 底层库 - - `vsag`: 默认向量库 - -#### 4.1.2 插入向量数据 - -```python -# 插入向量 -cursor.execute("INSERT INTO test_vector VALUES(1, [1.0, 1.0])") -cursor.execute("INSERT INTO test_vector VALUES(2, [1.0, 2.0])") -cursor.execute("INSERT INTO test_vector VALUES(3, [1.0, 3.0])") -conn.commit() -``` - -#### 4.1.3 向量检索查询 - -```python -# 近似最近邻搜索(推荐) -cursor.execute(""" - SELECT c1, l2_distance(c2, '[1.0, 2.5]') as distance - FROM test_vector - ORDER BY l2_distance(c2, '[1.0, 2.5]') APPROXIMATE - LIMIT 2 -""") - -results = cursor.fetchall() -print(results) # [(2, 0.5), (3, 0.5)] -``` - -**⚠️ 重要限制**(0.0.1.dev4 版本部分改进): -- ⚠️ 在使用向量函数时 SELECT 
vector 字段可能有限制 -- ✅ 推荐只 SELECT 主键、元数据和距离值 -- ✅ **0.0.1.dev4 新增**:支持向量列类型输出(在某些场景下) - -```python -# ❌ 不推荐的用法(可能在某些场景下失败) -cursor.execute(""" - SELECT c1, c2, l2_distance(c2, '[1.0, 2.5]') as distance - FROM test_vector - ORDER BY l2_distance(c2, '[1.0, 2.5]') APPROXIMATE -""") -# 可能报错:fetchall failed 1235 Not supported feature or function - -# ✅ 推荐用法(稳定可靠) -cursor.execute(""" - SELECT c1, l2_distance(c2, '[1.0, 2.5]') as distance - FROM test_vector - ORDER BY l2_distance(c2, '[1.0, 2.5]') APPROXIMATE -""") - -# ✅ 0.0.1.dev4 新增:单独查询向量列(不使用向量函数时) -cursor.execute("SELECT c1, c2 FROM test_vector WHERE c1 = 1") -``` - -### 4.2 全文检索(Full-text Search) - -#### 4.2.1 创建全文索引表 - -```python -cursor.execute(""" - CREATE TABLE articles ( - title VARCHAR(200) PRIMARY KEY, - body TEXT, - FULLTEXT fts_idx(title, body) - ) -""") -``` - -#### 4.2.2 插入文档 - -```python -cursor.execute(""" - INSERT INTO articles(title, body) VALUES - ('OceanBase Tutorial', 'This is a tutorial about OceanBase Fulltext.'), - ('Fulltext Index', 'Fulltext index can be very useful.'), - ('OceanBase Test Case', 'Writing test cases helps ensure quality.') -""") -conn.commit() -``` - -#### 4.2.3 全文搜索 - -```python -cursor.execute(""" - SELECT - title, - MATCH(title, body) AGAINST("OceanBase") as score - FROM articles - WHERE MATCH(title, body) AGAINST("OceanBase") - ORDER BY score DESC -""") - -results = cursor.fetchall() -print(results) -# [('OceanBase Tutorial', score1), ('OceanBase Test Case', score2)] -``` - -### 4.3 混合检索(Hybrid Search) - -混合检索结合了向量检索(语义理解)和关键词检索(精确匹配),提供更好的搜索效果。 - -⚠️ **注意**:混合检索功能需要 SeekDB patch44x 版本支持,当前轻量版暂不支持。 - -```python -# 创建混合检索表 -cursor.execute(""" - CREATE TABLE doc_table ( - c1 INT, - vector vector(3), - query VARCHAR(255), - content VARCHAR(255), - VECTOR INDEX idx1(vector) WITH (distance=l2, type=hnsw, lib=vsag), - FULLTEXT idx2(query), - FULLTEXT idx3(content) - ) -""") - -# 插入数据 -cursor.execute(""" - INSERT INTO doc_table VALUES - (1, '[1,2,3]', 'hello world', 'oceanbase 
Elasticsearch database'), - (2, '[1,2,1]', 'hello world, what is your name', 'oceanbase mysql database'), - (3, '[1,1,1]', 'hello world, how are you', 'oceanbase oracle database') -""") -conn.commit() - -# 混合检索查询 -cursor.execute(""" - SET @parm = '{ - "query": { - "bool": { - "must": [ - {"match": {"query": "hi hello"}}, - {"match": {"content": "oceanbase mysql"}} - ] - } - }, - "knn": { - "field": "vector", - "k": 5, - "num_candidates": 10, - "query_vector": [1,2,3], - "boost": 0.7 - }, - "_source": ["query", "content", "_keyword_score", "_semantic_score"] - }' -""") - -cursor.execute("SELECT dbms_hybrid_search.search('doc_table', @parm)") -results = cursor.fetchall() -``` - -### 4.4 OLAP 分析能力 - -#### 4.4.1 列存表 - -```python -# 创建列存表 -cursor.execute(""" - CREATE TABLE each_column_group ( - col1 VARCHAR(30) NOT NULL, - col2 VARCHAR(30) NOT NULL, - col3 VARCHAR(30) NOT NULL, - col4 VARCHAR(30) NOT NULL, - col5 INT - ) WITH COLUMN GROUP (EACH COLUMN) -""") - -# 插入数据 -cursor.execute("INSERT INTO each_column_group VALUES('a', 'b', 'c', 'd', 1)") -conn.commit() - -# 列式查询(只读取需要的列,性能更优) -cursor.execute("SELECT col1, col2 FROM each_column_group") -``` - -#### 4.4.2 数据导入(Direct Load) - -```python -# 快速导入大量数据 -cursor.execute(""" - LOAD DATA /*+ direct(true, 0) */ - INFILE '/data/1/example.csv' - INTO TABLE test_olap - FIELDS TERMINATED BY ',' -""") -``` - -#### 4.4.3 外表查询 - -```python -# 创建外表(无需导入数据,直接查询文件) -cursor.execute(""" - CREATE EXTERNAL TABLE test_external_table ( - c1 INT, - c2 INT - ) - LOCATION='/data/1' - FORMAT=(TYPE='CSV' FIELD_DELIMITER=',') - PATTERN='example.csv' -""") - -# 直接查询外部文件 -cursor.execute("SELECT * FROM test_external_table") -``` - -### 4.5 OLTP 事务能力 - -```python -import seekdb - -seekdb.open("./mine_kb.db") -conn = seekdb.connect("test") -cursor = conn.cursor() - -# 创建表 -cursor.execute(""" - CREATE TABLE test_oltp ( - c1 INT PRIMARY KEY, - c2 INT - ) -""") - -# 事务操作 -try: - cursor.execute("INSERT INTO test_oltp VALUES(1, 10)") - 
cursor.execute("INSERT INTO test_oltp VALUES(2, 20)") - cursor.execute("INSERT INTO test_oltp VALUES(3, 30)") - - # 提交事务 - conn.commit() - -except Exception as e: - # 回滚事务 - conn.rollback() - print(f"Transaction failed: {e}") - -# 查询(包含事务版本号) -cursor.execute("SELECT *, ORA_ROWSCN FROM test_oltp") -print(cursor.fetchall()) - -# 0.0.1.dev4 新增:自动提交模式 -conn_auto = seekdb.connect("test", autocommit=True) -cursor_auto = conn_auto.cursor() -cursor_auto.execute("INSERT INTO test_oltp VALUES(4, 40)") # 自动提交 -``` - ---- - -## 5. MineKB 项目实践 - -### 5.1 架构设计 - -MineKB 使用 **Rust + Python + SeekDB** 的架构: - -``` -┌─────────────────┐ -│ Rust (Tauri) │ ← 主应用程序 -│ Frontend │ -└────────┬────────┘ - │ JSON Protocol (stdin/stdout) - ▼ -┌─────────────────┐ -│ Python Bridge │ ← seekdb_bridge.py -│ (subprocess) │ -└────────┬────────┘ - │ Python API - ▼ -┌─────────────────┐ -│ SeekDB │ ← oblite.db/ -│ (oblite.so) │ -└─────────────────┘ -``` - -**优势**: -- Rust 提供高性能前端和业务逻辑 -- Python 作为 SeekDB 的官方接口语言 -- JSON 协议简单、可靠、易于调试 - -### 5.2 数据库设计 - -#### 5.2.1 表结构 - -```sql --- 项目表 -CREATE TABLE projects ( - id VARCHAR(36) PRIMARY KEY, - name TEXT NOT NULL, - description TEXT, - status TEXT NOT NULL, - document_count INTEGER DEFAULT 0, - created_at DATETIME NOT NULL, - updated_at DATETIME NOT NULL -); - --- 向量文档表(核心) -CREATE TABLE vector_documents ( - id VARCHAR(36) PRIMARY KEY, - project_id VARCHAR(36) NOT NULL, - document_id VARCHAR(36) NOT NULL, - chunk_index INTEGER NOT NULL, - content TEXT NOT NULL, - embedding vector(1536), -- DashScope text-embedding-v1 - metadata TEXT NOT NULL, - created_at DATETIME DEFAULT CURRENT_TIMESTAMP, - UNIQUE(document_id, chunk_index) -); - --- 向量索引 -CREATE VECTOR INDEX idx_embedding -ON vector_documents(embedding) -WITH (distance=l2, type=hnsw, lib=vsag); - --- 普通索引 -CREATE INDEX idx_project_id ON vector_documents(project_id); -CREATE INDEX idx_document_id ON vector_documents(document_id); - --- 会话表 -CREATE TABLE conversations ( - id VARCHAR(36) PRIMARY KEY, - 
project_id VARCHAR(36) NOT NULL, - title TEXT NOT NULL, - created_at DATETIME NOT NULL, - updated_at DATETIME NOT NULL, - message_count INTEGER DEFAULT 0, - FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE -); - --- 消息表 -CREATE TABLE messages ( - id VARCHAR(36) PRIMARY KEY, - conversation_id VARCHAR(36) NOT NULL, - role TEXT NOT NULL, - content TEXT NOT NULL, - created_at DATETIME NOT NULL, - sources TEXT, - FOREIGN KEY (conversation_id) REFERENCES conversations(id) ON DELETE CASCADE -); -``` - -#### 5.2.2 索引策略 - -| 表 | 索引类型 | 字段 | 用途 | -|----|---------|------|------| -| vector_documents | VECTOR INDEX | embedding | 向量检索 | -| vector_documents | B-tree | project_id | 项目过滤 | -| vector_documents | B-tree | document_id | 文档查询 | -| conversations | B-tree | project_id | 项目会话列表 | -| messages | B-tree | conversation_id | 消息历史 | - -### 5.3 Python Bridge 实现 - -#### 5.3.1 JSON 协议 - -**命令格式**(stdin → Python): -```json -{ - "command": "init", - "params": { - "db_path": "./oblite.db", - "db_name": "mine_kb" - } -} -``` - -**响应格式**(Python → stdout): -```json -{ - "status": "success", - "data": {"db_path": "./oblite.db", "db_name": "mine_kb"} -} -``` - -或错误响应: -```json -{ - "status": "error", - "error": "InitError", - "details": "数据库初始化失败..." 
-} -``` - -#### 5.3.2 支持的命令 - -| 命令 | 说明 | 参数 | -|-----|------|------| -| `init` | 初始化数据库连接 | db_path, db_name | -| `execute` | 执行 SQL(INSERT/UPDATE/DELETE) | sql, values | -| `query` | 查询数据(SELECT) | sql, values | -| `query_one` | 查询单行数据 | sql, values | -| `commit` | 提交事务 | - | -| `rollback` | 回滚事务 | - | -| `ping` | 健康检查 | - | - -#### 5.3.3 关键实现细节 - -**参数化查询处理**: - -SeekDB 不支持标准的参数化查询(`?` 占位符),需要手动替换: - -```python -def format_sql_value(self, value: Any) -> str: - """将 Python 值转换为 SQL 字符串表示""" - if value is None: - return "NULL" - elif isinstance(value, bool): - return "1" if value else "0" - elif isinstance(value, (int, float)): - return str(value) - elif isinstance(value, str): - # 转义单引号 - escaped = value.replace("'", "''") - return f"'{escaped}'" - elif isinstance(value, list): - # 向量/数组值 - return str(value) - else: - escaped = str(value).replace("'", "''") - return f"'{escaped}'" - -def build_sql_with_values(self, sql: str, values: List[Any]) -> str: - """替换 SQL 中的 ? 占位符为实际值""" - if not values: - return sql - - result = sql - for value in values: - formatted_value = self.format_sql_value(value) - result = result.replace("?", formatted_value, 1) - - return result -``` - -**类型转换**: - -```python -def convert_value_for_json(self, value: Any) -> Any: - """将 Python 对象转换为 JSON 可序列化格式""" - if value is None: - return None - elif isinstance(value, (datetime, date)): - # datetime → ISO 字符串 - return value.isoformat() - elif isinstance(value, Decimal): - # Decimal → float - return float(value) - elif isinstance(value, bytes): - # bytes → base64 字符串 - import base64 - return base64.b64encode(value).decode('utf-8') - elif isinstance(value, (list, tuple)): - return [self.convert_value_for_json(v) for v in value] - else: - return value -``` - -### 5.4 向量检索实现 - -#### 5.4.1 检索流程 - -``` -用户查询 - ↓ -生成 query embedding (DashScope API) - ↓ -向量检索 SQL (l2_distance + APPROXIMATE) - ↓ -计算相似度分数 (1 - distance/sqrt(2)) - ↓ -过滤低分结果 (threshold=0.3) - ↓ -返回相关文档块 -``` - -#### 5.4.2 SQL 实现 - 
-```sql -SELECT - id, - project_id, - document_id, - chunk_index, - content, - metadata, - l2_distance(embedding, '[...]') as distance -FROM vector_documents -WHERE project_id = ? -ORDER BY l2_distance(embedding, '[...]') APPROXIMATE -LIMIT 20 -``` - -**关键点**: -- 不 SELECT `embedding` 字段(避免 1235 错误) -- 使用 `APPROXIMATE` 关键字(HNSW 近似搜索) -- 限制返回数量(`LIMIT 20`) -- 在应用层计算相似度并过滤 - -#### 5.4.3 相似度计算 - -```rust -// L2 距离 → 相似度分数 -// 假设向量已归一化,最大距离约为 sqrt(2) -let similarity = 1.0 - (distance / std::f64::consts::SQRT_2); - -// 过滤低分结果 -if similarity >= 0.3 { - results.push(doc); -} -``` - -### 5.5 数据迁移 - -从 SQLite 迁移到 SeekDB: - -```bash -python migrate_sqlite_to_seekdb.py -``` - -**迁移内容**: -- ✅ 所有项目元数据 -- ✅ 所有会话和消息历史 -- ✅ 所有文档块和向量 embeddings -- ✅ 自动创建 HNSW 索引 -- ✅ 数据完整性验证 - -**注意事项**: -- Embedding 维度统一为 1536(DashScope 标准) -- SQLite BLOB → JSON 数组字符串 -- 时间戳格式转换 - ---- - -## 6. 常见问题与解决方案 - -### 6.1 安装问题 - -#### 问题 1: ModuleNotFoundError: No module named 'seekdb' - -**原因**:seekdb 包未安装或版本不正确 - -**解决方案**: -```bash -# 方案 1: 安装最新版本 -pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple/ - -# 方案 2: 检查虚拟环境 -source ~/.local/share/com.mine-kb.app/venv/bin/activate -pip list | grep seekdb - -# 方案 3: 验证导入(注意:0.0.1.dev4 使用 seekdb 模块) -python -c "import seekdb; print('SeekDB 0.0.1.dev4 OK')" -``` - -> **注意**:0.0.1.dev4 版本应使用 `import seekdb`,不再使用 `import oblite`。 - -#### 问题 2: pip install 失败(No such file or directory) - -**原因**:直接使用 `pip` 命令,但虚拟环境中可能没有 `pip` 可执行文件 - -**解决方案**: -```bash -# 使用 python -m pip 替代 -python -m pip install seekdb==0.0.1.dev2 -i https://pypi.tuna.tsinghua.edu.cn/simple/ -``` - -### 6.2 数据库初始化问题 - -#### 问题 3: Unknown database (错误码 1049) - -**原因**:SeekDB 不会自动创建数据库。**0.0.1.dev4 新增**:数据库存在性验证,连接不存在的数据库会报错。 - -**解决方案**: -```python -# 显式创建数据库 -admin_conn = seekdb.connect("") -admin_cursor = admin_conn.cursor() -admin_cursor.execute("CREATE DATABASE IF NOT EXISTS `mine_kb`") -admin_conn.commit() -admin_conn.close() - -# 然后连接 -conn = seekdb.connect("mine_kb") 
-``` - -#### 问题 4: 应用启动后数据库连接失败 - -**诊断步骤**: -```bash -# 1. 检查数据库目录 -ls -la ~/.local/share/com.mine-kb.app/mine_kb.db/ - -# 2. 查看日志 -cat ~/.local/share/com.mine-kb.app/mine_kb.db/log/oblite.log - -# 3. 手动测试连接(使用 seekdb 模块) -python3 < **注意**:0.0.1.dev4 版本已稳定支持 USE 语句,但仍推荐使用 `connect(db_name)` 方式。 - -### 6.4 性能问题 - -#### 问题 8: 向量检索速度慢 - -**诊断**: -```python -import time - -start = time.time() -cursor.execute(""" - SELECT id FROM vector_documents - ORDER BY l2_distance(embedding, '[...]') APPROXIMATE - LIMIT 10 -""") -results = cursor.fetchall() -elapsed = time.time() - start -print(f"Query time: {elapsed:.3f}s") -``` - -**优化方案**: -1. 确保使用了 `APPROXIMATE` 关键字 -2. 减少 `LIMIT` 数量 -3. 添加 `WHERE` 过滤条件(如 `project_id`) -4. 检查 HNSW 索引是否创建成功 - -```sql --- 查看索引 -SHOW INDEX FROM vector_documents; -``` - -#### 问题 9: 查询返回数据量大导致内存占用高 - -**解决方案**: -```python -# 1. 分页查询 -def query_in_batches(cursor, sql, batch_size=1000): - offset = 0 - while True: - batch_sql = f"{sql} LIMIT {batch_size} OFFSET {offset}" - cursor.execute(batch_sql) - rows = cursor.fetchall() - - if not rows: - break - - yield from rows - offset += batch_size - -# 2. 只查询需要的字段(不要 SELECT *) -cursor.execute("SELECT id, content FROM vector_documents WHERE ...") - -# 3. 不查询 vector 字段 -# ❌ SELECT embedding FROM ... -# ✅ 只查询元数据 -``` - -### 6.5 数据一致性问题 - -#### 问题 10: 事务未提交导致数据丢失 - -**原因**:忘记调用 `conn.commit()` - -**解决方案**: -```python -try: - cursor.execute("INSERT INTO ...") - cursor.execute("UPDATE ...") - conn.commit() # ✅ 必须调用 -except Exception as e: - conn.rollback() - raise -``` - -#### 问题 11: 外键约束失败 - -**诊断**: -```sql --- 检查外键约束 -SELECT * FROM information_schema.table_constraints -WHERE constraint_type = 'FOREIGN KEY'; -``` - -**解决方案**: -```python -# 确保按正确顺序插入(先父表,后子表) -cursor.execute("INSERT INTO projects ...") -conn.commit() - -cursor.execute("INSERT INTO conversations ...") # 引用 projects -conn.commit() -``` - ---- - -## 7. 最佳实践 - -### 7.1 表设计 - -#### ✅ 推荐做法 - -```sql --- 1. 
使用合适的主键类型 -CREATE TABLE projects ( - id VARCHAR(36) PRIMARY KEY, -- UUID - name TEXT NOT NULL, - created_at DATETIME NOT NULL -); - --- 2. 为常用查询字段创建索引 -CREATE INDEX idx_project_name ON projects(name); -CREATE INDEX idx_created_at ON projects(created_at); - --- 3. 使用外键约束确保数据一致性 -CREATE TABLE documents ( - id VARCHAR(36) PRIMARY KEY, - project_id VARCHAR(36) NOT NULL, - FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE -); - --- 4. 为向量字段创建 HNSW 索引 -CREATE TABLE embeddings ( - id VARCHAR(36) PRIMARY KEY, - content TEXT, - embedding vector(1536), - VECTOR INDEX idx_embedding(embedding) - WITH (distance=l2, type=hnsw, lib=vsag) -); -``` - -#### ❌ 避免做法 - -```sql --- ❌ 不要使用 SELECT * -SELECT * FROM large_table; -- 浪费带宽和内存 - --- ❌ 不要在热点字段上使用 TEXT -CREATE TABLE users ( - email TEXT PRIMARY KEY -- ❌ 应该用 VARCHAR(255) -); - --- ❌ 不要创建过多索引 -CREATE INDEX idx1 ON table(col1); -CREATE INDEX idx2 ON table(col2); -CREATE INDEX idx3 ON table(col3); --- ... 过多索引影响写入性能 - --- ❌ 不要在 vector 字段上创建普通索引 -CREATE INDEX idx_embedding ON embeddings(embedding); -- ❌ 应该用 VECTOR INDEX -``` - -### 7.2 查询优化 - -#### ✅ 推荐做法 - -```sql --- 1. 只查询需要的字段 -SELECT id, name, created_at FROM projects WHERE status = 'active'; - --- 2. 使用索引字段进行过滤 -SELECT * FROM vector_documents WHERE project_id = '...'; -- 有索引 - --- 3. 向量检索使用 APPROXIMATE -SELECT id, l2_distance(embedding, '[...]') as distance -FROM vector_documents -ORDER BY l2_distance(embedding, '[...]') APPROXIMATE -- ✅ -LIMIT 10; - --- 4. 
限制返回数量 -SELECT * FROM messages ORDER BY created_at DESC LIMIT 100; -``` - -#### ❌ 避免做法 - -```sql --- ❌ 不要在向量查询中 SELECT vector 字段 -SELECT embedding, l2_distance(embedding, '[...]') as distance -FROM vector_documents; - --- ❌ 不要使用精确搜索(慢) -SELECT * FROM vector_documents -ORDER BY l2_distance(embedding, '[...]') -- 缺少 APPROXIMATE -LIMIT 10; - --- ❌ 不要在非索引字段上过滤大表 -SELECT * FROM large_table WHERE non_indexed_column = '...'; - --- ❌ 不要使用 ORDER BY(除了向量检索) -SELECT * FROM projects ORDER BY name; -- 不支持 -``` - -### 7.3 事务管理 - -#### ✅ 推荐做法 - -```python -# 1. 使用上下文管理器(如果支持) -try: - cursor.execute("INSERT ...") - cursor.execute("UPDATE ...") - conn.commit() -except Exception as e: - conn.rollback() - log.error(f"Transaction failed: {e}") - raise - -# 2. 批量操作在一个事务中 -cursor.execute("BEGIN") -for item in items: - cursor.execute("INSERT INTO ...", item) -conn.commit() - -# 3. 读操作不需要事务 -cursor.execute("SELECT * FROM projects") -rows = cursor.fetchall() -``` - -#### ❌ 避免做法 - -```python -# ❌ 不要忘记提交 -cursor.execute("INSERT ...") -# 缺少 conn.commit() - -# ❌ 不要在循环中提交(性能差) -for item in items: - cursor.execute("INSERT ...") - conn.commit() # ❌ 每次都提交 - -# ❌ 不要嵌套事务(不支持) -conn.begin() -cursor.execute("INSERT ...") -conn.begin() # ❌ 不支持 -``` - -### 7.4 向量检索 - -#### ✅ 推荐做法 - -```python -# 1. 归一化查询向量 -import numpy as np -query_embedding = np.array(embedding) -query_embedding = query_embedding / np.linalg.norm(query_embedding) - -# 2. 使用阈值过滤结果 -threshold = 0.3 -results = [doc for doc in search_results if doc.similarity >= threshold] - -# 3. 添加项目过滤条件 -sql = """ - SELECT id, content, l2_distance(embedding, '{}') as distance - FROM vector_documents - WHERE project_id = ? - ORDER BY l2_distance(embedding, '{}') APPROXIMATE - LIMIT 20 -""" - -# 4. 返回元数据而非原始向量 -# embedding 字段设为空向量 -VectorDocument { - id, content, metadata, - embedding: vec![], # 不返回原始向量 -} -``` - -#### ❌ 避免做法 - -```python -# ❌ 不要查询所有项目的文档(慢) -sql = "SELECT * FROM vector_documents ORDER BY l2_distance(...) 
APPROXIMATE" - -# ❌ 不要返回过多结果 -LIMIT 1000 # ❌ 太多 - -# ❌ 不要在向量检索后再过滤 -# 应该在 WHERE 子句中过滤 -``` - -### 7.5 错误处理 - -#### ✅ 推荐做法 - -```python -import logging - -try: - cursor.execute(sql, values) - conn.commit() -except Exception as e: - logging.error(f"Database error: {e}") - logging.error(f"SQL: {sql}") - logging.error(f"Values: {values}") - conn.rollback() - - # 返回友好的错误信息 - if "1049" in str(e): - raise DatabaseError("数据库不存在,请检查配置") - elif "1235" in str(e): - raise DatabaseError("查询不支持,请检查 SQL 语句") - else: - raise -``` - -#### ❌ 避免做法 - -```python -# ❌ 不要忽略错误 -try: - cursor.execute(sql) -except: - pass # ❌ 吞掉错误 - -# ❌ 不要暴露敏感信息 -except Exception as e: - return str(e) # ❌ 可能包含敏感路径、密码等 -``` - -### 7.6 连接管理 - -#### ✅ 推荐做法 - -```python -import seekdb - -# 1. 使用连接池(长期运行的服务) -class SeekDBPool: - def __init__(self, db_path, db_name): - self.db_path = db_path - self.db_name = db_name - self.conn = None - - def get_connection(self): - if self.conn is None: - seekdb.open(self.db_path) - self.conn = seekdb.connect(self.db_name) - return self.conn - - def close(self): - if self.conn: - self.conn.close() - self.conn = None - -# 2. 重用连接 -pool = SeekDBPool("./mine_kb.db", "mine_kb") -conn = pool.get_connection() -cursor = conn.cursor() - -# 3. 程序退出时关闭 -import atexit -atexit.register(pool.close) -``` - -#### ❌ 避免做法 - -```python -# ❌ 不要频繁创建/关闭连接 -for query in queries: - conn = seekdb.connect(db_name) # ❌ 每次都连接 - cursor = conn.cursor() - cursor.execute(query) - conn.close() - -# ❌ 不要忘记关闭连接(资源泄漏) -conn = seekdb.connect(db_name) -# ... 使用连接 -# 缺少 conn.close() -``` - ---- - -## 8. 注意事项与限制 - -### 8.1 已知限制 - -| 限制 | 说明 | 解决方案 | -|-----|------|---------| -| **不支持 ORDER BY** | 除了向量检索的 `ORDER BY l2_distance(...) 
APPROXIMATE` | 在应用层排序 | -| **不支持 SELECT vector 字段** | 在使用向量函数时不能同时 SELECT vector 字段 | 使用空向量替代 | -| **不支持参数化查询** | `execute(sql, params)` 不支持 | 手动构建 SQL(注意防注入) | -| **不支持 USE 语句** | `USE database` 行为不稳定 | 通过 `connect(db_name)` 指定 | -| **不自动创建数据库** | `connect()` 不会创建数据库 | 显式 `CREATE DATABASE` | -| **混合检索未实装** | 轻量版暂不支持混合检索 | 等待 patch44x 版本 | -| **物化视图有限** | 功能尚不完整 | 谨慎使用 | -| **并发写入受限** | 单机数据库,写入性能有限 | 批量写入,避免频繁提交 | - -### 8.2 性能限制 - -| 指标 | 典型值 | 说明 | -|-----|-------|------| -| 向量检索延迟 | 10-100ms | 取决于数据量和 LIMIT | -| 写入 TPS | 100-1000 | 单机性能 | -| 最大向量维度 | 无明确限制 | 推荐 ≤2048 | -| 最大数据库大小 | 无明确限制 | 取决于磁盘空间 | -| 并发连接数 | 建议 1-10 | 嵌入式数据库 | - -### 8.3 兼容性 - -| 组件 | 兼容性 | -|-----|-------| -| Python 版本 | 3.8+ | -| 操作系统 | Linux, macOS, Windows | -| CPU 架构 | x86-64, ARM64 | -| SQL 方言 | MySQL/OceanBase 风格 | -| 字符编码 | UTF-8 | - ---- - -## 9. 性能优化建议 - -### 9.1 向量检索优化 - -```python -# 1. 使用合适的 LIMIT -LIMIT 10 # ✅ 推荐 -LIMIT 100 # ⚠️ 可能较慢 -LIMIT 1000 # ❌ 不推荐 - -# 2. 添加过滤条件 -WHERE project_id = '...' # ✅ 减少搜索空间 - -# 3. 调整 HNSW 参数(创建索引时) -WITH ( - distance=l2, - type=hnsw, - lib=vsag, - -- 可能支持的参数(需验证) - -- M=16, -- HNSW 图的连接数 - -- ef_construction=200 -- 构建时的搜索深度 -) -``` - -### 9.2 批量操作优化 - -```python -# ❌ 逐条插入(慢) -for item in items: - cursor.execute("INSERT INTO table VALUES (?)", item) - conn.commit() - -# ✅ 批量插入(快) -cursor.execute("BEGIN") -for item in items: - cursor.execute("INSERT INTO table VALUES (?)", item) -conn.commit() - -# ✅ 使用 LOAD DATA(最快,适用于大量数据) -cursor.execute(""" - LOAD DATA /*+ direct(true, 0) */ - INFILE 'data.csv' - INTO TABLE table - FIELDS TERMINATED BY ',' -""") -``` - -### 9.3 查询优化 - -```python -# 1. 避免全表扫描 -# ❌ 不要 -cursor.execute("SELECT * FROM large_table WHERE non_indexed_col = '...'") - -# ✅ 使用索引 -cursor.execute("SELECT * FROM large_table WHERE indexed_col = '...'") - -# 2. 只查询需要的字段 -# ❌ 不要 -cursor.execute("SELECT * FROM table") - -# ✅ 只查询需要的 -cursor.execute("SELECT id, name FROM table") - -# 3. 
使用 EXPLAIN 分析查询(如果支持) -cursor.execute("EXPLAIN SELECT ...") -print(cursor.fetchall()) -``` - -### 9.4 索引策略 - -```sql --- 1. 为常用查询字段创建索引 -CREATE INDEX idx_project_id ON vector_documents(project_id); - --- 2. 组合索引(如果需要) -CREATE INDEX idx_project_doc ON vector_documents(project_id, document_id); - --- 3. 定期检查索引使用情况 -SHOW INDEX FROM vector_documents; - --- 4. 删除不使用的索引 -DROP INDEX unused_idx ON table; -``` - -### 9.5 内存优化 - -```python -# 1. 分页查询大结果集 -def fetch_in_batches(cursor, sql, batch_size=1000): - offset = 0 - while True: - batch_sql = f"{sql} LIMIT {batch_size} OFFSET {offset}" - cursor.execute(batch_sql) - rows = cursor.fetchall() - - if not rows: - break - - for row in rows: - yield row - - offset += batch_size - -# 2. 及时释放不需要的数据 -results = cursor.fetchall() -process_results(results) -del results # 释放内存 - -# 3. 不要在内存中缓存大量数据 -# ❌ 不要 -all_docs = cursor.execute("SELECT * FROM large_table").fetchall() - -# ✅ 流式处理 -for row in cursor.execute("SELECT * FROM large_table"): - process_row(row) -``` - ---- - -## 10. 总结与展望 - -### 10.1 SeekDB 的优势 - -| 优势 | 说明 | -|-----|------| -| **All-in-One** | TP + AP + AI 能力集成 | -| **原生向量检索** | HNSW 索引,性能优秀 | -| **轻量级部署** | 嵌入式,无需独立服务 | -| **平滑升级** | 可升级到分布式 OceanBase | -| **开发友好** | Python API,易于集成 | - -### 10.2 SeekDB 的不足 - -| 不足 | 影响 | 缓解方案 | -|-----|------|---------| -| ORDER BY 限制 | 需要应用层排序 | 影响较小 | -| Vector 字段限制 | 不能直接查询 | 使用空向量 | -| 单机性能 | 并发写入受限 | 批量操作 | -| 文档不完善 | 学习曲线陡峭 | 参考示例代码 | -| 功能未完整 | 部分特性未实装 | 等待新版本 | - -### 10.3 适用场景总结 - -#### ✅ 非常适合 - -- 嵌入式 AI 应用(RAG、知识库) -- 桌面应用需要向量检索 -- 原型验证、MVP 开发 -- 边缘计算、IoT 设备 -- 单机部署的中小型应用 - -#### ⚠️ 需要评估 - -- 高并发写入场景(考虑批量优化) -- 大数据量场景(数百万条记录以上) -- 复杂分析查询(部分功能受限) - -#### ❌ 不适合 - -- 分布式系统(应该用 OceanBase 分布式版) -- 需要高可用、容灾的生产环境 -- 极高性能要求(万级 TPS) - -### 10.4 未来展望 - -SeekDB/OceanBase Lite 的发展方向: - -1. **功能完善** - - 混合检索(Hybrid Search)全面支持 - - 更丰富的 OLAP 功能 - - 完善的物化视图 - -2. **性能提升** - - 更快的向量检索 - - 更好的并发支持 - - 更低的内存占用 - -3. **易用性改进** - - 完善的文档和示例 - - 更友好的错误信息 - - 可视化管理工具 - -4. 
**生态建设** - - 更多语言的 SDK - - 与主流框架的集成 - - 云原生支持 - -### 10.5 推荐学习路径 - -1. **入门阶段**(1-2 天) - - 安装 SeekDB - - 学习基本 SQL 操作 - - 创建第一个向量检索应用 - -2. **进阶阶段**(1 周) - - 深入理解向量检索原理 - - 掌握 HNSW 索引调优 - - 学习事务和并发控制 - -3. **高级阶段**(2-4 周) - - 生产环境部署优化 - - 性能调优和监控 - - 与现有系统集成 - -4. **专家阶段**(持续) - - 贡献社区和开源项目 - - 深入研究底层实现 - - 探索创新应用场景 - ---- - -## 附录 - -### A. 相关文档 - -- `docs/seekdb.md` - SeekDB 基础文档 -- `docs/MIGRATION_SEEKDB.md` - SQLite 迁移指南 -- `docs/SEEKDB_AUTO_INSTALL.md` - 自动安装说明 -- `docs/FIX_SEEKDB_VECTOR_QUERY.md` - 向量查询修复 -- `docs/FIX_SEEKDB_ORDER_BY.md` - ORDER BY 问题修复 -- `docs/FIX_SEEKDB_DATABASE_ERROR.md` - 数据库初始化修复 -- `docs/SEEKDB_VECTOR_FIELD_LIMITATION_ANALYSIS.md` - Vector 字段限制分析 - -### C. 示例代码 - -完整示例代码请参考: -- `src-tauri/python/seekdb_bridge.py` - Python Bridge 实现(已更新至 0.0.1.dev4) -- `src-tauri/python/migrate_sqlite_to_seekdb.py` - 数据迁移脚本 -- `src-tauri/python/test_seekdb.py` - 测试示例(已更新至 0.0.1.dev4) -- `src-tauri/src/services/seekdb_adapter.rs` - Rust 适配器 - -### D. 版本升级指南 - -- [UPGRADE_SEEKDB_0.0.1.dev4.md](UPGRADE_SEEKDB_0.0.1.dev4.md) - 从 0.0.1.dev2 升级到 0.0.1.dev4 的详细指南 - -### E. 技术支持 - -遇到问题? - -1. 查阅本文档的"常见问题"章节 -2. 查看 [UPGRADE_SEEKDB_0.0.1.dev4.md](UPGRADE_SEEKDB_0.0.1.dev4.md) 升级指南 -3. 查看 `docs/` 目录下的相关修复文档 -4. 检查应用日志(`/log/oblite.log`) -5. 在 GitHub 上提交 Issue -6. 参考 MineKB 项目的实现 - ---- - -**文档结束** - -感谢阅读!希望这份文档能帮助你更好地理解和使用 SeekDB。 - -如有任何问题或建议,欢迎反馈。 - ---- - -**版权声明**:本文档基于 MineKB 项目的实践经验总结,遵循项目开源协议。 - diff --git a/docs/SEEKDB_VECTOR_FIELD_LIMITATION_ANALYSIS.md b/docs/SEEKDB_VECTOR_FIELD_LIMITATION_ANALYSIS.md index c741cd6..1f80eb5 100644 --- a/docs/SEEKDB_VECTOR_FIELD_LIMITATION_ANALYSIS.md +++ b/docs/SEEKDB_VECTOR_FIELD_LIMITATION_ANALYSIS.md @@ -365,7 +365,6 @@ SELECT * FROM vector_documents WHERE id = ? 
- [FIX_SEEKDB_VECTOR_QUERY.md](./FIX_SEEKDB_VECTOR_QUERY.md) - 修复实施文档 - [RESTORE_SEEKDB_VECTOR_SEARCH.md](./RESTORE_SEEKDB_VECTOR_SEARCH.md) - 向量检索恢复 - [seekdb.md](./seekdb.md) - SeekDB官方文档 -- [MIGRATION_SUMMARY.md](./MIGRATION_SUMMARY.md) - 迁移总结 --- diff --git a/docs/SETUP_CHECKLIST.md b/docs/SETUP_CHECKLIST.md index 275706d..ad08986 100644 --- a/docs/SETUP_CHECKLIST.md +++ b/docs/SETUP_CHECKLIST.md @@ -1,195 +1,60 @@ # SeekDB Setup Checklist -Follow these steps to set up the application with SeekDB support. +MineKB uses **seekdb-rs** (Rust) for embedded SeekDB. No Python or venv is required. ## Prerequisites -- [ ] Python 3.8 or higher installed - ```bash - python3 --version - ``` +**Build / development** (for local dev or packaging only): -- [ ] pip3 installed - ```bash - pip3 --version - ``` - -- [ ] Rust 1.70+ installed (for building) +- [ ] Rust 1.70+ installed ```bash rustc --version ``` -- [ ] Node.js 16+ installed (for frontend) +- [ ] Node.js 16+ installed (frontend build) ```bash node --version ``` -## Installation Steps - -### 1. Install SeekDB Python Package - -```bash -# Using pip with Tsinghua mirror (recommended in China) -pip3 install seekdb==0.0.1.dev2 -i https://pypi.tuna.tsinghua.edu.cn/simple/ - -# Or using the installation script -cd src-tauri/python -bash install_deps.sh -``` - -### 2. Verify Installation - -```bash -cd src-tauri/python -python3 test_seekdb.py -``` +**End-user runtime**: No extra runtime (Python, Node, etc.). The built app is self-contained. -Expected output: -``` -============================================================ -SeekDB Installation Test -============================================================ -Testing oblite import... ✅ OK - -Testing basic operations... - Creating database at /tmp/.../test.db... ✅ - Creating table... ✅ - Inserting data... ✅ - Querying data... ✅ - Closing connection... ✅ - -✅ All basic operations passed! -... -✅ All tests passed! SeekDB is ready to use. 
-============================================================ -``` +## Installation Steps -### 3. Install Application Dependencies +### 1. Install application dependencies ```bash # From project root -cd /home/ubuntu/Desktop/mine-kb - -# Install frontend dependencies -npm install # or tnpm install +npm install # or tnpm install -# Rust dependencies will be installed automatically during build +# Rust (including seekdb-rs) is pulled automatically on build ``` -### 4. Configure Application +### 2. Configure application ```bash -# Copy config template cp src-tauri/config.example.json src-tauri/config.json - -# Edit config file and add your API keys -nano src-tauri/config.json +# Edit config.json and add your API keys ``` -### 5. Build and Run +### 3. Build and run ```bash -# Development mode +# Development (default data dir: CONFIG_DIR=com.mine-kb.app) npm run tauri:dev +# Custom data directory +CONFIG_DIR=/path/to/your/data npm run tauri:dev + # Production build npm run tauri:build ``` -## Migration from SQLite (If Upgrading) - -If you have an existing SQLite database: - -```bash -cd src-tauri/python -python3 migrate_sqlite_to_seekdb.py ./oblite.db -``` - -Example: -```bash -# macOS -python3 migrate_sqlite_to_seekdb.py ~/Library/Application\ Support/mine-kb/mine_kb.db ./oblite.db - -# Linux -python3 migrate_sqlite_to_seekdb.py ~/.local/share/mine-kb/mine_kb.db ./oblite.db - -# Windows -python3 migrate_sqlite_to_seekdb.py %APPDATA%\mine-kb\mine_kb.db .\oblite.db -``` - -## Troubleshooting - -### Issue: "ModuleNotFoundError: No module named 'oblite'" - -**Solution:** -```bash -pip3 install seekdb==0.0.1.dev2 -i https://pypi.tuna.tsinghua.edu.cn/simple/ -``` - -### Issue: "Failed to start Python process" - -**Solutions:** -1. Verify Python 3 is in PATH: - ```bash - which python3 - ``` - -2. Check if SeekDB is installed: - ```bash - python3 -c "import oblite; print('OK')" - ``` - -3. 
Check script permissions: - ```bash - chmod +x src-tauri/python/seekdb_bridge.py - ``` - -### Issue: "Vector index creation failed" - -This is usually not critical. The application will work without the index, just slower for large datasets. - -To manually create index later: -```sql -CREATE VECTOR INDEX idx_embedding ON vector_documents(embedding) -WITH (distance=l2, type=hnsw, lib=vsag) -``` - -### Issue: Subprocess communication timeout - -**Solutions:** -1. Restart the application -2. Check system resources (CPU/memory) -3. Check Python process logs in stderr - -## Verification - -After setup, verify everything works: - -1. [ ] Application starts without errors -2. [ ] Can create a new project -3. [ ] Can upload a document -4. [ ] Document processing completes -5. [ ] Can query the document via chat -6. [ ] Chat responses are generated correctly -7. [ ] Data persists after application restart - -## Getting Help - -- Check [MIGRATION_SEEKDB.md](MIGRATION_SEEKDB.md) for detailed migration guide -- Check [MIGRATION_SUMMARY.md](MIGRATION_SUMMARY.md) for technical details -- Check [docs/seekdb.md](docs/seekdb.md) for SeekDB documentation -- Create an issue on GitHub - -## Checklist Summary +## Data directory -- [ ] Python 3.8+ installed -- [ ] SeekDB package installed -- [ ] Installation test passed -- [ ] Application dependencies installed -- [ ] Configuration file created -- [ ] Application builds successfully -- [ ] Application runs without errors -- [ ] (If upgrading) Data migrated from SQLite +- Default: `{app data dir}/mine_kb.db/` (e.g. `~/Library/Application Support/com.mine-kb.app/mine_kb.db/` on macOS). +- Override: set `CONFIG_DIR` to the desired app data root. -Once all items are checked, you're ready to use MineKB with SeekDB! 
🎉 +## References +- [SeekDB in MineKB](./seekdb.md) +- [Development tutorial](./MINEKB_DEV_TUTORIAL.md) diff --git a/docs/SPLASH_SCREEN.md b/docs/SPLASH_SCREEN.md index c34f0d0..bf6aca5 100644 --- a/docs/SPLASH_SCREEN.md +++ b/docs/SPLASH_SCREEN.md @@ -166,8 +166,7 @@ app.emit_all("startup-progress", StartupEvent::error( 2. 前端显示 SplashScreen,监听启动事件 ↓ 3. 后端发送事件:步骤 1 - 检查 SeekDB 依赖 - ├─ 检查 oblite.so 是否存在 - ├─ 如不存在则自动下载 + ├─ 使用 seekdb-rs 打开/创建嵌入式数据库(无 Python) └─ 成功 → 发送成功事件 ↓ 4. 后端发送事件:步骤 2 - 加载配置文件 @@ -262,11 +261,9 @@ app.emit_all("startup-progress", StartupEvent::error( - 验证动画效果 - 验证启动成功后自动隐藏 -### 2. SeekDB 下载测试 -- 删除 `oblite.so` -- 启动应用 -- 观察下载进度是否正常显示 -- 验证下载成功后继续启动 +### 2. SeekDB 初始化测试 +- 使用全新数据目录启动应用 +- 验证嵌入式数据库创建成功并继续启动 ### 3. 配置错误测试 - 删除或重命名 `config.json` @@ -309,7 +306,7 @@ app.emit_all("startup-progress", StartupEvent::error( ## 相关文档 -- [SeekDB 自动安装文档](./SEEKDB_AUTO_INSTALL.md) +- [SeekDB in MineKB](./seekdb.md) - [应用配置指南](../README.md) --- diff --git a/docs/UPDATE_SUMMARY.md b/docs/UPDATE_SUMMARY.md index 6ce3dac..086d87f 100644 --- a/docs/UPDATE_SUMMARY.md +++ b/docs/UPDATE_SUMMARY.md @@ -1,8 +1,6 @@ # 更新摘要:混合检索与对话日志 > **历史文档**: 本文档记录了早期版本的功能更新。 -> **当前版本**: SeekDB 0.0.1.dev4,模块名已从 `oblite` 更改为 `seekdb`。 -> **参考**: [SeekDB 0.0.1.dev4 升级指南](UPGRADE_SEEKDB_0.0.1.dev4.md) ## ✅ 已完成的功能 @@ -92,7 +90,7 @@ npm run tauri dev **开发环境(推荐):** ```bash # 删除旧数据库 -rm -rf ~/.local/share/com.mine-kb.app/oblite.db +rm -rf ~/.local/share/com.mine-kb.app/ # 重启应用会自动创建新表结构 ``` diff --git a/docs/UPGRADE_SEEKDB_0.0.1.dev4.md b/docs/UPGRADE_SEEKDB_0.0.1.dev4.md deleted file mode 100644 index 2ffec4f..0000000 --- a/docs/UPGRADE_SEEKDB_0.0.1.dev4.md +++ /dev/null @@ -1,328 +0,0 @@ -# SeekDB 升级指南:0.0.1.dev2 → 0.0.1.dev4 - -## 📋 概述 - -本文档记录了将 mine-kb 项目中的 SeekDB 从 0.0.1.dev2 升级到 0.0.1.dev4 的详细过程和变更说明。 - -**升级日期**: 2025-11-05 -**升级版本**: seekdb 0.0.1.dev2 → 0.0.1.dev4 - ---- - -## 🔄 主要变更 - -### 1. 
模块重命名 - -**最重要的变化**:`oblite` 模块已更名为 `seekdb` - -**变更前(0.0.1.dev2)**: -```python -import seekdb # seekdb 包 -import oblite # 实际使用的模块 - -oblite.open(db_path) -conn = oblite.connect(db_name) -``` - -**变更后(0.0.1.dev4)**: -```python -import seekdb # seekdb 包,直接使用 - -seekdb.open(db_path) -conn = seekdb.connect(db_name) -``` - -### 2. 新增特性 - -#### 2.1 向量列类型输出支持 -0.0.1.dev4 版本支持直接输出向量(vector)列类型,无需额外转换。 - -#### 2.2 数据库存在性验证 -`connect()` 方法现在会验证数据库是否存在: -- 如果数据库不存在,会抛出错误 -- 如果未指定数据库名,默认连接到 "test" 数据库 -- 支持 `connect.close()` 方法 - -**示例**: -```python -import seekdb - -seekdb.open("./mydb.db") - -# 方式1: 连接到已存在的数据库 -conn = seekdb.connect("mine_kb") # 如果 mine_kb 不存在,会报错 - -# 方式2: 先创建数据库,再连接 -admin_conn = seekdb.connect("") # 连接到管理上下文 -cursor = admin_conn.cursor() -cursor.execute("CREATE DATABASE IF NOT EXISTS mine_kb") -admin_conn.commit() -admin_conn.close() - -conn = seekdb.connect("mine_kb") # 现在可以安全连接 -``` - -#### 2.3 USE 语句支持 -现在支持标准的 `USE database` 语法: - -```python -cursor.execute("USE mine_kb") # 切换到指定数据库 -``` - -#### 2.4 自动提交模式 -支持在连接时指定自动提交模式: - -```python -# 手动提交(默认) -conn = seekdb.connect(db_name='mine_kb') -cursor.execute("INSERT INTO ...") -conn.commit() # 需要手动提交 - -# 自动提交模式 -conn = seekdb.connect(db_name='mine_kb', autocommit=True) -cursor.execute("INSERT INTO ...") # 自动提交 -``` - ---- - -## 📦 安装方式 - -### 使用清华镜像源安装 -```bash -pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple -``` - -### 在虚拟环境中安装(推荐) - -#### Linux/macOS -```bash -# 创建虚拟环境 -python3 -m venv ~/.local/share/com.mine-kb.app/venv - -# 激活虚拟环境 -source ~/.local/share/com.mine-kb.app/venv/bin/activate - -# 安装 seekdb -pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple - -# 验证安装 -python -c "import seekdb; print('SeekDB 安装成功!')" -``` - -#### Windows -```powershell -# 创建虚拟环境 -python -m venv %APPDATA%\com.mine-kb.app\venv - -# 激活虚拟环境 -%APPDATA%\com.mine-kb.app\venv\Scripts\activate - -# 安装 seekdb -pip install seekdb==0.0.1.dev4 -i 
https://pypi.tuna.tsinghua.edu.cn/simple - -# 验证安装 -python -c "import seekdb; print('SeekDB 安装成功!')" -``` - -### 自动安装脚本 - -项目提供了自动安装脚本: -```bash -cd src-tauri/python -bash install_deps.sh -``` - -该脚本会自动: -1. 检测 Python 3 安装 -2. 创建虚拟环境(如果不存在) -3. 安装 seekdb==0.0.1.dev4 -4. 验证安装是否成功 - ---- - -## 📝 API 变化对照表 - -| 功能 | 0.0.1.dev2 | 0.0.1.dev4 | -|------|-----------|-----------| -| **导入模块** | `import oblite` | `import seekdb` | -| **打开数据库** | `oblite.open(path)` | `seekdb.open(path)` | -| **连接数据库** | `oblite.connect(db_name)` | `seekdb.connect(db_name)` | -| **自动提交** | 不支持 | `seekdb.connect(db_name='test', autocommit=True)` | -| **USE 语句** | 不稳定 | `cursor.execute("USE database")` 稳定支持 | -| **向量输出** | 需要转换 | 原生支持 vector 列类型输出 | -| **数据库验证** | 不验证 | 自动验证数据库是否存在 | -| **关闭连接** | `conn.close()` | `conn.close()` + `connect.close()` | - ---- - -## 🔧 升级步骤 - -### 步骤 1: 更新依赖版本 -更新 `src-tauri/python/requirements.txt`: -```txt -seekdb==0.0.1.dev4 -``` - -### 步骤 2: 更新代码中的导入语句 -**查找并替换**所有代码中的: -- `import oblite` → `import seekdb` -- `oblite.open()` → `seekdb.open()` -- `oblite.connect()` → `seekdb.connect()` - -### 步骤 3: 更新数据库路径(可选) -建议将数据库文件名从 `oblite.db` 更新为 `mine_kb.db`: -```python -# 旧路径 -db_path = "~/.local/share/mine-kb/oblite.db" - -# 新路径(推荐) -db_path = "~/.local/share/mine-kb/mine_kb.db" -``` - -### 步骤 4: 重新安装依赖 -```bash -# 在虚拟环境中 -pip uninstall seekdb -y -pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple -``` - -### 步骤 5: 测试验证 -运行测试脚本验证升级: -```bash -cd src-tauri/python -python test_seekdb.py -``` - ---- - -## 📂 已更新的文件列表 - -### 依赖配置文件 -- ✅ `src-tauri/python/requirements.txt` - 版本号更新为 0.0.1.dev4 -- ✅ `src-tauri/python/install_deps.sh` - 安装脚本更新 - -### 核心代码 -- ✅ `src-tauri/python/seekdb_bridge.py` - 导入语句和 API 调用更新 - -### 测试脚本 -- ✅ `src-tauri/python/test_seekdb.py` - 测试脚本更新 -- ✅ `scripts/test_oblite_upsert.py` → 重命名为 `test_seekdb_upsert.py`(建议) - -### 工具脚本 -- ✅ `src-tauri/python/migrate_sqlite_to_seekdb.py` - 迁移脚本更新 -- ✅ `scripts/debug_db_data.py` - 调试脚本更新 
-- ✅ `scripts/verify_message_order.py` - 使用 seekdb_bridge,无需修改 - ---- - -## ⚠️ 注意事项 - -### 1. 数据库兼容性 -- 现有的数据库文件(.db)**完全兼容**,无需迁移数据 -- 数据表结构保持不变 -- 向量索引保持不变 - -### 2. 向后兼容性 -- 旧代码中的 `import oblite` 将**无法工作** -- 必须更新所有导入语句为 `import seekdb` - -### 3. 数据库创建 -0.0.1.dev4 版本对数据库存在性要求更严格: -```python -# ❌ 错误:如果数据库不存在会报错 -conn = seekdb.connect("nonexistent_db") - -# ✅ 正确:先创建数据库 -admin_conn = seekdb.connect("") -admin_conn.cursor().execute("CREATE DATABASE IF NOT EXISTS my_db") -admin_conn.commit() -admin_conn.close() -conn = seekdb.connect("my_db") -``` - -### 4. 虚拟环境 -**强烈建议**使用虚拟环境: -- 避免污染系统 Python 环境 -- 便于管理依赖版本 -- 提高应用隔离性 - -### 5. 测试建议 -升级后务必测试以下功能: -- ✅ 数据库连接和初始化 -- ✅ 基本 CRUD 操作 -- ✅ 向量搜索功能 -- ✅ 事务提交和回滚 -- ✅ 多线程/多进程访问 - ---- - -## 🐛 常见问题 - -### Q1: 升级后出现 "ModuleNotFoundError: No module named 'oblite'" -**原因**: 代码中仍有 `import oblite` 语句未更新 -**解决**: 使用全局搜索,将所有 `import oblite` 替换为 `import seekdb` - -### Q2: 数据库连接报错 "Database does not exist" -**原因**: 0.0.1.dev4 会验证数据库是否存在 -**解决**: 在连接前先创建数据库(参见注意事项 3) - -### Q3: 虚拟环境中找不到 seekdb -**原因**: seekdb 未安装在正确的虚拟环境中 -**解决**: -```bash -# 确认虚拟环境已激活 -which python # Linux/macOS -where python # Windows - -# 重新安装 -pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple -``` - -### Q4: 向量搜索性能下降 -**原因**: 向量索引可能需要重建 -**解决**: -```sql -DROP INDEX idx_embedding; -CREATE VECTOR INDEX idx_embedding ON vector_documents(embedding) -WITH (distance=l2, type=hnsw, lib=vsag); -``` - ---- - -## 📚 相关资源 - -- **SeekDB 官方文档**: (待补充) -- **清华镜像源**: https://pypi.tuna.tsinghua.edu.cn/simple -- **项目 GitHub**: (待补充) - ---- - -## 📞 技术支持 - -如果在升级过程中遇到问题,请: -1. 查看本文档的"常见问题"章节 -2. 运行 `python test_seekdb.py` 诊断问题 -3. 检查虚拟环境是否正确激活 -4. 
提交 Issue 到项目仓库 - ---- - -## ✅ 升级检查清单 - -- [ ] 更新 `requirements.txt` 中的版本号 -- [ ] 更新所有代码中的 `import oblite` 为 `import seekdb` -- [ ] 更新所有 `oblite.open()` 为 `seekdb.open()` -- [ ] 更新所有 `oblite.connect()` 为 `seekdb.connect()` -- [ ] 在虚拟环境中安装 seekdb==0.0.1.dev4 -- [ ] 运行 `test_seekdb.py` 验证安装 -- [ ] 测试数据库连接功能 -- [ ] 测试向量搜索功能 -- [ ] 测试现有数据读写 -- [ ] 更新部署文档 - ---- - -**文档版本**: 1.0 -**最后更新**: 2025-11-05 -**维护者**: mine-kb 开发团队 - diff --git a/docs/seekdb.md b/docs/seekdb.md index 68e35fd..d0c371d 100644 --- a/docs/seekdb.md +++ b/docs/seekdb.md @@ -1,306 +1,33 @@ -# SeekDB 基础文档 +# SeekDB in MineKB -> **版本**: SeekDB 0.0.1.dev4 -> **最后更新**: 2025-11-05 +MineKB uses **seekdb-rs** (Rust SDK) to talk to embedded SeekDB. There is no Python dependency; the app uses the native Rust client only. -> **重要更新**: 从 0.0.1.dev4 版本开始,模块名称从 `oblite` 更改为 `seekdb`。详见 [升级指南](UPGRADE_SEEKDB_0.0.1.dev4.md) +## 1. Product overview -# 1. 产品目标 +SeekDB is an AI-Native embedded database that supports: -轻量版嵌入式产品形态以库的形式集成在用户应用程序中,为开发者提供更强大灵活的数据管理解决方案,让数据管理无处不在(微控制器、物联网设备、边缘计算、移动应用、数据中心),快速上手使用ALL IN ONE(TP、AP、 AI Native)的产品能力 +- **TP (transactions)** – OLTP +- **AP (analytics)** – OLAP, columnar, materialized views +- **AI (vectors)** – vector type, HNSW index, hybrid search -![img](https://intranetproxy.alipay.com/skylark/lark/0/2025/png/275819/1756967663128-f86fe123-fc82-4e95-a87f-40c7e887eb04.png) +MineKB uses it in **embedded mode**: the database runs inside the app process via the seekdb-rs async **Client** (embedded). -# 2. 安装配置 +## 2. Data directory -## 2.1 MineKB 应用自动安装 +- **Default**: `{app data dir}/mine_kb.db/` (e.g. `~/Library/Application Support/com.mine-kb.app/mine_kb.db/` on macOS). +- **Override**: set `CONFIG_DIR` to the desired app data root; the DB path is then `$CONFIG_DIR/mine_kb.db/`. -MineKB 应用会在启动时**自动检查并安装** SeekDB 依赖库(oblite.so)。 +No venv or Python install is required. -### 应用数据目录位置 +## 3. 
seekdb-rs usage in MineKB -- **macOS**: `~/Library/Application Support/com.mine-kb.app/` -- **Linux**: `~/.local/share/com.mine-kb.app/` -- **Windows**: `%APPDATA%\com.mine-kb.app\` +- **Crate**: `seekdb-rs` (path dependency, feature `embedded` only). +- **Client**: async **Client** – built with `Client::builder().path(...).database(...).build().await`; all DB operations are async (no Python, no sync bridge). +- **Vector storage**: a single collection (e.g. `vector_documents`) holds chunk embeddings; projects are isolated by metadata filter (`project_id`). +- **Capabilities used**: parameterized SQL (`execute` / `fetch_all`), collection create/get, upsert, vector KNN, hybrid search (keyword + vector), delete by filter, count. -### 手动安装(可选) +For vector and hybrid search details, see the [seekdb-rs](https://github.com/ob-labs/seekdb-rs) project. For MineKB’s adapter API, see `src-tauri/src/services/seekdb_adapter.rs`. -如果自动下载失败,可以手动安装: +## 4. Switching to distributed SeekDB (optional) -```bash -pip install seekdb==0.0.1.dev2 -i https://pypi.tuna.tsinghua.edu.cn/simple/ -``` - -### 验证安装 - -查看应用日志,应该看到以下信息: - -``` -✅ oblite.so 存在,大小: XXXXX bytes -✅ PYTHONPATH 已配置 -✅ SeekDB 数据库连接正常 -✅ 应用启动成功! -``` - -## 2.2 独立使用 SeekDB - -如果要在其他 Python 项目中使用 SeekDB: - -**方法一:通过 pip 安装(推荐)** - -```bash -pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple/ -``` - -**方法二:直接下载(不推荐)** - -```bash -# 注意:0.0.1.dev4 版本建议通过 pip 安装 -# 直接下载 .so 文件的方式不再推荐 -``` - -**最简使用(0.0.1.dev4 版本)** - -```python -import seekdb -seekdb.open() # 默认打开本地数据库目录 oblite.db(可自定义路径) -conn = seekdb.connect() # 默认连接数据库 test -cursor = conn.cursor() -cursor.execute("create table t1(c1 int primary key, c2 int)") -``` - -> **注意**: 从 0.0.1.dev4 版本开始,使用 `import seekdb` 而非 `import oblite`。 - -# 3. 
AI Native - -## 3.1 向量检索 - -```python -import seekdb - -seekdb.open("./mine_kb.db") -conn = seekdb.connect("test") -cursor = conn.cursor() -cursor.execute("create table test_vector(c1 int primary key, c2 vector(2), vector index idx1(c2) with (distance=l2, type=hnsw, lib=vsag))") - -cursor.execute("insert into test_vector values(1, [1, 1])") -cursor.execute("insert into test_vector values(2, [1, 2])") -cursor.execute("insert into test_vector values(3, [1, 3])") -conn.commit() - -cursor.execute("SELECT c1 FROM test_vector ORDER BY l2_distance(c2, '[1, 2.5]') APPROXIMATE LIMIT 2;") -print(cursor.fetchall()) -``` - -## 3.2 全文检索 - -```python -import seekdb - -seekdb.open("./mine_kb.db") -conn = seekdb.connect("test") -cursor = conn.cursor() -sql='''create table articles (title VARCHAR(200) primary key, body Text, - FULLTEXT fts_idx(title, body)); - ''' -cursor.execute(sql) - -sql='''insert into articles(title, body) values - ('OceanBase Tutorial', 'This is a tutorial about OceanBase Fulltext.'), - ('Fulltext Index', 'Fulltext index can be very useful.'), - ('OceanBase Test Case', 'Writing test cases helps ensure quality.') - ''' -cursor.execute(sql) -conn.commit() - -sql='''select - title, - match (title, body) against ("OceanBase") as score -from - articles -where - match (title, body) against ("OceanBase") -order by - score desc - ''' -cursor.execute(sql) -print(cursor.fetchall()) -``` - -## 3.3 混合检索 - -待patch44x到轻量版功能 - -```python -import seekdb - -seekdb.open("./mine_kb.db") -conn = seekdb.connect("test") -cursor = conn.cursor() -cursor.execute("create table doc_table(c1 int, vector vector(3), query varchar(255), content varchar(255), vector index idx1(vector) with (distance=l2, type=hnsw, lib=vsag), fulltext idx2(query), fulltext idx3(content))") - -sql = '''insert into doc_table values(1, '[1,2,3]', "hello world", "oceanbase Elasticsearch database"), - (2, '[1,2,1]', "hello world, what is your name", "oceanbase mysql database"), - (3, '[1,1,1]', "hello world, how 
are you", "oceanbase oracle database"), - (4, '[1,3,1]', "real world, where are you from", "postgres oracle database"), - (5, '[1,3,2]', "real world, how old are you", "redis oracle database"), - (6, '[2,1,1]', "hello world, where are you from", "starrocks oceanbase database");''' -cursor.execute(sql) -conn.commit() - -sql = '''set @parm = '{ - "query": { - "bool": { - "must": [ - {"match": {"query": "hi hello"}}, - {"match": { "content": "oceanbase mysql" }} - ] - } - }, - "knn" : { - "field": "vector", - "k": 5, - "num_candidates": 10, - "query_vector": [1,2,3], - "boost": 0.7 - }, - "_source" : ["query", "content", "_keyword_score", "_semantic_score"] - }';''' -cursor.execute(sql) -sql = '''select dbms_hybrid_search.search('doc_table', @parm);''' -cursor.execute(sql) -print(cursor.fetchall()) -``` - -# 4. 分析能力(OLAP) - -## 4.1 数据导入 - -```bash -cat /data/1/example.csv -1,10 -2,20 -3,30 -``` - - - -```python -import seekdb - -seekdb.open("./mine_kb.db") -conn = seekdb.connect("test") -cursor = conn.cursor() -cursor.execute("create table test_olap(c1 int, c2 int)") -cursor.execute("load data /*+ direct(true, 0) */ infile '/data/1/example.csv' into table test_olap fields terminated by ','") -cursor.execute("select count(*) from test_olap") -print(cursor.fetchall()) -``` - -## 4.2 列存 - -```python -import seekdb - -seekdb.open("./mine_kb.db") -conn = seekdb.connect("test") -cursor = conn.cursor() -sql='''create table each_column_group (col1 varchar(30) not null, col2 varchar(30) not null, col3 varchar(30) not null, col4 varchar(30) not null, col5 int) - with column group (each column); - ''' -cursor.execute(sql) -sql='''insert into each_column_group values('a', 'b', 'c', 'd', 1) - ''' -cursor.execute(sql) -conn.commit() -cursor.execute("select col1,col2 from each_column_group") -print(cursor.fetchall()) -``` - -## 4.3 物化视图 - -```python -import seekdb - -seekdb.open("./mine_kb.db") -conn = seekdb.connect("test") -cursor = conn.cursor() -cursor.execute("create table 
base_t1(a int primary key, b int)") -cursor.execute("create table base_t2(c int primary key, d int)") -cursor.execute("create materialized view log on base_t1 with(b)") -cursor.execute("create materialized view log on base_t2 with(d)") -cursor.execute("create materialized view mv REFRESH fast START WITH sysdate() NEXT sysdate() + INTERVAL 1 second as select a,b,c,d from base_t1 join base_t2 on base_t1.a=base_t2.c") -cursor.execute("insert into base_t1 values(1, 10)") -cursor.execute("insert into base_t2 values(1, 100)") -conn.commit() - -cursor.execute("select * from mv") -print(cursor.fetchall()) -``` - -## 4.4 外表 - -```bash -cat /data/1/example.csv -1,10 -2,20 -3,30 -``` - - - -```python -import seekdb - -seekdb.open("./mine_kb.db") -conn = seekdb.connect("test") -cursor = conn.cursor() -sql='''CREATE EXTERNAL TABLE test_external_table(c1 int, c2 int) LOCATION='/data/1' FORMAT=(TYPE='CSV' FIELD_DELIMITER=',') PATTERN='example.csv'; -''' -cursor.execute(sql) -cursor.execute("select * from test_external_table") -print(cursor.fetchall()) -``` - -# 5. 事务能力(OLTP) - -```python -import seekdb - -# open db -seekdb.open("./mine_kb.db") -# get connect -conn = seekdb.connect("test") -# create table -cursor = conn.cursor() -cursor.execute("create table test_oltp(c1 int primary key, c2 int)") -# insert -cursor.execute("insert into test_oltp values(1, 10)") -cursor.execute("insert into test_oltp values(2, 20)") -cursor.execute("insert into test_oltp values(3, 30)") -conn.commit() -# select -cursor.execute("select *,ORA_ROWSCN from test_oltp") -print(cursor.fetchall()) -``` - -**0.0.1.dev4 新特性:自动提交模式** - -```python -import seekdb - -seekdb.open("./mine_kb.db") -# 使用自动提交模式(无需手动 commit) -conn = seekdb.connect("test", autocommit=True) -cursor = conn.cursor() -cursor.execute("insert into test_oltp values(4, 40)") # 自动提交 -``` - -# 6. 
平滑切换至分布式版本 - -用户通过嵌入式版本快速验证好产品原型后,想切换至分布式版本使用集群分布式处理能力,只需要修改导入包和相关配置即可,主体应用逻辑保持不变 - -```bash -import pymysql -conn = pymysql.connect(host='127.0.0.1', port=11002, user='root@sys', database='test') -``` +If you later move to a distributed SeekDB/OceanBase server, you would switch the client to the server mode (e.g. MySQL-compatible connection). Application logic above the adapter can remain the same. diff --git a/package.json b/package.json index ca5910b..7cce4a1 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,7 @@ "build": "tsc && vite build", "preview": "vite preview", "tauri": "tauri", - "tauri:dev": "RUST_LOG=debug,info tauri dev", + "tauri:dev": "RUST_LOG=debug,info CONFIG_DIR=com.mine-kb.app tauri dev", "tauri:build": "bash scripts/clean-and-build.sh", "tauri:sign": "bash scripts/sign-and-notarize.sh", "test": "vitest", diff --git a/scripts/debug_db_data.py b/scripts/debug_db_data.py deleted file mode 100755 index 17ce43a..0000000 --- a/scripts/debug_db_data.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env python3 -""" -调试数据库数据格式问题的脚本 -检查projects和conversations表中的数据,特别是日期字段 -""" - -import sys -import os - -# 添加 python 目录到路径 -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src-tauri', 'python')) - -try: - import seekdb - print("✅ seekdb 模块导入成功") -except ImportError as e: - print(f"❌ 无法导入 seekdb: {e}") - sys.exit(1) - -def main(): - db_path = os.path.expanduser("~/.local/share/mine-kb/seekdb.db") - db_name = "mine_kb" - - print(f"\n🔍 检查数据库: {db_path}") - print(f" 数据库名: {db_name}\n") - - try: - # 打开数据库 - seekdb.open(db_path) - print("✅ 数据库打开成功") - - # 连接到数据库 - conn = seekdb.connect(db_name) - cursor = conn.cursor() - print(f"✅ 连接到数据库 '{db_name}' 成功\n") - - # 检查 projects 表 - print("=" * 60) - print("检查 projects 表") - print("=" * 60) - - cursor.execute("SELECT id, name, description, status, document_count, created_at, updated_at FROM projects") - projects = cursor.fetchall() - - print(f"找到 {len(projects)} 个项目:\n") - - for i, proj in 
enumerate(projects, 1): - print(f"项目 {i}:") - print(f" ID: {proj[0]}") - print(f" 名称: {proj[1]}") - print(f" 描述: {proj[2]}") - print(f" 状态: {proj[3]}") - print(f" 文档数: {proj[4]}") - print(f" 创建时间: {proj[5]} (类型: {type(proj[5])})") - print(f" 更新时间: {proj[6]} (类型: {type(proj[6])})") - - # 检查日期字段是否为空或格式不正确 - if proj[5] is None or proj[5] == "": - print(f" ⚠️ 创建时间为空或无效") - if proj[6] is None or proj[6] == "": - print(f" ⚠️ 更新时间为空或无效") - print() - - # 检查 conversations 表 - print("=" * 60) - print("检查 conversations 表") - print("=" * 60) - - cursor.execute("SELECT id, project_id, title, created_at, updated_at, message_count FROM conversations") - conversations = cursor.fetchall() - - print(f"找到 {len(conversations)} 个对话:\n") - - for i, conv in enumerate(conversations, 1): - print(f"对话 {i}:") - print(f" ID: {conv[0]}") - print(f" 项目ID: {conv[1]}") - print(f" 标题: {conv[2]}") - print(f" 创建时间: {conv[3]} (类型: {type(conv[3])})") - print(f" 更新时间: {conv[4]} (类型: {type(conv[4])})") - print(f" 消息数: {conv[5]}") - - # 检查日期字段是否为空或格式不正确 - if conv[3] is None or conv[3] == "": - print(f" ⚠️ 创建时间为空或无效") - if conv[4] is None or conv[4] == "": - print(f" ⚠️ 更新时间为空或无效") - print() - - # 检查 messages 表 - print("=" * 60) - print("检查 messages 表") - print("=" * 60) - - cursor.execute("SELECT COUNT(*) FROM messages") - msg_count = cursor.fetchone()[0] - print(f"找到 {msg_count} 条消息\n") - - if msg_count > 0: - cursor.execute("SELECT id, conversation_id, role, created_at FROM messages LIMIT 3") - messages = cursor.fetchall() - - print("显示前 3 条消息:") - for i, msg in enumerate(messages, 1): - print(f"\n消息 {i}:") - print(f" ID: {msg[0]}") - print(f" 对话ID: {msg[1]}") - print(f" 角色: {msg[2]}") - print(f" 创建时间: {msg[3]} (类型: {type(msg[3])})") - - if msg[3] is None or msg[3] == "": - print(f" ⚠️ 创建时间为空或无效") - - conn.close() - print("\n✅ 数据库检查完成") - - except Exception as e: - print(f"\n❌ 错误: {e}") - import traceback - traceback.print_exc() - return 1 - - return 0 - -if __name__ == "__main__": - sys.exit(main()) - 
diff --git a/scripts/test_message_order.sh b/scripts/test_message_order.sh deleted file mode 100755 index aa22039..0000000 --- a/scripts/test_message_order.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -# 测试消息顺序修复 - -echo "🧪 测试消息顺序修复" -echo "====================" -echo "" - -# 检查数据库是否存在 -DB_PATH="$HOME/.mine-kb/mine_kb.db" -if [ ! -f "$DB_PATH" ]; then - echo "❌ 数据库文件不存在: $DB_PATH" - echo " 请先运行应用并创建一些对话" - exit 1 -fi - -echo "✅ 找到数据库: $DB_PATH" -echo "" - -# 运行 Python 验证脚本 -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PYTHON_SCRIPT="$SCRIPT_DIR/verify_message_order.py" - -if [ ! -f "$PYTHON_SCRIPT" ]; then - echo "❌ 验证脚本不存在: $PYTHON_SCRIPT" - exit 1 -fi - -echo "🔍 检查数据库中的消息顺序..." -echo "" - -python3 "$PYTHON_SCRIPT" - -echo "" -echo "✅ 测试完成" -echo "" -echo "📝 说明:" -echo " - 消息应该按时间升序排列(从旧到新)" -echo " - 最早的消息显示在上面" -echo " - 最新的消息显示在下面" -echo "" -echo "🚀 重启应用后,进入任意历史对话验证修复效果" - diff --git a/scripts/test_oblite_upsert.py b/scripts/test_oblite_upsert.py deleted file mode 100755 index f8429df..0000000 --- a/scripts/test_oblite_upsert.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python3 -""" -测试 ObLite 数据库的 UPSERT 语法支持 -""" -import sys -import os - -# 添加父目录到 Python 路径 -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -try: - import seekdb - print("✅ seekdb 模块加载成功") -except ImportError as e: - print(f"❌ 无法导入 seekdb: {e}") - print(f"PYTHONPATH: {os.environ.get('PYTHONPATH', '(未设置)')}") - sys.exit(1) - -def test_upsert_syntax(): - """测试不同的 UPSERT 语法""" - - # 创建临时测试数据库 - test_db_path = "/tmp/test_seekdb_upsert.db" - test_db_name = "test_upsert" - - print(f"\n📋 测试 SeekDB UPSERT 语法") - print(f"数据库路径: {test_db_path}") - print(f"数据库名: {test_db_name}") - - try: - # 初始化数据库 - seekdb.open(test_db_path) - print("✅ 数据库打开成功") - - # 创建数据库 - admin_conn = seekdb.connect("") - admin_cursor = admin_conn.cursor() - admin_cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{test_db_name}`") - admin_conn.commit() - admin_conn.close() - 
print(f"✅ 数据库 '{test_db_name}' 已创建") - - # 连接到测试数据库 - conn = seekdb.connect(test_db_name) - cursor = conn.cursor() - print(f"✅ 已连接到数据库 '{test_db_name}'") - - # 创建测试表 - cursor.execute(""" - CREATE TABLE IF NOT EXISTS test_projects ( - id VARCHAR(36) PRIMARY KEY, - name TEXT NOT NULL, - value INTEGER DEFAULT 0 - ) - """) - conn.commit() - print("✅ 测试表创建成功") - - # 测试 1: 基本 INSERT - print("\n📝 测试 1: 基本 INSERT") - cursor.execute("INSERT INTO test_projects VALUES ('test-1', 'Project 1', 10)") - conn.commit() - cursor.execute("SELECT * FROM test_projects WHERE id = 'test-1'") - result = cursor.fetchone() - print(f" 结果: {result}") - - # 测试 2: REPLACE INTO (MySQL 风格) - print("\n📝 测试 2: REPLACE INTO") - try: - cursor.execute("REPLACE INTO test_projects VALUES ('test-1', 'Project 1 Updated', 20)") - conn.commit() - cursor.execute("SELECT * FROM test_projects WHERE id = 'test-1'") - result = cursor.fetchone() - print(f" ✅ REPLACE INTO 语法支持!") - print(f" 结果: {result}") - except Exception as e: - print(f" ❌ REPLACE INTO 不支持: {e}") - - # 测试 3: ON DUPLICATE KEY UPDATE (MySQL 风格) - print("\n📝 测试 3: ON DUPLICATE KEY UPDATE") - try: - cursor.execute(""" - INSERT INTO test_projects VALUES ('test-2', 'Project 2', 30) - ON DUPLICATE KEY UPDATE name = 'Project 2 Updated', value = 40 - """) - conn.commit() - cursor.execute("SELECT * FROM test_projects WHERE id = 'test-2'") - result = cursor.fetchone() - print(f" ✅ ON DUPLICATE KEY UPDATE 语法支持!") - print(f" 结果: {result}") - - # 再次执行以测试更新 - cursor.execute(""" - INSERT INTO test_projects VALUES ('test-2', 'Project 2 Updated Again', 50) - ON DUPLICATE KEY UPDATE name = 'Project 2 Updated Again', value = 50 - """) - conn.commit() - cursor.execute("SELECT * FROM test_projects WHERE id = 'test-2'") - result = cursor.fetchone() - print(f" 结果(更新后): {result}") - except Exception as e: - print(f" ❌ ON DUPLICATE KEY UPDATE 不支持: {e}") - - # 测试 4: ON CONFLICT DO UPDATE (SQLite 风格) - print("\n📝 测试 4: ON CONFLICT DO UPDATE") - try: - cursor.execute(""" - 
INSERT INTO test_projects VALUES ('test-3', 'Project 3', 60) - ON CONFLICT(id) DO UPDATE SET name = 'Project 3 Updated', value = 70 - """) - conn.commit() - cursor.execute("SELECT * FROM test_projects WHERE id = 'test-3'") - result = cursor.fetchone() - print(f" ✅ ON CONFLICT DO UPDATE 语法支持!") - print(f" 结果: {result}") - except Exception as e: - print(f" ❌ ON CONFLICT DO UPDATE 不支持: {e}") - - # 测试 5: INSERT ... ON CONFLICT DO UPDATE with excluded (SQLite 风格) - print("\n📝 测试 5: INSERT ... ON CONFLICT DO UPDATE with excluded") - try: - cursor.execute(""" - INSERT INTO test_projects (id, name, value) - VALUES ('test-4', 'Project 4', 80) - ON CONFLICT(id) DO UPDATE SET - name = excluded.name, - value = excluded.value - """) - conn.commit() - cursor.execute("SELECT * FROM test_projects WHERE id = 'test-4'") - result = cursor.fetchone() - print(f" ✅ ON CONFLICT DO UPDATE with excluded 语法支持!") - print(f" 结果: {result}") - except Exception as e: - print(f" ❌ ON CONFLICT DO UPDATE with excluded 不支持: {e}") - - # 显示所有数据 - print("\n📊 最终数据:") - cursor.execute("SELECT * FROM test_projects ORDER BY id") - for row in cursor.fetchall(): - print(f" {row}") - - # 清理 - conn.close() - print("\n✅ 测试完成") - - except Exception as e: - print(f"\n❌ 测试失败: {e}") - import traceback - traceback.print_exc() - return False - - return True - -if __name__ == "__main__": - success = test_upsert_syntax() - sys.exit(0 if success else 1) - diff --git a/scripts/test_path_finder.sh b/scripts/test_path_finder.sh deleted file mode 100644 index 873bf61..0000000 --- a/scripts/test_path_finder.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -echo "Testing SeekDB path finder..." 
-echo "" - -# Test from project root -echo "Test 1: From project root" -cd /home/ubuntu/Desktop/mine-kb -if [ -f "src-tauri/python/seekdb_bridge.py" ]; then - echo " ✅ Found: src-tauri/python/seekdb_bridge.py" -else - echo " ❌ Not found: src-tauri/python/seekdb_bridge.py" -fi - -# Test from src-tauri -echo "" -echo "Test 2: From src-tauri directory" -cd /home/ubuntu/Desktop/mine-kb/src-tauri -if [ -f "python/seekdb_bridge.py" ]; then - echo " ✅ Found: python/seekdb_bridge.py" -else - echo " ❌ Not found: python/seekdb_bridge.py" -fi - -# Test from src-tauri/src -echo "" -echo "Test 3: From src-tauri/src directory" -cd /home/ubuntu/Desktop/mine-kb/src-tauri/src -if [ -f "../python/seekdb_bridge.py" ]; then - echo " ✅ Found: ../python/seekdb_bridge.py" -else - echo " ❌ Not found: ../python/seekdb_bridge.py" -fi - -echo "" -echo "All paths exist correctly!" - diff --git a/scripts/verify_fix.sh b/scripts/verify_fix.sh deleted file mode 100755 index a30188c..0000000 --- a/scripts/verify_fix.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash -# 验证 ObLite execute() 修复的快速测试脚本 - -echo "🔍 验证修复..." 
-echo "" - -# 检查修改的文件 -echo "📋 检查修改的文件:" -echo " - src-tauri/python/seekdb_bridge.py" - -# 检查关键方法是否存在 -if grep -q "def format_sql_value" src-tauri/python/seekdb_bridge.py; then - echo " ✅ format_sql_value() 方法已添加" -else - echo " ❌ format_sql_value() 方法未找到" -fi - -if grep -q "def build_sql_with_values" src-tauri/python/seekdb_bridge.py; then - echo " ✅ build_sql_with_values() 方法已添加" -else - echo " ❌ build_sql_with_values() 方法未找到" -fi - -# 检查 handle_execute 是否更新 -if grep -q "final_sql = self.build_sql_with_values" src-tauri/python/seekdb_bridge.py; then - echo " ✅ handle_execute() 已更新使用新方法" -else - echo " ❌ handle_execute() 未更新" -fi - -echo "" -echo "📝 修复文档:" -if [ -f "docs/FIX_OBLITE_EXECUTE_ERROR.md" ]; then - echo " ✅ docs/FIX_OBLITE_EXECUTE_ERROR.md 已创建" -else - echo " ❌ 修复文档未找到" -fi - -echo "" -echo "🧪 测试脚本:" -if [ -f "scripts/test_oblite_upsert.py" ]; then - echo " ✅ scripts/test_oblite_upsert.py 已创建" -else - echo " ❌ 测试脚本未找到" -fi - -echo "" -echo "✅ 修复验证完成!" -echo "" -echo "📖 下一步:" -echo " 1. 重新编译 Tauri 应用" -echo " 2. 启动应用并测试创建知识库" -echo " 3. 
查看日志确认没有 execute() 参数错误" -echo "" - diff --git a/scripts/verify_message_order.py b/scripts/verify_message_order.py deleted file mode 100755 index 6d33037..0000000 --- a/scripts/verify_message_order.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python3 -""" -验证消息排序顺序的测试脚本 -""" -import sys -import os - -# 添加 python 目录到路径 -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../src-tauri/python')) - -from seekdb_bridge import SeekDbBridge, Value -from pathlib import Path - -def verify_message_order(): - """验证数据库中消息的排序顺序""" - # 获取数据库路径 - home = Path.home() - db_path = home / '.mine-kb' / 'mine_kb.db' - - print(f"📂 数据库路径: {db_path}") - - if not db_path.exists(): - print("❌ 数据库文件不存在") - return - - # 初始化 SeekDB - bridge = SeekDbBridge() - bridge.init_db(str(db_path.parent / 'oblite.db'), 'mine_kb') - - # 获取所有对话 - print("\n🔍 查询所有对话...") - conversations = bridge.query( - "SELECT id, title FROM conversations LIMIT 5", - [] - ) - - if not conversations: - print("⚠️ 没有找到对话") - return - - print(f"✅ 找到 {len(conversations)} 个对话\n") - - # 检查每个对话的消息顺序 - for conv in conversations: - conv_id = conv[0] - conv_title = conv[1] - - print(f"📝 对话: {conv_title} (ID: {conv_id})") - print("-" * 60) - - # 查询消息(不带 ORDER BY,看数据库原始顺序) - messages = bridge.query( - "SELECT id, role, created_at, SUBSTR(content, 1, 50) as content_preview FROM messages WHERE conversation_id = ?", - [Value.String(conv_id)] - ) - - if not messages: - print(" (无消息)\n") - continue - - print(f" 找到 {len(messages)} 条消息:") - for idx, msg in enumerate(messages, 1): - msg_id = msg[0] - role = msg[1] - created_at = msg[2] - content_preview = msg[3] - - print(f" {idx}. 
[{role}] {created_at}") - print(f" 内容: {content_preview}...") - print() - - # 检查时间顺序 - if len(messages) > 1: - timestamps = [msg[2] for msg in messages] - is_ascending = all(timestamps[i] <= timestamps[i+1] for i in range(len(timestamps)-1)) - is_descending = all(timestamps[i] >= timestamps[i+1] for i in range(len(timestamps)-1)) - - if is_ascending: - print(" ✅ 消息按时间升序排列 (从旧到新)") - elif is_descending: - print(" ⚠️ 消息按时间降序排列 (从新到旧)") - else: - print(" ❌ 消息时间顺序混乱") - - print() - -if __name__ == '__main__': - try: - verify_message_order() - except Exception as e: - print(f"❌ 错误: {e}") - import traceback - traceback.print_exc() - diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index 38170e8..bea6fc6 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -26,6 +26,19 @@ dependencies = [ "pom", ] +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.3", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -50,6 +63,12 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -59,12 +78,84 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.1", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.1", +] + [[package]] name = "anyhow" version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + +[[package]] +name = "async-compression" +version = "0.4.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93c1f86859c1af3d514fa19e8323147ff10ea98684e6c7b307912509f50e67b2" +dependencies = [ + "compression-codecs", + "compression-core", + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "async-stream" version = "0.3.6" @@ -87,6 +178,17 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + 
"proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "atk" version = "0.15.1" @@ -111,6 +213,21 @@ dependencies = [ "system-deps 6.2.2", ] +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.5.0" @@ -150,6 +267,12 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + [[package]] name = "bincode" version = "1.3.3" @@ -159,6 +282,24 @@ dependencies = [ "serde", ] +[[package]] +name = "bindgen" +version = "0.71.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +dependencies = [ + "bitflags 2.9.4", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.106", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -170,6 +311,9 @@ name = "bitflags" version = "2.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +dependencies = [ + "serde", +] [[package]] name = "block" @@ -254,7 +398,7 @@ dependencies = [ "cairo-sys-rs", "glib", "libc", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -294,6 +438,15 @@ version = "1.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfb" version = "0.7.3" @@ -330,6 +483,12 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.42" @@ -344,6 +503,57 @@ dependencies = [ "windows-link", ] +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "clap" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim 0.11.1", +] + +[[package]] +name = "clap_derive" +version = "4.5.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "clap_lex" 
+version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" + [[package]] name = "cocoa" version = "0.24.1" @@ -380,6 +590,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "combine" version = "4.6.7" @@ -390,6 +606,42 @@ dependencies = [ "memchr", ] +[[package]] +name = "compression-codecs" +version = "0.4.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "680dc087785c5230f8e8843e2e57ac7c1c90488b6a91b88caa265410568f441b" +dependencies = [ + "compression-core", + "flate2", + "memchr", +] + +[[package]] +name = "compression-core" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys 0.59.0", +] + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + [[package]] name = "convert_case" version = "0.4.0" @@ -445,6 +697,21 @@ dependencies = [ "libc", ] +[[package]] +name = "crc" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" +dependencies = [ + 
"crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + [[package]] name = "crc32fast" version = "1.5.0" @@ -482,6 +749,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -535,14 +811,38 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "darling" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" +dependencies = [ + "darling_core 0.14.4", + "darling_macro 0.14.4", +] + [[package]] name = "darling" version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.21.3", + "darling_macro 0.21.3", +] + +[[package]] +name = "darling_core" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim 0.10.0", + "syn 1.0.109", ] [[package]] @@ -555,21 +855,43 @@ dependencies = [ "ident_case", "proc-macro2", "quote", - "strsim", + "strsim 0.11.1", "syn 2.0.106", ] +[[package]] +name = "darling_macro" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" +dependencies = [ + "darling_core 0.14.4", + "quote", + "syn 1.0.109", +] + [[package]] 
name = "darling_macro" version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ - "darling_core", + "darling_core 0.21.3", "quote", "syn 2.0.106", ] +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + [[package]] name = "deranged" version = "0.5.4" @@ -580,6 +902,48 @@ dependencies = [ "serde_core", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "derive_builder" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c11bdc11a0c47bc7d37d582b5285da6849c96681023680b906673c5707af7b0f" +dependencies = [ + "darling 0.14.4", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "derive_builder_macro" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e" +dependencies = [ + "derive_builder_core", + "syn 1.0.109", +] + [[package]] name = "derive_more" version = "0.99.20" @@ -600,10 +964,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", + "const-oid", 
"crypto-common", "subtle", ] +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -614,6 +988,18 @@ dependencies = [ "dirs-sys-next", ] +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.5.2", + "windows-sys 0.61.1", +] + [[package]] name = "dirs-sys-next" version = "0.1.2" @@ -621,7 +1007,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ "libc", - "redox_users", + "redox_users 0.4.6", "winapi", ] @@ -652,11 +1038,17 @@ dependencies = [ "image", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "xml-rs", - "zip", + "zip 0.6.6", ] +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + [[package]] name = "dtoa" version = "1.0.10" @@ -684,6 +1076,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +dependencies = [ + "serde", +] + [[package]] name = "embed-resource" version = "2.5.2" @@ -704,6 +1105,12 @@ version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ef6b89e5b37196644d8796de5268852ff179b44e96276cf4290264843743bb7" 
+[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -743,14 +1150,29 @@ dependencies = [ ] [[package]] -name = "euclid" -version = "0.20.14" +name = "esaxx-rs" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" +dependencies = [ + "cc", +] + +[[package]] +name = "euclid" +version = "0.20.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2bb7ef65b3777a325d1eeefefab5b6d4959da54747e33bd6258e789640f307ad" dependencies = [ "num-traits", ] +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + [[package]] name = "fastrand" version = "2.3.0" @@ -895,6 +1317,17 @@ dependencies = [ "futures-util", ] +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot", +] + [[package]] name = "futures-io" version = "0.3.31" @@ -1078,8 +1511,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi 0.11.1+wasi-snapshot-preview1", + "wasm-bindgen", ] [[package]] @@ -1089,9 +1524,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", + "js-sys", "libc", "r-efi", "wasi 0.14.7+wasi-0.2.4", + "wasm-bindgen", ] [[package]] @@ 
-1124,7 +1561,7 @@ dependencies = [ "glib", "libc", "once_cell", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1157,7 +1594,7 @@ dependencies = [ "libc", "once_cell", "smallvec", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1281,7 +1718,7 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http", + "http 0.2.12", "indexmap 2.11.4", "slab", "tokio", @@ -1295,12 +1732,31 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + [[package]] name = "hashbrown" version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +[[package]] +name = "hashlink" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown 0.14.5", +] + [[package]] name = "heck" version = "0.3.3" @@ -1315,6 +1771,9 @@ name = "heck" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +dependencies = [ + "unicode-segmentation", +] [[package]] name = "heck" @@ -1334,6 +1793,34 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hf-hub" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97" +dependencies = [ + "dirs", + "http 1.4.0", + 
"indicatif", + "libc", + "log", + "rand 0.9.2", + "serde", + "serde_json", + "thiserror 2.0.18", + "ureq 2.12.1", + "windows-sys 0.60.2", +] + +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + [[package]] name = "hmac" version = "0.12.1" @@ -1343,6 +1830,12 @@ dependencies = [ "digest", ] +[[package]] +name = "hmac-sha256" +version = "1.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec9d92d097f4749b64e8cc33d924d9f40a2d4eb91402b458014b781f5733d60f" + [[package]] name = "html5ever" version = "0.26.0" @@ -1368,6 +1861,16 @@ dependencies = [ "itoa 1.0.15", ] +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa 1.0.15", +] + [[package]] name = "http-body" version = "0.4.6" @@ -1375,7 +1878,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", - "http", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.4.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -1414,8 +1940,8 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", - "http-body", + "http 0.2.12", + "http-body 
0.4.6", "httparse", "httpdate", "itoa 1.0.15", @@ -1427,6 +1953,58 @@ dependencies = [ "want", ] +[[package]] +name = "hyper" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "httparse", + "itoa 1.0.15", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "rustls 0.21.12", + "tokio", + "tokio-rustls 0.24.1", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +dependencies = [ + "http 1.4.0", + "hyper 1.8.1", + "hyper-util", + "rustls 0.23.37", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower-service", + "webpki-roots 1.0.6", +] + [[package]] name = "hyper-tls" version = "0.5.0" @@ -1434,12 +2012,35 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper", + "hyper 0.14.32", "native-tls", "tokio", "tokio-native-tls", ] +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-channel", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.8.1", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2 0.6.0", + 
"tokio", + "tower-service", + "tracing", +] + [[package]] name = "iana-time-zone" version = "0.1.64" @@ -1642,6 +2243,19 @@ dependencies = [ "serde_core", ] +[[package]] +name = "indicatif" +version = "0.17.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "unicode-width", + "web-time", +] + [[package]] name = "infer" version = "0.13.0" @@ -1677,6 +2291,16 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +[[package]] +name = "iri-string" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "is-terminal" version = "0.4.16" @@ -1688,6 +2312,39 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" 
version = "0.4.8" @@ -1733,7 +2390,7 @@ dependencies = [ "combine", "jni-sys", "log", - "thiserror", + "thiserror 1.0.69", "walkdir", ] @@ -1768,7 +2425,7 @@ dependencies = [ "jsonptr", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1800,6 +2457,9 @@ name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin", +] [[package]] name = "libc" @@ -1807,6 +2467,22 @@ version = "0.2.176" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + [[package]] name = "libredox" version = "0.1.10" @@ -1878,12 +2554,40 @@ dependencies = [ "weezl", ] +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + +[[package]] +name = "lzma-rust2" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1670343e58806300d87950e3401e820b519b9384281bbabfb15e3636689ffd69" + [[package]] name = "mac" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" +[[package]] +name = "macro_rules_attribute" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"65049d7923698040cd0b1ddcced9b0eb14dd22c5f86ae59c3740eab64a676520" +dependencies = [ + "macro_rules_attribute-proc_macro", + "paste", +] + +[[package]] +name = "macro_rules_attribute-proc_macro" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30" + [[package]] name = "malloc_buf" version = "0.0.6" @@ -1922,6 +2626,16 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" +[[package]] +name = "matrixmultiply" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" +dependencies = [ + "autocfg", + "rawpointer", +] + [[package]] name = "md-5" version = "0.10.6" @@ -1959,6 +2673,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-stream", + "async-trait", "base64 0.22.1", "bincode", "chrono", @@ -1969,14 +2684,16 @@ dependencies = [ "log", "pdf-extract", "regex", - "reqwest", + "reqwest 0.11.27", + "seekdb-rs", "serde", "serde_json", "sha1", "sha2", "tauri", "tauri-build", - "thiserror", + "tempfile", + "thiserror 1.0.69", "tokio", "url", "urlencoding", @@ -2011,6 +2728,28 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "monostate" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3341a273f6c9d5bef1908f17b7267bbab0e95c9bf69a0d4dcf8e9e1b2c76ef67" +dependencies = [ + "monostate-impl", + "serde", + "serde_core", +] + +[[package]] +name = "monostate-impl" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "native-tls" version = "0.2.14" @@ -2028,6 +2767,21 @@ dependencies = [ 
"tempfile", ] +[[package]] +name = "ndarray" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "portable-atomic", + "portable-atomic-util", + "rawpointer", +] + [[package]] name = "ndk" version = "0.6.0" @@ -2037,8 +2791,8 @@ dependencies = [ "bitflags 1.3.2", "jni-sys", "ndk-sys", - "num_enum", - "thiserror", + "num_enum 0.5.11", + "thiserror 1.0.69", ] [[package]] @@ -2087,12 +2841,57 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "num-bigint-dig" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" +dependencies = [ + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.5", + "smallvec", + "zeroize", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + [[package]] name = "num-conv" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -2100,6 +2899,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -2108,12 +2908,22 @@ version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f646caf906c20226733ed5b1374287eb97e3c2a5c227ce668c1f2ce20ae57c9" dependencies = [ - "num_enum_derive", + "num_enum_derive 0.5.11", ] [[package]] -name = "num_enum_derive" -version = "0.5.11" +name = "num_enum" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" +dependencies = [ + "num_enum_derive 0.7.5", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799" dependencies = [ @@ -2123,6 +2933,24 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "num_enum_derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "objc" version = "0.2.7" @@ -2177,6 +3005,34 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "onig" +version = 
"6.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" +dependencies = [ + "bitflags 2.9.4", + "libc", + "once_cell", + "onig_sys", +] + +[[package]] +name = "onig_sys" +version = "69.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "open" version = "3.2.0" @@ -2231,6 +3087,36 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + +[[package]] +name = "ort" +version = "2.0.0-rc.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5df903c0d2c07b56950f1058104ab0c8557159f2741782223704de9be73c3c" +dependencies = [ + "ndarray", + "ort-sys", + "smallvec", + "tracing", + "ureq 3.2.0", +] + +[[package]] +name = "ort-sys" +version = "2.0.0-rc.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06503bb33f294c5f1ba484011e053bfa6ae227074bdb841e9863492dc5960d4b" +dependencies = [ + "hmac-sha256", + "lzma-rust2", + "ureq 3.2.0", +] + [[package]] name = "pango" version = "0.15.10" @@ -2279,6 +3165,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "pathdiff" version = "0.2.3" @@ -2300,6 +3192,15 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + [[package]] 
name = "percent-encoding" version = "2.3.2" @@ -2452,6 +3353,27 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.32" @@ -2490,6 +3412,21 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60f6ce597ecdcc9a098e7fddacb1065093a3d66446fa16c675e7e71d1b5c28e6" +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "portable-atomic-util" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" +dependencies = [ + "portable-atomic", +] + [[package]] name = "postscript" version = "0.14.1" @@ -2584,6 +3521,61 @@ dependencies = [ "memchr", ] +[[package]] +name = "quinn" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls 0.23.37", + "socket2 0.6.0", + "thiserror 2.0.18", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.13" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" +dependencies = [ + "bytes", + "getrandom 0.3.3", + "lru-slab", + "rand 0.9.2", + "ring", + "rustc-hash", + "rustls 0.23.37", + "rustls-pki-types", + "slab", + "thiserror 2.0.18", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2 0.6.0", + "tracing", + "windows-sys 0.60.2", +] + [[package]] name = "quote" version = "1.0.41" @@ -2624,6 +3616,16 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + [[package]] name = "rand_chacha" version = "0.2.2" @@ -2644,6 +3646,16 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", +] + [[package]] name = "rand_core" version = "0.5.1" @@ -2662,6 +3674,15 @@ dependencies = [ "getrandom 0.2.16", ] +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.3", +] + [[package]] name = "rand_hc" version = "0.2.0" @@ -2692,6 +3713,43 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2ff9a1f06a88b01621b7ae906ef0211290d1c8a168a15542486a8f61c0833b9" 
+[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-cond" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9" +dependencies = [ + "either", + "itertools 0.11.0", + "rayon", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.17" @@ -2709,7 +3767,18 @@ checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ "getrandom 0.2.16", "libredox", - "thiserror", + "thiserror 1.0.69", +] + +[[package]] +name = "redox_users" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" +dependencies = [ + "getrandom 0.2.16", + "libredox", + "thiserror 2.0.18", ] [[package]] @@ -2767,15 +3836,17 @@ version = "0.11.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" dependencies = [ + "async-compression", "base64 0.21.7", "bytes", "encoding_rs", "futures-core", "futures-util", "h2", - "http", - "http-body", - "hyper", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper-rustls 0.24.2", "hyper-tls", "ipnet", "js-sys", @@ -2785,14 +3856,16 @@ 
dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", + "rustls 0.21.12", "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", - "sync_wrapper", + "sync_wrapper 0.1.2", "system-configuration", "tokio", "tokio-native-tls", + "tokio-rustls 0.24.1", "tokio-util", "tower-service", "url", @@ -2800,9 +3873,50 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", + "webpki-roots 0.25.4", "winreg 0.50.0", ] +[[package]] +name = "reqwest" +version = "0.12.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64 0.22.1", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-rustls 0.27.7", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls 0.23.37", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper 1.0.2", + "tokio", + "tokio-rustls 0.26.4", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots 1.0.6", +] + [[package]] name = "rfd" version = "0.10.0" @@ -2827,12 +3941,52 @@ dependencies = [ "windows 0.37.0", ] +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.16", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rsa" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" +dependencies = [ + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core 0.6.4", + "signature", + "spki", 
+ "subtle", + "zeroize", +] + [[package]] name = "rustc-demangle" version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustc_version" version = "0.4.1" @@ -2855,6 +4009,33 @@ dependencies = [ "windows-sys 0.61.1", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + +[[package]] +name = "rustls" +version = "0.23.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" +dependencies = [ + "log", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki 0.103.9", + "subtle", + "zeroize", +] + [[package]] name = "rustls-pemfile" version = "1.0.4" @@ -2864,6 +4045,37 @@ dependencies = [ "base64 0.21.7", ] +[[package]] +name = "rustls-pki-types" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +dependencies = [ + "web-time", + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" +dependencies = [ + "ring", + 
"rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.22" @@ -2930,6 +4142,16 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "security-framework" version = "2.11.1" @@ -2953,6 +4175,27 @@ dependencies = [ "libc", ] +[[package]] +name = "seekdb-rs" +version = "0.1.1" +dependencies = [ + "anyhow", + "async-trait", + "bindgen", + "hf-hub", + "once_cell", + "ort", + "reqwest 0.11.27", + "reqwest 0.12.28", + "serde", + "serde_json", + "sqlx", + "thiserror 1.0.69", + "tokenizers", + "tokio", + "zip 1.1.4", +] + [[package]] name = "selectors" version = "0.22.0" @@ -3084,7 +4327,7 @@ version = "3.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7e6c180db0816026a61afa1cff5344fb7ebded7e4d3062772179f2501481c27" dependencies = [ - "darling", + "darling 0.21.3", "proc-macro2", "quote", "syn 2.0.106", @@ -3168,6 +4411,16 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + [[package]] name = "simd-adler32" version = "0.3.7" @@ -3218,6 +4471,17 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "socks" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b" +dependencies = [ + "byteorder", + "libc", + "winapi", +] + [[package]] name = "soup2" version = "0.2.1" @@ -3229,21 +4493,190 @@ dependencies = 
[ "glib", "libc", "once_cell", - "soup2-sys", + "soup2-sys", +] + +[[package]] +name = "soup2-sys" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "009ef427103fcb17f802871647a7fa6c60cbb654b4c4e4c0ac60a31c5f6dc9cf" +dependencies = [ + "bitflags 1.3.2", + "gio-sys", + "glib-sys", + "gobject-sys", + "libc", + "system-deps 5.0.0", +] + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der", +] + +[[package]] +name = "spm_precompiled" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326" +dependencies = [ + "base64 0.13.1", + "nom", + "serde", + "unicode-segmentation", +] + +[[package]] +name = "sqlformat" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bba3a93db0cc4f7bdece8bb09e77e2e785c20bfebf79eb8340ed80708048790" +dependencies = [ + "nom", + "unicode_categories", +] + +[[package]] +name = "sqlx" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9a2ccff1a000a5a59cd33da541d9f2fdcd9e6e8229cc200565942bff36d0aaa" +dependencies = [ + "sqlx-core", + "sqlx-macros", + "sqlx-mysql", +] + +[[package]] +name = "sqlx-core" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24ba59a9342a3d9bab6c56c118be528b27c9b60e490080e9711a04dccac83ef6" +dependencies = [ + "ahash", + "atoi", + "byteorder", + "bytes", + "crc", + "crossbeam-queue", + "either", + "event-listener", + "futures-channel", + "futures-core", + 
"futures-intrusive", + "futures-io", + "futures-util", + "hashlink", + "hex", + "indexmap 2.11.4", + "log", + "memchr", + "once_cell", + "paste", + "percent-encoding", + "rustls 0.21.12", + "rustls-pemfile", + "serde", + "sha2", + "smallvec", + "sqlformat", + "thiserror 1.0.69", + "tokio", + "tokio-stream", + "tracing", + "url", + "webpki-roots 0.25.4", +] + +[[package]] +name = "sqlx-macros" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ea40e2345eb2faa9e1e5e326db8c34711317d2b5e08d0d5741619048a803127" +dependencies = [ + "proc-macro2", + "quote", + "sqlx-core", + "sqlx-macros-core", + "syn 1.0.109", +] + +[[package]] +name = "sqlx-macros-core" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5833ef53aaa16d860e92123292f1f6a3d53c34ba8b1969f152ef1a7bb803f3c8" +dependencies = [ + "dotenvy", + "either", + "heck 0.4.1", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core", + "sqlx-mysql", + "syn 1.0.109", + "tempfile", + "tokio", + "url", ] [[package]] -name = "soup2-sys" -version = "0.2.0" +name = "sqlx-mysql" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "009ef427103fcb17f802871647a7fa6c60cbb654b4c4e4c0ac60a31c5f6dc9cf" +checksum = "1ed31390216d20e538e447a7a9b959e06ed9fc51c37b514b46eb758016ecd418" dependencies = [ - "bitflags 1.3.2", - "gio-sys", - "glib-sys", - "gobject-sys", - "libc", - "system-deps 5.0.0", + "atoi", + "base64 0.21.7", + "bitflags 2.9.4", + "byteorder", + "bytes", + "crc", + "digest", + "dotenvy", + "either", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "generic-array", + "hex", + "hkdf", + "hmac", + "itoa 1.0.15", + "log", + "md-5", + "memchr", + "once_cell", + "percent-encoding", + "rand 0.8.5", + "rsa", + "serde", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 1.0.69", + "tracing", + 
"whoami", ] [[package]] @@ -3286,6 +4719,23 @@ dependencies = [ "quote", ] +[[package]] +name = "stringprep" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", + "unicode-properties", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + [[package]] name = "strsim" version = "0.11.1" @@ -3326,6 +4776,15 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + [[package]] name = "synstructure" version = "0.13.2" @@ -3479,7 +4938,7 @@ dependencies = [ "glob", "gtk", "heck 0.5.0", - "http", + "http 0.2.12", "ignore", "indexmap 1.9.3", "log", @@ -3491,7 +4950,7 @@ dependencies = [ "rand 0.8.5", "raw-window-handle", "regex", - "reqwest", + "reqwest 0.11.27", "rfd", "semver", "serde", @@ -3505,7 +4964,7 @@ dependencies = [ "tauri-runtime-wry", "tauri-utils", "tempfile", - "thiserror", + "thiserror 1.0.69", "tokio", "url", "uuid", @@ -3553,7 +5012,7 @@ dependencies = [ "serde_json", "sha2", "tauri-utils", - "thiserror", + "thiserror 1.0.69", "time", "uuid", "walkdir", @@ -3580,14 +5039,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8066855882f00172935e3fa7d945126580c34dcbabab43f5d4f0c2398a67d47b" dependencies = [ "gtk", - "http", + "http 0.2.12", "http-range", "rand 0.8.5", "raw-window-handle", "serde", "serde_json", "tauri-utils", - "thiserror", + "thiserror 1.0.69", "url", 
"uuid", "webview2-com", @@ -3638,7 +5097,7 @@ dependencies = [ "serde", "serde_json", "serde_with", - "thiserror", + "thiserror 1.0.69", "url", "walkdir", "windows-version", @@ -3699,7 +5158,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl 2.0.18", ] [[package]] @@ -3713,6 +5181,17 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "thread_local" version = "1.1.9" @@ -3789,6 +5268,39 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +[[package]] +name = "tokenizers" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dd47962b0ba36e7fd33518fbf1754d136fd1474000162bbf2a8b5fcb2d3654d" +dependencies = [ + "aho-corasick", + "clap", + "derive_builder", + "esaxx-rs", + "getrandom 0.2.16", + "indicatif", + "itertools 0.12.1", + "lazy_static", + "log", + "macro_rules_attribute", + "monostate", + "onig", + "paste", + "rand 0.8.5", + "rayon", + "rayon-cond", + "regex", + "regex-syntax", + "serde", + "serde_json", + "spm_precompiled", + "thiserror 1.0.69", + "unicode-normalization-alignments", + "unicode-segmentation", + "unicode_categories", +] + [[package]] name = "tokio" version = "1.47.1" @@ -3830,6 +5342,37 @@ dependencies = [ 
"tokio", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls 0.23.37", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-util" version = "0.7.16" @@ -3918,6 +5461,45 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper 1.0.2", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "bitflags 2.9.4", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "iri-string", + "pin-project-lite", + "tower", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + [[package]] name = 
"tower-service" version = "0.3.3" @@ -3930,6 +5512,7 @@ version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -4006,6 +5589,12 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "unicode-bidi" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" + [[package]] name = "unicode-ident" version = "1.0.19" @@ -4021,12 +5610,94 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-normalization-alignments" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" +dependencies = [ + "smallvec", +] + +[[package]] +name = "unicode-properties" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" + [[package]] name = "unicode-segmentation" version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "ureq" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +dependencies = [ + "base64 0.22.1", + "flate2", + "log", + "once_cell", + "rustls 0.23.37", + "rustls-pki-types", + "serde", + "serde_json", + "socks", + "url", + "webpki-roots 0.26.11", +] + +[[package]] +name = "ureq" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc97a28575b85cfedf2a7e7d3cc64b3e11bd8ac766666318003abbacc7a21fc" +dependencies = [ + "base64 0.22.1", + "der", + "log", + "native-tls", + "percent-encoding", + "rustls-pki-types", + "socks", + "ureq-proto", + "utf-8", + "webpki-root-certs", +] + +[[package]] +name = "ureq-proto" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d81f9efa9df032be5934a46a068815a10a042b494b6a58cb0a1a97bb5467ed6f" +dependencies = [ + "base64 0.22.1", + "http 1.4.0", + "httparse", + "log", +] + [[package]] name = "url" version = "2.5.7" @@ -4057,6 +5728,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "uuid" version = "1.18.1" @@ -4168,6 +5845,12 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + [[package]] name = "wasm-bindgen" version = "0.2.104" @@ -4263,6 +5946,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = 
"1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "webkit2gtk" version = "0.18.2" @@ -4310,6 +6003,39 @@ dependencies = [ "system-deps 6.2.2", ] +[[package]] +name = "webpki-root-certs" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "804f18a4ac2676ffb4e8b5b5fa9ae38af06df08162314f96a68d2a363e21a8ca" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "webpki-roots" +version = "0.25.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" + +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.6", +] + +[[package]] +name = "webpki-roots" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "webview2-com" version = "0.19.1" @@ -4342,7 +6068,7 @@ dependencies = [ "regex", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "windows 0.39.0", "windows-bindgen", "windows-metadata", @@ -4354,6 +6080,16 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3" +[[package]] +name = "whoami" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" +dependencies = [ + "libredox", + "wasite", +] + [[package]] name = "winapi" version = "0.3.9" @@ -4936,7 +6672,7 @@ dependencies = [ 
"glib", "gtk", "html5ever", - "http", + "http 0.2.12", "kuchikiki", "libc", "log", @@ -4948,7 +6684,7 @@ dependencies = [ "sha2", "soup2", "tao", - "thiserror", + "thiserror 1.0.69", "url", "webkit2gtk", "webkit2gtk-sys", @@ -5059,6 +6795,12 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + [[package]] name = "zerotrie" version = "0.2.2" @@ -5103,3 +6845,19 @@ dependencies = [ "crossbeam-utils", "flate2", ] + +[[package]] +name = "zip" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cc23c04387f4da0374be4533ad1208cbb091d5c11d070dfef13676ad6497164" +dependencies = [ + "arbitrary", + "crc32fast", + "crossbeam-utils", + "displaydoc", + "flate2", + "indexmap 2.11.4", + "num_enum 0.7.5", + "thiserror 1.0.69", +] diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index e6ed5b7..5289c3d 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -45,7 +45,9 @@ regex = "1.0" sha2 = "0.10" # 文件系统操作 walkdir = "2.0" -# 嵌入式向量数据库 (现使用 SeekDB via Python subprocess) +# seekdb rust sdk +seekdb-rs = { path = "../../seekdb-rs", default-features = false, features = ["embedded"] } +async-trait = "0.1" # rusqlite = { version = "0.29", features = ["bundled"] } # 序列化 bincode = "1.3" @@ -58,6 +60,9 @@ sha1 = "0.10" url = "2.4" urlencoding = "2.1" +[dev-dependencies] +tempfile = "3" + [features] # This feature is used for production builds or when `devPath` points to the filesystem # DO NOT REMOVE!! 
diff --git a/src-tauri/python/__pycache__/seekdb_bridge.cpython-310.pyc b/src-tauri/python/__pycache__/seekdb_bridge.cpython-310.pyc deleted file mode 100644 index 3d746bc..0000000 Binary files a/src-tauri/python/__pycache__/seekdb_bridge.cpython-310.pyc and /dev/null differ diff --git a/src-tauri/python/install_deps.sh b/src-tauri/python/install_deps.sh deleted file mode 100755 index fa0e492..0000000 --- a/src-tauri/python/install_deps.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash -# Install Python dependencies for SeekDB bridge -# Note: The application will automatically create a virtual environment and install dependencies. -# This script is provided for manual installation if needed. - -set -e - -echo "🐍 Installing Python dependencies for SeekDB..." -echo - -# Check if Python 3 is installed -if ! command -v python3 &> /dev/null; then - echo "❌ Error: Python 3 is not installed" - echo "Please install Python 3 first:" - echo " Ubuntu/Debian: sudo apt install python3 python3-venv python3-pip" - echo " macOS: brew install python3" - echo " Windows: Download from python.org" - exit 1 -fi - -echo "✓ Python 3 found: $(python3 --version)" -echo - -# Determine the application data directory -if [[ "$OSTYPE" == "darwin"* ]]; then - # macOS - APP_DATA_DIR="$HOME/Library/Application Support/com.mine-kb.app" -elif [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then - # Windows - APP_DATA_DIR="$APPDATA/com.mine-kb.app" -else - # Linux - APP_DATA_DIR="$HOME/.local/share/com.mine-kb.app" -fi - -VENV_DIR="$APP_DATA_DIR/venv" - -echo "📁 Virtual environment directory: $VENV_DIR" -echo - -# Create virtual environment if it doesn't exist -if [ ! -d "$VENV_DIR" ]; then - echo "🔧 Creating Python virtual environment..." - python3 -m venv "$VENV_DIR" - echo "✅ Virtual environment created" -else - echo "✅ Virtual environment already exists" -fi - -# Activate virtual environment -echo "🔌 Activating virtual environment..." 
-if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then - source "$VENV_DIR/Scripts/activate" -else - source "$VENV_DIR/bin/activate" -fi - -# Upgrade pip -echo "⬆️ Upgrading pip..." -pip install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple/ - -# Install seekdb -echo "📦 Installing seekdb package..." -pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple/ - -echo -echo "✅ All dependencies installed successfully!" -echo -echo "To verify installation, run:" -echo " $VENV_DIR/bin/python3 -c 'import seekdb; print(\"SeekDB OK\")'" -echo -echo "Note: The application will use this virtual environment automatically." - diff --git a/src-tauri/python/migrate_sqlite_to_seekdb.py b/src-tauri/python/migrate_sqlite_to_seekdb.py deleted file mode 100755 index 587dbc4..0000000 --- a/src-tauri/python/migrate_sqlite_to_seekdb.py +++ /dev/null @@ -1,330 +0,0 @@ -#!/usr/bin/env python3 -""" -Migration script to transfer data from SQLite to SeekDB - -Usage: - python migrate_sqlite_to_seekdb.py - -Example: - python migrate_sqlite_to_seekdb.py mine_kb.db ./oblite.db -""" - -import sys -import sqlite3 -import seekdb -import json -import struct -from typing import List, Tuple - -def read_blob_as_f64_array(blob_data: bytes) -> List[float]: - """ - Convert binary blob (bincode serialized Vec) to list of floats - This assumes the blob is a bincode-serialized Rust Vec - """ - try: - # bincode format for Vec: 8-byte length + array of f64s - if len(blob_data) < 8: - return [] - - # Read length (u64, little-endian) - length = struct.unpack(' len(blob_data): - break - value = struct.unpack(' 1536: - # Truncate - embedding_list = embedding_list[:1536] - - # Convert to JSON array string - embedding_str = "[" + ",".join(str(v) for v in embedding_list) + "]" - - return (id, project_id, document_id, chunk_index, content, - embedding_str, metadata, created_at) - - migrate_table( - sqlite_cursor, - seekdb_cursor, - "vector_documents", - ["id", "project_id", 
"document_id", "chunk_index", "content", - "embedding", "metadata", "created_at"], - transform_row=transform_vector_row - ) - - print() - - # Migrate conversations - migrate_table( - sqlite_cursor, - seekdb_cursor, - "conversations", - ["id", "project_id", "title", "created_at", "updated_at", "message_count"] - ) - - print() - - # Migrate messages - migrate_table( - sqlite_cursor, - seekdb_cursor, - "messages", - ["id", "conversation_id", "role", "content", "created_at", "sources"] - ) - - print() - print("💾 Committing changes...") - try: - seekdb_conn.commit() - print(" ✓ Changes committed") - except Exception as e: - print(f" ✗ Failed to commit: {e}") - sqlite_conn.close() - seekdb_conn.close() - return 1 - - # Close connections - sqlite_conn.close() - seekdb_conn.close() - - print() - print("="*60) - print("✅ Migration completed successfully!") - print("="*60) - - return 0 - -if __name__ == "__main__": - if len(sys.argv) < 3: - print("Usage: python migrate_sqlite_to_seekdb.py [db_name]") - print() - print("Example:") - print(" python migrate_sqlite_to_seekdb.py mine_kb.db ./oblite.db") - print(" python migrate_sqlite_to_seekdb.py mine_kb.db ./oblite.db custom_db") - sys.exit(1) - - sqlite_path = sys.argv[1] - seekdb_path = sys.argv[2] - db_name = sys.argv[3] if len(sys.argv) > 3 else "mine_kb" - - exit_code = migrate_sqlite_to_seekdb(sqlite_path, seekdb_path, db_name) - sys.exit(exit_code) - diff --git a/src-tauri/python/requirements.txt b/src-tauri/python/requirements.txt deleted file mode 100644 index a0e980a..0000000 --- a/src-tauri/python/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -# Python dependencies for SeekDB bridge -# Install with: pip install -r requirements.txt - -# SeekDB embedded database -seekdb==0.0.1.dev4 - diff --git a/src-tauri/python/seekdb_bridge.py b/src-tauri/python/seekdb_bridge.py deleted file mode 100755 index 69b4d3a..0000000 --- a/src-tauri/python/seekdb_bridge.py +++ /dev/null @@ -1,354 +0,0 @@ -#!/usr/bin/env python3 -""" 
-SeekDB Bridge - Python subprocess that handles database operations via JSON protocol -Communicates with Rust via stdin/stdout using newline-delimited JSON -""" - -import sys -import json -import traceback -import os -from typing import Any, Dict, List, Optional -from datetime import datetime, date -from decimal import Decimal - -# 尝试导入 seekdb,如果失败则提供详细的错误信息 -try: - import seekdb -except ImportError as e: - print(f"[SeekDB Bridge] ❌ 无法导入 seekdb 模块", file=sys.stderr) - print(f"[SeekDB Bridge] 错误详情: {e}", file=sys.stderr) - print(f"[SeekDB Bridge] ", file=sys.stderr) - print(f"[SeekDB Bridge] 诊断信息:", file=sys.stderr) - print(f"[SeekDB Bridge] - Python 版本: {sys.version}", file=sys.stderr) - print(f"[SeekDB Bridge] - Python 路径: {sys.executable}", file=sys.stderr) - print(f"[SeekDB Bridge] - PYTHONPATH: {os.environ.get('PYTHONPATH', '(未设置)')}", file=sys.stderr) - print(f"[SeekDB Bridge] - sys.path: {sys.path}", file=sys.stderr) - print(f"[SeekDB Bridge] ", file=sys.stderr) - print(f"[SeekDB Bridge] 解决方法:", file=sys.stderr) - print(f"[SeekDB Bridge] 1. 确保 seekdb 包已安装", file=sys.stderr) - print(f"[SeekDB Bridge] 2. 通过 pip 安装: python -m pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple", file=sys.stderr) - print(f"[SeekDB Bridge] 3. 
检查虚拟环境是否正确激活", file=sys.stderr) - sys.exit(1) -except Exception as e: - print(f"[SeekDB Bridge] ❌ 加载 seekdb 模块时发生未知错误", file=sys.stderr) - print(f"[SeekDB Bridge] 错误详情: {e}", file=sys.stderr) - print(f"[SeekDB Bridge] Traceback:", file=sys.stderr) - traceback.print_exc(file=sys.stderr) - sys.exit(1) - -class SeekDBBridge: - def __init__(self): - self.conn = None - self.cursor = None - self.db_path = None - self.db_name = None - - def log(self, msg: str): - """Log to stderr (stdout is reserved for responses)""" - print(f"[SeekDB Bridge] {msg}", file=sys.stderr, flush=True) - - def convert_value_for_json(self, value: Any) -> Any: - """Convert Python objects to JSON-serializable format""" - if value is None: - return None - elif isinstance(value, (datetime, date)): - # Convert datetime/date to ISO format string - return value.isoformat() - elif isinstance(value, Decimal): - # Convert Decimal to float - return float(value) - elif isinstance(value, bytes): - # Convert bytes to base64 string - import base64 - return base64.b64encode(value).decode('utf-8') - elif isinstance(value, (list, tuple)): - # Recursively convert list/tuple items - return [self.convert_value_for_json(v) for v in value] - elif isinstance(value, dict): - # Recursively convert dict values - return {k: self.convert_value_for_json(v) for k, v in value.items()} - else: - # Return as-is for basic types (str, int, float, bool) - return value - - def format_sql_value(self, value: Any) -> str: - """Format a Python value to SQL string representation for ObLite""" - if value is None: - return "NULL" - elif isinstance(value, bool): - return "1" if value else "0" - elif isinstance(value, (int, float)): - return str(value) - elif isinstance(value, str): - # Escape single quotes in strings - escaped = value.replace("'", "''") - return f"'{escaped}'" - elif isinstance(value, list): - # For vector/array values - return str(value) - else: - # For other types, convert to string and quote - escaped = 
str(value).replace("'", "''") - return f"'{escaped}'" - - def build_sql_with_values(self, sql: str, values: List[Any]) -> str: - """ - Replace ? placeholders in SQL with actual values - ObLite doesn't support parameterized queries, so we embed values directly - """ - if not values: - return sql - - # Replace ? with actual values - result = sql - for value in values: - formatted_value = self.format_sql_value(value) - # Replace the first occurrence of ? - result = result.replace("?", formatted_value, 1) - - return result - - def send_response(self, response: Dict[str, Any]): - """Send JSON response to stdout""" - json.dump(response, sys.stdout) - sys.stdout.write('\n') - sys.stdout.flush() - - def send_success(self, data: Any = None): - """Send success response""" - self.send_response({"status": "success", "data": data}) - - def send_error(self, error: str, details: str = ""): - """Send error response""" - self.send_response({ - "status": "error", - "error": error, - "details": details - }) - - def handle_init(self, params: Dict[str, Any]): - """Initialize SeekDB connection""" - try: - db_path = params.get("db_path", "./seekdb.db") - db_name = params.get("db_name", "mine_kb") - - self.log(f"Initializing SeekDB: path={db_path}, db={db_name}") - - # Open database instance - seekdb.open(db_path) - - # Always ensure database exists before connecting - # Note: In seekdb 0.0.1.dev4, connect() will validate database existence - try: - self.log(f"Ensuring database '{db_name}' exists...") - # Connect to default "test" database to create new database - # SeekDB 0.0.1.dev4: connects to "test" by default when unspecified - admin_conn = seekdb.connect("test") - admin_cursor = admin_conn.cursor() - admin_cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{db_name}`") - admin_conn.commit() - admin_conn.close() - self.log(f"✅ Database '{db_name}' created successfully") - except Exception as create_error: - self.log(f"❌ Error: Failed to create database: {create_error}") - 
self.log(f"Traceback: {traceback.format_exc()}") - # If database creation fails, raise exception to prevent connecting to non-existent database - raise Exception(f"Cannot create database '{db_name}': {create_error}") - - # Now connect to the database - self.conn = seekdb.connect(db_name) - self.log(f"✅ Connected to database '{db_name}'") - - self.cursor = self.conn.cursor() - self.db_path = db_path - self.db_name = db_name - - # Ensure we're using the correct database - try: - self.cursor.execute(f"USE `{db_name}`") - self.log(f"Switched to database '{db_name}'") - except Exception as use_error: - self.log(f"Warning: Failed to execute USE {db_name}: {use_error}") - # This might not be supported, continue anyway - - self.log("SeekDB initialized successfully") - self.send_success({"db_path": db_path, "db_name": db_name}) - - except Exception as e: - self.log(f"Init error: {e}") - self.log(f"Traceback: {traceback.format_exc()}") - error_details = ( - f"数据库初始化失败\n" - f"路径: {params.get('db_path', './oblite.db')}\n" - f"数据库名: {params.get('db_name', 'mine_kb')}\n" - f"错误: {str(e)}" - ) - self.send_error("InitError", error_details) - - def handle_execute(self, params: Dict[str, Any]): - """Execute SQL statement (INSERT, UPDATE, DELETE, CREATE, etc.)""" - try: - sql = params["sql"] - values = params.get("values", []) - - # ObLite doesn't support parameterized queries, embed values directly - final_sql = self.build_sql_with_values(sql, values) - - self.log(f"Executing: {final_sql[:200]}...") - - # ObLite execute() only accepts one argument - self.cursor.execute(final_sql) - - rows_affected = self.cursor.rowcount if hasattr(self.cursor, 'rowcount') else 0 - self.send_success({"rows_affected": rows_affected}) - - except Exception as e: - self.log(f"Execute error: {e}") - self.send_error("ExecuteError", str(e)) - - def handle_query(self, params: Dict[str, Any]): - """Execute SELECT query and return results""" - try: - sql = params["sql"] - values = params.get("values", []) - - 
# ObLite doesn't support parameterized queries, embed values directly - final_sql = self.build_sql_with_values(sql, values) - - self.log(f"Querying: {final_sql[:200]}...") - - # ObLite execute() only accepts one argument - self.cursor.execute(final_sql) - - rows = self.cursor.fetchall() - - # Convert rows to list of lists, handling datetime and other special types - if rows: - result = [] - for row in rows: - converted_row = [self.convert_value_for_json(val) for val in row] - result.append(converted_row) - else: - result = [] - - self.log(f"Query returned {len(result)} rows") - self.send_success({"rows": result}) - - except Exception as e: - self.log(f"Query error: {e}") - self.log(f"Traceback: {traceback.format_exc()}") - self.send_error("QueryError", str(e)) - - def handle_query_one(self, params: Dict[str, Any]): - """Execute SELECT query and return first row""" - try: - sql = params["sql"] - values = params.get("values", []) - - # ObLite doesn't support parameterized queries, embed values directly - final_sql = self.build_sql_with_values(sql, values) - - # ObLite execute() only accepts one argument - self.cursor.execute(final_sql) - - row = self.cursor.fetchone() - - # Convert row values, handling datetime and other special types - if row: - result = [self.convert_value_for_json(val) for val in row] - else: - result = None - - self.send_success({"row": result}) - - except Exception as e: - self.log(f"Query one error: {e}") - self.log(f"Traceback: {traceback.format_exc()}") - self.send_error("QueryOneError", str(e)) - - def handle_commit(self, params: Dict[str, Any]): - """Commit current transaction""" - try: - self.log("Committing transaction") - self.conn.commit() - self.send_success() - - except Exception as e: - self.log(f"Commit error: {e}") - self.send_error("CommitError", str(e)) - - def handle_rollback(self, params: Dict[str, Any]): - """Rollback current transaction""" - try: - self.log("Rolling back transaction") - self.conn.rollback() - 
self.send_success() - - except Exception as e: - self.log(f"Rollback error: {e}") - self.send_error("RollbackError", str(e)) - - def handle_ping(self, params: Dict[str, Any]): - """Health check""" - self.send_success({"message": "pong"}) - - def handle_command(self, command: Dict[str, Any]): - """Route command to appropriate handler""" - cmd_type = command.get("command") - params = command.get("params", {}) - - handlers = { - "init": self.handle_init, - "execute": self.handle_execute, - "query": self.handle_query, - "query_one": self.handle_query_one, - "commit": self.handle_commit, - "rollback": self.handle_rollback, - "ping": self.handle_ping, - } - - handler = handlers.get(cmd_type) - if handler: - handler(params) - else: - self.send_error("UnknownCommand", f"Unknown command: {cmd_type}") - - def run(self): - """Main loop - read commands from stdin and execute them""" - self.log("SeekDB Bridge started, waiting for commands...") - - try: - for line in sys.stdin: - line = line.strip() - if not line: - continue - - try: - command = json.loads(line) - self.handle_command(command) - - except json.JSONDecodeError as e: - self.log(f"JSON decode error: {e}") - self.send_error("JSONError", str(e)) - - except Exception as e: - self.log(f"Unexpected error: {e}") - self.log(traceback.format_exc()) - self.send_error("InternalError", str(e)) - - except KeyboardInterrupt: - self.log("Received interrupt signal, shutting down...") - - finally: - if self.conn: - try: - self.conn.close() - self.log("Database connection closed") - except: - pass - -if __name__ == "__main__": - bridge = SeekDBBridge() - bridge.run() - diff --git a/src-tauri/python/test_datetime_fix.py b/src-tauri/python/test_datetime_fix.py deleted file mode 100644 index b020801..0000000 --- a/src-tauri/python/test_datetime_fix.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python3 -""" -测试 datetime 序列化修复 -""" - -import sys -import json -from datetime import datetime, date -from decimal import Decimal - -# 导入 
SeekDBBridge 类 -sys.path.insert(0, '.') -from seekdb_bridge import SeekDBBridge - -def test_datetime_conversion(): - """测试 datetime 转换功能""" - bridge = SeekDBBridge() - - # 测试用例 - test_cases = [ - ("datetime", datetime(2025, 1, 29, 10, 30, 45)), - ("date", date(2025, 1, 29)), - ("decimal", Decimal("123.456")), - ("string", "test string"), - ("int", 42), - ("float", 3.14), - ("bool", True), - ("none", None), - ("list", [1, datetime(2025, 1, 29), "text"]), - ("dict", {"date": datetime(2025, 1, 29), "value": 100}), - ] - - print("测试 datetime 转换功能") - print("=" * 60) - - all_passed = True - for name, value in test_cases: - try: - converted = bridge.convert_value_for_json(value) - json_str = json.dumps(converted) # 尝试序列化为 JSON - print(f"✅ {name:12} | {str(value)[:30]:30} -> {json_str[:40]}") - except Exception as e: - print(f"❌ {name:12} | {str(value)[:30]:30} -> ERROR: {e}") - all_passed = False - - print("=" * 60) - if all_passed: - print("✅ 所有测试通过!") - return 0 - else: - print("❌ 部分测试失败") - return 1 - -if __name__ == "__main__": - exit(test_datetime_conversion()) - diff --git a/src-tauri/python/test_seekdb.py b/src-tauri/python/test_seekdb.py deleted file mode 100755 index b73064d..0000000 --- a/src-tauri/python/test_seekdb.py +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/env python3 -""" -Simple test script to verify SeekDB installation and basic operations -""" - -import sys - -def test_import(): - """Test if seekdb module can be imported""" - print("Testing seekdb import...", end=" ") - try: - import seekdb - print("✅ OK") - return True - except ImportError as e: - print(f"❌ FAILED: {e}") - print("\nPlease install SeekDB:") - print(" pip install seekdb==0.0.1.dev4 -i https://pypi.tuna.tsinghua.edu.cn/simple/") - return False - -def test_basic_operations(): - """Test basic database operations""" - print("\nTesting basic operations...") - - try: - import seekdb - import tempfile - import os - - # Create temp database - temp_dir = tempfile.mkdtemp() - db_path = 
os.path.join(temp_dir, "test.db") - - print(f" Creating database at {db_path}...", end=" ") - seekdb.open(db_path) - conn = seekdb.connect("test_db") - cursor = conn.cursor() - print("✅") - - # Create table - print(" Creating table...", end=" ") - cursor.execute("CREATE TABLE test_table (id INT PRIMARY KEY, name VARCHAR(50))") - print("✅") - - # Insert data - print(" Inserting data...", end=" ") - cursor.execute("INSERT INTO test_table VALUES (1, 'Test')") - conn.commit() - print("✅") - - # Query data - print(" Querying data...", end=" ") - cursor.execute("SELECT * FROM test_table") - rows = cursor.fetchall() - assert len(rows) == 1 - assert rows[0][0] == 1 - assert rows[0][1] == 'Test' - print("✅") - - # Close connection - print(" Closing connection...", end=" ") - conn.close() - print("✅") - - # Cleanup - import shutil - shutil.rmtree(temp_dir) - - print("\n✅ All basic operations passed!") - return True - - except Exception as e: - print(f"❌ FAILED: {e}") - import traceback - traceback.print_exc() - return False - -def test_vector_operations(): - """Test vector operations""" - print("\nTesting vector operations...") - - try: - import seekdb - import tempfile - import os - - temp_dir = tempfile.mkdtemp() - db_path = os.path.join(temp_dir, "test_vector.db") - - print(f" Creating vector database...", end=" ") - seekdb.open(db_path) - conn = seekdb.connect("test_vector") - cursor = conn.cursor() - print("✅") - - # Create table with vector column - print(" Creating table with vector column...", end=" ") - cursor.execute(""" - CREATE TABLE test_vectors ( - id INT PRIMARY KEY, - embedding vector(3) - ) - """) - print("✅") - - # Create vector index - print(" Creating vector index...", end=" ") - try: - cursor.execute(""" - CREATE VECTOR INDEX idx_test ON test_vectors(embedding) - WITH (distance=l2, type=hnsw, lib=vsag) - """) - print("✅") - except Exception as e: - print(f"⚠️ SKIPPED: {e}") - - # Insert vector data - print(" Inserting vector data...", end=" ") - 
cursor.execute("INSERT INTO test_vectors VALUES (1, '[1.0, 2.0, 3.0]')") - cursor.execute("INSERT INTO test_vectors VALUES (2, '[2.0, 3.0, 4.0]')") - conn.commit() - print("✅") - - # Vector similarity search - print(" Testing vector search...", end=" ") - cursor.execute(""" - SELECT id, l2_distance(embedding, '[1.0, 2.0, 3.0]') as distance - FROM test_vectors - ORDER BY distance - LIMIT 1 - """) - rows = cursor.fetchall() - assert len(rows) == 1 - assert rows[0][0] == 1 # Should return the exact match - print("✅") - - conn.close() - - # Cleanup - import shutil - shutil.rmtree(temp_dir) - - print("\n✅ All vector operations passed!") - return True - - except Exception as e: - print(f"❌ FAILED: {e}") - import traceback - traceback.print_exc() - return False - -def main(): - """Run all tests""" - print("="*60) - print("SeekDB Installation Test") - print("="*60) - - # Test import - if not test_import(): - sys.exit(1) - - # Test basic operations - if not test_basic_operations(): - sys.exit(1) - - # Test vector operations - if not test_vector_operations(): - sys.exit(1) - - print("\n" + "="*60) - print("✅ All tests passed! 
SeekDB is ready to use.") - print("="*60) - sys.exit(0) - -if __name__ == "__main__": - main() - diff --git a/src-tauri/src/commands/chat.rs b/src-tauri/src/commands/chat.rs index 4f0205b..40d2b78 100644 --- a/src-tauri/src/commands/chat.rs +++ b/src-tauri/src/commands/chat.rs @@ -253,7 +253,7 @@ pub async fn send_message( let document_service = state.document_service(); let document_service_guard = document_service.lock().await; - match document_service_guard.search_similar_chunks(&project_id.to_string(), &request.content, 5).await { + match document_service_guard.search_similar_chunks_for_chat(&project_id.to_string(), &request.content, 5).await { Ok(chunks) => { log::info!("✅ [CHAT] SeekDB向量检索成功,找到 {} 个相关文档块", chunks.len()); @@ -430,8 +430,9 @@ pub async fn send_message( let document_service = state.document_service(); let doc_service_guard = document_service.lock().await; let db = doc_service_guard.get_vector_db(); - let mut db_guard = db.lock().await; - db_guard.save_message(&message_clone) + let adapter = db.lock().await.clone(); + adapter.save_message(&message_clone) + .await .map_err(|e| { log::error!("❌ [CHAT] 更新消息 sources 失败: {}", e); format!("更新消息 sources 失败: {}", e) diff --git a/src-tauri/src/commands/documents.rs b/src-tauri/src/commands/documents.rs index 3809cfa..ac7d4cf 100644 --- a/src-tauri/src/commands/documents.rs +++ b/src-tauri/src/commands/documents.rs @@ -150,25 +150,36 @@ pub async fn upload_documents( // 更新项目的文档数量 { - // 先计算文档数量(从数据库查询,确保是累加的总数) + // 从数据库查询实际文档数(vector_documents 中该项目的 DISTINCT document_id 数) let doc_count = { let doc_service = state.document_service(); let doc_service_guard = doc_service.lock().await; doc_service_guard.count_documents(Some(project_id)).await }; - log::info!("📊 项目 {} 的文档总数: {}", project_id, doc_count); - - // 然后更新项目 let project_service = state.project_service(); let mut project_service_guard = project_service.lock().await; if let Some(project) = project_service_guard.get_project_mut(project_id) { - 
project.document_count = doc_count as u32; + let previous_count = project.document_count as usize; + // 若 DB 统计为 0 但本批有成功上传,用「原数量 + 本批成功数」兜底,避免界面一直显示 0 + let final_count = if doc_count == 0 && !successful_docs.is_empty() { + let fallback = previous_count + successful_docs.len(); + log::warn!( + "📊 项目 {} 的 DB 文档数为 0,本批成功 {} 个,使用兜底数量: {}", + project_id, + successful_docs.len(), + fallback + ); + fallback + } else { + log::info!("📊 项目 {} 的文档总数: {} (DB)", project_id, doc_count); + doc_count + }; + project.document_count = final_count as u32; project.updated_at = chrono::Utc::now(); - // 保存更新到数据库 let project_clone = project.clone(); - let _ = project_service_guard.save_project_to_db(&project_clone); + let _ = project_service_guard.save_project_to_db(&project_clone).await; } } diff --git a/src-tauri/src/commands/projects.rs b/src-tauri/src/commands/projects.rs index 7d38f05..69df691 100644 --- a/src-tauri/src/commands/projects.rs +++ b/src-tauri/src/commands/projects.rs @@ -50,6 +50,7 @@ pub async fn create_project( let mut project_service = project_service_arc.lock().await; project_service .create_project(request.name.clone(), request.description.clone()) + .await .map_err(|e| format!("创建项目失败: {}", e))? 
}; @@ -80,9 +81,8 @@ pub async fn create_project( project.document_count = document_count; project.updated_at = chrono::Utc::now(); } - // 保存更新后的项目到数据库 if let Some(project) = project_service.get_project(project_id) { - let _ = project_service.save_project_to_db(project); + let _ = project_service.save_project_to_db(project).await; } project_service .get_project(project_id) @@ -225,6 +225,7 @@ pub async fn delete_project( let mut project_service = project_service_arc.lock().await; project_service .delete_project(project_uuid) + .await .map_err(|e| format!("删除项目失败: {}", e))?; log::info!("项目删除成功: {}", project_id); @@ -258,9 +259,9 @@ pub async fn rename_project( let project_service_arc = state.project_service(); let mut project_service = project_service_arc.lock().await; - // 更新项目名称 project_service .update_project(project_uuid, Some(request.new_name.trim().to_string()), None) + .await .map_err(|e| format!("重命名项目失败: {}", e))?; // 获取更新后的项目信息 diff --git a/src-tauri/src/commands/system.rs b/src-tauri/src/commands/system.rs index b667a65..5cd5415 100644 --- a/src-tauri/src/commands/system.rs +++ b/src-tauri/src/commands/system.rs @@ -1,8 +1,12 @@ use serde::{Deserialize, Serialize}; use tauri::command; +use tauri::State; use tauri::api::dialog::blocking::FileDialogBuilder; use std::path::Path; use std::fs; +use std::io::Write; +use base64::Engine; +use crate::AppDataDirPath; #[derive(Debug, Serialize, Deserialize)] pub struct AppStatusResponse { @@ -28,6 +32,47 @@ pub struct FileInfo { pub size: u64, } +/// 返回当前使用的应用数据目录(与后端一致;前端用于 temp 等路径) +#[command] +pub fn get_app_data_dir(app_data_dir: State<'_, AppDataDirPath>) -> Result { + Ok(app_data_dir.0.clone()) +} + +/// 请求体:前端使用 camelCase(contentBase64),用 serde 对齐 +#[derive(Debug, Deserialize)] +pub struct SaveFileToAppTmpArgs { + pub filename: String, + #[serde(rename = "contentBase64")] + pub content_base64: String, +} + +/// 将文件内容写入应用数据目录下的 tmp,避免前端 fs scope 限制(开发时 CONFIG_DIR 与 $APPDATA 不一致)。 +/// 返回写入后的绝对路径。 +#[command] 
+pub fn save_file_to_app_tmp( + app_data_dir: State<'_, AppDataDirPath>, + args: SaveFileToAppTmpArgs, +) -> Result { + if args.filename.contains("..") || args.filename.contains('/') || args.filename.contains('\\') { + return Err("filename must not contain path segments".to_string()); + } + let tmp_dir = Path::new(&app_data_dir.0).join("tmp"); + if !tmp_dir.exists() { + fs::create_dir_all(&tmp_dir).map_err(|e| e.to_string())?; + } + let file_path = tmp_dir.join(&args.filename); + let bytes = base64::engine::general_purpose::STANDARD + .decode(&args.content_base64) + .map_err(|e| format!("base64 decode: {}", e))?; + let mut f = fs::File::create(&file_path).map_err(|e| e.to_string())?; + f.write_all(&bytes).map_err(|e| e.to_string())?; + f.sync_all().map_err(|e| e.to_string())?; + file_path + .into_os_string() + .into_string() + .map_err(|_| "path not UTF-8".to_string()) +} + #[command] pub async fn get_app_status() -> Result { // TODO: Implement get app status diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 265d028..3808b05 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -3,6 +3,9 @@ #![allow(dead_code)] +/// 应用数据目录路径(供前端 get_app_data_dir 使用,保证 temp 等与后端一致) +pub struct AppDataDirPath(pub String); + pub mod commands; pub mod config; pub mod models; diff --git a/src-tauri/src/main.rs b/src-tauri/src/main.rs index c70a219..6ca4645 100644 --- a/src-tauri/src/main.rs +++ b/src-tauri/src/main.rs @@ -4,10 +4,9 @@ use mine_kb::commands::{chat, documents, projects, system, speech, initialization}; use mine_kb::services::app_state::AppState; -use mine_kb::services::python_env::PythonEnv; -use mine_kb::services::seekdb_package::SeekDbPackage; use mine_kb::config::AppConfig; use mine_kb::app_state_wrapper::AppStateWrapper; +use mine_kb::AppDataDirPath; use std::fs; use std::path::PathBuf; use std::sync::Arc; @@ -32,7 +31,7 @@ impl StartupEvent { fn progress(step: u32, message: impl Into) -> Self { Self { step, - total_steps: 3, + total_steps: 2, 
message: message.into(), status: "progress".to_string(), details: None, @@ -43,7 +42,7 @@ impl StartupEvent { fn progress_with_details(step: u32, message: impl Into, details: impl Into) -> Self { Self { step, - total_steps: 3, + total_steps: 2, message: message.into(), status: "progress".to_string(), details: Some(details.into()), @@ -54,7 +53,7 @@ impl StartupEvent { fn success(step: u32, message: impl Into) -> Self { Self { step, - total_steps: 3, + total_steps: 2, message: message.into(), status: "success".to_string(), details: None, @@ -65,7 +64,7 @@ impl StartupEvent { fn error(message: impl Into, error: impl Into) -> Self { Self { step: 0, - total_steps: 3, + total_steps: 2, message: message.into(), status: "error".to_string(), details: None, @@ -92,98 +91,13 @@ async fn initialize_app_async( log::info!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); // ============================================================ - // 1. Python 环境和 SeekDB 安装 + // 1. 配置文件加载 // ============================================================ log::info!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); - log::info!(" 步骤 1/3: 初始化 Python 环境和 SeekDB"); + log::info!(" 步骤 1/2: 加载配置文件"); log::info!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); - let _ = app_handle.emit_all("startup-progress", StartupEvent::progress(1, "初始化 Python 环境")); - - // 创建 Python 虚拟环境 - let python_env = match PythonEnv::new(&app_data_dir) { - Ok(env) => env, - Err(e) => { - log::error!("Python 环境初始化失败: {}", e); - let _ = app_handle.emit_all("startup-progress", StartupEvent::error( - "Python 环境初始化失败", - format!("{}", e) - )); - return; - } - }; - - if let Err(e) = python_env.ensure_venv() { - log::error!("Python 虚拟环境创建失败: {}", e); - let _ = app_handle.emit_all("startup-progress", StartupEvent::error( - "Python 虚拟环境创建失败", - format!("{}", e) - )); - return; - } - - let _ = app_handle.emit_all("startup-progress", StartupEvent::progress(1, "检查 SeekDB 包")); - - // 检查并安装 SeekDB - let seekdb_pkg = SeekDbPackage::new(&python_env); - 
- match seekdb_pkg.is_installed() { - Ok(false) => { - log::info!("📦 SeekDB 未安装,开始安装..."); - let _ = app_handle.emit_all("startup-progress", StartupEvent::progress_with_details( - 1, - "安装 SeekDB", - "首次运行需要下载并安装 SeekDB(约3GB),可能需要几分钟..." - )); - - if let Err(e) = seekdb_pkg.install() { - log::error!("SeekDB 安装失败: {}", e); - let _ = app_handle.emit_all("startup-progress", StartupEvent::error( - "SeekDB 安装失败", - format!("{}", e) - )); - return; - } - } - Ok(true) => { - log::info!("✅ SeekDB 已安装"); - } - Err(e) => { - log::warn!("⚠️ 检查 SeekDB 安装状态失败,尝试安装: {}", e); - if let Err(e) = seekdb_pkg.install() { - log::error!("SeekDB 安装失败: {}", e); - let _ = app_handle.emit_all("startup-progress", StartupEvent::error( - "SeekDB 安装失败", - format!("{}", e) - )); - return; - } - } - } - - if let Err(e) = seekdb_pkg.verify() { - log::error!("SeekDB 验证失败: {}", e); - let _ = app_handle.emit_all("startup-progress", StartupEvent::error( - "SeekDB 验证失败", - format!("{}", e) - )); - return; - } - - let python_path = python_env.get_python_executable(); - let python_path_str = python_path.to_str().expect("无法转换 Python 路径"); - log::info!("✅ Python 可执行文件: {}", python_path_str); - - let _ = app_handle.emit_all("startup-progress", StartupEvent::success(1, "Python 环境和 SeekDB 准备完成")); - - // ============================================================ - // 2. 
配置文件加载 - // ============================================================ - log::info!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); - log::info!(" 步骤 2/3: 加载配置文件"); - log::info!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); - - let _ = app_handle.emit_all("startup-progress", StartupEvent::progress(2, "加载配置文件")); + let _ = app_handle.emit_all("startup-progress", StartupEvent::progress(1, "加载配置文件")); let app_config = load_app_config(&app_data_dir); @@ -204,30 +118,24 @@ async fn initialize_app_async( return; } - let _ = app_handle.emit_all("startup-progress", StartupEvent::success(2, "配置文件加载完成")); + let _ = app_handle.emit_all("startup-progress", StartupEvent::success(1, "配置文件加载完成")); // ============================================================ - // 3. 初始化应用状态 + // 2. 初始化应用状态 // ============================================================ log::info!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); - log::info!(" 步骤 3/3: 初始化应用状态"); + log::info!(" 步骤 2/2: 初始化应用状态"); log::info!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); let _ = app_handle.emit_all("startup-progress", StartupEvent::progress_with_details( - 3, + 2, "初始化应用状态", "正在初始化向量数据库和AI服务..." 
)); log::info!("开始初始化应用状态..."); - let app_state_result = AppState::new_with_full_config( - &db_path_str, - app_config, - model_cache_dir_str, - Some(python_path_str) - ) - .await; + let app_state_result = AppState::new_with_full_config(&db_path_str, app_config, model_cache_dir_str).await; match app_state_result { Ok(app_state) => { @@ -239,7 +147,7 @@ async fn initialize_app_async( log::info!(" ✅ 应用启动成功!"); log::info!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); - let _ = app_handle.emit_all("startup-progress", StartupEvent::success(3, "应用启动成功!")); + let _ = app_handle.emit_all("startup-progress", StartupEvent::success(2, "应用启动成功!")); } Err(e) => { log::error!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); @@ -263,26 +171,50 @@ fn main() { log::info!(" Setup: 快速准备(非阻塞)"); log::info!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); - // 获取应用数据目录 - let app_data_dir = app - .path_resolver() - .app_data_dir() - .expect("Failed to get app data directory"); + // 应用数据目录:优先使用环境变量 CONFIG_DIR(本地开发可设),否则使用系统应用数据目录(与 Build/安装逻辑一致) + let app_data_dir = std::env::var("CONFIG_DIR") + .ok() + .map(PathBuf::from) + .or_else(|| app.path_resolver().app_data_dir()) + .expect("Failed to get app data directory (set CONFIG_DIR or use default)"); + if std::env::var("CONFIG_DIR").is_ok() { + log::info!("使用 CONFIG_DIR 指定数据目录"); + } // 确保数据目录存在 if !app_data_dir.exists() { fs::create_dir_all(&app_data_dir) .expect("Failed to create app data directory"); } + // 规范为绝对路径,供前端 fs scope 校验通过($APPDATA 解析为绝对路径) + let app_data_dir = app_data_dir + .canonicalize() + .unwrap_or(app_data_dir); + + // 创建 tmp 目录(上传等临时文件),与数据目录一致 + let tmp_dir = app_data_dir.join("tmp"); + if !tmp_dir.exists() { + fs::create_dir_all(&tmp_dir).expect("Failed to create tmp directory"); + } + + // 供前端获取(使 temp 等路径与后端一致;必须为绝对路径以匹配 tauri fs scope) + let app_data_dir_str = app_data_dir + .to_str() + .expect("App data dir not UTF-8") + .to_string(); + app.manage(AppDataDirPath(app_data_dir_str)); - // 创建数据库文件路径 + // 
嵌入模式数据目录:.../com.mine-kb.app/mine_kb.db/(SeekDB 实例目录,数据集中在此目录下不再平铺在 app_data_dir) let db_path = app_data_dir.join("mine_kb.db"); + if !db_path.exists() { + fs::create_dir_all(&db_path).expect("Failed to create SeekDB data directory"); + } let db_path_str = db_path .to_str() .expect("Failed to convert database path to string") .to_string(); - log::info!("数据库文件路径: {}", db_path_str); + log::info!("数据库目录: {}", db_path_str); // 创建模型缓存目录 let model_cache_dir = app_data_dir.join("models"); @@ -372,6 +304,8 @@ fn main() { chat::clear_messages, chat::rename_conversation, // System commands + system::get_app_data_dir, + system::save_file_to_app_tmp, system::get_app_status, system::configure_llm_service, system::select_directory, @@ -384,26 +318,51 @@ fn main() { .expect("error while running tauri application"); } +/// 开发时 src-tauri/config.json 的候选路径(tauri dev 时 cwd 可能是 target/debug,需多路径解析) +fn dev_config_candidates() -> Vec { + let mut candidates = vec![ + PathBuf::from("src-tauri/config.json"), + PathBuf::from("../src-tauri/config.json"), + PathBuf::from("../../src-tauri/config.json"), + ]; + if let Ok(exe) = std::env::current_exe() { + if let Some(root) = exe.parent().and_then(|p| p.parent()).and_then(|p| p.parent()) { + candidates.push(root.join("src-tauri/config.json")); + } + } + if let Ok(cwd) = std::env::current_dir() { + let from_cwd = cwd.join("src-tauri/config.json"); + if !candidates.contains(&from_cwd) { + candidates.push(from_cwd); + } + } + candidates +} + /// 加载应用配置 +/// 开发时优先使用 src-tauri/config.json;打包运行后使用应用数据目录中的 config.json。 fn load_app_config(app_data_dir: &PathBuf) -> Option { - // 配置文件优先级: - // 1. 应用数据目录中的 config.json - // 2. 项目根目录的 config.json - // 3. 
环境变量 - - let config_paths = vec![ - app_data_dir.join("config.json"), - PathBuf::from("config.json"), - PathBuf::from("../config.json"), - ]; + let mut config_paths = dev_config_candidates(); + config_paths.push(app_data_dir.join("config.json")); + config_paths.push(PathBuf::from("config.json")); + config_paths.push(PathBuf::from("../config.json")); for config_path in config_paths { if config_path.exists() { log::info!("尝试从配置文件读取: {:?}", config_path); match AppConfig::load_from_file(&config_path) { Ok(config) => { - log::info!("成功从配置文件读取配置: {:?}", config_path); + let path_display = config_path.canonicalize().unwrap_or(config_path.clone()); + log::info!("当前使用的配置文件(LLM API Key 由此文件提供): {}", path_display.display()); log::info!(" - Model: {}", config.llm.model); + let key_preview = if config.llm.api_key.len() >= 12 { + format!("{}***", &config.llm.api_key[..12]) + } else if config.llm.api_key.is_empty() { + "(空)".to_string() + } else { + "***".to_string() + }; + log::info!(" - API Key: {} (长度 {},若 401 请编辑上方路径对应文件中的 llm.apiKey)", key_preview, config.llm.api_key.len()); log::info!(" - Max Tokens: {:?}", config.llm.max_tokens); log::info!(" - Temperature: {:?}", config.llm.temperature); if let Some(base_url) = &config.llm.base_url { diff --git a/src-tauri/src/services/app_state.rs b/src-tauri/src/services/app_state.rs index 151d105..b95a930 100644 --- a/src-tauri/src/services/app_state.rs +++ b/src-tauri/src/services/app_state.rs @@ -7,6 +7,7 @@ use crate::services::{ use crate::config::{AppConfig, LlmConfig}; use anyhow::{Result, anyhow}; use std::sync::Arc; +use std::time::Instant; use tokio::sync::Mutex; /// 应用全局状态管理 @@ -22,13 +23,12 @@ impl AppState { // 初始化各个服务 let document_service = Arc::new(Mutex::new(DocumentService::new().await?)); - // 获取 document_service 中的 vector_db 引用 let vector_db = { let doc_service = document_service.lock().await; doc_service.get_vector_db() }; - let project_service = Arc::new(Mutex::new(ProjectService::new(vector_db.clone()))); + let 
project_service = Arc::new(Mutex::new(ProjectService::new_async(vector_db.clone()).await?)); let conversation_service = Arc::new(Mutex::new(ConversationService::new(vector_db).await)); // 初始化 LLM 客户端(从环境变量) @@ -47,20 +47,16 @@ impl AppState { } pub async fn new_with_config(db_path: &str, app_config: Option, _model_cache_dir: Option) -> Result { - Self::new_with_full_config(db_path, app_config, _model_cache_dir, None).await + Self::new_with_full_config(db_path, app_config, _model_cache_dir).await } pub async fn new_with_full_config( - db_path: &str, - app_config: Option, + db_path: &str, + app_config: Option, _model_cache_dir: Option, - python_path: Option<&str> ) -> Result { log::info!("📦 初始化应用状态..."); log::info!(" - 数据库路径: {}", db_path); - if let Some(py_path) = python_path { - log::info!(" - Python 路径: {}", py_path); - } // 从配置文件或环境变量获取 API Key let api_key = if let Some(ref config) = app_config { @@ -71,23 +67,27 @@ impl AppState { }; // 获取 embedding base URL(优先使用 embedding 配置,而不是 LLM 配置) - let embedding_base_url = app_config.as_ref() + let embedding_base_url = app_config + .as_ref() .and_then(|c| c.embedding.as_ref()) .and_then(|e| e.base_url.clone()); - // 初始化各个服务,使用指定的数据库路径和 API 配置 + // 初始化各服务(嵌入式 SeekDB) + let t_doc = Instant::now(); let document_service = Arc::new(Mutex::new( - DocumentService::with_full_config(db_path, api_key, embedding_base_url, python_path).await? 
+ DocumentService::with_full_config(db_path, api_key, embedding_base_url).await?, )); + log::info!("📦 DocumentService (含 SeekDB+Embedding) 初始化总耗时: {:?}", t_doc.elapsed()); - // 获取 document_service 中的 vector_db 引用 let vector_db = { let doc_service = document_service.lock().await; doc_service.get_vector_db() }; - let project_service = Arc::new(Mutex::new(ProjectService::new(vector_db.clone()))); + let project_service = Arc::new(Mutex::new(ProjectService::new_async(vector_db.clone()).await?)); + let t_conv = Instant::now(); let conversation_service = Arc::new(Mutex::new(ConversationService::new(vector_db).await)); + log::info!("📦 ConversationService 初始化总耗时: {:?}", t_conv.elapsed()); // 初始化 LLM 客户端(使用配置文件的配置) let llm_config = app_config.as_ref().map(|c| c.llm.clone()); @@ -166,6 +166,8 @@ impl AppState { ) }; + log::info!("[CHAT] LLM API Key 已配置 (长度 {})", api_key.len()); + // 确定 Base URL let base_url = if let Some(url) = base_url_opt { log::info!("使用配置的 Base URL: {}", url); diff --git a/src-tauri/src/services/conversation_service.rs b/src-tauri/src/services/conversation_service.rs index dcd5d83..eef0970 100644 --- a/src-tauri/src/services/conversation_service.rs +++ b/src-tauri/src/services/conversation_service.rs @@ -45,19 +45,17 @@ impl ConversationService { async fn load_from_database(&mut self) -> Result<()> { log::info!("load_from_database: 开始执行"); - let db = self.db.lock().await; + let adapter = self.db.lock().await.clone(); log::info!("load_from_database: 成功获取数据库锁"); - // 加载所有对话 - let conversations = db.load_all_conversations()?; + let conversations = adapter.load_all_conversations().await?; log::info!("✅ 从数据库加载了 {} 个对话", conversations.len()); for conv in conversations { let conv_id = conv.id; log::info!("处理对话: id={}, title={}", conv_id, conv.title); - // 加载该对话的所有消息 - match db.load_messages_by_conversation(&conv_id.to_string()) { + match adapter.load_messages_by_conversation(&conv_id.to_string()).await { Ok(messages) => { log::info!("✅ 对话 {} 加载了 {} 条消息", conv_id, 
messages.len()); self.conversations.insert(conv_id, conv); @@ -66,7 +64,6 @@ impl ConversationService { Err(e) => { log::error!("❌ 对话 {} 加载消息失败: {}", conv_id, e); log::error!("错误详情: {:?}", e); - // 即使某个对话加载失败,也继续加载其他对话 self.conversations.insert(conv_id, conv); self.messages.insert(conv_id, Vec::new()); } @@ -81,11 +78,8 @@ impl ConversationService { let conversation = Conversation::new(project_id, title)?; let conversation_id = conversation.id; - // 保存到数据库 - { - let mut db = self.db.lock().await; - db.save_conversation(&conversation)?; - } + let adapter = self.db.lock().await.clone(); + adapter.save_conversation(&conversation).await?; self.conversations.insert(conversation_id, conversation); self.messages.insert(conversation_id, Vec::new()); @@ -129,31 +123,28 @@ impl ConversationService { let message_id = message.id; log::info!("创建消息对象成功: message_id={}", message_id); - // ⭐ 保存前检查数据库状态 { - let db = self.db.lock().await; - let count = db.get_message_count().unwrap_or(-1); + let adapter = self.db.lock().await.clone(); + let count = adapter.get_message_count().await.unwrap_or(-1); log::warn!("🔍 [BEFORE-SAVE] 锁定数据库前,messages总数: {}", count); } - // 保存消息到数据库 { log::info!("尝试获取数据库锁以保存消息..."); - let mut db = self.db.lock().await; + let adapter = self.db.lock().await.clone(); log::info!("成功获取数据库锁"); log::info!("调用 save_message..."); - db.save_message(&message)?; + adapter.save_message(&message).await?; log::info!("消息保存到数据库成功"); - // ⭐ 保存后立即验证 - let count = db.get_message_count().unwrap_or(-1); + let count = adapter.get_message_count().await.unwrap_or(-1); log::warn!("🔍 [AFTER-SAVE-IN-LOCK] 保存后,释放锁前,messages总数: {}", count); } // ⭐ 释放锁后立即检查 { - let db = self.db.lock().await; - let count = db.get_message_count().unwrap_or(-1); + let adapter = self.db.lock().await.clone(); + let count = adapter.get_message_count().await.unwrap_or(-1); log::warn!("🔍 [AFTER-LOCK-RELEASE] 释放锁后,messages总数: {}", count); } @@ -166,22 +157,19 @@ impl ConversationService { 
conversation.increment_message_count(); log::info!("对话消息计数已更新"); - // 更新对话到数据库 { log::info!("尝试获取数据库锁以更新对话..."); - let mut db = self.db.lock().await; + let adapter = self.db.lock().await.clone(); log::info!("成功获取数据库锁"); - // ⭐ 更新对话前再次检查 - let count = db.get_message_count().unwrap_or(-1); + let count = adapter.get_message_count().await.unwrap_or(-1); log::warn!("🔍 [BEFORE-UPDATE-CONV] 更新对话前,messages总数: {}", count); log::info!("调用 save_conversation..."); - db.save_conversation(conversation)?; + adapter.save_conversation(conversation).await?; log::info!("对话更新到数据库成功"); - // ⭐ 更新后检查 - let count = db.get_message_count().unwrap_or(-1); + let count = adapter.get_message_count().await.unwrap_or(-1); log::warn!("🔍 [AFTER-UPDATE-CONV] 更新对话后,messages总数: {}", count); } @@ -196,20 +184,18 @@ impl ConversationService { conversation.update_title(title)?; - // 保存到数据库 { - let mut db = self.db.lock().await; - db.save_conversation(conversation)?; + let adapter = self.db.lock().await.clone(); + adapter.save_conversation(conversation).await?; } Ok(()) } pub async fn delete_conversation(&mut self, conversation_id: Uuid) -> Result<()> { - // 从数据库删除 { - let mut db = self.db.lock().await; - db.delete_conversation_by_id(&conversation_id.to_string())?; + let adapter = self.db.lock().await.clone(); + adapter.delete_conversation_by_id(&conversation_id.to_string()).await?; } self.conversations @@ -234,19 +220,16 @@ impl ConversationService { return Err(anyhow!("Message not found: {}", message_id)); } - // 从数据库删除 { - let mut db = self.db.lock().await; - db.delete_message_by_id(&message_id.to_string())?; + let adapter = self.db.lock().await.clone(); + adapter.delete_message_by_id(&message_id.to_string()).await?; } - // 更新对话的消息数量 conversation.update_message_count(messages.len() as u32); - // 更新对话到数据库 { - let mut db = self.db.lock().await; - db.save_conversation(conversation)?; + let adapter = self.db.lock().await.clone(); + adapter.save_conversation(conversation).await?; } Ok(()) @@ -257,20 +240,17 
@@ impl ConversationService { .get_mut(&conversation_id) .ok_or_else(|| anyhow!("Conversation not found: {}", conversation_id))?; - // 从数据库删除所有消息 { - let mut db = self.db.lock().await; - db.delete_messages_by_conversation(&conversation_id.to_string())?; + let adapter = self.db.lock().await.clone(); + adapter.delete_messages_by_conversation(&conversation_id.to_string()).await?; } - // 清空内存中的消息 self.messages.entry(conversation_id).or_insert_with(Vec::new).clear(); conversation.update_message_count(0); - // 更新对话到数据库 { - let mut db = self.db.lock().await; - db.save_conversation(conversation)?; + let adapter = self.db.lock().await.clone(); + adapter.save_conversation(conversation).await?; } Ok(()) @@ -285,8 +265,7 @@ impl ConversationService { let mut messages = self.messages.get(&conversation_id).cloned().unwrap_or_default(); - // 确保消息按创建时间升序排序(从旧到新) - messages.sort_by(|a, b| a.timestamp.cmp(&b.timestamp)); + messages.sort_by(|a, b| a.timestamp.cmp(&b.timestamp).then_with(|| a.id.cmp(&b.id))); log::info!("get_conversation_messages: 从内存返回 {} 条消息(已按时间排序)", messages.len()); @@ -317,49 +296,57 @@ impl ConversationService { #[cfg(test)] mod tests { use super::*; + use crate::services::seekdb_adapter::SeekDbAdapter; + use std::sync::Arc; - #[test] - fn test_conversation_service_creation() { - let service = ConversationService::new(); + async fn test_service() -> ConversationService { + let path = std::env::temp_dir().join(format!("mine_kb_test_conv_{}_{}.db", std::process::id(), Uuid::new_v4())); + let adapter = SeekDbAdapter::new_async(&path).await.unwrap(); + ConversationService::new(Arc::new(Mutex::new(adapter))).await + } + + #[tokio::test] + async fn test_conversation_service_creation() { + let service = test_service().await; assert_eq!(service.conversations.len(), 0); } - #[test] - fn test_create_and_get_conversation() { - let mut service = ConversationService::new(); + #[tokio::test] + async fn test_create_and_get_conversation() { + let mut service = test_service().await; let project_id 
= Uuid::new_v4(); - let conversation_id = service.create_conversation(project_id, Some("Test Conversation".to_string())).unwrap(); + let conversation_id = service.create_conversation(project_id, Some("Test Conversation".to_string())).await.unwrap(); let conversation = service.get_conversation(conversation_id).unwrap(); assert_eq!(conversation.title, "Test Conversation"); assert_eq!(conversation.project_id, project_id); - assert_eq!(conversation.messages.len(), 0); + assert_eq!(service.get_conversation_messages(conversation_id).unwrap().len(), 0); } - #[test] - fn test_add_message() { - let mut service = ConversationService::new(); + #[tokio::test] + async fn test_add_message() { + let mut service = test_service().await; let project_id = Uuid::new_v4(); - let conversation_id = service.create_conversation(project_id, Some("Test".to_string())).unwrap(); - let message_id = service.add_message(conversation_id, MessageRole::User, "Hello".to_string()).unwrap(); + let conversation_id = service.create_conversation(project_id, Some("Test".to_string())).await.unwrap(); + let message_id = service.add_message(conversation_id, MessageRole::User, "Hello".to_string()).await.unwrap(); - let conversation = service.get_conversation(conversation_id).unwrap(); - assert_eq!(conversation.messages.len(), 1); - assert_eq!(conversation.messages[0].id, message_id); - assert_eq!(conversation.messages[0].content, "Hello"); + let messages = service.get_conversation_messages(conversation_id).unwrap(); + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].id, message_id); + assert_eq!(messages[0].content, "Hello"); } - #[test] - fn test_list_conversations_by_project() { - let mut service = ConversationService::new(); + #[tokio::test] + async fn test_list_conversations_by_project() { + let mut service = test_service().await; let project1 = Uuid::new_v4(); let project2 = Uuid::new_v4(); - service.create_conversation(project1, Some("Conv 1".to_string())).unwrap(); - 
service.create_conversation(project1, Some("Conv 2".to_string())).unwrap(); - service.create_conversation(project2, Some("Conv 3".to_string())).unwrap(); + service.create_conversation(project1, Some("Conv 1".to_string())).await.unwrap(); + service.create_conversation(project1, Some("Conv 2".to_string())).await.unwrap(); + service.create_conversation(project2, Some("Conv 3".to_string())).await.unwrap(); let project1_conversations = service.list_conversations(Some(project1)); assert_eq!(project1_conversations.len(), 2); @@ -368,15 +355,15 @@ mod tests { assert_eq!(all_conversations.len(), 3); } - #[test] - fn test_delete_conversation() { - let mut service = ConversationService::new(); + #[tokio::test] + async fn test_delete_conversation() { + let mut service = test_service().await; let project_id = Uuid::new_v4(); - let conversation_id = service.create_conversation(project_id, Some("Test".to_string())).unwrap(); + let conversation_id = service.create_conversation(project_id, Some("Test".to_string())).await.unwrap(); assert!(service.get_conversation(conversation_id).is_some()); - service.delete_conversation(conversation_id).unwrap(); + service.delete_conversation(conversation_id).await.unwrap(); assert!(service.get_conversation(conversation_id).is_none()); } } diff --git a/src-tauri/src/services/document_service.rs b/src-tauri/src/services/document_service.rs index 08cc649..6fc3fd9 100644 --- a/src-tauri/src/services/document_service.rs +++ b/src-tauri/src/services/document_service.rs @@ -31,7 +31,7 @@ impl DocumentService { // Use in-memory path for testing/temporary usage let temp_dir = std::env::temp_dir(); let db_path = temp_dir.join("mine_kb_temp.db"); - let vector_db = Arc::new(Mutex::new(SeekDbAdapter::new(db_path)?)); + let vector_db = Arc::new(Mutex::new(SeekDbAdapter::new_async(db_path).await?)); // 从环境变量读取 API Key let api_key = std::env::var("DASHSCOPE_API_KEY") @@ -47,7 +47,7 @@ impl DocumentService { } pub async fn with_db_path(db_path: &str) -> Result { 
- let vector_db = Arc::new(Mutex::new(SeekDbAdapter::new(db_path)?)); + let vector_db = Arc::new(Mutex::new(SeekDbAdapter::new_async(db_path).await?)); let api_key = std::env::var("DASHSCOPE_API_KEY") .map_err(|_| anyhow!("未找到 DASHSCOPE_API_KEY 环境变量"))?; @@ -66,19 +66,16 @@ impl DocumentService { api_key: String, base_url: Option ) -> Result { - Self::with_full_config(db_path, api_key, base_url, None).await + Self::with_full_config(db_path, api_key, base_url).await } pub async fn with_full_config( db_path: &str, api_key: String, base_url: Option, - python_path: Option<&str> ) -> Result { log::info!("🏗️ [DOC-SERVICE] 初始化DocumentService, db_path: {}", db_path); - let vector_db = Arc::new(Mutex::new( - SeekDbAdapter::new_with_python(db_path, python_path.unwrap_or("python3"))? - )); + let vector_db = Arc::new(Mutex::new(SeekDbAdapter::new_async(db_path).await?)); log::info!("🏗️ [DOC-SERVICE] 数据库实例已创建"); log::info!("🎯 使用阿里云百炼 Embedding API (text-embedding-v2)"); @@ -166,10 +163,8 @@ impl DocumentService { } // Store vectors in database - { - let mut db = self.vector_db.lock().await; - db.add_documents(vector_docs)?; - } + let adapter = self.vector_db.lock().await.clone(); + adapter.add_documents(vector_docs).await?; // Update document status document.processing_status = ProcessingStatus::Indexed; @@ -206,20 +201,76 @@ impl DocumentService { let query_embedding = self.embedding_service.embed_text(query).await?; let project_id_str = project_id.map(|id| id.to_string()); - let db = self.vector_db.lock().await; - - // 使用 DashScope embedding,相似度通常在 0.5-0.9 之间 - let results = db.similarity_search( - &query_embedding, - project_id_str.as_deref(), - limit, - 0.5, // DashScope embedding 质量高,可以设置较高阈值 - )?; + let adapter = self.vector_db.lock().await.clone(); + let results = adapter + .similarity_search( + &query_embedding, + project_id_str.as_deref(), + limit, + 0.5, // DashScope embedding 质量高,可以设置较高阈值 + ) + .await?; Ok(results) } + /// 聊天用检索:先尝试混合检索,若嵌入式 SeekDB 报 "Not supported 
feature or function" 则回退到纯向量检索。 + pub async fn search_similar_chunks_for_chat( + &self, + project_id: &str, + query: &str, + top_k: usize, + ) -> Result> { + let query_embedding = self.embedding_service.embed_text(query).await?; + let adapter = self.vector_db.lock().await.clone(); + + // 先尝试混合检索(服务端 SeekDB 支持) + match adapter.hybrid_search(query, &query_embedding, Some(project_id), top_k, 0.7).await { + Ok(results) => { + log::info!("✅ [CHAT] 混合检索成功,找到 {} 个结果", results.len()); + log::info!("📌 [CHAT] 本次检索方式: 混合检索"); + return self.search_results_to_similar_chunks(&results); + } + Err(e) => { + let err_str = e.to_string(); + let err_lower = err_str.to_lowercase(); + let fallback = err_str.contains("Not supported") || err_lower.contains("not supported") + || err_lower.contains("parse error"); + if fallback { + log::info!("⚠️ [CHAT] 混合检索不可用,回退到纯向量检索: {}", err_str); + let results = adapter + .similarity_search(&query_embedding, Some(project_id), top_k, 0.2) + .await?; + log::info!("✅ [CHAT] 向量检索完成,找到 {} 个结果", results.len()); + log::info!("📌 [CHAT] 本次检索方式: 纯向量检索(回退)"); + return self.search_results_to_similar_chunks(&results); + } + return Err(e.into()); + } + } + } + + fn search_results_to_similar_chunks( + &self, + results: &[crate::services::seekdb_adapter::SearchResult], + ) -> Result> { + let chunks: Vec = results + .iter() + .map(|result| { + let filename = result.document.metadata.get("filename").cloned(); + SimilarChunk { + document_id: result.document.document_id.clone(), + filename, + content: result.document.content.clone(), + relevance_score: result.similarity, + } + }) + .collect(); + Ok(chunks) + } + /// 使用混合检索搜索相关文档块(向量+全文,用于聊天上下文) + /// 通过 hybrid_search_by_text 由 adapter 内部做 query 向量化,无需先调用 embed_text。 pub async fn search_similar_chunks_hybrid( &self, project_id: &str, @@ -233,24 +284,17 @@ impl DocumentService { log::info!("💬 查询内容: {}", query); log::info!("📊 返回数量: {}", top_k); - // 使用 DashScope API 生成查询向量 - log::info!("🌐 调用 DashScope Embedding API..."); - 
let query_embedding = self.embedding_service.embed_text(query).await?; - log::info!("✅ 生成查询向量成功,维度: {}", query_embedding.len()); - - // 从向量数据库执行混合搜索 - let db = self.vector_db.lock().await; - - log::info!("🔄 执行混合检索(语义权重=0.7)..."); + let adapter = self.vector_db.lock().await.clone(); + log::info!("🔄 执行混合检索(hybrid_search_by_text,adapter 内部向量化)..."); - // 使用混合检索 (语义权重 0.7 表示更偏重向量相似度) - let results = db.hybrid_search( - query, - &query_embedding, - Some(project_id), - top_k, - 0.7, // semantic boost: 0.7 表示向量检索占 70% 权重 - )?; + let results = adapter + .hybrid_search_by_text( + self.embedding_service.clone(), + Some(project_id), + query, + top_k, + ) + .await?; log::info!("✅ 混合检索完成,找到 {} 个结果", results.len()); @@ -305,19 +349,17 @@ impl DocumentService { let query_embedding = self.embedding_service.embed_text(query).await?; log::info!("✅ 生成查询向量成功,维度: {}", query_embedding.len()); - // 从向量数据库搜索 - let db = self.vector_db.lock().await; - + let adapter = self.vector_db.lock().await.clone(); log::info!("🔍 使用SeekDB向量检索,阈值=0.3"); - // 使用 DashScope embedding,相似度通常在 0.3-0.9 之间 - // 0.3 作为阈值可以获得较宽泛但相关的结果 - let results = db.similarity_search( - &query_embedding, - Some(project_id), - top_k, - 0.3, // DashScope embedding: 0.3=宽泛, 0.4=中等, 0.5+=严格 - )?; + let results = adapter + .similarity_search( + &query_embedding, + Some(project_id), + top_k, + 0.3, // DashScope embedding: 0.3=宽泛, 0.4=中等, 0.5+=严格 + ) + .await?; log::info!("✅ 向量搜索完成(阈值=0.3),找到 {} 个结果", results.len()); @@ -415,12 +457,11 @@ impl DocumentService { // 从数据库查询实际的文档数量,而不是从内存统计 // 这样可以确保统计的是累加的总数,而不是当前批次的数量 if let Some(pid) = project_id { - let db = self.vector_db.lock().await; - match db.count_project_documents(&pid.to_string()) { + let adapter = self.vector_db.lock().await.clone(); + match adapter.count_project_documents(&pid.to_string()).await { Ok(count) => count, Err(e) => { log::error!("从数据库统计文档数量失败: {}", e); - // 降级到内存统计 self.documents .values() .filter(|doc| doc.project_id == pid) @@ -462,20 +503,19 @@ impl 
DocumentService { mod tests { use super::*; - fn create_test_service() -> DocumentService { - let vector_db = VectorDbService::new("localhost", 8000); - DocumentService::new(vector_db) + async fn create_test_service() -> Result { + DocumentService::new().await } - #[test] - fn test_document_service_creation() { - let service = create_test_service(); + #[tokio::test] + async fn test_document_service_creation() { + let service = create_test_service().await.unwrap(); assert_eq!(service.documents.len(), 0); } #[tokio::test] async fn test_add_document() { - let mut service = create_test_service(); + let mut service = create_test_service().await.unwrap(); let project_id = Uuid::new_v4(); // This would fail in a real test because the file doesn't exist @@ -491,9 +531,9 @@ mod tests { assert!(result.is_err()); } - #[test] - fn test_list_documents_by_project() { - let service = create_test_service(); + #[tokio::test] + async fn test_list_documents_by_project() { + let service = create_test_service().await.unwrap(); let project_id = Uuid::new_v4(); let documents = service.list_documents(Some(project_id)); @@ -503,9 +543,9 @@ mod tests { assert_eq!(all_documents.len(), 0); } - #[test] - fn test_supported_file_check() { - let service = create_test_service(); + #[tokio::test] + async fn test_supported_file_check() { + let service = create_test_service().await.unwrap(); assert!(service.is_supported_file("test.txt")); assert!(service.is_supported_file("test.md")); @@ -513,9 +553,9 @@ mod tests { assert!(!service.is_supported_file("test.exe")); } - #[test] - fn test_processing_stats() { - let service = create_test_service(); + #[tokio::test] + async fn test_processing_stats() { + let service = create_test_service().await.unwrap(); let stats = service.get_processing_stats(None); assert!(stats.is_empty()); } diff --git a/src-tauri/src/services/embedded_vector_db.rs b/src-tauri/src/services/embedded_vector_db.rs deleted file mode 100644 index 265643f..0000000 --- 
a/src-tauri/src/services/embedded_vector_db.rs +++ /dev/null @@ -1,974 +0,0 @@ -use anyhow::Result; -use rusqlite::{params, Connection, Row}; -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::path::Path; - -/// 嵌入式向量数据库,基于SQLite实现 -#[derive(Debug)] -pub struct EmbeddedVectorDb { - conn: Connection, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct VectorDocument { - pub id: String, - pub project_id: String, - pub document_id: String, - pub chunk_index: i32, - pub content: String, - pub embedding: Vec, - pub metadata: HashMap, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SearchResult { - pub document: VectorDocument, - pub similarity: f64, -} - -impl EmbeddedVectorDb { - /// 创建新的嵌入式向量数据库实例 - pub fn new>(db_path: P) -> Result { - let db_path_str = db_path.as_ref().display().to_string(); - log::info!("🔗 [NEW-DB] 打开数据库文件: {}", db_path_str); - - // 获取数据库文件的绝对路径 - let absolute_path = std::fs::canonicalize(db_path.as_ref()) - .unwrap_or_else(|_| db_path.as_ref().to_path_buf()); - log::info!("🔗 [NEW-DB] 数据库绝对路径: {:?}", absolute_path); - - let conn = Connection::open(db_path)?; - - // 验证打开的是哪个数据库 - let db_file: String = conn.query_row( - "PRAGMA database_list", - [], - |row| row.get(2) - )?; - log::info!("🔗 [NEW-DB] 实际连接的数据库: {}", db_file); - - // 启用外键约束并设置 WAL 模式和同步选项 - conn.execute_batch( - "PRAGMA foreign_keys = ON; - PRAGMA journal_mode = WAL; - PRAGMA synchronous = FULL;" - )?; - - log::info!("🔗 [NEW-DB] 数据库配置: foreign_keys=ON, journal_mode=WAL, synchronous=FULL"); - - let mut db = Self { conn }; - db.initialize_schema()?; - - // 初始化后立即验证 - let msg_count: i32 = db.conn.query_row( - "SELECT COUNT(*) FROM messages", - [], - |row| row.get(0) - )?; - let conv_count: i32 = db.conn.query_row( - "SELECT COUNT(*) FROM conversations", - [], - |row| row.get(0) - )?; - log::info!("🔗 [NEW-DB] 数据库初始化完成,conversations: {}, messages: {}", - conv_count, msg_count); - - Ok(db) - } - - /// 创建内存数据库(用于测试) - pub fn 
new_in_memory() -> Result { - let conn = Connection::open_in_memory()?; - let mut db = Self { conn }; - db.initialize_schema()?; - Ok(db) - } - - /// 初始化数据库模式 - fn initialize_schema(&mut self) -> Result<()> { - // 创建 projects 表 - self.conn.execute( - "CREATE TABLE IF NOT EXISTS projects ( - id TEXT PRIMARY KEY, - name TEXT NOT NULL, - description TEXT, - status TEXT NOT NULL, - document_count INTEGER DEFAULT 0, - created_at DATETIME NOT NULL, - updated_at DATETIME NOT NULL - )", - [], - )?; - - // 创建 vector_documents 表 - self.conn.execute( - "CREATE TABLE IF NOT EXISTS vector_documents ( - id TEXT PRIMARY KEY, - project_id TEXT NOT NULL, - document_id TEXT NOT NULL, - chunk_index INTEGER NOT NULL, - content TEXT NOT NULL, - embedding BLOB NOT NULL, - metadata TEXT NOT NULL, - created_at DATETIME DEFAULT CURRENT_TIMESTAMP, - UNIQUE(document_id, chunk_index) - )", - [], - )?; - - // 创建索引以提高查询性能 - self.conn.execute( - "CREATE INDEX IF NOT EXISTS idx_project_id ON vector_documents(project_id)", - [], - )?; - - self.conn.execute( - "CREATE INDEX IF NOT EXISTS idx_document_id ON vector_documents(document_id)", - [], - )?; - - // 创建 conversations 表 - self.conn.execute( - "CREATE TABLE IF NOT EXISTS conversations ( - id TEXT PRIMARY KEY, - project_id TEXT NOT NULL, - title TEXT NOT NULL, - created_at DATETIME NOT NULL, - updated_at DATETIME NOT NULL, - message_count INTEGER DEFAULT 0, - FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE - )", - [], - )?; - - // 创建 messages 表 - self.conn.execute( - "CREATE TABLE IF NOT EXISTS messages ( - id TEXT PRIMARY KEY, - conversation_id TEXT NOT NULL, - role TEXT NOT NULL, - content TEXT NOT NULL, - created_at DATETIME NOT NULL, - sources TEXT, - FOREIGN KEY (conversation_id) REFERENCES conversations(id) ON DELETE CASCADE - )", - [], - )?; - - // 如果 messages 表已存在但没有 sources 列,则添加(向后兼容) - let has_sources_column = self.conn - .prepare("SELECT COUNT(*) FROM pragma_table_info('messages') WHERE name='sources'")? 
- .query_row([], |row| row.get::<_, i64>(0)) - .unwrap_or(0) > 0; - - if !has_sources_column { - log::info!("添加 sources 列到 messages 表"); - self.conn.execute("ALTER TABLE messages ADD COLUMN sources TEXT", [])?; - } - - // 创建对话表索引 - self.conn.execute( - "CREATE INDEX IF NOT EXISTS idx_conversation_project_id ON conversations(project_id)", - [], - )?; - - self.conn.execute( - "CREATE INDEX IF NOT EXISTS idx_message_conversation_id ON messages(conversation_id)", - [], - )?; - - Ok(()) - } - - /// 添加向量文档 - pub fn add_document(&mut self, doc: VectorDocument) -> Result<()> { - let embedding_bytes = bincode::serialize(&doc.embedding)?; - let metadata_json = serde_json::to_string(&doc.metadata)?; - - self.conn.execute( - "INSERT OR REPLACE INTO vector_documents - (id, project_id, document_id, chunk_index, content, embedding, metadata) - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", - params![ - doc.id, - doc.project_id, - doc.document_id, - doc.chunk_index, - doc.content, - embedding_bytes, - metadata_json - ], - )?; - - Ok(()) - } - - /// 批量添加向量文档 - pub fn add_documents(&mut self, docs: Vec) -> Result<()> { - let tx = self.conn.transaction()?; - - for doc in docs { - let embedding_bytes = bincode::serialize(&doc.embedding)?; - let metadata_json = serde_json::to_string(&doc.metadata)?; - - tx.execute( - "INSERT OR REPLACE INTO vector_documents - (id, project_id, document_id, chunk_index, content, embedding, metadata) - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", - params![ - doc.id, - doc.project_id, - doc.document_id, - doc.chunk_index, - doc.content, - embedding_bytes, - metadata_json - ], - )?; - } - - tx.commit()?; - Ok(()) - } - - /// 向量相似度搜索 - pub fn similarity_search( - &self, - query_embedding: &[f64], - project_id: Option<&str>, - limit: usize, - threshold: f64, - ) -> Result> { - let mut query = "SELECT * FROM vector_documents".to_string(); - let mut params: Vec> = Vec::new(); - - if let Some(pid) = project_id { - query.push_str(" WHERE project_id = ?"); - 
params.push(Box::new(pid.to_string())); - } - - let mut stmt = self.conn.prepare(&query)?; - let rows = stmt.query_map( - rusqlite::params_from_iter(params.iter().map(|p| p.as_ref())), - |row| self.row_to_vector_document(row), - )?; - - let mut results = Vec::new(); - for row_result in rows { - let doc = row_result?; - let similarity = self.cosine_similarity(query_embedding, &doc.embedding); - - if similarity >= threshold { - results.push(SearchResult { - document: doc, - similarity, - }); - } - } - - // 按相似度降序排序 - results.sort_by(|a, b| b.similarity.partial_cmp(&a.similarity).unwrap()); - - // 限制结果数量 - results.truncate(limit); - - Ok(results) - } - - /// 获取项目的所有文档 - pub fn get_project_documents(&self, project_id: &str) -> Result> { - let mut stmt = self.conn.prepare( - "SELECT * FROM vector_documents WHERE project_id = ? ORDER BY document_id, chunk_index" - )?; - - let rows = stmt.query_map([project_id], |row| self.row_to_vector_document(row))?; - - let mut documents = Vec::new(); - for row_result in rows { - documents.push(row_result?); - } - - Ok(documents) - } - - /// 删除项目的所有文档 - pub fn delete_project_documents(&mut self, project_id: &str) -> Result { - let count = self.conn.execute( - "DELETE FROM vector_documents WHERE project_id = ?", - [project_id], - )?; - Ok(count) - } - - /// 删除特定文档 - pub fn delete_document(&mut self, document_id: &str) -> Result { - let count = self.conn.execute( - "DELETE FROM vector_documents WHERE document_id = ?", - [document_id], - )?; - Ok(count) - } - - /// 获取数据库统计信息 - pub fn get_stats(&self) -> Result> { - let mut stats = HashMap::new(); - - // 总文档数 - let total_docs: i64 = self.conn.query_row( - "SELECT COUNT(*) FROM vector_documents", - [], - |row| row.get(0), - )?; - stats.insert("total_documents".to_string(), total_docs); - - // 项目数 - let total_projects: i64 = self.conn.query_row( - "SELECT COUNT(DISTINCT project_id) FROM vector_documents", - [], - |row| row.get(0), - )?; - stats.insert("total_projects".to_string(), 
total_projects); - - Ok(stats) - } - - /// 统计项目的文档数量(基于不同的 document_id) - pub fn count_project_documents(&self, project_id: &str) -> Result { - let count: i64 = self.conn.query_row( - "SELECT COUNT(DISTINCT document_id) FROM vector_documents WHERE project_id = ?", - [project_id], - |row| row.get(0), - )?; - Ok(count as usize) - } - - /// 将数据库行转换为VectorDocument - fn row_to_vector_document(&self, row: &Row) -> rusqlite::Result { - let embedding_bytes: Vec = row.get("embedding")?; - let embedding: Vec = bincode::deserialize(&embedding_bytes) - .map_err(|e| rusqlite::Error::FromSqlConversionFailure( - 0, - rusqlite::types::Type::Blob, - Box::new(e) - ))?; - - let metadata_json: String = row.get("metadata")?; - let metadata: HashMap = serde_json::from_str(&metadata_json) - .map_err(|e| rusqlite::Error::FromSqlConversionFailure( - 0, - rusqlite::types::Type::Text, - Box::new(e) - ))?; - - Ok(VectorDocument { - id: row.get("id")?, - project_id: row.get("project_id")?, - document_id: row.get("document_id")?, - chunk_index: row.get("chunk_index")?, - content: row.get("content")?, - embedding, - metadata, - }) - } - - /// 计算余弦相似度 - fn cosine_similarity(&self, a: &[f64], b: &[f64]) -> f64 { - if a.len() != b.len() { - return 0.0; - } - - let dot_product: f64 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum(); - let norm_a: f64 = a.iter().map(|x| x * x).sum::().sqrt(); - let norm_b: f64 = b.iter().map(|x| x * x).sum::().sqrt(); - - if norm_a == 0.0 || norm_b == 0.0 { - 0.0 - } else { - dot_product / (norm_a * norm_b) - } - } - - /// 保存项目到数据库 - pub fn save_project(&mut self, project: &crate::models::project::Project) -> Result<()> { - log::info!("💾 [SAVE-PROJECT] 保存项目: id={}, name={}, document_count={}", - project.id, project.name, project.document_count); - - // 使用事务确保数据一致性 - let tx = self.conn.transaction()?; - - // ⚠️ 关键修复:使用 INSERT ... 
ON CONFLICT DO UPDATE 而不是 INSERT OR REPLACE - // INSERT OR REPLACE 会触发 DELETE,导致 CASCADE 删除所有关联的 conversations 和 messages - let rows_affected = tx.execute( - "INSERT INTO projects (id, name, description, status, document_count, created_at, updated_at) - VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7) - ON CONFLICT(id) DO UPDATE SET - name = excluded.name, - description = excluded.description, - status = excluded.status, - document_count = excluded.document_count, - updated_at = excluded.updated_at", - params![ - project.id.to_string(), - project.name, - project.description, - project.status.to_string(), - project.document_count as i64, - project.created_at.to_rfc3339(), - project.updated_at.to_rfc3339() - ], - )?; - - // 提交事务 - tx.commit()?; - - log::info!("💾 [SAVE-PROJECT-END] 项目保存成功,rows_affected={}", rows_affected); - - Ok(()) - } - - /// 从数据库加载所有项目 - pub fn load_all_projects(&self) -> Result> { - let mut stmt = self.conn.prepare( - "SELECT id, name, description, status, document_count, created_at, updated_at - FROM projects ORDER BY updated_at DESC" - )?; - - let rows = stmt.query_map([], |row| { - use chrono::DateTime; - let id: String = row.get(0)?; - let name: String = row.get(1)?; - let description: Option = row.get(2)?; - let status_str: String = row.get(3)?; - let document_count: i64 = row.get(4)?; - let created_at_str: String = row.get(5)?; - let updated_at_str: String = row.get(6)?; - - let id = uuid::Uuid::parse_str(&id) - .map_err(|e| rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)))?; - - let status = match status_str.as_str() { - "Created" => crate::models::project::ProjectStatus::Created, - "Processing" => crate::models::project::ProjectStatus::Processing, - "Ready" => crate::models::project::ProjectStatus::Ready, - "Error" => crate::models::project::ProjectStatus::Error, - _ => crate::models::project::ProjectStatus::Created, - }; - - let created_at = DateTime::parse_from_rfc3339(&created_at_str) - .map_err(|e| 
rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)))? - .with_timezone(&chrono::Utc); - - let updated_at = DateTime::parse_from_rfc3339(&updated_at_str) - .map_err(|e| rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)))? - .with_timezone(&chrono::Utc); - - Ok(crate::models::project::Project { - id, - name, - description, - status, - document_count: document_count as u32, - created_at, - updated_at, - }) - })?; - - let mut projects = Vec::new(); - for row_result in rows { - projects.push(row_result?); - } - - Ok(projects) - } - - /// 从数据库删除项目 - pub fn delete_project_by_id(&mut self, project_id: &str) -> Result { - let count = self.conn.execute( - "DELETE FROM projects WHERE id = ?", - [project_id], - )?; - Ok(count) - } - - /// 更新项目的文档数量 - pub fn update_project_document_count(&mut self, project_id: &str, count: u32) -> Result<()> { - self.conn.execute( - "UPDATE projects SET document_count = ?, updated_at = ? WHERE id = ?", - params![ - count as i64, - chrono::Utc::now().to_rfc3339(), - project_id - ], - )?; - Ok(()) - } - - // ==================== 对话管理方法 ==================== - - /// 保存对话到数据库 - pub fn save_conversation(&mut self, conversation: &crate::models::conversation::Conversation) -> Result<()> { - log::info!("💾 [SAVE-CONV-START] 保存对话: id={}, message_count={}", - conversation.id, conversation.message_count); - - // 使用事务确保数据一致性 - let tx = self.conn.transaction()?; - - // ⚠️ 关键修复:使用 INSERT ... ON CONFLICT DO UPDATE 而不是 INSERT OR REPLACE - // INSERT OR REPLACE 会触发 DELETE,导致 CASCADE 删除所有关联的 messages - let rows_affected = tx.execute( - "INSERT INTO conversations (id, project_id, title, created_at, updated_at, message_count) - VALUES (?, ?, ?, ?, ?, ?) 
- ON CONFLICT(id) DO UPDATE SET - title = excluded.title, - updated_at = excluded.updated_at, - message_count = excluded.message_count", - params![ - conversation.id.to_string(), - conversation.project_id.to_string(), - conversation.title, - conversation.created_at.to_rfc3339(), - conversation.updated_at.to_rfc3339(), - conversation.message_count as i64, - ], - )?; - - // 提交事务 - tx.commit()?; - - log::info!("💾 [SAVE-CONV-END] 对话保存成功,rows_affected={}", rows_affected); - - Ok(()) - } - - /// 从数据库加载指定项目的所有对话 - pub fn load_conversations_by_project(&self, project_id: &str) -> Result> { - use uuid::Uuid; - use chrono::DateTime; - - let mut stmt = self.conn.prepare( - "SELECT id, project_id, title, created_at, updated_at, message_count - FROM conversations - WHERE project_id = ? - ORDER BY updated_at DESC" - )?; - - let rows = stmt.query_map([project_id], |row| { - let id_str: String = row.get(0)?; - let project_id_str: String = row.get(1)?; - let title: String = row.get(2)?; - let created_at_str: String = row.get(3)?; - let updated_at_str: String = row.get(4)?; - let message_count: i64 = row.get(5)?; - - let id = Uuid::parse_str(&id_str) - .map_err(|e| rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)))?; - let project_id = Uuid::parse_str(&project_id_str) - .map_err(|e| rusqlite::Error::FromSqlConversionFailure(1, rusqlite::types::Type::Text, Box::new(e)))?; - let created_at = DateTime::parse_from_rfc3339(&created_at_str) - .map_err(|e| rusqlite::Error::FromSqlConversionFailure(3, rusqlite::types::Type::Text, Box::new(e)))? - .with_timezone(&chrono::Utc); - let updated_at = DateTime::parse_from_rfc3339(&updated_at_str) - .map_err(|e| rusqlite::Error::FromSqlConversionFailure(4, rusqlite::types::Type::Text, Box::new(e)))? 
- .with_timezone(&chrono::Utc); - - Ok(crate::models::conversation::Conversation { - id, - project_id, - title, - created_at, - updated_at, - message_count: message_count as u32, - }) - })?; - - let mut conversations = Vec::new(); - for row_result in rows { - conversations.push(row_result?); - } - - Ok(conversations) - } - - /// 从数据库加载所有对话 - pub fn load_all_conversations(&self) -> Result> { - use uuid::Uuid; - use chrono::DateTime; - - let mut stmt = self.conn.prepare( - "SELECT id, project_id, title, created_at, updated_at, message_count - FROM conversations - ORDER BY updated_at DESC" - )?; - - let rows = stmt.query_map([], |row| { - let id_str: String = row.get(0)?; - let project_id_str: String = row.get(1)?; - let title: String = row.get(2)?; - let created_at_str: String = row.get(3)?; - let updated_at_str: String = row.get(4)?; - let message_count: i64 = row.get(5)?; - - let id = Uuid::parse_str(&id_str) - .map_err(|e| rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)))?; - let project_id = Uuid::parse_str(&project_id_str) - .map_err(|e| rusqlite::Error::FromSqlConversionFailure(1, rusqlite::types::Type::Text, Box::new(e)))?; - let created_at = DateTime::parse_from_rfc3339(&created_at_str) - .map_err(|e| rusqlite::Error::FromSqlConversionFailure(3, rusqlite::types::Type::Text, Box::new(e)))? - .with_timezone(&chrono::Utc); - let updated_at = DateTime::parse_from_rfc3339(&updated_at_str) - .map_err(|e| rusqlite::Error::FromSqlConversionFailure(4, rusqlite::types::Type::Text, Box::new(e)))? 
- .with_timezone(&chrono::Utc); - - Ok(crate::models::conversation::Conversation { - id, - project_id, - title, - created_at, - updated_at, - message_count: message_count as u32, - }) - })?; - - let mut conversations = Vec::new(); - for row_result in rows { - conversations.push(row_result?); - } - - Ok(conversations) - } - - /// 删除对话 - pub fn delete_conversation_by_id(&mut self, conversation_id: &str) -> Result { - // 由于有 ON DELETE CASCADE,删除对话会自动删除相关消息 - let count = self.conn.execute( - "DELETE FROM conversations WHERE id = ?", - [conversation_id], - )?; - Ok(count) - } - - /// 删除单条消息 - pub fn delete_message_by_id(&mut self, message_id: &str) -> Result { - let count = self.conn.execute( - "DELETE FROM messages WHERE id = ?", - [message_id], - )?; - Ok(count) - } - - /// 删除对话的所有消息 - pub fn delete_messages_by_conversation(&mut self, conversation_id: &str) -> Result { - let count = self.conn.execute( - "DELETE FROM messages WHERE conversation_id = ?", - [conversation_id], - )?; - log::info!("删除对话 {} 的所有消息,共 {} 条", conversation_id, count); - Ok(count) - } - - /// 保存消息到数据库 - pub fn save_message(&mut self, message: &crate::models::conversation::Message) -> Result<()> { - log::info!( - "📝 [SAVE-MSG-START] id={}, conversation_id={}, role={}, content_len={}", - message.id, - message.conversation_id, - message.role.to_string(), - message.content.len() - ); - - // 在开始前查询总数 - let total_before: i32 = self.conn.query_row( - "SELECT COUNT(*) FROM messages", - [], - |row| row.get(0) - )?; - log::info!("📝 [SAVE-MSG-START] 当前数据库messages总数(插入前): {}", total_before); - - // ⭐ 添加:检查PRAGMA设置 - let foreign_keys_enabled: i32 = self.conn.query_row( - "PRAGMA foreign_keys", - [], - |row| row.get(0) - )?; - log::info!("💡 当前连接 foreign_keys = {}", foreign_keys_enabled); - - if foreign_keys_enabled == 0 { - log::warn!("⚠️ 外键约束未启用,尝试启用..."); - self.conn.execute("PRAGMA foreign_keys = ON", [])?; - } - - // ⭐ 添加:验证conversation存在 - let conv_exists: i32 = self.conn.query_row( - "SELECT COUNT(*) FROM 
conversations WHERE id = ?", - [message.conversation_id.to_string()], - |row| row.get(0) - )?; - log::info!("💡 对话存在性检查: conversation_id={}, exists={}", - message.conversation_id, conv_exists); - - if conv_exists == 0 { - return Err(anyhow::anyhow!("对话不存在: {}", message.conversation_id)); - } - - // 使用事务确保数据一致性 - let tx = self.conn.transaction()?; - - log::info!("💡 事务已开启"); - - // 序列化 sources 为 JSON - let sources_json = message.sources.as_ref() - .map(|sources| serde_json::to_string(sources).ok()) - .flatten(); - - let rows_affected = match tx.execute( - "INSERT INTO messages (id, conversation_id, role, content, created_at, sources) - VALUES (?, ?, ?, ?, ?, ?)", - params![ - message.id.to_string(), - message.conversation_id.to_string(), - message.role.to_string(), - message.content, - message.timestamp.to_rfc3339(), - sources_json, - ], - ) { - Ok(n) => { - log::info!("✅ INSERT 成功: rows={}", n); - n - } - Err(e) => { - log::error!("❌ INSERT 失败: {}, 尝试 UPDATE", e); - // 如果插入失败(可能是主键冲突),尝试更新 - tx.execute( - "UPDATE messages SET role=?, content=?, created_at=?, sources=? WHERE id=?", - params![ - message.role.to_string(), - message.content, - message.timestamp.to_rfc3339(), - sources_json, - message.id.to_string(), - ], - )? 
- } - }; - - // ⭐ 添加:事务提交前验证数据 - let count_before_commit: i32 = tx.query_row( - "SELECT COUNT(*) FROM messages WHERE id = ?", - [message.id.to_string()], - |row| row.get(0) - )?; - log::info!("💡 提交前验证: message_id={}, count={}", message.id, count_before_commit); - - // 提交事务 - match tx.commit() { - Ok(_) => { - log::info!("✅ [SAVE-MSG] 事务提交成功: rows_affected={}", rows_affected); - } - Err(e) => { - log::error!("❌ [SAVE-MSG] 事务提交失败: {}", e); - return Err(anyhow::anyhow!("事务提交失败: {}", e)); - } - } - - // 提交后立即验证数据 - let count_after_commit: i32 = self.conn.query_row( - "SELECT COUNT(*) FROM messages WHERE id = ?", - [message.id.to_string()], - |row| row.get(0) - )?; - log::info!("💡 [SAVE-MSG] 提交后验证: message_id={}, count={}", message.id, count_after_commit); - - // 再次确认连接的数据库文件 - let db_file: String = self.conn.query_row( - "PRAGMA database_list", - [], - |row| row.get(2) - )?; - log::info!("💡 [SAVE-MSG] 当前操作的数据库文件: {}", db_file); - - // 检查所有消息总数 - let total_after: i32 = self.conn.query_row( - "SELECT COUNT(*) FROM messages", - [], - |row| row.get(0) - )?; - log::info!("📝 [SAVE-MSG-END] 数据库messages总数(插入后): {} -> {}", - total_before, total_after); - - if total_after != total_before + 1 { - log::warn!("⚠️ [SAVE-MSG] 警告:总数变化不正常!expected={}, actual={}", - total_before + 1, total_after); - } - - if count_after_commit == 0 { - log::error!("🚨 [SAVE-MSG] 严重错误:事务提交成功但数据不在数据库中!"); - return Err(anyhow::anyhow!("数据未能持久化")); - } - - log::info!("🎉 [SAVE-MSG-SUCCESS] message_id={}, 数据已确认写入", message.id); - - Ok(()) - } - - /// 获取消息总数(用于调试) - pub fn get_message_count(&self) -> Result { - let count: i32 = self.conn.query_row( - "SELECT COUNT(*) FROM messages", - [], - |row| row.get(0) - )?; - Ok(count) - } - - /// 获取特定对话的消息数量 - pub fn get_conversation_message_count(&self, conversation_id: &str) -> Result { - let count: i32 = self.conn.query_row( - "SELECT COUNT(*) FROM messages WHERE conversation_id = ?", - [conversation_id], - |row| row.get(0) - )?; - Ok(count) - } - - /// 从数据库加载对话的所有消息 
- pub fn load_messages_by_conversation(&self, conversation_id: &str) -> Result> { - use uuid::Uuid; - use chrono::DateTime; - - log::info!("load_messages_by_conversation: conversation_id={}", conversation_id); - - let mut stmt = self.conn.prepare( - "SELECT id, conversation_id, role, content, created_at, sources - FROM messages - WHERE conversation_id = ? - ORDER BY created_at ASC" - )?; - - let rows = stmt.query_map([conversation_id], |row| { - let id_str: String = row.get(0)?; - let conversation_id_str: String = row.get(1)?; - let role_str: String = row.get(2)?; - let content: String = row.get(3)?; - let created_at_str: String = row.get(4)?; - let sources_json: Option = row.get(5)?; - - log::debug!("加载消息: id={}, role={}", id_str, role_str); - - let id = Uuid::parse_str(&id_str) - .map_err(|e| rusqlite::Error::FromSqlConversionFailure(0, rusqlite::types::Type::Text, Box::new(e)))?; - let conversation_id = Uuid::parse_str(&conversation_id_str) - .map_err(|e| rusqlite::Error::FromSqlConversionFailure(1, rusqlite::types::Type::Text, Box::new(e)))?; - let created_at = DateTime::parse_from_rfc3339(&created_at_str) - .map_err(|e| rusqlite::Error::FromSqlConversionFailure(4, rusqlite::types::Type::Text, Box::new(e)))? 
- .with_timezone(&chrono::Utc); - - let role = match role_str.as_str() { - // 匹配大写(数据库中的实际格式 - Display trait 输出) - "User" => crate::models::conversation::MessageRole::User, - "Assistant" => crate::models::conversation::MessageRole::Assistant, - "System" => crate::models::conversation::MessageRole::System, - // 兼容小写(向后兼容,可能存在的旧数据) - "user" => crate::models::conversation::MessageRole::User, - "assistant" => crate::models::conversation::MessageRole::Assistant, - "system" => crate::models::conversation::MessageRole::System, - _ => return Err(rusqlite::Error::FromSqlConversionFailure( - 2, - rusqlite::types::Type::Text, - Box::new(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("Invalid role: '{}'", role_str) - )) - )) - }; - - // 解析 sources JSON - let sources = sources_json - .and_then(|json| serde_json::from_str(&json).ok()); - - Ok(crate::models::conversation::Message { - id, - conversation_id, - role, - content, - timestamp: created_at, - token_count: 0, // Not stored in DB, will be recalculated if needed - context_chunks: Vec::new(), // Context not stored in DB - processing_time: None, // Not stored in DB - sources, // Load sources from DB - }) - })?; - - let mut messages = Vec::new(); - for row_result in rows { - match row_result { - Ok(msg) => messages.push(msg), - Err(e) => { - log::error!("解析消息行失败: {:?}", e); - return Err(anyhow::anyhow!("解析消息失败: {}", e)); - } - } - } - - log::info!("load_messages_by_conversation 完成: 加载了 {} 条消息", messages.len()); - Ok(messages) - } -} - -impl Drop for EmbeddedVectorDb { - fn drop(&mut self) { - log::warn!("🔥 [DB-DROP] 数据库连接即将关闭!"); - - // 在关闭前检查数据 - if let Ok(msg_count) = self.conn.query_row::( - "SELECT COUNT(*) FROM messages", - [], - |row| row.get(0) - ) { - log::warn!("🔥 [DB-DROP] 关闭时messages数量: {}", msg_count); - } - - // 执行最终checkpoint - if let Err(e) = self.conn.execute_batch("PRAGMA wal_checkpoint(TRUNCATE);") { - log::error!("🔥 [DB-DROP] 最终checkpoint失败: {}", e); - } else { - log::info!("🔥 [DB-DROP] 
最终checkpoint完成"); - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_embedded_vector_db() -> Result<()> { - let mut db = EmbeddedVectorDb::new_in_memory()?; - - let doc = VectorDocument { - id: Uuid::new_v4().to_string(), - project_id: Uuid::new_v4().to_string(), - document_id: Uuid::new_v4().to_string(), - chunk_index: 0, - content: "测试文档内容".to_string(), - embedding: vec![0.1, 0.2, 0.3, 0.4, 0.5], - metadata: HashMap::new(), - }; - - db.add_document(doc.clone())?; - - let results = db.similarity_search( - &[0.1, 0.2, 0.3, 0.4, 0.5], - Some(&doc.project_id), - 10, - 0.0, - )?; - - assert_eq!(results.len(), 1); - assert_eq!(results[0].document.content, "测试文档内容"); - assert!((results[0].similarity - 1.0).abs() < 0.001); - - Ok(()) - } -} diff --git a/src-tauri/src/services/mod.rs b/src-tauri/src/services/mod.rs index b420b23..74ac059 100644 --- a/src-tauri/src/services/mod.rs +++ b/src-tauri/src/services/mod.rs @@ -3,14 +3,10 @@ pub mod conversation_service; pub mod dashscope_embedding_service; pub mod document_processor; pub mod document_service; -// pub mod embedded_vector_db; // Removed - replaced by seekdb_adapter pub mod llm_client; pub mod project_service; pub mod prompts; -pub mod python_env; -pub mod python_subprocess; pub mod seekdb_adapter; -pub mod seekdb_package; pub mod simple_embeddings; pub mod speech_service; pub mod vector_db; diff --git a/src-tauri/src/services/project_service.rs b/src-tauri/src/services/project_service.rs index 85ff14c..d85bd29 100644 --- a/src-tauri/src/services/project_service.rs +++ b/src-tauri/src/services/project_service.rs @@ -14,30 +14,23 @@ pub struct ProjectService { } impl ProjectService { - pub fn new(db: Arc>) -> Self { + pub async fn new_async(db: Arc>) -> Result { let mut service = Self { projects: HashMap::new(), db, }; - // 从数据库加载已有项目 - if let Err(e) = service.load_projects_from_db() { + if let Err(e) = service.load_projects_from_db().await { log::error!("加载项目失败: {}", e); } - service + 
Ok(service) } /// 从数据库加载项目到内存 - fn load_projects_from_db(&mut self) -> Result<()> { - let db = self.db.clone(); - let db_guard = tokio::task::block_in_place(|| { - tokio::runtime::Handle::current().block_on(async { - db.lock().await - }) - }); - - let projects = db_guard.load_all_projects()?; + async fn load_projects_from_db(&mut self) -> Result<()> { + let adapter = self.db.lock().await.clone(); + let projects = adapter.load_all_projects().await?; log::info!("从数据库加载了 {} 个项目", projects.len()); for project in projects { @@ -48,37 +41,25 @@ impl ProjectService { } /// 保存项目到数据库 - pub fn save_project_to_db(&self, project: &Project) -> Result<()> { - let db = self.db.clone(); - tokio::task::block_in_place(|| { - tokio::runtime::Handle::current().block_on(async { - let mut db_guard = db.lock().await; - db_guard.save_project(project) - }) - }) + pub async fn save_project_to_db(&self, project: &Project) -> Result<()> { + let adapter = self.db.lock().await.clone(); + adapter.save_project(project).await } /// 从数据库删除项目 - fn delete_project_from_db(&self, project_id: Uuid) -> Result<()> { - let db = self.db.clone(); - tokio::task::block_in_place(|| { - tokio::runtime::Handle::current().block_on(async { - let mut db_guard = db.lock().await; - db_guard.delete_project_by_id(&project_id.to_string())?; - db_guard.delete_project_documents(&project_id.to_string()) - .map(|_| ()) - }) - }) + async fn delete_project_from_db(&self, project_id: Uuid) -> Result<()> { + let adapter = self.db.lock().await.clone(); + adapter.delete_project_by_id(&project_id.to_string()).await?; + adapter.delete_project_documents(&project_id.to_string()).await?; + Ok(()) } - pub fn create_project(&mut self, name: String, description: Option) -> Result { + pub async fn create_project(&mut self, name: String, description: Option) -> Result { let project = Project::new(name, description)?; let project_id = project.id; - // 保存到数据库 - self.save_project_to_db(&project)?; + self.save_project_to_db(&project).await?; - 
// 保存到内存 self.projects.insert(project_id, project); Ok(project_id) } @@ -95,7 +76,7 @@ impl ProjectService { self.projects.values().collect() } - pub fn update_project( + pub async fn update_project( &mut self, project_id: Uuid, name: Option, @@ -115,21 +96,19 @@ impl ProjectService { } } - // 保存到数据库 if let Some(project) = self.projects.get(&project_id) { - self.save_project_to_db(project)?; + self.save_project_to_db(project).await?; } Ok(()) } - pub fn delete_project(&mut self, project_id: Uuid) -> Result<()> { + pub async fn delete_project(&mut self, project_id: Uuid) -> Result<()> { self.projects .remove(&project_id) .ok_or_else(|| anyhow!("Project not found: {}", project_id))?; - // 从数据库删除 - self.delete_project_from_db(project_id)?; + self.delete_project_from_db(project_id).await?; Ok(()) } @@ -167,7 +146,7 @@ impl ProjectService { }) } - pub fn update_project_status(&mut self, project_id: Uuid, status: crate::models::project::ProjectStatus) -> Result<()> { + pub async fn update_project_status(&mut self, project_id: Uuid, status: crate::models::project::ProjectStatus) -> Result<()> { { let project = self.projects .get_mut(&project_id) @@ -176,9 +155,8 @@ impl ProjectService { project.update_status(status); } - // 保存到数据库 if let Some(project) = self.projects.get(&project_id) { - self.save_project_to_db(project)?; + self.save_project_to_db(project).await?; } Ok(()) @@ -207,63 +185,71 @@ pub struct ProjectStats { #[cfg(test)] mod tests { use super::*; + use crate::services::seekdb_adapter::SeekDbAdapter; + use std::sync::Arc; + use tokio::sync::Mutex; + + async fn test_service() -> ProjectService { + let path = std::env::temp_dir().join(format!("mine_kb_test_proj_{}.db", std::process::id())); + let adapter = SeekDbAdapter::new_async(&path).await.unwrap(); + ProjectService::new_async(Arc::new(Mutex::new(adapter))).await.unwrap() + } - #[test] - fn test_project_service_creation() { - let service = ProjectService::new(); + #[tokio::test] + async fn 
test_project_service_creation() { + let service = test_service().await; assert_eq!(service.projects.len(), 0); } - #[test] - fn test_create_and_get_project() { - let mut service = ProjectService::new(); + #[tokio::test] + async fn test_create_and_get_project() { + let mut service = test_service().await; let project_id = service.create_project( "Test Project".to_string(), Some("A test project".to_string()), - ).unwrap(); + ).await.unwrap(); let project = service.get_project(project_id).unwrap(); assert_eq!(project.name, "Test Project"); assert_eq!(project.description, Some("A test project".to_string())); - assert!(!project.is_archived); } - #[test] - fn test_update_project() { - let mut service = ProjectService::new(); + #[tokio::test] + async fn test_update_project() { + let mut service = test_service().await; - let project_id = service.create_project("Original".to_string(), None).unwrap(); + let project_id = service.create_project("Original".to_string(), None).await.unwrap(); service.update_project( project_id, Some("Updated".to_string()), Some("Updated description".to_string()), - ).unwrap(); + ).await.unwrap(); let project = service.get_project(project_id).unwrap(); assert_eq!(project.name, "Updated"); assert_eq!(project.description, Some("Updated description".to_string())); } - #[test] - fn test_delete_project() { - let mut service = ProjectService::new(); + #[tokio::test] + async fn test_delete_project() { + let mut service = test_service().await; - let project_id = service.create_project("Test".to_string(), None).unwrap(); + let project_id = service.create_project("Test".to_string(), None).await.unwrap(); assert!(service.get_project(project_id).is_some()); - service.delete_project(project_id).unwrap(); + service.delete_project(project_id).await.unwrap(); assert!(service.get_project(project_id).is_none()); } - #[test] - fn test_find_projects_by_name() { - let mut service = ProjectService::new(); + #[tokio::test] + async fn test_find_projects_by_name() { + let 
mut service = test_service().await; - service.create_project("My Project".to_string(), None).unwrap(); - service.create_project("Another Project".to_string(), None).unwrap(); - service.create_project("Something Else".to_string(), None).unwrap(); + service.create_project("My Project".to_string(), None).await.unwrap(); + service.create_project("Another Project".to_string(), None).await.unwrap(); + service.create_project("Something Else".to_string(), None).await.unwrap(); let results = service.find_projects_by_name("project"); assert_eq!(results.len(), 2); @@ -272,20 +258,17 @@ mod tests { assert_eq!(results.len(), 1); } - #[test] - fn test_project_status_update() { - let mut service = ProjectService::new(); + #[tokio::test] + async fn test_project_status_update() { + let mut service = test_service().await; - let project_id = service.create_project("Test".to_string(), None).unwrap(); + let project_id = service.create_project("Test".to_string(), None).await.unwrap(); - // Initially Created status assert_eq!(service.get_project(project_id).unwrap().status, crate::models::project::ProjectStatus::Created); - // Update to Processing - service.update_project_status(project_id, crate::models::project::ProjectStatus::Processing).unwrap(); + service.update_project_status(project_id, crate::models::project::ProjectStatus::Processing).await.unwrap(); assert_eq!(service.get_project(project_id).unwrap().status, crate::models::project::ProjectStatus::Processing); - // Test filtering by status let processing_projects = service.list_projects_by_status(crate::models::project::ProjectStatus::Processing); assert_eq!(processing_projects.len(), 1); @@ -293,11 +276,11 @@ mod tests { assert_eq!(ready_projects.len(), 0); } - #[test] - fn test_project_stats() { - let mut service = ProjectService::new(); + #[tokio::test] + async fn test_project_stats() { + let mut service = test_service().await; - let project_id = service.create_project("Test".to_string(), None).unwrap(); + let project_id = 
service.create_project("Test".to_string(), None).await.unwrap(); let stats = service.get_project_stats(project_id).unwrap(); assert_eq!(stats.project_id, project_id); @@ -305,11 +288,11 @@ mod tests { assert_eq!(stats.conversation_count, 0); } - #[test] - fn test_project_exists() { - let mut service = ProjectService::new(); + #[tokio::test] + async fn test_project_exists() { + let mut service = test_service().await; - let project_id = service.create_project("Test".to_string(), None).unwrap(); + let project_id = service.create_project("Test".to_string(), None).await.unwrap(); assert!(service.project_exists(project_id)); let non_existent_id = Uuid::new_v4(); diff --git a/src-tauri/src/services/python_env.rs b/src-tauri/src/services/python_env.rs deleted file mode 100644 index 45bc6a5..0000000 --- a/src-tauri/src/services/python_env.rs +++ /dev/null @@ -1,226 +0,0 @@ -use anyhow::{anyhow, Result}; -use std::path::{Path, PathBuf}; -use std::process::Command; - -/// Python 虚拟环境管理器 -pub struct PythonEnv { - venv_dir: PathBuf, - python_executable: PathBuf, -} - -impl PythonEnv { - /// 创建新的 Python 环境管理器 - pub fn new(app_data_dir: &Path) -> Result { - let venv_dir = app_data_dir.join("venv"); - - // 确定虚拟环境中的 Python 可执行文件路径 - #[cfg(target_os = "windows")] - let python_executable = venv_dir.join("Scripts").join("python.exe"); - - #[cfg(not(target_os = "windows"))] - let python_executable = venv_dir.join("bin").join("python3"); - - Ok(Self { - venv_dir, - python_executable, - }) - } - - /// 检查虚拟环境是否存在 - pub fn venv_exists(&self) -> bool { - self.venv_dir.exists() && self.python_executable.exists() - } - - /// 确保虚拟环境存在,如果不存在则创建 - pub fn ensure_venv(&self) -> Result<()> { - if self.venv_exists() { - log::info!("✅ Python 虚拟环境已存在: {:?}", self.venv_dir); - return Ok(()); - } - - log::info!("🔧 创建 Python 虚拟环境..."); - log::info!(" 位置: {:?}", self.venv_dir); - - // 检查系统 Python 是否存在 - self.check_system_python()?; - - // 创建虚拟环境 - self.create_venv()?; - - // 验证虚拟环境 - if 
!self.venv_exists() { - return Err(anyhow!( - "虚拟环境创建失败\n\ - 预期位置: {:?}\n\ - Python 可执行文件: {:?}", - self.venv_dir, - self.python_executable - )); - } - - // 确保 pip 可用 - self.ensure_pip()?; - - log::info!("✅ Python 虚拟环境创建成功"); - Ok(()) - } - - /// 检查系统 Python 是否可用 - fn check_system_python(&self) -> Result<()> { - let output = Command::new("python3") - .arg("--version") - .output(); - - match output { - Ok(output) => { - if output.status.success() { - let version = String::from_utf8_lossy(&output.stdout); - log::info!(" 系统 Python: {}", version.trim()); - Ok(()) - } else { - Err(anyhow!("Python3 未正确安装")) - } - } - Err(_) => { - Err(anyhow!( - "未找到 Python3\n\n\ - 请先安装 Python 3.8 或更高版本:\n\ - - Ubuntu/Debian: sudo apt install python3 python3-venv\n\ - - macOS: brew install python3\n\ - - Windows: 从 python.org 下载安装" - )) - } - } - } - - /// 创建虚拟环境 - fn create_venv(&self) -> Result<()> { - log::info!(" 执行: python3 -m venv {:?}", self.venv_dir); - - let output = Command::new("python3") - .arg("-m") - .arg("venv") - .arg(&self.venv_dir) - .output() - .map_err(|e| anyhow!("创建虚拟环境失败: {}", e))?; - - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - let stdout = String::from_utf8_lossy(&output.stdout); - - // 检查是否是 python3-venv 缺失的问题 - let is_venv_missing = stderr.contains("ensurepip is not available") - || stderr.contains("python3-venv"); - - let error_msg = if is_venv_missing { - format!( - "虚拟环境创建失败:缺少 python3-venv 模块\n\n\ - 请先安装 python3-venv:\n\ - Ubuntu/Debian: sudo apt install python3-venv\n\ - 或: sudo apt install python3.10-venv\n\n\ - 详细错误信息:\n{}", - stderr.trim() - ) - } else { - format!( - "虚拟环境创建失败(退出码: {:?})\n\n\ - 标准错误输出:\n{}\n\ - 标准输出:\n{}", - output.status.code(), - stderr.trim(), - stdout.trim() - ) - }; - - return Err(anyhow!(error_msg)); - } - - Ok(()) - } - - /// 确保 pip 可用 - fn ensure_pip(&self) -> Result<()> { - log::info!("🔍 检查 pip 是否可用..."); - - // 尝试运行 python -m pip --version - let output = 
Command::new(&self.python_executable) - .arg("-m") - .arg("pip") - .arg("--version") - .output(); - - match output { - Ok(output) if output.status.success() => { - let version = String::from_utf8_lossy(&output.stdout); - log::info!("✅ pip 已可用: {}", version.trim()); - Ok(()) - } - _ => { - log::warn!("⚠️ pip 不可用,尝试使用 ensurepip 安装..."); - - // 使用 ensurepip 模块安装 pip - let install_output = Command::new(&self.python_executable) - .arg("-m") - .arg("ensurepip") - .arg("--default-pip") - .output() - .map_err(|e| anyhow!("安装 pip 失败: {}", e))?; - - if !install_output.status.success() { - let stderr = String::from_utf8_lossy(&install_output.stderr); - return Err(anyhow!( - "pip 安装失败\n\n\ - 错误信息:\n{}\n\n\ - 请尝试手动安装:\n\ - 1. {:?} -m ensurepip --default-pip\n\ - 或\n\ - 2. curl https://bootstrap.pypa.io/get-pip.py | {:?}", - stderr.trim(), - self.python_executable, - self.python_executable - )); - } - - log::info!("✅ pip 安装成功"); - Ok(()) - } - } - } - - /// 获取虚拟环境的 Python 可执行文件路径 - pub fn get_python_executable(&self) -> &Path { - &self.python_executable - } - - /// 获取虚拟环境的 pip 可执行文件路径 - pub fn get_pip_executable(&self) -> PathBuf { - #[cfg(target_os = "windows")] - let pip = self.venv_dir.join("Scripts").join("pip.exe"); - - #[cfg(not(target_os = "windows"))] - let pip = self.venv_dir.join("bin").join("pip3"); - - pip - } - - /// 获取虚拟环境目录 - pub fn get_venv_dir(&self) -> &Path { - &self.venv_dir - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::env; - - #[test] - fn test_python_env_creation() { - let temp_dir = env::temp_dir().join("test_python_env"); - let python_env = PythonEnv::new(&temp_dir).unwrap(); - - assert!(python_env.get_venv_dir().to_string_lossy().contains("venv")); - assert!(python_env.get_python_executable().to_string_lossy().contains("python")); - } -} - diff --git a/src-tauri/src/services/python_subprocess.rs b/src-tauri/src/services/python_subprocess.rs deleted file mode 100644 index eb55e3d..0000000 --- a/src-tauri/src/services/python_subprocess.rs 
+++ /dev/null @@ -1,290 +0,0 @@ -use anyhow::{anyhow, Result}; -use serde::{Deserialize, Serialize}; -use serde_json::Value; -use std::io::{BufRead, BufReader, Write}; -use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio}; -use std::sync::{Arc, Mutex}; -use std::thread; -use std::time::Duration; - -/// Request sent to Python subprocess -#[derive(Debug, Serialize)] -struct Request { - command: String, - params: Value, -} - -/// Response from Python subprocess -#[derive(Debug, Deserialize)] -struct Response { - status: String, - #[serde(default)] - data: Option, - #[serde(default)] - error: Option, - #[serde(default)] - details: Option, -} - -/// Python subprocess manager for SeekDB operations -#[derive(Debug)] -pub struct PythonSubprocess { - child: Arc>>, - stdin: Arc>>, - stdout: Arc>>>, - script_path: String, - python_executable: String, -} - -impl PythonSubprocess { - /// Create and start a new Python subprocess - pub fn new(script_path: &str) -> Result { - Self::new_with_python(script_path, "python3") - } - - /// Create and start a new Python subprocess with custom Python executable - pub fn new_with_python(script_path: &str, python_executable: &str) -> Result { - log::info!("🐍 Starting Python subprocess: {}", script_path); - log::info!(" Python 可执行文件: {}", python_executable); - - let mut command = Command::new(python_executable); - command - .arg(script_path) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::inherit()); // Log stderr to our log - - let mut child = command - .spawn() - .map_err(|e| anyhow!("Failed to start Python process: {}", e))?; - - let stdin = child.stdin.take().ok_or_else(|| anyhow!("Failed to open stdin"))?; - let stdout = child.stdout.take().ok_or_else(|| anyhow!("Failed to open stdout"))?; - let stdout = BufReader::new(stdout); - - log::info!("✅ Python subprocess started successfully"); - - Ok(Self { - child: Arc::new(Mutex::new(Some(child))), - stdin: Arc::new(Mutex::new(Some(stdin))), - stdout: 
Arc::new(Mutex::new(Some(stdout))), - script_path: script_path.to_string(), - python_executable: python_executable.to_string(), - }) - } - - /// Send a command and wait for response - pub fn send_command(&self, command: &str, params: Value) -> Result { - let request = Request { - command: command.to_string(), - params: params.clone(), - }; - - // Serialize request to JSON - let request_json = serde_json::to_string(&request)?; - - log::debug!("📤 Sending command: {} (params: {})", command, - serde_json::to_string(¶ms).unwrap_or_default()); - - // Write to stdin - { - let mut stdin_guard = self.stdin.lock().unwrap(); - let stdin = stdin_guard.as_mut().ok_or_else(|| anyhow!("Stdin not available"))?; - - writeln!(stdin, "{}", request_json)?; - stdin.flush()?; - } - - // Read response from stdout - let response_line = { - let mut stdout_guard = self.stdout.lock().unwrap(); - let stdout = stdout_guard.as_mut().ok_or_else(|| anyhow!("Stdout not available"))?; - - let mut line = String::new(); - stdout.read_line(&mut line)?; - line - }; - - log::debug!("📥 Received response: {}", response_line.trim()); - - // Parse response - let response: Response = serde_json::from_str(&response_line) - .map_err(|e| anyhow!("Failed to parse response: {}", e))?; - - // Check response status - if response.status == "success" { - Ok(response.data.unwrap_or(Value::Null)) - } else { - let error_msg = response.error.unwrap_or_else(|| "Unknown error".to_string()); - let details = response.details.unwrap_or_default(); - Err(anyhow!("Python subprocess error: {} - {}", error_msg, details)) - } - } - - /// Initialize SeekDB database - pub fn init_db(&self, db_path: &str, db_name: &str) -> Result<()> { - log::info!("🔧 Initializing SeekDB: path={}, name={}", db_path, db_name); - - let params = serde_json::json!({ - "db_path": db_path, - "db_name": db_name - }); - - self.send_command("init", params)?; - log::info!("✅ SeekDB initialized"); - Ok(()) - } - - /// Execute SQL statement (INSERT, UPDATE, 
DELETE, CREATE, etc.) - pub fn execute(&self, sql: &str, values: Vec) -> Result { - let params = serde_json::json!({ - "sql": sql, - "values": values - }); - - let response = self.send_command("execute", params)?; - let rows_affected = response - .get("rows_affected") - .and_then(|v| v.as_i64()) - .unwrap_or(0); - - Ok(rows_affected) - } - - /// Execute SELECT query and return all rows - pub fn query(&self, sql: &str, values: Vec) -> Result>> { - let params = serde_json::json!({ - "sql": sql, - "values": values - }); - - let response = self.send_command("query", params)?; - let rows = response - .get("rows") - .and_then(|v| v.as_array()) - .ok_or_else(|| anyhow!("Invalid query response"))?; - - let result: Vec> = rows - .iter() - .filter_map(|row| row.as_array().map(|r| r.clone())) - .collect(); - - Ok(result) - } - - /// Execute SELECT query and return first row - pub fn query_one(&self, sql: &str, values: Vec) -> Result>> { - let params = serde_json::json!({ - "sql": sql, - "values": values - }); - - let response = self.send_command("query_one", params)?; - let row = response.get("row"); - - match row { - Some(Value::Array(arr)) => Ok(Some(arr.clone())), - Some(Value::Null) | None => Ok(None), - _ => Err(anyhow!("Invalid query_one response")), - } - } - - /// Commit current transaction - pub fn commit(&self) -> Result<()> { - self.send_command("commit", Value::Null)?; - Ok(()) - } - - /// Rollback current transaction - pub fn rollback(&self) -> Result<()> { - self.send_command("rollback", Value::Null)?; - Ok(()) - } - - /// Ping to check if subprocess is alive - pub fn ping(&self) -> Result<()> { - self.send_command("ping", Value::Null)?; - Ok(()) - } - - /// Check if subprocess is still running - pub fn is_alive(&self) -> bool { - let child_guard = self.child.lock().unwrap(); - if let Some(_child) = child_guard.as_ref() { - // Try to ping - drop(child_guard); - self.ping().is_ok() - } else { - false - } - } - - /// Restart the subprocess if it has died - pub fn 
restart_if_needed(&mut self) -> Result<()> { - if !self.is_alive() { - log::warn!("⚠️ Python subprocess is not responding, restarting..."); - self.shutdown(); - - // Create new subprocess with same configuration - let python_executable = self.python_executable.clone(); - let new_subprocess = Self::new_with_python(&self.script_path, &python_executable)?; - *self = new_subprocess; - - log::info!("✅ Python subprocess restarted"); - } - Ok(()) - } - - /// Gracefully shutdown the subprocess - pub fn shutdown(&mut self) { - log::info!("🛑 Shutting down Python subprocess..."); - - // Close stdin to signal subprocess to exit - { - let mut stdin_guard = self.stdin.lock().unwrap(); - *stdin_guard = None; - } - - // Wait for child process to exit (with timeout) - { - let mut child_guard = self.child.lock().unwrap(); - if let Some(mut child) = child_guard.take() { - thread::sleep(Duration::from_millis(500)); - - match child.try_wait() { - Ok(Some(status)) => { - log::info!("Python subprocess exited with status: {}", status); - } - Ok(None) => { - log::warn!("Python subprocess still running, killing..."); - let _ = child.kill(); - let _ = child.wait(); - } - Err(e) => { - log::error!("Error waiting for subprocess: {}", e); - } - } - } - } - - log::info!("✅ Python subprocess shutdown complete"); - } -} - -impl Drop for PythonSubprocess { - fn drop(&mut self) { - self.shutdown(); - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_subprocess_creation() { - // This test would require the actual Python script to exist - // Skipping in unit tests, should be tested in integration tests - } -} - diff --git a/src-tauri/src/services/seekdb_adapter.rs b/src-tauri/src/services/seekdb_adapter.rs index 9f95dff..90e121a 100644 --- a/src-tauri/src/services/seekdb_adapter.rs +++ b/src-tauri/src/services/seekdb_adapter.rs @@ -1,11 +1,106 @@ use anyhow::{anyhow, Result}; +use async_trait::async_trait; +use seekdb_rs::{ + Client, DeleteQuery, DistanceMetric, EmbeddedDatabase, 
Filter, HnswConfig, HybridKnn, + IncludeField, QueryParam, QueryResult, row_to_json_values, SeekDbError, UpsertBatch, +}; +use seekdb_rs::EmbeddingFunction; use serde::{Deserialize, Serialize}; -use serde_json::Value; +use serde_json::{json, Value}; use std::collections::HashMap; use std::path::Path; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; +use std::time::Instant; -use super::python_subprocess::PythonSubprocess; +const VECTOR_COLLECTION_NAME: &str = "vector_documents"; +const VECTOR_DIMENSION: u32 = 1536; + +fn embedding_f64_to_f32(v: &[f64]) -> Vec { + v.iter().map(|&x| x as f32).collect() +} + +/// 将文档/查询文本转为向量,委托 DashScope 服务,用于混合检索时对 query 文本做向量化。 +pub struct DashScopeEmbeddingFunction { + pub service: Arc, +} + +#[async_trait] +impl EmbeddingFunction for DashScopeEmbeddingFunction { + async fn embed_documents(&self, docs: &[String]) -> std::result::Result>, SeekDbError> { + if docs.is_empty() { + return Ok(Vec::new()); + } + let texts: Vec = docs.to_vec(); + let embeddings = self + .service + .embed_batch(&texts) + .await + .map_err(|e| SeekDbError::Embedding(e.to_string()))?; + Ok(embeddings + .into_iter() + .map(|v: Vec| v.into_iter().map(|x| x as f32).collect::>()) + .collect::>>()) + } + fn dimension(&self) -> usize { + self.service.embedding_dim() + } +} + +fn doc_to_meta(doc: &VectorDocument) -> Value { + let mut m = serde_json::Map::new(); + m.insert("project_id".to_string(), json!(doc.project_id)); + m.insert("document_id".to_string(), json!(doc.document_id)); + m.insert("chunk_index".to_string(), json!(doc.chunk_index)); + for (k, v) in &doc.metadata { + m.insert(k.clone(), Value::String(v.clone())); + } + Value::Object(m) +} + +fn meta_to_doc_meta(meta: &Value) -> HashMap { + let mut out = HashMap::new(); + if let Some(obj) = meta.as_object() { + for (k, v) in obj { + if k == "project_id" || k == "document_id" || k == "chunk_index" { + continue; + } + if let Some(s) = v.as_str() { + out.insert(k.clone(), s.to_string()); + } + } + } + out 
+} + +fn query_result_to_search_results(qr: QueryResult, limit: usize) -> Vec { + let ids = qr.ids.get(0).map(|v| v.as_slice()).unwrap_or(&[]); + let docs = qr.documents.as_ref().and_then(|d| d.get(0)).map(|v| v.as_slice()).unwrap_or(&[]); + let metas = qr.metadatas.as_ref().and_then(|m| m.get(0)).map(|v| v.as_slice()).unwrap_or(&[]); + let dists = qr.distances.as_ref().and_then(|d| d.get(0)).map(|v| v.as_slice()).unwrap_or(&[]); + let mut results = Vec::new(); + for (i, id) in ids.iter().take(limit).enumerate() { + let content = docs.get(i).cloned().unwrap_or_default(); + let meta = metas.get(i).cloned().unwrap_or(json!({})); + let project_id = meta.get("project_id").and_then(|v| v.as_str()).unwrap_or("").to_string(); + let document_id = meta.get("document_id").and_then(|v| v.as_str()).unwrap_or("").to_string(); + let chunk_index = meta.get("chunk_index").and_then(|v| v.as_i64()).unwrap_or(0) as i32; + let distance = dists.get(i).copied().unwrap_or(0.0); + let similarity = 1.0 / (1.0 + distance as f64); + results.push(SearchResult { + document: VectorDocument { + id: id.clone(), + project_id, + document_id, + chunk_index, + content, + embedding: vec![], + metadata: meta_to_doc_meta(&meta), + }, + similarity, + }); + } + results +} /// Vector document structure (same as before) #[derive(Debug, Clone, Serialize, Deserialize)] @@ -26,110 +121,150 @@ pub struct SearchResult { pub similarity: f64, } -/// SeekDB adapter - manages database operations through Python subprocess -#[derive(Clone, Debug)] +#[derive(Clone)] pub struct SeekDbAdapter { - subprocess: Arc>, + client: Client, + hnsw_config: HnswConfig, db_path: String, db_name: String, } -impl SeekDbAdapter { - /// Create new SeekDB adapter instance - pub fn new>(db_path: P) -> Result { - Self::new_with_python(db_path, "python3") +impl std::fmt::Debug for SeekDbAdapter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SeekDbAdapter") + .field("db_path", &self.db_path) + 
.field("db_name", &self.db_name) + .finish_non_exhaustive() + } +} + +/// 将 JSON 值转为 SQL 参数(用于参数化查询)。 +fn value_to_query_param(v: &Value) -> QueryParam { + QueryParam::from_metadata_value(v) +} + +fn seekdb_err(e: seekdb_rs::SeekDbError) -> anyhow::Error { + anyhow!("SeekDB: {}", e) +} + +/// 解析 DB 返回的 created_at,兼容两种格式,避免解析失败导致顺序错乱。 +fn parse_datetime_from_db(s: &str) -> chrono::DateTime { + use chrono::{DateTime, NaiveDateTime, Utc}; + if s.is_empty() { + return Utc::now(); } - - /// Create new SeekDB adapter instance with custom Python executable - pub fn new_with_python>(db_path: P, python_executable: &str) -> Result { - let db_path_str = db_path.as_ref().display().to_string(); - log::info!("🔗 [NEW-DB] Opening SeekDB: {}", db_path_str); - - // Get absolute path for database directory - let db_dir = if db_path.as_ref().is_absolute() { - db_path.as_ref().parent().unwrap().to_path_buf() + if let Ok(dt) = DateTime::parse_from_rfc3339(s) { + return dt.with_timezone(&Utc); + } + if let Ok(naive) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") { + return DateTime::from_naive_utc_and_offset(naive, Utc); + } + Utc::now() +} + +impl SeekDbAdapter { + /// 使用 `db_path` 作为 SeekDB 实例目录;数据集中在该目录下。 + /// 异步构建,需在 async 上下文中调用。 + pub async fn new_async>(db_path: P) -> Result { + let db_path_ref = db_path.as_ref(); + let db_dir = if db_path_ref.is_absolute() { + db_path_ref.to_path_buf() } else { - std::env::current_dir()?.join(db_path.as_ref()).parent().unwrap().to_path_buf() + std::env::current_dir()?.join(db_path_ref) }; - - // Get the database file name (without extension) and normalize it - // Replace hyphens with underscores for SQL compatibility - let db_name = db_path.as_ref() - .file_stem() + let db_path_str = db_dir.display().to_string(); + let db_dir_str = db_path_str.clone(); + let db_name = db_dir + .file_name() .and_then(|s| s.to_str()) - .unwrap_or("mine_kb") - .replace("-", "_"); // Normalize: mine-kb -> mine_kb - - log::info!("🔗 [NEW-DB] Database 
directory: {:?}", db_dir); + .map(|s| s.trim_end_matches(".db").replace('-', "_")) + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "mine_kb".to_string()); + + log::info!("🔗 [NEW-DB] Opening embedded SeekDB: {}", db_path_str); log::info!("🔗 [NEW-DB] Database name: {}", db_name); - log::info!("🔗 [NEW-DB] Python executable: {}", python_executable); - - // Determine Python script path with multiple fallbacks - let script_path = std::env::current_exe() - .ok() - .and_then(|exe| exe.parent().map(|p| p.join("python/seekdb_bridge.py"))) - .filter(|p| p.exists()) - .or_else(|| { - // Try to find script relative to current directory - if let Ok(cwd) = std::env::current_dir() { - log::debug!("🔍 Current directory: {:?}", cwd); - - // Try multiple possible locations - let candidates = vec![ - cwd.join("python/seekdb_bridge.py"), // If in src-tauri - cwd.join("src-tauri/python/seekdb_bridge.py"), // If in project root - cwd.parent()?.join("python/seekdb_bridge.py"), // If in src-tauri/src - ]; - - for candidate in candidates { - log::debug!("🔍 Checking: {:?}", candidate); - if candidate.exists() { - log::info!("✅ Found script at: {:?}", candidate); - return Some(candidate); - } - } - } - None - }) - .unwrap_or_else(|| { - // Last resort: use relative path and hope for the best - log::warn!("⚠️ Could not find seekdb_bridge.py in expected locations"); - std::path::PathBuf::from("src-tauri/python/seekdb_bridge.py") - }); - - log::info!("🔗 [NEW-DB] Python script: {:?}", script_path); - - // Start Python subprocess with specified Python executable - let subprocess = PythonSubprocess::new_with_python( - script_path.to_str().unwrap(), - python_executable - )?; - - // Initialize database - use the actual db_path passed to the function - subprocess.init_db(&db_path_str, &db_name)?; - + + let t_open = Instant::now(); + EmbeddedDatabase::open(&db_dir).map_err(|e| anyhow!("SeekDB open: {}", e))?; + log::info!("🔗 [NEW-DB] Open data dir took {:?}", t_open.elapsed()); + + let t_build = 
Instant::now(); + let client = Client::builder() + .path(&db_dir_str) + .database(&db_name) + .build() + .await + .map_err(seekdb_err)?; + log::info!("🔗 [NEW-DB] Build client took {:?}", t_build.elapsed()); + + let hnsw_config = HnswConfig::new(VECTOR_DIMENSION, DistanceMetric::L2) + .map_err(|e| anyhow!("HnswConfig: {}", e))?; + let adapter = Self { - subprocess: Arc::new(Mutex::new(subprocess)), - db_path: db_path_str.clone(), - db_name: db_name.clone(), + client, + hnsw_config, + db_path: db_path_str, + db_name, }; - - // Initialize schema - adapter.initialize_schema()?; - - log::info!("🔗 [NEW-DB] SeekDB adapter initialized successfully"); - + let t_schema = Instant::now(); + adapter.initialize_schema().await?; + log::info!("🔗 [NEW-DB] Init schema took {:?}", t_schema.elapsed()); + log::info!("🔗 [NEW-DB] Database ready"); Ok(adapter) } - - /// Initialize database schema - fn initialize_schema(&self) -> Result<()> { - log::info!("📋 Initializing database schema..."); - - let subprocess = self.subprocess.lock().unwrap(); - - // Create projects table - subprocess.execute( + + async fn execute(&self, sql: &str, params: Vec) -> Result<()> { + let params_q: Vec = params.iter().map(value_to_query_param).collect(); + let params_ref = if params_q.is_empty() { + None + } else { + Some(params_q.as_slice()) + }; + self.client.execute(sql, params_ref).await.map_err(seekdb_err) + } + + async fn execute_no_params(&self, sql: &str) -> Result<()> { + self.client.execute(sql, None).await.map_err(seekdb_err) + } + + async fn query(&self, sql: &str, params: Vec) -> Result>> { + let params_q: Vec = params.iter().map(value_to_query_param).collect(); + let params_ref = if params_q.is_empty() { + None + } else { + Some(params_q.as_slice()) + }; + let max_cols = 64usize; + let rows = self.client.fetch_all(sql, params_ref).await.map_err(seekdb_err)?; + let converted: Vec> = rows + .into_iter() + .map(|r| row_to_json_values(r.as_ref(), max_cols)) + .collect(); + Ok(converted) + } + + async 
fn query_one(&self, sql: &str, params: Vec) -> Result>> { + let rows = self.query(sql, params).await?; + Ok(rows.into_iter().next()) + } + + async fn commit(&self) -> Result<()> { + self.execute_no_params("COMMIT").await + } + + /// Initialize database schema(每步打耗时日志,便于排查慢的根因) + async fn initialize_schema(&self) -> Result<()> { + log::info!("📋 Initializing schema..."); + + let run = |name: String, sql: String| async move { + let t = Instant::now(); + self.execute_no_params(&sql).await?; + log::info!("📋 [schema] {} took {:?}", name, t.elapsed()); + Ok::<(), anyhow::Error>(()) + }; + + run( + "CREATE TABLE projects".to_string(), "CREATE TABLE IF NOT EXISTS projects ( id VARCHAR(36) PRIMARY KEY, name TEXT NOT NULL, @@ -138,41 +273,12 @@ impl SeekDbAdapter { document_count INTEGER DEFAULT 0, created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL - )", - vec![], - )?; - - // Create vector_documents table with vector index and fulltext index for hybrid search - subprocess.execute( - "CREATE TABLE IF NOT EXISTS vector_documents ( - id VARCHAR(36) PRIMARY KEY, - project_id VARCHAR(36) NOT NULL, - document_id VARCHAR(36) NOT NULL, - chunk_index INTEGER NOT NULL, - content TEXT NOT NULL, - embedding vector(1536), - metadata TEXT NOT NULL, - created_at DATETIME DEFAULT CURRENT_TIMESTAMP, - UNIQUE(document_id, chunk_index), - VECTOR INDEX idx_embedding(embedding) WITH (distance=l2, type=hnsw, lib=vsag), - FULLTEXT idx_content(content) - )", - vec![], - )?; - - // Create regular indexes - subprocess.execute( - "CREATE INDEX IF NOT EXISTS idx_project_id ON vector_documents(project_id)", - vec![], - )?; - - subprocess.execute( - "CREATE INDEX IF NOT EXISTS idx_document_id ON vector_documents(document_id)", - vec![], - )?; - - // Create conversations table - subprocess.execute( + )".to_string(), + ) + .await?; + + run( + "CREATE TABLE conversations".to_string(), "CREATE TABLE IF NOT EXISTS conversations ( id VARCHAR(36) PRIMARY KEY, project_id VARCHAR(36) NOT NULL, @@ -180,13 
+286,14 @@ impl SeekDbAdapter { created_at DATETIME NOT NULL, updated_at DATETIME NOT NULL, message_count INTEGER DEFAULT 0, - FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE - )", - vec![], - )?; - - // Create messages table - subprocess.execute( + FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE, + KEY idx_conversation_project_id(project_id) + )".to_string(), + ) + .await?; + + run( + "CREATE TABLE messages".to_string(), "CREATE TABLE IF NOT EXISTS messages ( id VARCHAR(36) PRIMARY KEY, conversation_id VARCHAR(36) NOT NULL, @@ -194,482 +301,345 @@ impl SeekDbAdapter { content TEXT NOT NULL, created_at DATETIME NOT NULL, sources TEXT, - FOREIGN KEY (conversation_id) REFERENCES conversations(id) ON DELETE CASCADE - )", - vec![], - )?; - - // Create conversation indexes - subprocess.execute( - "CREATE INDEX IF NOT EXISTS idx_conversation_project_id ON conversations(project_id)", - vec![], - )?; - - subprocess.execute( - "CREATE INDEX IF NOT EXISTS idx_message_conversation_id ON messages(conversation_id)", - vec![], - )?; - - // Commit schema changes - subprocess.commit()?; - + FOREIGN KEY (conversation_id) REFERENCES conversations(id) ON DELETE CASCADE, + KEY idx_message_conversation_id(conversation_id) + )".to_string(), + ) + .await?; + + let t_commit = Instant::now(); + self.commit().await?; + log::info!("📋 [schema] COMMIT took {:?}", t_commit.elapsed()); log::info!("✅ Database schema initialized"); Ok(()) } - - /// Add a single vector document - pub fn add_document(&mut self, doc: VectorDocument) -> Result<()> { - let subprocess = self.subprocess.lock().unwrap(); - - let metadata_json = serde_json::to_string(&doc.metadata)?; - - // Convert embedding to JSON array string format for SeekDB - let embedding_str = format!("[{}]", - doc.embedding.iter() - .map(|v| v.to_string()) - .collect::>() - .join(",") - ); - - subprocess.execute( - "INSERT INTO vector_documents - (id, project_id, document_id, chunk_index, content, embedding, 
metadata, created_at) - VALUES (?, ?, ?, ?, ?, ?, ?, NOW()) - ON DUPLICATE KEY UPDATE - content = VALUES(content), - embedding = VALUES(embedding), - metadata = VALUES(metadata)", - vec![ - Value::String(doc.id), - Value::String(doc.project_id), - Value::String(doc.document_id), - Value::Number(doc.chunk_index.into()), - Value::String(doc.content), - Value::String(embedding_str), - Value::String(metadata_json), - ], - )?; - - Ok(()) + + /// Add a single vector document. + pub async fn add_document(&self, doc: VectorDocument) -> Result<()> { + let coll = self + .client + .get_or_create_collection::( + VECTOR_COLLECTION_NAME, + Some(self.hnsw_config.clone()), + None, + ) + .await + .map_err(seekdb_err)?; + let id = doc.id.clone(); + let emb = embedding_f64_to_f32(&doc.embedding); + let meta = doc_to_meta(&doc); + let content = doc.content.clone(); + coll.upsert_batch( + UpsertBatch::new(&[id]) + .embeddings(&[emb]) + .metadatas(&[meta]) + .documents(&[content]), + ) + .await + .map_err(seekdb_err) } - - /// Add multiple vector documents in a transaction - pub fn add_documents(&mut self, docs: Vec) -> Result<()> { - let subprocess = self.subprocess.lock().unwrap(); - - for doc in docs { - let metadata_json = serde_json::to_string(&doc.metadata)?; - let embedding_str = format!("[{}]", - doc.embedding.iter() - .map(|v| v.to_string()) - .collect::>() - .join(",") - ); - - subprocess.execute( - "INSERT INTO vector_documents - (id, project_id, document_id, chunk_index, content, embedding, metadata, created_at) - VALUES (?, ?, ?, ?, ?, ?, ?, NOW()) - ON DUPLICATE KEY UPDATE - content = VALUES(content), - embedding = VALUES(embedding), - metadata = VALUES(metadata)", - vec![ - Value::String(doc.id), - Value::String(doc.project_id), - Value::String(doc.document_id), - Value::Number(doc.chunk_index.into()), - Value::String(doc.content), - Value::String(embedding_str), - Value::String(metadata_json), - ], - )?; + + /// Add multiple vector documents (batch upsert). 
+ pub async fn add_documents(&self, docs: Vec) -> Result<()> { + if docs.is_empty() { + return Ok(()); } - - subprocess.commit()?; - Ok(()) + let coll = self + .client + .get_or_create_collection::( + VECTOR_COLLECTION_NAME, + Some(self.hnsw_config.clone()), + None, + ) + .await + .map_err(seekdb_err)?; + let ids: Vec = docs.iter().map(|d| d.id.clone()).collect(); + let embeddings: Vec> = + docs.iter().map(|d| embedding_f64_to_f32(&d.embedding)).collect(); + let metadatas: Vec = docs.iter().map(doc_to_meta).collect(); + let contents: Vec = docs.iter().map(|d| d.content.clone()).collect(); + coll.upsert_batch( + UpsertBatch::new(&ids) + .embeddings(&embeddings) + .metadatas(&metadatas) + .documents(&contents), + ) + .await + .map_err(seekdb_err) } - - /// Hybrid search using SeekDB's native hybrid search (vector + fulltext) - pub fn hybrid_search( + + /// 向量 KNN 检索,可按 project_id 过滤。 + pub async fn hybrid_search( &self, - query_text: &str, + _query_text: &str, query_embedding: &[f64], project_id: Option<&str>, limit: usize, - semantic_boost: f64, + _semantic_boost: f64, ) -> Result> { - log::info!("🔍 [HYBRID-SEARCH] 开始混合检索"); - log::info!(" 查询文本: {}", query_text); - log::info!(" 向量维度: {}", query_embedding.len()); - log::info!(" 项目ID: {:?}", project_id); - log::info!(" 返回数量: {}", limit); - log::info!(" 语义权重: {}", semantic_boost); - - let subprocess = self.subprocess.lock().unwrap(); - - // Convert query embedding to JSON array - let embedding_json = format!("[{}]", - query_embedding.iter() - .map(|v| v.to_string()) - .collect::>() - .join(",") - ); - - // Build hybrid search query using dbms_hybrid_search.search() - // Reference: docs/seekdb.md section 3.3 - let search_param = if let Some(pid) = project_id { - format!(r#"{{ - "query": {{ - "bool": {{ - "must": [ - {{"match": {{"content": "{}"}}}} - ] - }} - }}, - "knn": {{ - "field": "embedding", - "k": {}, - "num_candidates": {}, - "query_vector": {}, - "boost": {} - }}, - "filter": {{ - "term": {{"project_id": 
"{}"}} - }}, - "_source": ["id", "project_id", "document_id", "chunk_index", "content", "metadata", "_keyword_score", "_semantic_score"] - }}"#, - query_text.replace('"', "\\\""), - limit, - limit * 2, - embedding_json, - semantic_boost, - pid + log::info!("🔍 [HYBRID-SEARCH] 向量 KNN 检索"); + let coll = self + .client + .get_or_create_collection::( + VECTOR_COLLECTION_NAME, + Some(self.hnsw_config.clone()), + None, ) - } else { - format!(r#"{{ - "query": {{ - "bool": {{ - "must": [ - {{"match": {{"content": "{}"}}}} - ] - }} - }}, - "knn": {{ - "field": "embedding", - "k": {}, - "num_candidates": {}, - "query_vector": {}, - "boost": {} - }}, - "_source": ["id", "project_id", "document_id", "chunk_index", "content", "metadata", "_keyword_score", "_semantic_score"] - }}"#, - query_text.replace('"', "\\\""), - limit, - limit * 2, - embedding_json, - semantic_boost + .await + .map_err(seekdb_err)?; + let query_emb = embedding_f64_to_f32(query_embedding); + let where_meta = project_id + .map(|pid| Filter::Eq { field: "project_id".to_string(), value: json!(pid) }); + let limit_u = limit as u32; + let qr = coll + .query_embeddings( + &[query_emb], + limit_u, + where_meta.as_ref(), + None, + Some(&[IncludeField::Documents, IncludeField::Metadatas]), ) + .await + .map_err(seekdb_err)?; + let results = query_result_to_search_results(qr, limit); + log::info!("✅ [HYBRID-SEARCH] 返回 {} 个结果", results.len()); + Ok(results) + } + + /// 混合检索(关键词+向量):用 query 文本直接检索,内部对 query 做向量化并执行混合搜索。 + pub async fn hybrid_search_by_text( + &self, + embedding_service: Arc, + project_id: Option<&str>, + query_text: &str, + limit: usize, + ) -> Result> { + log::info!("🔍 [HYBRID-BY-TEXT] 混合检索(关键词+向量)"); + let ef = DashScopeEmbeddingFunction { service: embedding_service }; + let coll = self + .client + .get_or_create_collection::( + VECTOR_COLLECTION_NAME, + Some(self.hnsw_config.clone()), + Some(ef), + ) + .await + .map_err(seekdb_err)?; + let query_text = query_text.to_string(); + let where_meta = 
project_id + .map(|pid| Filter::Eq { field: "project_id".to_string(), value: json!(pid) }); + let limit_u = limit as u32; + let knn = HybridKnn { + query_texts: Some(vec![query_text]), + query_embeddings: None, + where_meta, + n_results: Some(limit_u), }; - - log::debug!("混合搜索参数: {}", search_param); - - // Set the parameter variable - subprocess.execute( - &format!("SET @search_param = '{}'", search_param.replace('\'', "\\'")), - vec![], - )?; - - // Execute hybrid search - let rows = subprocess.query( - "SELECT dbms_hybrid_search.search('vector_documents', @search_param)", - vec![], - )?; - - log::info!("✅ [HYBRID-SEARCH] 混合检索返回 {} 行结果", rows.len()); - - // Parse results - let mut results = Vec::new(); - for row in rows { - if row.is_empty() { - continue; - } - - // The result is a JSON string - let result_json = row[0].as_str().unwrap_or("{}"); - log::debug!("结果 JSON: {}", result_json); - - // Parse the JSON result - if let Ok(result_obj) = serde_json::from_str::(result_json) { - if let Some(hits) = result_obj["hits"]["hits"].as_array() { - for hit in hits { - let source = &hit["_source"]; - let id = source["id"].as_str().unwrap_or_default().to_string(); - let project_id = source["project_id"].as_str().unwrap_or_default().to_string(); - let document_id = source["document_id"].as_str().unwrap_or_default().to_string(); - let chunk_index = source["chunk_index"].as_i64().unwrap_or(0) as i32; - let content = source["content"].as_str().unwrap_or_default().to_string(); - - // Get scores - let keyword_score = source["_keyword_score"].as_f64().unwrap_or(0.0); - let semantic_score = source["_semantic_score"].as_f64().unwrap_or(0.0); - let total_score = hit["_score"].as_f64().unwrap_or(0.0); - - log::debug!(" 文档ID: {}, 关键词分数: {:.4}, 语义分数: {:.4}, 总分: {:.4}", - document_id, keyword_score, semantic_score, total_score); - - // Parse metadata - let metadata_str = source["metadata"].as_str().unwrap_or("{}"); - let metadata: HashMap = 
serde_json::from_str(metadata_str).unwrap_or_default(); - - // We don't have the embedding in the result, use empty vector - results.push(SearchResult { - document: VectorDocument { - id, - project_id, - document_id, - chunk_index, - content, - embedding: vec![], - metadata, - }, - similarity: total_score, - }); - } - } - } - } - - log::info!("✅ [HYBRID-SEARCH] 解析得到 {} 个有效结果", results.len()); - + let qr = coll + .hybrid_search_advanced( + None, + Some(knn), + None, + limit_u, + Some(&[IncludeField::Documents, IncludeField::Metadatas]), + ) + .await + .map_err(seekdb_err)?; + let results = query_result_to_search_results(qr, limit); + log::info!("✅ [HYBRID-BY-TEXT] 返回 {} 个结果", results.len()); Ok(results) } - - /// Vector similarity search using SeekDB's native L2 distance - pub fn similarity_search( + + /// 向量相似度检索(L2 距离),按 threshold 过滤后截断条数。 + pub async fn similarity_search( &self, query_embedding: &[f64], project_id: Option<&str>, limit: usize, threshold: f64, ) -> Result> { - let subprocess = self.subprocess.lock().unwrap(); - - // Convert query embedding to SeekDB format - let embedding_str = format!("[{}]", - query_embedding.iter() - .map(|v| v.to_string()) - .collect::>() - .join(",") - ); - - // Build SQL query with SeekDB's native vector search - // Note: We don't SELECT the embedding field because SeekDB doesn't support - // fetching vector columns when using vector functions (l2_distance) with APPROXIMATE - let sql = if project_id.is_some() { - format!( - "SELECT id, project_id, document_id, chunk_index, content, metadata, - l2_distance(embedding, '{}') as distance - FROM vector_documents - WHERE project_id = ? 
- ORDER BY l2_distance(embedding, '{}') APPROXIMATE - LIMIT {}", - embedding_str, embedding_str, limit * 2 // Get more to filter by threshold + let coll = self + .client + .get_or_create_collection::( + VECTOR_COLLECTION_NAME, + Some(self.hnsw_config.clone()), + None, ) - } else { - format!( - "SELECT id, project_id, document_id, chunk_index, content, metadata, - l2_distance(embedding, '{}') as distance - FROM vector_documents - ORDER BY l2_distance(embedding, '{}') APPROXIMATE - LIMIT {}", - embedding_str, embedding_str, limit * 2 + .await + .map_err(seekdb_err)?; + let query_emb = embedding_f64_to_f32(query_embedding); + let where_meta = project_id + .map(|pid| Filter::Eq { field: "project_id".to_string(), value: json!(pid) }); + let limit_u = (limit * 2).min(1000) as u32; + let qr = coll + .query_embeddings( + &[query_emb], + limit_u, + where_meta.as_ref(), + None, + Some(&[IncludeField::Documents, IncludeField::Metadatas]), ) - }; - - let values = if project_id.is_some() { - vec![Value::String(project_id.unwrap().to_string())] - } else { - vec![] - }; - - let rows = subprocess.query(&sql, values)?; - - let mut results = Vec::new(); - for row in rows { - if row.len() < 7 { - continue; - } - - let id = row[0].as_str().unwrap_or_default().to_string(); - let project_id = row[1].as_str().unwrap_or_default().to_string(); - let document_id = row[2].as_str().unwrap_or_default().to_string(); - let chunk_index = row[3].as_i64().unwrap_or(0) as i32; - let content = row[4].as_str().unwrap_or_default().to_string(); - - // Parse metadata - let metadata_str = row[5].as_str().unwrap_or("{}"); - let metadata: HashMap = serde_json::from_str(metadata_str).unwrap_or_default(); - - // Get distance (L2) and convert to similarity (inverse) - let distance = row[6].as_f64().unwrap_or(f64::MAX); - - // Convert L2 distance to cosine similarity approximation - // For normalized vectors, cosine similarity ≈ 1 - (L2_distance^2 / 2) - // But since we don't know if vectors are normalized, 
we'll use a simple inverse - let similarity = if distance > 0.0 { - 1.0 / (1.0 + distance) - } else { - 1.0 - }; - - // Filter by threshold - if similarity >= threshold { - results.push(SearchResult { - document: VectorDocument { - id, - project_id, - document_id, - chunk_index, - content, - embedding: vec![], // Empty vector - not returned by query for performance - metadata, - }, - similarity, - }); - } - } - - // Limit results + .await + .map_err(seekdb_err)?; + let mut results = query_result_to_search_results(qr, limit_u as usize); + results.retain(|r| r.similarity >= threshold); results.truncate(limit); - Ok(results) } - - /// Get all documents for a project - pub fn get_project_documents(&self, project_id: &str) -> Result> { - let subprocess = self.subprocess.lock().unwrap(); - - // Note: SeekDB may not support selecting vector columns in all contexts - // We query without embedding field and use empty vectors - let rows = subprocess.query( - "SELECT id, project_id, document_id, chunk_index, content, metadata - FROM vector_documents - WHERE project_id = ?", - vec![Value::String(project_id.to_string())], - )?; - + + /// 获取项目下所有向量文档(按 project_id 过滤)。 + pub async fn get_project_documents(&self, project_id: &str) -> Result> { + let coll = self + .client + .get_or_create_collection::( + VECTOR_COLLECTION_NAME, + Some(self.hnsw_config.clone()), + None, + ) + .await + .map_err(seekdb_err)?; + let filter = + Filter::Eq { field: "project_id".to_string(), value: json!(project_id) }; + let get_result = coll + .get( + None, + Some(&filter), + None, + Some(100_000), + Some(0), + Some(&[IncludeField::Documents, IncludeField::Metadatas]), + ) + .await + .map_err(seekdb_err)?; + let ids = get_result.ids; + let docs = get_result.documents.unwrap_or_default(); + let metas = get_result.metadatas.unwrap_or_default(); let mut documents = Vec::new(); - for row in rows { - if row.len() < 6 { - continue; - } - - let id = row[0].as_str().unwrap_or_default().to_string(); - let 
project_id = row[1].as_str().unwrap_or_default().to_string(); - let document_id = row[2].as_str().unwrap_or_default().to_string(); - let chunk_index = row[3].as_i64().unwrap_or(0) as i32; - let content = row[4].as_str().unwrap_or_default().to_string(); - - let metadata_str = row[5].as_str().unwrap_or("{}"); - let metadata: HashMap = serde_json::from_str(metadata_str).unwrap_or_default(); - + for (i, id) in ids.into_iter().enumerate() { + let content = docs.get(i).cloned().unwrap_or_default(); + let meta = metas.get(i).cloned().unwrap_or(json!({})); + let project_id_str = + meta.get("project_id").and_then(|v| v.as_str()).unwrap_or("").to_string(); + let document_id = + meta.get("document_id").and_then(|v| v.as_str()).unwrap_or("").to_string(); + let chunk_index = + meta.get("chunk_index").and_then(|v| v.as_i64()).unwrap_or(0) as i32; documents.push(VectorDocument { id, - project_id, + project_id: project_id_str, document_id, chunk_index, content, - embedding: vec![], // Empty vector - not needed for this query - metadata, + embedding: vec![], + metadata: meta_to_doc_meta(&meta), }); } - - // Sort documents by document_id and chunk_index in memory documents.sort_by(|a, b| { match a.document_id.cmp(&b.document_id) { std::cmp::Ordering::Equal => a.chunk_index.cmp(&b.chunk_index), other => other, } }); - Ok(documents) } - - /// Delete all documents for a project - pub fn delete_project_documents(&mut self, project_id: &str) -> Result { - let subprocess = self.subprocess.lock().unwrap(); - - let count = subprocess.execute( - "DELETE FROM vector_documents WHERE project_id = ?", - vec![Value::String(project_id.to_string())], - )?; - - subprocess.commit()?; - Ok(count as usize) + + /// 按项目删除向量文档。 + pub async fn delete_project_documents(&self, project_id: &str) -> Result { + let coll = self + .client + .get_or_create_collection::( + VECTOR_COLLECTION_NAME, + Some(self.hnsw_config.clone()), + None, + ) + .await + .map_err(seekdb_err)?; + let filter = + Filter::Eq { field: 
"project_id".to_string(), value: json!(project_id) }; + coll.delete_query(DeleteQuery::new().with_where_meta(&filter)) + .await + .map_err(seekdb_err)?; + Ok(0) + } + + /// 按 document_id 删除向量文档。 + pub async fn delete_document(&self, document_id: &str) -> Result { + let coll = self + .client + .get_or_create_collection::( + VECTOR_COLLECTION_NAME, + Some(self.hnsw_config.clone()), + None, + ) + .await + .map_err(seekdb_err)?; + let filter = + Filter::Eq { field: "document_id".to_string(), value: json!(document_id) }; + coll.delete_query(DeleteQuery::new().with_where_meta(&filter)) + .await + .map_err(seekdb_err)?; + Ok(0) + } + + /// 从查询结果解析整型(兼容 Number 或 String 列)。 + fn value_as_i64(v: &Value) -> i64 { + v.as_i64() + .or_else(|| v.as_str().and_then(|s| s.parse::().ok())) + .unwrap_or(0) } - - /// Delete a specific document - pub fn delete_document(&mut self, document_id: &str) -> Result { - let subprocess = self.subprocess.lock().unwrap(); - - let count = subprocess.execute( - "DELETE FROM vector_documents WHERE document_id = ?", - vec![Value::String(document_id.to_string())], - )?; - - subprocess.commit()?; - Ok(count as usize) + + /// 从查询结果解析浮点(兼容 Number 或 String 列,如 distance)。 + fn value_as_f64(v: &Value) -> f64 { + v.as_f64() + .or_else(|| v.as_str().and_then(|s| s.parse::().ok())) + .unwrap_or(f64::MAX) } - - /// Get database statistics - pub fn get_stats(&self) -> Result> { - let subprocess = self.subprocess.lock().unwrap(); + + /// 统计:向量总条数 + 项目数(项目数来自 projects 表)。 + pub async fn get_stats(&self) -> Result> { let mut stats = HashMap::new(); - - // Total documents - if let Some(row) = subprocess.query_one("SELECT COUNT(*) FROM vector_documents", vec![])? { - if let Some(count) = row[0].as_i64() { - stats.insert("total_documents".to_string(), count); - } - } - - // Total projects - if let Some(row) = subprocess.query_one( - "SELECT COUNT(DISTINCT project_id) FROM vector_documents", - vec![], - )? 
{ - if let Some(count) = row[0].as_i64() { - stats.insert("total_projects".to_string(), count); - } + let coll = self + .client + .get_or_create_collection::( + VECTOR_COLLECTION_NAME, + Some(self.hnsw_config.clone()), + None, + ) + .await + .map_err(seekdb_err)?; + let total_documents = coll.count().await.map_err(seekdb_err)?; + stats.insert("total_documents".to_string(), total_documents as i64); + if let Some(row) = self.query_one("SELECT COUNT(*) FROM projects", vec![]).await? { + stats.insert("total_projects".to_string(), Self::value_as_i64(&row[0])); } - Ok(stats) } - - /// Count documents in a project - pub fn count_project_documents(&self, project_id: &str) -> Result { - let subprocess = self.subprocess.lock().unwrap(); - - if let Some(row) = subprocess.query_one( - "SELECT COUNT(DISTINCT document_id) FROM vector_documents WHERE project_id = ?", - vec![Value::String(project_id.to_string())], - )? { - if let Some(count) = row[0].as_i64() { - return Ok(count as usize); - } - } - - Ok(0) + + /// 统计指定项目下的向量条数。 + pub async fn count_project_documents(&self, project_id: &str) -> Result { + let coll = self + .client + .get_or_create_collection::( + VECTOR_COLLECTION_NAME, + Some(self.hnsw_config.clone()), + None, + ) + .await + .map_err(seekdb_err)?; + let filter = + Filter::Eq { field: "project_id".to_string(), value: json!(project_id) }; + let res = coll + .get(None, Some(&filter), None, Some(100_000), Some(0), None) + .await + .map_err(seekdb_err)?; + Ok(res.ids.len()) } - + /// Save project to database - pub fn save_project(&mut self, project: &crate::models::project::Project) -> Result<()> { + pub async fn save_project(&self, project: &crate::models::project::Project) -> Result<()> { log::info!("💾 [SAVE-PROJECT] Saving project: id={}, name={}", project.id, project.name); - - let subprocess = self.subprocess.lock().unwrap(); - - subprocess.execute( + + self.execute( "INSERT INTO projects (id, name, description, status, document_count, created_at, updated_at) 
VALUES (?, ?, ?, ?, ?, ?, ?) ON DUPLICATE KEY UPDATE @@ -687,97 +657,72 @@ impl SeekDbAdapter { Value::String(project.created_at.to_rfc3339()), Value::String(project.updated_at.to_rfc3339()), ], - )?; - - subprocess.commit()?; + ) + .await?; + self.commit().await?; log::info!("💾 [SAVE-PROJECT] Project saved successfully"); Ok(()) } - + /// Load all projects from database - pub fn load_all_projects(&self) -> Result> { + pub async fn load_all_projects(&self) -> Result> { use chrono::DateTime; use uuid::Uuid; - - let subprocess = self.subprocess.lock().unwrap(); - - // Note: SeekDB/ObLite doesn't support ORDER BY, so we sort in memory - let rows = subprocess.query( + + let rows = self.query( "SELECT id, name, description, status, document_count, created_at, updated_at FROM projects", vec![], - )?; - + ) + .await?; + let mut projects = Vec::new(); for (idx, row) in rows.iter().enumerate() { if row.len() < 7 { log::warn!("跳过项目 #{}: 列数不足 ({})", idx, row.len()); continue; } - - // 解析 ID let id_str = row[0].as_str().unwrap_or_default(); if id_str.is_empty() { - log::warn!("跳过项目 #{}: ID 为空", idx); continue; } - let id = match Uuid::parse_str(id_str) { Ok(id) => id, - Err(e) => { - log::warn!("跳过项目 #{}: ID 解析失败 '{}': {}", idx, id_str, e); - continue; - } + Err(_) => continue, }; - let name = row[1].as_str().unwrap_or_default().to_string(); let description = row[2].as_str().and_then(|s| { - if s.is_empty() { None } else { Some(s.to_string()) } + if s.is_empty() { + None + } else { + Some(s.to_string()) + } }); - let status_str = row[3].as_str().unwrap_or("Created"); let status = match status_str { - "Created" => crate::models::project::ProjectStatus::Created, "Processing" => crate::models::project::ProjectStatus::Processing, "Ready" => crate::models::project::ProjectStatus::Ready, "Error" => crate::models::project::ProjectStatus::Error, _ => crate::models::project::ProjectStatus::Created, }; - - let document_count = row[4].as_i64().unwrap_or(0) as u32; - - // 解析创建时间 - 
添加更好的错误处理 + let document_count = Self::value_as_i64(&row[4]) as u32; let created_at_str = row[5].as_str().unwrap_or_default(); let created_at = if created_at_str.is_empty() { - log::warn!("项目 {} '{}': 创建时间为空,使用当前时间", id, name); chrono::Utc::now() } else { - match DateTime::parse_from_rfc3339(created_at_str) { - Ok(dt) => dt.with_timezone(&chrono::Utc), - Err(e) => { - log::warn!("项目 {} '{}': 创建时间解析失败 '{}': {},使用当前时间", - id, name, created_at_str, e); - chrono::Utc::now() - } - } + DateTime::parse_from_rfc3339(created_at_str) + .map(|dt| dt.with_timezone(&chrono::Utc)) + .unwrap_or_else(|_| chrono::Utc::now()) }; - - // 解析更新时间 - 添加更好的错误处理 let updated_at_str = row[6].as_str().unwrap_or_default(); let updated_at = if updated_at_str.is_empty() { - log::warn!("项目 {} '{}': 更新时间为空,使用创建时间", id, name); created_at } else { - match DateTime::parse_from_rfc3339(updated_at_str) { - Ok(dt) => dt.with_timezone(&chrono::Utc), - Err(e) => { - log::warn!("项目 {} '{}': 更新时间解析失败 '{}': {},使用创建时间", - id, name, updated_at_str, e); - created_at - } - } + DateTime::parse_from_rfc3339(updated_at_str) + .map(|dt| dt.with_timezone(&chrono::Utc)) + .unwrap_or(created_at) }; - + projects.push(crate::models::project::Project { id, name, @@ -788,53 +733,40 @@ impl SeekDbAdapter { updated_at, }); } - - log::info!("成功加载 {} 个项目", projects.len()); - - // Sort by updated_at DESC in memory projects.sort_by(|a, b| b.updated_at.cmp(&a.updated_at)); - Ok(projects) } - + /// Delete project by ID - pub fn delete_project_by_id(&mut self, project_id: &str) -> Result { - let subprocess = self.subprocess.lock().unwrap(); - - let count = subprocess.execute( - "DELETE FROM projects WHERE id = ?", - vec![Value::String(project_id.to_string())], - )?; - - subprocess.commit()?; - Ok(count as usize) + pub async fn delete_project_by_id(&self, project_id: &str) -> Result { + self.execute("DELETE FROM projects WHERE id = ?", vec![Value::String(project_id.to_string())]) + .await?; + self.commit().await?; + Ok(0) } - + /// 
Update project document count - pub fn update_project_document_count(&mut self, project_id: &str, count: u32) -> Result<()> { - let subprocess = self.subprocess.lock().unwrap(); - - subprocess.execute( + pub async fn update_project_document_count(&self, project_id: &str, count: u32) -> Result<()> { + self.execute( "UPDATE projects SET document_count = ?, updated_at = NOW() WHERE id = ?", vec![ Value::Number((count as i64).into()), Value::String(project_id.to_string()), ], - )?; - - subprocess.commit()?; + ) + .await?; + self.commit().await?; Ok(()) } - - // ==================== Conversation Management ==================== - + /// Save conversation to database - pub fn save_conversation(&mut self, conversation: &crate::models::conversation::Conversation) -> Result<()> { + pub async fn save_conversation( + &self, + conversation: &crate::models::conversation::Conversation, + ) -> Result<()> { log::info!("💾 [SAVE-CONV] Saving conversation: id={}", conversation.id); - - let subprocess = self.subprocess.lock().unwrap(); - - subprocess.execute( + + self.execute( "INSERT INTO conversations (id, project_id, title, created_at, updated_at, message_count) VALUES (?, ?, ?, ?, ?, ?) 
ON DUPLICATE KEY UPDATE @@ -849,99 +781,55 @@ impl SeekDbAdapter { Value::String(conversation.updated_at.to_rfc3339()), Value::Number((conversation.message_count as i64).into()), ], - )?; - - subprocess.commit()?; + ) + .await?; + self.commit().await?; log::info!("💾 [SAVE-CONV] Conversation saved successfully"); Ok(()) } - + /// Load conversations by project - pub fn load_conversations_by_project( + pub async fn load_conversations_by_project( &self, project_id: &str, ) -> Result> { use chrono::DateTime; use uuid::Uuid; - - let subprocess = self.subprocess.lock().unwrap(); - - // Note: SeekDB/ObLite doesn't support ORDER BY, so we sort in memory - let rows = subprocess.query( + + let rows = self.query( "SELECT id, project_id, title, created_at, updated_at, message_count FROM conversations WHERE project_id = ?", vec![Value::String(project_id.to_string())], - )?; - + ) + .await?; + let mut conversations = Vec::new(); - for (idx, row) in rows.iter().enumerate() { + for row in rows.iter() { if row.len() < 6 { - log::warn!("跳过对话 #{}: 列数不足 ({})", idx, row.len()); continue; } - - // 解析 ID let id_str = row[0].as_str().unwrap_or_default(); - if id_str.is_empty() { - log::warn!("跳过对话 #{}: ID 为空", idx); - continue; - } - let id = match Uuid::parse_str(id_str) { Ok(id) => id, - Err(e) => { - log::warn!("跳过对话 #{}: ID 解析失败 '{}': {}", idx, id_str, e); - continue; - } + Err(_) => continue, }; - - // 解析项目 ID let project_id_str = row[1].as_str().unwrap_or_default(); let project_id = match Uuid::parse_str(project_id_str) { Ok(pid) => pid, - Err(e) => { - log::warn!("跳过对话 {}: 项目ID 解析失败 '{}': {}", id, project_id_str, e); - continue; - } + Err(_) => continue, }; - let title = row[2].as_str().unwrap_or_default().to_string(); - - // 解析创建时间 let created_at_str = row[3].as_str().unwrap_or_default(); - let created_at = if created_at_str.is_empty() { - log::warn!("对话 {} '{}': 创建时间为空,使用当前时间", id, title); - chrono::Utc::now() - } else { - match DateTime::parse_from_rfc3339(created_at_str) { - 
Ok(dt) => dt.with_timezone(&chrono::Utc), - Err(e) => { - log::warn!("对话 {} '{}': 创建时间解析失败 '{}': {},使用当前时间", - id, title, created_at_str, e); - chrono::Utc::now() - } - } - }; - - // 解析更新时间 + let created_at = DateTime::parse_from_rfc3339(created_at_str) + .map(|dt| dt.with_timezone(&chrono::Utc)) + .unwrap_or_else(|_| chrono::Utc::now()); let updated_at_str = row[4].as_str().unwrap_or_default(); - let updated_at = if updated_at_str.is_empty() { - log::warn!("对话 {} '{}': 更新时间为空,使用创建时间", id, title); - created_at - } else { - match DateTime::parse_from_rfc3339(updated_at_str) { - Ok(dt) => dt.with_timezone(&chrono::Utc), - Err(e) => { - log::warn!("对话 {} '{}': 更新时间解析失败 '{}': {},使用创建时间", - id, title, updated_at_str, e); - created_at - } - } - }; - - let message_count = row[5].as_i64().unwrap_or(0) as u32; - + let updated_at = DateTime::parse_from_rfc3339(updated_at_str) + .map(|dt| dt.with_timezone(&chrono::Utc)) + .unwrap_or(created_at); + let message_count = Self::value_as_i64(&row[5]) as u32; + conversations.push(crate::models::conversation::Conversation { id, project_id, @@ -951,100 +839,48 @@ impl SeekDbAdapter { message_count, }); } - - // Sort by updated_at DESC in memory conversations.sort_by(|a, b| b.updated_at.cmp(&a.updated_at)); - Ok(conversations) } - + /// Load all conversations - pub fn load_all_conversations(&self) -> Result> { + pub async fn load_all_conversations(&self) -> Result> { use chrono::DateTime; use uuid::Uuid; - - let subprocess = self.subprocess.lock().unwrap(); - - // Note: SeekDB/ObLite doesn't support ORDER BY, so we sort in memory - let rows = subprocess.query( + + let rows = self.query( "SELECT id, project_id, title, created_at, updated_at, message_count FROM conversations", vec![], - )?; - + ) + .await?; + let mut conversations = Vec::new(); - for (idx, row) in rows.iter().enumerate() { + for row in rows.iter() { if row.len() < 6 { - log::warn!("跳过对话 #{}: 列数不足 ({})", idx, row.len()); continue; } - - // 解析 ID let id_str = 
row[0].as_str().unwrap_or_default(); - if id_str.is_empty() { - log::warn!("跳过对话 #{}: ID 为空", idx); - continue; - } - let id = match Uuid::parse_str(id_str) { Ok(id) => id, - Err(e) => { - log::warn!("跳过对话 #{}: ID 解析失败 '{}': {}", idx, id_str, e); - continue; - } + Err(_) => continue, }; - - // 解析项目 ID let project_id_str = row[1].as_str().unwrap_or_default(); - if project_id_str.is_empty() { - log::warn!("跳过对话 {} : 项目ID 为空", id); - continue; - } - let project_id = match Uuid::parse_str(project_id_str) { Ok(pid) => pid, - Err(e) => { - log::warn!("跳过对话 {}: 项目ID 解析失败 '{}': {}", id, project_id_str, e); - continue; - } + Err(_) => continue, }; - let title = row[2].as_str().unwrap_or_default().to_string(); - - // 解析创建时间 - 添加更好的错误处理 let created_at_str = row[3].as_str().unwrap_or_default(); - let created_at = if created_at_str.is_empty() { - log::warn!("对话 {} '{}': 创建时间为空,使用当前时间", id, title); - chrono::Utc::now() - } else { - match DateTime::parse_from_rfc3339(created_at_str) { - Ok(dt) => dt.with_timezone(&chrono::Utc), - Err(e) => { - log::warn!("对话 {} '{}': 创建时间解析失败 '{}': {},使用当前时间", - id, title, created_at_str, e); - chrono::Utc::now() - } - } - }; - - // 解析更新时间 - 添加更好的错误处理 + let created_at = DateTime::parse_from_rfc3339(created_at_str) + .map(|dt| dt.with_timezone(&chrono::Utc)) + .unwrap_or_else(|_| chrono::Utc::now()); let updated_at_str = row[4].as_str().unwrap_or_default(); - let updated_at = if updated_at_str.is_empty() { - log::warn!("对话 {} '{}': 更新时间为空,使用创建时间", id, title); - created_at - } else { - match DateTime::parse_from_rfc3339(updated_at_str) { - Ok(dt) => dt.with_timezone(&chrono::Utc), - Err(e) => { - log::warn!("对话 {} '{}': 更新时间解析失败 '{}': {},使用创建时间", - id, title, updated_at_str, e); - created_at - } - } - }; - - let message_count = row[5].as_i64().unwrap_or(0) as u32; - + let updated_at = DateTime::parse_from_rfc3339(updated_at_str) + .map(|dt| dt.with_timezone(&chrono::Utc)) + .unwrap_or(created_at); + let message_count = Self::value_as_i64(&row[5]) 
as u32; + conversations.push(crate::models::conversation::Conversation { id, project_id, @@ -1054,66 +890,53 @@ impl SeekDbAdapter { message_count, }); } - - log::info!("成功加载 {} 个对话", conversations.len()); - - // Sort by updated_at DESC in memory conversations.sort_by(|a, b| b.updated_at.cmp(&a.updated_at)); - Ok(conversations) } - + /// Delete conversation by ID - pub fn delete_conversation_by_id(&mut self, conversation_id: &str) -> Result { - let subprocess = self.subprocess.lock().unwrap(); - - let count = subprocess.execute( + pub async fn delete_conversation_by_id(&self, conversation_id: &str) -> Result { + self.execute( "DELETE FROM conversations WHERE id = ?", vec![Value::String(conversation_id.to_string())], - )?; - - subprocess.commit()?; - Ok(count as usize) + ) + .await?; + self.commit().await?; + Ok(0) } - + /// Delete message by ID - pub fn delete_message_by_id(&mut self, message_id: &str) -> Result { - let subprocess = self.subprocess.lock().unwrap(); - - let count = subprocess.execute( + pub async fn delete_message_by_id(&self, message_id: &str) -> Result { + self.execute( "DELETE FROM messages WHERE id = ?", vec![Value::String(message_id.to_string())], - )?; - - subprocess.commit()?; - Ok(count as usize) + ) + .await?; + self.commit().await?; + Ok(0) } - + /// Delete all messages in a conversation - pub fn delete_messages_by_conversation(&mut self, conversation_id: &str) -> Result { - let subprocess = self.subprocess.lock().unwrap(); - - let count = subprocess.execute( + pub async fn delete_messages_by_conversation(&self, conversation_id: &str) -> Result { + self.execute( "DELETE FROM messages WHERE conversation_id = ?", vec![Value::String(conversation_id.to_string())], - )?; - - subprocess.commit()?; - Ok(count as usize) + ) + .await?; + self.commit().await?; + Ok(0) } - + /// Save message to database - pub fn save_message(&mut self, message: &crate::models::conversation::Message) -> Result<()> { + pub async fn save_message(&self, message: 
&crate::models::conversation::Message) -> Result<()> { log::info!("📝 [SAVE-MSG] Saving message: id={}", message.id); - - let subprocess = self.subprocess.lock().unwrap(); - - let sources_json = message.sources.as_ref() - .map(|s| serde_json::to_string(s).ok()) - .flatten(); - - // 尝试 INSERT - let insert_result = subprocess.execute( + + let sources_json = message + .sources + .as_ref() + .and_then(|s| serde_json::to_string(s).ok()); + + let insert_result = self.execute( "INSERT INTO messages (id, conversation_id, role, content, created_at, sources) VALUES (?, ?, ?, ?, ?, ?)", vec![ @@ -1122,156 +945,106 @@ impl SeekDbAdapter { Value::String(message.role.to_string()), Value::String(message.content.clone()), Value::String(message.timestamp.to_rfc3339()), - sources_json.clone().map(Value::String).unwrap_or(Value::Null), + sources_json + .as_ref() + .map(|s| Value::String(s.clone())) + .unwrap_or(Value::Null), ], - ); - - // 如果 INSERT 失败(主键冲突),尝试 UPDATE + ) + .await; + match insert_result { - Ok(_) => { - log::info!("✅ [SAVE-MSG] INSERT 成功"); - } + Ok(()) => {} Err(e) => { let error_msg = e.to_string(); if error_msg.contains("Duplicated primary key") || error_msg.contains("1062") { - log::info!("💡 [SAVE-MSG] 主键已存在,尝试 UPDATE"); - subprocess.execute( + self.execute( "UPDATE messages SET role=?, content=?, created_at=?, sources=? 
WHERE id=?", vec![ Value::String(message.role.to_string()), Value::String(message.content.clone()), Value::String(message.timestamp.to_rfc3339()), - sources_json.map(Value::String).unwrap_or(Value::Null), + sources_json + .map(Value::String) + .unwrap_or(Value::Null), Value::String(message.id.to_string()), ], - )?; - log::info!("✅ [SAVE-MSG] UPDATE 成功"); + ) + .await?; } else { - log::error!("❌ [SAVE-MSG] INSERT 失败: {}", e); return Err(e); } } } - - subprocess.commit()?; + self.commit().await?; log::info!("📝 [SAVE-MSG] Message saved successfully"); Ok(()) } - + /// Get message count - pub fn get_message_count(&self) -> Result { - let subprocess = self.subprocess.lock().unwrap(); - - if let Some(row) = subprocess.query_one("SELECT COUNT(*) FROM messages", vec![])? { - if let Some(count) = row[0].as_i64() { - return Ok(count as i32); - } + pub async fn get_message_count(&self) -> Result { + if let Some(row) = self.query_one("SELECT COUNT(*) FROM messages", vec![]).await? { + return Ok(Self::value_as_i64(&row[0]) as i32); } - Ok(0) } - + /// Get conversation message count - pub fn get_conversation_message_count(&self, conversation_id: &str) -> Result { - let subprocess = self.subprocess.lock().unwrap(); - - if let Some(row) = subprocess.query_one( + pub async fn get_conversation_message_count(&self, conversation_id: &str) -> Result { + if let Some(row) = self.query_one( "SELECT COUNT(*) FROM messages WHERE conversation_id = ?", vec![Value::String(conversation_id.to_string())], - )? { - if let Some(count) = row[0].as_i64() { - return Ok(count as i32); - } + ) + .await? 
+ { + return Ok(Self::value_as_i64(&row[0]) as i32); } - Ok(0) } - + /// Load messages by conversation - pub fn load_messages_by_conversation( + pub async fn load_messages_by_conversation( &self, conversation_id: &str, ) -> Result> { - use chrono::DateTime; use uuid::Uuid; - - let subprocess = self.subprocess.lock().unwrap(); - - // Note: SeekDB/ObLite doesn't support ORDER BY, so we sort in memory - let rows = subprocess.query( + + let rows = self.query( "SELECT id, conversation_id, role, content, created_at, sources FROM messages WHERE conversation_id = ?", vec![Value::String(conversation_id.to_string())], - )?; - + ) + .await?; + let mut messages = Vec::new(); - for (idx, row) in rows.iter().enumerate() { + for row in rows.iter() { if row.len() < 6 { - log::warn!("跳过消息 #{}: 列数不足 ({})", idx, row.len()); continue; } - - // 解析消息 ID let id_str = row[0].as_str().unwrap_or_default(); - if id_str.is_empty() { - log::warn!("跳过消息 #{}: ID 为空", idx); - continue; - } - let id = match Uuid::parse_str(id_str) { Ok(id) => id, - Err(e) => { - log::warn!("跳过消息 #{}: ID 解析失败 '{}': {}", idx, id_str, e); - continue; - } + Err(_) => continue, }; - - // 解析对话 ID let conversation_id_str = row[1].as_str().unwrap_or_default(); let conversation_id = match Uuid::parse_str(conversation_id_str) { Ok(cid) => cid, - Err(e) => { - log::warn!("跳过消息 {}: 对话ID 解析失败 '{}': {}", id, conversation_id_str, e); - continue; - } + Err(_) => continue, }; - let role_str = row[2].as_str().unwrap_or("User"); let role = match role_str { - "User" | "user" => crate::models::conversation::MessageRole::User, "Assistant" | "assistant" => crate::models::conversation::MessageRole::Assistant, "System" | "system" => crate::models::conversation::MessageRole::System, _ => crate::models::conversation::MessageRole::User, }; - let content = row[3].as_str().unwrap_or_default().to_string(); - - // 解析创建时间 let created_at_str = row[4].as_str().unwrap_or_default(); - let created_at = if created_at_str.is_empty() { - log::warn!("消息 
{}: 创建时间为空,使用当前时间", id); - chrono::Utc::now() - } else { - match DateTime::parse_from_rfc3339(created_at_str) { - Ok(dt) => dt.with_timezone(&chrono::Utc), - Err(e) => { - log::warn!("消息 {}: 创建时间解析失败 '{}': {},使用当前时间", - id, created_at_str, e); - chrono::Utc::now() - } - } - }; - - let sources = row[5].as_str() - .and_then(|s| { - if s.is_empty() { - None - } else { - serde_json::from_str(s).ok() - } - }); - + let created_at = parse_datetime_from_db(created_at_str); + let sources = row[5] + .as_str() + .and_then(|s| if s.is_empty() { None } else { serde_json::from_str(s).ok() }); + messages.push(crate::models::conversation::Message { id, conversation_id, @@ -1284,26 +1057,18 @@ impl SeekDbAdapter { sources, }); } - - // Sort by created_at ASC in memory - messages.sort_by(|a, b| a.timestamp.cmp(&b.timestamp)); - + messages.sort_by(|a, b| a.timestamp.cmp(&b.timestamp).then_with(|| a.id.cmp(&b.id))); Ok(messages) } - - /// Verify database connection by running a simple query - pub fn verify_connection(&self) -> Result<()> { + + /// Verify database connection + pub async fn verify_connection(&self) -> Result<()> { log::info!("🔍 验证 SeekDB 数据库连接..."); - - let subprocess = self.subprocess.lock().unwrap(); - - // Try to execute a simple query - match subprocess.query("SELECT 1", vec![]) { + match self.query("SELECT 1", vec![]).await { Ok(rows) => { if rows.is_empty() || rows[0].is_empty() { return Err(anyhow!("数据库查询返回空结果")); } - log::info!("✅ SeekDB 数据库连接正常"); Ok(()) } @@ -1313,25 +1078,12 @@ impl SeekDbAdapter { } } } - - /// Health check - ping subprocess and verify connection - pub fn health_check(&self) -> Result<()> { + + /// Health check + pub async fn health_check(&self) -> Result<()> { log::info!("🏥 执行 SeekDB 健康检查..."); - - // Check if subprocess is alive - let subprocess = self.subprocess.lock().unwrap(); - subprocess.ping() - .map_err(|e| anyhow!("Python 子进程无响应: {}", e))?; - - drop(subprocess); - - // Verify database connection - self.verify_connection()?; - + 
self.verify_connection().await?; log::info!("✅ SeekDB 健康检查通过"); Ok(()) } } - -// No Drop implementation needed - Python subprocess manager handles cleanup - diff --git a/src-tauri/src/services/seekdb_package.rs b/src-tauri/src/services/seekdb_package.rs deleted file mode 100644 index 07cffa7..0000000 --- a/src-tauri/src/services/seekdb_package.rs +++ /dev/null @@ -1,166 +0,0 @@ -use anyhow::{anyhow, Result}; -use std::process::Command; -use super::python_env::PythonEnv; - -const SEEKDB_VERSION: &str = "0.0.1.dev4"; -const PYPI_INDEX: &str = "https://pypi.tuna.tsinghua.edu.cn/simple/"; - -/// SeekDB 包管理器 -pub struct SeekDbPackage<'a> { - python_env: &'a PythonEnv, -} - -impl<'a> SeekDbPackage<'a> { - /// 创建新的 SeekDB 包管理器 - pub fn new(python_env: &'a PythonEnv) -> Self { - Self { python_env } - } - - /// 检查 seekdb 包是否已安装 - pub fn is_installed(&self) -> Result { - log::info!("🔍 检查 seekdb 包是否已安装..."); - - let output = Command::new(self.python_env.get_python_executable()) - .arg("-c") - .arg("import seekdb; print(seekdb.__file__)") - .output(); - - match output { - Ok(output) => { - if output.status.success() { - let path = String::from_utf8_lossy(&output.stdout); - log::info!("✅ seekdb 已安装: {}", path.trim()); - Ok(true) - } else { - log::info!("⚠️ seekdb 未安装"); - Ok(false) - } - } - Err(e) => { - log::warn!("检查 seekdb 安装状态失败: {}", e); - Ok(false) - } - } - } - - /// 安装 seekdb 包 - pub fn install(&self) -> Result<()> { - log::info!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); - log::info!(" 📦 安装 SeekDB 包"); - log::info!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); - log::info!(" 版本: {}", SEEKDB_VERSION); - log::info!(" 镜像: {}", PYPI_INDEX); - log::info!(""); - log::info!("这可能需要几分钟时间,请稍候..."); - - let python_executable = self.python_env.get_python_executable(); - - // 首先升级 pip - log::info!("🔧 升级 pip..."); - let upgrade_pip = Command::new(python_executable) - .arg("-m") - .arg("pip") - .arg("install") - .arg("--upgrade") - .arg("pip") - .arg("-i") - .arg(PYPI_INDEX) - 
.status(); - - match upgrade_pip { - Ok(status) if status.success() => { - log::info!("✅ pip 升级完成"); - } - _ => { - log::warn!("⚠️ pip 升级失败,继续安装 seekdb..."); - } - } - - // 安装 seekdb - log::info!("📦 安装 seekdb=={}...", SEEKDB_VERSION); - - let status = Command::new(python_executable) - .arg("-m") - .arg("pip") - .arg("install") - .arg(format!("seekdb=={}", SEEKDB_VERSION)) - .arg("-i") - .arg(PYPI_INDEX) - .status() - .map_err(|e| anyhow!("执行 pip install 失败: {}", e))?; - - if !status.success() { - return Err(anyhow!( - "seekdb 安装失败(退出码: {:?})\n\n\ - 请检查:\n\ - 1. 网络连接是否正常\n\ - 2. 镜像源是否可访问: {}\n\ - 3. 系统架构是否支持 seekdb\n\n\ - 您也可以手动安装:\n\ - {:?} -m pip install seekdb=={} -i {}", - status.code(), - PYPI_INDEX, - python_executable, - SEEKDB_VERSION, - PYPI_INDEX - )); - } - - log::info!("✅ seekdb 安装完成"); - Ok(()) - } - - /// 验证 seekdb 安装 - pub fn verify(&self) -> Result<()> { - log::info!("🔍 验证 seekdb 安装..."); - - // 尝试导入 seekdb 模块(0.0.1.dev4 版本已移除 oblite 模块) - let output = Command::new(self.python_env.get_python_executable()) - .arg("-c") - .arg("import seekdb; print('seekdb location:', seekdb.__file__)") - .output() - .map_err(|e| anyhow!("验证 seekdb 失败: {}", e))?; - - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - return Err(anyhow!( - "seekdb 验证失败\n\n\ - 无法导入 seekdb 模块\n\ - 错误信息: {}\n\n\ - 请尝试重新安装:\n\ - {:?} -m pip install --force-reinstall seekdb=={} -i {}", - stderr.trim(), - self.python_env.get_python_executable(), - SEEKDB_VERSION, - PYPI_INDEX - )); - } - - let stdout = String::from_utf8_lossy(&output.stdout); - log::info!("✅ seekdb 验证通过"); - for line in stdout.lines() { - log::info!(" {}", line); - } - - Ok(()) - } - - /// 获取 seekdb 版本信息 - pub fn get_version_info(&self) -> Result { - let output = Command::new(self.python_env.get_python_executable()) - .arg("-c") - .arg(format!( - "try:\n import seekdb\n print('{}')\nexcept:\n print('unknown')", - SEEKDB_VERSION - )) - .output() - .map_err(|e| anyhow!("获取版本信息失败: {}", e))?; 
- - if output.status.success() { - Ok(String::from_utf8_lossy(&output.stdout).trim().to_string()) - } else { - Ok("unknown".to_string()) - } - } -} - diff --git a/src-tauri/src/services/vector_db.rs b/src-tauri/src/services/vector_db.rs index 7751f5e..d61ea8e 100644 --- a/src-tauri/src/services/vector_db.rs +++ b/src-tauri/src/services/vector_db.rs @@ -271,8 +271,6 @@ impl VectorDbService { #[cfg(test)] mod tests { use super::*; - use crate::models::document::DocumentChunk; - use chrono::Utc; #[test] fn test_collection_name_generation() { diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index dd5e752..4dc7e7b 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -15,7 +15,14 @@ "all": false, "fs": { "all": true, - "scope": ["$APPDATA", "$APPDATA/**", "$HOME/mine-kb-data", "$HOME/mine-kb-data/**"] + "scope": [ + "$APPDATA", + "$APPDATA/**", + "$APPDATA/tmp", + "$APPDATA/tmp/**", + "$HOME/mine-kb-data", + "$HOME/mine-kb-data/**" + ] }, "path": { "all": true @@ -34,7 +41,9 @@ }, "bundle": { "active": true, - "targets": "all", + "targets": [ + "app" + ], "identifier": "com.mine-kb.app", "icon": [ "icons/32x32.png", @@ -52,7 +61,10 @@ "license": null, "providerShortName": null }, - "resources": ["config.example.json", "config.json"] + "resources": [ + "config.example.json", + "config.json" + ] }, "security": { "csp": null @@ -64,9 +76,9 @@ "title": "MineKB", "width": 1200, "height": 800, - "minWidth": 800, - "minHeight": 600 + "minWidth": 960, + "minHeight": 640 } ] } -} +} \ No newline at end of file diff --git a/src/components/ChatPanel/ChatPanel.tsx b/src/components/ChatPanel/ChatPanel.tsx index ecf1fa5..dcc9367 100644 --- a/src/components/ChatPanel/ChatPanel.tsx +++ b/src/components/ChatPanel/ChatPanel.tsx @@ -556,45 +556,35 @@ const ChatPanel: React.FC = ({ projectId, projectName }) => { }); }; - // 格式化消息时间 + // 格式化消息时间(精确到秒) + const timeOptions: Intl.DateTimeFormatOptions = { + hour: '2-digit', + minute: '2-digit', + second: 
'2-digit', + hour12: false, + }; + const formatMessageTime = (timestamp: string): string => { const msgDate = new Date(timestamp); const now = new Date(); const diffMs = now.getTime() - msgDate.getTime(); const diffHours = diffMs / (1000 * 60 * 60); - // 24小时内,只显示时间 if (diffHours < 24) { - return msgDate.toLocaleTimeString('zh-CN', { - hour: '2-digit', - minute: '2-digit', - }); + return msgDate.toLocaleTimeString('zh-CN', timeOptions); } - // 判断是否跨年 const msgYear = msgDate.getFullYear(); const nowYear = now.getFullYear(); + const month = String(msgDate.getMonth() + 1).padStart(2, '0'); + const day = String(msgDate.getDate()).padStart(2, '0'); + const time = msgDate.toLocaleTimeString('zh-CN', timeOptions); if (msgYear === nowYear) { - // 同年,显示月日时间 - const month = String(msgDate.getMonth() + 1).padStart(2, '0'); - const day = String(msgDate.getDate()).padStart(2, '0'); - const time = msgDate.toLocaleTimeString('zh-CN', { - hour: '2-digit', - minute: '2-digit', - }); return `${month}-${day} ${time}`; - } else { - // 跨年,显示年月日时间 - const year = msgDate.getFullYear(); - const month = String(msgDate.getMonth() + 1).padStart(2, '0'); - const day = String(msgDate.getDate()).padStart(2, '0'); - const time = msgDate.toLocaleTimeString('zh-CN', { - hour: '2-digit', - minute: '2-digit', - }); - return `${year}-${month}-${day} ${time}`; } + const year = msgDate.getFullYear(); + return `${year}-${month}-${day} ${time}`; }; // 键盘事件处理 @@ -694,7 +684,7 @@ const ChatPanel: React.FC = ({ projectId, projectName }) => { } return ( -
+
{/* 对话列表侧边栏 */} {!isConversationListCollapsed && (
= ({ projectId, projectName }) => { /> )} - {/* 聊天区域 */} -
+ {/* 聊天区域:min-h-0 让 flex-1 消息区正确占满剩余高度并滚动 */} +
{/* 头部 */}
@@ -853,8 +843,8 @@ const ChatPanel: React.FC = ({ projectId, projectName }) => {
- {/* 消息列表 */} -
+ {/* 消息列表:min-h-0 使 flex-1 生效,占满剩余高度并在内部滚动 */} +
{!selectedConversationId ? (
@@ -890,14 +880,14 @@ const ChatPanel: React.FC = ({ projectId, projectName }) => { className={`flex flex-col group ${msg.role === 'user' ? 'items-end' : 'items-start'}`} >
{msg?.role === 'assistant' ? ( -
+
= ({ projectId, projectName }) => { const { children, className, ...rest } = props; const match = /language-(\w+)/.exec(className || ''); return match ? ( - - {String(children).replace(/\n$/, '')} - +
+ + {String(children).replace(/\n$/, '')} + +
) : ( {children} diff --git a/src/components/Layout/Layout.tsx b/src/components/Layout/Layout.tsx index ff6395e..4a942cb 100644 --- a/src/components/Layout/Layout.tsx +++ b/src/components/Layout/Layout.tsx @@ -202,7 +202,7 @@ const Layout: React.FC = ({ selectedProjectId, onProjectSelect, the }; return ( -
+
{/* Left Panel - Project List */}
= ({ error, onRetry, }) => { - const progress = (step / totalSteps) * 100; + // 仅当 status 为 success 且步骤完成时才显示 100%,避免「步骤 2/2 进行中」就显示 100% 的误导 + const progress = + status === 'success' && step === totalSteps + ? 100 + : Math.min(95, (step / totalSteps) * 100); return (
@@ -151,8 +155,8 @@ const SplashScreen: React.FC = ({ {/* Status Message */}
-
-
+
+
{status === 'success' ? ( { const filePaths: string[] = []; try { - // 确保临时目录存在 - const appDir = await appDataDir(); - const tempDir = await join(appDir, 'temp'); - - try { - await createDir(tempDir, { recursive: true }); - } catch (error) { - // 目录可能已存在,忽略错误 - console.log('临时目录已存在或创建失败:', error); - } - for (const file of files) { try { - // 生成临时文件名 const timestamp = Date.now(); const randomId = Math.random().toString(36).substring(2, 15); const fileName = `${timestamp}_${randomId}_${file.name}`; - const filePath = await join(tempDir, fileName); - // 读取文件内容 const arrayBuffer = await file.arrayBuffer(); - const uint8Array = new Uint8Array(arrayBuffer); - - // 写入文件到临时目录 - await writeBinaryFile(filePath, uint8Array); + const bytes = new Uint8Array(arrayBuffer); + let binary = ''; + for (let i = 0; i < bytes.length; i++) { + binary += String.fromCharCode(bytes[i]); + } + const contentBase64 = btoa(binary); + + const filePath = await invoke('save_file_to_app_tmp', { + args: { filename: fileName, contentBase64 }, + }); filePaths.push(filePath); - console.log(`文件已保存到: ${filePath}`); } catch (error) { console.error(`保存文件 ${file.name} 失败:`, error); throw new Error(`保存文件 ${file.name} 失败: ${error}`); } } - return filePaths; } catch (error) { console.error('创建临时目录失败:', error); diff --git a/src/styles/index.css b/src/styles/index.css index e1c7fc4..0e23556 100644 --- a/src/styles/index.css +++ b/src/styles/index.css @@ -80,17 +80,23 @@ -webkit-text-size-adjust: 100%; } +html { + height: 100%; +} + body { margin: 0; display: flex; place-items: center; min-width: 320px; - min-height: 100vh; + min-height: 100%; + height: 100%; } #root { width: 100%; - height: 100vh; + height: 100%; + min-height: 100vh; margin: 0 auto; }