Skip to content

Commit 14528ed

Browse files
authored
Refine index configuration validation and typed helpers (#173)
## Summary

- add typed HNSW/fulltext configuration helpers and export them from package entry points
- enforce stricter parameter validation and normalization for dense/sparse/fulltext index configs while preserving forward-compatible pass-through for unknown analyzers/properties
- update vector index SQL generation to honor configured `type`/`lib` and extend unit tests for new defaults and validation behavior

## Test plan

- [x] `PYTHONPATH=src pytest tests/unit_tests/test_configuration.py tests/unit_tests/test_sparse_vector_index_config.py`
- [ ] Run full unit/integration suite in CI

Made with [Cursor](https://cursor.com)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **New Features**
  * Added specialized fulltext configs (IK, SPACE, BENG, NGRAM, NGRAM2) and BQ/SQ variants for HNSW vector indexes.
* **Improvements**
  * Stronger validation, normalization and safer defaults for index and sparse-vector configs (updated prune/refine/drop/refine_k defaults).
  * HNSW config now exposes configurable type/lib and uses those values in index generation.
* **Breaking Changes**
  * Top-level export for the embedded client removed — import the client from the client module instead.
* **Chores**
  * Excluded a specific numpy release in dependency constraints.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
1 parent a73f63f commit 14528ed

11 files changed

Lines changed: 634 additions & 176 deletions

File tree

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ jobs:
100100
fi
101101
set -o pipefail
102102
uv run pytest tests/integration_tests/ -v --log-cli-level=${log_level} -k "not server and not embedded and not oceanbase" | tee pytest.log
103-
tail -n 1 pytest.log | grep '=======' | grep 'passed' | grep -q 'failed' && exit 1 || exit 0
103+
tail -n 1 pytest.log | grep '=======' | grep 'passed' |grep -q 'failed' && exit 1 || exit 0
104104
105105
integration-test:
106106
runs-on: ubuntu-latest

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ dependencies = [
1818
"tqdm; python_version < \"3.14\"",
1919
"sentence-transformers; python_version >= \"3.14\"",
2020
"tenacity",
21-
"numpy>=1.26",
21+
"numpy>=1.26,!=2.4.0",
2222
]
2323

2424
[project.urls]

src/pyseekdb/__init__.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@
7171
AdminClient,
7272
BaseClient,
7373
BaseConnection,
74+
BengFulltextIndexConfig,
75+
BqHNSWConfiguration,
7476
Client,
7577
ClientAPI,
7678
Configuration,
@@ -79,14 +81,19 @@
7981
EmbeddingFunction,
8082
FulltextIndexConfig,
8183
HNSWConfiguration,
84+
IKFulltextIndexConfig,
85+
IKMode,
8286
K,
87+
Ngram2FulltextIndexConfig,
88+
NgramFulltextIndexConfig,
8389
RemoteServerClient,
8490
Schema,
85-
SeekdbEmbeddedClient,
91+
SpaceFulltextIndexConfig,
8692
SparseEmbeddingFunction,
8793
SparseEmbeddingFunctionRegistry,
8894
SparseVector,
8995
SparseVectorIndexConfig,
96+
SqHNSWConfiguration,
9097
VectorIndexConfig,
9198
Version,
9299
get_default_embedding_function,
@@ -107,6 +114,8 @@
107114
"AdminClient",
108115
"BaseClient",
109116
"BaseConnection",
117+
"BengFulltextIndexConfig",
118+
"BqHNSWConfiguration",
110119
"Client",
111120
"ClientAPI",
112121
"Collection",
@@ -116,14 +125,19 @@
116125
"EmbeddingFunction",
117126
"FulltextIndexConfig",
118127
"HNSWConfiguration",
128+
"IKFulltextIndexConfig",
129+
"IKMode",
119130
"K",
131+
"Ngram2FulltextIndexConfig",
132+
"NgramFulltextIndexConfig",
120133
"RemoteServerClient",
121134
"Schema",
122-
"SeekdbEmbeddedClient",
135+
"SpaceFulltextIndexConfig",
123136
"SparseEmbeddingFunction",
124137
"SparseEmbeddingFunctionRegistry",
125138
"SparseVector",
126139
"SparseVectorIndexConfig",
140+
"SqHNSWConfiguration",
127141
"VectorIndexConfig",
128142
"Version",
129143
"get_default_embedding_function",

src/pyseekdb/client/__init__.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,25 @@
1818

1919
import logging
2020
import os
21-
from typing import Any
21+
from typing import TYPE_CHECKING, Any
2222

2323
from .admin_client import AdminAPI, _AdminClientProxy, _ClientProxy
2424
from .base_connection import BaseConnection
2525
from .client_base import BaseClient, ClientAPI
26-
from .client_seekdb_embedded import SeekdbEmbeddedClient
2726
from .client_seekdb_server import RemoteServerClient
2827
from .configuration import (
28+
BengFulltextIndexConfig,
29+
BqHNSWConfiguration,
2930
Configuration,
3031
FulltextIndexConfig,
3132
HNSWConfiguration,
33+
IKFulltextIndexConfig,
34+
IKMode,
35+
Ngram2FulltextIndexConfig,
36+
NgramFulltextIndexConfig,
37+
SpaceFulltextIndexConfig,
3238
SparseVectorIndexConfig,
39+
SqHNSWConfiguration,
3340
VectorIndexConfig,
3441
)
3542
from .database import Database
@@ -49,6 +56,9 @@
4956
from .types import K
5057
from .version import Version
5158

59+
if TYPE_CHECKING:
60+
from .client_seekdb_embedded import SeekdbEmbeddedClient
61+
5262
logger = logging.getLogger(__name__)
5363

5464

@@ -81,6 +91,8 @@ def _create_server_client(
8191
if/else by sharing this helper between Client() and AdminClient().
8292
"""
8393
if path is not None:
94+
from .client_seekdb_embedded import SeekdbEmbeddedClient
95+
8496
if is_admin:
8597
logger.debug(f"Creating embedded admin client: path={path}")
8698
else:
@@ -111,6 +123,8 @@ def _create_server_client(
111123
from .client_seekdb_embedded import _PYLIBSEEKDB_AVAILABLE
112124

113125
if _PYLIBSEEKDB_AVAILABLE:
126+
from .client_seekdb_embedded import SeekdbEmbeddedClient
127+
114128
default_path = _default_seekdb_path()
115129
if is_admin:
116130
logger.debug(f"Creating embedded admin client (default): path={default_path}")
@@ -124,11 +138,27 @@ def _create_server_client(
124138
)
125139

126140

141+
def __getattr__(name: str) -> Any:
142+
"""
143+
Lazily expose optional embedded client symbols.
144+
145+
This avoids importing pylibseekdb during `import pyseekdb`, which can crash
146+
on unsupported interpreter/platform combinations (e.g. some Python 3.14 CI environments).
147+
"""
148+
if name == "SeekdbEmbeddedClient":
149+
from .client_seekdb_embedded import SeekdbEmbeddedClient
150+
151+
return SeekdbEmbeddedClient
152+
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
153+
154+
127155
__all__ = [
128156
"AdminAPI",
129157
"AdminClient",
130158
"BaseClient",
131159
"BaseConnection",
160+
"BengFulltextIndexConfig",
161+
"BqHNSWConfiguration",
132162
"Client",
133163
"ClientAPI",
134164
"Configuration",
@@ -137,14 +167,20 @@ def _create_server_client(
137167
"EmbeddingFunction",
138168
"FulltextIndexConfig",
139169
"HNSWConfiguration",
170+
"IKFulltextIndexConfig",
171+
"IKMode",
140172
"K",
173+
"Ngram2FulltextIndexConfig",
174+
"NgramFulltextIndexConfig",
141175
"RemoteServerClient",
142176
"Schema",
143177
"SeekdbEmbeddedClient",
178+
"SpaceFulltextIndexConfig",
144179
"SparseEmbeddingFunction",
145180
"SparseEmbeddingFunctionRegistry",
146181
"SparseVector",
147182
"SparseVectorIndexConfig",
183+
"SqHNSWConfiguration",
148184
"VectorIndexConfig",
149185
"Version",
150186
"get_default_embedding_function",

src/pyseekdb/client/client_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ def _get_vector_index_sql(hnsw_config: HNSWConfiguration) -> str:
175175
property_parts.append(f"{k}={v}")
176176
property_str = ", ".join(property_parts)
177177
properties_str = f", {property_str}" if property_str else ""
178-
return f"WITH (DISTANCE={hnsw_config.distance}, TYPE=hnsw, LIB=vsag{properties_str})"
178+
return f"WITH (DISTANCE={hnsw_config.distance}, TYPE={hnsw_config.type}, LIB={hnsw_config.lib}{properties_str})"
179179

180180

181181
def _get_sparse_vector_index_sql(sparse_config: SparseVectorIndexConfig) -> str:

0 commit comments

Comments
 (0)