Skip to content

Commit

Permalink
feat: sync CREATE INDEX (#462)
Browse files Browse the repository at this point in the history
* feat: sync CREATE INDEX

Signed-off-by: usamoi <[email protected]>

* test: fix IVF tests

Signed-off-by: usamoi <[email protected]>

* feat: add Alter trait

Signed-off-by: usamoi <[email protected]>

* chore: rename IndexOptions2 to IndexAlterableOptions

Signed-off-by: usamoi <[email protected]>

---------

Signed-off-by: usamoi <[email protected]>
  • Loading branch information
usamoi committed Apr 9, 2024
1 parent b3c9181 commit 06137e1
Show file tree
Hide file tree
Showing 42 changed files with 738 additions and 523 deletions.
21 changes: 16 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ thiserror.workspace = true
tikv-jemallocator = { version = "0.5.4", features = [
"disable_initial_exec_tls",
] }
toml = "0.8.10"
toml.workspace = true
validator.workspace = true

base = { path = "crates/base" }
Expand Down Expand Up @@ -87,8 +87,9 @@ rustix = { version = "0.38.31", features = ["fs", "mm", "net"] }
serde = "1"
serde_json = "1"
thiserror = "1"
toml = "0.8.10"
uuid = { version = "1.7.0", features = ["serde", "v4"] }
validator = { version = "0.17.0", features = ["derive"] }
validator = { version = "0.18.0", features = ["derive"] }

[workspace.lints]
rust.unsafe_op_in_unsafe_fn = "forbid"
Expand Down
2 changes: 2 additions & 0 deletions crates/base/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@ num-traits.workspace = true
rand.workspace = true
serde.workspace = true
thiserror.workspace = true
toml.workspace = true
uuid.workspace = true
validator.workspace = true

base_macros = { path = "../base_macros" }
c = { path = "../c" }
detect = { path = "../detect" }

Expand Down
103 changes: 65 additions & 38 deletions crates/base/src/index.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::distance::*;
use crate::vector::*;
use base_macros::Alter;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use uuid::Uuid;
Expand All @@ -8,7 +9,7 @@ use validator::{Validate, ValidationError};
#[must_use]
#[derive(Debug, Clone, Error, Serialize, Deserialize)]
pub enum CreateError {
#[error("Invalid index options.")]
#[error("Invalid index options: {reason}.")]
InvalidIndexOptions { reason: String },
}

Expand Down Expand Up @@ -81,47 +82,37 @@ pub enum StatError {
#[must_use]
#[derive(Debug, Clone, Error, Serialize, Deserialize)]
pub enum AlterError {
#[error("Setting key {key} is not exist.")]
BadKey { key: String },
#[error("Setting key {key} has a wrong value {value}.")]
BadValue { key: String, value: String },
#[error("Index not found.")]
NotExist,
#[error("Key {key} not found.")]
KeyNotExists { key: String },
#[error("Invalid index options: {reason}.")]
InvalidIndexOptions { reason: String },
}

#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
#[serde(deny_unknown_fields)]
pub struct IndexFlexibleOptions {
#[serde(default = "IndexFlexibleOptions::default_optimizing_threads")]
#[validate(range(min = 1, max = 65535))]
pub optimizing_threads: u16,
}

impl IndexFlexibleOptions {
pub fn default_optimizing_threads() -> u16 {
1
}
#[must_use]
#[derive(Debug, Clone, Error, Serialize, Deserialize)]
pub enum StopError {
#[error("Index not found.")]
NotExist,
}

impl Default for IndexFlexibleOptions {
fn default() -> Self {
Self {
optimizing_threads: Self::default_optimizing_threads(),
}
}
#[must_use]
#[derive(Debug, Clone, Error, Serialize, Deserialize)]
pub enum StartError {
#[error("Index not found.")]
NotExist,
}

#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
#[serde(deny_unknown_fields)]
#[validate(schema(function = "IndexOptions::validate_index_options"))]
pub struct IndexOptions {
#[validate]
#[validate(nested)]
pub vector: VectorOptions,
#[validate]
#[validate(nested)]
pub segment: SegmentsOptions,
#[validate]
pub optimizing: OptimizingOptions,
#[validate]
#[validate(nested)]
pub indexing: IndexingOptions,
}

Expand All @@ -140,13 +131,20 @@ impl IndexOptions {
};
if !is_trivial {
return Err(ValidationError::new(
"Quantization is not supported for svector, bvector, and vecint8.",
"Quantization is not supported for svector, bvector, and veci8.",
));
}
Ok(())
}
}

/// Index options kept in their own struct, apart from `IndexOptions`, and
/// deriving `Alter` in addition to the serde and validation derives.
///
/// NOTE(review): presumably these are the options that may be changed after an
/// index has been created — confirm against the `Alter` derive in `base_macros`.
///
/// Deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize, Validate, Alter)]
#[serde(deny_unknown_fields)]
pub struct IndexAlterableOptions {
/// Optimizing settings; validated recursively through `#[validate(nested)]`.
#[validate(nested)]
pub optimizing: OptimizingOptions,
}

#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
#[serde(deny_unknown_fields)]
#[validate(schema(function = "Self::validate_0"))]
Expand Down Expand Up @@ -222,9 +220,12 @@ impl Default for SegmentsOptions {
}
}

#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
#[derive(Debug, Clone, Serialize, Deserialize, Validate, Alter)]
#[serde(deny_unknown_fields)]
pub struct OptimizingOptions {
#[serde(default = "OptimizingOptions::default_optimizing_threads")]
#[validate(range(min = 1, max = 65535))]
pub optimizing_threads: u16,
#[serde(default = "OptimizingOptions::default_sealing_secs")]
#[validate(range(min = 1, max = 60))]
pub sealing_secs: u64,
Expand All @@ -237,6 +238,9 @@ pub struct OptimizingOptions {
}

impl OptimizingOptions {
fn default_optimizing_threads() -> u16 {
1
}
fn default_sealing_secs() -> u64 {
60
}
Expand All @@ -251,6 +255,7 @@ impl OptimizingOptions {
impl Default for OptimizingOptions {
fn default() -> Self {
Self {
optimizing_threads: Self::default_optimizing_threads(),
sealing_secs: Self::default_sealing_secs(),
sealing_size: Self::default_sealing_size(),
delete_threshold: Self::default_delete_threshold(),
Expand Down Expand Up @@ -308,7 +313,7 @@ impl Validate for IndexingOptions {
#[serde(deny_unknown_fields)]
pub struct FlatIndexingOptions {
#[serde(default)]
#[validate]
#[validate(nested)]
pub quantization: QuantizationOptions,
}

Expand Down Expand Up @@ -336,7 +341,7 @@ pub struct IvfIndexingOptions {
#[validate(range(min = 1, max = 1_000_000))]
pub nsample: u32,
#[serde(default)]
#[validate]
#[validate(nested)]
pub quantization: QuantizationOptions,
}

Expand Down Expand Up @@ -375,17 +380,17 @@ pub struct HnswIndexingOptions {
pub m: u32,
#[serde(default = "HnswIndexingOptions::default_ef_construction")]
#[validate(range(min = 10, max = 2000))]
pub ef_construction: usize,
pub ef_construction: u32,
#[serde(default)]
#[validate]
#[validate(nested)]
pub quantization: QuantizationOptions,
}

impl HnswIndexingOptions {
fn default_m() -> u32 {
12
}
fn default_ef_construction() -> usize {
fn default_ef_construction() -> u32 {
300
}
}
Expand Down Expand Up @@ -492,7 +497,7 @@ impl Default for ProductQuantizationOptionsRatio {
pub struct SearchOptions {
pub prefilter_enable: bool,
#[validate(range(min = 1, max = 65535))]
pub hnsw_ef_search: usize,
pub hnsw_ef_search: u32,
#[validate(range(min = 1, max = 1_000_000))]
pub ivf_nprobe: u32,
}
Expand All @@ -507,8 +512,30 @@ pub struct IndexStat {
#[derive(Debug, Serialize, Deserialize)]
pub struct SegmentStat {
pub id: Uuid,
#[serde(rename = "type")]
pub typ: String,
pub r#type: String,
pub length: usize,
pub size: u64,
}

/// Updates a value in place from a textual `value`, addressed by a `key` path.
///
/// `key` is passed as a slice of path segments. The leaf implementations
/// generated below treat an empty slice as "this value itself".
pub trait Alter {
    /// Applies `value` at the location addressed by `key`.
    ///
    /// # Errors
    ///
    /// Returns an [`AlterError`] when the update cannot be applied.
    fn alter(&mut self, key: &[&str], value: &str) -> Result<(), AlterError>;
}

/// Implements [`Alter`] for leaf types that can be parsed from a string.
///
/// For each listed type the generated `alter`:
/// * rejects any non-empty `key` with `AlterError::KeyNotExists`, reporting the
///   segments joined by `.`;
/// * otherwise parses `value` into the type and overwrites `self`, mapping any
///   parse failure to `AlterError::InvalidIndexOptions`.
macro_rules! impl_alter_for {
    {$($t:ty)*} => {
        $(impl Alter for $t {
            fn alter(&mut self, key: &[&str], value: &str) -> Result<(), AlterError> {
                // A leaf value has no children, so any remaining path segment is unknown.
                if !key.is_empty() {
                    return Err(AlterError::KeyNotExists { key: key.join(".") });
                }
                let parsed = value
                    .parse::<$t>()
                    .map_err(|_| AlterError::InvalidIndexOptions {
                        reason: "failed to parse".to_string(),
                    })?;
                *self = parsed;
                Ok(())
            }
        })*
    };
}

impl_alter_for! {
    String u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 bool
}
11 changes: 9 additions & 2 deletions crates/base/src/worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@ use crate::search::*;
use crate::vector::*;

pub trait WorkerOperations {
fn create(&self, handle: Handle, options: IndexOptions) -> Result<(), CreateError>;
fn create(
&self,
handle: Handle,
options: IndexOptions,
alterable_options: IndexAlterableOptions,
) -> Result<(), CreateError>;
fn drop(&self, handle: Handle) -> Result<(), DropError>;
fn flush(&self, handle: Handle) -> Result<(), FlushError>;
fn insert(
Expand All @@ -17,7 +22,9 @@ pub trait WorkerOperations {
fn view_vbase(&self, handle: Handle) -> Result<impl ViewVbaseOperations, VbaseError>;
fn view_list(&self, handle: Handle) -> Result<impl ViewListOperations, ListError>;
fn stat(&self, handle: Handle) -> Result<IndexStat, StatError>;
fn alter(&self, handle: Handle, key: String, value: String) -> Result<(), AlterError>;
fn alter(&self, handle: Handle, key: &str, value: &str) -> Result<(), AlterError>;
fn stop(&self, handle: Handle) -> Result<(), StopError>;
fn start(&self, handle: Handle) -> Result<(), StartError>;
}

pub trait ViewBasicOperations {
Expand Down
21 changes: 21 additions & 0 deletions crates/base_macros/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Manifest for the `base_macros` crate.
[package]
name = "base_macros"
# Version and edition are inherited from the workspace manifest.
version.workspace = true
edition.workspace = true

[lib]
# Build this crate as a procedural-macro library.
proc-macro = true

[dependencies]
proc-macro2 = { version = "1.0.79", features = ["proc-macro"] }
quote = "1.0.35"
# Default features are disabled; only the listed `syn` features are enabled.
syn = { version = "2.0.53", default-features = false, features = [
"clone-impls",
"full",
"parsing",
"printing",
"proc-macro",
] }

# Lint configuration is inherited from the workspace manifest.
[lints]
workspace = true
Loading

0 comments on commit 06137e1

Please sign in to comment.