From 95136d598d1a3f1774272dfdc6a7cbb5ba39e41d Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 18 Dec 2025 11:40:29 +0800 Subject: [PATCH 1/2] feat(query): Inverted Index and Vector Index support hybrid cache --- src/query/config/src/config.rs | 40 ++++ src/query/config/src/inner.rs | 16 ++ src/query/storages/common/cache/src/caches.rs | 8 +- .../storages/common/cache/src/manager.rs | 199 ++++++++++++++---- .../common/index/src/hnsw_index/mod.rs | 67 +++++- .../common/index/src/inverted_index.rs | 68 +++++- .../fuse/src/io/read/meta/meta_readers.rs | 4 +- src/query/storages/system/src/caches_table.rs | 24 ++- 8 files changed, 368 insertions(+), 58 deletions(-) diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index 730b948b52626..b31992d393f02 100644 --- a/src/query/config/src/config.rs +++ b/src/query/config/src/config.rs @@ -3316,6 +3316,14 @@ pub struct CacheConfig { )] pub inverted_index_meta_count: u64, + /// Max bytes of cached inverted index metadata on disk. Set it to 0 to disable it. + #[clap( + long = "disk-cache-inverted-index-meta-size", + value_name = "VALUE", + default_value = "0" + )] + pub disk_cache_inverted_index_meta_size: u64, + /// Max bytes of cached inverted index filters used. Set it to 0 to disable it. #[clap( long = "cache-inverted-index-filter-size", @@ -3324,6 +3332,14 @@ pub struct CacheConfig { )] pub inverted_index_filter_size: u64, + /// Max bytes of cached inverted index filters on disk. Set it to 0 to disable it. + #[clap( + long = "disk-cache-inverted-index-data-size", + value_name = "VALUE", + default_value = "0" + )] + pub disk_cache_inverted_index_data_size: u64, + /// Max percentage of in memory inverted index filter cache relative to whole memory. By default it is 0 (disabled). #[clap( long = "cache-inverted-index-filter-memory-ratio", @@ -3340,6 +3356,14 @@ pub struct CacheConfig { )] pub vector_index_meta_count: u64, + /// Max bytes of cached vector index metadata on disk. Set it to 0 to disable it. + #[clap( + long = "disk-cache-vector-index-meta-size", + value_name = "VALUE", + default_value = "0" + )] + pub disk_cache_vector_index_meta_size: u64, + /// Max bytes of cached vector index filters used. Set it to 0 to disable it. #[clap( long = "cache-vector-index-filter-size", @@ -3348,6 +3372,14 @@ pub struct CacheConfig { )] pub vector_index_filter_size: u64, + /// Max bytes of cached vector index filters on disk. Set it to 0 to disable it. + #[clap( + long = "disk-cache-vector-index-data-size", + value_name = "VALUE", + default_value = "0" + )] + pub disk_cache_vector_index_data_size: u64, + /// Max percentage of in memory vector index filter cache relative to whole memory. By default it is 0 (disabled). #[clap( long = "cache-vector-index-filter-memory-ratio", @@ -3697,10 +3729,14 @@ mod cache_config_converters { disk_cache_table_bloom_index_data_size: value .disk_cache_table_bloom_index_data_size, inverted_index_meta_count: value.inverted_index_meta_count, + disk_cache_inverted_index_meta_size: value.disk_cache_inverted_index_meta_size, inverted_index_filter_size: value.inverted_index_filter_size, + disk_cache_inverted_index_data_size: value.disk_cache_inverted_index_data_size, inverted_index_filter_memory_ratio: value.inverted_index_filter_memory_ratio, vector_index_meta_count: value.vector_index_meta_count, + disk_cache_vector_index_meta_size: value.disk_cache_vector_index_meta_size, vector_index_filter_size: value.vector_index_filter_size, + disk_cache_vector_index_data_size: value.disk_cache_vector_index_data_size, vector_index_filter_memory_ratio: value.vector_index_filter_memory_ratio, table_prune_partitions_count: value.table_prune_partitions_count, data_cache_storage: value.data_cache_storage.try_into()?, @@ -3737,10 +3773,14 @@ mod cache_config_converters { disk_cache_table_bloom_index_data_size: value .disk_cache_table_bloom_index_data_size, inverted_index_meta_count: value.inverted_index_meta_count, + disk_cache_inverted_index_meta_size: value.disk_cache_inverted_index_meta_size, inverted_index_filter_size: value.inverted_index_filter_size, + disk_cache_inverted_index_data_size: value.disk_cache_inverted_index_data_size, inverted_index_filter_memory_ratio: value.inverted_index_filter_memory_ratio, vector_index_meta_count: value.vector_index_meta_count, + disk_cache_vector_index_meta_size: value.disk_cache_vector_index_meta_size, vector_index_filter_size: value.vector_index_filter_size, + disk_cache_vector_index_data_size: value.disk_cache_vector_index_data_size, vector_index_filter_memory_ratio: value.vector_index_filter_memory_ratio, table_prune_partitions_count: value.table_prune_partitions_count, data_cache_storage: value.data_cache_storage.into(), diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index b3cc31b0a8415..ebd0847d60945 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -610,18 +610,30 @@ pub struct CacheConfig { /// Max number of cached inverted index meta objects. Set it to 0 to disable it. pub inverted_index_meta_count: u64, + /// Max bytes of cached inverted index meta data on disk. Set it to 0 to disable it. + pub disk_cache_inverted_index_meta_size: u64, + /// Max bytes of cached inverted index filters used. Set it to 0 to disable it. pub inverted_index_filter_size: u64, + /// Max bytes of cached inverted index filters on disk. Set it to 0 to disable it. + pub disk_cache_inverted_index_data_size: u64, + /// Max percentage of in memory inverted index filters cache relative to whole memory. By default it is 0 (disabled). pub inverted_index_filter_memory_ratio: u64, /// Max number of cached vector index meta objects. Set it to 0 to disable it. pub vector_index_meta_count: u64, + /// Max bytes of cached vector index meta data on disk. Set it to 0 to disable it. + pub disk_cache_vector_index_meta_size: u64, + /// Max bytes of cached vector index filters used. Set it to 0 to disable it. pub vector_index_filter_size: u64, + /// Max bytes of cached vector index filters on disk. Set it to 0 to disable it. + pub disk_cache_vector_index_data_size: u64, + /// Max percentage of in memory vector index filters cache relative to whole memory. By default it is 0 (disabled). pub vector_index_filter_memory_ratio: u64, @@ -757,10 +769,14 @@ impl Default for CacheConfig { disk_cache_table_bloom_index_data_size: 0, disk_cache_table_bloom_index_meta_size: 0, inverted_index_meta_count: 30000, + disk_cache_inverted_index_meta_size: 0, inverted_index_filter_size: 64424509440, + disk_cache_inverted_index_data_size: 0, inverted_index_filter_memory_ratio: 0, vector_index_meta_count: 30000, + disk_cache_vector_index_meta_size: 0, vector_index_filter_size: 64424509440, + disk_cache_vector_index_data_size: 0, vector_index_filter_memory_ratio: 0, table_prune_partitions_count: 256, data_cache_storage: Default::default(), diff --git a/src/query/storages/common/cache/src/caches.rs b/src/query/storages/common/cache/src/caches.rs index 92c4d84e0c80f..4aa4f79d84978 100644 --- a/src/query/storages/common/cache/src/caches.rs +++ b/src/query/storages/common/cache/src/caches.rs @@ -51,11 +51,11 @@ pub type BloomIndexFilterCache = HybridCache; /// In memory object cache of parquet FileMetaData of bloom index data pub type BloomIndexMetaCache = HybridCache; -pub type InvertedIndexMetaCache = InMemoryLruCache; -pub type InvertedIndexFileCache = InMemoryLruCache; +pub type InvertedIndexMetaCache = HybridCache; +pub type InvertedIndexFileCache = HybridCache; -pub type VectorIndexMetaCache = InMemoryLruCache; -pub type VectorIndexFileCache = InMemoryLruCache; +pub type VectorIndexMetaCache = HybridCache; +pub type VectorIndexFileCache = HybridCache; /// In memory object cache of parquet FileMetaData of external parquet rs files pub type ParquetMetaDataCache = InMemoryLruCache; diff --git a/src/query/storages/common/cache/src/manager.rs b/src/query/storages/common/cache/src/manager.rs index 4586c3456694a..886a9021b15f4 100644 --- a/src/query/storages/common/cache/src/manager.rs +++ b/src/query/storages/common/cache/src/manager.rs @@ -298,12 +298,25 @@ impl CacheManager { )? }; - let inverted_index_meta_cache = Self::new_items_cache_slot( - MEMORY_CACHE_INVERTED_INDEX_FILE_META_DATA, - config.inverted_index_meta_count as usize, - ); + let inverted_index_meta_cache = { + let inverted_index_meta_on_disk_cache_path = + PathBuf::from(&config.disk_cache_config.path) + .join(tenant_id.clone()) + .join("inverted_index_meta_v1"); + Self::new_hybrid_cache_slot( + HYBRID_CACHE_INVERTED_INDEX_FILE_META_DATA, + config.inverted_index_meta_count as usize, + Unit::Count, + &inverted_index_meta_on_disk_cache_path, + on_disk_cache_queue_size, + config.disk_cache_inverted_index_meta_size as usize, + DiskCacheKeyReloadPolicy::Fuzzy, + on_disk_cache_sync_data, + ee_mode, + )? + }; - // setup in-memory inverted index filter cache + // setup inverted index filter cache let inverted_index_file_size = if config.inverted_index_filter_memory_ratio != 0 { (*max_server_memory_usage as usize) * config.inverted_index_filter_memory_ratio as usize @@ -311,17 +324,43 @@ impl CacheManager { } else { config.inverted_index_filter_size as usize }; - let inverted_index_file_cache = Self::new_bytes_cache_slot( - MEMORY_CACHE_INVERTED_INDEX_FILE, - inverted_index_file_size, - ); + let inverted_index_file_cache = { + let inverted_index_file_on_disk_cache_path = + PathBuf::from(&config.disk_cache_config.path) + .join(tenant_id.clone()) + .join("inverted_index_file_v1"); + Self::new_hybrid_cache_slot( + HYBRID_CACHE_INVERTED_INDEX_FILE, + inverted_index_file_size, + Unit::Bytes, + &inverted_index_file_on_disk_cache_path, + on_disk_cache_queue_size, + config.disk_cache_inverted_index_data_size as usize, + DiskCacheKeyReloadPolicy::Fuzzy, + on_disk_cache_sync_data, + ee_mode, + )? + }; - let vector_index_meta_cache = Self::new_items_cache_slot( - MEMORY_CACHE_VECTOR_INDEX_FILE_META_DATA, - config.vector_index_meta_count as usize, - ); + let vector_index_meta_cache = { + let vector_index_meta_on_disk_cache_path = + PathBuf::from(&config.disk_cache_config.path) + .join(tenant_id.clone()) + .join("vector_index_meta_v1"); + Self::new_hybrid_cache_slot( + HYBRID_CACHE_VECTOR_INDEX_FILE_META_DATA, + config.vector_index_meta_count as usize, + Unit::Count, + &vector_index_meta_on_disk_cache_path, + on_disk_cache_queue_size, + config.disk_cache_vector_index_meta_size as usize, + DiskCacheKeyReloadPolicy::Fuzzy, + on_disk_cache_sync_data, + ee_mode, + )? + }; - // setup in-memory vector index filter cache + // setup vector index filter cache let vector_index_file_size = if config.vector_index_filter_memory_ratio != 0 { (*max_server_memory_usage as usize) * config.vector_index_filter_memory_ratio as usize @@ -329,10 +368,23 @@ impl CacheManager { } else { config.vector_index_filter_size as usize }; - let vector_index_file_cache = Self::new_bytes_cache_slot( - MEMORY_CACHE_VECTOR_INDEX_FILE, - vector_index_file_size, - ); + let vector_index_file_cache = { + let vector_index_file_on_disk_cache_path = + PathBuf::from(&config.disk_cache_config.path) + .join(tenant_id.clone()) + .join("vector_index_file_v1"); + Self::new_hybrid_cache_slot( + HYBRID_CACHE_VECTOR_INDEX_FILE, + vector_index_file_size, + Unit::Bytes, + &vector_index_file_on_disk_cache_path, + on_disk_cache_queue_size, + config.disk_cache_vector_index_data_size as usize, + DiskCacheKeyReloadPolicy::Fuzzy, + on_disk_cache_sync_data, + ee_mode, + )? + }; let prune_partitions_cache = Self::new_items_cache_slot( MEMORY_CACHE_PRUNE_PARTITIONS, @@ -444,22 +496,6 @@ impl CacheManager { let cache = &self.prune_partitions_cache; Self::set_items_capacity(cache, new_capacity, name) } - MEMORY_CACHE_INVERTED_INDEX_FILE => { - let cache = &self.inverted_index_file_cache; - Self::set_bytes_capacity(cache, new_capacity, name); - } - MEMORY_CACHE_INVERTED_INDEX_FILE_META_DATA => { - let cache = &self.inverted_index_meta_cache; - Self::set_items_capacity(cache, new_capacity, name); - } - MEMORY_CACHE_VECTOR_INDEX_FILE => { - let cache = &self.vector_index_file_cache; - Self::set_bytes_capacity(cache, new_capacity, name); - } - MEMORY_CACHE_VECTOR_INDEX_FILE_META_DATA => { - let cache = &self.vector_index_meta_cache; - Self::set_items_capacity(cache, new_capacity, name); - } HYBRID_CACHE_BLOOM_INDEX_FILE_META_DATA | IN_MEMORY_CACHE_BLOOM_INDEX_FILE_META_DATA => { Self::set_hybrid_cache_items_capacity( @@ -475,6 +511,36 @@ impl CacheManager { name, ); } + HYBRID_CACHE_INVERTED_INDEX_FILE_META_DATA + | IN_MEMORY_HYBRID_CACHE_INVERTED_INDEX_FILE_META_DATA => { + Self::set_hybrid_cache_items_capacity( + &self.inverted_index_meta_cache, + new_capacity, + name, + ); + } + HYBRID_CACHE_INVERTED_INDEX_FILE | IN_MEMORY_HYBRID_CACHE_INVERTED_INDEX_FILE => { + Self::set_hybrid_cache_bytes_capacity( + &self.inverted_index_file_cache, + new_capacity, + name, + ); + } + HYBRID_CACHE_VECTOR_INDEX_FILE_META_DATA + | IN_MEMORY_HYBRID_CACHE_VECTOR_INDEX_FILE_META_DATA => { + Self::set_hybrid_cache_items_capacity( + &self.vector_index_meta_cache, + new_capacity, + name, + ); + } + HYBRID_CACHE_VECTOR_INDEX_FILE | IN_MEMORY_HYBRID_CACHE_VECTOR_INDEX_FILE => { + Self::set_hybrid_cache_bytes_capacity( + &self.vector_index_file_cache, + new_capacity, + name, + ); + } MEMORY_CACHE_COMPACT_SEGMENT_INFO => { Self::set_bytes_capacity(&self.compact_segment_info_cache, new_capacity, name); } @@ -633,19 +699,19 @@ impl CacheManager { } pub fn get_inverted_index_meta_cache(&self) -> Option { - self.inverted_index_meta_cache.get() + self.get_hybrid_cache(self.inverted_index_meta_cache.get()) } pub fn get_inverted_index_file_cache(&self) -> Option { - self.inverted_index_file_cache.get() + self.get_hybrid_cache(self.inverted_index_file_cache.get()) } pub fn get_vector_index_meta_cache(&self) -> Option { - self.vector_index_meta_cache.get() + self.get_hybrid_cache(self.vector_index_meta_cache.get()) } pub fn get_vector_index_file_cache(&self) -> Option { - self.vector_index_file_cache.get() + self.get_hybrid_cache(self.vector_index_file_cache.get()) } pub fn get_prune_partitions_cache(&self) -> Option { @@ -788,11 +854,16 @@ impl CacheManager { const MEMORY_CACHE_TABLE_DATA: &str = "memory_cache_table_data"; const MEMORY_CACHE_PARQUET_META_DATA: &str = "memory_cache_parquet_meta_data"; const MEMORY_CACHE_PRUNE_PARTITIONS: &str = "memory_cache_prune_partitions"; -const MEMORY_CACHE_INVERTED_INDEX_FILE: &str = "memory_cache_inverted_index_file"; -const MEMORY_CACHE_INVERTED_INDEX_FILE_META_DATA: &str = +const HYBRID_CACHE_INVERTED_INDEX_FILE: &str = "cache_inverted_index_file"; +const IN_MEMORY_HYBRID_CACHE_INVERTED_INDEX_FILE: &str = "memory_cache_inverted_index_file"; +const HYBRID_CACHE_INVERTED_INDEX_FILE_META_DATA: &str = "cache_inverted_index_file_meta_data"; +const IN_MEMORY_HYBRID_CACHE_INVERTED_INDEX_FILE_META_DATA: &str = "memory_cache_inverted_index_file_meta_data"; -const MEMORY_CACHE_VECTOR_INDEX_FILE: &str = "memory_cache_vector_index_file"; -const MEMORY_CACHE_VECTOR_INDEX_FILE_META_DATA: &str = "memory_cache_vector_index_file_meta_data"; +const HYBRID_CACHE_VECTOR_INDEX_FILE: &str = "cache_vector_index_file"; +const IN_MEMORY_HYBRID_CACHE_VECTOR_INDEX_FILE: &str = "memory_cache_vector_index_file"; +const HYBRID_CACHE_VECTOR_INDEX_FILE_META_DATA: &str = "cache_vector_index_file_meta_data"; +const IN_MEMORY_HYBRID_CACHE_VECTOR_INDEX_FILE_META_DATA: &str = + "memory_cache_vector_index_file_meta_data"; const HYBRID_CACHE_BLOOM_INDEX_FILE_META_DATA: &str = "cache_bloom_index_file_meta_data"; const HYBRID_CACHE_COLUMN_DATA: &str = "cache_column_data"; @@ -843,6 +914,10 @@ mod tests { }, disk_cache_table_bloom_index_data_size: 1024 * 1024, disk_cache_table_bloom_index_meta_size: 1024 * 1024, + disk_cache_inverted_index_meta_size: 1024 * 1024, + disk_cache_inverted_index_data_size: 1024 * 1024, + disk_cache_vector_index_meta_size: 1024 * 1024, + disk_cache_vector_index_data_size: 1024 * 1024, ..CacheConfig::default() } } @@ -852,6 +927,10 @@ mod tests { data_cache_storage: CacheStorageTypeInnerConfig::None, disk_cache_table_bloom_index_data_size: 0, disk_cache_table_bloom_index_meta_size: 0, + disk_cache_inverted_index_meta_size: 0, + disk_cache_inverted_index_data_size: 0, + disk_cache_vector_index_meta_size: 0, + disk_cache_vector_index_data_size: 0, ..CacheConfig::default() } } @@ -866,7 +945,23 @@ mod tests { .on_disk_cache() .is_some() && cache_manager - .get_bloom_index_meta_cache() + .get_bloom_index_filter_cache() + .on_disk_cache() + .is_some() + && cache_manager + .get_inverted_index_meta_cache() + .on_disk_cache() + .is_some() + && cache_manager + .get_inverted_index_file_cache() + .on_disk_cache() + .is_some() + && cache_manager + .get_vector_index_meta_cache() + .on_disk_cache() + .is_some() + && cache_manager + .get_vector_index_file_cache() .on_disk_cache() .is_some() } @@ -881,7 +976,23 @@ mod tests { .on_disk_cache() .is_none() && cache_manager - .get_bloom_index_meta_cache() + .get_bloom_index_filter_cache() + .on_disk_cache() + .is_none() + && cache_manager + .get_inverted_index_meta_cache() + .on_disk_cache() + .is_none() + && cache_manager + .get_inverted_index_file_cache() + .on_disk_cache() + .is_none() + && cache_manager + .get_vector_index_meta_cache() + .on_disk_cache() + .is_none() + && cache_manager + .get_vector_index_file_cache() .on_disk_cache() .is_none() } diff --git a/src/query/storages/common/index/src/hnsw_index/mod.rs b/src/query/storages/common/index/src/hnsw_index/mod.rs index 7a24761b17b81..2882d4d190846 100644 --- a/src/query/storages/common/index/src/hnsw_index/mod.rs +++ b/src/query/storages/common/index/src/hnsw_index/mod.rs @@ -34,8 +34,7 @@ use databend_storages_common_table_meta::meta::SingleColumnMeta; pub use hnsw::HNSWIndex; use parquet::format::FileMetaData; pub use quantization::DistanceType; - -#[derive(Clone)] +#[derive(Clone, serde::Serialize, serde::Deserialize)] pub struct VectorIndexMeta { pub columns: Vec<(String, SingleColumnMeta)>, pub metadata: BTreeMap, @@ -53,6 +52,70 @@ impl VectorIndexFile { } } +#[derive(serde::Serialize, serde::Deserialize)] +struct SerializableVectorIndexFile { + name: String, + data: Vec, +} + +impl TryFrom<&VectorIndexMeta> for Vec { + type Error = ErrorCode; + + fn try_from(value: &VectorIndexMeta) -> std::result::Result { + bincode::serde::encode_to_vec(value, bincode::config::standard()).map_err(|e| { + ErrorCode::StorageOther(format!("failed to encode vector index meta {:?}", e)) + }) + } +} + +impl TryFrom for VectorIndexMeta { + type Error = ErrorCode; + + fn try_from(value: Bytes) -> std::result::Result { + bincode::serde::decode_from_slice(value.as_ref(), bincode::config::standard()) + .map(|(v, len)| { + assert_eq!(len, value.len()); + v + }) + .map_err(|e| { + ErrorCode::StorageOther(format!("failed to decode vector index meta {:?}", e)) + }) + } +} + +impl TryFrom<&VectorIndexFile> for Vec { + type Error = ErrorCode; + + fn try_from(value: &VectorIndexFile) -> std::result::Result { + let serializable = SerializableVectorIndexFile { + name: value.name.clone(), + data: value.data.to_vec(), + }; + bincode::serde::encode_to_vec(&serializable, bincode::config::standard()).map_err(|e| { + ErrorCode::StorageOther(format!("failed to encode vector index file {:?}", e)) + }) + } +} + +impl TryFrom for VectorIndexFile { + type Error = ErrorCode; + + fn try_from(value: Bytes) -> std::result::Result { + bincode::serde::decode_from_slice(value.as_ref(), bincode::config::standard()) + .map(|(v, len)| { + assert_eq!(len, value.len()); + v + }) + .map(|v: SerializableVectorIndexFile| VectorIndexFile { + name: v.name, + data: v.data.into(), + }) + .map_err(|e| { + ErrorCode::StorageOther(format!("failed to decode vector index file {:?}", e)) + }) + } +} + impl TryFrom for VectorIndexMeta { type Error = ErrorCode; diff --git a/src/query/storages/common/index/src/inverted_index.rs b/src/query/storages/common/index/src/inverted_index.rs index b5d55cc9f5b70..c825af9987640 100644 --- a/src/query/storages/common/index/src/inverted_index.rs +++ b/src/query/storages/common/index/src/inverted_index.rs @@ -50,6 +50,7 @@ use std::path::PathBuf; use std::result; use std::sync::Arc; +use bytes::Bytes; use crc32fast::Hasher; use databend_common_exception::ErrorCode; use databend_common_exception::Result; @@ -107,7 +108,6 @@ use tantivy_fst::Automaton; use tantivy_fst::IntoStreamer; use tantivy_fst::Regex; use tantivy_fst::Streamer; - // tantivy version is used to generate the footer data // The magic byte of the footer to identify corruption @@ -1236,7 +1236,7 @@ impl DocIdsCollector { } } -#[derive(Clone)] +#[derive(Clone, serde::Serialize, serde::Deserialize)] pub struct InvertedIndexMeta { pub version: usize, pub columns: Vec<(String, SingleColumnMeta)>, @@ -1300,6 +1300,70 @@ impl InvertedIndexFile { } } +#[derive(serde::Serialize, serde::Deserialize)] +struct SerializableInvertedIndexFile { + name: String, + data: Vec, +} + +impl TryFrom<&InvertedIndexFile> for Vec { + type Error = ErrorCode; + + fn try_from(value: &InvertedIndexFile) -> std::result::Result { + let serializable = SerializableInvertedIndexFile { + name: value.name.clone(), + data: value.data.as_slice().to_vec(), + }; + bincode::serde::encode_to_vec(&serializable, bincode::config::standard()).map_err(|e| { + ErrorCode::StorageOther(format!("failed to encode inverted index file {:?}", e)) + }) + } +} + +impl TryFrom for InvertedIndexFile { + type Error = ErrorCode; + + fn try_from(value: Bytes) -> std::result::Result { + bincode::serde::decode_from_slice(value.as_ref(), bincode::config::standard()) + .map(|(v, len)| { + assert_eq!(len, value.len()); + v + }) + .map(|v: SerializableInvertedIndexFile| InvertedIndexFile { + name: v.name, + data: OwnedBytes::new(v.data), + }) + .map_err(|e| { + ErrorCode::StorageOther(format!("failed to decode inverted index file {:?}", e)) + }) + } +} + +impl TryFrom<&InvertedIndexMeta> for Vec { + type Error = ErrorCode; + + fn try_from(value: &InvertedIndexMeta) -> std::result::Result { + bincode::serde::encode_to_vec(value, bincode::config::standard()).map_err(|e| { + ErrorCode::StorageOther(format!("failed to encode inverted index meta {:?}", e)) + }) + } +} + +impl TryFrom for InvertedIndexMeta { + type Error = ErrorCode; + + fn try_from(value: Bytes) -> std::result::Result { + bincode::serde::decode_from_slice(value.as_ref(), bincode::config::standard()) + .map(|(v, len)| { + assert_eq!(len, value.len()); + v + }) + .map_err(|e| { + ErrorCode::StorageOther(format!("failed to decode inverted index meta {:?}", e)) + }) + } +} + /// The Writer just writes a buffer. struct VecWriter { path: PathBuf, diff --git a/src/query/storages/fuse/src/io/read/meta/meta_readers.rs b/src/query/storages/fuse/src/io/read/meta/meta_readers.rs index 07ba4e81668f5..55759c9269118 100644 --- a/src/query/storages/fuse/src/io/read/meta/meta_readers.rs +++ b/src/query/storages/fuse/src/io/read/meta/meta_readers.rs @@ -53,8 +53,8 @@ pub type BloomIndexMetaReader = HybridCacheReader>; pub type CompactSegmentInfoReader = InMemoryCacheReader>; -pub type InvertedIndexMetaReader = InMemoryCacheReader>; -pub type VectorIndexMetaReader = InMemoryCacheReader>; +pub type InvertedIndexMetaReader = HybridCacheReader>; +pub type VectorIndexMetaReader = HybridCacheReader>; pub type SegmentStatsReader = InMemoryCacheReader>; pub struct MetaReaders; diff --git a/src/query/storages/system/src/caches_table.rs b/src/query/storages/system/src/caches_table.rs index df15dd2c36f90..7209397bfe6a1 100644 --- a/src/query/storages/system/src/caches_table.rs +++ b/src/query/storages/system/src/caches_table.rs @@ -139,19 +139,35 @@ impl SyncSystemTable for CachesTable { } if let Some(inverted_index_meta_cache) = inverted_index_meta_cache { - Self::append_row(&inverted_index_meta_cache, &local_node, &mut columns); + Self::append_rows_of_hybrid_cache( + &inverted_index_meta_cache, + &local_node, + &mut columns, + ); } if let Some(inverted_index_file_cache) = inverted_index_file_cache { - Self::append_row(&inverted_index_file_cache, &local_node, &mut columns); + Self::append_rows_of_hybrid_cache( + &inverted_index_file_cache, + &local_node, + &mut columns, + ); } if let Some(vector_index_meta_cache) = vector_index_meta_cache { - Self::append_row(&vector_index_meta_cache, &local_node, &mut columns); + Self::append_rows_of_hybrid_cache( + &vector_index_meta_cache, + &local_node, + &mut columns, + ); } if let Some(vector_index_file_cache) = vector_index_file_cache { - Self::append_row(&vector_index_file_cache, &local_node, &mut columns); + Self::append_rows_of_hybrid_cache( + &vector_index_file_cache, + &local_node, + &mut columns, + ); } if let Some(prune_partitions_cache) = prune_partitions_cache { From 520ba02f46e067620f2dab280eebbe0fbbc0f4e0 Mon Sep 17 00:00:00 2001 From: baishen Date: Thu, 18 Dec 2025 16:48:11 +0800 Subject: [PATCH 2/2] fix --- .../it/storages/testdata/configs_table_basic.txt | 4 ++++ src/query/storages/system/src/caches_table.rs | 12 ++---------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index 27aef619bd62e..5d8509da52274 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -11,8 +11,12 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo | 'cache' | 'disk.max_bytes' | '21474836480' | '' | | 'cache' | 'disk.path' | './.databend/_cache' | '' | | 'cache' | 'disk.sync_data' | 'true' | '' | +| 'cache' | 'disk_cache_inverted_index_data_size' | '0' | '' | +| 'cache' | 'disk_cache_inverted_index_meta_size' | '0' | '' | | 'cache' | 'disk_cache_table_bloom_index_data_size' | '0' | '' | | 'cache' | 'disk_cache_table_bloom_index_meta_size' | '0' | '' | +| 'cache' | 'disk_cache_vector_index_data_size' | '0' | '' | +| 'cache' | 'disk_cache_vector_index_meta_size' | '0' | '' | | 'cache' | 'enable_table_bloom_index_cache' | 'true' | '' | | 'cache' | 'enable_table_meta_cache' | 'true' | '' | | 'cache' | 'iceberg_table_meta_count' | '1024' | '' | diff --git a/src/query/storages/system/src/caches_table.rs b/src/query/storages/system/src/caches_table.rs index 7209397bfe6a1..7a6ed66f0cb40 100644 --- a/src/query/storages/system/src/caches_table.rs +++ b/src/query/storages/system/src/caches_table.rs @@ -155,19 +155,11 @@ impl SyncSystemTable for CachesTable { } if let Some(vector_index_meta_cache) = vector_index_meta_cache { - Self::append_rows_of_hybrid_cache( - &vector_index_meta_cache, - &local_node, - &mut columns, - ); + Self::append_rows_of_hybrid_cache(&vector_index_meta_cache, &local_node, &mut columns); } if let Some(vector_index_file_cache) = vector_index_file_cache { - Self::append_rows_of_hybrid_cache( - &vector_index_file_cache, - &local_node, - &mut columns, - ); + Self::append_rows_of_hybrid_cache(&vector_index_file_cache, &local_node, &mut columns); } if let Some(prune_partitions_cache) = prune_partitions_cache {