Skip to content

Commit 682c0ff

Browse files
committed
Add composite aggregation
1 parent 0358ca3 commit 682c0ff

File tree

5 files changed

+186
-13
lines changed

5 files changed

+186
-13
lines changed

quickwit/Cargo.lock

Lines changed: 10 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

quickwit/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ quickwit-serve = { path = "quickwit-serve" }
346346
quickwit-storage = { path = "quickwit-storage" }
347347
quickwit-telemetry = { path = "quickwit-telemetry" }
348348

349-
tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "25d44fcec8", default-features = false, features = [
349+
tantivy = { git = "https://github.com/SekoiaLab/tantivy/", rev = "ebf6078", default-features = false, features = [
350350
"lz4-compression",
351351
"mmap",
352352
"quickwit",

quickwit/quickwit-indexing/src/actors/merge_executor.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -839,7 +839,8 @@ mod tests {
839839
let documents_left = searcher
840840
.search(
841841
&tantivy::query::AllQuery,
842-
&tantivy::collector::TopDocs::with_limit(result_docs.len() + 1),
842+
&tantivy::collector::TopDocs::with_limit(result_docs.len() + 1)
843+
.order_by_score(),
843844
)?
844845
.into_iter()
845846
.map(|(_, doc_address)| {

quickwit/quickwit-query/src/aggregations.rs

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ use tantivy::aggregation::Key as TantivyKey;
1818
use tantivy::aggregation::agg_result::{
1919
AggregationResult as TantivyAggregationResult, AggregationResults as TantivyAggregationResults,
2020
BucketEntries as TantivyBucketEntries, BucketEntry as TantivyBucketEntry,
21-
BucketResult as TantivyBucketResult, MetricResult as TantivyMetricResult,
21+
BucketResult as TantivyBucketResult, CompositeBucketEntry as TantivyCompositeBucketEntry,
22+
CompositeKey as TantivyCompositeKey, MetricResult as TantivyMetricResult,
2223
RangeBucketEntry as TantivyRangeBucketEntry,
2324
};
2425
use tantivy::aggregation::metric::{
@@ -169,6 +170,13 @@ pub enum BucketResult {
169170
/// The upper bound error for the doc count of each term.
170171
doc_count_error_upper_bound: Option<u64>,
171172
},
173+
/// This is the composite aggregation result
174+
Composite {
175+
/// The buckets
176+
buckets: Vec<CompositeBucketEntry>,
177+
/// The key to start after when paginating
178+
after_key: FxHashMap<String, CompositeKey>,
179+
},
172180
}
173181

174182
impl From<TantivyBucketResult> for BucketResult {
@@ -192,6 +200,10 @@ impl From<TantivyBucketResult> for BucketResult {
192200
TantivyBucketResult::Filter(_filter_bucket_result) => {
193201
unimplemented!("filter aggregation is not yet supported in quickwit")
194202
}
203+
TantivyBucketResult::Composite { buckets, after_key } => BucketResult::Composite {
204+
buckets: buckets.into_iter().map(Into::into).collect(),
205+
after_key: after_key.into_iter().map(|(k, v)| (k, v.into())).collect(),
206+
},
195207
}
196208
}
197209
}
@@ -214,6 +226,10 @@ impl From<BucketResult> for TantivyBucketResult {
214226
sum_other_doc_count,
215227
doc_count_error_upper_bound,
216228
},
229+
BucketResult::Composite { buckets, after_key } => TantivyBucketResult::Composite {
230+
buckets: buckets.into_iter().map(Into::into).collect(),
231+
after_key: after_key.into_iter().map(|(k, v)| (k, v.into())).collect(),
232+
},
217233
}
218234
}
219235
}
@@ -413,3 +429,75 @@ impl From<PercentilesMetricResult> for TantivyPercentilesMetricResult {
413429
TantivyPercentilesMetricResult { values }
414430
}
415431
}
432+
433+
#[derive(Clone, Debug, Serialize, Deserialize)]
434+
pub enum CompositeKey {
435+
/// Boolean key
436+
Bool(bool),
437+
/// String key
438+
Str(String),
439+
/// `i64` key
440+
I64(i64),
441+
/// `u64` key
442+
U64(u64),
443+
/// `f64` key
444+
F64(f64),
445+
/// Null key
446+
Null,
447+
}
448+
449+
#[derive(Clone, Debug, Serialize, Deserialize)]
450+
pub struct CompositeBucketEntry {
451+
/// The identifier of the bucket.
452+
pub key: FxHashMap<String, CompositeKey>,
453+
/// Number of documents in the bucket.
454+
pub doc_count: u64,
455+
/// Sub-aggregations in this bucket.
456+
pub sub_aggregation: AggregationResults,
457+
}
458+
459+
impl From<TantivyCompositeKey> for CompositeKey {
460+
fn from(value: TantivyCompositeKey) -> CompositeKey {
461+
match value {
462+
TantivyCompositeKey::Bool(b) => CompositeKey::Bool(b),
463+
TantivyCompositeKey::Str(s) => CompositeKey::Str(s),
464+
TantivyCompositeKey::I64(i) => CompositeKey::I64(i),
465+
TantivyCompositeKey::U64(u) => CompositeKey::U64(u),
466+
TantivyCompositeKey::F64(f) => CompositeKey::F64(f),
467+
TantivyCompositeKey::Null => CompositeKey::Null,
468+
}
469+
}
470+
}
471+
472+
impl From<CompositeKey> for TantivyCompositeKey {
473+
fn from(value: CompositeKey) -> TantivyCompositeKey {
474+
match value {
475+
CompositeKey::Bool(b) => TantivyCompositeKey::Bool(b),
476+
CompositeKey::Str(s) => TantivyCompositeKey::Str(s),
477+
CompositeKey::I64(i) => TantivyCompositeKey::I64(i),
478+
CompositeKey::U64(u) => TantivyCompositeKey::U64(u),
479+
CompositeKey::F64(f) => TantivyCompositeKey::F64(f),
480+
CompositeKey::Null => TantivyCompositeKey::Null,
481+
}
482+
}
483+
}
484+
485+
impl From<TantivyCompositeBucketEntry> for CompositeBucketEntry {
486+
fn from(value: TantivyCompositeBucketEntry) -> CompositeBucketEntry {
487+
CompositeBucketEntry {
488+
key: value.key.into_iter().map(|(k, v)| (k, v.into())).collect(),
489+
doc_count: value.doc_count,
490+
sub_aggregation: value.sub_aggregation.into(),
491+
}
492+
}
493+
}
494+
495+
impl From<CompositeBucketEntry> for TantivyCompositeBucketEntry {
496+
fn from(value: CompositeBucketEntry) -> TantivyCompositeBucketEntry {
497+
TantivyCompositeBucketEntry {
498+
key: value.key.into_iter().map(|(k, v)| (k, v.into())).collect(),
499+
doc_count: value.doc_count,
500+
sub_aggregation: value.sub_aggregation.into(),
501+
}
502+
}
503+
}

quickwit/rest-api-tests/scenarii/aggregations/0001-aggregations.yaml

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,7 @@ expected:
375375
aggregations:
376376
response_stats:
377377
sum_of_squares: 55300.0
378+
---
378379
# Test term aggs number precision
379380
method: [GET]
380381
engines:
@@ -393,3 +394,86 @@ expected:
393394
buckets:
394395
- doc_count: 1
395396
key: 1769070189829214200
397+
---
398+
# Test composite aggregation
399+
method: [GET]
400+
engines:
401+
- quickwit
402+
endpoint: _elastic/aggregations/_search
403+
json:
404+
size: 0
405+
aggs:
406+
host_name_composite:
407+
composite:
408+
size: 5
409+
sources:
410+
- host:
411+
terms:
412+
field: "host"
413+
missing_bucket: true
414+
- name:
415+
terms:
416+
field: "name"
417+
- response:
418+
histogram:
419+
field: "response"
420+
interval: 50
421+
expected:
422+
aggregations:
423+
host_name_composite:
424+
buckets:
425+
- key: { "host": null, "name": "Bernhard", "response": 100.0 }
426+
doc_count: 1
427+
- key: { "host": null, "name": "Fritz", "response": 0.0 }
428+
doc_count: 2
429+
- key: { "host": "192.168.0.1", "name": "Fred", "response": 100.0 }
430+
doc_count: 1
431+
- key: { "host": "192.168.0.1", "name": "Fritz", "response": 0.0 }
432+
doc_count: 1
433+
- key: { "host": "192.168.0.10", "name": "Albert", "response": 100.0 }
434+
doc_count: 1
435+
after_key:
436+
host: "192.168.0.10"
437+
name: "Albert"
438+
response: 100.0
439+
440+
---
441+
# Test composite aggregation paging
442+
method: [GET]
443+
engines:
444+
- quickwit
445+
endpoint: _elastic/aggregations/_search
446+
json:
447+
size: 0
448+
aggs:
449+
host_name_composite:
450+
composite:
451+
size: 5
452+
sources:
453+
- host:
454+
terms:
455+
field: "host"
456+
missing_bucket: true
457+
- name:
458+
terms:
459+
field: "name"
460+
- response:
461+
histogram:
462+
field: "response"
463+
interval: 50
464+
after:
465+
host: "192.168.0.10"
466+
name: "Albert"
467+
response: 100.0
468+
expected:
469+
aggregations:
470+
host_name_composite:
471+
buckets:
472+
- key: { "host": "192.168.0.10", "name": "Holger", "response": 0.0 }
473+
doc_count: 1
474+
# Horst is missing because his response field is missing
475+
- key: { "host": "192.168.0.10", "name": "Werner", "response": 0.0 }
476+
doc_count: 1
477+
- key: { "host": "192.168.0.11", "name": "Manfred", "response": 100.0 }
478+
doc_count: 1
479+
---

0 commit comments

Comments
 (0)