-
Notifications
You must be signed in to change notification settings - Fork 590
feat(ts): add the support of TWA aggregator to Range and MRange #3262
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: unstable
Are you sure you want to change the base?
Changes from 7 commits
fc7b622
e83a348
279c9bd
9d8e076
9174002
d4b814b
ae2268f
021bae7
7e2e5e9
2e6dfb6
45ff6fa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -72,16 +72,20 @@ | |
| [](const TSSample &a, const TSSample &b) { return a.v < b.v; }); | ||
| return max->v - min->v; | ||
| } | ||
| /// Integrates the piecewise-linear curve through `samples` over time using the trapezoidal rule. | ||
| /// Returns 0 when fewer than two samples are given, because a single point spans no time. | ||
| static inline double Area(nonstd::span<const TSSample> samples) { | ||
|   double total = 0; | ||
|   for (size_t idx = 1; idx < samples.size(); ++idx) { | ||
|     const TSSample &left = samples[idx - 1]; | ||
|     const TSSample &right = samples[idx]; | ||
|     const auto width = static_cast<double>(right.ts - left.ts); | ||
|     // Rectangle at the height of the left sample plus the triangle closing the gap to the right sample. | ||
|     const double rectangle = width * left.v; | ||
|     const double triangle = width * (right.v - left.v) * 0.5; | ||
|     total += rectangle + triangle; | ||
|   } | ||
|   return total; | ||
| } | ||
| }; | ||
|
|
||
| std::vector<TSSample> AggregateSamplesByRangeOption(std::vector<TSSample> samples, const TSRangeOption &option) { | ||
|
Check failure on line 87 in src/types/redis_timeseries.cc
|
||
| const auto &aggregator = option.aggregator; | ||
| std::vector<TSSample> res; | ||
| if (aggregator.type == TSAggregatorType::NONE || samples.empty()) { | ||
| res = std::move(samples); | ||
| return res; | ||
| } | ||
| auto spans = aggregator.SplitSamplesToBuckets(samples); | ||
|
|
||
| auto get_bucket_ts = [&](uint64_t left) -> uint64_t { | ||
| using BucketTimestampType = TSRangeOption::BucketTimestampType; | ||
|
|
@@ -97,7 +101,114 @@ | |
| } | ||
| return 0; | ||
| }; | ||
| /// Area under the straight line joining `prev` and `curr`, restricted to the interval | ||
| /// [bucket_left, curr.ts]. Covers the leading part of a bucket, before its first sample. | ||
| /// Total area = bottom rectangle at height prev.v + triangle above it. | ||
| /// Preconditions: prev.ts < curr.ts, because the result is divided by (curr.ts - prev.ts), and | ||
| /// prev.ts <= bucket_left, because bucket_left is a uint64_t and (assuming ts is unsigned as well) | ||
| /// a negative difference would wrap around before the cast to double. | ||
| auto front_area = [](uint64_t bucket_left, const TSSample &prev, const TSSample &curr) { | ||
|   auto x = static_cast<double>(bucket_left - prev.ts);  // Distance from prev to the bucket's left edge. | ||
|   auto y = static_cast<double>(curr.ts - prev.ts);      // Distance from prev to curr. | ||
|   auto z = curr.v - prev.v;                             // Change in value from prev to curr. | ||
|   // Triangle over [prev.ts, curr.ts] minus the similar triangle over [prev.ts, bucket_left]: | ||
|   // z*y/2 - (z*x/y)*x/2. | ||
|   auto triangle_area = (z * (y - (x * x) / y)) / 2; | ||
|   // x and y are already doubles, so their difference needs no further cast. | ||
|   auto rect_area = (y - x) * prev.v; | ||
|   return triangle_area + rect_area; | ||
| }; | ||
| /// Area under the straight line joining `curr` and `next`, restricted to the interval | ||
| /// [curr.ts, bucket_right]. Covers the tail of a bucket, after its last sample. | ||
| /// The result is a rectangle at height curr.v plus the triangle formed by the interpolated rise. | ||
| auto end_area = [](uint64_t bucket_right, const TSSample &curr, const TSSample &next) { | ||
|   const auto width = static_cast<double>(bucket_right - curr.ts);  // From curr to the bucket's right edge. | ||
|   const auto gap = static_cast<double>(next.ts - curr.ts);         // From curr to next. | ||
|   const auto rise = next.v - curr.v; | ||
|   const auto rectangle = width * curr.v; | ||
|   const auto triangle = (width * width * rise) / (2 * gap); | ||
|   return triangle + rectangle; | ||
| }; | ||
yezhizi marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| // Time-weighted average between bucket_left and bucket_right for a bucket that holds no samples, | ||
| // taken from the straight line joining its nearest neighbors left_nb and right_nb. | ||
| auto empty_bucket_twa = [&front_area](const TSSample &left_nb, uint64_t bucket_left, uint64_t bucket_right, | ||
|                                       const TSSample &right_nb) { | ||
|   // front_area(t, ...) yields the area from t up to right_nb, so the bucket's own area is | ||
|   // (area from bucket_left to right_nb) - (area from bucket_right to right_nb). | ||
|   const auto area_from_left_edge = front_area(bucket_left, left_nb, right_nb); | ||
|   const auto area_from_right_edge = front_area(bucket_right, left_nb, right_nb); | ||
|   const auto bucket_width = static_cast<double>(bucket_right - bucket_left); | ||
|   return (area_from_left_edge - area_from_right_edge) / bucket_width; | ||
| }; | ||
|
|
||
| // Retrieve prev_sample and next_sample from samples when TWA aggregation. | ||
| TSSample prev_sample, next_sample; | ||
|
Check warning on line 134 in src/types/redis_timeseries.cc
|
||
| bool is_twa_aggregator = aggregator.type == TSAggregatorType::TWA, prev_available = false, next_available = false; | ||
|
Check warning on line 135 in src/types/redis_timeseries.cc
|
||
| if (is_twa_aggregator) { | ||
| const bool discard_boundaries = !option.filter_by_ts.empty() || option.filter_by_value.has_value(); | ||
| next_sample = samples.back(); | ||
| samples.pop_back(); | ||
| prev_sample = samples.back(); | ||
| samples.pop_back(); | ||
| // When FILTER_BY_TS/FILTER_BY_VALUE is enabled, discard out-of-boundary samples. | ||
| prev_available = discard_boundaries ? false : !samples.empty() && (samples.front().ts != prev_sample.ts); | ||
| next_available = discard_boundaries ? false : !samples.empty() && (samples.back().ts != next_sample.ts); | ||
| } | ||
|
Comment on lines
124
to
133
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we can pass And modify the function interface to:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hi @yezhizi the I declared the struct as Is there any other need for them to be declared as
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Using |
||
| std::vector<TSSample> res; | ||
| if (is_twa_aggregator && option.is_return_empty && samples.empty()) { | ||
| const bool early_return = prev_sample.ts == TSSample::MAX_TIMESTAMP || next_sample.ts == TSSample::MAX_TIMESTAMP || | ||
| prev_sample.ts == next_sample.ts; // When filter entire range lies left or right to data. | ||
| if (early_return) { | ||
|
Check warning on line 150 in src/types/redis_timeseries.cc
|
||
| res = std::move(samples); | ||
| return res; | ||
| } | ||
| // Both prev and next should be available. Total range should be in between the prev and next samples. | ||
| assert(prev_sample.ts <= option.start_ts && option.end_ts <= next_sample.ts); | ||
|
|
||
yezhizi marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| uint64_t n_buckets_estimate = (option.end_ts - option.start_ts) / option.aggregator.bucket_duration; | ||
| res.reserve(n_buckets_estimate + 1); | ||
| uint64_t bucket_left = aggregator.CalculateAlignedBucketLeft(option.start_ts); | ||
| uint64_t bucket_right = aggregator.CalculateAlignedBucketRight(bucket_left); | ||
| for (size_t i = 0; i < n_buckets_estimate; i++) { | ||
| bucket_left = std::max(bucket_left, option.start_ts); | ||
| bucket_right = std::min(bucket_right, option.end_ts); | ||
| TSSample sample; | ||
| sample.ts = bucket_left; | ||
| sample.v = empty_bucket_twa(prev_sample, bucket_left, bucket_right, next_sample); | ||
| res.push_back(sample); | ||
| bucket_left = bucket_right; | ||
| bucket_right = aggregator.CalculateAlignedBucketRight(bucket_left); | ||
| } | ||
| // Process last bucket. | ||
| TSSample sample; | ||
| sample.ts = bucket_left; | ||
| if (bucket_left == option.end_ts) { // Calculate last sample. | ||
| double y_diff = next_sample.v - prev_sample.v; | ||
| auto x_diff = static_cast<double>(next_sample.ts - prev_sample.ts); | ||
| auto x_prime_diff = static_cast<double>(option.end_ts - prev_sample.ts); | ||
| double y_prime_diff = (x_prime_diff * y_diff) / x_diff; | ||
| sample.v = y_prime_diff + prev_sample.v; | ||
yezhizi marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } else { | ||
| sample.v = empty_bucket_twa(prev_sample, bucket_left, bucket_right, next_sample); | ||
| } | ||
| res.push_back(sample); | ||
| return res; | ||
| } else if (aggregator.type == TSAggregatorType::NONE || samples.empty()) { | ||
| res = std::move(samples); | ||
| return res; | ||
| } | ||
|
|
||
| auto spans = aggregator.SplitSamplesToBuckets(samples); | ||
| res.reserve(spans.size()); | ||
|
|
||
| // Walks left from `curr` and returns the index of the first non-empty span encountered. | ||
| // The walk stops at index 0 without inspecting it, so 0 is returned when nothing else matched. | ||
| // `curr` must be greater than 0; otherwise the decrement wraps around. | ||
| auto non_empty_left_bucket_idx = [&spans](size_t curr) { | ||
|   do { | ||
|     --curr; | ||
|   } while (curr != 0 && spans[curr].empty()); | ||
|   return curr; | ||
| }; | ||
| // Walks right from `curr` and returns the index of the first non-empty span encountered, | ||
| // or spans.size() when every span to the right is empty. | ||
| auto non_empty_right_bucket_idx = [&spans](size_t curr) { | ||
|   do { | ||
|     ++curr; | ||
|   } while (curr < spans.size() && spans[curr].empty()); | ||
|   return curr; | ||
| }; | ||
|
|
||
| std::vector<std::pair<TSSample, TSSample>> neighbors; | ||
| neighbors.reserve(spans.size()); | ||
| for (size_t i = 0; i < spans.size(); i++) { | ||
| TSSample prev = (i != 0) ? spans[non_empty_left_bucket_idx(i)].back() : prev_sample; | ||
| TSSample next = (i != (spans.size() - 1)) ? spans[non_empty_right_bucket_idx(i)].front() : next_sample; | ||
| neighbors.emplace_back(prev, next); | ||
| assert(spans[i].empty() || | ||
| (neighbors[i].first.ts <= spans[i].front().ts && spans[i].back().ts <= neighbors[i].second.ts)); | ||
| } // Should follow: neighbors[i].first <= span[i].front() <= span[i].back() <= neighbors[i].second; | ||
yezhizi marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| uint64_t bucket_left = aggregator.CalculateAlignedBucketLeft(samples.front().ts); | ||
| for (size_t i = 0; i < spans.size(); i++) { | ||
| if (option.count_limit && res.size() >= option.count_limit) { | ||
|
|
@@ -114,6 +225,14 @@ | |
| case TSAggregatorType::COUNT: | ||
| sample.v = 0; | ||
| break; | ||
| case TSAggregatorType::TWA: | ||
| if ((i == 0 && !prev_available) || (i == spans.size() - 1 && !next_available)) { | ||
|
Check failure on line 229 in src/types/redis_timeseries.cc
|
||
| sample.v = TSSample::NAN_VALUE; | ||
| } else { | ||
| auto bucket_right = aggregator.CalculateAlignedBucketRight(bucket_left); | ||
| sample.v = empty_bucket_twa(neighbors[i].first, bucket_left, bucket_right, neighbors[i].second); | ||
| } | ||
| break; | ||
| case TSAggregatorType::LAST: | ||
| if (i == 0 || spans[i - 1].empty()) { | ||
| sample.v = TSSample::NAN_VALUE; | ||
|
|
@@ -126,6 +245,24 @@ | |
| } | ||
| } else if (!spans[i].empty()) { | ||
| sample.v = aggregator.AggregateSamplesValue(spans[i]); | ||
|
|
||
| if (is_twa_aggregator) { | ||
| auto bucket_right = aggregator.CalculateAlignedBucketRight(bucket_left); | ||
| // Cut left and right empty regions. In case of first and last bucket. | ||
| bucket_left = std::max(bucket_left, option.start_ts); | ||
| bucket_right = std::min(bucket_right, option.end_ts); | ||
| // Front area available iff prev_sample < bucket_left < span[i].front(). Similarly for end_area. | ||
| bool front_available = (spans[i].front().ts != bucket_left) && (neighbors[i].first.ts <= bucket_left); | ||
| bool back_available = (spans[i].back().ts != bucket_right) && (bucket_right <= neighbors[i].second.ts); | ||
| double area = 0; | ||
| area += front_available ? front_area(bucket_left, neighbors[i].first, spans[i].front()) : 0.0; | ||
| area += back_available ? end_area(bucket_right, spans[i].back(), neighbors[i].second) : 0.0; | ||
| // Edge case: If single bucket and it contains only one element. | ||
| area += !front_available && !back_available && spans[i].size() == 1 ? spans[i][0].v : 0; | ||
| uint64_t l = front_available ? bucket_left : spans[i].front().ts; | ||
| uint64_t r = back_available ? bucket_right : spans[i].back().ts; | ||
| sample.v = (sample.v + area) / std::max(static_cast<double>(r - l), 1.0); | ||
| } | ||
| } else { | ||
| continue; | ||
| } | ||
|
|
@@ -810,6 +947,9 @@ | |
| case TSAggregatorType::VAR_S: | ||
| res = Reducer::VarS(samples); | ||
| break; | ||
| case TSAggregatorType::TWA: | ||
| res = Reducer::Area(samples); | ||
| break; | ||
| default: | ||
| unreachable(); | ||
| } | ||
|
|
@@ -1055,18 +1195,24 @@ | |
| bool has_aggregator = aggregator.type != TSAggregatorType::NONE; | ||
| if (iter->Valid()) { | ||
| if (option.count_limit != 0 && !has_aggregator) { | ||
| temp_results.reserve(option.count_limit); | ||
| temp_results.reserve(option.count_limit + 2); | ||
| } else { | ||
| chunk = CreateTSChunkFromData(iter->value()); | ||
| auto range = chunk->GetLastTimestamp() - chunk->GetFirstTimestamp() + 1; | ||
| auto estimate_chunks = std::min((end_timestamp - start_timestamp) / range, uint64_t(32)); | ||
| temp_results.reserve(estimate_chunks * metadata.chunk_size); | ||
| temp_results.reserve(estimate_chunks * metadata.chunk_size + 2); | ||
| } | ||
| } | ||
| // Get samples from chunks | ||
| uint64_t bucket_count = 0; | ||
| uint64_t last_bucket = 0; | ||
| bool is_not_enough = true; | ||
| // Add these two samples at end when aggregator is TWA. | ||
| TSSample prev_sample, next_sample; | ||
|
Check warning on line 1211 in src/types/redis_timeseries.cc
|
||
| prev_sample.ts = TSSample::MAX_TIMESTAMP; | ||
| next_sample.ts = TSSample::MAX_TIMESTAMP; | ||
| const bool is_twa_aggregator = option.aggregator.type == TSAggregatorType::TWA; | ||
|
|
||
| for (; iter->Valid() && is_not_enough; iter->Next()) { | ||
| chunk = CreateTSChunkFromData(iter->value()); | ||
| auto it = chunk->CreateIterator(); | ||
|
|
@@ -1081,7 +1227,12 @@ | |
| const bool not_time_filtered = option.filter_by_ts.empty() || option.filter_by_ts.count(sample->ts); | ||
| const bool value_in_range = !option.filter_by_value || (sample->v >= option.filter_by_value->first && | ||
| sample->v <= option.filter_by_value->second); | ||
|
|
||
| // Record prev and next samples around the filtered range when aggregator is TWA | ||
| if (is_twa_aggregator) { | ||
| prev_sample = (sample->ts <= start_timestamp) ? *sample : prev_sample; | ||
| next_sample = | ||
| (sample->ts >= end_timestamp && next_sample.ts == TSSample::MAX_TIMESTAMP) ? *sample : next_sample; | ||
| } | ||
| if (!in_time_range || !not_time_filtered || !value_in_range) { | ||
| continue; | ||
| } | ||
|
|
@@ -1103,6 +1254,18 @@ | |
| } | ||
| } | ||
|
|
||
| if (is_twa_aggregator) { | ||
| // If the first element of the series is in first bucket, prev_sample might not get initialized. Similarly if the | ||
| // last element in the series is in last bucket, next_sample might not get initialized. If the series is empty, | ||
| // prev_sample and next_sample points to infinity (MAX_TIMESTAMP) | ||
| prev_sample = | ||
| prev_sample.ts == TSSample::MAX_TIMESTAMP && !temp_results.empty() ? temp_results.front() : prev_sample; | ||
| next_sample = | ||
| next_sample.ts == TSSample::MAX_TIMESTAMP && !temp_results.empty() ? temp_results.back() : next_sample; | ||
| temp_results.push_back(prev_sample); | ||
| temp_results.push_back(next_sample); | ||
| } | ||
|
|
||
| // Process compaction logic | ||
| *res = AggregateSamplesByRangeOption(std::move(temp_results), option); | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.