From 232957722dedc56c36dcf6db849214d01eae5e74 Mon Sep 17 00:00:00 2001
From: filipecosta90
Date: Wed, 12 Oct 2022 16:49:02 +0100
Subject: [PATCH] T-Digest weights are now long long

---
 CMakeLists.txt       |   4 +-
 src/tdigest.c        | 139 +++++++++++++++------------
 src/tdigest.h        |  14 +--
 tests/unit/td_test.c | 220 +++++++++++++++++--------------------
 4 files changed, 173 insertions(+), 204 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2fbbc3a..7b69609 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,8 +33,8 @@ if(ENABLE_SANITIZERS)
     message(STATUS "Forcing build type to Debug to run coverage.")
     set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Choose the type of build." FORCE)
-    set (CMAKE_C_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-omit-frame-pointer -fsanitize=address")
-    set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-omit-frame-pointer -fsanitize=address")
+    set (CMAKE_C_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wshadow -Wpointer-arith -Wcast-qual -Wunused -Wstrict-prototypes -Wmissing-prototypes -Wwrite-strings -Werror -fno-omit-frame-pointer -fsanitize=address")
+    set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wshadow -Wpointer-arith -Wcast-qual -Wunused -Wstrict-prototypes -Wmissing-prototypes -Wwrite-strings -Werror -fno-omit-frame-pointer -fsanitize=address")
     set (CMAKE_LINKER_FLAGS_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG} -fno-omit-frame-pointer -fsanitize=address")
 ENDIF()
diff --git a/src/tdigest.c b/src/tdigest.c
index 4ab18c0..7b49d31 100644
--- a/src/tdigest.c
+++ b/src/tdigest.c
@@ -20,6 +20,14 @@ static inline double weighted_average_sorted(double x1, double w1, double x2, do
     return __td_max(x1, __td_min(x, x2));
 }
 
+static inline bool _tdigest_long_long_add_safe(long long a, long long b) {
+    if (b < 0) {
+        return (a >= -__LONG_LONG_MAX__ - 1 - b);
+    } else {
+        return (a <= __LONG_LONG_MAX__ - b);
+    }
+}
+
 static inline double weighted_average(double x1, double w1, double x2, double w2) {
     if (x1 <= x2) {
         return weighted_average_sorted(x1, w1, x2, w2);
@@ -34,11 +42,17 @@ static void inline swap(double *arr, int i, int j) {
     arr[j] = temp;
 }
 
-static unsigned int partition(double *means, double *weights, unsigned int start, unsigned int end,
-                              unsigned int pivot_idx) {
+static void inline swap_l(long long *arr, int i, int j) {
+    const long long temp = arr[i];
+    arr[i] = arr[j];
+    arr[j] = temp;
+}
+
+static unsigned int partition(double *means, long long *weights, unsigned int start,
+                              unsigned int end, unsigned int pivot_idx) {
     const double pivotMean = means[pivot_idx];
     swap(means, pivot_idx, end);
-    swap(weights, pivot_idx, end);
+    swap_l(weights, pivot_idx, end);
 
     int i = start - 1;
 
@@ -48,11 +62,11 @@ static unsigned int partition(double *means, double *weights, unsigned int start
             // increment index of smaller element
             i++;
             swap(means, i, j);
-            swap(weights, i, j);
+            swap_l(weights, i, j);
         }
     }
     swap(means, i + 1, end);
-    swap(weights, i + 1, end);
+    swap_l(weights, i + 1, end);
     return i + 1;
 }
 
@@ -64,13 +78,13 @@ static unsigned int partition(double *means, double *weights, unsigned int start
  * @param start The beginning of the values to sort
  * @param end The value after the last value to sort
  */
-void td_qsort(double *means, double *weights, unsigned int start, unsigned int end) {
+static void td_qsort(double *means, long long *weights, unsigned int start, unsigned int end) {
     if (start < end) {
         // two elements can be directly compared
        if ((end - start) == 1) {
             if (means[start] > means[end]) {
                 swap(means, start, end);
-                swap(weights, start, end);
+                swap_l(weights, start, end);
             }
             return;
         }
@@ -160,7 +174,7 @@ int td_init(double compression, td_histogram_t **result) {
         td_free(histogram);
         return 1;
     }
-    histogram->nodes_weight = (double *)__td_calloc(capacity, sizeof(double));
+    histogram->nodes_weight = (long long *)__td_calloc(capacity, sizeof(long long));
     if (!histogram->nodes_weight) {
         td_free(histogram);
         return 1;
@@ -187,19 +201,22 @@ void td_free(td_histogram_t *histogram) {
 }
 
 int td_merge(td_histogram_t *into, td_histogram_t *from) {
-    td_compress(into);
-    td_compress(from);
-    for (int i = 0; i < from->merged_nodes; i++) {
+    if (td_compress(into) != 0)
+        return EDOM;
+    if (td_compress(from) != 0)
+        return EDOM;
+    const int pos = from->merged_nodes + from->unmerged_nodes;
+    for (int i = 0; i < pos; i++) {
         const double mean = from->nodes_mean[i];
-        const double count = from->nodes_weight[i];
-        if (td_add(into, mean, count) != 0) {
+        const long long weight = from->nodes_weight[i];
+        if (td_add(into, mean, weight) != 0) {
             return EDOM;
         }
     }
     return 0;
 }
 
-double td_size(td_histogram_t *h) { return h->merged_weight + h->unmerged_weight; }
+long long td_size(td_histogram_t *h) { return h->merged_weight + h->unmerged_weight; }
 
 double td_cdf(td_histogram_t *h, double val) {
     td_compress(h);
@@ -229,7 +246,8 @@ double td_cdf(td_histogram_t *h, double val) {
     const int n = h->merged_nodes;
     // check for the left tail
     const double left_centroid_mean = h->nodes_mean[0];
-    const double left_centroid_weight = h->nodes_weight[0];
+    const double left_centroid_weight = (double)h->nodes_weight[0];
+    const double merged_weight_d = (double)h->merged_weight;
     if (val < left_centroid_mean) {
         // note that this is different than h->nodes_mean[0] > min
         // ... this guarantees we divide by non-zero number and interpolation works
@@ -237,10 +255,10 @@ double td_cdf(td_histogram_t *h, double val) {
         if (width > 0) {
             // must be a sample exactly at min
             if (val == h->min) {
-                return 0.5 / h->merged_weight;
+                return 0.5 / merged_weight_d;
             } else {
                 return (1 + (val - h->min) / width * (left_centroid_weight / 2 - 1)) /
-                       h->merged_weight;
+                       merged_weight_d;
             }
         } else {
             // this should be redundant with the check val < h->min
@@ -249,16 +267,16 @@ double td_cdf(td_histogram_t *h, double val) {
     }
     // and the right tail
     const double right_centroid_mean = h->nodes_mean[n - 1];
-    const double right_centroid_weight = h->nodes_weight[n - 1];
+    const double right_centroid_weight = (double)h->nodes_weight[n - 1];
     if (val > right_centroid_mean) {
         const double width = h->max - right_centroid_mean;
         if (width > 0) {
             if (val == h->max) {
-                return 1 - 0.5 / h->merged_weight;
+                return 1 - 0.5 / merged_weight_d;
             } else {
                 // there has to be a single sample exactly at max
                 const double dq = (1 + (h->max - val) / width * (right_centroid_weight / 2 - 1)) /
-                                  h->merged_weight;
+                                  merged_weight_d;
                 return 1 - dq;
             }
         } else {
@@ -276,13 +294,13 @@ double td_cdf(td_histogram_t *h, double val) {
             // dw will accumulate the weight of all of the centroids at x
             double dw = 0;
             while (it < n && h->nodes_mean[it] == val) {
-                dw += h->nodes_weight[it];
+                dw += (double)h->nodes_weight[it];
                 it++;
             }
-            return (weightSoFar + dw / 2) / h->merged_weight;
+            return (weightSoFar + dw / 2) / (double)h->merged_weight;
         } else if (h->nodes_mean[it] <= val && val < h->nodes_mean[it + 1]) {
-            const double node_weight = h->nodes_weight[it];
-            const double node_weight_next = h->nodes_weight[it + 1];
+            const double node_weight = (double)h->nodes_weight[it];
+            const double node_weight_next = (double)h->nodes_weight[it + 1];
             const double node_mean = h->nodes_mean[it];
             const double node_mean_next = h->nodes_mean[it + 1];
             // landed between centroids ... check for floating point madness
@@ -297,7 +315,7 @@ double td_cdf(td_histogram_t *h, double val) {
                 if (node_weight_next == 1) {
                     // two singletons means no interpolation
                     // left singleton is in, right is out
-                    return (weightSoFar + 1) / h->merged_weight;
+                    return (weightSoFar + 1) / merged_weight_d;
                 } else {
                     leftExcludedW = 0.5;
                 }
@@ -311,19 +329,19 @@ double td_cdf(td_histogram_t *h, double val) {
                 double base = weightSoFar + node_weight / 2 + leftExcludedW;
                 return (base + dwNoSingleton * (val - node_mean) / (node_mean_next - node_mean)) /
-                       h->merged_weight;
+                       merged_weight_d;
             } else {
                 // this is simply caution against floating point madness
                 // it is conceivable that the centroids will be different
                 // but too near to allow safe interpolation
                 double dw = (node_weight + node_weight_next) / 2;
-                return (weightSoFar + dw) / h->merged_weight;
+                return (weightSoFar + dw) / merged_weight_d;
             }
         } else {
-            weightSoFar += h->nodes_weight[it];
+            weightSoFar += (double)h->nodes_weight[it];
         }
     }
-    return 1 - 0.5 / h->merged_weight;
+    return 1 - 0.5 / merged_weight_d;
 }
 
 static double td_internal_iterate_centroids_to_index(const td_histogram_t *h, const double index,
@@ -342,17 +360,18 @@ static double td_internal_iterate_centroids_to_index(const td_histogram_t *h, co
 
     // if the right-most centroid has more than one sample, we still know
     // that one sample occurred at max so we can do some interpolation
-    const double right_centroid_weight = h->nodes_weight[total_centroids - 1];
+    const double right_centroid_weight = (double)h->nodes_weight[total_centroids - 1];
     const double right_centroid_mean = h->nodes_mean[total_centroids - 1];
-    if (right_centroid_weight > 1 && h->merged_weight - index <= right_centroid_weight / 2) {
-        return h->max - (h->merged_weight - index - 1) / (right_centroid_weight / 2 - 1) *
+    if (right_centroid_weight > 1 &&
+        (double)h->merged_weight - index <= right_centroid_weight / 2) {
+        return h->max - ((double)h->merged_weight - index - 1) / (right_centroid_weight / 2 - 1) *
                             (h->max - right_centroid_mean);
     }
 
     for (; *node_pos < total_centroids - 1; (*node_pos)++) {
         const int i = *node_pos;
-        const double node_weight = h->nodes_weight[i];
-        const double node_weight_next = h->nodes_weight[i + 1];
+        const double node_weight = (double)h->nodes_weight[i];
+        const double node_weight_next = (double)h->nodes_weight[i + 1];
         const double node_mean = h->nodes_mean[i];
         const double node_mean_next = h->nodes_mean[i + 1];
         const double dw = (node_weight + node_weight_next) / 2;
@@ -402,7 +421,7 @@ double td_quantile(td_histogram_t *h, double q) {
     }
 
     // if values were stored in a sorted array, index would be the offset we are interested in
-    const double index = q * h->merged_weight;
+    const double index = q * (double)h->merged_weight;
 
     // beyond the boundaries, we return min or max
     // usually, the first centroid will have unit weight so this will make it moot
@@ -415,7 +434,7 @@ double td_quantile(td_histogram_t *h, double q) {
 
     // if the left centroid has more than one sample, we still know
     // that one sample occurred at min so we can do some interpolation
-    const double left_centroid_weight = h->nodes_weight[0];
+    const double left_centroid_weight = (double)h->nodes_weight[0];
 
     // in between extremes we interpolate between centroids
     double weightSoFar = left_centroid_weight / 2;
@@ -456,11 +475,7 @@ int td_quantiles(td_histogram_t *h, const double *quantiles, double *values, siz
     // we know that there are at least two centroids now
     // if the left centroid has more than one sample, we still know
     // that one sample occurred at min so we can do some interpolation
-    const double left_centroid_weight = h->nodes_weight[0];
-
-    // if the right-most centroid has more than one sample, we still know
-    // that one sample occurred at max so we can do some interpolation
-    const double right_centroid_weight = h->nodes_weight[n - 1];
+    const double left_centroid_weight = (double)h->nodes_weight[0];
 
     // in between extremes we interpolate between centroids
     double weightSoFar = left_centroid_weight / 2;
@@ -469,7 +484,7 @@ int td_quantiles(td_histogram_t *h, const double *quantiles, double *values, siz
     // to avoid allocations we use the values array for intermediate computation
     // i.e. to store the expected cumulative count at each percentile
     for (size_t qpos = 0; qpos < length; qpos++) {
-        const double index = quantiles[qpos] * h->merged_weight;
+        const double index = quantiles[qpos] * (double)h->merged_weight;
         values[qpos] = td_internal_iterate_centroids_to_index(h, index, left_centroid_weight, n,
                                                               &weightSoFar, &node_pos);
     }
@@ -483,7 +498,7 @@ static double td_internal_trimmed_mean(const td_histogram_t *h, const double lef
     double trimmed_count = 0;
 
     for (int i = 0; i < h->merged_nodes; i++) {
-        const double n_weight = h->nodes_weight[i];
+        const double n_weight = (double)h->nodes_weight[i];
         // Assume the whole centroid falls into the range
         double count_add = n_weight;
 
@@ -521,8 +536,8 @@ double td_trimmed_mean_symmetric(td_histogram_t *h, double proportion_to_cut) {
     }
 
     /* translate the percentiles to counts */
-    const double leftmost_weight = floor(h->merged_weight * proportion_to_cut);
-    const double rightmost_weight = ceil(h->merged_weight * (1.0 - proportion_to_cut));
+    const double leftmost_weight = floor((double)h->merged_weight * proportion_to_cut);
+    const double rightmost_weight = ceil((double)h->merged_weight * (1.0 - proportion_to_cut));
 
     return td_internal_trimmed_mean(h, leftmost_weight, rightmost_weight);
 }
@@ -540,13 +555,13 @@ double td_trimmed_mean(td_histogram_t *h, double leftmost_cut, double rightmost_
     }
 
     /* translate the percentiles to counts */
-    const double leftmost_weight = floor(h->merged_weight * leftmost_cut);
-    const double rightmost_weight = ceil(h->merged_weight * rightmost_cut);
+    const double leftmost_weight = floor((double)h->merged_weight * leftmost_cut);
+    const double rightmost_weight = ceil((double)h->merged_weight * rightmost_cut);
 
     return td_internal_trimmed_mean(h, leftmost_weight, rightmost_weight);
 }
 
-int td_add(td_histogram_t *h, double mean, double weight) {
+int td_add(td_histogram_t *h, double mean, long long weight) {
     if (should_td_compress(h)) {
         const int overflow_res = td_compress(h);
         if (overflow_res != 0)
@@ -555,10 +570,15 @@ int td_add(td_histogram_t *h, double mean, double weight) {
     const int pos = next_node(h);
     if (pos >= h->cap)
         return EDOM;
-    const double new_unmerged_weight = h->unmerged_weight + weight;
-    const double new_total_weight = new_unmerged_weight + h->merged_weight;
+    if (_tdigest_long_long_add_safe(h->unmerged_weight, weight) == false)
+        return EDOM;
+    const long long new_unmerged_weight = h->unmerged_weight + weight;
+    if (_tdigest_long_long_add_safe(new_unmerged_weight, h->merged_weight) == false)
+        return EDOM;
+    const long long new_total_weight = new_unmerged_weight + h->merged_weight;
     // double-precision overflow detected
-    const int overflow_res = _check_td_overflow(new_unmerged_weight, new_total_weight);
+    const int overflow_res =
+        _check_td_overflow((double)new_unmerged_weight, (double)new_total_weight);
     if (overflow_res != 0)
         return overflow_res;
 
@@ -581,9 +601,9 @@ int td_compress(td_histogram_t *h) {
     }
     int N = h->merged_nodes + h->unmerged_nodes;
     td_qsort(h->nodes_mean, h->nodes_weight, 0, N - 1);
-    const double total_weight = h->merged_weight + h->unmerged_weight;
+    const double total_weight = (double)h->merged_weight + (double)h->unmerged_weight;
     // double-precision overflow detected
-    const int overflow_res = _check_td_overflow(h->unmerged_weight, total_weight);
+    const int overflow_res = _check_td_overflow((double)h->unmerged_weight, (double)total_weight);
     if (overflow_res != 0)
         return overflow_res;
     if (total_weight <= 1)
@@ -600,7 +620,7 @@ int td_compress(td_histogram_t *h) {
     double weight_so_far = 0;
 
     for (int i = 1; i < N; i++) {
-        const double proposed_weight = h->nodes_weight[cur] + h->nodes_weight[i];
+        const double proposed_weight = (double)h->nodes_weight[cur] + (double)h->nodes_weight[i];
         const double z = proposed_weight * normalizer;
         // quantile up to cur
         const double q0 = weight_so_far / total_weight;
@@ -622,7 +642,7 @@ int td_compress(td_histogram_t *h) {
             h->nodes_mean[cur] = h->nodes_mean[i];
         }
         if (cur != i) {
-            h->nodes_weight[i] = 0.0;
+            h->nodes_weight[i] = 0;
             h->nodes_mean[i] = 0.0;
         }
     }
@@ -640,16 +660,11 @@ double td_max(td_histogram_t *h) { return h->max; }
 
 int td_compression(td_histogram_t *h) { return h->compression; }
 
-const double *td_centroids_weight(td_histogram_t *h) { return h->nodes_weight; }
+const long long *td_centroids_weight(td_histogram_t *h) { return h->nodes_weight; }
 
 const double *td_centroids_mean(td_histogram_t *h) { return h->nodes_mean; }
 
-double td_centroids_weight_at(td_histogram_t *h, int pos) {
-    if (pos < 0 || pos > h->merged_nodes) {
-        return NAN;
-    }
-    return h->nodes_weight[pos];
-}
+long long td_centroids_weight_at(td_histogram_t *h, int pos) { return h->nodes_weight[pos]; }
 
 double td_centroids_mean_at(td_histogram_t *h, int pos) {
     if (pos < 0 || pos > h->merged_nodes) {
diff --git a/src/tdigest.h b/src/tdigest.h
index 32b35ba..c07436c 100644
--- a/src/tdigest.h
+++ b/src/tdigest.h
@@ -42,11 +42,11 @@ struct td_histogram {
     // we run the merge in reverse every other merge to avoid left-to-right bias in merging
     long long total_compressions;
 
-    double merged_weight;
-    double unmerged_weight;
+    long long merged_weight;
+    long long unmerged_weight;
 
     double *nodes_mean;
-    double *nodes_weight;
+    long long *nodes_weight;
 };
 
 typedef struct td_histogram td_histogram_t;
@@ -106,7 +106,7 @@ void td_reset(td_histogram_t *h);
  * weight.
  *
  */
-int td_add(td_histogram_t *h, double val, double weight);
+int td_add(td_histogram_t *h, double val, long long weight);
 
 /**
  * Re-examines a t-digest to determine whether some centroids are redundant. If your data are
@@ -190,7 +190,7 @@ int td_compression(td_histogram_t *h);
 *
 * @return The sum of the weights on all centroids.
 */
-double td_size(td_histogram_t *h);
+long long td_size(td_histogram_t *h);
 
 /**
 * Returns the number of centroids being used by this TDigest.
@@ -222,7 +222,7 @@ double td_max(td_histogram_t *h);
 *
 * @return The full centroids weight array.
 */
-const double *td_centroids_weight(td_histogram_t *h);
+const long long *td_centroids_weight(td_histogram_t *h);
 
 /**
 * Get the full centroids mean array for 'this' histogram.
@@ -241,7 +241,7 @@ const double *td_centroids_mean(td_histogram_t *h);
 *
 * @return The centroid weight.
 */
-double td_centroids_weight_at(td_histogram_t *h, int pos);
+long long td_centroids_weight_at(td_histogram_t *h, int pos);
 
 /**
 * Get the centroid mean for 'this' histogram and 'pos'.
diff --git a/tests/unit/td_test.c b/tests/unit/td_test.c
index 2434066..272fbfb 100644
--- a/tests/unit/td_test.c
+++ b/tests/unit/td_test.c
@@ -21,42 +21,11 @@ static double randfrom(double M, double N) {
     return M + (rand() / (RAND_MAX / (N - M)));
 }
 
-/**
- * Reference implementations for cdf if we have all data.
- */
-static double dist_cdf(double x, double *data, int data_length) {
-    double n1 = 0;
-    double n2 = 0;
-    for (size_t i = 0; i < data_length; i++) {
-        const double v = data[i];
-        n1 += (v < x) ? 1 : 0;
-        n2 += (v == x) ? 1 : 0;
-    }
-    return (n1 + n2 / 2.0) / data_length;
-}
-
-/**
- * Reference implementations for quantile if we have all data.
- */
-static double dist_quantile(double q, double *data, int data_length) {
-    if (data_length == 0) {
-        return NAN;
-    }
-    double index = q * data_length;
-    if (index < 0) {
-        index = 0;
-    }
-    if (index > data_length - 1) {
-        index = data_length - 1;
-    }
-    return data[(int)floor(index)];
-}
-
 int tests_run = 0;
 
 td_histogram_t *histogram = NULL;
 
-static void load_histograms() {
+static void load_histograms(void) {
     const int compression = 500;
 
     int i;
@@ -73,13 +42,13 @@
 MU_TEST(test_basic) {
     td_histogram_t *t = td_new(10);
     mu_assert(t != NULL, "created_histogram");
-    mu_assert_double_eq(0, t->unmerged_weight);
-    mu_assert_double_eq(0, t->merged_weight);
+    mu_assert_long_eq(0, t->unmerged_weight);
+    mu_assert_long_eq(0, t->merged_weight);
     mu_assert(td_add(t, 0.0, 1) == 0, "Insertion");
     // with one data point, all quantiles lead to Rome
     mu_assert_double_eq(0.0, td_quantile(t, .0));
     mu_assert_double_eq(0.0, td_quantile(t, 0.5));
-    mu_assert_double_eq(0.0, td_quantile(t, 1.0));
+    mu_assert_double_eq(0.0, td_quantile(t, 1));
     mu_assert(td_add(t, 10.0, 1) == 0, "Insertion");
     mu_assert_double_eq(0.0, td_min(t));
     mu_assert_double_eq(10.0, td_max(t));
@@ -99,18 +68,21 @@ MU_TEST(test_overflow) {
     td_histogram_t *t = td_new(10);
     td_histogram_t *t2 = td_new(10);
     mu_assert(t != NULL, "created_histogram");
-    // mu_assert(t2 != NULL, "created_histogram");
-    mu_assert_double_eq(0, t->unmerged_weight);
-    mu_assert_double_eq(0, t->merged_weight);
-    mu_assert_double_eq(0, t2->unmerged_weight);
-    mu_assert_double_eq(0, t2->merged_weight);
-    for (size_t i = 0; i < 4; i++) {
-        mu_assert(td_add(t, 5.0, 1e304) == 0, "Insertion of 1e304");
-    }
-    mu_assert(td_add(t, 5.0, 1e304) == EDOM, "5th insertion of 1e305 should overflow");
+    mu_assert(t2 != NULL, "created_histogram");
+    mu_assert_long_eq(0, t->unmerged_weight);
+    mu_assert_long_eq(0, t->merged_weight);
+    mu_assert_long_eq(0, t2->unmerged_weight);
+    mu_assert_long_eq(0, t2->merged_weight);
+    mu_assert(td_add(t, 5.0, __LONG_LONG_MAX__ - 1) == 0, "Insertion of __LONG_LONG_MAX__ - 1");
+    mu_assert(td_add(t, 5.0, __LONG_LONG_MAX__ - 1) == EDOM,
+              "second insertion of __LONG_LONG_MAX__ - 1 should overflow");
+    mu_assert_long_eq(__LONG_LONG_MAX__ - 1, t->merged_weight + t->unmerged_weight);
     // overflow on merge
-    mu_assert(td_add(t2, 5.0, 1e304) == 0, "First insertion of 1e304");
-    mu_assert(td_merge(t2, t) == EDOM, "Merge due to second insertion of 1e300 should overflow");
+    mu_assert(td_add(t2, 5.0, __LONG_LONG_MAX__ - 1) == 0, "First insertion of __LONG_LONG_MAX__ - 1");
+    mu_assert_long_eq(__LONG_LONG_MAX__ - 1, t2->merged_weight + t2->unmerged_weight);
+    mu_assert(td_add(t2, 1.0, 1) == 0, "Insertion of 1");
+    mu_assert(td_add(t2, 5.0, __LONG_LONG_MAX__ - 1) == EDOM,
+              "Second insertion of __LONG_LONG_MAX__ - 1 should overflow");
     td_free(t);
     td_free(t2);
 }
@@ -122,50 +94,50 @@ MU_TEST(test_overflow_merge) {
     mu_assert(x != NULL, "created_histogram");
     mu_assert(y != NULL, "created_histogram");
     mu_assert(z != NULL, "created_histogram");
-    mu_assert_double_eq(0, x->unmerged_weight);
-    mu_assert_double_eq(0, x->merged_weight);
-    mu_assert_double_eq(0, y->unmerged_weight);
-    mu_assert_double_eq(0, y->merged_weight);
-    mu_assert(td_add(x, 1, 1.0) == 0, "Insertion of 1");
-    mu_assert(td_add(x, 2, 1.0) == 0, "Insertion of 2");
-    mu_assert(td_add(x, 3, 1.0) == 0, "Insertion of 3");
-    mu_assert(td_add(x, 4, 1.0) == 0, "Insertion of 4");
-    mu_assert(td_add(x, 5, 1.0) == 0, "Insertion of 5");
-    mu_assert(td_add(x, 6, 1.0) == 0, "Insertion of 6");
-    mu_assert(td_add(x, 7, 1.0) == 0, "Insertion of 7");
-    mu_assert(td_add(x, 8, 1.0) == 0, "Insertion of 8");
-    mu_assert(td_add(x, 9, 1.0) == 0, "Insertion of 9");
-    mu_assert(td_add(x, 10, 1.0) == 0, "Insertion of 10");
-    mu_assert(td_add(x, 11, 1.0) == 0, "Insertion of 11");
-    mu_assert(td_add(x, 12, 1.0) == 0, "Insertion of 12");
-    mu_assert(td_add(x, 13, 1.0) == 0, "Insertion of 13");
-    mu_assert(td_add(x, 14, 1.0) == 0, "Insertion of 14");
-    mu_assert(td_add(x, 15, 1.0) == 0, "Insertion of 15");
-    mu_assert(td_add(x, 16, 1.0) == 0, "Insertion of 16");
-    mu_assert(td_add(x, 17, 1.0) == 0, "Insertion of 17");
-    mu_assert(td_add(x, 18, 1.0) == 0, "Insertion of 18");
-    mu_assert(td_add(x, 19, 1.0) == 0, "Insertion of 19");
-    mu_assert(td_add(x, 20, 1.0) == 0, "Insertion of 20");
-    mu_assert(td_add(y, 101, 1.0) == 0, "Insertion of 101");
-    mu_assert(td_add(y, 102, 1.0) == 0, "Insertion of 102");
-    mu_assert(td_add(y, 103, 1.0) == 0, "Insertion of 103");
-    mu_assert(td_add(y, 104, 1.0) == 0, "Insertion of 104");
-    mu_assert(td_add(y, 105, 1.0) == 0, "Insertion of 105");
-    mu_assert(td_add(y, 106, 1.0) == 0, "Insertion of 106");
-    mu_assert(td_add(y, 107, 1.0) == 0, "Insertion of 107");
-    mu_assert(td_add(y, 108, 1.0) == 0, "Insertion of 108");
-    mu_assert(td_add(y, 109, 1.0) == 0, "Insertion of 109");
-    mu_assert(td_add(y, 110, 1.0) == 0, "Insertion of 110");
-    mu_assert(td_add(y, 111, 1.0) == 0, "Insertion of 111");
-    mu_assert(td_add(y, 112, 1.0) == 0, "Insertion of 112");
-    mu_assert(td_add(y, 113, 1.0) == 0, "Insertion of 113");
-    mu_assert(td_add(y, 114, 1.0) == 0, "Insertion of 114");
-    mu_assert(td_add(y, 115, 1.0) == 0, "Insertion of 115");
-    mu_assert(td_add(y, 116, 1.0) == 0, "Insertion of 116");
-    mu_assert(td_add(y, 117, 1.0) == 0, "Insertion of 117");
-    mu_assert(td_add(y, 118, 1.0) == 0, "Insertion of 118");
-    mu_assert(td_add(y, 119, 1.0) == 0, "Insertion of 119");
-    mu_assert(td_add(y, 120, 1.0) == 0, "Insertion of 120");
+    mu_assert_long_eq(0, x->unmerged_weight);
+    mu_assert_long_eq(0, x->merged_weight);
+    mu_assert_long_eq(0, y->unmerged_weight);
+    mu_assert_long_eq(0, y->merged_weight);
+    mu_assert(td_add(x, 1, 1) == 0, "Insertion of 1");
+    mu_assert(td_add(x, 2, 1) == 0, "Insertion of 2");
+    mu_assert(td_add(x, 3, 1) == 0, "Insertion of 3");
+    mu_assert(td_add(x, 4, 1) == 0, "Insertion of 4");
+    mu_assert(td_add(x, 5, 1) == 0, "Insertion of 5");
+    mu_assert(td_add(x, 6, 1) == 0, "Insertion of 6");
+    mu_assert(td_add(x, 7, 1) == 0, "Insertion of 7");
+    mu_assert(td_add(x, 8, 1) == 0, "Insertion of 8");
+    mu_assert(td_add(x, 9, 1) == 0, "Insertion of 9");
+    mu_assert(td_add(x, 10, 1) == 0, "Insertion of 10");
+    mu_assert(td_add(x, 11, 1) == 0, "Insertion of 11");
+    mu_assert(td_add(x, 12, 1) == 0, "Insertion of 12");
+    mu_assert(td_add(x, 13, 1) == 0, "Insertion of 13");
+    mu_assert(td_add(x, 14, 1) == 0, "Insertion of 14");
+    mu_assert(td_add(x, 15, 1) == 0, "Insertion of 15");
+    mu_assert(td_add(x, 16, 1) == 0, "Insertion of 16");
+    mu_assert(td_add(x, 17, 1) == 0, "Insertion of 17");
+    mu_assert(td_add(x, 18, 1) == 0, "Insertion of 18");
+    mu_assert(td_add(x, 19, 1) == 0, "Insertion of 19");
+    mu_assert(td_add(x, 20, 1) == 0, "Insertion of 20");
+    mu_assert(td_add(y, 101, 1) == 0, "Insertion of 101");
+    mu_assert(td_add(y, 102, 1) == 0, "Insertion of 102");
+    mu_assert(td_add(y, 103, 1) == 0, "Insertion of 103");
+    mu_assert(td_add(y, 104, 1) == 0, "Insertion of 104");
+    mu_assert(td_add(y, 105, 1) == 0, "Insertion of 105");
+    mu_assert(td_add(y, 106, 1) == 0, "Insertion of 106");
+    mu_assert(td_add(y, 107, 1) == 0, "Insertion of 107");
+    mu_assert(td_add(y, 108, 1) == 0, "Insertion of 108");
+    mu_assert(td_add(y, 109, 1) == 0, "Insertion of 109");
+    mu_assert(td_add(y, 110, 1) == 0, "Insertion of 110");
+    mu_assert(td_add(y, 111, 1) == 0, "Insertion of 111");
+    mu_assert(td_add(y, 112, 1) == 0, "Insertion of 112");
+    mu_assert(td_add(y, 113, 1) == 0, "Insertion of 113");
+    mu_assert(td_add(y, 114, 1) == 0, "Insertion of 114");
+    mu_assert(td_add(y, 115, 1) == 0, "Insertion of 115");
+    mu_assert(td_add(y, 116, 1) == 0, "Insertion of 116");
+    mu_assert(td_add(y, 117, 1) == 0, "Insertion of 117");
+    mu_assert(td_add(y, 118, 1) == 0, "Insertion of 118");
+    mu_assert(td_add(y, 119, 1) == 0, "Insertion of 119");
+    mu_assert(td_add(y, 120, 1) == 0, "Insertion of 120");
 
     for (size_t i = 0; i < 10; i++) {
         td_histogram_t *zz = td_new(10);
@@ -193,16 +165,17 @@ MU_TEST(test_quantile_interpolations) {
     td_histogram_t *t = td_new(10);
     mu_assert(t != NULL, "created_histogram");
-    mu_assert_double_eq(0, t->unmerged_weight);
-    mu_assert_double_eq(0, t->merged_weight);
+    mu_assert_long_eq(0, t->unmerged_weight);
+    mu_assert_long_eq(0, t->merged_weight);
     mu_assert(td_add(t, 5.0, 2) == 0, "add");
-    mu_assert_double_eq(1, t->unmerged_weight);
+    mu_assert_long_eq(2, t->unmerged_weight);
     // with one data point, all quantiles lead to Rome
-    mu_assert_double_eq(0.0, td_quantile(t, .0));
-    mu_assert_double_eq(0.0, td_quantile(t, 0.5));
+    mu_assert_double_eq(5.0, td_quantile(t, .0));
+    mu_assert_double_eq(5.0, td_quantile(t, 0.5));
+    mu_assert_double_eq(5.0, td_quantile(t, 1.0));
     mu_assert(td_compress(t) == 0, "compress");
-    mu_assert_double_eq(0, t->unmerged_weight);
-    mu_assert_double_eq(2.0, t->merged_weight);
+    mu_assert_long_eq(0, t->unmerged_weight);
+    mu_assert_long_eq(2, t->merged_weight);
     mu_assert(td_add(t, 100.0, 1) == 0, "Insertion");
     // we know that there are at least two centroids now
     td_free(t);
 }
@@ -220,8 +193,8 @@ MU_TEST(test_trimmed_mean_simple) {
     */
     td_histogram_t *t = td_new(100);
     mu_assert(t != NULL, "created_histogram");
-    mu_assert_double_eq(0, t->unmerged_weight);
-    mu_assert_double_eq(0, t->merged_weight);
+    mu_assert_long_eq(0, t->unmerged_weight);
+    mu_assert_long_eq(0, t->merged_weight);
     // stats.trim_mean([], 0.49)
     // nan
     mu_assert_double_eq(NAN, td_trimmed_mean_symmetric(t, .49));
@@ -238,17 +211,17 @@ MU_TEST(test_trimmed_mean_simple) {
     // 5.0
     // stats.trim_mean(x, 0.0)
     mu_assert_double_eq(5, td_trimmed_mean_symmetric(t, .0));
-    mu_assert_double_eq(5, td_trimmed_mean(t, 0.0, 1.0));
+    mu_assert_double_eq(5, td_trimmed_mean(t, 0.0, 1));
     // 5.0
     mu_assert(td_add(t, 5.0, 2) == 0, "Insertion");
     mu_assert_double_eq(5, td_trimmed_mean_symmetric(t, .0));
-    mu_assert_double_eq(5, td_trimmed_mean(t, 0.0, 1.0));
+    mu_assert_double_eq(5, td_trimmed_mean(t, 0.0, 1));
     mu_assert(td_add(t, 10.0, 1) == 0, "Insertion");
     mu_assert(td_add(t, 15.0, 3) == 0, "Insertion");
     // stats.trim_mean(x, 0.0)
     // 10.0
     mu_assert_double_eq(10, td_trimmed_mean_symmetric(t, .0));
-    mu_assert_double_eq(10, td_trimmed_mean(t, 0.0, 1.0));
+    mu_assert_double_eq(10, td_trimmed_mean(t, 0.0, 1));
     // trimmed mean and mean should lead to 10 in here
     // stats.trim_mean(x, 0.1)
     // 10.0
@@ -272,8 +245,8 @@ MU_TEST(test_trimmed_mean_complex) {
     */
     td_histogram_t *t = td_new(100);
     mu_assert(t != NULL, "created_histogram");
-    mu_assert_double_eq(0, t->unmerged_weight);
-    mu_assert_double_eq(0, t->merged_weight);
+    mu_assert_long_eq(0, t->unmerged_weight);
+    mu_assert_long_eq(0, t->merged_weight);
    for (int i = 0; i < 20; ++i) {
         mu_assert(td_add(t, (double)i, 1) == 0, "Insertion");
     }
@@ -285,8 +258,8 @@ MU_TEST(test_trimmed_mean_complex) {
     td_free(t);
     t = td_new(100);
     mu_assert(t != NULL, "created_histogram");
-    mu_assert_double_eq(0, t->unmerged_weight);
-    mu_assert_double_eq(0, t->merged_weight);
+    mu_assert_long_eq(0, t->unmerged_weight);
+    mu_assert_long_eq(0, t->merged_weight);
     for (int i = 0; i < 200; ++i) {
         mu_assert(td_add(t, (double)i, 1) == 0, "Insertion");
     }
@@ -326,20 +299,20 @@
 MU_TEST(test_compress_small) {
     td_histogram_t *t = td_new(100);
     mu_assert(t != NULL, "created_histogram");
-    mu_assert(td_add(t, 1.0, 1.0) == 0, "Insertion");
+    mu_assert(td_add(t, 1.0, 1) == 0, "Insertion");
     mu_assert_double_eq(1.0, td_min(t));
     mu_assert_double_eq(1.0, td_max(t));
     mu_assert_double_eq(1.0, td_size(t));
     mu_assert_int_eq(1, td_centroid_count(t));
     mu_assert_long_eq(0, t->total_compressions);
     mu_assert_double_eq(1.0, td_centroids_mean_at(t, 0));
-    mu_assert_double_eq(1.0, td_centroids_weight_at(t, 0));
+    mu_assert_long_eq(1, td_centroids_weight_at(t, 0));
     mu_assert_int_eq(1, t->unmerged_nodes);
     mu_assert_int_eq(0, t->merged_nodes);
     mu_assert(td_compress(t) == 0, "compress");
-    mu_assert_int_eq(1, t->unmerged_nodes + t->merged_nodes);
+    mu_assert_long_eq(1, t->unmerged_nodes + t->merged_nodes);
     mu_assert_double_eq(1.0, td_centroids_mean_at(t, 0));
-    mu_assert_double_eq(1.0, td_centroids_weight_at(t, 0));
+    mu_assert_long_eq(1, td_centroids_weight_at(t, 0));
     mu_assert_double_eq(1.0, td_quantile(t, 0.001));
     mu_assert_double_eq(1.0, td_quantile(t, 0.01));
     mu_assert_double_eq(1.0, td_quantile(t, 0.5));
@@ -387,7 +360,7 @@ MU_TEST(test_negative_values) {
     mu_assert_double_eq_epsilon(-98.5, td_quantile(t, 0.01), 0.75);
     mu_assert_double_eq_epsilon(98.5, td_quantile(t, 0.99), 0.75);
     mu_assert_double_eq(100, td_quantile(t, 0.999));
-    mu_assert_double_eq(100, td_quantile(t, 1.0));
+    mu_assert_double_eq(100, td_quantile(t, 1));
     td_free(t);
 }
 
@@ -410,7 +383,7 @@ MU_TEST(test_negative_values_merge) {
     mu_assert_double_eq_epsilon(-98.5, td_quantile(d1, 0.01), 0.75);
     mu_assert_double_eq_epsilon(98.5, td_quantile(d1, 0.99), 0.75);
     mu_assert_double_eq(100, td_quantile(d1, 0.999));
-    mu_assert_double_eq(100, td_quantile(d1, 1.0));
+    mu_assert_double_eq(100, td_quantile(d1, 1));
     td_free(d1);
     td_free(d2);
 }
@@ -432,9 +405,6 @@ MU_TEST(test_nans) {
     mu_assert(isnan(td_quantile(t, 0)), "empty value at 0");
     mu_assert(isnan(td_quantile(t, 0.5)), "empty value at .5");
     mu_assert(isnan(td_quantile(t, 1)), "empty value at 1");
-    mu_assert(isnan(td_centroids_weight_at(t, 1)),
-              "td_centroids_weight_at on pos > h->merged_nodes");
-    mu_assert(isnan(td_centroids_weight_at(t, -1)), "td_centroids_weight_at on pos < 0");
     mu_assert(isnan(td_centroids_mean_at(t, 1)), "td_centroids_mean_at on pos > h->merged_nodes");
     mu_assert(isnan(td_centroids_mean_at(t, -1)), "td_centroids_mean_at on pos < 0");
     mu_assert(td_add(t, 1, 1) == 0, "Insertion");
@@ -541,23 +511,6 @@ MU_TEST(test_td_init) {
     td_free(t);
 }
 
-bool compare_double(double a, double b, double delta) {
-    if (fabs(a - b) < delta) {
-        return true;
-    }
-
-    printf("[compare_double] fabs(%f, %f) < %f == false\n", a, b, delta);
-    return false;
-}
-
-static bool compare_values(double a, double b, double variation) {
-    return compare_double(a, b, b * variation);
-}
-
-static bool compare_percentile(int64_t a, double b, double variation) {
-    return compare_values((double)a, b, variation);
-}
-
 MU_TEST(test_quantiles) {
     load_histograms();
     mu_assert_double_eq_epsilon(0.0, td_quantile(histogram, 0.0), 0.001);
@@ -573,7 +526,7 @@ MU_TEST(test_quantiles) {
     mu_assert_double_eq_epsilon(9.99, td_quantile(histogram, 0.999), 0.01);
     mu_assert_double_eq_epsilon(9.999, td_quantile(histogram, 0.9999), 0.01);
     mu_assert_double_eq_epsilon(9.9999, td_quantile(histogram, 0.99999), 0.01);
-    mu_assert_double_eq_epsilon(10.0, td_quantile(histogram, 1.0), 0.001);
+    mu_assert_double_eq_epsilon(10.0, td_quantile(histogram, 1), 0.001);
 }
 
 MU_TEST(test_quantiles_multiple) {
@@ -643,6 +596,7 @@ MU_TEST_SUITE(test_suite) {
     MU_RUN_TEST(test_td_min);
     MU_RUN_TEST(test_quantiles);
    MU_RUN_TEST(test_quantiles_multiple);
+    MU_RUN_TEST(test_quantile_interpolations);
    MU_RUN_TEST(test_trimmed_mean_simple);
    MU_RUN_TEST(test_trimmed_mean_complex);
    MU_RUN_TEST(test_overflow);
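
Usage sketch (illustrative, not part of the patch): a minimal caller against the
post-patch API, showing that weights are now integral and that td_add reports a
would-be long long overflow as EDOM before any weight is accumulated. The
compression value 100 and the sample values are arbitrary choices for the example.

    /* example.c -- illustrative only; assumes the post-patch tdigest.h */
    #include <errno.h>
    #include <stdio.h>
    #include "tdigest.h"

    int main(void) {
        td_histogram_t *h = NULL;
        if (td_init(100, &h) != 0) /* non-zero return means allocation failed */
            return 1;
        td_add(h, 42.0, 1);     /* unit weight, as in the unit tests */
        td_add(h, 100.0, 1000); /* weight is a long long now, not a double */
        /* pushing the total weight past __LONG_LONG_MAX__ trips the new guard */
        if (td_add(h, 7.0, __LONG_LONG_MAX__) == EDOM)
            printf("weight overflow rejected\n");
        printf("size=%lld p50=%f\n", td_size(h), td_quantile(h, 0.5));
        td_free(h);
        return 0;
    }

Note that td_size now returns long long rather than double, so callers that
printed or compared it as a floating-point value need the %lld format (or an
explicit cast), matching the mu_assert_long_eq switch in the tests above.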