[PATCH 12/18] histogram: add function for computing lower bound percentile estimate

Vladimir Davydov vdavydov.dev at gmail.com
Thu Aug 16 19:12:06 MSK 2018


The value returned by histogram_percentile() is an upper bound estimate.
This is fine for measuring latency, because we are interested in the
worst, i.e. highest, observations, but doesn't suit particularly well
if we want to keep track of the lowest observations, as it is the case
with bandwidth. So this patch introduces histogram_percentile_lower(),
a function that is similar to histogram_percentile(), but returns a
lower bound estimate of the requested percentile.
---
 src/histogram.c       | 13 +++++++++++++
 src/histogram.h       |  7 +++++++
 test/unit/histogram.c |  4 ++++
 3 files changed, 24 insertions(+)

diff --git a/src/histogram.c b/src/histogram.c
index c934ee9f..8c6cd6c0 100644
--- a/src/histogram.c
+++ b/src/histogram.c
@@ -145,6 +145,19 @@ histogram_percentile(struct histogram *hist, int pct)
 	return hist->max;
 }
 
+int64_t
+histogram_percentile_lower(struct histogram *hist, int pct)
+{
+	size_t count = 0;
+
+	for (size_t i = 0; i < hist->n_buckets; i++) {
+		count += hist->buckets[i].count;
+		if (count * 100 > hist->total * pct)
+			return hist->buckets[i > 0 ? i - 1 : 0].max;
+	}
+	return hist->max;
+}
+
 int
 histogram_snprint(char *buf, int size, struct histogram *hist)
 {
diff --git a/src/histogram.h b/src/histogram.h
index 95d47d40..2c3df5d1 100644
--- a/src/histogram.h
+++ b/src/histogram.h
@@ -90,6 +90,13 @@ int64_t
 histogram_percentile(struct histogram *hist, int pct);
 
 /**
+ * Same as histogram_percentile(), but return a lower bound
+ * estimate of the percentile.
+ */
+int64_t
+histogram_percentile_lower(struct histogram *hist, int pct);
+
+/**
  * Print string representation of a histogram.
  */
 int
diff --git a/test/unit/histogram.c b/test/unit/histogram.c
index 4c980d24..a026be26 100644
--- a/test/unit/histogram.c
+++ b/test/unit/histogram.c
@@ -154,14 +154,18 @@ test_percentile(void)
 	for (int pct = 5; pct < 100; pct += 5) {
 		int64_t val = data[data_len * pct / 100];
 		int64_t expected = max;
+		int64_t expected_lo = max;
 		for (size_t b = 0; b < n_buckets; b++) {
 			if (buckets[b] >= val) {
 				expected = buckets[b];
+				expected_lo = buckets[b > 0 ? b - 1 : 0];
 				break;
 			}
 		}
 		int64_t result = histogram_percentile(hist, pct);
 		fail_if(result != expected);
+		int64_t result_lo = histogram_percentile_lower(hist, pct);
+		fail_if(result_lo != expected_lo);
 	}
 
 	histogram_delete(hist);
-- 
2.11.0




More information about the Tarantool-patches mailing list