[Tarantool-patches] [PATCH v2] core: introduce evenly distributed int64 random in range

Ilya Kosarev i.kosarev at tarantool.org
Wed Sep 30 03:06:52 MSK 2020


Tarantool codebase had at least two functions to generate random
integer in given range and both of them had problems at least with
integer overflow. This patch brings nice function to generate random
int64_t in given range without overflow while preserving uniform random
generator distribution using unbiased bitmask with rejection method.
It is now possible to use xoshiro256++ PRNG or random bytes as a basis.
Most relevant replacements have been made.

Closes #5075
---
Branch: https://github.com/tarantool/tarantool/tree/i.kosarev/gh-5075-fine-random-in-range
Issue: https://github.com/tarantool/tarantool/issues/5075

@ChangeLog:
 * Introduce overflow resistant & evenly distributed random in range (gh-5075).

Changes in v2:
 - implemented fine 64-bit random
 - implemented fine range bounding

 src/lib/core/random.c | 57 +++++++++++++++++++++++++++++++++++++++++++
 src/lib/core/random.h | 38 +++++++++++++++++++++++++++++
 src/lib/swim/swim.c   | 26 +++-----------------
 test/unit/histogram.c | 12 +++------
 4 files changed, 102 insertions(+), 31 deletions(-)

diff --git a/src/lib/core/random.c b/src/lib/core/random.c
index f4fa75b1c..4b52408ea 100644
--- a/src/lib/core/random.c
+++ b/src/lib/core/random.c
@@ -39,6 +39,8 @@
 
 static int rfd;
 
+static uint64_t state[4];
+
 void
 random_init(void)
 {
@@ -63,6 +65,8 @@ random_init(void)
 srand:
 	srandom(seed);
 	srand(seed);
+
+	random_bytes((char *)state, 32);
 }
 
 void
@@ -97,3 +101,56 @@ rand:
 	while (generated < size)
 		buf[generated++] = rand();
 }
+
+uint64_t
+real_random(void)
+{
+	uint64_t result;
+	random_bytes((char *)(&result), 8);
+	return result;
+}
+
+static inline
+uint64_t rotl(const uint64_t x, int k)
+{
+	return (x << k) | (x >> (64 - k));
+}
+
+uint64_t
+pseudo_random(void)
+{
+	const uint64_t result = rotl(state[0] + state[3], 23) + state[0];
+	const uint64_t t = state[1] << 17;
+	state[2] ^= state[0];
+	state[3] ^= state[1];
+	state[1] ^= state[2];
+	state[0] ^= state[3];
+	state[2] ^= t;
+	state[3] = rotl(state[3], 45);
+	return result;
+}
+
+static inline uint64_t
+random_in_range(uint64_t (*rnd)(void), uint64_t range)
+{
+	uint64_t mask = UINT64_MAX >> __builtin_clzll(range | 1);
+	uint64_t result;
+	do {
+		result = rnd() & mask;
+	} while (result > range);
+	return result;
+}
+
+int64_t
+real_random_in_range(int64_t min, int64_t max)
+{
+	assert(max >= min);
+	return min + random_in_range(real_random, (uint64_t) (max - min));
+}
+
+int64_t
+pseudo_random_in_range(int64_t min, int64_t max)
+{
+	assert(max >= min);
+	return min + random_in_range(pseudo_random, (uint64_t) (max - min));
+}
diff --git a/src/lib/core/random.h b/src/lib/core/random.h
index 26109b14b..9228df72e 100644
--- a/src/lib/core/random.h
+++ b/src/lib/core/random.h
@@ -32,6 +32,7 @@
  */
 
 #include <stddef.h>
+#include <stdint.h>
 
 #if defined(__cplusplus)
 extern "C" {
@@ -45,6 +46,43 @@ random_free(void);
 void
 random_bytes(char *buf, size_t size);
 
+/**
+ * Just 8 random_bytes().
+ */
+uint64_t
+real_random(void);
+
+/**
+ * xoshiro256++ pseudo random generator.
+ * http://prng.di.unimi.it/
+ * State is initialized with random_bytes().
+ */
+uint64_t
+pseudo_random(void);
+
+/**
+ * Return a random int64_t number within given boundaries.
+ *
+ * Instead of blindly calculating a modulo, this function uses
+ * unbiased bitmask with rejection method to provide number in
+ * given boundaries while preserving uniform distribution and
+ * avoiding overflow. It uses random bytes as a basis.
+ */
+int64_t
+real_random_in_range(int64_t min, int64_t max);
+
+/**
+ * Return a pseudo random int64_t number within given boundaries.
+ *
+ * Instead of blindly calculating a modulo, this function uses
+ * unbiased bitmask with rejection method to provide number in
+ * given boundaries while preserving uniform distribution and
+ * avoiding overflow. It uses xoshiro256++ as an uint64 pseudo
+ * random generator.
+ */
+int64_t
+pseudo_random_in_range(int64_t min, int64_t max);
+
 #if defined(__cplusplus)
 }
 #endif /* extern "C" */
diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c
index 396bd7c45..9b5458635 100644
--- a/src/lib/swim/swim.c
+++ b/src/lib/swim/swim.c
@@ -34,6 +34,7 @@
 #include "swim_ev.h"
 #include "uri/uri.h"
 #include "fiber.h"
+#include "random.h"
 #include "msgpuck.h"
 #include "assoc.h"
 #include "sio.h"
@@ -188,25 +189,6 @@ enum {
 	INDIRECT_PING_COUNT = 2,
 };
 
-/**
- * Return a random number within given boundaries.
- *
- * Instead of blindly calculating a modulo, scale the random
- * number down the given boundaries to preserve the original
- * distribution. The result belongs to range [start, end].
- */
-static inline int
-swim_scaled_rand(int start, int end)
-{
-	assert(end >= start);
-	/*
-	 * RAND_MAX is likely to be INT_MAX - hardly SWIM will
-	 * ever be used in such a huge cluster.
-	 */
-	assert(end - start < RAND_MAX);
-	return rand() / (RAND_MAX / (end - start + 1) + 1);
-}
-
 /** Calculate UUID hash to use as a member table key. */
 static inline uint32_t
 swim_uuid_hash(const struct tt_uuid *uuid)
@@ -980,7 +962,7 @@ swim_shuffle_members(struct swim *swim)
 	for (mh_int_t node = mh_first(members), end = mh_end(members);
 	     node != end; node = mh_next(members, node), ++i) {
 		swim->shuffled[i] = *mh_swim_table_node(members, node);
-		int j = swim_scaled_rand(0, i);
+		int j = pseudo_random_in_range(0, i);
 		SWAP(swim->shuffled[i], swim->shuffled[j]);
 	}
 }
@@ -1069,7 +1051,7 @@ swim_encode_anti_entropy(struct swim *swim, struct swim_packet *packet)
 	swim_member_payload_bin_create(&payload_header);
 	struct mh_swim_table_t *t = swim->members;
 	int i = 0, member_count = mh_size(t);
-	int rnd = swim_scaled_rand(0, member_count - 1);
+	int rnd = pseudo_random_in_range(0, member_count - 1);
 	for (mh_int_t rc = mh_swim_table_random(t, rnd), end = mh_end(t);
 	     i < member_count; ++i) {
 		struct swim_member *m = *mh_swim_table_node(t, rc);
@@ -1380,7 +1362,7 @@ swim_send_indirect_pings(struct swim *swim, const struct swim_member *dst)
 {
 	struct mh_swim_table_t *t = swim->members;
 	int member_count = mh_size(t);
-	int rnd = swim_scaled_rand(0, member_count - 1);
+	int rnd = pseudo_random_in_range(0, member_count - 1);
 	mh_int_t rc = mh_swim_table_random(t, rnd), end = mh_end(t);
 	for (int member_i = 0, task_i = 0; member_i < member_count &&
 	     task_i < INDIRECT_PING_COUNT; ++member_i) {
diff --git a/test/unit/histogram.c b/test/unit/histogram.c
index 3056d20fe..ef5cd714f 100644
--- a/test/unit/histogram.c
+++ b/test/unit/histogram.c
@@ -5,6 +5,7 @@
 #include "memory.h"
 #include "unit.h"
 #include "trivia/util.h"
+#include "core/random.h"
 
 static int
 int64_cmp(const void *p1, const void *p2)
@@ -46,13 +47,6 @@ gen_rand_data(size_t *p_len)
 	return data;
 }
 
-static int64_t
-gen_rand_value(int64_t min, int64_t max)
-{
-	assert(max >= min);
-	return min + rand() % (max - min + 1);
-}
-
 static void
 test_counts(void)
 {
@@ -97,7 +91,7 @@ test_discard(void)
 
 	struct histogram *hist = histogram_new(buckets, n_buckets);
 
-	size_t bucket_sz = gen_rand_value(2, 10);
+	size_t bucket_sz = pseudo_random_in_range(2, 10);
 	size_t data_len = (n_buckets + 1) * bucket_sz;
 	int64_t *data = calloc(data_len, sizeof(*data));
 
@@ -105,7 +99,7 @@ test_discard(void)
 		int64_t min = (b == 0 ? INT64_MIN : buckets[b - 1] + 1);
 		int64_t max = (b == n_buckets ? INT64_MAX : buckets[b]);
 		for (size_t i = 0; i < bucket_sz; i++)
-			data[b * bucket_sz + i] = gen_rand_value(min, max);
+			data[b * bucket_sz + i] = pseudo_random_in_range(min, max);
 	}
 
 	for (size_t i = 0; i < data_len; i++)
-- 
2.17.1



More information about the Tarantool-patches mailing list