Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH] core: introduce evenly distributed int64 random in range
@ 2020-09-04 13:51 Ilya Kosarev
  2020-09-04 22:39 ` Vladislav Shpilevoy
  0 siblings, 1 reply; 5+ messages in thread
From: Ilya Kosarev @ 2020-09-04 13:51 UTC (permalink / raw)
  To: alyapunov; +Cc: tarantool-patches, v.shpilevoy

Tarantool codebase had at least two functions to generate random
integer in given range and both of them had problems at least with
integer overflow. This patch brings nice function to generate random
int64_t in given range without overflow while preserving uniform random
generator distribution. Most relevant replacements have been made.

Closes #5075
---
Branch: https://github.com/tarantool/tarantool/tree/i.kosarev/gh-5075-fine-random-in-range
Issue: https://github.com/tarantool/tarantool/issues/5075

@ChangeLog:
 * Introduce overflow resistand & evenly distributed random in range (gh-5075).

 src/lib/core/random.c |  8 ++++++++
 src/lib/core/random.h | 12 ++++++++++++
 src/lib/swim/swim.c   | 26 ++++----------------------
 test/unit/histogram.c | 12 +++---------
 4 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/src/lib/core/random.c b/src/lib/core/random.c
index f4fa75b1c1..2229c5fbbc 100644
--- a/src/lib/core/random.c
+++ b/src/lib/core/random.c
@@ -97,3 +97,11 @@ rand:
 	while (generated < size)
 		buf[generated++] = rand();
 }
+
+int64_t
+random_in_range(int64_t min, int64_t max)
+{
+	assert(max >= min);
+	double drand = drand48();
+	return (int64_t)(drand * max + (1 - drand) * min);
+}
diff --git a/src/lib/core/random.h b/src/lib/core/random.h
index 26109b14b2..8433660c99 100644
--- a/src/lib/core/random.h
+++ b/src/lib/core/random.h
@@ -32,6 +32,7 @@
  */
 
 #include <stddef.h>
+#include <stdint.h>
 
 #if defined(__cplusplus)
 extern "C" {
@@ -45,6 +46,17 @@ random_free(void);
 void
 random_bytes(char *buf, size_t size);
 
+/**
+ * Return a random int64_t number within given boundaries.
+ *
+ * Instead of blindly calculating a modulo, this function uses
+ * uniformly distributed over the interval [0.0 , 1.0) drand48()
+ * to provide number in given boundaries while preserving uniform
+ * distribution and avoiding overflow.
+ */
+int64_t
+random_in_range(int64_t min, int64_t max);
+
 #if defined(__cplusplus)
 }
 #endif /* extern "C" */
diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c
index 396bd7c452..3994bbbef6 100644
--- a/src/lib/swim/swim.c
+++ b/src/lib/swim/swim.c
@@ -34,6 +34,7 @@
 #include "swim_ev.h"
 #include "uri/uri.h"
 #include "fiber.h"
+#include "random.h"
 #include "msgpuck.h"
 #include "assoc.h"
 #include "sio.h"
@@ -188,25 +189,6 @@ enum {
 	INDIRECT_PING_COUNT = 2,
 };
 
-/**
- * Return a random number within given boundaries.
- *
- * Instead of blindly calculating a modulo, scale the random
- * number down the given boundaries to preserve the original
- * distribution. The result belongs to range [start, end].
- */
-static inline int
-swim_scaled_rand(int start, int end)
-{
-	assert(end >= start);
-	/*
-	 * RAND_MAX is likely to be INT_MAX - hardly SWIM will
-	 * ever be used in such a huge cluster.
-	 */
-	assert(end - start < RAND_MAX);
-	return rand() / (RAND_MAX / (end - start + 1) + 1);
-}
-
 /** Calculate UUID hash to use as a member table key. */
 static inline uint32_t
 swim_uuid_hash(const struct tt_uuid *uuid)
@@ -980,7 +962,7 @@ swim_shuffle_members(struct swim *swim)
 	for (mh_int_t node = mh_first(members), end = mh_end(members);
 	     node != end; node = mh_next(members, node), ++i) {
 		swim->shuffled[i] = *mh_swim_table_node(members, node);
-		int j = swim_scaled_rand(0, i);
+		int j = random_in_range(0, i);
 		SWAP(swim->shuffled[i], swim->shuffled[j]);
 	}
 }
@@ -1069,7 +1051,7 @@ swim_encode_anti_entropy(struct swim *swim, struct swim_packet *packet)
 	swim_member_payload_bin_create(&payload_header);
 	struct mh_swim_table_t *t = swim->members;
 	int i = 0, member_count = mh_size(t);
-	int rnd = swim_scaled_rand(0, member_count - 1);
+	int rnd = random_in_range(0, member_count - 1);
 	for (mh_int_t rc = mh_swim_table_random(t, rnd), end = mh_end(t);
 	     i < member_count; ++i) {
 		struct swim_member *m = *mh_swim_table_node(t, rc);
@@ -1380,7 +1362,7 @@ swim_send_indirect_pings(struct swim *swim, const struct swim_member *dst)
 {
 	struct mh_swim_table_t *t = swim->members;
 	int member_count = mh_size(t);
-	int rnd = swim_scaled_rand(0, member_count - 1);
+	int rnd = random_in_range(0, member_count - 1);
 	mh_int_t rc = mh_swim_table_random(t, rnd), end = mh_end(t);
 	for (int member_i = 0, task_i = 0; member_i < member_count &&
 	     task_i < INDIRECT_PING_COUNT; ++member_i) {
diff --git a/test/unit/histogram.c b/test/unit/histogram.c
index 3056d20fe8..02e6f80dec 100644
--- a/test/unit/histogram.c
+++ b/test/unit/histogram.c
@@ -5,6 +5,7 @@
 #include "memory.h"
 #include "unit.h"
 #include "trivia/util.h"
+#include "core/random.h"
 
 static int
 int64_cmp(const void *p1, const void *p2)
@@ -46,13 +47,6 @@ gen_rand_data(size_t *p_len)
 	return data;
 }
 
-static int64_t
-gen_rand_value(int64_t min, int64_t max)
-{
-	assert(max >= min);
-	return min + rand() % (max - min + 1);
-}
-
 static void
 test_counts(void)
 {
@@ -97,7 +91,7 @@ test_discard(void)
 
 	struct histogram *hist = histogram_new(buckets, n_buckets);
 
-	size_t bucket_sz = gen_rand_value(2, 10);
+	size_t bucket_sz = random_in_range(2, 10);
 	size_t data_len = (n_buckets + 1) * bucket_sz;
 	int64_t *data = calloc(data_len, sizeof(*data));
 
@@ -105,7 +99,7 @@ test_discard(void)
 		int64_t min = (b == 0 ? INT64_MIN : buckets[b - 1] + 1);
 		int64_t max = (b == n_buckets ? INT64_MAX : buckets[b]);
 		for (size_t i = 0; i < bucket_sz; i++)
-			data[b * bucket_sz + i] = gen_rand_value(min, max);
+			data[b * bucket_sz + i] = random_in_range(min, max);
 	}
 
 	for (size_t i = 0; i < data_len; i++)
-- 
2.17.1

^ permalink raw reply	[flat|nested] 5+ messages in thread
* [Tarantool-patches] [PATCH] core: introduce evenly distributed int64 random in range
@ 2020-09-04 13:20 Ilya Kosarev
  0 siblings, 0 replies; 5+ messages in thread
From: Ilya Kosarev @ 2020-09-04 13:20 UTC (permalink / raw)
  To: alyapunov; +Cc: tarantool-patches, v.shpilevoy

Tarantool codebase had at least two functions to generate random
integer in given range and both of them had problems at least with
integer overflow. This patch brings nice function to generate random
int64_t in given range without overflow while preserving uniform random
generator distribution. Most relevant replacements have been made.

Closes #5075
---
Branch: https://github.com/tarantool/tarantool/tree/i.kosarev/gh-5075-fine-random-in-range
Issue: https://github.com/tarantool/tarantool/issues/5075

 src/lib/core/random.c |  8 ++++++++
 src/lib/core/random.h | 12 ++++++++++++
 src/lib/swim/swim.c   | 26 ++++----------------------
 test/unit/histogram.c | 12 +++---------
 4 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/src/lib/core/random.c b/src/lib/core/random.c
index f4fa75b1c1..2229c5fbbc 100644
--- a/src/lib/core/random.c
+++ b/src/lib/core/random.c
@@ -97,3 +97,11 @@ rand:
 	while (generated < size)
 		buf[generated++] = rand();
 }
+
+int64_t
+random_in_range(int64_t min, int64_t max)
+{
+	assert(max >= min);
+	double drand = drand48();
+	return (int64_t)(drand * max + (1 - drand) * min);
+}
diff --git a/src/lib/core/random.h b/src/lib/core/random.h
index 26109b14b2..8433660c99 100644
--- a/src/lib/core/random.h
+++ b/src/lib/core/random.h
@@ -32,6 +32,7 @@
  */
 
 #include <stddef.h>
+#include <stdint.h>
 
 #if defined(__cplusplus)
 extern "C" {
@@ -45,6 +46,17 @@ random_free(void);
 void
 random_bytes(char *buf, size_t size);
 
+/**
+ * Return a random int64_t number within given boundaries.
+ *
+ * Instead of blindly calculating a modulo, this function uses
+ * uniformly distributed over the interval [0.0 , 1.0) drand48()
+ * to provide number in given boundaries while preserving uniform
+ * distribution and avoiding overflow.
+ */
+int64_t
+random_in_range(int64_t min, int64_t max);
+
 #if defined(__cplusplus)
 }
 #endif /* extern "C" */
diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c
index 396bd7c452..3994bbbef6 100644
--- a/src/lib/swim/swim.c
+++ b/src/lib/swim/swim.c
@@ -34,6 +34,7 @@
 #include "swim_ev.h"
 #include "uri/uri.h"
 #include "fiber.h"
+#include "random.h"
 #include "msgpuck.h"
 #include "assoc.h"
 #include "sio.h"
@@ -188,25 +189,6 @@ enum {
 	INDIRECT_PING_COUNT = 2,
 };
 
-/**
- * Return a random number within given boundaries.
- *
- * Instead of blindly calculating a modulo, scale the random
- * number down the given boundaries to preserve the original
- * distribution. The result belongs to range [start, end].
- */
-static inline int
-swim_scaled_rand(int start, int end)
-{
-	assert(end >= start);
-	/*
-	 * RAND_MAX is likely to be INT_MAX - hardly SWIM will
-	 * ever be used in such a huge cluster.
-	 */
-	assert(end - start < RAND_MAX);
-	return rand() / (RAND_MAX / (end - start + 1) + 1);
-}
-
 /** Calculate UUID hash to use as a member table key. */
 static inline uint32_t
 swim_uuid_hash(const struct tt_uuid *uuid)
@@ -980,7 +962,7 @@ swim_shuffle_members(struct swim *swim)
 	for (mh_int_t node = mh_first(members), end = mh_end(members);
 	     node != end; node = mh_next(members, node), ++i) {
 		swim->shuffled[i] = *mh_swim_table_node(members, node);
-		int j = swim_scaled_rand(0, i);
+		int j = random_in_range(0, i);
 		SWAP(swim->shuffled[i], swim->shuffled[j]);
 	}
 }
@@ -1069,7 +1051,7 @@ swim_encode_anti_entropy(struct swim *swim, struct swim_packet *packet)
 	swim_member_payload_bin_create(&payload_header);
 	struct mh_swim_table_t *t = swim->members;
 	int i = 0, member_count = mh_size(t);
-	int rnd = swim_scaled_rand(0, member_count - 1);
+	int rnd = random_in_range(0, member_count - 1);
 	for (mh_int_t rc = mh_swim_table_random(t, rnd), end = mh_end(t);
 	     i < member_count; ++i) {
 		struct swim_member *m = *mh_swim_table_node(t, rc);
@@ -1380,7 +1362,7 @@ swim_send_indirect_pings(struct swim *swim, const struct swim_member *dst)
 {
 	struct mh_swim_table_t *t = swim->members;
 	int member_count = mh_size(t);
-	int rnd = swim_scaled_rand(0, member_count - 1);
+	int rnd = random_in_range(0, member_count - 1);
 	mh_int_t rc = mh_swim_table_random(t, rnd), end = mh_end(t);
 	for (int member_i = 0, task_i = 0; member_i < member_count &&
 	     task_i < INDIRECT_PING_COUNT; ++member_i) {
diff --git a/test/unit/histogram.c b/test/unit/histogram.c
index 3056d20fe8..02e6f80dec 100644
--- a/test/unit/histogram.c
+++ b/test/unit/histogram.c
@@ -5,6 +5,7 @@
 #include "memory.h"
 #include "unit.h"
 #include "trivia/util.h"
+#include "core/random.h"
 
 static int
 int64_cmp(const void *p1, const void *p2)
@@ -46,13 +47,6 @@ gen_rand_data(size_t *p_len)
 	return data;
 }
 
-static int64_t
-gen_rand_value(int64_t min, int64_t max)
-{
-	assert(max >= min);
-	return min + rand() % (max - min + 1);
-}
-
 static void
 test_counts(void)
 {
@@ -97,7 +91,7 @@ test_discard(void)
 
 	struct histogram *hist = histogram_new(buckets, n_buckets);
 
-	size_t bucket_sz = gen_rand_value(2, 10);
+	size_t bucket_sz = random_in_range(2, 10);
 	size_t data_len = (n_buckets + 1) * bucket_sz;
 	int64_t *data = calloc(data_len, sizeof(*data));
 
@@ -105,7 +99,7 @@ test_discard(void)
 		int64_t min = (b == 0 ? INT64_MIN : buckets[b - 1] + 1);
 		int64_t max = (b == n_buckets ? INT64_MAX : buckets[b]);
 		for (size_t i = 0; i < bucket_sz; i++)
-			data[b * bucket_sz + i] = gen_rand_value(min, max);
+			data[b * bucket_sz + i] = random_in_range(min, max);
 	}
 
 	for (size_t i = 0; i < data_len; i++)
-- 
2.17.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-09-17 13:56 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-04 13:51 [Tarantool-patches] [PATCH] core: introduce evenly distributed int64 random in range Ilya Kosarev
2020-09-04 22:39 ` Vladislav Shpilevoy
2020-09-06 13:19   ` Ilya Kosarev
2020-09-17 13:56     ` Vladislav Shpilevoy
  -- strict thread matches above, loose matches on Subject: below --
2020-09-04 13:20 Ilya Kosarev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox