Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH v2] core: introduce evenly distributed int64 random in range
@ 2020-09-30  0:06 Ilya Kosarev
  2020-10-02 22:43 ` Vladislav Shpilevoy
  0 siblings, 1 reply; 3+ messages in thread
From: Ilya Kosarev @ 2020-09-30  0:06 UTC (permalink / raw)
  To: v.shpilevoy, alyapunov; +Cc: tarantool-patches

Tarantool codebase had at least two functions to generate random
integer in given range and both of them had problems at least with
integer overflow. This patch brings nice function to generate random
int64_t in given range without overflow while preserving uniform random
generator distribution using unbiased bitmask with rejection method.
It is now possible to use xoshiro256++ PRNG or random bytes as a basis.
Most relevant replacements have been made.

Closes #5075
---
Branch: https://github.com/tarantool/tarantool/tree/i.kosarev/gh-5075-fine-random-in-range
Issue: https://github.com/tarantool/tarantool/issues/5075

@ChangeLog:
 * Introduce overflow resistant & evenly distributed random in range (gh-5075).

Changes in v2:
 - implemented fine 64-bit random
 - implemented fine range bounding

 src/lib/core/random.c | 57 +++++++++++++++++++++++++++++++++++++++++++
 src/lib/core/random.h | 38 +++++++++++++++++++++++++++++
 src/lib/swim/swim.c   | 26 +++-----------------
 test/unit/histogram.c | 12 +++------
 4 files changed, 102 insertions(+), 31 deletions(-)

diff --git a/src/lib/core/random.c b/src/lib/core/random.c
index f4fa75b1c..4b52408ea 100644
--- a/src/lib/core/random.c
+++ b/src/lib/core/random.c
@@ -39,6 +39,8 @@
 
 static int rfd;
 
+static uint64_t state[4];
+
 void
 random_init(void)
 {
@@ -63,6 +65,8 @@ random_init(void)
 srand:
 	srandom(seed);
 	srand(seed);
+
+	random_bytes((char *)state, 32);
 }
 
 void
@@ -97,3 +101,56 @@ rand:
 	while (generated < size)
 		buf[generated++] = rand();
 }
+
+uint64_t
+real_random(void)
+{
+	uint64_t result;
+	random_bytes((char *)(&result), 8);
+	return result;
+}
+
+static inline
+uint64_t rotl(const uint64_t x, int k)
+{
+	return (x << k) | (x >> (64 - k));
+}
+
+uint64_t
+pseudo_random(void)
+{
+	const uint64_t result = rotl(state[0] + state[3], 23) + state[0];
+	const uint64_t t = state[1] << 17;
+	state[2] ^= state[0];
+	state[3] ^= state[1];
+	state[1] ^= state[2];
+	state[0] ^= state[3];
+	state[2] ^= t;
+	state[3] = rotl(state[3], 45);
+	return result;
+}
+
+static inline uint64_t
+random_in_range(uint64_t (*rnd)(void), uint64_t range)
+{
+	uint64_t mask = UINT64_MAX >> __builtin_clzll(range | 1);
+	uint64_t result;
+	do {
+		result = rnd() & mask;
+	} while (result > range);
+	return result;
+}
+
+int64_t
+real_random_in_range(int64_t min, int64_t max)
+{
+	assert(max >= min);
+	return min + random_in_range(real_random, (uint64_t) (max - min));
+}
+
+int64_t
+pseudo_random_in_range(int64_t min, int64_t max)
+{
+	assert(max >= min);
+	return min + random_in_range(pseudo_random, (uint64_t) (max - min));
+}
diff --git a/src/lib/core/random.h b/src/lib/core/random.h
index 26109b14b..9228df72e 100644
--- a/src/lib/core/random.h
+++ b/src/lib/core/random.h
@@ -32,6 +32,7 @@
  */
 
 #include <stddef.h>
+#include <stdint.h>
 
 #if defined(__cplusplus)
 extern "C" {
@@ -45,6 +46,43 @@ random_free(void);
 void
 random_bytes(char *buf, size_t size);
 
+/**
+ * Just 8 random_bytes().
+ */
+uint64_t
+real_random(void);
+
+/**
+ * xoshiro256++ pseudo random generator.
+ * http://prng.di.unimi.it/
+ * State is initialized with random_bytes().
+ */
+uint64_t
+pseudo_random(void);
+
+/**
+ * Return a random int64_t number within given boundaries.
+ *
+ * Instead of blindly calculating a modulo, this function uses
+ * unbiased bitmask with rejection method to provide number in
+ * given boundaries while preserving uniform distribution and
+ * avoiding overflow. It uses random bytes as a basis.
+ */
+int64_t
+real_random_in_range(int64_t min, int64_t max);
+
+/**
+ * Return a pseudo random int64_t number within given boundaries.
+ *
+ * Instead of blindly calculating a modulo, this function uses
+ * unbiased bitmask with rejection method to provide number in
+ * given boundaries while preserving uniform distribution and
+ * avoiding overflow. It uses xoshiro256++ as an uint64 pseudo
+ * random generator.
+ */
+int64_t
+pseudo_random_in_range(int64_t min, int64_t max);
+
 #if defined(__cplusplus)
 }
 #endif /* extern "C" */
diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c
index 396bd7c45..9b5458635 100644
--- a/src/lib/swim/swim.c
+++ b/src/lib/swim/swim.c
@@ -34,6 +34,7 @@
 #include "swim_ev.h"
 #include "uri/uri.h"
 #include "fiber.h"
+#include "random.h"
 #include "msgpuck.h"
 #include "assoc.h"
 #include "sio.h"
@@ -188,25 +189,6 @@ enum {
 	INDIRECT_PING_COUNT = 2,
 };
 
-/**
- * Return a random number within given boundaries.
- *
- * Instead of blindly calculating a modulo, scale the random
- * number down the given boundaries to preserve the original
- * distribution. The result belongs to range [start, end].
- */
-static inline int
-swim_scaled_rand(int start, int end)
-{
-	assert(end >= start);
-	/*
-	 * RAND_MAX is likely to be INT_MAX - hardly SWIM will
-	 * ever be used in such a huge cluster.
-	 */
-	assert(end - start < RAND_MAX);
-	return rand() / (RAND_MAX / (end - start + 1) + 1);
-}
-
 /** Calculate UUID hash to use as a member table key. */
 static inline uint32_t
 swim_uuid_hash(const struct tt_uuid *uuid)
@@ -980,7 +962,7 @@ swim_shuffle_members(struct swim *swim)
 	for (mh_int_t node = mh_first(members), end = mh_end(members);
 	     node != end; node = mh_next(members, node), ++i) {
 		swim->shuffled[i] = *mh_swim_table_node(members, node);
-		int j = swim_scaled_rand(0, i);
+		int j = pseudo_random_in_range(0, i);
 		SWAP(swim->shuffled[i], swim->shuffled[j]);
 	}
 }
@@ -1069,7 +1051,7 @@ swim_encode_anti_entropy(struct swim *swim, struct swim_packet *packet)
 	swim_member_payload_bin_create(&payload_header);
 	struct mh_swim_table_t *t = swim->members;
 	int i = 0, member_count = mh_size(t);
-	int rnd = swim_scaled_rand(0, member_count - 1);
+	int rnd = pseudo_random_in_range(0, member_count - 1);
 	for (mh_int_t rc = mh_swim_table_random(t, rnd), end = mh_end(t);
 	     i < member_count; ++i) {
 		struct swim_member *m = *mh_swim_table_node(t, rc);
@@ -1380,7 +1362,7 @@ swim_send_indirect_pings(struct swim *swim, const struct swim_member *dst)
 {
 	struct mh_swim_table_t *t = swim->members;
 	int member_count = mh_size(t);
-	int rnd = swim_scaled_rand(0, member_count - 1);
+	int rnd = pseudo_random_in_range(0, member_count - 1);
 	mh_int_t rc = mh_swim_table_random(t, rnd), end = mh_end(t);
 	for (int member_i = 0, task_i = 0; member_i < member_count &&
 	     task_i < INDIRECT_PING_COUNT; ++member_i) {
diff --git a/test/unit/histogram.c b/test/unit/histogram.c
index 3056d20fe..ef5cd714f 100644
--- a/test/unit/histogram.c
+++ b/test/unit/histogram.c
@@ -5,6 +5,7 @@
 #include "memory.h"
 #include "unit.h"
 #include "trivia/util.h"
+#include "core/random.h"
 
 static int
 int64_cmp(const void *p1, const void *p2)
@@ -46,13 +47,6 @@ gen_rand_data(size_t *p_len)
 	return data;
 }
 
-static int64_t
-gen_rand_value(int64_t min, int64_t max)
-{
-	assert(max >= min);
-	return min + rand() % (max - min + 1);
-}
-
 static void
 test_counts(void)
 {
@@ -97,7 +91,7 @@ test_discard(void)
 
 	struct histogram *hist = histogram_new(buckets, n_buckets);
 
-	size_t bucket_sz = gen_rand_value(2, 10);
+	size_t bucket_sz = pseudo_random_in_range(2, 10);
 	size_t data_len = (n_buckets + 1) * bucket_sz;
 	int64_t *data = calloc(data_len, sizeof(*data));
 
@@ -105,7 +99,7 @@ test_discard(void)
 		int64_t min = (b == 0 ? INT64_MIN : buckets[b - 1] + 1);
 		int64_t max = (b == n_buckets ? INT64_MAX : buckets[b]);
 		for (size_t i = 0; i < bucket_sz; i++)
-			data[b * bucket_sz + i] = gen_rand_value(min, max);
+			data[b * bucket_sz + i] = pseudo_random_in_range(min, max);
 	}
 
 	for (size_t i = 0; i < data_len; i++)
-- 
2.17.1

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Tarantool-patches] [PATCH v2] core: introduce evenly distributed int64 random in range
  2020-09-30  0:06 [Tarantool-patches] [PATCH v2] core: introduce evenly distributed int64 random in range Ilya Kosarev
@ 2020-10-02 22:43 ` Vladislav Shpilevoy
  2020-10-23 21:12   ` Ilya Kosarev
  0 siblings, 1 reply; 3+ messages in thread
From: Vladislav Shpilevoy @ 2020-10-02 22:43 UTC (permalink / raw)
  To: Ilya Kosarev, alyapunov; +Cc: tarantool-patches

Hi! Thanks for the patch and for the good research, seriously!

See 11 comments below.

On 30.09.2020 02:06, Ilya Kosarev wrote:
> Tarantool codebase had at least two functions to generate random
> integer in given range and both of them had problems at least with
> integer overflow. This patch brings nice function to generate random
> int64_t in given range without overflow while preserving uniform random
> generator distribution using unbiased bitmask with rejection method.
> It is now possible to use xoshiro256++ PRNG or random bytes as a basis.
> Most relevant replacements have been made.
> 
> Closes #5075
> ---
> Branch: https://github.com/tarantool/tarantool/tree/i.kosarev/gh-5075-fine-random-in-range
> Issue: https://github.com/tarantool/tarantool/issues/5075
> 
> @ChangeLog:
>  * Introduce overflow resistant & evenly distributed random in range (gh-5075).

1. I wouldn't put it into the changelog. It is not a visible bugfix,
nor a public feature. Rather some refactoring.

> Changes in v2:
>  - implemented fine 64-bit random
>  - implemented fine range bounding
> 
>  src/lib/core/random.c | 57 +++++++++++++++++++++++++++++++++++++++++++
>  src/lib/core/random.h | 38 +++++++++++++++++++++++++++++
>  src/lib/swim/swim.c   | 26 +++-----------------
>  test/unit/histogram.c | 12 +++------

2. Please, add some unit tests checking that, for example,
the randoms work fine even on the maximal range [INT64_MIN, INT64_MAX],
with some edge cases like [INT64_MIN, INT64_MIN], [INT64_MAX, INT64_MAX],
[1, 1], [0, 0], and some regular cases. Also the tests should check the
borders are respected, and ASAN passes (originally the issue was discovered
by ASAN which found signed integer overflows under ENABLE_UB_SANITIZER
option).

ASAN checks for signed int overflow UB are disabled for now because
of SQL producing this UB a lot. But you can enable it by changing
compiler.cmake, and running the unit tests. If you want to be sure
it won't fail, when the overflow check will be enabled after SQL is
fixed.

>  4 files changed, 102 insertions(+), 31 deletions(-)
> 
> diff --git a/src/lib/core/random.c b/src/lib/core/random.c
> index f4fa75b1c..4b52408ea 100644
> --- a/src/lib/core/random.c
> +++ b/src/lib/core/random.c> @@ -63,6 +65,8 @@ random_init(void)
>  srand:
>  	srandom(seed);
>  	srand(seed);
> +
> +	random_bytes((char *)state, 32);

3. Never hardcode an array size except maybe in its declaration.
sizeof(state) will return the same, but it is safe in case the
state size ever changes.

>  }
>  
>  void
> @@ -97,3 +101,56 @@ rand:
>  	while (generated < size)
>  		buf[generated++] = rand();
>  }
> +
> +uint64_t
> +real_random(void)
> +{
> +	uint64_t result;
> +	random_bytes((char *)(&result), 8);
> +	return result;
> +}
> +
> +static inline
> +uint64_t rotl(const uint64_t x, int k)

4. Sorry, we never put return type on the same line as function
name. Also why did you declare 'x' const? Why 'k' is not const
then? It seems you copy-pasted it from somewhere without
changing. Please, convert it to our code style.

5. What is 'rotl'?

> +{
> +	return (x << k) | (x >> (64 - k));
> +}
> +
> +uint64_t
> +pseudo_random(void)
> +{
> +	const uint64_t result = rotl(state[0] + state[3], 23) + state[0];
> +	const uint64_t t = state[1] << 17;
> +	state[2] ^= state[0];
> +	state[3] ^= state[1];
> +	state[1] ^= state[2];
> +	state[0] ^= state[3];
> +	state[2] ^= t;
> +	state[3] = rotl(state[3], 45);> +	return result;
> +}
> +
> +static inline uint64_t
> +random_in_range(uint64_t (*rnd)(void), uint64_t range)
> +{
> +	uint64_t mask = UINT64_MAX >> __builtin_clzll(range | 1);

6. Use bit_clz_u64(). You may not have __builtin_clzll.

> +	uint64_t result;
> +	do {
> +		result = rnd() & mask;

7. I guess we can hope the compiler will inline rnd() virtual
call to a non-virtual call below, but I can't be 100% sure, and
nobody can. Even if now you see it inlined in the assembly. Better
duplicate these few lines or use a macro, than relying on the
compiler. A single virtual call would ruin the perf of the
algorithm you chose over mersen twister.

> +	} while (result > range);
> +	return result;
> +}
> +
> +int64_t
> +real_random_in_range(int64_t min, int64_t max)
> +{
> +	assert(max >= min);
> +	return min + random_in_range(real_random, (uint64_t) (max - min));

8. I guess this will fail UB checks from ASAN, because max-min will
be firstly stored into int64_t and only then cast to uint64_t. The
first step can overflow int64_t. I remember there was a simple solution,
but can't remember what it was.

> +}
> +
> +int64_t
> +pseudo_random_in_range(int64_t min, int64_t max)
> +{
> +	assert(max >= min);
> +	return min + random_in_range(pseudo_random, (uint64_t) (max - min));
> +}
> diff --git a/src/lib/core/random.h b/src/lib/core/random.h
> index 26109b14b..9228df72e 100644
> --- a/src/lib/core/random.h
> +++ b/src/lib/core/random.h
> @@ -45,6 +46,43 @@ random_free(void);
>  void
>  random_bytes(char *buf, size_t size);
>  
> +/**
> + * Just 8 random_bytes().
> + */
> +uint64_t
> +real_random(void);
> +
> +/**
> + * xoshiro256++ pseudo random generator.
> + * http://prng.di.unimi.it/
> + * State is initialized with random_bytes().
> + */
> +uint64_t
> +pseudo_random(void);
> +
> +/**
> + * Return a random int64_t number within given boundaries.
> + *
> + * Instead of blindly calculating a modulo, this function uses
> + * unbiased bitmask with rejection method to provide number in
> + * given boundaries while preserving uniform distribution and
> + * avoiding overflow. It uses random bytes as a basis.

9. Are the boundaries included or not? Both included, both
excluded, first included and last excluded?

> + */
> +int64_t
> +real_random_in_range(int64_t min, int64_t max);
> +
> +/**
> + * Return a pseudo random int64_t number within given boundaries.
> + *
> + * Instead of blindly calculating a modulo, this function uses
> + * unbiased bitmask with rejection method to provide number in
> + * given boundaries while preserving uniform distribution and
> + * avoiding overflow. It uses xoshiro256++ as an uint64 pseudo
> + * random generator.
> + */
> +int64_t
> +pseudo_random_in_range(int64_t min, int64_t max);
> +
>  #if defined(__cplusplus)
>  }

10. When you changed SWIM to use the new generator, you made the
tests harder to reproduce in case they fail. SWIM tests print random
seed in the beginning, so as it could be used to repeat the test.
Now you need to print the state of your pseudo generator too, so as
it could be restored and reproduced. See swim_run_test().

11. Would be nice to see motivation why the chosen generator was
chosen in the comments in random.c.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Tarantool-patches] [PATCH v2] core: introduce evenly distributed int64 random in range
  2020-10-02 22:43 ` Vladislav Shpilevoy
@ 2020-10-23 21:12   ` Ilya Kosarev
  0 siblings, 0 replies; 3+ messages in thread
From: Ilya Kosarev @ 2020-10-23 21:12 UTC (permalink / raw)
  To: Vladislav Shpilevoy; +Cc: tarantool-patches

[-- Attachment #1: Type: text/plain, Size: 7633 bytes --]


Hi,
 
Thanks for your review!
 
Sent v3 of the patch considering your comments. 
>Суббота, 3 октября 2020, 1:43 +03:00 от Vladislav Shpilevoy <v.shpilevoy@tarantool.org>:
> 
>Hi! Thanks for the patch and for the good research, seriously!
>
>See 11 comments below.
>
>On 30.09.2020 02:06, Ilya Kosarev wrote:
>> Tarantool codebase had at least two functions to generate random
>> integer in given range and both of them had problems at least with
>> integer overflow. This patch brings nice function to generate random
>> int64_t in given range without overflow while preserving uniform random
>> generator distribution using unbiased bitmask with rejection method.
>> It is now possible to use xoshiro256++ PRNG or random bytes as a basis.
>> Most relevant replacements have been made.
>>
>> Closes #5075
>> ---
>> Branch:  https://github.com/tarantool/tarantool/tree/i.kosarev/gh-5075-fine-random-in-range
>> Issue:  https://github.com/tarantool/tarantool/issues/5075
>>
>> @ChangeLog:
>> * Introduce overflow resistant & evenly distributed random in range (gh-5075).
>
>1. I wouldn't put it into the changelog. It is not a visible bugfix,
>nor a public feature. Rather some refactoring.
Ok, i see, removed in v3 of the patch.
>
>> Changes in v2:
>> - implemented fine 64-bit random
>> - implemented fine range bounding
>>
>> src/lib/core/random.c | 57 +++++++++++++++++++++++++++++++++++++++++++
>> src/lib/core/random.h | 38 +++++++++++++++++++++++++++++
>> src/lib/swim/swim.c | 26 +++-----------------
>> test/unit/histogram.c | 12 +++------
>
>2. Please, add some unit tests checking that, for example,
>the randoms work fine even on the maximal range [INT64_MIN, INT64_MAX],
>with some edge cases like [INT64_MIN, INT64_MIN], [INT64_MAX, INT64_MAX],
>[1, 1], [0, 0], and some regular cases. Also the tests should check the
>borders are respected, and ASAN passes (originally the issue was discovered
>by ASAN which found signed integer overflows under ENABLE_UB_SANITIZER
>option).
All right, done in v3.
>
>ASAN checks for signed int overflow UB are disabled for now because
>of SQL producing this UB a lot. But you can enable it by changing
>compiler.cmake, and running the unit tests. If you want to be sure
>it won't fail, when the overflow check will be enabled after SQL is
>fixed.
Yes, thanks for the instruction, checked locally.
>
>> 4 files changed, 102 insertions(+), 31 deletions(-)
>>
>> diff --git a/src/lib/core/random.c b/src/lib/core/random.c
>> index f4fa75b1c..4b52408ea 100644
>> --- a/src/lib/core/random.c
>> +++ b/src/lib/core/random.c> @@ -63,6 +65,8 @@ random_init(void)
>> srand:
>> srandom(seed);
>> srand(seed);
>> +
>> + random_bytes((char *)state, 32);
>
>3. Never hardcode an array size except maybe in its declaration.
>sizeof(state) will return the same, but it is safe in case the
>state size ever changes.
Right, fixed in v3.
>
>> }
>>
>> void
>> @@ -97,3 +101,56 @@ rand:
>> while (generated < size)
>> buf[generated++] = rand();
>> }
>> +
>> +uint64_t
>> +real_random(void)
>> +{
>> + uint64_t result;
>> + random_bytes((char *)(&result), 8);
>> + return result;
>> +}
>> +
>> +static inline
>> +uint64_t rotl(const uint64_t x, int k)
>
>4. Sorry, we never put return type on the same line as function
>name. Also why did you declare 'x' const? Why 'k' is not const
>then? It seems you copy-pasted it from somewhere without
>changing. Please, convert it to our code style.
Yes, right, didn’t think about the difference. Fixed in v3.
Well, yes, it was borrowed from here originally, as mentioned:
http://prng.di.unimi.it/xoshiro256plusplus.c
>
>5. What is 'rotl'?
Comment is added in v3: «rotate left».
>
>> +{
>> + return (x << k) | (x >> (64 - k));
>> +}
>> +
>> +uint64_t
>> +pseudo_random(void)
>> +{
>> + const uint64_t result = rotl(state[0] + state[3], 23) + state[0];
>> + const uint64_t t = state[1] << 17;
>> + state[2] ^= state[0];
>> + state[3] ^= state[1];
>> + state[1] ^= state[2];
>> + state[0] ^= state[3];
>> + state[2] ^= t;
>> + state[3] = rotl(state[3], 45);> + return result;
>> +}
>> +
>> +static inline uint64_t
>> +random_in_range(uint64_t (*rnd)(void), uint64_t range)
>> +{
>> + uint64_t mask = UINT64_MAX >> __builtin_clzll(range | 1);
>
>6. Use bit_clz_u64(). You may not have __builtin_clzll.
All right, fixed in v3.
>
>> + uint64_t result;
>> + do {
>> + result = rnd() & mask;
>
>7. I guess we can hope the compiler will inline rnd() virtual
>call to a non-virtual call below, but I can't be 100% sure, and
>nobody can. Even if now you see it inlined in the assembly. Better
>duplicate these few lines or use a macro, than relying on the
>compiler. A single virtual call would ruin the perf of the
>algorithm you chose over mersen twister.
I was quite sure it is reliable, but couldn’t find the proof. Changed in v3.
>
>> + } while (result > range);
>> + return result;
>> +}
>> +
>> +int64_t
>> +real_random_in_range(int64_t min, int64_t max)
>> +{
>> + assert(max >= min);
>> + return min + random_in_range(real_random, (uint64_t) (max - min));
>
>8. I guess this will fail UB checks from ASAN, because max-min will
>be firstly stored into int64_t and only then cast to uint64_t. The
>first step can overflow int64_t. I remember there was a simple solution,
>but can't remember what it was.
Yes, ASAN is fine with  ( uint64_t )max - min.  Fixed in v3.
>
>> +}
>> +
>> +int64_t
>> +pseudo_random_in_range(int64_t min, int64_t max)
>> +{
>> + assert(max >= min);
>> + return min + random_in_range(pseudo_random, (uint64_t) (max - min));
>> +}
>> diff --git a/src/lib/core/random.h b/src/lib/core/random.h
>> index 26109b14b..9228df72e 100644
>> --- a/src/lib/core/random.h
>> +++ b/src/lib/core/random.h
>> @@ -45,6 +46,43 @@ random_free(void);
>> void
>> random_bytes(char *buf, size_t size);
>>
>> +/**
>> + * Just 8 random_bytes().
>> + */
>> +uint64_t
>> +real_random(void);
>> +
>> +/**
>> + * xoshiro256++ pseudo random generator.
>> + *  http://prng.di.unimi.it/
>> + * State is initialized with random_bytes().
>> + */
>> +uint64_t
>> +pseudo_random(void);
>> +
>> +/**
>> + * Return a random int64_t number within given boundaries.
>> + *
>> + * Instead of blindly calculating a modulo, this function uses
>> + * unbiased bitmask with rejection method to provide number in
>> + * given boundaries while preserving uniform distribution and
>> + * avoiding overflow. It uses random bytes as a basis.
>
>9. Are the boundaries included or not? Both included, both
>excluded, first included and last excluded?
Added to the comment in v3.
>
>> + */
>> +int64_t
>> +real_random_in_range(int64_t min, int64_t max);
>> +
>> +/**
>> + * Return a pseudo random int64_t number within given boundaries.
>> + *
>> + * Instead of blindly calculating a modulo, this function uses
>> + * unbiased bitmask with rejection method to provide number in
>> + * given boundaries while preserving uniform distribution and
>> + * avoiding overflow. It uses xoshiro256++ as an uint64 pseudo
>> + * random generator.
>> + */
>> +int64_t
>> +pseudo_random_in_range(int64_t min, int64_t max);
>> +
>> #if defined(__cplusplus)
>> }
>
>10. When you changed SWIM to use the new generator, you made the
>tests harder to reproduce in case they fail. SWIM tests print random
>seed in the beginning, so as it could be used to repeat the test.
>Now you need to print the state of your pseudo generator too, so as
>it could be restored and reproduced. See swim_run_test().
All right, done in v3.
>
>11. Would be nice to see motivation why the chosen generator was
>chosen in the comments in random.c.
Right, added some arguments in comment.
 
--
Ilya Kosarev

[-- Attachment #2: Type: text/html, Size: 11680 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-10-23 21:12 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-30  0:06 [Tarantool-patches] [PATCH v2] core: introduce evenly distributed int64 random in range Ilya Kosarev
2020-10-02 22:43 ` Vladislav Shpilevoy
2020-10-23 21:12   ` Ilya Kosarev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox