[Tarantool-patches] [PATCH v3] core: introduce evenly distributed int64 random in range

Tue Nov 3 00:38:42 MSK 2020

Hi! Thanks for the patch!

Almost finished!

See 5 comments below.

> diff --git a/src/lib/core/random.c b/src/lib/core/random.c
> index f4fa75b1c..cfca2b7a9 100644
> --- a/src/lib/core/random.c
> +++ b/src/lib/core/random.c
> @@ -97,3 +102,85 @@ rand:
>  	while (generated < size)
>  		buf[generated++] = rand();
>  }
> +
> +uint64_t
> +real_random(void)
> +{
> +	uint64_t result;
> +	random_bytes((char *)(&result), sizeof(result));

1. Why do you need () around &result?

> +	return result;
> +}
> +
> +uint64_t *
> +xoshiro_state(void)
> +{
> +	return state;
> +}
> +
> +size_t
> +xoshiro_state_size(void)
> +{
> +	return sizeof(state) / sizeof(state[0]);
> +}

2. Or instead of these two functions it could be simpler
to implement xoshiro_state_str(), which would return
its 4 uint64 numbers as tt_sprintf(). Anyway we need to
print the state in a human readable format.

> +
> +int64_t
> +real_random_in_range(int64_t min, int64_t max)
> +{
> +	assert(max >= min);
> +	uint64_t range = (uint64_t)max - min;

3. This is UB, if max is negative.

> +	uint64_t mask = UINT64_MAX >> bit_clz_u64(range | 1);
> +	uint64_t result;
> +	do {
> +		result = real_random() & mask;
> +	} while (result > range);
> +	return min + result;
> +}
> +
> +int64_t
> +pseudo_random_in_range(int64_t min, int64_t max)
> +{
> +	assert(max >= min);
> +	uint64_t range = (uint64_t)max - min;
> +	uint64_t mask = UINT64_MAX >> bit_clz_u64(range | 1);
> +	uint64_t result;
> +	do {
> +		result = xoshiro_random() & mask;
> +	} while (result > range);
> +	return min + result;
> +}
> diff --git a/test/unit/random.c b/test/unit/random.c
> new file mode 100644
> index 000000000..3d7416988
> --- /dev/null
> +++ b/test/unit/random.c
> @@ -0,0 +1,48 @@
> +#include <core/random.h>
> +
> +#include <stdio.h>
> +#include <assert.h>
> +
> +#include "unit.h"
> +
> +static void
> +test_xoshiro_random_in_range_one(int64_t min, int64_t max)
> +{
> +	int64_t result = pseudo_random_in_range(min, max);
> +	assert(min <= result && result <= max);
> +	if (min == max)
> +		printf("pseudo_random_in_range(%lld, %lld) = %lld\n",
> +		       (long long)min, (long long)max, (long long)result);
> +}

4. Why no tests for real random?

> diff --git a/test/unit/swim_test_utils.c b/test/unit/swim_test_utils.c
> index 9dbd28a9f..590ce97a1 100644
> --- a/test/unit/swim_test_utils.c
> +++ b/test/unit/swim_test_utils.c
> @@ -875,7 +875,15 @@ swim_run_test(const char *log_file, fiber_func test)
>  	 * Print the seed to be able to reproduce a bug with the
>  	 * same seed.
>  	 */
> -	say_info("Random seed = %llu", (unsigned long long) seed);
> +	say_info("Random seed = %llu", (unsigned long long)seed);
> +	uint64_t *state = xoshiro_state();
> +	char state_buf[100] = "";
> +	int filled = 0;
> +	for (size_t i = 0; i < xoshiro_state_size(); i++)
> +		filled += snprintf(state_buf + filled,
> +				   sizeof(state_buf) - filled,
> +				   " %llu", (unsigned long long)state[i]);
> +	say_info("xoshiro random state =%s", state_buf);

5. snprintf() returns number of bytes that would have been
printed if the buffer would be long enough. It means, that
"sizeof(state_buf) - filled" may become negative with such
accumulation. If state size will even become bigger,
for example. And since snprintf()'s second argument is
size_t, it will overflow to a huge value, leading to the
stack corruption.

Better return a string on tt_sprintf() right from random.c,
so as not to care about snprintf specifics in the tests.

>  
>  	struct fiber *main_fiber = fiber_new("main", test);
>  	fiber_set_joinable(main_fiber, true);
>