[tarantool-patches] Re: [PATCH v2 1/1] box: create bigrefs for tuples

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Thu May 31 15:51:01 MSK 2018


Hello. Thanks for the fixes! See 15 comments below.

On 30/05/2018 18:26, imeevma at tarantool.org wrote:
> Due to limitation of number of bigrefs being only 65535 it
> was possible to reach this limitation. This patch increases
> capacity of reference counter to 4 billions.
> 
> Closes #3224
> ---
> Branch: https://github.com/tarantool/tarantool/tree/gh-3224-tuple-bigrefs
> Issue: https://github.com/tarantool/tarantool/issues/3224

1. As I said in the previous review: use your gh login as a prefix for a
branch name.

> 
>   src/box/errcode.h        |   2 +-
>   src/box/tuple.c          | 168 +++++++++++++++++++++++++++++++++++++++++++++++
>   src/box/tuple.h          |  36 ++++++----
>   test/box/misc.result     |  39 ++++++-----
>   test/box/select.result   |  32 ++++++++-
>   test/box/select.test.lua |  15 ++++-
>   6 files changed, 256 insertions(+), 36 deletions(-)
> 
> diff --git a/src/box/errcode.h b/src/box/errcode.h
> index a0759f8..e009524 100644
> --- a/src/box/errcode.h
> +++ b/src/box/errcode.h
> @@ -138,7 +138,7 @@ struct errcode_record {
>   	/* 83 */_(ER_ROLE_EXISTS,		"Role '%s' already exists") \
>   	/* 84 */_(ER_CREATE_ROLE,		"Failed to create role '%s': %s") \
>   	/* 85 */_(ER_INDEX_EXISTS,		"Index '%s' already exists") \
> -	/* 86 */_(ER_TUPLE_REF_OVERFLOW,	"Tuple reference counter overflow") \
> +	/* 86 */_(ER_UNUSED6,			"") \
>   	/* 87 */_(ER_ROLE_LOOP,			"Granting role '%s' to role '%s' would create a loop") \
>   	/* 88 */_(ER_GRANT,			"Incorrect grant arguments: %s") \
>   	/* 89 */_(ER_PRIV_GRANTED,		"User '%s' already has %s access on %s '%s'") \
> diff --git a/src/box/tuple.c b/src/box/tuple.c
> index 014f374..60280e4 100644
> --- a/src/box/tuple.c
> +++ b/src/box/tuple.c
> @@ -48,6 +48,16 @@ enum {
>   	OBJSIZE_MIN = 16,
>   };
>   
> +/**
> +* @brief Container for bigrefs.

2. Please, describe in detail what bigrefs are, how they work and why they are needed.

> +*/
> +struct tuple_bigrefs
> +{
> +	uint32_t *refs;
> +	uint16_t size;
> +	uint16_t capacity;
> +};
> +
>   static const double ALLOC_FACTOR = 1.05;
>   
>   /**
> @@ -244,6 +264,152 @@ tuple_arena_create(struct slab_arena *arena, struct quota *quota,
>   	}
>   }
>   
> +enum {
> +	TUPLE_BIGREF_FACTOR = 2,
> +	TUPLE_BIGREF_MAX = UINT32_MAX,
> +	TUPLE_BIGREF_ERROR = UINT16_MAX,

3. As we discussed with Kostja, tuple_ref must now either finish
successfully or terminate the process with panic(). You should not have
TUPLE_BIGREF_ERROR.

> +	TUPLE_BIGREF_FLAG = UINT16_MAX ^ (UINT16_MAX >> 1),> +	TUPLE_BIGREF_MIN_CAPACITY = 16,
> +	TUPLE_BIGREF_MAX_CAPACITY = UINT16_MAX >> 1
> +};
> +
> +/**
> + * Free memory that was allocated for big references.
> + */
> +static inline void
> +tuple_bigrefs_free(void)

4. Please, obey the naming convention:
use create/destroy for functions that initialize/finalize an object
without allocating/freeing the object itself, and new/delete for
functions that also allocate/free the object.

Here you do a destroy.

> +{
> +	free(bigrefs.refs);
> +	bigrefs.size = 0;
> +	bigrefs.refs = NULL;
> +	bigrefs.capacity = 0;
> +}
> +
> +/**
> + * Return index for new big reference counter and allocate memory if needed.
> + * @retval index for new big reference counter.
> + */
> +static inline uint16_t
> +tuple_bigref_retrieve(void)
> +{
> +	if (bigrefs.size < bigrefs.capacity) {
> +		for (uint16_t i = 0; i < bigrefs.capacity; ++i) {
> +			if (bigrefs.refs[i] == 0) {
> +				++bigrefs.size;
> +				return i;
> +			}
> +		}
> +		unreachable();
> +	}
> +	/* Extend the array ... */

5. I used this comment in review as a placeholder for the code. You should
not include it here.

> +	uint16_t capacity = bigrefs.capacity;
> +	if(bigrefs.refs == NULL) {
> +		assert(bigrefs.size == 0 && bigrefs.capacity == 0);
> +		capacity = TUPLE_BIGREF_MIN_CAPACITY;
> +		bigrefs.refs = (uint32_t *) malloc(capacity *
> +						   sizeof(*bigrefs.refs));

6. Please, reuse realloc. Realloc works ok with NULL.

if capacity == 0 then
	new_capacity = TUPLE_BIGREF_MIN_CAPACITY
else
	new_capacity = capacity * 2
refs = realloc(refs, new_capacity)
...

> +		if(bigrefs.refs == NULL) {
> +			diag_set(OutOfMemory, capacity * sizeof(*bigrefs.refs),
> +				 "malloc", "bigrefs.refs");
> +			return TUPLE_BIGREF_ERROR;
> +		}
> +		bigrefs.capacity = capacity;
> +		return bigrefs.size++;
> +	}
> +	if(capacity >= TUPLE_BIGREF_MAX_CAPACITY) {
> +		tuple_bigrefs_free();
> +		return TUPLE_BIGREF_ERROR;

7. Panic.

> +	}
> +	if(capacity > TUPLE_BIGREF_MAX_CAPACITY / TUPLE_BIGREF_FACTOR)
> +		capacity = TUPLE_BIGREF_MAX_CAPACITY;
> +	else
> +		capacity *= TUPLE_BIGREF_FACTOR;
> +	uint32_t *refs = (uint32_t *) realloc(bigrefs.refs, capacity *
> +					      sizeof(*bigrefs.refs));
> +	if (refs == NULL) {
> +		tuple_bigrefs_free();

8. On free you would destroy all the valid bigrefs too. Anyway just put
panic() here.

> +		diag_set(OutOfMemory, capacity * sizeof(*bigrefs.refs),
> +			 "realloc", "bigrefs.refs");
> +		return TUPLE_BIGREF_ERROR;
> +	}
> +	bigrefs.refs = refs;
> +	bigrefs.capacity = capacity;
> +	return bigrefs.size++;
> +}
> +
> +int
> +tuple_ref_slow(struct tuple *tuple)
> +{
> +	if(tuple->refs == TUPLE_REF_MAX) {

9. It must be an assertion. You should not call tuple_ref_slow while
a tuple can still use its local refs.

> +		uint16_t index = tuple_bigref_retrieve();
> +		if(index > TUPLE_BIGREF_MAX_CAPACITY) {
> +			panic("Tuple reference counter overflow");
> +			return -1;
> +		}
> +		tuple->refs = TUPLE_BIGREF_FLAG | index;
> +		bigrefs.refs[index] = TUPLE_REF_MAX;
> +	}
> +	uint16_t index = tuple->refs ^ TUPLE_BIGREF_FLAG;
> +	if (bigrefs.refs[index] > TUPLE_BIGREF_MAX - 1) {
> +		panic("Tuple reference counter overflow");
> +		return -1;
> +	}
> +	bigrefs.refs[index]++;
> +	return 0;
> +}
> +
> +/**
> + * Try to decrease allocated memory if it is possible. Free memory when
> + * size == 0.
> + */
> +static inline void
> +tuple_bigrefs_release(void)
> +{
> +	if(bigrefs.size == 0) {
> +		tuple_bigrefs_free();
> +		return;
> +	}
> +	if(bigrefs.capacity == TUPLE_BIGREF_MIN_CAPACITY)
> +		return;
> +	uint16_t reserved_bigrefs = 0;
> +	uint16_t capacity = bigrefs.capacity;
> +	for(uint16_t i = 0; i < capacity; ++i) {
> +		if(bigrefs.refs[i] != 0)
> +			reserved_bigrefs = i + 1;

10. Why do you need this? You already know the count. It is bigrefs.size.

> +	}
> +	if(reserved_bigrefs >= capacity / TUPLE_BIGREF_FACTOR) {
> +		return;

11. You already have this check in tuple_unref_slow. Let's rename this
function to tuple_bigrefs_compact, call it always after unref_slow, and
remove this check from unref_slow.

> +	} else if(reserved_bigrefs < TUPLE_BIGREF_MIN_CAPACITY) {
> +		capacity = TUPLE_BIGREF_MIN_CAPACITY;
> +	} else {
> +		while(reserved_bigrefs < capacity / TUPLE_BIGREF_FACTOR)
> +			capacity /= TUPLE_BIGREF_FACTOR;
> +	}
> +	uint32_t *refs = (uint32_t *) realloc(bigrefs.refs, capacity *
> +					      sizeof(*bigrefs.refs));
> +	if(refs == NULL) {
> +		tuple_bigrefs_free();
> +		diag_set(OutOfMemory, capacity, "realloc", "bigrefs.refs");

12. Panic.

> +		return;
> +	}
> +	bigrefs.refs = refs;
> +	bigrefs.capacity = capacity;
> +}
> +
> +void
> +tuple_unref_slow(struct tuple *tuple)
> +{
> +	uint16_t index = tuple->refs ^ TUPLE_BIGREF_FLAG;

13. Please, add an assertion that tuple->refs FLAG is set.

> +	bigrefs.refs[index]--;
> +	if(bigrefs.refs[index] <= TUPLE_REF_MAX) {
> +		tuple->refs = bigrefs.refs[index];
> +		bigrefs.refs[index] = 0;
> +		bigrefs.size--;
> +		if(bigrefs.size < bigrefs.capacity / TUPLE_BIGREF_FACTOR)
> +			tuple_bigrefs_release();
> +	}
> +}
> +
>   void
>   tuple_arena_destroy(struct slab_arena *arena)
>   {
> @@ -265,6 +431,8 @@ tuple_free(void)
>   	tuple_format_free();
>   
>   	coll_id_cache_destroy();
> +
> +	tuple_bigrefs_free();
>   }
>   
>   box_tuple_format_t *
> diff --git a/src/box/tuple.h b/src/box/tuple.h
> index e2384dd..dd5e11e 100644
> --- a/src/box/tuple.h
> +++ b/src/box/tuple.h
> @@ -774,26 +774,40 @@ tuple_field_uuid(const struct tuple *tuple, int fieldno,
>   	return 0;
>   }
>   
> -enum { TUPLE_REF_MAX = UINT16_MAX };
> +enum { TUPLE_REF_MAX = UINT16_MAX >> 1 };
> +
> +/**
> + * Increase tuple big reference counter.
> + * @param tuple Tuple to reference.
> + * @retval  0 Success.
> + * @retval -1 Error.

14. Now tuple_ref and tuple_ref_slow cannot fail. Please,
change their return type to void. Besides, you already ignore
the tuple_unref_slow() result in tuple_unref().

> + */
> +int
> +tuple_ref_slow(struct tuple *tuple);
>  
> diff --git a/test/box/select.result b/test/box/select.result
> index 4aed706..3d3a0d3 100644
> --- a/test/box/select.result
> +++ b/test/box/select.result
> @@ -619,6 +619,7 @@ collectgarbage('collect')
>   ---
>   - 0
>   ...
> +-- gh-3224 resurrect tuple bigrefs
>   s = box.schema.space.create('select', { temporary = true })
>   ---
>   ...
> @@ -634,13 +635,19 @@ lots_of_links = {}
>   ref_count = 0
>   ---
>   ...
> -while (true) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
> +while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
>   ---
> -- error: Tuple reference counter overflow
>   ...
>   ref_count
>   ---
> -- 65531
> +- 100000
> +...
> +for k, v in pairs(lots_of_links) do ref_count = ref_count - 1 end
> +---
> +...
> +ref_count
> +---
> +- 0
>   ...
>   lots_of_links = {}
>   ---
> @@ -648,3 +655,22 @@ lots_of_links = {}
>   s:drop()
>   ---
>   ...
> +-- Check that tuple deleted when ref counter = 0
> +weak_tuple = setmetatable({}, {__mode = 'v'})
> +---
> +...
> +table.insert(weak_tuple, box.tuple.new(1))

15. This test checks nothing. Once allocated, a tuple stored in a table
has 1 ref. Here you did not check that it is deleted after reaching
the bigref threshold and running GC.

I remember the problem that GC runs too early and deletes these
references before you reach the bigref threshold. But collectgarbage()
has many options other than 'collect'. Please, investigate how to stop
GC until you have reached the bigref threshold.

> +---
> +...
> +weak_tuple
> +---
> +- - [1]
> +...
> +collectgarbage('collect')
> +---
> +- 0
> +...
> +weak_tuple
> +---
> +- []
> +...
> diff --git a/test/box/select.test.lua b/test/box/select.test.lua
> index 54c2ecc..910d4f7 100644
> --- a/test/box/select.test.lua
> +++ b/test/box/select.test.lua
> @@ -124,12 +124,25 @@ test.random(s.index[0], 48)
>   s:drop()
>   
>   collectgarbage('collect')
> +
> +-- gh-3224 resurrect tuple bigrefs
> +
>   s = box.schema.space.create('select', { temporary = true })
>   index = s:create_index('primary', { type = 'tree' })
>   a = s:insert{0}
>   lots_of_links = {}
>   ref_count = 0
> -while (true) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
> +while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
> +ref_count
> +for k, v in pairs(lots_of_links) do ref_count = ref_count - 1 end
>   ref_count
>   lots_of_links = {}
>   s:drop()
> +
> +-- Check that tuple deleted when ref counter = 0
> +
> +weak_tuple = setmetatable({}, {__mode = 'v'})
> +table.insert(weak_tuple, box.tuple.new(1))
> +weak_tuple
> +collectgarbage('collect')
> +weak_tuple
> 




More information about the Tarantool-patches mailing list