Tarantool development patches archive
 help / color / mirror / Atom feed
* [tarantool-patches] [PATCH v2 1/1] box: create bigrefs for tuples
@ 2018-05-30 15:26 imeevma
  2018-05-31 12:51 ` [tarantool-patches] " Vladislav Shpilevoy
  2018-05-31 15:29 ` Vladislav Shpilevoy
  0 siblings, 2 replies; 11+ messages in thread
From: imeevma @ 2018-05-30 15:26 UTC (permalink / raw)
  To: tarantool-patches

Due to limitation of number of bigrefs being only 65535 it
was possible to reach this limitation. This patch increases
capacity of reference counter to 4 billions.

Closes #3224
---
Branch: https://github.com/tarantool/tarantool/tree/gh-3224-tuple-bigrefs
Issue: https://github.com/tarantool/tarantool/issues/3224

 src/box/errcode.h        |   2 +-
 src/box/tuple.c          | 168 +++++++++++++++++++++++++++++++++++++++++++++++
 src/box/tuple.h          |  36 ++++++----
 test/box/misc.result     |  39 ++++++-----
 test/box/select.result   |  32 ++++++++-
 test/box/select.test.lua |  15 ++++-
 6 files changed, 256 insertions(+), 36 deletions(-)

diff --git a/src/box/errcode.h b/src/box/errcode.h
index a0759f8..e009524 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -138,7 +138,7 @@ struct errcode_record {
 	/* 83 */_(ER_ROLE_EXISTS,		"Role '%s' already exists") \
 	/* 84 */_(ER_CREATE_ROLE,		"Failed to create role '%s': %s") \
 	/* 85 */_(ER_INDEX_EXISTS,		"Index '%s' already exists") \
-	/* 86 */_(ER_TUPLE_REF_OVERFLOW,	"Tuple reference counter overflow") \
+	/* 86 */_(ER_UNUSED6,			"") \
 	/* 87 */_(ER_ROLE_LOOP,			"Granting role '%s' to role '%s' would create a loop") \
 	/* 88 */_(ER_GRANT,			"Incorrect grant arguments: %s") \
 	/* 89 */_(ER_PRIV_GRANTED,		"User '%s' already has %s access on %s '%s'") \
diff --git a/src/box/tuple.c b/src/box/tuple.c
index 014f374..60280e4 100644
--- a/src/box/tuple.c
+++ b/src/box/tuple.c
@@ -48,6 +48,16 @@ enum {
 	OBJSIZE_MIN = 16,
 };
 
+/**
+* @brief Container for bigrefs.
+*/
+struct tuple_bigrefs
+{
+	uint32_t *refs;
+	uint16_t size;
+	uint16_t capacity;
+};
+
 static const double ALLOC_FACTOR = 1.05;
 
 /**
@@ -58,6 +68,12 @@ struct tuple *box_tuple_last;
 
 struct tuple_format *tuple_format_runtime;
 
+/**
+* All bigrefs of tuples.
+* \sa tuple_ref_slow()
+*/
+static struct tuple_bigrefs bigrefs;
+
 static void
 runtime_tuple_delete(struct tuple_format *format, struct tuple *tuple);
 
@@ -211,6 +227,10 @@ tuple_init(field_name_hash_f hash)
 
 	box_tuple_last = NULL;
 
+	bigrefs.size = 0;
+	bigrefs.refs = NULL;
+	bigrefs.capacity = 0;
+
 	if (coll_id_cache_init() != 0)
 		return -1;
 
@@ -244,6 +264,152 @@ tuple_arena_create(struct slab_arena *arena, struct quota *quota,
 	}
 }
 
+enum {
+	TUPLE_BIGREF_FACTOR = 2,
+	TUPLE_BIGREF_MAX = UINT32_MAX,
+	TUPLE_BIGREF_ERROR = UINT16_MAX,
+	TUPLE_BIGREF_FLAG = UINT16_MAX ^ (UINT16_MAX >> 1),
+	TUPLE_BIGREF_MIN_CAPACITY = 16,
+	TUPLE_BIGREF_MAX_CAPACITY = UINT16_MAX >> 1
+};
+
+/**
+ * Free memory that was allocated for big references.
+ */
+static inline void
+tuple_bigrefs_free(void)
+{
+	free(bigrefs.refs);
+	bigrefs.size = 0;
+	bigrefs.refs = NULL;
+	bigrefs.capacity = 0;
+}
+
+/**
+ * Return index for new big reference counter and allocate memory if needed.
+ * @retval index for new big reference counter.
+ */
+static inline uint16_t
+tuple_bigref_retrieve(void)
+{
+	if (bigrefs.size < bigrefs.capacity) {
+		for (uint16_t i = 0; i < bigrefs.capacity; ++i) {
+			if (bigrefs.refs[i] == 0) {
+				++bigrefs.size;
+				return i;
+			}
+		}
+		unreachable();
+	}
+	/* Extend the array ... */
+	uint16_t capacity = bigrefs.capacity;
+	if(bigrefs.refs == NULL) {
+		assert(bigrefs.size == 0 && bigrefs.capacity == 0);
+		capacity = TUPLE_BIGREF_MIN_CAPACITY;
+		bigrefs.refs = (uint32_t *) malloc(capacity *
+						   sizeof(*bigrefs.refs));
+		if(bigrefs.refs == NULL) {
+			diag_set(OutOfMemory, capacity * sizeof(*bigrefs.refs),
+				 "malloc", "bigrefs.refs");
+			return TUPLE_BIGREF_ERROR;
+		}
+		bigrefs.capacity = capacity;
+		return bigrefs.size++;
+	}
+	if(capacity >= TUPLE_BIGREF_MAX_CAPACITY) {
+		tuple_bigrefs_free();
+		return TUPLE_BIGREF_ERROR;
+	}
+	if(capacity > TUPLE_BIGREF_MAX_CAPACITY / TUPLE_BIGREF_FACTOR)
+		capacity = TUPLE_BIGREF_MAX_CAPACITY;
+	else
+		capacity *= TUPLE_BIGREF_FACTOR;
+	uint32_t *refs = (uint32_t *) realloc(bigrefs.refs, capacity *
+					      sizeof(*bigrefs.refs));
+	if (refs == NULL) {
+		tuple_bigrefs_free();
+		diag_set(OutOfMemory, capacity * sizeof(*bigrefs.refs),
+			 "realloc", "bigrefs.refs");
+		return TUPLE_BIGREF_ERROR;
+	}
+	bigrefs.refs = refs;
+	bigrefs.capacity = capacity;
+	return bigrefs.size++;
+}
+
+int
+tuple_ref_slow(struct tuple *tuple)
+{
+	if(tuple->refs == TUPLE_REF_MAX) {
+		uint16_t index = tuple_bigref_retrieve();
+		if(index > TUPLE_BIGREF_MAX_CAPACITY) {
+			panic("Tuple reference counter overflow");
+			return -1;
+		}
+		tuple->refs = TUPLE_BIGREF_FLAG | index;
+		bigrefs.refs[index] = TUPLE_REF_MAX;
+	}
+	uint16_t index = tuple->refs ^ TUPLE_BIGREF_FLAG;
+	if (bigrefs.refs[index] > TUPLE_BIGREF_MAX - 1) {
+		panic("Tuple reference counter overflow");
+		return -1;
+	}
+	bigrefs.refs[index]++;
+	return 0;
+}
+
+/**
+ * Try to decrease allocated memory if it is possible. Free memory when
+ * size == 0.
+ */
+static inline void
+tuple_bigrefs_release(void)
+{
+	if(bigrefs.size == 0) {
+		tuple_bigrefs_free();
+		return;
+	}
+	if(bigrefs.capacity == TUPLE_BIGREF_MIN_CAPACITY)
+		return;
+	uint16_t reserved_bigrefs = 0;
+	uint16_t capacity = bigrefs.capacity;
+	for(uint16_t i = 0; i < capacity; ++i) {
+		if(bigrefs.refs[i] != 0)
+			reserved_bigrefs = i + 1;
+	}
+	if(reserved_bigrefs >= capacity / TUPLE_BIGREF_FACTOR) {
+		return;
+	} else if(reserved_bigrefs < TUPLE_BIGREF_MIN_CAPACITY) {
+		capacity = TUPLE_BIGREF_MIN_CAPACITY;
+	} else {
+		while(reserved_bigrefs < capacity / TUPLE_BIGREF_FACTOR)
+			capacity /= TUPLE_BIGREF_FACTOR;
+	}
+	uint32_t *refs = (uint32_t *) realloc(bigrefs.refs, capacity *
+					      sizeof(*bigrefs.refs));
+	if(refs == NULL) {
+		tuple_bigrefs_free();
+		diag_set(OutOfMemory, capacity, "realloc", "bigrefs.refs");
+		return;
+	}
+	bigrefs.refs = refs;
+	bigrefs.capacity = capacity;
+}
+
+void
+tuple_unref_slow(struct tuple *tuple)
+{
+	uint16_t index = tuple->refs ^ TUPLE_BIGREF_FLAG;
+	bigrefs.refs[index]--;
+	if(bigrefs.refs[index] <= TUPLE_REF_MAX) {
+		tuple->refs = bigrefs.refs[index];
+		bigrefs.refs[index] = 0;
+		bigrefs.size--;
+		if(bigrefs.size < bigrefs.capacity / TUPLE_BIGREF_FACTOR)
+			tuple_bigrefs_release();
+	}
+}
+
 void
 tuple_arena_destroy(struct slab_arena *arena)
 {
@@ -265,6 +431,8 @@ tuple_free(void)
 	tuple_format_free();
 
 	coll_id_cache_destroy();
+
+	tuple_bigrefs_free();
 }
 
 box_tuple_format_t *
diff --git a/src/box/tuple.h b/src/box/tuple.h
index e2384dd..dd5e11e 100644
--- a/src/box/tuple.h
+++ b/src/box/tuple.h
@@ -774,26 +774,40 @@ tuple_field_uuid(const struct tuple *tuple, int fieldno,
 	return 0;
 }
 
-enum { TUPLE_REF_MAX = UINT16_MAX };
+enum { TUPLE_REF_MAX = UINT16_MAX >> 1 };
+
+/**
+ * Increase tuple big reference counter.
+ * @param tuple Tuple to reference.
+ * @retval  0 Success.
+ * @retval -1 Error.
+ */
+int
+tuple_ref_slow(struct tuple *tuple);
 
 /**
  * Increment tuple reference counter.
  * @param tuple Tuple to reference.
  * @retval  0 Success.
- * @retval -1 Too many refs error.
+ * @retval -1 Error.
  */
 static inline int
 tuple_ref(struct tuple *tuple)
 {
-	if (tuple->refs + 1 > TUPLE_REF_MAX) {
-		diag_set(ClientError, ER_TUPLE_REF_OVERFLOW);
-		return -1;
-	}
+	if (tuple->refs >= TUPLE_REF_MAX)
+		return tuple_ref_slow(tuple);
 	tuple->refs++;
 	return 0;
 }
 
 /**
+ * Decrease tuple big reference counter.
+ * @param tuple Tuple to reference.
+ */
+void
+tuple_unref_slow(struct tuple *tuple);
+
+/**
  * Decrement tuple reference counter. If it has reached zero, free the tuple.
  *
  * @pre tuple->refs + count >= 0
@@ -802,6 +816,10 @@ static inline void
 tuple_unref(struct tuple *tuple)
 {
 	assert(tuple->refs - 1 >= 0);
+	if (tuple->refs > TUPLE_REF_MAX) {
+		tuple_unref_slow(tuple);
+		return;
+	}
 
 	tuple->refs--;
 
@@ -823,12 +841,8 @@ static inline box_tuple_t *
 tuple_bless(struct tuple *tuple)
 {
 	assert(tuple != NULL);
-	/* Ensure tuple can be referenced at least once after return */
-	if (tuple->refs + 2 > TUPLE_REF_MAX) {
-		diag_set(ClientError, ER_TUPLE_REF_OVERFLOW);
+	if(tuple_ref(tuple) < 0)
 		return NULL;
-	}
-	tuple->refs++;
 	/* Remove previous tuple */
 	if (likely(box_tuple_last != NULL))
 		tuple_unref(box_tuple_last); /* do not throw */
diff --git a/test/box/misc.result b/test/box/misc.result
index 8f94f55..f7703ba 100644
--- a/test/box/misc.result
+++ b/test/box/misc.result
@@ -332,7 +332,6 @@ t;
   - 'box.error.UNKNOWN_UPDATE_OP : 28'
   - 'box.error.WRONG_COLLATION_OPTIONS : 151'
   - 'box.error.CURSOR_NO_TRANSACTION : 80'
-  - 'box.error.TUPLE_REF_OVERFLOW : 86'
   - 'box.error.ALTER_SEQUENCE : 143'
   - 'box.error.INVALID_XLOG_NAME : 75'
   - 'box.error.SAVEPOINT_EMPTY_TX : 60'
@@ -360,7 +359,7 @@ t;
   - 'box.error.VINYL_MAX_TUPLE_SIZE : 139'
   - 'box.error.LOAD_FUNCTION : 99'
   - 'box.error.INVALID_XLOG : 74'
-  - 'box.error.PRIV_NOT_GRANTED : 91'
+  - 'box.error.READ_VIEW_ABORTED : 130'
   - 'box.error.TRANSACTION_CONFLICT : 97'
   - 'box.error.GUEST_USER_PASSWORD : 96'
   - 'box.error.PROC_C : 102'
@@ -405,7 +404,7 @@ t;
   - 'box.error.injection : table: <address>
   - 'box.error.NULLABLE_MISMATCH : 153'
   - 'box.error.LAST_DROP : 15'
-  - 'box.error.NO_SUCH_ROLE : 82'
+  - 'box.error.TUPLE_FORMAT_LIMIT : 16'
   - 'box.error.DECOMPRESSION : 124'
   - 'box.error.CREATE_SEQUENCE : 142'
   - 'box.error.CREATE_USER : 43'
@@ -414,66 +413,66 @@ t;
   - 'box.error.SEQUENCE_OVERFLOW : 147'
   - 'box.error.SYSTEM : 115'
   - 'box.error.KEY_PART_IS_TOO_LONG : 118'
-  - 'box.error.TUPLE_FORMAT_LIMIT : 16'
-  - 'box.error.BEFORE_REPLACE_RET : 53'
+  - 'box.error.INJECTION : 8'
+  - 'box.error.INVALID_MSGPACK : 20'
   - 'box.error.NO_SUCH_SAVEPOINT : 61'
   - 'box.error.TRUNCATE_SYSTEM_SPACE : 137'
   - 'box.error.VY_QUOTA_TIMEOUT : 135'
   - 'box.error.WRONG_INDEX_OPTIONS : 108'
   - 'box.error.INVALID_VYLOG_FILE : 133'
   - 'box.error.INDEX_FIELD_COUNT_LIMIT : 127'
-  - 'box.error.READ_VIEW_ABORTED : 130'
+  - 'box.error.PRIV_NOT_GRANTED : 91'
   - 'box.error.USER_MAX : 56'
-  - 'box.error.PROTOCOL : 104'
+  - 'box.error.BEFORE_REPLACE_RET : 53'
   - 'box.error.TUPLE_NOT_ARRAY : 22'
   - 'box.error.KEY_PART_COUNT : 31'
   - 'box.error.ALTER_SPACE : 12'
   - 'box.error.ACTIVE_TRANSACTION : 79'
   - 'box.error.EXACT_FIELD_COUNT : 38'
   - 'box.error.DROP_SEQUENCE : 144'
-  - 'box.error.INVALID_MSGPACK : 20'
   - 'box.error.MORE_THAN_ONE_TUPLE : 41'
-  - 'box.error.RTREE_RECT : 101'
-  - 'box.error.SUB_STMT_MAX : 121'
+  - 'box.error.UPSERT_UNIQUE_SECONDARY_KEY : 105'
   - 'box.error.UNKNOWN_REQUEST_TYPE : 48'
-  - 'box.error.SPACE_EXISTS : 10'
+  - 'box.error.SUB_STMT_MAX : 121'
   - 'box.error.PROC_LUA : 32'
+  - 'box.error.SPACE_EXISTS : 10'
   - 'box.error.ROLE_NOT_GRANTED : 92'
+  - 'box.error.UNSUPPORTED : 5'
   - 'box.error.NO_SUCH_SPACE : 36'
   - 'box.error.WRONG_INDEX_PARTS : 107'
-  - 'box.error.DROP_SPACE : 11'
   - 'box.error.MIN_FIELD_COUNT : 39'
   - 'box.error.REPLICASET_UUID_MISMATCH : 63'
   - 'box.error.UPDATE_FIELD : 29'
+  - 'box.error.INDEX_EXISTS : 85'
   - 'box.error.COMPRESSION : 119'
   - 'box.error.INVALID_ORDER : 68'
-  - 'box.error.INDEX_EXISTS : 85'
   - 'box.error.SPLICE : 25'
   - 'box.error.UNKNOWN : 0'
+  - 'box.error.IDENTIFIER : 70'
   - 'box.error.DROP_PRIMARY_KEY : 17'
   - 'box.error.NULLABLE_PRIMARY : 152'
   - 'box.error.NO_SUCH_SEQUENCE : 145'
   - 'box.error.RELOAD_CFG : 58'
   - 'box.error.INVALID_UUID : 64'
-  - 'box.error.INJECTION : 8'
+  - 'box.error.DROP_SPACE : 11'
   - 'box.error.TIMEOUT : 78'
-  - 'box.error.IDENTIFIER : 70'
   - 'box.error.ITERATOR_TYPE : 72'
   - 'box.error.REPLICA_MAX : 73'
+  - 'box.error.NO_SUCH_ROLE : 82'
   - 'box.error.MISSING_REQUEST_FIELD : 69'
   - 'box.error.MISSING_SNAPSHOT : 93'
   - 'box.error.WRONG_SPACE_OPTIONS : 111'
   - 'box.error.READONLY : 7'
-  - 'box.error.UNSUPPORTED : 5'
   - 'box.error.UPDATE_INTEGER_OVERFLOW : 95'
+  - 'box.error.RTREE_RECT : 101'
   - 'box.error.NO_CONNECTION : 77'
   - 'box.error.INVALID_XLOG_ORDER : 76'
-  - 'box.error.UPSERT_UNIQUE_SECONDARY_KEY : 105'
-  - 'box.error.ROLLBACK_IN_SUB_STMT : 123'
   - 'box.error.WRONG_SCHEMA_VERSION : 109'
-  - 'box.error.UNSUPPORTED_INDEX_FEATURE : 112'
-  - 'box.error.INDEX_PART_TYPE_MISMATCH : 24'
+  - 'box.error.ROLLBACK_IN_SUB_STMT : 123'
+  - 'box.error.PROTOCOL : 104'
   - 'box.error.INVALID_XLOG_TYPE : 125'
+  - 'box.error.INDEX_PART_TYPE_MISMATCH : 24'
+  - 'box.error.UNSUPPORTED_INDEX_FEATURE : 112'
 ...
 test_run:cmd("setopt delimiter ''");
 ---
diff --git a/test/box/select.result b/test/box/select.result
index 4aed706..3d3a0d3 100644
--- a/test/box/select.result
+++ b/test/box/select.result
@@ -619,6 +619,7 @@ collectgarbage('collect')
 ---
 - 0
 ...
+-- gh-3224 resurrect tuple bigrefs
 s = box.schema.space.create('select', { temporary = true })
 ---
 ...
@@ -634,13 +635,19 @@ lots_of_links = {}
 ref_count = 0
 ---
 ...
-while (true) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
+while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
 ---
-- error: Tuple reference counter overflow
 ...
 ref_count
 ---
-- 65531
+- 100000
+...
+for k, v in pairs(lots_of_links) do ref_count = ref_count - 1 end
+---
+...
+ref_count
+---
+- 0
 ...
 lots_of_links = {}
 ---
@@ -648,3 +655,22 @@ lots_of_links = {}
 s:drop()
 ---
 ...
+-- Check that tuple deleted when ref counter = 0
+weak_tuple = setmetatable({}, {__mode = 'v'})
+---
+...
+table.insert(weak_tuple, box.tuple.new(1))
+---
+...
+weak_tuple
+---
+- - [1]
+...
+collectgarbage('collect')
+---
+- 0
+...
+weak_tuple
+---
+- []
+...
diff --git a/test/box/select.test.lua b/test/box/select.test.lua
index 54c2ecc..910d4f7 100644
--- a/test/box/select.test.lua
+++ b/test/box/select.test.lua
@@ -124,12 +124,25 @@ test.random(s.index[0], 48)
 s:drop()
 
 collectgarbage('collect')
+
+-- gh-3224 resurrect tuple bigrefs
+
 s = box.schema.space.create('select', { temporary = true })
 index = s:create_index('primary', { type = 'tree' })
 a = s:insert{0}
 lots_of_links = {}
 ref_count = 0
-while (true) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
+while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
+ref_count
+for k, v in pairs(lots_of_links) do ref_count = ref_count - 1 end
 ref_count
 lots_of_links = {}
 s:drop()
+
+-- Check that tuple deleted when ref counter = 0
+
+weak_tuple = setmetatable({}, {__mode = 'v'})
+table.insert(weak_tuple, box.tuple.new(1))
+weak_tuple
+collectgarbage('collect')
+weak_tuple
-- 
2.7.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [tarantool-patches] Re: [PATCH v2 1/1] box: create bigrefs for tuples
  2018-05-30 15:26 [tarantool-patches] [PATCH v2 1/1] box: create bigrefs for tuples imeevma
@ 2018-05-31 12:51 ` Vladislav Shpilevoy
  2018-05-31 15:29 ` Vladislav Shpilevoy
  1 sibling, 0 replies; 11+ messages in thread
From: Vladislav Shpilevoy @ 2018-05-31 12:51 UTC (permalink / raw)
  To: tarantool-patches, imeevma

Hello. Thanks for the fixes! See 15 comments below.

On 30/05/2018 18:26, imeevma@tarantool.org wrote:
> Due to limitation of number of bigrefs being only 65535 it
> was possible to reach this limitation. This patch increases
> capacity of reference counter to 4 billions.
> 
> Closes #3224
> ---
> Branch: https://github.com/tarantool/tarantool/tree/gh-3224-tuple-bigrefs
> Issue: https://github.com/tarantool/tarantool/issues/3224

1. As I said in the previous review: use your gh login as a prefix for a
branch name.

> 
>   src/box/errcode.h        |   2 +-
>   src/box/tuple.c          | 168 +++++++++++++++++++++++++++++++++++++++++++++++
>   src/box/tuple.h          |  36 ++++++----
>   test/box/misc.result     |  39 ++++++-----
>   test/box/select.result   |  32 ++++++++-
>   test/box/select.test.lua |  15 ++++-
>   6 files changed, 256 insertions(+), 36 deletions(-)
> 
> diff --git a/src/box/errcode.h b/src/box/errcode.h
> index a0759f8..e009524 100644
> --- a/src/box/errcode.h
> +++ b/src/box/errcode.h
> @@ -138,7 +138,7 @@ struct errcode_record {
>   	/* 83 */_(ER_ROLE_EXISTS,		"Role '%s' already exists") \
>   	/* 84 */_(ER_CREATE_ROLE,		"Failed to create role '%s': %s") \
>   	/* 85 */_(ER_INDEX_EXISTS,		"Index '%s' already exists") \
> -	/* 86 */_(ER_TUPLE_REF_OVERFLOW,	"Tuple reference counter overflow") \
> +	/* 86 */_(ER_UNUSED6,			"") \
>   	/* 87 */_(ER_ROLE_LOOP,			"Granting role '%s' to role '%s' would create a loop") \
>   	/* 88 */_(ER_GRANT,			"Incorrect grant arguments: %s") \
>   	/* 89 */_(ER_PRIV_GRANTED,		"User '%s' already has %s access on %s '%s'") \
> diff --git a/src/box/tuple.c b/src/box/tuple.c
> index 014f374..60280e4 100644
> --- a/src/box/tuple.c
> +++ b/src/box/tuple.c
> @@ -48,6 +48,16 @@ enum {
>   	OBJSIZE_MIN = 16,
>   };
>   
> +/**
> +* @brief Container for bigrefs.

2. Please, describe in details what bigrefs is, how works and why is needed.

> +*/
> +struct tuple_bigrefs
> +{
> +	uint32_t *refs;
> +	uint16_t size;
> +	uint16_t capacity;
> +};
> +
>   static const double ALLOC_FACTOR = 1.05;
>   
>   /**
> @@ -244,6 +264,152 @@ tuple_arena_create(struct slab_arena *arena, struct quota *quota,
>   	}
>   }
>   
> +enum {
> +	TUPLE_BIGREF_FACTOR = 2,
> +	TUPLE_BIGREF_MAX = UINT32_MAX,
> +	TUPLE_BIGREF_ERROR = UINT16_MAX,

3. As we discussed with Kostja, now tuple_ref must either be finished
ok, or finish the process with panic(). You should not have
TUPLE_BIGREF_ERROR.

> +	TUPLE_BIGREF_FLAG = UINT16_MAX ^ (UINT16_MAX >> 1),> +	TUPLE_BIGREF_MIN_CAPACITY = 16,
> +	TUPLE_BIGREF_MAX_CAPACITY = UINT16_MAX >> 1
> +};
> +
> +/**
> + * Free memory that was allocated for big references.
> + */
> +static inline void
> +tuple_bigrefs_free(void)

4. Please, obey the convention:
create/destroy functions for an object with no freeing/allocating itself,
and new/delete functions for an object that must be allocated/freed.

Here you do destroy.

> +{
> +	free(bigrefs.refs);
> +	bigrefs.size = 0;
> +	bigrefs.refs = NULL;
> +	bigrefs.capacity = 0;
> +}
> +
> +/**
> + * Return index for new big reference counter and allocate memory if needed.
> + * @retval index for new big reference counter.
> + */
> +static inline uint16_t
> +tuple_bigref_retrieve(void)
> +{
> +	if (bigrefs.size < bigrefs.capacity) {
> +		for (uint16_t i = 0; i < bigrefs.capacity; ++i) {
> +			if (bigrefs.refs[i] == 0) {
> +				++bigrefs.size;
> +				return i;
> +			}
> +		}
> +		unreachable();
> +	}
> +	/* Extend the array ... */

5. I used this comment in review as a placeholder for the code. You should
not include it here.

> +	uint16_t capacity = bigrefs.capacity;
> +	if(bigrefs.refs == NULL) {
> +		assert(bigrefs.size == 0 && bigrefs.capacity == 0);
> +		capacity = TUPLE_BIGREF_MIN_CAPACITY;
> +		bigrefs.refs = (uint32_t *) malloc(capacity *
> +						   sizeof(*bigrefs.refs));

6. Please, reuse realloc. Realloc works ok with NULL.

if capacity == 0 then
	new_capacity = TUPLE_BIGREF_MIN_CAPACITY
else
	new_capacity = capacity * 2
refs = realloc(refs, new_capacity)
...

> +		if(bigrefs.refs == NULL) {
> +			diag_set(OutOfMemory, capacity * sizeof(*bigrefs.refs),
> +				 "malloc", "bigrefs.refs");
> +			return TUPLE_BIGREF_ERROR;
> +		}
> +		bigrefs.capacity = capacity;
> +		return bigrefs.size++;
> +	}
> +	if(capacity >= TUPLE_BIGREF_MAX_CAPACITY) {
> +		tuple_bigrefs_free();
> +		return TUPLE_BIGREF_ERROR;

7. Panic.

> +	}
> +	if(capacity > TUPLE_BIGREF_MAX_CAPACITY / TUPLE_BIGREF_FACTOR)
> +		capacity = TUPLE_BIGREF_MAX_CAPACITY;
> +	else
> +		capacity *= TUPLE_BIGREF_FACTOR;
> +	uint32_t *refs = (uint32_t *) realloc(bigrefs.refs, capacity *
> +					      sizeof(*bigrefs.refs));
> +	if (refs == NULL) {
> +		tuple_bigrefs_free();

8. On free you would destroy all the valid bigrefs too. Anyway just put
panic() here.

> +		diag_set(OutOfMemory, capacity * sizeof(*bigrefs.refs),
> +			 "realloc", "bigrefs.refs");
> +		return TUPLE_BIGREF_ERROR;
> +	}
> +	bigrefs.refs = refs;
> +	bigrefs.capacity = capacity;
> +	return bigrefs.size++;
> +}
> +
> +int
> +tuple_ref_slow(struct tuple *tuple)
> +{
> +	if(tuple->refs == TUPLE_REF_MAX) {

9. It must be an assertion. You should not call tuple_ref_slow when
a tuple still can use its local refs.

> +		uint16_t index = tuple_bigref_retrieve();
> +		if(index > TUPLE_BIGREF_MAX_CAPACITY) {
> +			panic("Tuple reference counter overflow");
> +			return -1;
> +		}
> +		tuple->refs = TUPLE_BIGREF_FLAG | index;
> +		bigrefs.refs[index] = TUPLE_REF_MAX;
> +	}
> +	uint16_t index = tuple->refs ^ TUPLE_BIGREF_FLAG;
> +	if (bigrefs.refs[index] > TUPLE_BIGREF_MAX - 1) {
> +		panic("Tuple reference counter overflow");
> +		return -1;
> +	}
> +	bigrefs.refs[index]++;
> +	return 0;
> +}
> +
> +/**
> + * Try to decrease allocated memory if it is possible. Free memory when
> + * size == 0.
> + */
> +static inline void
> +tuple_bigrefs_release(void)
> +{
> +	if(bigrefs.size == 0) {
> +		tuple_bigrefs_free();
> +		return;
> +	}
> +	if(bigrefs.capacity == TUPLE_BIGREF_MIN_CAPACITY)
> +		return;
> +	uint16_t reserved_bigrefs = 0;
> +	uint16_t capacity = bigrefs.capacity;
> +	for(uint16_t i = 0; i < capacity; ++i) {
> +		if(bigrefs.refs[i] != 0)
> +			reserved_bigrefs = i + 1;

10. Why do you need this? You already know the count. It is bigrefs.size.

> +	}
> +	if(reserved_bigrefs >= capacity / TUPLE_BIGREF_FACTOR) {
> +		return;

11. You already have this check in tuple_unref_slow. Lets rename this
function to tuple_bigrefs_compact, call it always after unref_slow, and
remove this check from unref_slow.

> +	} else if(reserved_bigrefs < TUPLE_BIGREF_MIN_CAPACITY) {
> +		capacity = TUPLE_BIGREF_MIN_CAPACITY;
> +	} else {
> +		while(reserved_bigrefs < capacity / TUPLE_BIGREF_FACTOR)
> +			capacity /= TUPLE_BIGREF_FACTOR;
> +	}
> +	uint32_t *refs = (uint32_t *) realloc(bigrefs.refs, capacity *
> +					      sizeof(*bigrefs.refs));
> +	if(refs == NULL) {
> +		tuple_bigrefs_free();
> +		diag_set(OutOfMemory, capacity, "realloc", "bigrefs.refs");

12. Panic.

> +		return;
> +	}
> +	bigrefs.refs = refs;
> +	bigrefs.capacity = capacity;
> +}
> +
> +void
> +tuple_unref_slow(struct tuple *tuple)
> +{
> +	uint16_t index = tuple->refs ^ TUPLE_BIGREF_FLAG;

13. Please, add an assertion that tuple->refs FLAG is set.

> +	bigrefs.refs[index]--;
> +	if(bigrefs.refs[index] <= TUPLE_REF_MAX) {
> +		tuple->refs = bigrefs.refs[index];
> +		bigrefs.refs[index] = 0;
> +		bigrefs.size--;
> +		if(bigrefs.size < bigrefs.capacity / TUPLE_BIGREF_FACTOR)
> +			tuple_bigrefs_release();
> +	}
> +}
> +
>   void
>   tuple_arena_destroy(struct slab_arena *arena)
>   {
> @@ -265,6 +431,8 @@ tuple_free(void)
>   	tuple_format_free();
>   
>   	coll_id_cache_destroy();
> +
> +	tuple_bigrefs_free();
>   }
>   
>   box_tuple_format_t *
> diff --git a/src/box/tuple.h b/src/box/tuple.h
> index e2384dd..dd5e11e 100644
> --- a/src/box/tuple.h
> +++ b/src/box/tuple.h
> @@ -774,26 +774,40 @@ tuple_field_uuid(const struct tuple *tuple, int fieldno,
>   	return 0;
>   }
>   
> -enum { TUPLE_REF_MAX = UINT16_MAX };
> +enum { TUPLE_REF_MAX = UINT16_MAX >> 1 };
> +
> +/**
> + * Increase tuple big reference counter.
> + * @param tuple Tuple to reference.
> + * @retval  0 Success.
> + * @retval -1 Error.

14. Now tuple_ref and tuple_ref_slow can not fail. Please,
change their retval to void. Anyway you already ignore
tuple_unref_slow() result in tuple_unref().

> + */
> +int
> +tuple_ref_slow(struct tuple *tuple);
>  
> diff --git a/test/box/select.result b/test/box/select.result
> index 4aed706..3d3a0d3 100644
> --- a/test/box/select.result
> +++ b/test/box/select.result
> @@ -619,6 +619,7 @@ collectgarbage('collect')
>   ---
>   - 0
>   ...
> +-- gh-3224 resurrect tuple bigrefs
>   s = box.schema.space.create('select', { temporary = true })
>   ---
>   ...
> @@ -634,13 +635,19 @@ lots_of_links = {}
>   ref_count = 0
>   ---
>   ...
> -while (true) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
> +while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
>   ---
> -- error: Tuple reference counter overflow
>   ...
>   ref_count
>   ---
> -- 65531
> +- 100000
> +...
> +for k, v in pairs(lots_of_links) do ref_count = ref_count - 1 end
> +---
> +...
> +ref_count
> +---
> +- 0
>   ...
>   lots_of_links = {}
>   ---
> @@ -648,3 +655,22 @@ lots_of_links = {}
>   s:drop()
>   ---
>   ...
> +-- Check that tuple deleted when ref counter = 0
> +weak_tuple = setmetatable({}, {__mode = 'v'})
> +---
> +...
> +table.insert(weak_tuple, box.tuple.new(1))

15. This test checks nothing. Once allocated tuple stored in a table
has 1 ref. Here you did not check, that it is deleted after reaching
bigref and called gc.

I remember about the problem that GC works too early and deletes this
references before you reach the bigref. But collectgarbage() has many
options other than 'collect'. Please, investigate how to stop GC until
you reached bigref.

> +---
> +...
> +weak_tuple
> +---
> +- - [1]
> +...
> +collectgarbage('collect')
> +---
> +- 0
> +...
> +weak_tuple
> +---
> +- []
> +...
> diff --git a/test/box/select.test.lua b/test/box/select.test.lua
> index 54c2ecc..910d4f7 100644
> --- a/test/box/select.test.lua
> +++ b/test/box/select.test.lua
> @@ -124,12 +124,25 @@ test.random(s.index[0], 48)
>   s:drop()
>   
>   collectgarbage('collect')
> +
> +-- gh-3224 resurrect tuple bigrefs
> +
>   s = box.schema.space.create('select', { temporary = true })
>   index = s:create_index('primary', { type = 'tree' })
>   a = s:insert{0}
>   lots_of_links = {}
>   ref_count = 0
> -while (true) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
> +while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
> +ref_count
> +for k, v in pairs(lots_of_links) do ref_count = ref_count - 1 end
>   ref_count
>   lots_of_links = {}
>   s:drop()
> +
> +-- Check that tuple deleted when ref counter = 0
> +
> +weak_tuple = setmetatable({}, {__mode = 'v'})
> +table.insert(weak_tuple, box.tuple.new(1))
> +weak_tuple
> +collectgarbage('collect')
> +weak_tuple
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [tarantool-patches] Re: [PATCH v2 1/1] box: create bigrefs for tuples
  2018-05-30 15:26 [tarantool-patches] [PATCH v2 1/1] box: create bigrefs for tuples imeevma
  2018-05-31 12:51 ` [tarantool-patches] " Vladislav Shpilevoy
@ 2018-05-31 15:29 ` Vladislav Shpilevoy
  1 sibling, 0 replies; 11+ messages in thread
From: Vladislav Shpilevoy @ 2018-05-31 15:29 UTC (permalink / raw)
  To: tarantool-patches, imeevma

One more comment. I have an idea how to remove FLAG from enum.

Lets do this:

diff --git a/src/box/tuple.h b/src/box/tuple.h
index dd5e11e5e..e8bcf5cf9 100644
--- a/src/box/tuple.h
+++ b/src/box/tuple.h
@@ -308,7 +308,8 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const
  struct PACKED tuple
  {
         /** reference counter */
-       uint16_t refs;
+       uint16_t refs : 15;
+       bool is_bigref : 1;
         /** format identifier */
         uint16_t format_id;
         /**

So you still store 15 bits refs and 1 bit flag. But now can access
them directly with no xor. I have checked, it weights the same byte count as
single uint16_t field. (sizeof(tuple) is still 10).

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2 1/1] box: create bigrefs for tuples
  2018-06-04 21:20       ` Vladislav Shpilevoy
@ 2018-06-08  3:27         ` Konstantin Osipov
  0 siblings, 0 replies; 11+ messages in thread
From: Konstantin Osipov @ 2018-06-08  3:27 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Vladimir Davydov, Imeev Mergen

* Vladislav Shpilevoy <v.shpilevoy@tarantool.org> [18/06/05 00:21]:
> Sorry, Vladimir. Please, disregard it. I have found and fixed a crash.
> Looks like Mergen must implement unit tests.

Just sent my email and read this one :)


-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2 1/1] box: create bigrefs for tuples
  2018-06-04 20:57     ` Vladislav Shpilevoy
  2018-06-04 21:20       ` Vladislav Shpilevoy
  2018-06-08  3:18       ` Konstantin Osipov
@ 2018-06-08  3:26       ` Konstantin Osipov
  2 siblings, 0 replies; 11+ messages in thread
From: Konstantin Osipov @ 2018-06-08  3:26 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Vladimir Davydov, Imeev Mergen

* Vladislav Shpilevoy <v.shpilevoy@tarantool.org> [18/06/05 00:01]:
> +static inline void
> +bigref_list_delete_index(uint16_t index)
> +{
> +	bigref_list.refs[index] = 0;
> +	if (--bigref_list.size == 0) {
> +		bigref_list_destroy();
> +		return;
> +	}
> +	if (bigref_list.capacity == BIGREF_MIN_CAPACITY ||
> +	    bigref_list.size > bigref_list.capacity / BIGREF_FACTOR)
> +		return;
> +
> +	uint16_t top_index = bigref_list.capacity - 1;
> +	while (bigref_list.refs[top_index] == 0)
> +		top_index--;
> +
> +	uint16_t needed_capacity = top_index + 1;
> +	if (needed_capacity < BIGREF_MIN_CAPACITY)
> +		needed_capacity = BIGREF_MIN_CAPACITY;
> +	if (needed_capacity > bigref_list.capacity / BIGREF_FACTOR)
> +		return;
> +	/* Round up capacity to the next highest power of 2. */
> +	assert(sizeof(needed_capacity) == sizeof(uint16_t));
> +	needed_capacity--;
> +	needed_capacity |= needed_capacity >> 1;
> +	needed_capacity |= needed_capacity >> 2;
> +	needed_capacity |= needed_capacity >> 4;
> +	needed_capacity |= needed_capacity >> 8;
> +	assert(needed_capacity < UINT16_MAX);
> +	needed_capacity++;

This is perhaps a very elegant piece of code, but it 
has no test. And a functional test for it would be difficult.
Could you please write a unit test?

-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2 1/1] box: create bigrefs for tuples
  2018-06-04 20:57     ` Vladislav Shpilevoy
  2018-06-04 21:20       ` Vladislav Shpilevoy
@ 2018-06-08  3:18       ` Konstantin Osipov
  2018-06-08  3:26       ` Konstantin Osipov
  2 siblings, 0 replies; 11+ messages in thread
From: Konstantin Osipov @ 2018-06-08  3:18 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Vladimir Davydov, Imeev Mergen

* Vladislav Shpilevoy <v.shpilevoy@tarantool.org> [18/06/05 00:01]:
> > +/** Initialize big references container. */
> > +static inline void
> > +bigref_list_create()
> > +{
> > +    bigref_list.size = 0;
> > +    bigref_list.refs = NULL;
> > +    bigref_list.capacity = 0;
> 
> 2. Memset is faster than per-member resetting. We with
> Vladimir Davydov have benched it.

This code runs only once at program startup. Please do not apply
memset() optimization to it.

I have been using memset() myself for things I have seen take a
lot of CPU time with a profiler and  on a very common workload. 

Thanks,

-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2 1/1] box: create bigrefs for tuples
  2018-06-04 20:57     ` Vladislav Shpilevoy
@ 2018-06-04 21:20       ` Vladislav Shpilevoy
  2018-06-08  3:27         ` Konstantin Osipov
  2018-06-08  3:18       ` Konstantin Osipov
  2018-06-08  3:26       ` Konstantin Osipov
  2 siblings, 1 reply; 11+ messages in thread
From: Vladislav Shpilevoy @ 2018-06-04 21:20 UTC (permalink / raw)
  To: tarantool-patches, Vladimir Davydov; +Cc: Imeev Mergen

Sorry, Vladimir. Please, disregard it. I have found and fixed a crash.
Looks like Mergen must implement unit tests.

Test bigrefs via Lua is not possible because of its memory limitation.

Mergen, please, implement unit tests on C for your patch and for my
follow up. You must cover each branch in each new function.


On 04/06/2018 23:57, Vladislav Shpilevoy wrote:
> Thanks for the patch, Mergen! Please look at my 5 comments.
> 
> Vova, please, take a look on the patch. I have pasted it at the end
> of the letter.
> 
> On 04/06/2018 17:21, Imeev Mergen wrote:
>>
>>
>> On 06/04/2018 02:21 PM, Vladislav Shpilevoy wrote:
>>> Hello. Thanks for the patch! It is almost perfect! See 6 comments
>>> below.
>>>
>>> On 01/06/2018 18:41, imeevma@tarantool.org wrote:
>>>> Due to limitation of reference counters for tuple being only
>>>> 65535 it was possible to reach this limitation. This patch
>>>> increases capacity of reference counters to 4 billions.
>>>>
>>>> Closes #3224
>>>> ---
>>>> Branch: https://github.com/tarantool/tarantool/tree/imeevma/gh-3224-tuple-bigrefs
>>>> Issue: https://github.com/tarantool/tarantool/issues/3224
>>>>
>>
>> commit 7b7965f6c5462142a607d9db542d698fc94fbbbe
>> Author: Mergen Imeev <imeevma@gmail.com>
>> Date:   Mon May 28 19:17:51 2018 +0300
>>
>>      box: create bigrefs for tuples
>>
>>      Due to limitation of reference counters for tuple being only
>>      65535 it was possible to reach this limitation. This patch
>>      increases capacity of reference counters to 4 billions.
>>
>>      Closes #3224
>>
>> diff --git a/src/box/tuple.c b/src/box/tuple.c
>> index 014f374..6e63433 100644
>> --- a/src/box/tuple.c
>> +++ b/src/box/tuple.c
>> @@ -48,6 +48,26 @@ enum {
>>       OBJSIZE_MIN = 16,
>>   };
>>
>> +/**
>> + * Container for big reference counters. Contains array of big
>> + * reference counters, size of this array and number of non-zero
>> + * big reference counters. When reference counter of tuple becomes
>> + * more than 32767, field refs of this tuple becomes index of big
>> + * reference counter in big reference counter array and field
>> + * is_bigref is set true. The moment big reference becomes equal
>> + * 32767 it is set to 0, refs of the tuple becomes 32767 and
>> + * is_bigref becomes false. Big reference counter can be equal to
>> + * 0 or be more than 32767.
>> + */
>> +struct bigref_list {
>> +    /** Array of big reference counters */
>> +    uint32_t *refs;
>> +    /** Number of non-zero elements in the array */
>> +    uint16_t size;
>> +    /** Capacity of the array */
>> +    uint16_t capacity;
>> +};
> 
> 1. Instead of
> 
>      struct name {
>          ...
>      };
>      static struct name name;
> 
> you can do this
> 
>      static struct name {
>          ...
>      } name;
> 
>> +
>>   static const double ALLOC_FACTOR = 1.05;
>>
>>   /**
>> @@ -151,6 +177,15 @@ tuple_validate_raw(struct tuple_format *format, const char *tuple)
>>       return 0;
>>   }
>>
>> +/** Initialize big references container. */
>> +static inline void
>> +bigref_list_create()
>> +{
>> +    bigref_list.size = 0;
>> +    bigref_list.refs = NULL;
>> +    bigref_list.capacity = 0;
> 
> 2. Memset is faster than per-member resetting. We with
> Vladimir Davydov have benched it.
> 
>> +/**
>> + * Return index for new big reference counter and allocate memory
>> + * if needed.
>> + * @retval index for new big reference counter.
>> + */
>> +static inline uint16_t
>> +tuple_bigref_retrieve()
>> +{
>> +    if (bigref_list.size < bigref_list.capacity) {
>> +        uint16_t vacant_index = 0;
>> +        while (bigref_list.refs[vacant_index] != 0)
>> +            ++vacant_index;
>> +        return vacant_index;
>> +    }
>> +    /* Extend the array. */
>> +    uint16_t capacity = bigref_list.capacity;
>> +    if (capacity == 0) {
>> +        capacity = TUPLE_BIGREF_MIN_CAPACITY;
>> +    } else {
>> +        capacity *= TUPLE_BIGREF_FACTOR;
>> +        if (capacity >= TUPLE_BIGREF_MAX_CAPACITY)
> 
> 3. By such check you have limited the capacity with 16384 bigrefs instead of
> 32k. Capacity here is always a power of 2. TUPLE_BIGREF_MAX_CAPACITY is
> 2^15. So here after 2^14 you get overflow. 2^14 = 16384. I have fixed it on
> the branch.
> 
>> +            panic("Too many big references");
>> +    }
>> +    uint32_t *refs = (uint32_t *) realloc(bigref_list.refs, capacity *
>> +                          sizeof(*bigref_list.refs));
>> +    if (refs == NULL) {
>> +        panic("failed to reallocate %zu bytes: Cannot allocate"\
>> +              " memory.", capacity * sizeof(*bigref_list.refs));
>> +    }
>> +    bigref_list.refs = refs;
>> +    memset(bigref_list.refs + bigref_list.capacity, 0, capacity -
>> +           bigref_list.capacity);
>> +    bigref_list.capacity = capacity;
>> +    return bigref_list.size++;
>> +}
>> +
>> +/**
>> + * Try to decrease allocated memory if it is possible. Free memory
>> + * when size == 0.
>> + */
>> +static inline void
>> +bigref_list_compact()
>> +{
>> +    if (bigref_list.size == 0) {
>> +        bigref_list_destroy();
>> +        return;
>> +    }
>> +    if (bigref_list.capacity == TUPLE_BIGREF_MIN_CAPACITY ||
>> +        bigref_list.size > bigref_list.capacity / TUPLE_BIGREF_FACTOR)
>> +        return;
>> +    uint16_t capacity = bigref_list.capacity;
>> +    while (bigref_list.refs[capacity - 1] == 0)
>> +        capacity--;
>> +    if (capacity < TUPLE_BIGREF_MIN_CAPACITY)
>> +        capacity = TUPLE_BIGREF_MIN_CAPACITY;
>> +    if (capacity > bigref_list.capacity / TUPLE_BIGREF_FACTOR)
>> +        return;
>> +    /* Round up capacity to the next highest power of 2*/
>> +    capacity--;
>> +    capacity |= capacity >> 1;
>> +    capacity |= capacity >> 2;
>> +    capacity |= capacity >> 4;
>> +    capacity |= capacity >> 8;
>> +    capacity |= capacity >> 16;
> 
> 4. This is because blind copy and paste is bad. The algorithm implementation
> I provided to you was for 32 bit unsigned integers, but here you have 16 bits
> only, so capacity >> 16 is always 0.
> 
>> +    capacity++;
>> +    uint32_t *refs = (uint32_t *) realloc(bigref_list.refs, capacity *
>> +                          sizeof(*bigref_list.refs));
>> +    if (refs == NULL)
>> +        panic("failed to reallocate %zu bytes: Cannot allocate"\
>> +              " memory.", capacity * sizeof(*bigref_list.refs));
>> +    bigref_list.refs = refs;
>> +    bigref_list.capacity = capacity;
>> +}
>> +
> 
> 5. I have removed tuple_bless == NULL checks from all the code.
> 
> Here the patch:
> 
> ============================================================================
> 
> commit 9d82fa9f9ae24c7dc91cf9dd7a416ec533fb8147
> Author: Mergen Imeev <imeevma@gmail.com>
> Date:   Mon May 28 19:17:51 2018 +0300
> 
>      box: create bigrefs for tuples
>      Due to limitation of reference counters for tuple being only
>      65535 it was possible to reach this limitation. This patch
>      increases capacity of reference counters to 4 billions.
>      Closes #3224
> 
> diff --git a/src/box/box.cc b/src/box/box.cc
> index c728a4c53..42578613d 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -174,20 +174,22 @@ process_rw(struct request *request, struct space *space, struct tuple **result)
>           txn_rollback_stmt();
>           return -1;
>       }
> +    if (result == NULL)
> +        return txn_commit_stmt(txn, request);
> +    *result = tuple;
> +    if (tuple == NULL)
> +        return txn_commit_stmt(txn, request);
>       /*
>        * Pin the tuple locally before the commit,
>        * otherwise it may go away during yield in
>        * when WAL is written in autocommit mode.
>        */
> -    TupleRefNil ref(tuple);
> -    if (txn_commit_stmt(txn, request) != 0)
> -        return -1;
> -    if (result != NULL) {
> -        if (tuple != NULL && tuple_bless(tuple) == NULL)
> -            return -1;
> -        *result = tuple;
> -    }
> -    return 0;
> +    tuple_ref(tuple);
> +    int rc = txn_commit_stmt(txn, request);
> +    if (rc == 0)
> +        tuple_bless(tuple);
> +    tuple_unref(tuple);
> +    return rc;
>   }
> 
>   void
> diff --git a/src/box/errcode.h b/src/box/errcode.h
> index a0759f8f4..e009524ad 100644
> --- a/src/box/errcode.h
> +++ b/src/box/errcode.h
> @@ -138,7 +138,7 @@ struct errcode_record {
>       /* 83 */_(ER_ROLE_EXISTS,        "Role '%s' already exists") \
>       /* 84 */_(ER_CREATE_ROLE,        "Failed to create role '%s': %s") \
>       /* 85 */_(ER_INDEX_EXISTS,        "Index '%s' already exists") \
> -    /* 86 */_(ER_TUPLE_REF_OVERFLOW,    "Tuple reference counter overflow") \
> +    /* 86 */_(ER_UNUSED6,            "") \
>       /* 87 */_(ER_ROLE_LOOP,            "Granting role '%s' to role '%s' would create a loop") \
>       /* 88 */_(ER_GRANT,            "Incorrect grant arguments: %s") \
>       /* 89 */_(ER_PRIV_GRANTED,        "User '%s' already has %s access on %s '%s'") \
> diff --git a/src/box/index.cc b/src/box/index.cc
> index 3c62ec1cc..f992bc94c 100644
> --- a/src/box/index.cc
> +++ b/src/box/index.cc
> @@ -220,8 +220,8 @@ box_index_random(uint32_t space_id, uint32_t index_id, uint32_t rnd,
>       /* No tx management, random() is for approximation anyway. */
>       if (index_random(index, rnd, result) != 0)
>           return -1;
> -    if (*result != NULL && tuple_bless(*result) == NULL)
> -        return -1;
> +    if (*result != NULL)
> +        tuple_bless(*result);
>       return 0;
>   }
> 
> @@ -253,8 +253,8 @@ box_index_get(uint32_t space_id, uint32_t index_id, const char *key,
>       txn_commit_ro_stmt(txn);
>       /* Count statistics. */
>       rmean_collect(rmean_box, IPROTO_SELECT, 1);
> -    if (*result != NULL && tuple_bless(*result) == NULL)
> -        return -1;
> +    if (*result != NULL)
> +        tuple_bless(*result);
>       return 0;
>   }
> 
> @@ -285,8 +285,8 @@ box_index_min(uint32_t space_id, uint32_t index_id, const char *key,
>           return -1;
>       }
>       txn_commit_ro_stmt(txn);
> -    if (*result != NULL && tuple_bless(*result) == NULL)
> -        return -1;
> +    if (*result != NULL)
> +        tuple_bless(*result);
>       return 0;
>   }
> 
> @@ -317,8 +317,8 @@ box_index_max(uint32_t space_id, uint32_t index_id, const char *key,
>           return -1;
>       }
>       txn_commit_ro_stmt(txn);
> -    if (*result != NULL && tuple_bless(*result) == NULL)
> -        return -1;
> +    if (*result != NULL)
> +        tuple_bless(*result);
>       return 0;
>   }
> 
> @@ -397,8 +397,8 @@ box_iterator_next(box_iterator_t *itr, box_tuple_t **result)
>       assert(result != NULL);
>       if (iterator_next(itr, result) != 0)
>           return -1;
> -    if (*result != NULL && tuple_bless(*result) == NULL)
> -        return -1;
> +    if (*result != NULL)
> +        tuple_bless(*result);
>       return 0;
>   }
> 
> diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
> index 80573313d..22fe69611 100644
> --- a/src/box/lua/tuple.c
> +++ b/src/box/lua/tuple.c
> @@ -496,10 +496,7 @@ luaT_pushtuple(struct lua_State *L, box_tuple_t *tuple)
>           luaL_pushcdata(L, CTID_CONST_STRUCT_TUPLE_REF);
>       *ptr = tuple;
>       /* The order is important - first reference tuple, next set gc */
> -    if (box_tuple_ref(tuple) != 0) {
> -        luaT_error(L);
> -        return;
> -    }
> +    box_tuple_ref(tuple);
>       lua_pushcfunction(L, lbox_tuple_gc);
>       luaL_setcdatagc(L, -2);
>   }
> diff --git a/src/box/port.c b/src/box/port.c
> index 03f6be79d..9b6b8580c 100644
> --- a/src/box/port.c
> +++ b/src/box/port.c
> @@ -45,8 +45,7 @@ port_tuple_add(struct port *base, struct tuple *tuple)
>       struct port_tuple *port = port_tuple(base);
>       struct port_tuple_entry *e;
>       if (port->size == 0) {
> -        if (tuple_ref(tuple) != 0)
> -            return -1;
> +        tuple_ref(tuple);
>           e = &port->first_entry;
>           port->first = port->last = e;
>       } else {
> @@ -55,10 +54,7 @@ port_tuple_add(struct port *base, struct tuple *tuple)
>               diag_set(OutOfMemory, sizeof(*e), "mempool_alloc", "e");
>               return -1;
>           }
> -        if (tuple_ref(tuple) != 0) {
> -            mempool_free(&port_tuple_entry_pool, e);
> -            return -1;
> -        }
> +        tuple_ref(tuple);
>           port->last->next = e;
>           port->last = e;
>       }
> diff --git a/src/box/tuple.c b/src/box/tuple.c
> index 014f37406..5fc1c1283 100644
> --- a/src/box/tuple.c
> +++ b/src/box/tuple.c
> @@ -48,6 +48,26 @@ enum {
>       OBJSIZE_MIN = 16,
>   };
> 
> +/**
> + * Container for big reference counters. Contains array of big
> + * reference counters, size of this array and number of non-zero
> + * big reference counters. When reference counter of tuple becomes
> + * more than 32767, field refs of this tuple becomes index of big
> + * reference counter in big reference counter array and field
> + * is_bigref is set true. The moment big reference becomes equal
> + * 32767 it is set to 0, refs of the tuple becomes 32767 and
> + * is_bigref becomes false. Big reference counter can be equal to
> + * 0 or be more than 32767.
> + */
> +static struct bigref_list {
> +    /** Array of big reference counters. */
> +    uint32_t *refs;
> +    /** Number of non-zero elements in the array. */
> +    uint16_t size;
> +    /** Capacity of the array. */
> +    uint16_t capacity;
> +} bigref_list;
> +
>   static const double ALLOC_FACTOR = 1.05;
> 
>   /**
> @@ -151,6 +171,13 @@ tuple_validate_raw(struct tuple_format *format, const char *tuple)
>       return 0;
>   }
> 
> +/** Initialize big references container. */
> +static inline void
> +bigref_list_create()
> +{
> +    memset(&bigref_list, 0, sizeof(bigref_list));
> +}
> +
>   /**
>    * Incremented on every snapshot and is used to distinguish tuples
>    * which were created after start of a snapshot (these tuples can
> @@ -211,6 +238,8 @@ tuple_init(field_name_hash_f hash)
> 
>       box_tuple_last = NULL;
> 
> +    bigref_list_create();
> +
>       if (coll_id_cache_init() != 0)
>           return -1;
> 
> @@ -244,6 +273,131 @@ tuple_arena_create(struct slab_arena *arena, struct quota *quota,
>       }
>   }
> 
> +enum {
> +    BIGREF_FACTOR = 2,
> +    BIGREF_MAX = UINT32_MAX,
> +    BIGREF_MIN_CAPACITY = 16,
> +    /**
> +     * Only 15 bits are available for bigref list index in
> +     * struct tuple.
> +     */
> +    BIGREF_MAX_CAPACITY = UINT16_MAX >> 1
> +};
> +
> +/** Destroy big references and free memory that was allocated. */
> +static inline void
> +bigref_list_destroy()
> +{
> +    free(bigref_list.refs);
> +    bigref_list_create();
> +}
> +
> +/**
> + * Return index for new big reference counter and allocate memory
> + * if needed.
> + * @retval index for new big reference counter.
> + */
> +static inline uint16_t
> +bigref_list_new_index()
> +{
> +    if (bigref_list.size < bigref_list.capacity) {
> +        uint16_t vacant_index = 0;
> +        while (bigref_list.refs[vacant_index] != 0)
> +            ++vacant_index;
> +        return vacant_index;
> +    }
> +    /* Extend the array. */
> +    uint16_t capacity = bigref_list.capacity;
> +    if (capacity == 0)
> +        capacity = BIGREF_MIN_CAPACITY;
> +    else if (capacity < BIGREF_MAX_CAPACITY)
> +        capacity = MIN(capacity * BIGREF_FACTOR, BIGREF_MAX_CAPACITY);
> +    else
> +        panic("Too many big references");
> +    uint32_t *refs = (uint32_t *) realloc(bigref_list.refs, capacity *
> +                          sizeof(*bigref_list.refs));
> +    if (refs == NULL) {
> +        panic("failed to reallocate %zu bytes: Cannot allocate "\
> +              "memory.", capacity * sizeof(*bigref_list.refs));
> +    }
> +    bigref_list.refs = refs;
> +    memset(bigref_list.refs + bigref_list.capacity, 0, capacity -
> +           bigref_list.capacity);
> +    bigref_list.capacity = capacity;
> +    return bigref_list.size++;
> +}
> +
> +void
> +tuple_ref_slow(struct tuple *tuple)
> +{
> +    assert(tuple->is_bigref || tuple->refs == TUPLE_REF_MAX);
> +    if (! tuple->is_bigref) {
> +        tuple->ref_index = bigref_list_new_index();
> +        tuple->is_bigref = true;
> +        bigref_list.refs[tuple->ref_index] = TUPLE_REF_MAX;
> +    } else if (bigref_list.refs[tuple->ref_index] == BIGREF_MAX) {
> +        panic("Tuple big reference counter overflow");
> +    }
> +    bigref_list.refs[tuple->ref_index]++;
> +}
> +
> +/**
> + * Try to decrease allocated memory if it is possible. Free memory
> + * when size == 0.
> + */
> +static inline void
> +bigref_list_delete_index(uint16_t index)
> +{
> +    bigref_list.refs[index] = 0;
> +    if (--bigref_list.size == 0) {
> +        bigref_list_destroy();
> +        return;
> +    }
> +    if (bigref_list.capacity == BIGREF_MIN_CAPACITY ||
> +        bigref_list.size > bigref_list.capacity / BIGREF_FACTOR)
> +        return;
> +
> +    uint16_t top_index = bigref_list.capacity - 1;
> +    while (bigref_list.refs[top_index] == 0)
> +        top_index--;
> +
> +    uint16_t needed_capacity = top_index + 1;
> +    if (needed_capacity < BIGREF_MIN_CAPACITY)
> +        needed_capacity = BIGREF_MIN_CAPACITY;
> +    if (needed_capacity > bigref_list.capacity / BIGREF_FACTOR)
> +        return;
> +    /* Round up capacity to the next highest power of 2. */
> +    assert(sizeof(needed_capacity) == sizeof(uint16_t));
> +    needed_capacity--;
> +    needed_capacity |= needed_capacity >> 1;
> +    needed_capacity |= needed_capacity >> 2;
> +    needed_capacity |= needed_capacity >> 4;
> +    needed_capacity |= needed_capacity >> 8;
> +    assert(needed_capacity < UINT16_MAX);
> +    needed_capacity++;
> +    uint32_t *refs =
> +        (uint32_t *) realloc(bigref_list.refs, needed_capacity *
> +                     sizeof(*bigref_list.refs));
> +    if (refs == NULL) {
> +        panic("failed to reallocate %zu bytes: Cannot allocate "\
> +              "memory.", needed_capacity * sizeof(*bigref_list.refs));
> +    }
> +    bigref_list.refs = refs;
> +    bigref_list.capacity = needed_capacity;
> +}
> +
> +void
> +tuple_unref_slow(struct tuple *tuple)
> +{
> +    assert(tuple->is_bigref &&
> +           bigref_list.refs[tuple->ref_index] > TUPLE_REF_MAX);
> +    if(--bigref_list.refs[tuple->ref_index] == TUPLE_REF_MAX) {
> +        bigref_list_delete_index(tuple->ref_index);
> +        tuple->ref_index = TUPLE_REF_MAX;
> +        tuple->is_bigref = false;
> +    }
> +}
> +
>   void
>   tuple_arena_destroy(struct slab_arena *arena)
>   {
> @@ -265,6 +419,8 @@ tuple_free(void)
>       tuple_format_free();
> 
>       coll_id_cache_destroy();
> +
> +    bigref_list_destroy();
>   }
> 
>   box_tuple_format_t *
> @@ -288,7 +444,8 @@ int
>   box_tuple_ref(box_tuple_t *tuple)
>   {
>       assert(tuple != NULL);
> -    return tuple_ref(tuple);
> +    tuple_ref(tuple);
> +    return 0;
>   }
> 
>   void
> @@ -357,10 +514,7 @@ box_tuple_iterator(box_tuple_t *tuple)
>                "mempool", "new slab");
>           return NULL;
>       }
> -    if (tuple_ref(tuple) != 0) {
> -        mempool_free(&tuple_iterator_pool, it);
> -        return NULL;
> -    }
> +    tuple_ref(tuple);
>       tuple_rewind(it, tuple);
>       return it;
>   }
> @@ -451,7 +605,6 @@ box_tuple_new(box_tuple_format_t *format, const char *data, const char *end)
>       struct tuple *ret = tuple_new(format, data, end);
>       if (ret == NULL)
>           return NULL;
> -    /* Can't fail on zero refs. */
>       return tuple_bless(ret);
>   }
> 
> diff --git a/src/box/tuple.h b/src/box/tuple.h
> index e2384dda0..14dbd4094 100644
> --- a/src/box/tuple.h
> +++ b/src/box/tuple.h
> @@ -105,8 +105,7 @@ typedef struct tuple box_tuple_t;
>    * tuple will leak.
>    *
>    * \param tuple a tuple
> - * \retval -1 on error (check box_error_last())
> - * \retval 0 on success
> + * \retval 0 always
>    * \sa box_tuple_unref()
>    */
>   int
> @@ -269,8 +268,7 @@ box_tuple_next(box_tuple_iterator_t *it);
>    * Use box_tuple_format_default() to create space-independent tuple.
>    * \param data tuple data in MsgPack Array format ([field1, field2, ...]).
>    * \param end the end of \a data
> - * \retval NULL on out of memory
> - * \retval tuple otherwise
> + * \retval tuple
>    * \pre data, end is valid MsgPack Array
>    * \sa \code box.tuple.new(data) \endcode
>    */
> @@ -307,9 +305,17 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const
>    */
>   struct PACKED tuple
>   {
> -    /** reference counter */
> -    uint16_t refs;
> -    /** format identifier */
> +    union {
> +        /** Reference counter. */
> +        uint16_t refs;
> +        struct {
> +            /** Index of big reference counter. */
> +            uint16_t ref_index : 15;
> +            /** Big reference flag. */
> +            bool is_bigref : 1;
> +        };
> +    };
> +    /** Format identifier. */
>       uint16_t format_id;
>       /**
>        * Length of the MessagePack data in raw part of the
> @@ -774,25 +780,35 @@ tuple_field_uuid(const struct tuple *tuple, int fieldno,
>       return 0;
>   }
> 
> -enum { TUPLE_REF_MAX = UINT16_MAX };
> +enum { TUPLE_REF_MAX = UINT16_MAX >> 1 };
> +
> +/**
> + * Increase tuple big reference counter.
> + * @param tuple Tuple to reference.
> + */
> +void
> +tuple_ref_slow(struct tuple *tuple);
> 
>   /**
>    * Increment tuple reference counter.
>    * @param tuple Tuple to reference.
> - * @retval  0 Success.
> - * @retval -1 Too many refs error.
>    */
> -static inline int
> +static inline void
>   tuple_ref(struct tuple *tuple)
>   {
> -    if (tuple->refs + 1 > TUPLE_REF_MAX) {
> -        diag_set(ClientError, ER_TUPLE_REF_OVERFLOW);
> -        return -1;
> -    }
> -    tuple->refs++;
> -    return 0;
> +    if (unlikely(tuple->refs >= TUPLE_REF_MAX))
> +        tuple_ref_slow(tuple);
> +    else
> +        tuple->refs++;
>   }
> 
> +/**
> + * Decrease tuple big reference counter.
> + * @param tuple Tuple to reference.
> + */
> +void
> +tuple_unref_slow(struct tuple *tuple);
> +
>   /**
>    * Decrement tuple reference counter. If it has reached zero, free the tuple.
>    *
> @@ -802,10 +818,9 @@ static inline void
>   tuple_unref(struct tuple *tuple)
>   {
>       assert(tuple->refs - 1 >= 0);
> -
> -    tuple->refs--;
> -
> -    if (tuple->refs == 0)
> +    if (unlikely(tuple->is_bigref))
> +        tuple_unref_slow(tuple);
> +    else if (--tuple->refs == 0)
>           tuple_delete(tuple);
>   }
> 
> @@ -813,25 +828,18 @@ extern struct tuple *box_tuple_last;
> 
>   /**
>    * Convert internal `struct tuple` to public `box_tuple_t`.
> - * \retval tuple on success
> - * \retval NULL on error, check diag
> + * \retval tuple
>    * \post \a tuple ref counted until the next call.
> - * \post tuple_ref() doesn't fail at least once
>    * \sa tuple_ref
>    */
>   static inline box_tuple_t *
>   tuple_bless(struct tuple *tuple)
>   {
>       assert(tuple != NULL);
> -    /* Ensure tuple can be referenced at least once after return */
> -    if (tuple->refs + 2 > TUPLE_REF_MAX) {
> -        diag_set(ClientError, ER_TUPLE_REF_OVERFLOW);
> -        return NULL;
> -    }
> -    tuple->refs++;
> +    tuple_ref(tuple);
>       /* Remove previous tuple */
>       if (likely(box_tuple_last != NULL))
> -        tuple_unref(box_tuple_last); /* do not throw */
> +        tuple_unref(box_tuple_last);
>       /* Remember current tuple */
>       box_tuple_last = tuple;
>       return tuple;
> @@ -849,41 +857,6 @@ tuple_to_buf(const struct tuple *tuple, char *buf, size_t size);
>   #include "tuple_update.h"
>   #include "errinj.h"
> 
> -/**
> - * \copydoc tuple_ref()
> - * \throws if overflow detected.
> - */
> -static inline void
> -tuple_ref_xc(struct tuple *tuple)
> -{
> -    if (tuple_ref(tuple))
> -        diag_raise();
> -}
> -
> -/**
> - * \copydoc tuple_bless
> - * \throw ER_TUPLE_REF_OVERFLOW
> - */
> -static inline box_tuple_t *
> -tuple_bless_xc(struct tuple *tuple)
> -{
> -    box_tuple_t *blessed = tuple_bless(tuple);
> -    if (blessed == NULL)
> -        diag_raise();
> -    return blessed;
> -}
> -
> -/** Make tuple references exception-friendly in absence of @finally. */
> -struct TupleRefNil {
> -    struct tuple *tuple;
> -    TupleRefNil (struct tuple *arg) :tuple(arg)
> -    { if (tuple) tuple_ref_xc(tuple); }
> -    ~TupleRefNil() { if (tuple) tuple_unref(tuple); }
> -
> -    TupleRefNil(const TupleRefNil&) = delete;
> -    void operator=(const TupleRefNil&) = delete;
> -};
> -
>   /* @copydoc tuple_field_with_type() */
>   static inline const char *
>   tuple_field_with_type_xc(const struct tuple *tuple, uint32_t fieldno,
> diff --git a/src/box/vinyl.c b/src/box/vinyl.c
> index f0d26874e..dc0d02054 100644
> --- a/src/box/vinyl.c
> +++ b/src/box/vinyl.c
> @@ -3822,7 +3822,6 @@ vinyl_iterator_primary_next(struct iterator *base, struct tuple **ret)
>       assert(base->next = vinyl_iterator_primary_next);
>       struct vinyl_iterator *it = (struct vinyl_iterator *)base;
>       assert(it->lsm->index_id == 0);
> -    struct tuple *tuple;
> 
>       if (it->tx == NULL) {
>           diag_set(ClientError, ER_CURSOR_NO_TRANSACTION);
> @@ -3833,18 +3832,15 @@ vinyl_iterator_primary_next(struct iterator *base, struct tuple **ret)
>           goto fail;
>       }
> 
> -    if (vy_read_iterator_next(&it->iterator, &tuple) != 0)
> +    if (vy_read_iterator_next(&it->iterator, ret) != 0)
>           goto fail;
> -
> -    if (tuple == NULL) {
> +    if (*ret == NULL) {
>           /* EOF. Close the iterator immediately. */
>           vinyl_iterator_close(it);
> -        *ret = NULL;
> -        return 0;
> +    } else {
> +        tuple_bless(*ret);
>       }
> -    *ret = tuple_bless(tuple);
> -    if (*ret != NULL)
> -        return 0;
> +    return 0;
>   fail:
>       vinyl_iterator_close(it);
>       return -1;
> @@ -3890,11 +3886,10 @@ next:
>        * Note, there's no need in vy_tx_track() as the
>        * tuple is already tracked in the secondary index.
>        */
> -    struct tuple *full_tuple;
>       if (vy_point_lookup(it->lsm->pk, it->tx, vy_tx_read_view(it->tx),
> -                tuple, &full_tuple) != 0)
> +                tuple, ret) != 0)
>           goto fail;
> -    if (full_tuple == NULL) {
> +    if (*ret == NULL) {
>           /*
>            * All indexes of a space must be consistent, i.e.
>            * if a tuple is present in one index, it must be
> @@ -3908,10 +3903,9 @@ next:
>                vy_lsm_name(it->lsm), vy_stmt_str(tuple));
>           goto next;
>       }
> -    *ret = tuple_bless(full_tuple);
> -    tuple_unref(full_tuple);
> -    if (*ret != NULL)
> -        return 0;
> +    tuple_bless(*ret);
> +    tuple_unref(*ret);
> +    return 0;
>   fail:
>       vinyl_iterator_close(it);
>       return -1;
> @@ -3997,16 +3991,12 @@ vinyl_index_get(struct index *index, const char *key,
>       const struct vy_read_view **rv = (tx != NULL ? vy_tx_read_view(tx) :
>                         &env->xm->p_global_read_view);
> 
> -    struct tuple *tuple;
> -    if (vy_lsm_full_by_key(lsm, tx, rv, key, part_count, &tuple) != 0)
> +    if (vy_lsm_full_by_key(lsm, tx, rv, key, part_count, ret) != 0)
>           return -1;
> -
> -    if (tuple != NULL) {
> -        *ret = tuple_bless(tuple);
> -        tuple_unref(tuple);
> -        return *ret == NULL ? -1 : 0;
> +    if (*ret != NULL) {
> +        tuple_bless(*ret);
> +        tuple_unref(*ret);
>       }
> -    *ret = NULL;
>       return 0;
>   }
> 
> diff --git a/test/box/misc.result b/test/box/misc.result
> index 8f94f5513..f7703ba8d 100644
> --- a/test/box/misc.result
> +++ b/test/box/misc.result
> @@ -332,7 +332,6 @@ t;
>     - 'box.error.UNKNOWN_UPDATE_OP : 28'
>     - 'box.error.WRONG_COLLATION_OPTIONS : 151'
>     - 'box.error.CURSOR_NO_TRANSACTION : 80'
> -  - 'box.error.TUPLE_REF_OVERFLOW : 86'
>     - 'box.error.ALTER_SEQUENCE : 143'
>     - 'box.error.INVALID_XLOG_NAME : 75'
>     - 'box.error.SAVEPOINT_EMPTY_TX : 60'
> @@ -360,7 +359,7 @@ t;
>     - 'box.error.VINYL_MAX_TUPLE_SIZE : 139'
>     - 'box.error.LOAD_FUNCTION : 99'
>     - 'box.error.INVALID_XLOG : 74'
> -  - 'box.error.PRIV_NOT_GRANTED : 91'
> +  - 'box.error.READ_VIEW_ABORTED : 130'
>     - 'box.error.TRANSACTION_CONFLICT : 97'
>     - 'box.error.GUEST_USER_PASSWORD : 96'
>     - 'box.error.PROC_C : 102'
> @@ -405,7 +404,7 @@ t;
>     - 'box.error.injection : table: <address>
>     - 'box.error.NULLABLE_MISMATCH : 153'
>     - 'box.error.LAST_DROP : 15'
> -  - 'box.error.NO_SUCH_ROLE : 82'
> +  - 'box.error.TUPLE_FORMAT_LIMIT : 16'
>     - 'box.error.DECOMPRESSION : 124'
>     - 'box.error.CREATE_SEQUENCE : 142'
>     - 'box.error.CREATE_USER : 43'
> @@ -414,66 +413,66 @@ t;
>     - 'box.error.SEQUENCE_OVERFLOW : 147'
>     - 'box.error.SYSTEM : 115'
>     - 'box.error.KEY_PART_IS_TOO_LONG : 118'
> -  - 'box.error.TUPLE_FORMAT_LIMIT : 16'
> -  - 'box.error.BEFORE_REPLACE_RET : 53'
> +  - 'box.error.INJECTION : 8'
> +  - 'box.error.INVALID_MSGPACK : 20'
>     - 'box.error.NO_SUCH_SAVEPOINT : 61'
>     - 'box.error.TRUNCATE_SYSTEM_SPACE : 137'
>     - 'box.error.VY_QUOTA_TIMEOUT : 135'
>     - 'box.error.WRONG_INDEX_OPTIONS : 108'
>     - 'box.error.INVALID_VYLOG_FILE : 133'
>     - 'box.error.INDEX_FIELD_COUNT_LIMIT : 127'
> -  - 'box.error.READ_VIEW_ABORTED : 130'
> +  - 'box.error.PRIV_NOT_GRANTED : 91'
>     - 'box.error.USER_MAX : 56'
> -  - 'box.error.PROTOCOL : 104'
> +  - 'box.error.BEFORE_REPLACE_RET : 53'
>     - 'box.error.TUPLE_NOT_ARRAY : 22'
>     - 'box.error.KEY_PART_COUNT : 31'
>     - 'box.error.ALTER_SPACE : 12'
>     - 'box.error.ACTIVE_TRANSACTION : 79'
>     - 'box.error.EXACT_FIELD_COUNT : 38'
>     - 'box.error.DROP_SEQUENCE : 144'
> -  - 'box.error.INVALID_MSGPACK : 20'
>     - 'box.error.MORE_THAN_ONE_TUPLE : 41'
> -  - 'box.error.RTREE_RECT : 101'
> -  - 'box.error.SUB_STMT_MAX : 121'
> +  - 'box.error.UPSERT_UNIQUE_SECONDARY_KEY : 105'
>     - 'box.error.UNKNOWN_REQUEST_TYPE : 48'
> -  - 'box.error.SPACE_EXISTS : 10'
> +  - 'box.error.SUB_STMT_MAX : 121'
>     - 'box.error.PROC_LUA : 32'
> +  - 'box.error.SPACE_EXISTS : 10'
>     - 'box.error.ROLE_NOT_GRANTED : 92'
> +  - 'box.error.UNSUPPORTED : 5'
>     - 'box.error.NO_SUCH_SPACE : 36'
>     - 'box.error.WRONG_INDEX_PARTS : 107'
> -  - 'box.error.DROP_SPACE : 11'
>     - 'box.error.MIN_FIELD_COUNT : 39'
>     - 'box.error.REPLICASET_UUID_MISMATCH : 63'
>     - 'box.error.UPDATE_FIELD : 29'
> +  - 'box.error.INDEX_EXISTS : 85'
>     - 'box.error.COMPRESSION : 119'
>     - 'box.error.INVALID_ORDER : 68'
> -  - 'box.error.INDEX_EXISTS : 85'
>     - 'box.error.SPLICE : 25'
>     - 'box.error.UNKNOWN : 0'
> +  - 'box.error.IDENTIFIER : 70'
>     - 'box.error.DROP_PRIMARY_KEY : 17'
>     - 'box.error.NULLABLE_PRIMARY : 152'
>     - 'box.error.NO_SUCH_SEQUENCE : 145'
>     - 'box.error.RELOAD_CFG : 58'
>     - 'box.error.INVALID_UUID : 64'
> -  - 'box.error.INJECTION : 8'
> +  - 'box.error.DROP_SPACE : 11'
>     - 'box.error.TIMEOUT : 78'
> -  - 'box.error.IDENTIFIER : 70'
>     - 'box.error.ITERATOR_TYPE : 72'
>     - 'box.error.REPLICA_MAX : 73'
> +  - 'box.error.NO_SUCH_ROLE : 82'
>     - 'box.error.MISSING_REQUEST_FIELD : 69'
>     - 'box.error.MISSING_SNAPSHOT : 93'
>     - 'box.error.WRONG_SPACE_OPTIONS : 111'
>     - 'box.error.READONLY : 7'
> -  - 'box.error.UNSUPPORTED : 5'
>     - 'box.error.UPDATE_INTEGER_OVERFLOW : 95'
> +  - 'box.error.RTREE_RECT : 101'
>     - 'box.error.NO_CONNECTION : 77'
>     - 'box.error.INVALID_XLOG_ORDER : 76'
> -  - 'box.error.UPSERT_UNIQUE_SECONDARY_KEY : 105'
> -  - 'box.error.ROLLBACK_IN_SUB_STMT : 123'
>     - 'box.error.WRONG_SCHEMA_VERSION : 109'
> -  - 'box.error.UNSUPPORTED_INDEX_FEATURE : 112'
> -  - 'box.error.INDEX_PART_TYPE_MISMATCH : 24'
> +  - 'box.error.ROLLBACK_IN_SUB_STMT : 123'
> +  - 'box.error.PROTOCOL : 104'
>     - 'box.error.INVALID_XLOG_TYPE : 125'
> +  - 'box.error.INDEX_PART_TYPE_MISMATCH : 24'
> +  - 'box.error.UNSUPPORTED_INDEX_FEATURE : 112'
>   ...
>   test_run:cmd("setopt delimiter ''");
>   ---
> diff --git a/test/box/select.result b/test/box/select.result
> index 4aed70630..6e6ccade1 100644
> --- a/test/box/select.result
> +++ b/test/box/select.result
> @@ -619,6 +619,11 @@ collectgarbage('collect')
>   ---
>   - 0
>   ...
> +-- gh-3224 resurrect tuple bigrefs
> +collectgarbage('stop')
> +---
> +- 0
> +...
>   s = box.schema.space.create('select', { temporary = true })
>   ---
>   ...
> @@ -628,22 +633,37 @@ index = s:create_index('primary', { type = 'tree' })
>   a = s:insert{0}
>   ---
>   ...
> -lots_of_links = {}
> +lots_of_links = setmetatable({}, {__mode = 'v'})
> +---
> +...
> +ref_count = 0
> +---
> +...
> +while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
>   ---
>   ...
> +ref_count
> +---
> +- 100000
> +...
>   ref_count = 0
>   ---
>   ...
> -while (true) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
> +for k, v in pairs(lots_of_links) do ref_count = ref_count + 1 end
>   ---
> -- error: Tuple reference counter overflow
>   ...
>   ref_count
>   ---
> -- 65531
> +- 100000
> +...
> +-- check that tuples are deleted after gc is activated
> +collectgarbage('collect')
> +---
> +- 0
>   ...
> -lots_of_links = {}
> +lots_of_links
>   ---
> +- []
>   ...
>   s:drop()
>   ---
> diff --git a/test/box/select.test.lua b/test/box/select.test.lua
> index 54c2ecc1c..f15c6e275 100644
> --- a/test/box/select.test.lua
> +++ b/test/box/select.test.lua
> @@ -124,12 +124,21 @@ test.random(s.index[0], 48)
>   s:drop()
> 
>   collectgarbage('collect')
> +
> +-- gh-3224 resurrect tuple bigrefs
> +
> +collectgarbage('stop')
>   s = box.schema.space.create('select', { temporary = true })
>   index = s:create_index('primary', { type = 'tree' })
>   a = s:insert{0}
> -lots_of_links = {}
> +lots_of_links = setmetatable({}, {__mode = 'v'})
>   ref_count = 0
> -while (true) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
> +while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
>   ref_count
> -lots_of_links = {}
> +ref_count = 0
> +for k, v in pairs(lots_of_links) do ref_count = ref_count + 1 end
> +ref_count
> +-- check that tuples are deleted after gc is activated
> +collectgarbage('collect')
> +lots_of_links
>   s:drop()
> 
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [tarantool-patches] Re: [PATCH v2 1/1] box: create bigrefs for tuples
  2018-06-04 14:21   ` Imeev Mergen
@ 2018-06-04 20:57     ` Vladislav Shpilevoy
  2018-06-04 21:20       ` Vladislav Shpilevoy
                         ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Vladislav Shpilevoy @ 2018-06-04 20:57 UTC (permalink / raw)
  To: tarantool-patches, Vladimir Davydov; +Cc: Imeev Mergen

Thanks for the patch, Mergen! Please look at my 5 comments.

Vova, please, take a look on the patch. I have pasted it at the end
of the letter.

On 04/06/2018 17:21, Imeev Mergen wrote:
> 
> 
> On 06/04/2018 02:21 PM, Vladislav Shpilevoy wrote:
>> Hello. Thanks for the patch! It is almost perfect! See 6 comments
>> below.
>>
>> On 01/06/2018 18:41, imeevma@tarantool.org wrote:
>>> Due to limitation of reference counters for tuple being only
>>> 65535 it was possible to reach this limitation. This patch
>>> increases capacity of reference counters to 4 billions.
>>>
>>> Closes #3224
>>> ---
>>> Branch: https://github.com/tarantool/tarantool/tree/imeevma/gh-3224-tuple-bigrefs
>>> Issue: https://github.com/tarantool/tarantool/issues/3224
>>>
> 
> commit 7b7965f6c5462142a607d9db542d698fc94fbbbe
> Author: Mergen Imeev <imeevma@gmail.com>
> Date:   Mon May 28 19:17:51 2018 +0300
> 
>      box: create bigrefs for tuples
> 
>      Due to limitation of reference counters for tuple being only
>      65535 it was possible to reach this limitation. This patch
>      increases capacity of reference counters to 4 billions.
> 
>      Closes #3224
> 
> diff --git a/src/box/tuple.c b/src/box/tuple.c
> index 014f374..6e63433 100644
> --- a/src/box/tuple.c
> +++ b/src/box/tuple.c
> @@ -48,6 +48,26 @@ enum {
>       OBJSIZE_MIN = 16,
>   };
> 
> +/**
> + * Container for big reference counters. Contains array of big
> + * reference counters, size of this array and number of non-zero
> + * big reference counters. When reference counter of tuple becomes
> + * more than 32767, field refs of this tuple becomes index of big
> + * reference counter in big reference counter array and field
> + * is_bigref is set true. The moment big reference becomes equal
> + * 32767 it is set to 0, refs of the tuple becomes 32767 and
> + * is_bigref becomes false. Big reference counter can be equal to
> + * 0 or be more than 32767.
> + */
> +struct bigref_list {
> +    /** Array of big reference counters */
> +    uint32_t *refs;
> +    /** Number of non-zero elements in the array */
> +    uint16_t size;
> +    /** Capacity of the array */
> +    uint16_t capacity;
> +};

1. Instead of

     struct name {
         ...
     };
     static struct name name;

you can do this

     static struct name {
         ...
     } name;

> +
>   static const double ALLOC_FACTOR = 1.05;
> 
>   /**
> @@ -151,6 +177,15 @@ tuple_validate_raw(struct tuple_format *format, const char *tuple)
>       return 0;
>   }
> 
> +/** Initialize big references container. */
> +static inline void
> +bigref_list_create()
> +{
> +    bigref_list.size = 0;
> +    bigref_list.refs = NULL;
> +    bigref_list.capacity = 0;

2. Memset is faster than per-member resetting. We with
Vladimir Davydov have benched it.

> +/**
> + * Return index for new big reference counter and allocate memory
> + * if needed.
> + * @retval index for new big reference counter.
> + */
> +static inline uint16_t
> +tuple_bigref_retrieve()
> +{
> +    if (bigref_list.size < bigref_list.capacity) {
> +        uint16_t vacant_index = 0;
> +        while (bigref_list.refs[vacant_index] != 0)
> +            ++vacant_index;
> +        return vacant_index;
> +    }
> +    /* Extend the array. */
> +    uint16_t capacity = bigref_list.capacity;
> +    if (capacity == 0) {
> +        capacity = TUPLE_BIGREF_MIN_CAPACITY;
> +    } else {
> +        capacity *= TUPLE_BIGREF_FACTOR;
> +        if (capacity >= TUPLE_BIGREF_MAX_CAPACITY)

3. By such check you have limited the capacity with 16384 bigrefs instead of
32k. Capacity here is always a power of 2. TUPLE_BIGREF_MAX_CAPACITY is
2^15. So here after 2^14 you get overflow. 2^14 = 16384. I have fixed it on
the branch.

> +            panic("Too many big references");
> +    }
> +    uint32_t *refs = (uint32_t *) realloc(bigref_list.refs, capacity *
> +                          sizeof(*bigref_list.refs));
> +    if (refs == NULL) {
> +        panic("failed to reallocate %zu bytes: Cannot allocate"\
> +              " memory.", capacity * sizeof(*bigref_list.refs));
> +    }
> +    bigref_list.refs = refs;
> +    memset(bigref_list.refs + bigref_list.capacity, 0, capacity -
> +           bigref_list.capacity);
> +    bigref_list.capacity = capacity;
> +    return bigref_list.size++;
> +}
> +
> +/**
> + * Try to decrease allocated memory if it is possible. Free memory
> + * when size == 0.
> + */
> +static inline void
> +bigref_list_compact()
> +{
> +    if (bigref_list.size == 0) {
> +        bigref_list_destroy();
> +        return;
> +    }
> +    if (bigref_list.capacity == TUPLE_BIGREF_MIN_CAPACITY ||
> +        bigref_list.size > bigref_list.capacity / TUPLE_BIGREF_FACTOR)
> +        return;
> +    uint16_t capacity = bigref_list.capacity;
> +    while (bigref_list.refs[capacity - 1] == 0)
> +        capacity--;
> +    if (capacity < TUPLE_BIGREF_MIN_CAPACITY)
> +        capacity = TUPLE_BIGREF_MIN_CAPACITY;
> +    if (capacity > bigref_list.capacity / TUPLE_BIGREF_FACTOR)
> +        return;
> +    /* Round up capacity to the next highest power of 2*/
> +    capacity--;
> +    capacity |= capacity >> 1;
> +    capacity |= capacity >> 2;
> +    capacity |= capacity >> 4;
> +    capacity |= capacity >> 8;
> +    capacity |= capacity >> 16;

4. This is because blind copy and paste is bad. The algorithm implementation
I provided to you was for 32 bit unsigned integers, but here you have 16 bits
only, so capacity >> 16 is always 0.

> +    capacity++;
> +    uint32_t *refs = (uint32_t *) realloc(bigref_list.refs, capacity *
> +                          sizeof(*bigref_list.refs));
> +    if (refs == NULL)
> +        panic("failed to reallocate %zu bytes: Cannot allocate"\
> +              " memory.", capacity * sizeof(*bigref_list.refs));
> +    bigref_list.refs = refs;
> +    bigref_list.capacity = capacity;
> +}
> +

5. I have removed tuple_bless == NULL checks from all the code.

Here the patch:

============================================================================

commit 9d82fa9f9ae24c7dc91cf9dd7a416ec533fb8147
Author: Mergen Imeev <imeevma@gmail.com>
Date:   Mon May 28 19:17:51 2018 +0300

     box: create bigrefs for tuples
     
     Due to limitation of reference counters for tuple being only
     65535 it was possible to reach this limitation. This patch
     increases capacity of reference counters to 4 billions.
     
     Closes #3224

diff --git a/src/box/box.cc b/src/box/box.cc
index c728a4c53..42578613d 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -174,20 +174,22 @@ process_rw(struct request *request, struct space *space, struct tuple **result)
  		txn_rollback_stmt();
  		return -1;
  	}
+	if (result == NULL)
+		return txn_commit_stmt(txn, request);
+	*result = tuple;
+	if (tuple == NULL)
+		return txn_commit_stmt(txn, request);
  	/*
  	 * Pin the tuple locally before the commit,
  	 * otherwise it may go away during yield in
  	 * when WAL is written in autocommit mode.
  	 */
-	TupleRefNil ref(tuple);
-	if (txn_commit_stmt(txn, request) != 0)
-		return -1;
-	if (result != NULL) {
-		if (tuple != NULL && tuple_bless(tuple) == NULL)
-			return -1;
-		*result = tuple;
-	}
-	return 0;
+	tuple_ref(tuple);
+	int rc = txn_commit_stmt(txn, request);
+	if (rc == 0)
+		tuple_bless(tuple);
+	tuple_unref(tuple);
+	return rc;
  }
  
  void
diff --git a/src/box/errcode.h b/src/box/errcode.h
index a0759f8f4..e009524ad 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -138,7 +138,7 @@ struct errcode_record {
  	/* 83 */_(ER_ROLE_EXISTS,		"Role '%s' already exists") \
  	/* 84 */_(ER_CREATE_ROLE,		"Failed to create role '%s': %s") \
  	/* 85 */_(ER_INDEX_EXISTS,		"Index '%s' already exists") \
-	/* 86 */_(ER_TUPLE_REF_OVERFLOW,	"Tuple reference counter overflow") \
+	/* 86 */_(ER_UNUSED6,			"") \
  	/* 87 */_(ER_ROLE_LOOP,			"Granting role '%s' to role '%s' would create a loop") \
  	/* 88 */_(ER_GRANT,			"Incorrect grant arguments: %s") \
  	/* 89 */_(ER_PRIV_GRANTED,		"User '%s' already has %s access on %s '%s'") \
diff --git a/src/box/index.cc b/src/box/index.cc
index 3c62ec1cc..f992bc94c 100644
--- a/src/box/index.cc
+++ b/src/box/index.cc
@@ -220,8 +220,8 @@ box_index_random(uint32_t space_id, uint32_t index_id, uint32_t rnd,
  	/* No tx management, random() is for approximation anyway. */
  	if (index_random(index, rnd, result) != 0)
  		return -1;
-	if (*result != NULL && tuple_bless(*result) == NULL)
-		return -1;
+	if (*result != NULL)
+		tuple_bless(*result);
  	return 0;
  }
  
@@ -253,8 +253,8 @@ box_index_get(uint32_t space_id, uint32_t index_id, const char *key,
  	txn_commit_ro_stmt(txn);
  	/* Count statistics. */
  	rmean_collect(rmean_box, IPROTO_SELECT, 1);
-	if (*result != NULL && tuple_bless(*result) == NULL)
-		return -1;
+	if (*result != NULL)
+		tuple_bless(*result);
  	return 0;
  }
  
@@ -285,8 +285,8 @@ box_index_min(uint32_t space_id, uint32_t index_id, const char *key,
  		return -1;
  	}
  	txn_commit_ro_stmt(txn);
-	if (*result != NULL && tuple_bless(*result) == NULL)
-		return -1;
+	if (*result != NULL)
+		tuple_bless(*result);
  	return 0;
  }
  
@@ -317,8 +317,8 @@ box_index_max(uint32_t space_id, uint32_t index_id, const char *key,
  		return -1;
  	}
  	txn_commit_ro_stmt(txn);
-	if (*result != NULL && tuple_bless(*result) == NULL)
-		return -1;
+	if (*result != NULL)
+		tuple_bless(*result);
  	return 0;
  }
  
@@ -397,8 +397,8 @@ box_iterator_next(box_iterator_t *itr, box_tuple_t **result)
  	assert(result != NULL);
  	if (iterator_next(itr, result) != 0)
  		return -1;
-	if (*result != NULL && tuple_bless(*result) == NULL)
-		return -1;
+	if (*result != NULL)
+		tuple_bless(*result);
  	return 0;
  }
  
diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
index 80573313d..22fe69611 100644
--- a/src/box/lua/tuple.c
+++ b/src/box/lua/tuple.c
@@ -496,10 +496,7 @@ luaT_pushtuple(struct lua_State *L, box_tuple_t *tuple)
  		luaL_pushcdata(L, CTID_CONST_STRUCT_TUPLE_REF);
  	*ptr = tuple;
  	/* The order is important - first reference tuple, next set gc */
-	if (box_tuple_ref(tuple) != 0) {
-		luaT_error(L);
-		return;
-	}
+	box_tuple_ref(tuple);
  	lua_pushcfunction(L, lbox_tuple_gc);
  	luaL_setcdatagc(L, -2);
  }
diff --git a/src/box/port.c b/src/box/port.c
index 03f6be79d..9b6b8580c 100644
--- a/src/box/port.c
+++ b/src/box/port.c
@@ -45,8 +45,7 @@ port_tuple_add(struct port *base, struct tuple *tuple)
  	struct port_tuple *port = port_tuple(base);
  	struct port_tuple_entry *e;
  	if (port->size == 0) {
-		if (tuple_ref(tuple) != 0)
-			return -1;
+		tuple_ref(tuple);
  		e = &port->first_entry;
  		port->first = port->last = e;
  	} else {
@@ -55,10 +54,7 @@ port_tuple_add(struct port *base, struct tuple *tuple)
  			diag_set(OutOfMemory, sizeof(*e), "mempool_alloc", "e");
  			return -1;
  		}
-		if (tuple_ref(tuple) != 0) {
-			mempool_free(&port_tuple_entry_pool, e);
-			return -1;
-		}
+		tuple_ref(tuple);
  		port->last->next = e;
  		port->last = e;
  	}
diff --git a/src/box/tuple.c b/src/box/tuple.c
index 014f37406..5fc1c1283 100644
--- a/src/box/tuple.c
+++ b/src/box/tuple.c
@@ -48,6 +48,26 @@ enum {
  	OBJSIZE_MIN = 16,
  };
  
+/**
+ * Container for big reference counters. Contains array of big
+ * reference counters, size of this array and number of non-zero
+ * big reference counters. When reference counter of tuple becomes
+ * more than 32767, field refs of this tuple becomes index of big
+ * reference counter in big reference counter array and field
+ * is_bigref is set true. The moment big reference becomes equal
+ * 32767 it is set to 0, refs of the tuple becomes 32767 and
+ * is_bigref becomes false. Big reference counter can be equal to
+ * 0 or be more than 32767.
+ */
+static struct bigref_list {
+	/** Array of big reference counters. */
+	uint32_t *refs;
+	/** Number of non-zero elements in the array. */
+	uint16_t size;
+	/** Capacity of the array. */
+	uint16_t capacity;
+} bigref_list;
+
  static const double ALLOC_FACTOR = 1.05;
  
  /**
@@ -151,6 +171,13 @@ tuple_validate_raw(struct tuple_format *format, const char *tuple)
  	return 0;
  }
  
+/** Initialize big references container. */
+static inline void
+bigref_list_create()
+{
+	memset(&bigref_list, 0, sizeof(bigref_list));
+}
+
  /**
   * Incremented on every snapshot and is used to distinguish tuples
   * which were created after start of a snapshot (these tuples can
@@ -211,6 +238,8 @@ tuple_init(field_name_hash_f hash)
  
  	box_tuple_last = NULL;
  
+	bigref_list_create();
+
  	if (coll_id_cache_init() != 0)
  		return -1;
  
@@ -244,6 +273,131 @@ tuple_arena_create(struct slab_arena *arena, struct quota *quota,
  	}
  }
  
+enum {
+	BIGREF_FACTOR = 2,
+	BIGREF_MAX = UINT32_MAX,
+	BIGREF_MIN_CAPACITY = 16,
+	/**
+	 * Only 15 bits are available for bigref list index in
+	 * struct tuple.
+	 */
+	BIGREF_MAX_CAPACITY = UINT16_MAX >> 1
+};
+
+/** Destroy big references and free memory that was allocated. */
+static inline void
+bigref_list_destroy()
+{
+	free(bigref_list.refs);
+	bigref_list_create();
+}
+
+/**
+ * Return index for new big reference counter and allocate memory
+ * if needed.
+ * @retval index for new big reference counter.
+ */
+static inline uint16_t
+bigref_list_new_index()
+{
+	if (bigref_list.size < bigref_list.capacity) {
+		uint16_t vacant_index = 0;
+		while (bigref_list.refs[vacant_index] != 0)
+			++vacant_index;
+		return vacant_index;
+	}
+	/* Extend the array. */
+	uint16_t capacity = bigref_list.capacity;
+	if (capacity == 0)
+		capacity = BIGREF_MIN_CAPACITY;
+	else if (capacity < BIGREF_MAX_CAPACITY)
+		capacity = MIN(capacity * BIGREF_FACTOR, BIGREF_MAX_CAPACITY);
+	else
+		panic("Too many big references");
+	uint32_t *refs = (uint32_t *) realloc(bigref_list.refs, capacity *
+					      sizeof(*bigref_list.refs));
+	if (refs == NULL) {
+		panic("failed to reallocate %zu bytes: Cannot allocate "\
+		      "memory.", capacity * sizeof(*bigref_list.refs));
+	}
+	bigref_list.refs = refs;
+	memset(bigref_list.refs + bigref_list.capacity, 0, capacity -
+	       bigref_list.capacity);
+	bigref_list.capacity = capacity;
+	return bigref_list.size++;
+}
+
+void
+tuple_ref_slow(struct tuple *tuple)
+{
+	assert(tuple->is_bigref || tuple->refs == TUPLE_REF_MAX);
+	if (! tuple->is_bigref) {
+		tuple->ref_index = bigref_list_new_index();
+		tuple->is_bigref = true;
+		bigref_list.refs[tuple->ref_index] = TUPLE_REF_MAX;
+	} else if (bigref_list.refs[tuple->ref_index] == BIGREF_MAX) {
+		panic("Tuple big reference counter overflow");
+	}
+	bigref_list.refs[tuple->ref_index]++;
+}
+
+/**
+ * Try to decrease allocated memory if it is possible. Free memory
+ * when size == 0.
+ */
+static inline void
+bigref_list_delete_index(uint16_t index)
+{
+	bigref_list.refs[index] = 0;
+	if (--bigref_list.size == 0) {
+		bigref_list_destroy();
+		return;
+	}
+	if (bigref_list.capacity == BIGREF_MIN_CAPACITY ||
+	    bigref_list.size > bigref_list.capacity / BIGREF_FACTOR)
+		return;
+
+	uint16_t top_index = bigref_list.capacity - 1;
+	while (bigref_list.refs[top_index] == 0)
+		top_index--;
+
+	uint16_t needed_capacity = top_index + 1;
+	if (needed_capacity < BIGREF_MIN_CAPACITY)
+		needed_capacity = BIGREF_MIN_CAPACITY;
+	if (needed_capacity > bigref_list.capacity / BIGREF_FACTOR)
+		return;
+	/* Round up capacity to the next highest power of 2. */
+	assert(sizeof(needed_capacity) == sizeof(uint16_t));
+	needed_capacity--;
+	needed_capacity |= needed_capacity >> 1;
+	needed_capacity |= needed_capacity >> 2;
+	needed_capacity |= needed_capacity >> 4;
+	needed_capacity |= needed_capacity >> 8;
+	assert(needed_capacity < UINT16_MAX);
+	needed_capacity++;
+	uint32_t *refs =
+		(uint32_t *) realloc(bigref_list.refs, needed_capacity *
+				     sizeof(*bigref_list.refs));
+	if (refs == NULL) {
+		panic("failed to reallocate %zu bytes: Cannot allocate "\
+		      "memory.", needed_capacity * sizeof(*bigref_list.refs));
+	}
+	bigref_list.refs = refs;
+	bigref_list.capacity = needed_capacity;
+}
+
+void
+tuple_unref_slow(struct tuple *tuple)
+{
+	assert(tuple->is_bigref &&
+	       bigref_list.refs[tuple->ref_index] > TUPLE_REF_MAX);
+	if(--bigref_list.refs[tuple->ref_index] == TUPLE_REF_MAX) {
+		bigref_list_delete_index(tuple->ref_index);
+		tuple->ref_index = TUPLE_REF_MAX;
+		tuple->is_bigref = false;
+	}
+}
+
  void
  tuple_arena_destroy(struct slab_arena *arena)
  {
@@ -265,6 +419,8 @@ tuple_free(void)
  	tuple_format_free();
  
  	coll_id_cache_destroy();
+
+	bigref_list_destroy();
  }
  
  box_tuple_format_t *
@@ -288,7 +444,8 @@ int
  box_tuple_ref(box_tuple_t *tuple)
  {
  	assert(tuple != NULL);
-	return tuple_ref(tuple);
+	tuple_ref(tuple);
+	return 0;
  }
  
  void
@@ -357,10 +514,7 @@ box_tuple_iterator(box_tuple_t *tuple)
  			 "mempool", "new slab");
  		return NULL;
  	}
-	if (tuple_ref(tuple) != 0) {
-		mempool_free(&tuple_iterator_pool, it);
-		return NULL;
-	}
+	tuple_ref(tuple);
  	tuple_rewind(it, tuple);
  	return it;
  }
@@ -451,7 +605,6 @@ box_tuple_new(box_tuple_format_t *format, const char *data, const char *end)
  	struct tuple *ret = tuple_new(format, data, end);
  	if (ret == NULL)
  		return NULL;
-	/* Can't fail on zero refs. */
  	return tuple_bless(ret);
  }
  
diff --git a/src/box/tuple.h b/src/box/tuple.h
index e2384dda0..14dbd4094 100644
--- a/src/box/tuple.h
+++ b/src/box/tuple.h
@@ -105,8 +105,7 @@ typedef struct tuple box_tuple_t;
   * tuple will leak.
   *
   * \param tuple a tuple
- * \retval -1 on error (check box_error_last())
- * \retval 0 on success
+ * \retval 0 always
   * \sa box_tuple_unref()
   */
  int
@@ -269,8 +268,7 @@ box_tuple_next(box_tuple_iterator_t *it);
   * Use box_tuple_format_default() to create space-independent tuple.
   * \param data tuple data in MsgPack Array format ([field1, field2, ...]).
   * \param end the end of \a data
- * \retval NULL on out of memory
- * \retval tuple otherwise
+ * \retval tuple
   * \pre data, end is valid MsgPack Array
   * \sa \code box.tuple.new(data) \endcode
   */
@@ -307,9 +305,17 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const
   */
  struct PACKED tuple
  {
-	/** reference counter */
-	uint16_t refs;
-	/** format identifier */
+	union {
+		/** Reference counter. */
+		uint16_t refs;
+		struct {
+			/** Index of big reference counter. */
+			uint16_t ref_index : 15;
+			/** Big reference flag. */
+			bool is_bigref : 1;
+		};
+	};
+	/** Format identifier. */
  	uint16_t format_id;
  	/**
  	 * Length of the MessagePack data in raw part of the
@@ -774,25 +780,35 @@ tuple_field_uuid(const struct tuple *tuple, int fieldno,
  	return 0;
  }
  
-enum { TUPLE_REF_MAX = UINT16_MAX };
+enum { TUPLE_REF_MAX = UINT16_MAX >> 1 };
+
+/**
+ * Increase tuple big reference counter.
+ * @param tuple Tuple to reference.
+ */
+void
+tuple_ref_slow(struct tuple *tuple);
  
  /**
   * Increment tuple reference counter.
   * @param tuple Tuple to reference.
- * @retval  0 Success.
- * @retval -1 Too many refs error.
   */
-static inline int
+static inline void
  tuple_ref(struct tuple *tuple)
  {
-	if (tuple->refs + 1 > TUPLE_REF_MAX) {
-		diag_set(ClientError, ER_TUPLE_REF_OVERFLOW);
-		return -1;
-	}
-	tuple->refs++;
-	return 0;
+	if (unlikely(tuple->refs >= TUPLE_REF_MAX))
+		tuple_ref_slow(tuple);
+	else
+		tuple->refs++;
  }
  
+/**
+ * Decrease tuple big reference counter.
+ * @param tuple Tuple to reference.
+ */
+void
+tuple_unref_slow(struct tuple *tuple);
+
  /**
   * Decrement tuple reference counter. If it has reached zero, free the tuple.
   *
@@ -802,10 +818,9 @@ static inline void
  tuple_unref(struct tuple *tuple)
  {
  	assert(tuple->refs - 1 >= 0);
-
-	tuple->refs--;
-
-	if (tuple->refs == 0)
+	if (unlikely(tuple->is_bigref))
+		tuple_unref_slow(tuple);
+	else if (--tuple->refs == 0)
  		tuple_delete(tuple);
  }
  
@@ -813,25 +828,18 @@ extern struct tuple *box_tuple_last;
  
  /**
   * Convert internal `struct tuple` to public `box_tuple_t`.
- * \retval tuple on success
- * \retval NULL on error, check diag
+ * \retval tuple
   * \post \a tuple ref counted until the next call.
- * \post tuple_ref() doesn't fail at least once
   * \sa tuple_ref
   */
  static inline box_tuple_t *
  tuple_bless(struct tuple *tuple)
  {
  	assert(tuple != NULL);
-	/* Ensure tuple can be referenced at least once after return */
-	if (tuple->refs + 2 > TUPLE_REF_MAX) {
-		diag_set(ClientError, ER_TUPLE_REF_OVERFLOW);
-		return NULL;
-	}
-	tuple->refs++;
+	tuple_ref(tuple);
  	/* Remove previous tuple */
  	if (likely(box_tuple_last != NULL))
-		tuple_unref(box_tuple_last); /* do not throw */
+		tuple_unref(box_tuple_last);
  	/* Remember current tuple */
  	box_tuple_last = tuple;
  	return tuple;
@@ -849,41 +857,6 @@ tuple_to_buf(const struct tuple *tuple, char *buf, size_t size);
  #include "tuple_update.h"
  #include "errinj.h"
  
-/**
- * \copydoc tuple_ref()
- * \throws if overflow detected.
- */
-static inline void
-tuple_ref_xc(struct tuple *tuple)
-{
-	if (tuple_ref(tuple))
-		diag_raise();
-}
-
-/**
- * \copydoc tuple_bless
- * \throw ER_TUPLE_REF_OVERFLOW
- */
-static inline box_tuple_t *
-tuple_bless_xc(struct tuple *tuple)
-{
-	box_tuple_t *blessed = tuple_bless(tuple);
-	if (blessed == NULL)
-		diag_raise();
-	return blessed;
-}
-
-/** Make tuple references exception-friendly in absence of @finally. */
-struct TupleRefNil {
-	struct tuple *tuple;
-	TupleRefNil (struct tuple *arg) :tuple(arg)
-	{ if (tuple) tuple_ref_xc(tuple); }
-	~TupleRefNil() { if (tuple) tuple_unref(tuple); }
-
-	TupleRefNil(const TupleRefNil&) = delete;
-	void operator=(const TupleRefNil&) = delete;
-};
-
  /* @copydoc tuple_field_with_type() */
  static inline const char *
  tuple_field_with_type_xc(const struct tuple *tuple, uint32_t fieldno,
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index f0d26874e..dc0d02054 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -3822,7 +3822,6 @@ vinyl_iterator_primary_next(struct iterator *base, struct tuple **ret)
  	assert(base->next = vinyl_iterator_primary_next);
  	struct vinyl_iterator *it = (struct vinyl_iterator *)base;
  	assert(it->lsm->index_id == 0);
-	struct tuple *tuple;
  
  	if (it->tx == NULL) {
  		diag_set(ClientError, ER_CURSOR_NO_TRANSACTION);
@@ -3833,18 +3832,15 @@ vinyl_iterator_primary_next(struct iterator *base, struct tuple **ret)
  		goto fail;
  	}
  
-	if (vy_read_iterator_next(&it->iterator, &tuple) != 0)
+	if (vy_read_iterator_next(&it->iterator, ret) != 0)
  		goto fail;
-
-	if (tuple == NULL) {
+	if (*ret == NULL) {
  		/* EOF. Close the iterator immediately. */
  		vinyl_iterator_close(it);
-		*ret = NULL;
-		return 0;
+	} else {
+		tuple_bless(*ret);
  	}
-	*ret = tuple_bless(tuple);
-	if (*ret != NULL)
-		return 0;
+	return 0;
  fail:
  	vinyl_iterator_close(it);
  	return -1;
@@ -3890,11 +3886,10 @@ next:
  	 * Note, there's no need in vy_tx_track() as the
  	 * tuple is already tracked in the secondary index.
  	 */
-	struct tuple *full_tuple;
  	if (vy_point_lookup(it->lsm->pk, it->tx, vy_tx_read_view(it->tx),
-			    tuple, &full_tuple) != 0)
+			    tuple, ret) != 0)
  		goto fail;
-	if (full_tuple == NULL) {
+	if (*ret == NULL) {
  		/*
  		 * All indexes of a space must be consistent, i.e.
  		 * if a tuple is present in one index, it must be
@@ -3908,10 +3903,9 @@ next:
  			 vy_lsm_name(it->lsm), vy_stmt_str(tuple));
  		goto next;
  	}
-	*ret = tuple_bless(full_tuple);
-	tuple_unref(full_tuple);
-	if (*ret != NULL)
-		return 0;
+	tuple_bless(*ret);
+	tuple_unref(*ret);
+	return 0;
  fail:
  	vinyl_iterator_close(it);
  	return -1;
@@ -3997,16 +3991,12 @@ vinyl_index_get(struct index *index, const char *key,
  	const struct vy_read_view **rv = (tx != NULL ? vy_tx_read_view(tx) :
  					  &env->xm->p_global_read_view);
  
-	struct tuple *tuple;
-	if (vy_lsm_full_by_key(lsm, tx, rv, key, part_count, &tuple) != 0)
+	if (vy_lsm_full_by_key(lsm, tx, rv, key, part_count, ret) != 0)
  		return -1;
-
-	if (tuple != NULL) {
-		*ret = tuple_bless(tuple);
-		tuple_unref(tuple);
-		return *ret == NULL ? -1 : 0;
+	if (*ret != NULL) {
+		tuple_bless(*ret);
+		tuple_unref(*ret);
  	}
-	*ret = NULL;
  	return 0;
  }
  
diff --git a/test/box/misc.result b/test/box/misc.result
index 8f94f5513..f7703ba8d 100644
--- a/test/box/misc.result
+++ b/test/box/misc.result
@@ -332,7 +332,6 @@ t;
    - 'box.error.UNKNOWN_UPDATE_OP : 28'
    - 'box.error.WRONG_COLLATION_OPTIONS : 151'
    - 'box.error.CURSOR_NO_TRANSACTION : 80'
-  - 'box.error.TUPLE_REF_OVERFLOW : 86'
    - 'box.error.ALTER_SEQUENCE : 143'
    - 'box.error.INVALID_XLOG_NAME : 75'
    - 'box.error.SAVEPOINT_EMPTY_TX : 60'
@@ -360,7 +359,7 @@ t;
    - 'box.error.VINYL_MAX_TUPLE_SIZE : 139'
    - 'box.error.LOAD_FUNCTION : 99'
    - 'box.error.INVALID_XLOG : 74'
-  - 'box.error.PRIV_NOT_GRANTED : 91'
+  - 'box.error.READ_VIEW_ABORTED : 130'
    - 'box.error.TRANSACTION_CONFLICT : 97'
    - 'box.error.GUEST_USER_PASSWORD : 96'
    - 'box.error.PROC_C : 102'
@@ -405,7 +404,7 @@ t;
    - 'box.error.injection : table: <address>
    - 'box.error.NULLABLE_MISMATCH : 153'
    - 'box.error.LAST_DROP : 15'
-  - 'box.error.NO_SUCH_ROLE : 82'
+  - 'box.error.TUPLE_FORMAT_LIMIT : 16'
    - 'box.error.DECOMPRESSION : 124'
    - 'box.error.CREATE_SEQUENCE : 142'
    - 'box.error.CREATE_USER : 43'
@@ -414,66 +413,66 @@ t;
    - 'box.error.SEQUENCE_OVERFLOW : 147'
    - 'box.error.SYSTEM : 115'
    - 'box.error.KEY_PART_IS_TOO_LONG : 118'
-  - 'box.error.TUPLE_FORMAT_LIMIT : 16'
-  - 'box.error.BEFORE_REPLACE_RET : 53'
+  - 'box.error.INJECTION : 8'
+  - 'box.error.INVALID_MSGPACK : 20'
    - 'box.error.NO_SUCH_SAVEPOINT : 61'
    - 'box.error.TRUNCATE_SYSTEM_SPACE : 137'
    - 'box.error.VY_QUOTA_TIMEOUT : 135'
    - 'box.error.WRONG_INDEX_OPTIONS : 108'
    - 'box.error.INVALID_VYLOG_FILE : 133'
    - 'box.error.INDEX_FIELD_COUNT_LIMIT : 127'
-  - 'box.error.READ_VIEW_ABORTED : 130'
+  - 'box.error.PRIV_NOT_GRANTED : 91'
    - 'box.error.USER_MAX : 56'
-  - 'box.error.PROTOCOL : 104'
+  - 'box.error.BEFORE_REPLACE_RET : 53'
    - 'box.error.TUPLE_NOT_ARRAY : 22'
    - 'box.error.KEY_PART_COUNT : 31'
    - 'box.error.ALTER_SPACE : 12'
    - 'box.error.ACTIVE_TRANSACTION : 79'
    - 'box.error.EXACT_FIELD_COUNT : 38'
    - 'box.error.DROP_SEQUENCE : 144'
-  - 'box.error.INVALID_MSGPACK : 20'
    - 'box.error.MORE_THAN_ONE_TUPLE : 41'
-  - 'box.error.RTREE_RECT : 101'
-  - 'box.error.SUB_STMT_MAX : 121'
+  - 'box.error.UPSERT_UNIQUE_SECONDARY_KEY : 105'
    - 'box.error.UNKNOWN_REQUEST_TYPE : 48'
-  - 'box.error.SPACE_EXISTS : 10'
+  - 'box.error.SUB_STMT_MAX : 121'
    - 'box.error.PROC_LUA : 32'
+  - 'box.error.SPACE_EXISTS : 10'
    - 'box.error.ROLE_NOT_GRANTED : 92'
+  - 'box.error.UNSUPPORTED : 5'
    - 'box.error.NO_SUCH_SPACE : 36'
    - 'box.error.WRONG_INDEX_PARTS : 107'
-  - 'box.error.DROP_SPACE : 11'
    - 'box.error.MIN_FIELD_COUNT : 39'
    - 'box.error.REPLICASET_UUID_MISMATCH : 63'
    - 'box.error.UPDATE_FIELD : 29'
+  - 'box.error.INDEX_EXISTS : 85'
    - 'box.error.COMPRESSION : 119'
    - 'box.error.INVALID_ORDER : 68'
-  - 'box.error.INDEX_EXISTS : 85'
    - 'box.error.SPLICE : 25'
    - 'box.error.UNKNOWN : 0'
+  - 'box.error.IDENTIFIER : 70'
    - 'box.error.DROP_PRIMARY_KEY : 17'
    - 'box.error.NULLABLE_PRIMARY : 152'
    - 'box.error.NO_SUCH_SEQUENCE : 145'
    - 'box.error.RELOAD_CFG : 58'
    - 'box.error.INVALID_UUID : 64'
-  - 'box.error.INJECTION : 8'
+  - 'box.error.DROP_SPACE : 11'
    - 'box.error.TIMEOUT : 78'
-  - 'box.error.IDENTIFIER : 70'
    - 'box.error.ITERATOR_TYPE : 72'
    - 'box.error.REPLICA_MAX : 73'
+  - 'box.error.NO_SUCH_ROLE : 82'
    - 'box.error.MISSING_REQUEST_FIELD : 69'
    - 'box.error.MISSING_SNAPSHOT : 93'
    - 'box.error.WRONG_SPACE_OPTIONS : 111'
    - 'box.error.READONLY : 7'
-  - 'box.error.UNSUPPORTED : 5'
    - 'box.error.UPDATE_INTEGER_OVERFLOW : 95'
+  - 'box.error.RTREE_RECT : 101'
    - 'box.error.NO_CONNECTION : 77'
    - 'box.error.INVALID_XLOG_ORDER : 76'
-  - 'box.error.UPSERT_UNIQUE_SECONDARY_KEY : 105'
-  - 'box.error.ROLLBACK_IN_SUB_STMT : 123'
    - 'box.error.WRONG_SCHEMA_VERSION : 109'
-  - 'box.error.UNSUPPORTED_INDEX_FEATURE : 112'
-  - 'box.error.INDEX_PART_TYPE_MISMATCH : 24'
+  - 'box.error.ROLLBACK_IN_SUB_STMT : 123'
+  - 'box.error.PROTOCOL : 104'
    - 'box.error.INVALID_XLOG_TYPE : 125'
+  - 'box.error.INDEX_PART_TYPE_MISMATCH : 24'
+  - 'box.error.UNSUPPORTED_INDEX_FEATURE : 112'
  ...
  test_run:cmd("setopt delimiter ''");
  ---
diff --git a/test/box/select.result b/test/box/select.result
index 4aed70630..6e6ccade1 100644
--- a/test/box/select.result
+++ b/test/box/select.result
@@ -619,6 +619,11 @@ collectgarbage('collect')
  ---
  - 0
  ...
+-- gh-3224 resurrect tuple bigrefs
+collectgarbage('stop')
+---
+- 0
+...
  s = box.schema.space.create('select', { temporary = true })
  ---
  ...
@@ -628,22 +633,37 @@ index = s:create_index('primary', { type = 'tree' })
  a = s:insert{0}
  ---
  ...
-lots_of_links = {}
+lots_of_links = setmetatable({}, {__mode = 'v'})
+---
+...
+ref_count = 0
+---
+...
+while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
  ---
  ...
+ref_count
+---
+- 100000
+...
  ref_count = 0
  ---
  ...
-while (true) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
+for k, v in pairs(lots_of_links) do ref_count = ref_count + 1 end
  ---
-- error: Tuple reference counter overflow
  ...
  ref_count
  ---
-- 65531
+- 100000
+...
+-- check that tuples are deleted after gc is activated
+collectgarbage('collect')
+---
+- 0
  ...
-lots_of_links = {}
+lots_of_links
  ---
+- []
  ...
  s:drop()
  ---
diff --git a/test/box/select.test.lua b/test/box/select.test.lua
index 54c2ecc1c..f15c6e275 100644
--- a/test/box/select.test.lua
+++ b/test/box/select.test.lua
@@ -124,12 +124,21 @@ test.random(s.index[0], 48)
  s:drop()
  
  collectgarbage('collect')
+
+-- gh-3224 resurrect tuple bigrefs
+
+collectgarbage('stop')
  s = box.schema.space.create('select', { temporary = true })
  index = s:create_index('primary', { type = 'tree' })
  a = s:insert{0}
-lots_of_links = {}
+lots_of_links = setmetatable({}, {__mode = 'v'})
  ref_count = 0
-while (true) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
+while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
  ref_count
-lots_of_links = {}
+ref_count = 0
+for k, v in pairs(lots_of_links) do ref_count = ref_count + 1 end
+ref_count
+-- check that tuples are deleted after gc is activated
+collectgarbage('collect')
+lots_of_links
  s:drop()

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [tarantool-patches] Re: [PATCH v2 1/1] box: create bigrefs for tuples
  2018-06-04 11:21 ` Vladislav Shpilevoy
@ 2018-06-04 14:21   ` Imeev Mergen
  2018-06-04 20:57     ` Vladislav Shpilevoy
  0 siblings, 1 reply; 11+ messages in thread
From: Imeev Mergen @ 2018-06-04 14:21 UTC (permalink / raw)
  To: Vladislav Shpilevoy, tarantool-patches



On 06/04/2018 02:21 PM, Vladislav Shpilevoy wrote:
> Hello. Thanks for the patch! It is almost perfect! See 6 comments
> below.
>
> On 01/06/2018 18:41, imeevma@tarantool.org wrote:
>> Due to limitation of reference counters for tuple being only
>> 65535 it was possible to reach this limitation. This patch
>> increases capacity of reference counters to 4 billions.
>>
>> Closes #3224
>> ---
>> Branch: 
>> https://github.com/tarantool/tarantool/tree/imeevma/gh-3224-tuple-bigrefs
>> Issue: https://github.com/tarantool/tarantool/issues/3224
>>
>>   src/box/errcode.h        |   2 +-
>>   src/box/lua/tuple.c      |   5 +-
>>   src/box/port.c           |   8 +--
>>   src/box/tuple.c          | 164 
>> +++++++++++++++++++++++++++++++++++++++++++++--
>>   src/box/tuple.h          |  63 ++++++++++--------
>>   test/box/misc.result     |  39 ++++++-----
>>   test/box/select.result   |  30 +++++++--
>>   test/box/select.test.lua |  15 ++++-
>>   8 files changed, 257 insertions(+), 69 deletions(-)
>>
>> diff --git a/src/box/tuple.c b/src/box/tuple.c
>> index 014f374..f59b27e 100644
>> --- a/src/box/tuple.c
>> +++ b/src/box/tuple.c> +
>> +/**
>> + * Try to decrease allocated memory if it is possible. Free memory when
>> + * size == 0.
>> + */
>> +static inline void
>> +tuple_bigrefs_compact(void)
>> +{
>> +    if (bigrefs.size == 0) {
>> +        tuple_bigrefs_destroy();
>> +        return;
>> +    }
>> +    if (bigrefs.capacity == TUPLE_BIGREF_MIN_CAPACITY ||
>> +        bigrefs.size > bigrefs.capacity / TUPLE_BIGREF_FACTOR)
>> +        return;
>> +    uint16_t size = bigrefs.size;
>> +    uint16_t reserved = 0;
>> +    uint16_t capacity = bigrefs.capacity;
>> +    while(size > 0 && reserved <= capacity / TUPLE_BIGREF_FACTOR) {
>> +        if (bigrefs.refs[reserved] != 0)
>> +            --size;
>> +        reserved++;
>> +    }
>
> 1. Why do you iterate from the beginning? As far as I remember, we have
> decided to iterate from the end. Here you try to find such the biggest
> index, that refs[index] != 0. So when you iterate from the beginning, you
> always need to go through capacity, but when you iterate from the end,
> it is enough to stop on the first non-zero element.
>
> And here you use reserved both as a count and as an index. It is 
> incorrect:
> count is 1-based value, index is 0 based.
Done. Added initialization with 0 when memory is allocated.
>
>> +    if (reserved < TUPLE_BIGREF_MIN_CAPACITY)
>> +        reserved = TUPLE_BIGREF_MIN_CAPACITY;
>> +    if (reserved > capacity / TUPLE_BIGREF_FACTOR)
>> +        return;
>> +    while(reserved <= capacity / TUPLE_BIGREF_FACTOR)
>> +        capacity /= TUPLE_BIGREF_MIN_CAPACITY;
>
> 2. I see that capacity is always a power of 2. And you can calculate
> the capacity as a next power of 2 starting from 'reserved'. See this
> algorithm: 
> http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2Float,
> paragraph "Round up to the next highest power of 2". It has O(1) as I 
> can see
> versus O(N) with N - bit count in your algorithm.
Done.
>
>> +    uint32_t *refs = (uint32_t *) realloc(bigrefs.refs, capacity *
>> +                          sizeof(*bigrefs.refs));
>> +    if (refs == NULL)
>> +        panic("failed to reallocate %zu bytes: Cannot allocate"\
>> +              " memory.", capacity * sizeof(*bigrefs.refs));
>> +    bigrefs.refs = refs;
>> +    bigrefs.capacity = capacity;
>> +}
>> +
>> diff --git a/src/box/tuple.h b/src/box/tuple.h
>> index e2384dd..6921124 100644
>> --- a/src/box/tuple.h
>> +++ b/src/box/tuple.h
>> @@ -307,8 +306,16 @@ box_tuple_upsert(const box_tuple_t *tuple, const 
>> char *expr, const
>>    */
>>   struct PACKED tuple
>>   {
>> -    /** reference counter */
>> -    uint16_t refs;
>> +    union {
>> +        /** reference counter */
>> +        uint16_t refs;
>> +        struct {
>> +            /** index of big reference counter */
>> +            uint16_t ref_index : 15;
>> +            /** big reference flag */
>> +            bool is_bigref : 1;
>
> 3. Please. Start a sentence with a capital letter, and finish with
> dot. I see comments in other struct tuple fields, but they are 
> incorrect too.
> Lets fix them alongside.
Done.
>
>> +        };
>> +    };
>>       /** format identifier */
>>       uint16_t format_id;
>>       /**
>> @@ -774,26 +781,36 @@ tuple_field_uuid(const struct tuple *tuple, int 
>> fieldno,
>>       return 0;
>>   }
>>   -enum { TUPLE_REF_MAX = UINT16_MAX };
>> +enum { TUPLE_REF_MAX = UINT16_MAX >> 1 };
>> +
>> +/**
>> + * Increase tuple big reference counter.
>> + * @param tuple Tuple to reference.
>> + */
>> +void
>> +tuple_ref_slow(struct tuple *tuple);
>>     /**
>>    * Increment tuple reference counter.
>>    * @param tuple Tuple to reference.
>> - * @retval  0 Success.
>> - * @retval -1 Too many refs error.
>>    */
>> -static inline int
>> +static inline void
>>   tuple_ref(struct tuple *tuple)
>>   {
>> -    if (tuple->refs + 1 > TUPLE_REF_MAX) {
>> -        diag_set(ClientError, ER_TUPLE_REF_OVERFLOW);
>> -        return -1;
>> -    }
>> -    tuple->refs++;
>> -    return 0;
>> +    if (tuple->is_bigref || tuple->refs == TUPLE_REF_MAX)
>
> 4. Is it the same as 'tuple->refs >= TUPLE_REF_MAX'? I would prefer
> that because single check is better than two. Tuple_ref is very hot
> path and must be very optimized. Maybe we should even wrap this
> condition with unlikely() macro.
>
> if (unlikely(tuple->refs >= TUPLE_REF_MAX))
>     ref_slow
> else
>     ref_fast
Done.
>
>> +        tuple_ref_slow(tuple);
>> +    else
>> +        tuple->refs++;
>>   }
>>     /**
>> + * Decrease tuple big reference counter.
>> + * @param tuple Tuple to reference.
>> + */
>> +void
>> +tuple_unref_slow(struct tuple *tuple);
>> +
>> +/**
>>    * Decrement tuple reference counter. If it has reached zero, free 
>> the tuple.
>>    *
>>    * @pre tuple->refs + count >= 0
>> @@ -802,6 +819,10 @@ static inline void
>>   tuple_unref(struct tuple *tuple)
>>   {
>>       assert(tuple->refs - 1 >= 0);
>> +    if (tuple->is_bigref) {
>
> 5. Same. Lets do if (unlikely(tuple->is_bigref)) { unref_slow ... }
Done.
>
>> +        tuple_unref_slow(tuple);
>> +        return;
>> +    }
>>         tuple->refs--;
>>   @@ -813,22 +834,15 @@ extern struct tuple *box_tuple_last;
>>     /**
>>    * Convert internal `struct tuple` to public `box_tuple_t`.
>> - * \retval tuple on success
>> - * \retval NULL on error, check diag
>> + * \retval tuple
>>    * \post \a tuple ref counted until the next call.
>> - * \post tuple_ref() doesn't fail at least once
>>    * \sa tuple_ref
>>    */
>>   static inline box_tuple_t *
>>   tuple_bless(struct tuple *tuple)
>>   {
>>       assert(tuple != NULL);
>> -    /* Ensure tuple can be referenced at least once after return */
>> -    if (tuple->refs + 2 > TUPLE_REF_MAX) {
>> -        diag_set(ClientError, ER_TUPLE_REF_OVERFLOW);
>> -        return NULL;
>> -    }
>> -    tuple->refs++;
>> +    tuple_ref(tuple);
>>       /* Remove previous tuple */
>>       if (likely(box_tuple_last != NULL))
>>           tuple_unref(box_tuple_last); /* do not throw */
>> @@ -856,8 +870,7 @@ tuple_to_buf(const struct tuple *tuple, char 
>> *buf, size_t size);
>>   static inline void
>>   tuple_ref_xc(struct tuple *tuple)
>
> 6. Lets remove tuple_ref_xc now. 'XC' here means 'eXCeption on an 
> error'. No an error
> is impossible, so tuple_ref_xc === tuple_ref.
>
>>   {
>> -    if (tuple_ref(tuple))
>> -        diag_raise();
>> +    tuple_ref(tuple);
>>   }
Done. Also deleted tuple_bless_xc.

commit 7b7965f6c5462142a607d9db542d698fc94fbbbe
Author: Mergen Imeev <imeevma@gmail.com>
Date:   Mon May 28 19:17:51 2018 +0300

     box: create bigrefs for tuples

     Due to limitation of reference counters for tuple being only
     65535 it was possible to reach this limitation. This patch
     increases capacity of reference counters to 4 billions.

     Closes #3224

diff --git a/src/box/errcode.h b/src/box/errcode.h
index a0759f8..e009524 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -138,7 +138,7 @@ struct errcode_record {
      /* 83 */_(ER_ROLE_EXISTS,        "Role '%s' already exists") \
      /* 84 */_(ER_CREATE_ROLE,        "Failed to create role '%s': %s") \
      /* 85 */_(ER_INDEX_EXISTS,        "Index '%s' already exists") \
-    /* 86 */_(ER_TUPLE_REF_OVERFLOW,    "Tuple reference counter 
overflow") \
+    /* 86 */_(ER_UNUSED6,            "") \
      /* 87 */_(ER_ROLE_LOOP,            "Granting role '%s' to role 
'%s' would create a loop") \
      /* 88 */_(ER_GRANT,            "Incorrect grant arguments: %s") \
      /* 89 */_(ER_PRIV_GRANTED,        "User '%s' already has %s access 
on %s '%s'") \
diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c
index 8057331..22fe696 100644
--- a/src/box/lua/tuple.c
+++ b/src/box/lua/tuple.c
@@ -496,10 +496,7 @@ luaT_pushtuple(struct lua_State *L, box_tuple_t *tuple)
          luaL_pushcdata(L, CTID_CONST_STRUCT_TUPLE_REF);
      *ptr = tuple;
      /* The order is important - first reference tuple, next set gc */
-    if (box_tuple_ref(tuple) != 0) {
-        luaT_error(L);
-        return;
-    }
+    box_tuple_ref(tuple);
      lua_pushcfunction(L, lbox_tuple_gc);
      luaL_setcdatagc(L, -2);
  }
diff --git a/src/box/port.c b/src/box/port.c
index 03f6be7..9b6b858 100644
--- a/src/box/port.c
+++ b/src/box/port.c
@@ -45,8 +45,7 @@ port_tuple_add(struct port *base, struct tuple *tuple)
      struct port_tuple *port = port_tuple(base);
      struct port_tuple_entry *e;
      if (port->size == 0) {
-        if (tuple_ref(tuple) != 0)
-            return -1;
+        tuple_ref(tuple);
          e = &port->first_entry;
          port->first = port->last = e;
      } else {
@@ -55,10 +54,7 @@ port_tuple_add(struct port *base, struct tuple *tuple)
              diag_set(OutOfMemory, sizeof(*e), "mempool_alloc", "e");
              return -1;
          }
-        if (tuple_ref(tuple) != 0) {
-            mempool_free(&port_tuple_entry_pool, e);
-            return -1;
-        }
+        tuple_ref(tuple);
          port->last->next = e;
          port->last = e;
      }
diff --git a/src/box/tuple.c b/src/box/tuple.c
index 014f374..6e63433 100644
--- a/src/box/tuple.c
+++ b/src/box/tuple.c
@@ -48,6 +48,26 @@ enum {
      OBJSIZE_MIN = 16,
  };

+/**
+ * Container for big reference counters. Contains array of big
+ * reference counters, size of this array and number of non-zero
+ * big reference counters. When reference counter of tuple becomes
+ * more than 32767, field refs of this tuple becomes index of big
+ * reference counter in big reference counter array and field
+ * is_bigref is set true. The moment big reference becomes equal
+ * 32767 it is set to 0, refs of the tuple becomes 32767 and
+ * is_bigref becomes false. Big reference counter can be equal to
+ * 0 or be more than 32767.
+ */
+struct bigref_list {
+    /** Array of big reference counters */
+    uint32_t *refs;
+    /** Number of non-zero elements in the array */
+    uint16_t size;
+    /** Capacity of the array */
+    uint16_t capacity;
+};
+
  static const double ALLOC_FACTOR = 1.05;

  /**
@@ -58,6 +78,12 @@ struct tuple *box_tuple_last;

  struct tuple_format *tuple_format_runtime;

+/**
+* All bigrefs of tuples.
+* \sa tuple_ref_slow()
+*/
+static struct bigref_list bigref_list;
+
  static void
  runtime_tuple_delete(struct tuple_format *format, struct tuple *tuple);

@@ -151,6 +177,15 @@ tuple_validate_raw(struct tuple_format *format, 
const char *tuple)
      return 0;
  }

+/** Initialize big references container. */
+static inline void
+bigref_list_create()
+{
+    bigref_list.size = 0;
+    bigref_list.refs = NULL;
+    bigref_list.capacity = 0;
+}
+
  /**
   * Incremented on every snapshot and is used to distinguish tuples
   * which were created after start of a snapshot (these tuples can
@@ -211,6 +246,8 @@ tuple_init(field_name_hash_f hash)

      box_tuple_last = NULL;

+    bigref_list_create();
+
      if (coll_id_cache_init() != 0)
          return -1;

@@ -244,6 +281,124 @@ tuple_arena_create(struct slab_arena *arena, 
struct quota *quota,
      }
  }

+enum {
+    TUPLE_BIGREF_FACTOR = 2,
+    TUPLE_BIGREF_MAX = UINT32_MAX,
+    TUPLE_BIGREF_MIN_CAPACITY = 16,
+    TUPLE_BIGREF_MAX_CAPACITY = UINT16_MAX >> 1
+};
+
+/** Destroy big references and free memory that was allocated. */
+static inline void
+bigref_list_destroy()
+{
+    free(bigref_list.refs);
+    bigref_list.size = 0;
+    bigref_list.refs = NULL;
+    bigref_list.capacity = 0;
+}
+
+/**
+ * Return index for new big reference counter and allocate memory
+ * if needed.
+ * @retval index for new big reference counter.
+ */
+static inline uint16_t
+tuple_bigref_retrieve()
+{
+    if (bigref_list.size < bigref_list.capacity) {
+        uint16_t vacant_index = 0;
+        while (bigref_list.refs[vacant_index] != 0)
+            ++vacant_index;
+        return vacant_index;
+    }
+    /* Extend the array. */
+    uint16_t capacity = bigref_list.capacity;
+    if (capacity == 0) {
+        capacity = TUPLE_BIGREF_MIN_CAPACITY;
+    } else {
+        capacity *= TUPLE_BIGREF_FACTOR;
+        if (capacity >= TUPLE_BIGREF_MAX_CAPACITY)
+            panic("Too many big references");
+    }
+    uint32_t *refs = (uint32_t *) realloc(bigref_list.refs, capacity *
+                          sizeof(*bigref_list.refs));
+    if (refs == NULL) {
+        panic("failed to reallocate %zu bytes: Cannot allocate"\
+              " memory.", capacity * sizeof(*bigref_list.refs));
+    }
+    bigref_list.refs = refs;
+    memset(bigref_list.refs + bigref_list.capacity, 0, capacity -
+           bigref_list.capacity);
+    bigref_list.capacity = capacity;
+    return bigref_list.size++;
+}
+
+void
+tuple_ref_slow(struct tuple *tuple)
+{
+    assert(tuple->is_bigref || tuple->refs == TUPLE_REF_MAX);
+    if (! tuple->is_bigref) {
+        tuple->ref_index = tuple_bigref_retrieve();
+        tuple->is_bigref = true;
+        bigref_list.refs[tuple->ref_index] = TUPLE_REF_MAX;
+    } else if (bigref_list.refs[tuple->ref_index] == TUPLE_BIGREF_MAX)
+        panic("Tuple big reference counter overflow");
+    bigref_list.refs[tuple->ref_index]++;
+}
+
+/**
+ * Try to decrease allocated memory if it is possible. Free memory
+ * when size == 0.
+ */
+static inline void
+bigref_list_compact()
+{
+    if (bigref_list.size == 0) {
+        bigref_list_destroy();
+        return;
+    }
+    if (bigref_list.capacity == TUPLE_BIGREF_MIN_CAPACITY ||
+        bigref_list.size > bigref_list.capacity / TUPLE_BIGREF_FACTOR)
+        return;
+    uint16_t capacity = bigref_list.capacity;
+    while (bigref_list.refs[capacity - 1] == 0)
+        capacity--;
+    if (capacity < TUPLE_BIGREF_MIN_CAPACITY)
+        capacity = TUPLE_BIGREF_MIN_CAPACITY;
+    if (capacity > bigref_list.capacity / TUPLE_BIGREF_FACTOR)
+        return;
+    /* Round up capacity to the next highest power of 2*/
+    capacity--;
+    capacity |= capacity >> 1;
+    capacity |= capacity >> 2;
+    capacity |= capacity >> 4;
+    capacity |= capacity >> 8;
+    capacity |= capacity >> 16;
+    capacity++;
+    uint32_t *refs = (uint32_t *) realloc(bigref_list.refs, capacity *
+                          sizeof(*bigref_list.refs));
+    if (refs == NULL)
+        panic("failed to reallocate %zu bytes: Cannot allocate"\
+              " memory.", capacity * sizeof(*bigref_list.refs));
+    bigref_list.refs = refs;
+    bigref_list.capacity = capacity;
+}
+
+void
+tuple_unref_slow(struct tuple *tuple)
+{
+    assert(tuple->is_bigref &&
+           bigref_list.refs[tuple->ref_index] > TUPLE_REF_MAX);
+    if(--bigref_list.refs[tuple->ref_index] == TUPLE_REF_MAX) {
+        bigref_list.refs[tuple->ref_index] = 0;
+        bigref_list.size--;
+        tuple->ref_index = TUPLE_REF_MAX;
+        tuple->is_bigref = false;
+        bigref_list_compact();
+    }
+}
+
  void
  tuple_arena_destroy(struct slab_arena *arena)
  {
@@ -265,6 +420,8 @@ tuple_free(void)
      tuple_format_free();

      coll_id_cache_destroy();
+
+    bigref_list_destroy();
  }

  box_tuple_format_t *
@@ -288,7 +445,8 @@ int
  box_tuple_ref(box_tuple_t *tuple)
  {
      assert(tuple != NULL);
-    return tuple_ref(tuple);
+    tuple_ref(tuple);
+    return 0;
  }

  void
@@ -357,10 +515,7 @@ box_tuple_iterator(box_tuple_t *tuple)
               "mempool", "new slab");
          return NULL;
      }
-    if (tuple_ref(tuple) != 0) {
-        mempool_free(&tuple_iterator_pool, it);
-        return NULL;
-    }
+    tuple_ref(tuple);
      tuple_rewind(it, tuple);
      return it;
  }
diff --git a/src/box/tuple.h b/src/box/tuple.h
index e2384dd..d2cf662 100644
--- a/src/box/tuple.h
+++ b/src/box/tuple.h
@@ -105,8 +105,7 @@ typedef struct tuple box_tuple_t;
   * tuple will leak.
   *
   * \param tuple a tuple
- * \retval -1 on error (check box_error_last())
- * \retval 0 on success
+ * \retval 0 always
   * \sa box_tuple_unref()
   */
  int
@@ -307,9 +306,17 @@ box_tuple_upsert(const box_tuple_t *tuple, const 
char *expr, const
   */
  struct PACKED tuple
  {
-    /** reference counter */
-    uint16_t refs;
-    /** format identifier */
+    union {
+        /** Reference counter. */
+        uint16_t refs;
+        struct {
+            /** Index of big reference counter. */
+            uint16_t ref_index : 15;
+            /** Big reference flag. */
+            bool is_bigref : 1;
+        };
+    };
+    /** Format identifier. */
      uint16_t format_id;
      /**
       * Length of the MessagePack data in raw part of the
@@ -774,26 +781,36 @@ tuple_field_uuid(const struct tuple *tuple, int 
fieldno,
      return 0;
  }

-enum { TUPLE_REF_MAX = UINT16_MAX };
+enum { TUPLE_REF_MAX = UINT16_MAX >> 1 };
+
+/**
+ * Increase tuple big reference counter.
+ * @param tuple Tuple to reference.
+ */
+void
+tuple_ref_slow(struct tuple *tuple);

  /**
   * Increment tuple reference counter.
   * @param tuple Tuple to reference.
- * @retval  0 Success.
- * @retval -1 Too many refs error.
   */
-static inline int
+static inline void
  tuple_ref(struct tuple *tuple)
  {
-    if (tuple->refs + 1 > TUPLE_REF_MAX) {
-        diag_set(ClientError, ER_TUPLE_REF_OVERFLOW);
-        return -1;
-    }
-    tuple->refs++;
-    return 0;
+    if (unlikely(tuple->refs >= TUPLE_REF_MAX))
+        tuple_ref_slow(tuple);
+    else
+        tuple->refs++;
  }

  /**
+ * Decrease tuple big reference counter.
+ * @param tuple Tuple to reference.
+ */
+void
+tuple_unref_slow(struct tuple *tuple);
+
+/**
   * Decrement tuple reference counter. If it has reached zero, free the 
tuple.
   *
   * @pre tuple->refs + count >= 0
@@ -802,6 +819,10 @@ static inline void
  tuple_unref(struct tuple *tuple)
  {
      assert(tuple->refs - 1 >= 0);
+    if (unlikely(tuple->is_bigref)) {
+        tuple_unref_slow(tuple);
+        return;
+    }

      tuple->refs--;

@@ -813,22 +834,15 @@ extern struct tuple *box_tuple_last;

  /**
   * Convert internal `struct tuple` to public `box_tuple_t`.
- * \retval tuple on success
- * \retval NULL on error, check diag
+ * \retval tuple
   * \post \a tuple ref counted until the next call.
- * \post tuple_ref() doesn't fail at least once
   * \sa tuple_ref
   */
  static inline box_tuple_t *
  tuple_bless(struct tuple *tuple)
  {
      assert(tuple != NULL);
-    /* Ensure tuple can be referenced at least once after return */
-    if (tuple->refs + 2 > TUPLE_REF_MAX) {
-        diag_set(ClientError, ER_TUPLE_REF_OVERFLOW);
-        return NULL;
-    }
-    tuple->refs++;
+    tuple_ref(tuple);
      /* Remove previous tuple */
      if (likely(box_tuple_last != NULL))
          tuple_unref(box_tuple_last); /* do not throw */
@@ -849,35 +863,11 @@ tuple_to_buf(const struct tuple *tuple, char *buf, 
size_t size);
  #include "tuple_update.h"
  #include "errinj.h"

-/**
- * \copydoc tuple_ref()
- * \throws if overflow detected.
- */
-static inline void
-tuple_ref_xc(struct tuple *tuple)
-{
-    if (tuple_ref(tuple))
-        diag_raise();
-}
-
-/**
- * \copydoc tuple_bless
- * \throw ER_TUPLE_REF_OVERFLOW
- */
-static inline box_tuple_t *
-tuple_bless_xc(struct tuple *tuple)
-{
-    box_tuple_t *blessed = tuple_bless(tuple);
-    if (blessed == NULL)
-        diag_raise();
-    return blessed;
-}
-
  /** Make tuple references exception-friendly in absence of @finally. */
  struct TupleRefNil {
      struct tuple *tuple;
      TupleRefNil (struct tuple *arg) :tuple(arg)
-    { if (tuple) tuple_ref_xc(tuple); }
+    { if (tuple) tuple_ref(tuple); }
      ~TupleRefNil() { if (tuple) tuple_unref(tuple); }

      TupleRefNil(const TupleRefNil&) = delete;
diff --git a/test/box/misc.result b/test/box/misc.result
index 8f94f55..f7703ba 100644
--- a/test/box/misc.result
+++ b/test/box/misc.result
@@ -332,7 +332,6 @@ t;
    - 'box.error.UNKNOWN_UPDATE_OP : 28'
    - 'box.error.WRONG_COLLATION_OPTIONS : 151'
    - 'box.error.CURSOR_NO_TRANSACTION : 80'
-  - 'box.error.TUPLE_REF_OVERFLOW : 86'
    - 'box.error.ALTER_SEQUENCE : 143'
    - 'box.error.INVALID_XLOG_NAME : 75'
    - 'box.error.SAVEPOINT_EMPTY_TX : 60'
@@ -360,7 +359,7 @@ t;
    - 'box.error.VINYL_MAX_TUPLE_SIZE : 139'
    - 'box.error.LOAD_FUNCTION : 99'
    - 'box.error.INVALID_XLOG : 74'
-  - 'box.error.PRIV_NOT_GRANTED : 91'
+  - 'box.error.READ_VIEW_ABORTED : 130'
    - 'box.error.TRANSACTION_CONFLICT : 97'
    - 'box.error.GUEST_USER_PASSWORD : 96'
    - 'box.error.PROC_C : 102'
@@ -405,7 +404,7 @@ t;
    - 'box.error.injection : table: <address>
    - 'box.error.NULLABLE_MISMATCH : 153'
    - 'box.error.LAST_DROP : 15'
-  - 'box.error.NO_SUCH_ROLE : 82'
+  - 'box.error.TUPLE_FORMAT_LIMIT : 16'
    - 'box.error.DECOMPRESSION : 124'
    - 'box.error.CREATE_SEQUENCE : 142'
    - 'box.error.CREATE_USER : 43'
@@ -414,66 +413,66 @@ t;
    - 'box.error.SEQUENCE_OVERFLOW : 147'
    - 'box.error.SYSTEM : 115'
    - 'box.error.KEY_PART_IS_TOO_LONG : 118'
-  - 'box.error.TUPLE_FORMAT_LIMIT : 16'
-  - 'box.error.BEFORE_REPLACE_RET : 53'
+  - 'box.error.INJECTION : 8'
+  - 'box.error.INVALID_MSGPACK : 20'
    - 'box.error.NO_SUCH_SAVEPOINT : 61'
    - 'box.error.TRUNCATE_SYSTEM_SPACE : 137'
    - 'box.error.VY_QUOTA_TIMEOUT : 135'
    - 'box.error.WRONG_INDEX_OPTIONS : 108'
    - 'box.error.INVALID_VYLOG_FILE : 133'
    - 'box.error.INDEX_FIELD_COUNT_LIMIT : 127'
-  - 'box.error.READ_VIEW_ABORTED : 130'
+  - 'box.error.PRIV_NOT_GRANTED : 91'
    - 'box.error.USER_MAX : 56'
-  - 'box.error.PROTOCOL : 104'
+  - 'box.error.BEFORE_REPLACE_RET : 53'
    - 'box.error.TUPLE_NOT_ARRAY : 22'
    - 'box.error.KEY_PART_COUNT : 31'
    - 'box.error.ALTER_SPACE : 12'
    - 'box.error.ACTIVE_TRANSACTION : 79'
    - 'box.error.EXACT_FIELD_COUNT : 38'
    - 'box.error.DROP_SEQUENCE : 144'
-  - 'box.error.INVALID_MSGPACK : 20'
    - 'box.error.MORE_THAN_ONE_TUPLE : 41'
-  - 'box.error.RTREE_RECT : 101'
-  - 'box.error.SUB_STMT_MAX : 121'
+  - 'box.error.UPSERT_UNIQUE_SECONDARY_KEY : 105'
    - 'box.error.UNKNOWN_REQUEST_TYPE : 48'
-  - 'box.error.SPACE_EXISTS : 10'
+  - 'box.error.SUB_STMT_MAX : 121'
    - 'box.error.PROC_LUA : 32'
+  - 'box.error.SPACE_EXISTS : 10'
    - 'box.error.ROLE_NOT_GRANTED : 92'
+  - 'box.error.UNSUPPORTED : 5'
    - 'box.error.NO_SUCH_SPACE : 36'
    - 'box.error.WRONG_INDEX_PARTS : 107'
-  - 'box.error.DROP_SPACE : 11'
    - 'box.error.MIN_FIELD_COUNT : 39'
    - 'box.error.REPLICASET_UUID_MISMATCH : 63'
    - 'box.error.UPDATE_FIELD : 29'
+  - 'box.error.INDEX_EXISTS : 85'
    - 'box.error.COMPRESSION : 119'
    - 'box.error.INVALID_ORDER : 68'
-  - 'box.error.INDEX_EXISTS : 85'
    - 'box.error.SPLICE : 25'
    - 'box.error.UNKNOWN : 0'
+  - 'box.error.IDENTIFIER : 70'
    - 'box.error.DROP_PRIMARY_KEY : 17'
    - 'box.error.NULLABLE_PRIMARY : 152'
    - 'box.error.NO_SUCH_SEQUENCE : 145'
    - 'box.error.RELOAD_CFG : 58'
    - 'box.error.INVALID_UUID : 64'
-  - 'box.error.INJECTION : 8'
+  - 'box.error.DROP_SPACE : 11'
    - 'box.error.TIMEOUT : 78'
-  - 'box.error.IDENTIFIER : 70'
    - 'box.error.ITERATOR_TYPE : 72'
    - 'box.error.REPLICA_MAX : 73'
+  - 'box.error.NO_SUCH_ROLE : 82'
    - 'box.error.MISSING_REQUEST_FIELD : 69'
    - 'box.error.MISSING_SNAPSHOT : 93'
    - 'box.error.WRONG_SPACE_OPTIONS : 111'
    - 'box.error.READONLY : 7'
-  - 'box.error.UNSUPPORTED : 5'
    - 'box.error.UPDATE_INTEGER_OVERFLOW : 95'
+  - 'box.error.RTREE_RECT : 101'
    - 'box.error.NO_CONNECTION : 77'
    - 'box.error.INVALID_XLOG_ORDER : 76'
-  - 'box.error.UPSERT_UNIQUE_SECONDARY_KEY : 105'
-  - 'box.error.ROLLBACK_IN_SUB_STMT : 123'
    - 'box.error.WRONG_SCHEMA_VERSION : 109'
-  - 'box.error.UNSUPPORTED_INDEX_FEATURE : 112'
-  - 'box.error.INDEX_PART_TYPE_MISMATCH : 24'
+  - 'box.error.ROLLBACK_IN_SUB_STMT : 123'
+  - 'box.error.PROTOCOL : 104'
    - 'box.error.INVALID_XLOG_TYPE : 125'
+  - 'box.error.INDEX_PART_TYPE_MISMATCH : 24'
+  - 'box.error.UNSUPPORTED_INDEX_FEATURE : 112'
  ...
  test_run:cmd("setopt delimiter ''");
  ---
diff --git a/test/box/select.result b/test/box/select.result
index 4aed706..6e6ccad 100644
--- a/test/box/select.result
+++ b/test/box/select.result
@@ -619,6 +619,11 @@ collectgarbage('collect')
  ---
  - 0
  ...
+-- gh-3224 resurrect tuple bigrefs
+collectgarbage('stop')
+---
+- 0
+...
  s = box.schema.space.create('select', { temporary = true })
  ---
  ...
@@ -628,22 +633,37 @@ index = s:create_index('primary', { type = 'tree' })
  a = s:insert{0}
  ---
  ...
-lots_of_links = {}
+lots_of_links = setmetatable({}, {__mode = 'v'})
+---
+...
+ref_count = 0
+---
+...
+while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) 
ref_count = ref_count + 1 end
  ---
  ...
+ref_count
+---
+- 100000
+...
  ref_count = 0
  ---
  ...
-while (true) do table.insert(lots_of_links, s:get{0}) ref_count = 
ref_count + 1 end
+for k, v in pairs(lots_of_links) do ref_count = ref_count + 1 end
  ---
-- error: Tuple reference counter overflow
  ...
  ref_count
  ---
-- 65531
+- 100000
+...
+-- check that tuples are deleted after gc is activated
+collectgarbage('collect')
+---
+- 0
  ...
-lots_of_links = {}
+lots_of_links
  ---
+- []
  ...
  s:drop()
  ---
diff --git a/test/box/select.test.lua b/test/box/select.test.lua
index 54c2ecc..f15c6e2 100644
--- a/test/box/select.test.lua
+++ b/test/box/select.test.lua
@@ -124,12 +124,21 @@ test.random(s.index[0], 48)
  s:drop()

  collectgarbage('collect')
+
+-- gh-3224 resurrect tuple bigrefs
+
+collectgarbage('stop')
  s = box.schema.space.create('select', { temporary = true })
  index = s:create_index('primary', { type = 'tree' })
  a = s:insert{0}
-lots_of_links = {}
+lots_of_links = setmetatable({}, {__mode = 'v'})
  ref_count = 0
-while (true) do table.insert(lots_of_links, s:get{0}) ref_count = 
ref_count + 1 end
+while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) 
ref_count = ref_count + 1 end
  ref_count
-lots_of_links = {}
+ref_count = 0
+for k, v in pairs(lots_of_links) do ref_count = ref_count + 1 end
+ref_count
+-- check that tuples are deleted after gc is activated
+collectgarbage('collect')
+lots_of_links
  s:drop()

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [tarantool-patches] Re: [PATCH v2 1/1] box: create bigrefs for tuples
  2018-06-01 15:41 [tarantool-patches] " imeevma
  2018-06-01 19:11 ` [tarantool-patches] " Konstantin Osipov
@ 2018-06-04 11:21 ` Vladislav Shpilevoy
  2018-06-04 14:21   ` Imeev Mergen
  1 sibling, 1 reply; 11+ messages in thread
From: Vladislav Shpilevoy @ 2018-06-04 11:21 UTC (permalink / raw)
  To: tarantool-patches, imeevma

Hello. Thanks for the patch! It is almost perfect! See 6 comments
below.

On 01/06/2018 18:41, imeevma@tarantool.org wrote:
> Due to limitation of reference counters for tuple being only
> 65535 it was possible to reach this limitation. This patch
> increases capacity of reference counters to 4 billions.
> 
> Closes #3224
> ---
> Branch: https://github.com/tarantool/tarantool/tree/imeevma/gh-3224-tuple-bigrefs
> Issue: https://github.com/tarantool/tarantool/issues/3224
> 
>   src/box/errcode.h        |   2 +-
>   src/box/lua/tuple.c      |   5 +-
>   src/box/port.c           |   8 +--
>   src/box/tuple.c          | 164 +++++++++++++++++++++++++++++++++++++++++++++--
>   src/box/tuple.h          |  63 ++++++++++--------
>   test/box/misc.result     |  39 ++++++-----
>   test/box/select.result   |  30 +++++++--
>   test/box/select.test.lua |  15 ++++-
>   8 files changed, 257 insertions(+), 69 deletions(-)
> 
> diff --git a/src/box/tuple.c b/src/box/tuple.c
> index 014f374..f59b27e 100644
> --- a/src/box/tuple.c
> +++ b/src/box/tuple.c> +
> +/**
> + * Try to decrease allocated memory if it is possible. Free memory when
> + * size == 0.
> + */
> +static inline void
> +tuple_bigrefs_compact(void)
> +{
> +	if (bigrefs.size == 0) {
> +		tuple_bigrefs_destroy();
> +		return;
> +	}
> +	if (bigrefs.capacity == TUPLE_BIGREF_MIN_CAPACITY ||
> +	    bigrefs.size > bigrefs.capacity / TUPLE_BIGREF_FACTOR)
> +		return;
> +	uint16_t size = bigrefs.size;
> +	uint16_t reserved = 0;
> +	uint16_t capacity = bigrefs.capacity;
> +	while(size > 0 && reserved <= capacity / TUPLE_BIGREF_FACTOR) {
> +		if (bigrefs.refs[reserved] != 0)
> +			--size;
> +		reserved++;
> +	}

1. Why do you iterate from the beginning? As far as I remember, we have
decided to iterate from the end. Here you try to find such the biggest
index, that refs[index] != 0. So when you iterate from the beginning, you
always need to go through capacity, but when you iterate from the end,
it is enough to stop on the first non-zero element.

And here you use reserved both as a count and as an index. It is incorrect:
count is 1-based value, index is 0 based.

> +	if (reserved < TUPLE_BIGREF_MIN_CAPACITY)
> +		reserved = TUPLE_BIGREF_MIN_CAPACITY;
> +	if (reserved > capacity / TUPLE_BIGREF_FACTOR)
> +		return;
> +	while(reserved <= capacity / TUPLE_BIGREF_FACTOR)
> +		capacity /= TUPLE_BIGREF_MIN_CAPACITY;

2. I see that capacity is always a power of 2. And you can calculate
the capacity as a next power of 2 starting from 'reserved'. See this
algorithm: http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2Float,
paragraph "Round up to the next highest power of 2". It has O(1) as I can see
versus O(N) with N - bit count in your algorithm.

> +	uint32_t *refs = (uint32_t *) realloc(bigrefs.refs, capacity *
> +					      sizeof(*bigrefs.refs));
> +	if (refs == NULL)
> +		panic("failed to reallocate %zu bytes: Cannot allocate"\
> +		      " memory.", capacity * sizeof(*bigrefs.refs));
> +	bigrefs.refs = refs;
> +	bigrefs.capacity = capacity;
> +}
> +
> diff --git a/src/box/tuple.h b/src/box/tuple.h
> index e2384dd..6921124 100644
> --- a/src/box/tuple.h
> +++ b/src/box/tuple.h
> @@ -307,8 +306,16 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const
>    */
>   struct PACKED tuple
>   {
> -	/** reference counter */
> -	uint16_t refs;
> +	union {
> +		/** reference counter */
> +		uint16_t refs;
> +		struct {
> +			/** index of big reference counter */
> +			uint16_t ref_index : 15;
> +			/** big reference flag */
> +			bool is_bigref : 1;

3. Please. Start a sentence with a capital letter, and finish with
dot. I see comments in other struct tuple fields, but they are incorrect too.
Lets fix them alongside.

> +		};
> +	};
>   	/** format identifier */
>   	uint16_t format_id;
>   	/**
> @@ -774,26 +781,36 @@ tuple_field_uuid(const struct tuple *tuple, int fieldno,
>   	return 0;
>   }
>   
> -enum { TUPLE_REF_MAX = UINT16_MAX };
> +enum { TUPLE_REF_MAX = UINT16_MAX >> 1 };
> +
> +/**
> + * Increase tuple big reference counter.
> + * @param tuple Tuple to reference.
> + */
> +void
> +tuple_ref_slow(struct tuple *tuple);
>   
>   /**
>    * Increment tuple reference counter.
>    * @param tuple Tuple to reference.
> - * @retval  0 Success.
> - * @retval -1 Too many refs error.
>    */
> -static inline int
> +static inline void
>   tuple_ref(struct tuple *tuple)
>   {
> -	if (tuple->refs + 1 > TUPLE_REF_MAX) {
> -		diag_set(ClientError, ER_TUPLE_REF_OVERFLOW);
> -		return -1;
> -	}
> -	tuple->refs++;
> -	return 0;
> +	if (tuple->is_bigref || tuple->refs == TUPLE_REF_MAX)

4. Is it the same as 'tuple->refs >= TUPLE_REF_MAX'? I would prefer
that because single check is better than two. Tuple_ref is very hot
path and must be very optimized. Maybe we should even wrap this
condition with unlikely() macro.

if (unlikely(tuple->refs >= TUPLE_REF_MAX))
	ref_slow
else
	ref_fast

> +		tuple_ref_slow(tuple);
> +	else
> +		tuple->refs++;
>   }
>   
>   /**
> + * Decrease tuple big reference counter.
> + * @param tuple Tuple to reference.
> + */
> +void
> +tuple_unref_slow(struct tuple *tuple);
> +
> +/**
>    * Decrement tuple reference counter. If it has reached zero, free the tuple.
>    *
>    * @pre tuple->refs + count >= 0
> @@ -802,6 +819,10 @@ static inline void
>   tuple_unref(struct tuple *tuple)
>   {
>   	assert(tuple->refs - 1 >= 0);
> +	if (tuple->is_bigref) {

5. Same. Lets do if (unlikely(tuple->is_bigref)) { unref_slow ... }

> +		tuple_unref_slow(tuple);
> +		return;
> +	}
>   
>   	tuple->refs--;
>   
> @@ -813,22 +834,15 @@ extern struct tuple *box_tuple_last;
>   
>   /**
>    * Convert internal `struct tuple` to public `box_tuple_t`.
> - * \retval tuple on success
> - * \retval NULL on error, check diag
> + * \retval tuple
>    * \post \a tuple ref counted until the next call.
> - * \post tuple_ref() doesn't fail at least once
>    * \sa tuple_ref
>    */
>   static inline box_tuple_t *
>   tuple_bless(struct tuple *tuple)
>   {
>   	assert(tuple != NULL);
> -	/* Ensure tuple can be referenced at least once after return */
> -	if (tuple->refs + 2 > TUPLE_REF_MAX) {
> -		diag_set(ClientError, ER_TUPLE_REF_OVERFLOW);
> -		return NULL;
> -	}
> -	tuple->refs++;
> +	tuple_ref(tuple);
>   	/* Remove previous tuple */
>   	if (likely(box_tuple_last != NULL))
>   		tuple_unref(box_tuple_last); /* do not throw */
> @@ -856,8 +870,7 @@ tuple_to_buf(const struct tuple *tuple, char *buf, size_t size);
>   static inline void
>   tuple_ref_xc(struct tuple *tuple)

6. Lets remove tuple_ref_xc now. 'XC' here means 'eXCeption on an error'. No an error
is impossible, so tuple_ref_xc === tuple_ref.

>   {
> -	if (tuple_ref(tuple))
> -		diag_raise();
> +	tuple_ref(tuple);
>   }
>   

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [tarantool-patches] Re: [PATCH v2 1/1] box: create bigrefs for tuples
  2018-06-01 15:41 [tarantool-patches] " imeevma
@ 2018-06-01 19:11 ` Konstantin Osipov
  2018-06-04 11:21 ` Vladislav Shpilevoy
  1 sibling, 0 replies; 11+ messages in thread
From: Konstantin Osipov @ 2018-06-01 19:11 UTC (permalink / raw)
  To: tarantool-patches

* imeevma@tarantool.org <imeevma@tarantool.org> [18/06/01 18:42]:
> Due to limitation of reference counters for tuple being only
> 65535 it was possible to reach this limitation. This patch
> increases capacity of reference counters to 4 billions.

Please rename plural 'bigrefs' to bigref_index or bigref_map or
bigref_list.


-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2018-06-08  3:27 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-30 15:26 [tarantool-patches] [PATCH v2 1/1] box: create bigrefs for tuples imeevma
2018-05-31 12:51 ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-31 15:29 ` Vladislav Shpilevoy
2018-06-01 15:41 [tarantool-patches] " imeevma
2018-06-01 19:11 ` [tarantool-patches] " Konstantin Osipov
2018-06-04 11:21 ` Vladislav Shpilevoy
2018-06-04 14:21   ` Imeev Mergen
2018-06-04 20:57     ` Vladislav Shpilevoy
2018-06-04 21:20       ` Vladislav Shpilevoy
2018-06-08  3:27         ` Konstantin Osipov
2018-06-08  3:18       ` Konstantin Osipov
2018-06-08  3:26       ` Konstantin Osipov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox