[tarantool-patches] [PATCH v2 1/1] box: create bigrefs for tuples

imeevma at tarantool.org
Wed May 30 17:14:48 MSK 2018


A tuple reference counter was limited to 65535, and this limit could
be reached in practice, e.g. by holding many references to the same
tuple from Lua. Now, when the 16-bit counter reaches its limit, it is
replaced by an index into a separate array of 32-bit "big" counters,
which raises the capacity of a reference counter to about 4 billion
(UINT32_MAX).

Closes #3224
---
Branch: https://github.com/tarantool/tarantool/tree/gh-3224-tuple-bigrefs
Issue: https://github.com/tarantool/tarantool/issues/3224
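
For review convenience, below is a minimal standalone sketch of the
counter encoding this patch implements. It is not part of the diff:
the mock_* names and the fixed-size table are simplifications (the
real code grows the table with realloc() and reports allocation
failures via diag), but the bit layout matches the patch.

	#include <assert.h>
	#include <stdint.h>

	enum {
		/* Plain counters use the low 15 bits of tuple->refs. */
		REF_MAX = UINT16_MAX >> 1,                    /* 32767 */
		/* High bit set: low 15 bits index a 32-bit counter. */
		BIGREF_FLAG = UINT16_MAX ^ (UINT16_MAX >> 1)  /* 0x8000 */
	};

	/* Fixed-size counter table, for the sketch only. */
	static uint32_t mock_bigrefs[REF_MAX + 1];

	/* Increment a refs word, promoting it on overflow. */
	static uint16_t
	mock_ref(uint16_t refs)
	{
		if (refs < REF_MAX)
			return refs + 1;                /* fast path */
		if (refs == REF_MAX) {
			/* Promote; the sketch always uses slot 0. */
			mock_bigrefs[0] = (uint32_t)REF_MAX + 1;
			return BIGREF_FLAG | 0;
		}
		mock_bigrefs[refs ^ BIGREF_FLAG]++;     /* already big */
		return refs;
	}

	/* Decrement, demoting back once the value fits again. */
	static uint16_t
	mock_unref(uint16_t refs)
	{
		if ((refs & BIGREF_FLAG) == 0)
			return refs - 1;                /* fast path */
		uint16_t index = refs ^ BIGREF_FLAG;
		if (--mock_bigrefs[index] <= REF_MAX) {
			uint16_t small = (uint16_t)mock_bigrefs[index];
			mock_bigrefs[index] = 0;        /* slot freed */
			return small;
		}
		return refs;
	}

	int
	main(void)
	{
		uint16_t refs = REF_MAX;        /* at the old limit */
		refs = mock_ref(refs);          /* promoted */
		assert((refs & BIGREF_FLAG) != 0);
		refs = mock_unref(refs);        /* demoted back */
		assert(refs == REF_MAX);
		return 0;
	}

The fast path stays a single 16-bit increment; the slow path runs
only while a tuple holds more than 32767 references, which the
updated test exercises with 100000 references to one tuple.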

 src/box/errcode.h        |   2 +-
 src/box/tuple.c          | 168 +++++++++++++++++++++++++++++++++++++++++++++++
 src/box/tuple.h          |  36 ++++++----
 test/box/select.result   |  32 ++++++++-
 test/box/select.test.lua |  15 ++++-
 5 files changed, 237 insertions(+), 16 deletions(-)

diff --git a/src/box/errcode.h b/src/box/errcode.h
index a0759f8..e009524 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -138,7 +138,7 @@ struct errcode_record {
 	/* 83 */_(ER_ROLE_EXISTS,		"Role '%s' already exists") \
 	/* 84 */_(ER_CREATE_ROLE,		"Failed to create role '%s': %s") \
 	/* 85 */_(ER_INDEX_EXISTS,		"Index '%s' already exists") \
-	/* 86 */_(ER_TUPLE_REF_OVERFLOW,	"Tuple reference counter overflow") \
+	/* 86 */_(ER_UNUSED6,			"") \
 	/* 87 */_(ER_ROLE_LOOP,			"Granting role '%s' to role '%s' would create a loop") \
 	/* 88 */_(ER_GRANT,			"Incorrect grant arguments: %s") \
 	/* 89 */_(ER_PRIV_GRANTED,		"User '%s' already has %s access on %s '%s'") \
diff --git a/src/box/tuple.c b/src/box/tuple.c
index 014f374..60280e4 100644
--- a/src/box/tuple.c
+++ b/src/box/tuple.c
@@ -48,6 +48,16 @@ enum {
 	OBJSIZE_MIN = 16,
 };
 
+/**
+ * Container for big reference counters of tuples. A tuple whose
+ * 16-bit counter has overflowed stores an index into @a refs
+ * instead of the counter itself.
+ */
+struct tuple_bigrefs {
+	/** Array of 32-bit reference counters; 0 means a free slot. */
+	uint32_t *refs;
+	/** Number of used slots in @a refs. */
+	uint16_t size;
+	/** Allocated capacity of @a refs. */
+	uint16_t capacity;
+};
+
 static const double ALLOC_FACTOR = 1.05;
 
 /**
@@ -58,6 +68,12 @@ struct tuple *box_tuple_last;
 
 struct tuple_format *tuple_format_runtime;
 
+/**
+ * Storage for all big reference counters of tuples.
+ * \sa tuple_ref_slow()
+ */
+static struct tuple_bigrefs bigrefs;
+
 static void
 runtime_tuple_delete(struct tuple_format *format, struct tuple *tuple);
 
@@ -211,6 +227,10 @@ tuple_init(field_name_hash_f hash)
 
 	box_tuple_last = NULL;
 
+	bigrefs.size = 0;
+	bigrefs.refs = NULL;
+	bigrefs.capacity = 0;
+
 	if (coll_id_cache_init() != 0)
 		return -1;
 
@@ -244,6 +264,152 @@ tuple_arena_create(struct slab_arena *arena, struct quota *quota,
 	}
 }
 
+enum {
+	/** Growth factor of the bigrefs array. */
+	TUPLE_BIGREF_FACTOR = 2,
+	/** Maximal value of a big reference counter. */
+	TUPLE_BIGREF_MAX = UINT32_MAX,
+	/** Returned by tuple_bigref_retrieve() on failure. */
+	TUPLE_BIGREF_ERROR = UINT16_MAX,
+	/** High bit of tuple->refs: the rest is a bigrefs index. */
+	TUPLE_BIGREF_FLAG = UINT16_MAX ^ (UINT16_MAX >> 1),
+	/** Initial capacity of the bigrefs array. */
+	TUPLE_BIGREF_MIN_CAPACITY = 16,
+	/** Maximal capacity: an index must fit into 15 bits. */
+	TUPLE_BIGREF_MAX_CAPACITY = UINT16_MAX >> 1
+};
+
+/**
+ * Free memory that was allocated for big references.
+ */
+static inline void
+tuple_bigrefs_free(void)
+{
+	free(bigrefs.refs);
+	bigrefs.size = 0;
+	bigrefs.refs = NULL;
+	bigrefs.capacity = 0;
+}
+
+/**
+ * Retrieve a free slot for a new big reference counter, growing
+ * the bigrefs array if needed. A free slot holds 0, so all unused
+ * slots must stay zero-initialized.
+ * @retval Index of a free slot on success.
+ * @retval TUPLE_BIGREF_ERROR Memory allocation failure (diag is set).
+ */
+static inline uint16_t
+tuple_bigref_retrieve(void)
+{
+	if (bigrefs.size < bigrefs.capacity) {
+		/* Reuse a slot freed by tuple_unref_slow(). */
+		for (uint16_t i = 0; i < bigrefs.capacity; ++i) {
+			if (bigrefs.refs[i] == 0) {
+				++bigrefs.size;
+				return i;
+			}
+		}
+		unreachable();
+	}
+	/* No free slots: grow the array. */
+	uint16_t capacity = bigrefs.capacity;
+	if (bigrefs.refs == NULL) {
+		assert(bigrefs.size == 0 && bigrefs.capacity == 0);
+		capacity = TUPLE_BIGREF_MIN_CAPACITY;
+		/* calloc(): free slots must be zero-initialized. */
+		bigrefs.refs = (uint32_t *) calloc(capacity,
+						   sizeof(*bigrefs.refs));
+		if (bigrefs.refs == NULL) {
+			diag_set(OutOfMemory, capacity * sizeof(*bigrefs.refs),
+				 "calloc", "bigrefs.refs");
+			return TUPLE_BIGREF_ERROR;
+		}
+		bigrefs.capacity = capacity;
+		return bigrefs.size++;
+	}
+	if (capacity >= TUPLE_BIGREF_MAX_CAPACITY)
+		panic("Too many big references to tuples");
+	if (capacity > TUPLE_BIGREF_MAX_CAPACITY / TUPLE_BIGREF_FACTOR)
+		capacity = TUPLE_BIGREF_MAX_CAPACITY;
+	else
+		capacity *= TUPLE_BIGREF_FACTOR;
+	uint32_t *refs = (uint32_t *) realloc(bigrefs.refs, capacity *
+					      sizeof(*bigrefs.refs));
+	if (refs == NULL) {
+		/* Keep the old array: all its counters are in use. */
+		diag_set(OutOfMemory, capacity * sizeof(*bigrefs.refs),
+			 "realloc", "bigrefs.refs");
+		return TUPLE_BIGREF_ERROR;
+	}
+	/* Zero the newly added slots to mark them as free. */
+	memset(refs + bigrefs.capacity, 0,
+	       (capacity - bigrefs.capacity) * sizeof(*refs));
+	bigrefs.refs = refs;
+	bigrefs.capacity = capacity;
+	return bigrefs.size++;
+}
+
+int
+tuple_ref_slow(struct tuple *tuple)
+{
+	assert(tuple->refs == TUPLE_REF_MAX ||
+	       (tuple->refs & TUPLE_BIGREF_FLAG) != 0);
+	if (tuple->refs == TUPLE_REF_MAX) {
+		/* Promote the counter to a big reference. */
+		uint16_t index = tuple_bigref_retrieve();
+		if (index == TUPLE_BIGREF_ERROR)
+			return -1;
+		bigrefs.refs[index] = TUPLE_REF_MAX;
+		tuple->refs = TUPLE_BIGREF_FLAG | index;
+	}
+	uint16_t index = tuple->refs ^ TUPLE_BIGREF_FLAG;
+	if (bigrefs.refs[index] == TUPLE_BIGREF_MAX)
+		panic("Tuple reference counter overflow");
+	bigrefs.refs[index]++;
+	return 0;
+}
+
+/**
+ * Shrink the bigrefs array when most of it is unused, and free it
+ * completely when no big references are left. Shrinking is
+ * best-effort: if reallocation fails, the larger array is kept.
+ */
+static inline void
+tuple_bigrefs_release(void)
+{
+	if (bigrefs.size == 0) {
+		tuple_bigrefs_free();
+		return;
+	}
+	if (bigrefs.capacity <= TUPLE_BIGREF_MIN_CAPACITY)
+		return;
+	/* The highest used slot limits how far we can shrink. */
+	uint16_t reserved_bigrefs = 0;
+	uint16_t capacity = bigrefs.capacity;
+	for (uint16_t i = 0; i < capacity; ++i) {
+		if (bigrefs.refs[i] != 0)
+			reserved_bigrefs = i + 1;
+	}
+	if (reserved_bigrefs >= capacity / TUPLE_BIGREF_FACTOR)
+		return;
+	if (reserved_bigrefs < TUPLE_BIGREF_MIN_CAPACITY) {
+		capacity = TUPLE_BIGREF_MIN_CAPACITY;
+	} else {
+		while (reserved_bigrefs < capacity / TUPLE_BIGREF_FACTOR)
+			capacity /= TUPLE_BIGREF_FACTOR;
+	}
+	uint32_t *refs = (uint32_t *) realloc(bigrefs.refs, capacity *
+					      sizeof(*bigrefs.refs));
+	if (refs == NULL)
+		return;
+	bigrefs.refs = refs;
+	bigrefs.capacity = capacity;
+}
+
+void
+tuple_unref_slow(struct tuple *tuple)
+{
+	assert((tuple->refs & TUPLE_BIGREF_FLAG) != 0);
+	uint16_t index = tuple->refs ^ TUPLE_BIGREF_FLAG;
+	assert(bigrefs.refs[index] > TUPLE_REF_MAX);
+	bigrefs.refs[index]--;
+	if (bigrefs.refs[index] <= TUPLE_REF_MAX) {
+		/* The counter fits into 16 bits again: demote it. */
+		tuple->refs = bigrefs.refs[index];
+		bigrefs.refs[index] = 0;
+		bigrefs.size--;
+		if (bigrefs.size < bigrefs.capacity / TUPLE_BIGREF_FACTOR)
+			tuple_bigrefs_release();
+	}
+}
+
 void
 tuple_arena_destroy(struct slab_arena *arena)
 {
@@ -265,6 +431,8 @@ tuple_free(void)
 	tuple_format_free();
 
 	coll_id_cache_destroy();
+
+	tuple_bigrefs_free();
 }
 
 box_tuple_format_t *
diff --git a/src/box/tuple.h b/src/box/tuple.h
index e2384dd..dd5e11e 100644
--- a/src/box/tuple.h
+++ b/src/box/tuple.h
@@ -774,26 +774,40 @@ tuple_field_uuid(const struct tuple *tuple, int fieldno,
 	return 0;
 }
 
-enum { TUPLE_REF_MAX = UINT16_MAX };
+/** Maximal value of a plain (non-big) tuple reference counter. */
+enum { TUPLE_REF_MAX = UINT16_MAX >> 1 };
+
+/**
+ * Increase the reference counter of a tuple whose plain counter
+ * has reached TUPLE_REF_MAX or that already uses a big reference.
+ * @param tuple Tuple to reference.
+ * @retval  0 Success.
+ * @retval -1 Memory allocation error (diag is set).
+ */
+int
+tuple_ref_slow(struct tuple *tuple);
 
 /**
  * Increment tuple reference counter.
  * @param tuple Tuple to reference.
  * @retval  0 Success.
- * @retval -1 Too many refs error.
+ * @retval -1 Memory allocation error (diag is set).
  */
 static inline int
 tuple_ref(struct tuple *tuple)
 {
-	if (tuple->refs + 1 > TUPLE_REF_MAX) {
-		diag_set(ClientError, ER_TUPLE_REF_OVERFLOW);
-		return -1;
-	}
+	if (tuple->refs >= TUPLE_REF_MAX)
+		return tuple_ref_slow(tuple);
 	tuple->refs++;
 	return 0;
 }
 
 /**
+ * Decrease the big reference counter of a tuple, demoting it back
+ * to a plain counter when the value fits into 16 bits again.
+ * @param tuple Tuple to dereference.
+ */
+void
+tuple_unref_slow(struct tuple *tuple);
+
+/**
  * Decrement tuple reference counter. If it has reached zero, free the tuple.
  *
  * @pre tuple->refs + count >= 0
@@ -802,6 +816,10 @@ static inline void
 tuple_unref(struct tuple *tuple)
 {
 	assert(tuple->refs - 1 >= 0);
+	if (tuple->refs > TUPLE_REF_MAX) {
+		tuple_unref_slow(tuple);
+		return;
+	}
 
 	tuple->refs--;
 
@@ -823,12 +841,8 @@ static inline box_tuple_t *
 tuple_bless(struct tuple *tuple)
 {
 	assert(tuple != NULL);
-	/* Ensure tuple can be referenced at least once after return */
-	if (tuple->refs + 2 > TUPLE_REF_MAX) {
-		diag_set(ClientError, ER_TUPLE_REF_OVERFLOW);
+	if (tuple_ref(tuple) < 0)
 		return NULL;
-	}
-	tuple->refs++;
 	/* Remove previous tuple */
 	if (likely(box_tuple_last != NULL))
 		tuple_unref(box_tuple_last); /* do not throw */
diff --git a/test/box/select.result b/test/box/select.result
index 4aed706..3d3a0d3 100644
--- a/test/box/select.result
+++ b/test/box/select.result
@@ -619,6 +619,7 @@ collectgarbage('collect')
 ---
 - 0
 ...
+-- gh-3224: tuple big reference counters
 s = box.schema.space.create('select', { temporary = true })
 ---
 ...
@@ -634,13 +635,19 @@ lots_of_links = {}
 ref_count = 0
 ---
 ...
-while (true) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
+while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
 ---
-- error: Tuple reference counter overflow
 ...
 ref_count
 ---
-- 65531
+- 100000
+...
+for k, v in pairs(lots_of_links) do ref_count = ref_count - 1 end
+---
+...
+ref_count
+---
+- 0
 ...
 lots_of_links = {}
 ---
@@ -648,3 +655,22 @@ lots_of_links = {}
 s:drop()
 ---
 ...
+-- Check that a tuple is deleted when its reference counter reaches 0
+weak_tuple = setmetatable({}, {__mode = 'v'})
+---
+...
+table.insert(weak_tuple, box.tuple.new(1))
+---
+...
+weak_tuple
+---
+- - [1]
+...
+collectgarbage('collect')
+---
+- 0
+...
+weak_tuple
+---
+- []
+...
diff --git a/test/box/select.test.lua b/test/box/select.test.lua
index 54c2ecc..910d4f7 100644
--- a/test/box/select.test.lua
+++ b/test/box/select.test.lua
@@ -124,12 +124,25 @@ test.random(s.index[0], 48)
 s:drop()
 
 collectgarbage('collect')
+
+-- gh-3224: tuple big reference counters
+
 s = box.schema.space.create('select', { temporary = true })
 index = s:create_index('primary', { type = 'tree' })
 a = s:insert{0}
 lots_of_links = {}
 ref_count = 0
-while (true) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
+while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end
+ref_count
+for k, v in pairs(lots_of_links) do ref_count = ref_count - 1 end
 ref_count
 lots_of_links = {}
 s:drop()
+
+-- Check that a tuple is deleted when its reference counter reaches 0
+
+weak_tuple = setmetatable({}, {__mode = 'v'})
+table.insert(weak_tuple, box.tuple.new(1))
+weak_tuple
+collectgarbage('collect')
+weak_tuple
-- 
2.7.4