From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 4058A28683 for ; Fri, 1 Jun 2018 11:41:49 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id B7n5P-Pei0oK for ; Fri, 1 Jun 2018 11:41:49 -0400 (EDT) Received: from smtp40.i.mail.ru (smtp40.i.mail.ru [94.100.177.100]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id C45DF28681 for ; Fri, 1 Jun 2018 11:41:48 -0400 (EDT) Received: from [185.6.245.156] (port=5150 helo=mimeev-ThinkPad-T460p.mail.msk) by smtp40.i.mail.ru with esmtpa (envelope-from ) id 1fOmBW-0007cV-Ja for tarantool-patches@freelists.org; Fri, 01 Jun 2018 18:41:46 +0300 From: imeevma@tarantool.org Subject: [tarantool-patches] [PATCH v2 1/1] box: create bigrefs for tuples Date: Fri, 1 Jun 2018 18:41:46 +0300 Message-Id: <9c250cefbee4375f722235eaf234b734bf98b83d.1527867534.git.imeevma@gmail.com> Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Due to limitation of reference counters for tuple being only 65535 it was possible to reach this limitation. This patch increases capacity of reference counters to 4 billions. Closes #3224 --- Branch: https://github.com/tarantool/tarantool/tree/imeevma/gh-3224-tuple-bigrefs Issue: https://github.com/tarantool/tarantool/issues/3224 src/box/errcode.h | 2 +- src/box/lua/tuple.c | 5 +- src/box/port.c | 8 +-- src/box/tuple.c | 164 +++++++++++++++++++++++++++++++++++++++++++++-- src/box/tuple.h | 63 ++++++++++-------- test/box/misc.result | 39 ++++++----- test/box/select.result | 30 +++++++-- test/box/select.test.lua | 15 ++++- 8 files changed, 257 insertions(+), 69 deletions(-) diff --git a/src/box/errcode.h b/src/box/errcode.h index a0759f8..e009524 100644 --- a/src/box/errcode.h +++ b/src/box/errcode.h @@ -138,7 +138,7 @@ struct errcode_record { /* 83 */_(ER_ROLE_EXISTS, "Role '%s' already exists") \ /* 84 */_(ER_CREATE_ROLE, "Failed to create role '%s': %s") \ /* 85 */_(ER_INDEX_EXISTS, "Index '%s' already exists") \ - /* 86 */_(ER_TUPLE_REF_OVERFLOW, "Tuple reference counter overflow") \ + /* 86 */_(ER_UNUSED6, "") \ /* 87 */_(ER_ROLE_LOOP, "Granting role '%s' to role '%s' would create a loop") \ /* 88 */_(ER_GRANT, "Incorrect grant arguments: %s") \ /* 89 */_(ER_PRIV_GRANTED, "User '%s' already has %s access on %s '%s'") \ diff --git a/src/box/lua/tuple.c b/src/box/lua/tuple.c index 8057331..22fe696 100644 --- a/src/box/lua/tuple.c +++ b/src/box/lua/tuple.c @@ -496,10 +496,7 @@ luaT_pushtuple(struct lua_State *L, box_tuple_t *tuple) luaL_pushcdata(L, CTID_CONST_STRUCT_TUPLE_REF); *ptr = tuple; /* The order is important - first reference tuple, next set gc */ - if (box_tuple_ref(tuple) != 0) { - luaT_error(L); - return; - } + box_tuple_ref(tuple); lua_pushcfunction(L, lbox_tuple_gc); luaL_setcdatagc(L, -2); } diff --git a/src/box/port.c b/src/box/port.c index 03f6be7..9b6b858 100644 --- a/src/box/port.c +++ b/src/box/port.c @@ -45,8 +45,7 @@ port_tuple_add(struct port *base, struct tuple *tuple) struct port_tuple *port = port_tuple(base); struct port_tuple_entry *e; if (port->size == 0) { - if (tuple_ref(tuple) != 0) - return -1; + tuple_ref(tuple); e = &port->first_entry; port->first = port->last = e; } else { @@ -55,10 +54,7 @@ port_tuple_add(struct port *base, struct tuple *tuple) diag_set(OutOfMemory, sizeof(*e), "mempool_alloc", "e"); return -1; } - if (tuple_ref(tuple) != 0) { - mempool_free(&port_tuple_entry_pool, e); - return -1; - } + tuple_ref(tuple); port->last->next = e; port->last = e; } diff --git a/src/box/tuple.c b/src/box/tuple.c index 014f374..f59b27e 100644 --- a/src/box/tuple.c +++ b/src/box/tuple.c @@ -48,6 +48,25 @@ enum { OBJSIZE_MIN = 16, }; +/** + * Container for big reference counters. Contains array of big reference + * counters, size of this array and number of non-zero big reference counters. + * When reference counter of tuple becomes more than 32767, field refs of this + * tuple becomes index of big reference counter in big reference counter array + * and field is_bigref is set true. The moment big reference becomes equal + * 32767 it is set to 0, refs of the tuple becomes 32767 and is_bigref becomes + * false. Big reference counter can be equal to 0 or be more than 32767. + */ +struct tuple_bigrefs +{ + /** Array of big reference counters */ + uint32_t *refs; + /** Number of non-zero elements in the array */ + uint16_t size; + /** Capacity of the array */ + uint16_t capacity; +}; + static const double ALLOC_FACTOR = 1.05; /** @@ -58,6 +77,12 @@ struct tuple *box_tuple_last; struct tuple_format *tuple_format_runtime; +/** +* All bigrefs of tuples. +* \sa tuple_ref_slow() +*/ +static struct tuple_bigrefs bigrefs; + static void runtime_tuple_delete(struct tuple_format *format, struct tuple *tuple); @@ -152,6 +177,17 @@ tuple_validate_raw(struct tuple_format *format, const char *tuple) } /** + * Initialize big references container. + */ +static inline void +tuple_bigrefs_create(void) +{ + bigrefs.size = 0; + bigrefs.refs = NULL; + bigrefs.capacity = 0; +} + +/** * Incremented on every snapshot and is used to distinguish tuples * which were created after start of a snapshot (these tuples can * be freed right away, since they are not used for snapshot) or @@ -211,6 +247,8 @@ tuple_init(field_name_hash_f hash) box_tuple_last = NULL; + tuple_bigrefs_create(); + if (coll_id_cache_init() != 0) return -1; @@ -244,6 +282,122 @@ tuple_arena_create(struct slab_arena *arena, struct quota *quota, } } +enum { + TUPLE_BIGREF_FACTOR = 2, + TUPLE_BIGREF_MAX = UINT32_MAX, + TUPLE_BIGREF_MIN_CAPACITY = 16, + TUPLE_BIGREF_MAX_CAPACITY = UINT16_MAX >> 1 +}; + +/** + * Destroy big references and free memory that was allocated. + */ +static inline void +tuple_bigrefs_destroy(void) +{ + free(bigrefs.refs); + bigrefs.size = 0; + bigrefs.refs = NULL; + bigrefs.capacity = 0; +} + +/** + * Return index for new big reference counter and allocate memory if needed. + * @retval index for new big reference counter. + */ +static inline uint16_t +tuple_bigref_retrieve(void) +{ + if (bigrefs.size < bigrefs.capacity) { + uint16_t vacant_index = 0; + while(bigrefs.refs[vacant_index] != 0) + ++vacant_index; + return vacant_index; + } + /* Extend the array ... */ + uint16_t capacity = bigrefs.capacity; + if (capacity == 0) + capacity = TUPLE_BIGREF_MIN_CAPACITY; + else + capacity *= TUPLE_BIGREF_FACTOR; + if (capacity >= TUPLE_BIGREF_MAX_CAPACITY) + panic("Too many big references"); + uint32_t *refs = (uint32_t *) realloc(bigrefs.refs, capacity * + sizeof(*bigrefs.refs)); + if (refs == NULL) + panic("failed to reallocate %zu bytes: Cannot allocate"\ + " memory.", capacity * sizeof(*bigrefs.refs)); + bigrefs.refs = refs; + bigrefs.capacity = capacity; + return bigrefs.size++; +} + +void +tuple_ref_slow(struct tuple *tuple) +{ + assert(tuple->is_bigref == true || tuple->refs == TUPLE_REF_MAX); + if (tuple->is_bigref == false) { + tuple->ref_index = tuple_bigref_retrieve(); + tuple->is_bigref = true; + bigrefs.refs[tuple->ref_index] = TUPLE_REF_MAX; + } + if (bigrefs.refs[tuple->ref_index] == TUPLE_BIGREF_MAX) + panic("Tuple big reference counter overflow"); + bigrefs.refs[tuple->ref_index]++; +} + +/** + * Try to decrease allocated memory if it is possible. Free memory when + * size == 0. + */ +static inline void +tuple_bigrefs_compact(void) +{ + if (bigrefs.size == 0) { + tuple_bigrefs_destroy(); + return; + } + if (bigrefs.capacity == TUPLE_BIGREF_MIN_CAPACITY || + bigrefs.size > bigrefs.capacity / TUPLE_BIGREF_FACTOR) + return; + uint16_t size = bigrefs.size; + uint16_t reserved = 0; + uint16_t capacity = bigrefs.capacity; + while(size > 0 && reserved <= capacity / TUPLE_BIGREF_FACTOR) { + if (bigrefs.refs[reserved] != 0) + --size; + reserved++; + } + if (reserved < TUPLE_BIGREF_MIN_CAPACITY) + reserved = TUPLE_BIGREF_MIN_CAPACITY; + if (reserved > capacity / TUPLE_BIGREF_FACTOR) + return; + while(reserved <= capacity / TUPLE_BIGREF_FACTOR) + capacity /= TUPLE_BIGREF_MIN_CAPACITY; + uint32_t *refs = (uint32_t *) realloc(bigrefs.refs, capacity * + sizeof(*bigrefs.refs)); + if (refs == NULL) + panic("failed to reallocate %zu bytes: Cannot allocate"\ + " memory.", capacity * sizeof(*bigrefs.refs)); + bigrefs.refs = refs; + bigrefs.capacity = capacity; +} + +void +tuple_unref_slow(struct tuple *tuple) +{ + assert(tuple->is_bigref == true && + bigrefs.refs[tuple->ref_index] > TUPLE_REF_MAX); + bigrefs.refs[tuple->ref_index]--; + if(bigrefs.refs[tuple->ref_index] == TUPLE_REF_MAX) { + bigrefs.refs[tuple->ref_index] = 0; + bigrefs.size--; + tuple->ref_index = TUPLE_REF_MAX; + tuple->is_bigref = false; + tuple_bigrefs_compact(); + } +} + void tuple_arena_destroy(struct slab_arena *arena) { @@ -265,6 +419,8 @@ tuple_free(void) tuple_format_free(); coll_id_cache_destroy(); + + tuple_bigrefs_destroy(); } box_tuple_format_t * @@ -288,7 +444,8 @@ int box_tuple_ref(box_tuple_t *tuple) { assert(tuple != NULL); - return tuple_ref(tuple); + tuple_ref(tuple); + return 0; } void @@ -357,10 +514,7 @@ box_tuple_iterator(box_tuple_t *tuple) "mempool", "new slab"); return NULL; } - if (tuple_ref(tuple) != 0) { - mempool_free(&tuple_iterator_pool, it); - return NULL; - } + tuple_ref(tuple); tuple_rewind(it, tuple); return it; } diff --git a/src/box/tuple.h b/src/box/tuple.h index e2384dd..6921124 100644 --- a/src/box/tuple.h +++ b/src/box/tuple.h @@ -105,8 +105,7 @@ typedef struct tuple box_tuple_t; * tuple will leak. * * \param tuple a tuple - * \retval -1 on error (check box_error_last()) - * \retval 0 on success + * \retval 0 always * \sa box_tuple_unref() */ int @@ -307,8 +306,16 @@ box_tuple_upsert(const box_tuple_t *tuple, const char *expr, const */ struct PACKED tuple { - /** reference counter */ - uint16_t refs; + union { + /** reference counter */ + uint16_t refs; + struct { + /** index of big reference counter */ + uint16_t ref_index : 15; + /** big reference flag */ + bool is_bigref : 1; + }; + }; /** format identifier */ uint16_t format_id; /** @@ -774,26 +781,36 @@ tuple_field_uuid(const struct tuple *tuple, int fieldno, return 0; } -enum { TUPLE_REF_MAX = UINT16_MAX }; +enum { TUPLE_REF_MAX = UINT16_MAX >> 1 }; + +/** + * Increase tuple big reference counter. + * @param tuple Tuple to reference. + */ +void +tuple_ref_slow(struct tuple *tuple); /** * Increment tuple reference counter. * @param tuple Tuple to reference. - * @retval 0 Success. - * @retval -1 Too many refs error. */ -static inline int +static inline void tuple_ref(struct tuple *tuple) { - if (tuple->refs + 1 > TUPLE_REF_MAX) { - diag_set(ClientError, ER_TUPLE_REF_OVERFLOW); - return -1; - } - tuple->refs++; - return 0; + if (tuple->is_bigref || tuple->refs == TUPLE_REF_MAX) + tuple_ref_slow(tuple); + else + tuple->refs++; } /** + * Decrease tuple big reference counter. + * @param tuple Tuple to reference. + */ +void +tuple_unref_slow(struct tuple *tuple); + +/** * Decrement tuple reference counter. If it has reached zero, free the tuple. * * @pre tuple->refs + count >= 0 @@ -802,6 +819,10 @@ static inline void tuple_unref(struct tuple *tuple) { assert(tuple->refs - 1 >= 0); + if (tuple->is_bigref) { + tuple_unref_slow(tuple); + return; + } tuple->refs--; @@ -813,22 +834,15 @@ extern struct tuple *box_tuple_last; /** * Convert internal `struct tuple` to public `box_tuple_t`. - * \retval tuple on success - * \retval NULL on error, check diag + * \retval tuple * \post \a tuple ref counted until the next call. - * \post tuple_ref() doesn't fail at least once * \sa tuple_ref */ static inline box_tuple_t * tuple_bless(struct tuple *tuple) { assert(tuple != NULL); - /* Ensure tuple can be referenced at least once after return */ - if (tuple->refs + 2 > TUPLE_REF_MAX) { - diag_set(ClientError, ER_TUPLE_REF_OVERFLOW); - return NULL; - } - tuple->refs++; + tuple_ref(tuple); /* Remove previous tuple */ if (likely(box_tuple_last != NULL)) tuple_unref(box_tuple_last); /* do not throw */ @@ -856,8 +870,7 @@ tuple_to_buf(const struct tuple *tuple, char *buf, size_t size); static inline void tuple_ref_xc(struct tuple *tuple) { - if (tuple_ref(tuple)) - diag_raise(); + tuple_ref(tuple); } /** diff --git a/test/box/misc.result b/test/box/misc.result index 8f94f55..f7703ba 100644 --- a/test/box/misc.result +++ b/test/box/misc.result @@ -332,7 +332,6 @@ t; - 'box.error.UNKNOWN_UPDATE_OP : 28' - 'box.error.WRONG_COLLATION_OPTIONS : 151' - 'box.error.CURSOR_NO_TRANSACTION : 80' - - 'box.error.TUPLE_REF_OVERFLOW : 86' - 'box.error.ALTER_SEQUENCE : 143' - 'box.error.INVALID_XLOG_NAME : 75' - 'box.error.SAVEPOINT_EMPTY_TX : 60' @@ -360,7 +359,7 @@ t; - 'box.error.VINYL_MAX_TUPLE_SIZE : 139' - 'box.error.LOAD_FUNCTION : 99' - 'box.error.INVALID_XLOG : 74' - - 'box.error.PRIV_NOT_GRANTED : 91' + - 'box.error.READ_VIEW_ABORTED : 130' - 'box.error.TRANSACTION_CONFLICT : 97' - 'box.error.GUEST_USER_PASSWORD : 96' - 'box.error.PROC_C : 102' @@ -405,7 +404,7 @@ t; - 'box.error.injection : table:
- 'box.error.NULLABLE_MISMATCH : 153' - 'box.error.LAST_DROP : 15' - - 'box.error.NO_SUCH_ROLE : 82' + - 'box.error.TUPLE_FORMAT_LIMIT : 16' - 'box.error.DECOMPRESSION : 124' - 'box.error.CREATE_SEQUENCE : 142' - 'box.error.CREATE_USER : 43' @@ -414,66 +413,66 @@ t; - 'box.error.SEQUENCE_OVERFLOW : 147' - 'box.error.SYSTEM : 115' - 'box.error.KEY_PART_IS_TOO_LONG : 118' - - 'box.error.TUPLE_FORMAT_LIMIT : 16' - - 'box.error.BEFORE_REPLACE_RET : 53' + - 'box.error.INJECTION : 8' + - 'box.error.INVALID_MSGPACK : 20' - 'box.error.NO_SUCH_SAVEPOINT : 61' - 'box.error.TRUNCATE_SYSTEM_SPACE : 137' - 'box.error.VY_QUOTA_TIMEOUT : 135' - 'box.error.WRONG_INDEX_OPTIONS : 108' - 'box.error.INVALID_VYLOG_FILE : 133' - 'box.error.INDEX_FIELD_COUNT_LIMIT : 127' - - 'box.error.READ_VIEW_ABORTED : 130' + - 'box.error.PRIV_NOT_GRANTED : 91' - 'box.error.USER_MAX : 56' - - 'box.error.PROTOCOL : 104' + - 'box.error.BEFORE_REPLACE_RET : 53' - 'box.error.TUPLE_NOT_ARRAY : 22' - 'box.error.KEY_PART_COUNT : 31' - 'box.error.ALTER_SPACE : 12' - 'box.error.ACTIVE_TRANSACTION : 79' - 'box.error.EXACT_FIELD_COUNT : 38' - 'box.error.DROP_SEQUENCE : 144' - - 'box.error.INVALID_MSGPACK : 20' - 'box.error.MORE_THAN_ONE_TUPLE : 41' - - 'box.error.RTREE_RECT : 101' - - 'box.error.SUB_STMT_MAX : 121' + - 'box.error.UPSERT_UNIQUE_SECONDARY_KEY : 105' - 'box.error.UNKNOWN_REQUEST_TYPE : 48' - - 'box.error.SPACE_EXISTS : 10' + - 'box.error.SUB_STMT_MAX : 121' - 'box.error.PROC_LUA : 32' + - 'box.error.SPACE_EXISTS : 10' - 'box.error.ROLE_NOT_GRANTED : 92' + - 'box.error.UNSUPPORTED : 5' - 'box.error.NO_SUCH_SPACE : 36' - 'box.error.WRONG_INDEX_PARTS : 107' - - 'box.error.DROP_SPACE : 11' - 'box.error.MIN_FIELD_COUNT : 39' - 'box.error.REPLICASET_UUID_MISMATCH : 63' - 'box.error.UPDATE_FIELD : 29' + - 'box.error.INDEX_EXISTS : 85' - 'box.error.COMPRESSION : 119' - 'box.error.INVALID_ORDER : 68' - - 'box.error.INDEX_EXISTS : 85' - 'box.error.SPLICE : 25' - 'box.error.UNKNOWN : 0' + - 'box.error.IDENTIFIER : 70' - 'box.error.DROP_PRIMARY_KEY : 17' - 'box.error.NULLABLE_PRIMARY : 152' - 'box.error.NO_SUCH_SEQUENCE : 145' - 'box.error.RELOAD_CFG : 58' - 'box.error.INVALID_UUID : 64' - - 'box.error.INJECTION : 8' + - 'box.error.DROP_SPACE : 11' - 'box.error.TIMEOUT : 78' - - 'box.error.IDENTIFIER : 70' - 'box.error.ITERATOR_TYPE : 72' - 'box.error.REPLICA_MAX : 73' + - 'box.error.NO_SUCH_ROLE : 82' - 'box.error.MISSING_REQUEST_FIELD : 69' - 'box.error.MISSING_SNAPSHOT : 93' - 'box.error.WRONG_SPACE_OPTIONS : 111' - 'box.error.READONLY : 7' - - 'box.error.UNSUPPORTED : 5' - 'box.error.UPDATE_INTEGER_OVERFLOW : 95' + - 'box.error.RTREE_RECT : 101' - 'box.error.NO_CONNECTION : 77' - 'box.error.INVALID_XLOG_ORDER : 76' - - 'box.error.UPSERT_UNIQUE_SECONDARY_KEY : 105' - - 'box.error.ROLLBACK_IN_SUB_STMT : 123' - 'box.error.WRONG_SCHEMA_VERSION : 109' - - 'box.error.UNSUPPORTED_INDEX_FEATURE : 112' - - 'box.error.INDEX_PART_TYPE_MISMATCH : 24' + - 'box.error.ROLLBACK_IN_SUB_STMT : 123' + - 'box.error.PROTOCOL : 104' - 'box.error.INVALID_XLOG_TYPE : 125' + - 'box.error.INDEX_PART_TYPE_MISMATCH : 24' + - 'box.error.UNSUPPORTED_INDEX_FEATURE : 112' ... test_run:cmd("setopt delimiter ''"); --- diff --git a/test/box/select.result b/test/box/select.result index 4aed706..6e6ccad 100644 --- a/test/box/select.result +++ b/test/box/select.result @@ -619,6 +619,11 @@ collectgarbage('collect') --- - 0 ... +-- gh-3224 resurrect tuple bigrefs +collectgarbage('stop') +--- +- 0 +... s = box.schema.space.create('select', { temporary = true }) --- ... @@ -628,22 +633,37 @@ index = s:create_index('primary', { type = 'tree' }) a = s:insert{0} --- ... -lots_of_links = {} +lots_of_links = setmetatable({}, {__mode = 'v'}) +--- +... +ref_count = 0 +--- +... +while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end --- ... +ref_count +--- +- 100000 +... ref_count = 0 --- ... -while (true) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end +for k, v in pairs(lots_of_links) do ref_count = ref_count + 1 end --- -- error: Tuple reference counter overflow ... ref_count --- -- 65531 +- 100000 +... +-- check that tuples are deleted after gc is activated +collectgarbage('collect') +--- +- 0 ... -lots_of_links = {} +lots_of_links --- +- [] ... s:drop() --- diff --git a/test/box/select.test.lua b/test/box/select.test.lua index 54c2ecc..f15c6e2 100644 --- a/test/box/select.test.lua +++ b/test/box/select.test.lua @@ -124,12 +124,21 @@ test.random(s.index[0], 48) s:drop() collectgarbage('collect') + +-- gh-3224 resurrect tuple bigrefs + +collectgarbage('stop') s = box.schema.space.create('select', { temporary = true }) index = s:create_index('primary', { type = 'tree' }) a = s:insert{0} -lots_of_links = {} +lots_of_links = setmetatable({}, {__mode = 'v'}) ref_count = 0 -while (true) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end +while (ref_count < 100000) do table.insert(lots_of_links, s:get{0}) ref_count = ref_count + 1 end ref_count -lots_of_links = {} +ref_count = 0 +for k, v in pairs(lots_of_links) do ref_count = ref_count + 1 end +ref_count +-- check that tuples are deleted after gc is activated +collectgarbage('collect') +lots_of_links s:drop() -- 2.7.4