From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH 05/13] vinyl: store tuple comparison hints in range tree Date: Tue, 2 Apr 2019 20:33:42 +0300 Message-Id: <82724da4f25e781a3b344c6565a23235204294e3.1554225074.git.vdavydov.dev@gmail.com> In-Reply-To: References: In-Reply-To: References: To: kostja.osipov@gmail.com Cc: tarantool-patches@freelists.org List-ID: This patch incorporates tuple comparison hints into the vy_range_tree. Now, vy_range->begin and ->end are augmented with hints which are used for searching a range in an LSM tree. Although this isn't strictly mandatory for multikey support, since we only use key statements for range boundaries, which can't store multikey fields, we still need to be able to look up a hinted tuple in a tree, so augmenting range boundaries with hints makes the code look more consistent. Besides, it should speed up LSM tree lookups a little. --- src/box/vy_lsm.c | 44 +++++++++++++++++++++++++++++++------------- src/box/vy_point_lookup.c | 7 ++++--- src/box/vy_range.c | 32 ++++++++++++++++++++------------ src/box/vy_range.h | 31 +++++++++++++++++++++---------- src/box/vy_read_iterator.c | 6 +++++- test/unit/vy_point_lookup.c | 3 ++- 6 files changed, 83 insertions(+), 40 deletions(-) diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c index 1e08c0e1..44973a3a 100644 --- a/src/box/vy_lsm.c +++ b/src/box/vy_lsm.c @@ -325,8 +325,8 @@ vy_lsm_create(struct vy_lsm *lsm) int64_t id = vy_log_next_id(); /* Create the initial range. */ - struct vy_range *range = vy_range_new(vy_log_next_id(), NULL, NULL, - lsm->cmp_def); + struct vy_range *range = vy_range_new(vy_log_next_id(), NULL, HINT_NONE, + NULL, HINT_NONE, lsm->cmp_def); if (range == NULL) return -1; assert(lsm->range_count == 0); @@ -443,6 +443,7 @@ vy_lsm_recover_range(struct vy_lsm *lsm, struct vy_run_env *run_env, bool force_recovery) { struct tuple *begin = NULL, *end = NULL; + hint_t begin_hint = HINT_NONE, end_hint = HINT_NONE; struct vy_range *range = NULL; if (range_info->begin != NULL) { @@ -450,22 +451,26 @@ vy_lsm_recover_range(struct vy_lsm *lsm, range_info->begin); if (begin == NULL) goto out; + begin_hint = vy_stmt_hint(begin, lsm->cmp_def); } if (range_info->end != NULL) { end = vy_key_from_msgpack(lsm->env->key_format, range_info->end); if (end == NULL) goto out; + end_hint = vy_stmt_hint(end, lsm->cmp_def); } if (begin != NULL && end != NULL && - vy_stmt_compare(begin, end, lsm->cmp_def) >= 0) { + vy_stmt_compare_hinted(begin, begin_hint, end, end_hint, + lsm->cmp_def) >= 0) { diag_set(ClientError, ER_INVALID_VYLOG_FILE, tt_sprintf("begin >= end for range %lld", (long long)range_info->id)); goto out; } - range = vy_range_new(range_info->id, begin, end, lsm->cmp_def); + range = vy_range_new(range_info->id, begin, begin_hint, + end, end_hint, lsm->cmp_def); if (range == NULL) goto out; @@ -584,8 +589,9 @@ vy_lsm_recover(struct vy_lsm *lsm, struct vy_recovery *recovery, * We need range tree initialized for all LSM trees, * even for dropped ones. */ - struct vy_range *range = vy_range_new(vy_log_next_id(), - NULL, NULL, lsm->cmp_def); + struct vy_range *range; + range = vy_range_new(vy_log_next_id(), NULL, HINT_NONE, + NULL, HINT_NONE, lsm->cmp_def); if (range == NULL) return -1; vy_lsm_add_range(lsm, range); @@ -639,8 +645,9 @@ vy_lsm_recover(struct vy_lsm *lsm, struct vy_recovery *recovery, int cmp = 0; if (prev != NULL && (prev->end == NULL || range->begin == NULL || - (cmp = vy_stmt_compare(prev->end, range->begin, - lsm->cmp_def)) != 0)) { + (cmp = vy_stmt_compare_hinted(prev->end, prev->end_hint, + range->begin, range->begin_hint, + lsm->cmp_def)) != 0)) { const char *errmsg = cmp > 0 ? "Nearby ranges %lld and %lld overlap" : "Keys between ranges %lld and %lld not spanned"; @@ -1071,18 +1078,23 @@ vy_lsm_find_range_intersection(struct vy_lsm *lsm, struct vy_range **begin, struct vy_range **end) { struct tuple_format *key_format = lsm->env->key_format; + struct vy_range_tree_key tree_key; struct tuple *stmt; stmt = vy_key_from_msgpack(key_format, min_key); if (stmt == NULL) return -1; - *begin = vy_range_tree_psearch(&lsm->range_tree, stmt); + tree_key.stmt = stmt; + tree_key.hint = vy_stmt_hint(stmt, lsm->cmp_def); + *begin = vy_range_tree_psearch(&lsm->range_tree, &tree_key); tuple_unref(stmt); stmt = vy_key_from_msgpack(key_format, max_key); if (stmt == NULL) return -1; - *end = vy_range_tree_psearch(&lsm->range_tree, stmt); + tree_key.stmt = stmt; + tree_key.hint = vy_stmt_hint(stmt, lsm->cmp_def); + *end = vy_range_tree_psearch(&lsm->range_tree, &tree_key); *end = vy_range_tree_next(&lsm->range_tree, *end); tuple_unref(stmt); @@ -1115,6 +1127,11 @@ vy_lsm_split_range(struct vy_lsm *lsm, struct vy_range *range) keys[1] = split_key; keys[2] = range->end; + hint_t hints[3]; + hints[0] = range->begin_hint; + hints[1] = vy_stmt_hint(split_key, lsm->cmp_def); + hints[2] = range->end_hint; + /* * Allocate new ranges and create slices of * the old range's runs for them. @@ -1122,8 +1139,8 @@ vy_lsm_split_range(struct vy_lsm *lsm, struct vy_range *range) struct vy_slice *slice, *new_slice; struct vy_range *part, *parts[2] = {NULL, }; for (int i = 0; i < n_parts; i++) { - part = vy_range_new(vy_log_next_id(), keys[i], keys[i + 1], - lsm->cmp_def); + part = vy_range_new(vy_log_next_id(), keys[i], hints[i], + keys[i + 1], hints[i + 1], lsm->cmp_def); if (part == NULL) goto fail; parts[i] = part; @@ -1209,7 +1226,8 @@ vy_lsm_coalesce_range(struct vy_lsm *lsm, struct vy_range *range) return false; struct vy_range *result = vy_range_new(vy_log_next_id(), - first->begin, last->end, lsm->cmp_def); + first->begin, first->begin_hint, + last->end, last->end_hint, lsm->cmp_def); if (result == NULL) goto fail_range; diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c index 5f1be274..aa0625fb 100644 --- a/src/box/vy_point_lookup.c +++ b/src/box/vy_point_lookup.c @@ -161,10 +161,11 @@ vy_point_lookup_scan_slice(struct vy_lsm *lsm, struct vy_slice *slice, */ static int vy_point_lookup_scan_slices(struct vy_lsm *lsm, const struct vy_read_view **rv, - struct tuple *key, struct vy_history *history) + struct tuple *key, hint_t hint, + struct vy_history *history) { struct vy_range *range = vy_range_tree_find_by_key(&lsm->range_tree, - ITER_EQ, key); + ITER_EQ, key, hint); assert(range != NULL); int slice_count = range->slice_count; struct vy_slice **slices = (struct vy_slice **) @@ -231,7 +232,7 @@ restart: uint32_t mem_version = lsm->mem->version; uint32_t mem_list_version = lsm->mem_list_version; - rc = vy_point_lookup_scan_slices(lsm, rv, key, &disk_history); + rc = vy_point_lookup_scan_slices(lsm, rv, key, hint, &disk_history); if (rc != 0) goto done; diff --git a/src/box/vy_range.c b/src/box/vy_range.c index f76615c4..edeb8a5f 100644 --- a/src/box/vy_range.c +++ b/src/box/vy_range.c @@ -63,23 +63,25 @@ vy_range_tree_cmp(struct vy_range *range_a, struct vy_range *range_b) return 1; assert(range_a->cmp_def == range_b->cmp_def); - return vy_stmt_compare(range_a->begin, range_b->begin, - range_a->cmp_def); + return vy_stmt_compare_hinted(range_a->begin, range_a->begin_hint, + range_b->begin, range_b->begin_hint, + range_a->cmp_def); } int -vy_range_tree_key_cmp(const struct tuple *stmt, struct vy_range *range) +vy_range_tree_key_cmp(struct vy_range_tree_key *key, struct vy_range *range) { /* Any key > -inf. */ if (range->begin == NULL) return 1; - return vy_stmt_compare(stmt, range->begin, range->cmp_def); + return vy_stmt_compare_hinted(key->stmt, key->hint, range->begin, + range->begin_hint, range->cmp_def); } struct vy_range * vy_range_tree_find_by_key(vy_range_tree_t *tree, enum iterator_type iterator_type, - const struct tuple *key) + const struct tuple *key, hint_t hint) { if (vy_stmt_is_empty_key(key)) { switch (iterator_type) { @@ -97,6 +99,7 @@ vy_range_tree_find_by_key(vy_range_tree_t *tree, } } struct vy_range *range; + struct vy_range_tree_key tree_key = { key, hint }; if (iterator_type == ITER_GE || iterator_type == ITER_GT || iterator_type == ITER_EQ) { /** @@ -121,11 +124,13 @@ vy_range_tree_find_by_key(vy_range_tree_t *tree, * vy_range_tree_psearch finds least range with begin == key * or previous if equal was not found */ - range = vy_range_tree_psearch(tree, key); + range = vy_range_tree_psearch(tree, &tree_key); /* switch to previous for case (4) */ if (range != NULL && range->begin != NULL && !vy_stmt_is_full_key(key, range->cmp_def) && - vy_stmt_compare(key, range->begin, range->cmp_def) == 0) + vy_stmt_compare_hinted(key, hint, range->begin, + range->begin_hint, + range->cmp_def) == 0) range = vy_range_tree_prev(tree, range); /* for case 5 or subcase of case 4 */ if (range == NULL) @@ -155,12 +160,13 @@ vy_range_tree_find_by_key(vy_range_tree_t *tree, * vy_range_tree_nsearch finds most range with begin == key * or next if equal was not found */ - range = vy_range_tree_nsearch(tree, key); + range = vy_range_tree_nsearch(tree, &tree_key); if (range != NULL) { /* fix curr_range for cases 2 and 3 */ if (range->begin != NULL && - vy_stmt_compare(key, range->begin, - range->cmp_def) != 0) { + vy_stmt_compare_hinted(key, hint, range->begin, + range->begin_hint, + range->cmp_def) != 0) { struct vy_range *prev; prev = vy_range_tree_prev(tree, range); if (prev != NULL) @@ -175,8 +181,8 @@ vy_range_tree_find_by_key(vy_range_tree_t *tree, } struct vy_range * -vy_range_new(int64_t id, struct tuple *begin, struct tuple *end, - struct key_def *cmp_def) +vy_range_new(int64_t id, struct tuple *begin, hint_t begin_hint, + struct tuple *end, hint_t end_hint, struct key_def *cmp_def) { struct vy_range *range = calloc(1, sizeof(*range)); if (range == NULL) { @@ -193,6 +199,8 @@ vy_range_new(int64_t id, struct tuple *begin, struct tuple *end, tuple_ref(end); range->end = end; } + range->begin_hint = begin_hint; + range->end_hint = end_hint; range->cmp_def = cmp_def; rlist_create(&range->slices); heap_node_create(&range->heap_node); diff --git a/src/box/vy_range.h b/src/box/vy_range.h index 91f2682c..7b5988bf 100644 --- a/src/box/vy_range.h +++ b/src/box/vy_range.h @@ -39,6 +39,7 @@ #include #include "iterator_type.h" +#include "key_def.h" #define HEAP_FORWARD_DECLARATION #include "salad/heap.h" #include "trivia/util.h" @@ -49,7 +50,6 @@ extern "C" { #endif /* defined(__cplusplus) */ struct index_opts; -struct key_def; struct tuple; struct vy_slice; @@ -65,8 +65,12 @@ struct vy_range { * the full idexed key. */ struct tuple *begin; + /** Comparison hint of the range lower bound. */ + hint_t begin_hint; /** Range upper bound. NULL if range is rightmost. */ struct tuple *end; + /** Comparison hint of the range upper bound. */ + hint_t end_hint; /** Key definition for comparing range boundaries. * Contains secondary and primary key parts for secondary * keys, to ensure an always distinct result for @@ -164,15 +168,19 @@ vy_range_is_scheduled(struct vy_range *range) * vy_range->begin. Ranges in a tree are supposed to span * all possible keys without overlaps. */ +struct vy_range_tree_key { + const struct tuple *stmt; + hint_t hint; +}; int vy_range_tree_cmp(struct vy_range *range_a, struct vy_range *range_b); int -vy_range_tree_key_cmp(const struct tuple *stmt, struct vy_range *range); +vy_range_tree_key_cmp(struct vy_range_tree_key *key, struct vy_range *range); typedef rb_tree(struct vy_range) vy_range_tree_t; rb_gen_ext_key(MAYBE_UNUSED static inline, vy_range_tree_, vy_range_tree_t, struct vy_range, tree_node, vy_range_tree_cmp, - const struct tuple *, vy_range_tree_key_cmp); + struct vy_range_tree_key *, vy_range_tree_key_cmp); /** * Find the first range in which a given key should be looked up. @@ -180,29 +188,32 @@ rb_gen_ext_key(MAYBE_UNUSED static inline, vy_range_tree_, vy_range_tree_t, * @param tree Range tree to search. * @param iterator_type Iterator type. * @param key Key to look up. + * @param hint Comparison hint of @key. * * @retval The first range to look up the key in. */ struct vy_range * vy_range_tree_find_by_key(vy_range_tree_t *tree, enum iterator_type iterator_type, - const struct tuple *key); + const struct tuple *key, hint_t hint); /** * Allocate and initialize a range (either a new one or for * restore from disk). * - * @param id Range id. - * @param begin Range begin (inclusive) or NULL for -inf. - * @param end Range end (exclusive) or NULL for +inf. - * @param cmp_def Key definition for comparing range boundaries. + * @param id Range id. + * @param begin Range begin (inclusive) or NULL for -inf. + * @param begin_hint Comparison hint of @begin. + * @param end Range end (exclusive) or NULL for +inf. + * @param end_hint Comparison hint of @end. + * @param cmp_def Key definition for comparing range boundaries. * * @retval not NULL The new range. * @retval NULL Out of memory. */ struct vy_range * -vy_range_new(int64_t id, struct tuple *begin, struct tuple *end, - struct key_def *cmp_def); +vy_range_new(int64_t id, struct tuple *begin, hint_t begin_hint, + struct tuple *end, hint_t end_hint, struct key_def *cmp_def); /** * Free a range and all its slices. diff --git a/src/box/vy_read_iterator.c b/src/box/vy_read_iterator.c index b5012748..f1d8c8e4 100644 --- a/src/box/vy_read_iterator.c +++ b/src/box/vy_read_iterator.c @@ -730,10 +730,14 @@ vy_read_iterator_restore(struct vy_read_iterator *itr) { vy_read_iterator_cleanup(itr); + const struct tuple *key = itr->last_stmt != NULL ? + itr->last_stmt : itr->key; + hint_t hint = vy_stmt_hint(key, itr->lsm->cmp_def); + itr->mem_list_version = itr->lsm->mem_list_version; itr->range_tree_version = itr->lsm->range_tree_version; itr->curr_range = vy_range_tree_find_by_key(&itr->lsm->range_tree, - itr->iterator_type, itr->last_stmt ?: itr->key); + itr->iterator_type, key, hint); itr->range_version = itr->curr_range->version; if (itr->tx != NULL) { diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c index f3cd84d4..8fb1a8d1 100644 --- a/test/unit/vy_point_lookup.c +++ b/test/unit/vy_point_lookup.c @@ -98,7 +98,8 @@ test_basic() index_def, format, NULL, 0); isnt(pk, NULL, "lsm is not NULL") - struct vy_range *range = vy_range_new(1, NULL, NULL, pk->cmp_def); + struct vy_range *range = vy_range_new(1, NULL, HINT_NONE, + NULL, HINT_NONE, pk->cmp_def); isnt(pk, NULL, "range is not NULL") vy_lsm_add_range(pk, range); -- 2.11.0