From: Vladimir Davydov <vdavydov.dev@gmail.com> To: kostja@tarantool.org Cc: tarantool-patches@freelists.org Subject: [PATCH 10/12] vinyl: refactor vy_read_iterator_next Date: Sun, 15 Apr 2018 22:55:23 +0300 [thread overview] Message-ID: <23d53017ad7228dc5b34463c9bfd6ceff1d5638e.1523820298.git.vdavydov.dev@gmail.com> (raw) In-Reply-To: <cover.1523820298.git.vdavydov.dev@gmail.com> In-Reply-To: <cover.1523820298.git.vdavydov.dev@gmail.com> The function looks rather messy now. Let's rearrange its code to make it look better and facilitate further development: - Move the debug checks making sure that the next key meets search criteria and doesn't violate the iteration order to the place where we look up the next key, i.e. to vy_read_iterator_next_key. - Move result tracking in tx read set from vy_read_iterator_next_key to vy_read_iterator_next so that the former now just advances the iterator to the next key and does nothing else. - Remove vy_read_iterator::search_started. We don't really need it as we can use (vy_read_iterator::last_stmt == NULL) condition instead. - Rearrange the code of vy_read_iterator_next to reduce indentation level. - Remove/rename some variables and labels. Add some comments. --- src/box/vy_read_iterator.c | 195 +++++++++++++++++++++------------------------ src/box/vy_read_iterator.h | 2 - 2 files changed, 90 insertions(+), 107 deletions(-) diff --git a/src/box/vy_read_iterator.c b/src/box/vy_read_iterator.c index 548cf234..0695eedb 100644 --- a/src/box/vy_read_iterator.c +++ b/src/box/vy_read_iterator.c @@ -443,9 +443,6 @@ vy_read_iterator_restore(struct vy_read_iterator *itr); static void vy_read_iterator_next_range(struct vy_read_iterator *itr); -static int -vy_read_iterator_track_read(struct vy_read_iterator *itr, struct tuple *stmt); - /** * Iterate to the next key * @retval 0 success or EOF (*ret == NULL) @@ -454,9 +451,6 @@ vy_read_iterator_track_read(struct vy_read_iterator *itr, struct tuple *stmt); static NODISCARD int vy_read_iterator_next_key(struct vy_read_iterator *itr, struct tuple **ret) { - uint32_t i; - bool stop = false; - if (itr->last_stmt != NULL && (itr->iterator_type == ITER_EQ || itr->iterator_type == ITER_REQ) && tuple_field_count(itr->key) >= itr->lsm->cmp_def->part_count) { @@ -469,9 +463,10 @@ vy_read_iterator_next_key(struct vy_read_iterator *itr, struct tuple **ret) } /* * Restore the iterator position if the LSM tree has changed - * since the last iteration. + * since the last iteration or this is the first iteration. */ - if (itr->mem_list_version != itr->lsm->mem_list_version || + if (itr->last_stmt == NULL || + itr->mem_list_version != itr->lsm->mem_list_version || itr->range_tree_version != itr->lsm->range_tree_version || itr->range_version != itr->curr_range->version) { vy_read_iterator_restore(itr); @@ -487,6 +482,7 @@ restart: * Look up the next key in read sources starting * from the one that stores newest data. */ + bool stop = false; vy_read_iterator_scan_txw(itr, &stop); if (stop) goto done; @@ -494,7 +490,7 @@ restart: if (stop) goto done; - for (i = itr->mem_src; i < itr->disk_src; i++) { + for (uint32_t i = itr->mem_src; i < itr->disk_src; i++) { if (vy_read_iterator_scan_mem(itr, i, &stop) != 0) return -1; if (stop) @@ -503,9 +499,11 @@ restart: rescan_disk: /* The following code may yield as it needs to access disk. */ vy_read_iterator_pin_slices(itr); - for (i = itr->disk_src; i < itr->src_count; i++) { - if (vy_read_iterator_scan_disk(itr, i, &stop) != 0) - goto err_disk; + for (uint32_t i = itr->disk_src; i < itr->src_count; i++) { + if (vy_read_iterator_scan_disk(itr, i, &stop) != 0) { + vy_read_iterator_unpin_slices(itr); + return -1; + } if (stop) break; } @@ -539,24 +537,34 @@ rescan_disk: goto rescan_disk; } done: - if (itr->last_stmt != NULL && itr->curr_stmt != NULL) +#ifndef NDEBUG + /* Check that the statement meets search criteria. */ + if (itr->curr_stmt != NULL) { + int cmp = vy_stmt_compare(itr->curr_stmt, itr->key, + itr->lsm->cmp_def); + cmp *= iterator_direction(itr->iterator_type); + if (itr->iterator_type == ITER_GT || + itr->iterator_type == ITER_LT) + assert(cmp > 0); + else + assert(cmp >= 0); + } + /* + * Ensure the read iterator does not return duplicates + * and respects statement order. + */ + if (itr->last_stmt != NULL && itr->curr_stmt != NULL) { assert(vy_read_iterator_cmp_stmt(itr, itr->curr_stmt, itr->last_stmt) > 0); - + } +#endif if (itr->need_check_eq && itr->curr_stmt != NULL && vy_stmt_compare(itr->curr_stmt, itr->key, itr->lsm->cmp_def) != 0) itr->curr_stmt = NULL; - if (vy_read_iterator_track_read(itr, itr->curr_stmt) != 0) - return -1; - *ret = itr->curr_stmt; return 0; - -err_disk: - vy_read_iterator_unpin_slices(itr); - return -1; } /** @@ -960,99 +968,70 @@ vy_read_iterator_next(struct vy_read_iterator *itr, struct tuple **result) { ev_tstamp start_time = ev_monotonic_now(loop()); - *result = NULL; - - if (!itr->search_started) { - itr->search_started = true; - itr->lsm->stat.lookup++; - vy_read_iterator_restore(itr); - } - - struct tuple *prev_key = itr->last_stmt; - if (prev_key != NULL) - tuple_ref(prev_key); - bool skipped_txw_delete = false; - - struct tuple *t = NULL; struct vy_lsm *lsm = itr->lsm; - int rc = 0; - while (true) { - rc = vy_read_iterator_next_key(itr, &t); - if (rc != 0) - goto clear; - if (t == NULL) { - if (itr->last_stmt != NULL) - tuple_unref(itr->last_stmt); - itr->last_stmt = NULL; - rc = 0; /* No more data. */ - break; - } - rc = vy_read_iterator_squash_upsert(itr, &t); - if (rc != 0) - goto clear; - if (itr->last_stmt != NULL) - tuple_unref(itr->last_stmt); - itr->last_stmt = t; - if (vy_stmt_type(t) == IPROTO_INSERT || - vy_stmt_type(t) == IPROTO_REPLACE) - break; - assert(vy_stmt_type(t) == IPROTO_DELETE); - if (vy_stmt_lsn(t) == INT64_MAX) /* t is from write set */ - skipped_txw_delete = true; - } + struct tuple *stmt, *prev_stmt; - *result = itr->last_stmt; - assert(*result == NULL || - vy_stmt_type(*result) == IPROTO_INSERT || - vy_stmt_type(*result) == IPROTO_REPLACE); - if (*result != NULL) - vy_stmt_counter_acct_tuple(&lsm->stat.get, *result); - -#ifndef NDEBUG - /* Check constraints. */ - int dir = iterator_direction(itr->iterator_type); /* - * Each result statement with iterator type GE/GT must - * be >= iterator key. And with LT/LE must - * be <= iterator_key. @sa gh-2614. + * Remember the statement returned by the last iteration. + * We will need it to update the cache. */ - if (itr->last_stmt != NULL && tuple_field_count(itr->key) > 0) { - int cmp = dir * vy_stmt_compare(*result, itr->key, - itr->lsm->cmp_def); - assert(cmp >= 0); - } + prev_stmt = itr->last_stmt; + if (prev_stmt != NULL) + tuple_ref(prev_stmt); + else /* first iteration */ + lsm->stat.lookup++; +next_key: + if (vy_read_iterator_next_key(itr, &stmt) != 0) + goto err; + /* - * Ensure the read iterator does not return duplicates - * and respects statements order (lsm->cmp_def includes - * primary parts, so prev_key != itr->last_stmt for any - * LSM tree). + * Fetching an older statement of the current key may yield + * so we must track the read before applying UPSERTs. */ - if (prev_key != NULL && itr->last_stmt != NULL) { - assert(dir * vy_tuple_compare(prev_key, itr->last_stmt, - lsm->cmp_def) < 0); + if (vy_read_iterator_track_read(itr, stmt) != 0) + goto err; + + if (stmt != NULL && + vy_read_iterator_squash_upsert(itr, &stmt) != 0) + goto err; + + if (itr->last_stmt != NULL) + tuple_unref(itr->last_stmt); + itr->last_stmt = stmt; + + if (stmt != NULL && vy_stmt_type(stmt) == IPROTO_DELETE) { + /* + * We don't return DELETEs so skip to the next key. + * If the DELETE was read from TX write set, there + * is a good chance that the space actually has + * the deleted key and hence we must not consider + * previous + current tuple as an unbroken chain. + */ + if (vy_stmt_lsn(stmt) == INT64_MAX) { + if (prev_stmt != NULL) + tuple_unref(prev_stmt); + prev_stmt = NULL; + } + goto next_key; } -#endif + assert(stmt == NULL || + vy_stmt_type(stmt) == IPROTO_INSERT || + vy_stmt_type(stmt) == IPROTO_REPLACE); - /** - * Add a statement to the cache + /* + * Store the result in the cache provided we are reading + * the latest data. */ - if ((**itr->read_view).vlsn == INT64_MAX) { /* Do not store non-latest data */ - struct tuple *cache_prev = prev_key; - if (skipped_txw_delete) { - /* - * If we skipped DELETE that was read from TX write - * set, there is a chance that the database actually - * has the deleted key and we must not consider - * previous+current tuple as an unbroken chain. - */ - cache_prev = NULL; - } - vy_cache_add(&itr->lsm->cache, *result, cache_prev, + if ((**itr->read_view).vlsn == INT64_MAX) { + vy_cache_add(&lsm->cache, stmt, prev_stmt, itr->key, itr->iterator_type); } -clear: - if (prev_key != NULL) - tuple_unref(prev_key); + if (prev_stmt != NULL) + tuple_unref(prev_stmt); + + /* Update LSM tree stats. */ + if (stmt != NULL) + vy_stmt_counter_acct_tuple(&lsm->stat.get, stmt); ev_tstamp latency = ev_monotonic_now(loop()) - start_time; latency_collect(&lsm->stat.latency, latency); @@ -1061,9 +1040,15 @@ clear: say_warn("%s: select(%s, %s) => %s took too long: %.3f sec", vy_lsm_name(lsm), tuple_str(itr->key), iterator_type_strs[itr->iterator_type], - vy_stmt_str(itr->last_stmt), latency); + vy_stmt_str(stmt), latency); } - return rc; + + *result = stmt; + return 0; +err: + if (prev_stmt != NULL) + tuple_unref(prev_stmt); + return -1; } /** diff --git a/src/box/vy_read_iterator.h b/src/box/vy_read_iterator.h index 4f9d3d4b..6aa84540 100644 --- a/src/box/vy_read_iterator.h +++ b/src/box/vy_read_iterator.h @@ -62,8 +62,6 @@ struct vy_read_iterator { * checked to match the search key. */ bool need_check_eq; - /** Set on the first call to vy_read_iterator_next(). */ - bool search_started; /** Last statement returned by vy_read_iterator_next(). */ struct tuple *last_stmt; /** -- 2.11.0
next prev parent reply other threads:[~2018-04-15 19:55 UTC|newest] Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov 2018-04-15 19:55 ` [PATCH 01/12] vinyl: rename vy_stmt_history to vy_history Vladimir Davydov 2018-04-15 19:55 ` [PATCH 02/12] vinyl: factor out vy_history_apply from vy_point_lookup_apply_history Vladimir Davydov 2018-05-14 18:19 ` [tarantool-patches] " Vladislav Shpilevoy 2018-04-15 19:55 ` [PATCH 03/12] vinyl: add vy_history_append_stmt helper Vladimir Davydov 2018-04-15 19:55 ` [PATCH 04/12] vinyl: zap iterator_src_type enum Vladimir Davydov 2018-04-15 19:55 ` [PATCH 05/12] vinyl: encapsulate key history with struct Vladimir Davydov 2018-04-15 19:55 ` [PATCH 06/12] vinyl: refine vy_history_cleanup Vladimir Davydov 2018-04-15 19:55 ` [PATCH 07/12] vinyl: move vy_history to its own source file Vladimir Davydov 2018-04-15 19:55 ` [PATCH 08/12] vinyl: use mempool for vy_history_node allocations Vladimir Davydov 2018-04-15 19:55 ` [PATCH 09/12] vinyl: consolidate skip optimization checks in read iterator Vladimir Davydov 2018-05-14 18:25 ` [tarantool-patches] " Vladislav Shpilevoy 2018-05-15 15:00 ` Vladimir Davydov 2018-04-15 19:55 ` Vladimir Davydov [this message] 2018-04-15 19:55 ` [PATCH 11/12] vinyl: make read iterator always return newest tuple version Vladimir Davydov 2018-04-15 19:55 ` [PATCH 12/12] vinyl: zap vy_read_iterator::curr_stmt Vladimir Davydov 2018-05-04 18:05 ` [tarantool-patches] Re: [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladislav Shpilevoy
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=23d53017ad7228dc5b34463c9bfd6ceff1d5638e.1523820298.git.vdavydov.dev@gmail.com \ --to=vdavydov.dev@gmail.com \ --cc=kostja@tarantool.org \ --cc=tarantool-patches@freelists.org \ --subject='Re: [PATCH 10/12] vinyl: refactor vy_read_iterator_next' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox