From: Vladimir Davydov <vdavydov.dev@gmail.com> To: kostja@tarantool.org Cc: tarantool-patches@freelists.org Subject: [PATCH 2/2] vinyl: optimize mem iterator for frequently updated keys Date: Thu, 28 Feb 2019 00:37:20 +0300 [thread overview] Message-ID: <db5f5a7065b7719781b82156272109756d33f707.1551302815.git.vdavydov.dev@gmail.com> (raw) In-Reply-To: <3295f9b656c0dcb116de25dea7c87b67b647ca37.1551302815.git.vdavydov.dev@gmail.com> In-Reply-To: <3295f9b656c0dcb116de25dea7c87b67b647ca37.1551302815.git.vdavydov.dev@gmail.com> If a key is frequently updated, iteration to the next key stored in the memory level can take quite a while, because: - In case of GE/GT iterator, vy_mem_iterator_next_key will have to iterate the tree from left to right to skip older key versions. - In case of LE/LT iterator, vy_mem_iterator_find_lsn will have to iterate the tree from right to left to find the newest key version visible in the read view. To avoid that, let's fall back on key lookup if we failed to find an appropriate statement after one iteration, because in this case there's a good chance that there's more statements for this key. This should be fine since a lookup in a memory tree is pretty cheap. --- https://github.com/tarantool/tarantool/commits/dv/vy-optimize-next-key src/box/vy_mem.c | 80 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 23 deletions(-) diff --git a/src/box/vy_mem.c b/src/box/vy_mem.c index cff1d978..9c4448eb 100644 --- a/src/box/vy_mem.c +++ b/src/box/vy_mem.c @@ -310,6 +310,7 @@ vy_mem_iterator_step(struct vy_mem_iterator *itr) static int vy_mem_iterator_find_lsn(struct vy_mem_iterator *itr) { + /* Skip to the first statement visible in the read view. */ assert(!vy_mem_tree_iterator_is_invalid(&itr->curr_pos)); assert(itr->curr_stmt == vy_mem_iterator_curr_stmt(itr)); struct key_def *cmp_def = itr->mem->cmp_def; @@ -322,25 +323,52 @@ vy_mem_iterator_find_lsn(struct vy_mem_iterator *itr) return 1; } } - if (itr->iterator_type == ITER_LE || itr->iterator_type == ITER_LT) { - struct vy_mem_tree_iterator prev_pos = itr->curr_pos; - vy_mem_tree_iterator_prev(&itr->mem->tree, &prev_pos); + if (iterator_direction(itr->iterator_type) > 0) + return 0; + /* + * Since statements are sorted by LSN in descending order, + * for LE/LT iterator we must skip to the statement with + * max LSN visible in the read view. + */ + struct vy_mem_tree_iterator prev_pos = itr->curr_pos; + vy_mem_tree_iterator_prev(&itr->mem->tree, &prev_pos); + if (vy_mem_tree_iterator_is_invalid(&prev_pos)) { + /* No more statements. */ + return 0; + } + const struct tuple *prev_stmt; + prev_stmt = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, &prev_pos); + if (vy_stmt_lsn(prev_stmt) > (**itr->read_view).vlsn || + vy_tuple_compare(itr->curr_stmt, prev_stmt, cmp_def) != 0) { + /* + * The next statement is either invisible in + * the read view or for another key. + */ + return 0; + } + /* + * We could iterate linearly until a statement invisible + * in the read view is found, but there's a good chance + * that this key is frequently updated and so the iteration + * is going to take long. So instead we look it up - it's + * pretty cheap anyway. + */ + struct tree_mem_key tree_key; + tree_key.stmt = itr->curr_stmt; + tree_key.lsn = (**itr->read_view).vlsn; + itr->curr_pos = vy_mem_tree_lower_bound(&itr->mem->tree, + &tree_key, NULL); + assert(!vy_mem_tree_iterator_is_invalid(&itr->curr_pos)); + itr->curr_stmt = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, + &itr->curr_pos); - while (!vy_mem_tree_iterator_is_invalid(&prev_pos)) { - const struct tuple *prev_stmt = - *vy_mem_tree_iterator_get_elem(&itr->mem->tree, - &prev_pos); - if (vy_stmt_lsn(prev_stmt) > (**itr->read_view).vlsn || - vy_stmt_flags(prev_stmt) & VY_STMT_SKIP_READ || - vy_tuple_compare(itr->curr_stmt, prev_stmt, - cmp_def) != 0) - break; - itr->curr_pos = prev_pos; - itr->curr_stmt = prev_stmt; - vy_mem_tree_iterator_prev(&itr->mem->tree, &prev_pos); - } + /* Skip VY_STMT_SKIP_READ statements, if any. */ + while (vy_stmt_flags(itr->curr_stmt) & VY_STMT_SKIP_READ) { + vy_mem_tree_iterator_next(&itr->mem->tree, &itr->curr_pos); + assert(!vy_mem_tree_iterator_is_invalid(&itr->curr_pos)); + itr->curr_stmt = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, + &itr->curr_pos); } - assert(itr->curr_stmt != NULL); return 0; } @@ -450,12 +478,18 @@ vy_mem_iterator_next_key(struct vy_mem_iterator *itr) struct key_def *cmp_def = itr->mem->cmp_def; const struct tuple *prev_stmt = itr->curr_stmt; - do { - if (vy_mem_iterator_step(itr) != 0) { - itr->curr_stmt = NULL; - return 1; - } - } while (vy_tuple_compare(prev_stmt, itr->curr_stmt, cmp_def) == 0); + if (vy_mem_iterator_step(itr) != 0) { + itr->curr_stmt = NULL; + return 1; + } + /* + * If we are still on the same key after making a step, + * there's a good chance there's a lot of statements + * for this key so instead of iterating further we simply + * look up the next key - it's pretty cheap anyway. + */ + if (vy_tuple_compare(prev_stmt, itr->curr_stmt, cmp_def) == 0) + return vy_mem_iterator_seek(itr, itr->curr_stmt); if (itr->iterator_type == ITER_EQ && vy_stmt_compare(itr->key, itr->curr_stmt, cmp_def) != 0) { -- 2.11.0
next prev parent reply other threads:[~2019-02-27 21:37 UTC|newest] Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-02-27 21:37 [PATCH 1/2] vinyl: refactor vy_mem_iterator_seek Vladimir Davydov 2019-02-27 21:37 ` Vladimir Davydov [this message] 2019-02-28 11:09 ` [PATCH 2/2] vinyl: optimize mem iterator for frequently updated keys Vladimir Davydov 2019-02-28 16:02 ` Vladimir Davydov 2019-02-28 11:09 ` [PATCH 1/2] vinyl: refactor vy_mem_iterator_seek Vladimir Davydov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=db5f5a7065b7719781b82156272109756d33f707.1551302815.git.vdavydov.dev@gmail.com \ --to=vdavydov.dev@gmail.com \ --cc=kostja@tarantool.org \ --cc=tarantool-patches@freelists.org \ --subject='Re: [PATCH 2/2] vinyl: optimize mem iterator for frequently updated keys' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox