From: Vladimir Davydov
Subject: [PATCH 2/2] vinyl: optimize mem iterator for frequently updated keys
Date: Thu, 28 Feb 2019 00:37:20 +0300
In-Reply-To: <3295f9b656c0dcb116de25dea7c87b67b647ca37.1551302815.git.vdavydov.dev@gmail.com>
References: <3295f9b656c0dcb116de25dea7c87b67b647ca37.1551302815.git.vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org

If a key is frequently updated, iterating to the next key stored in the
memory level can take quite a while, because:

 - In case of a GE/GT iterator, vy_mem_iterator_next_key will have to
   iterate the tree from left to right to skip older key versions.

 - In case of an LE/LT iterator, vy_mem_iterator_find_lsn will have to
   iterate the tree from right to left to find the newest key version
   visible in the read view.

To avoid that, let's fall back on a key lookup if we fail to find an
appropriate statement after one iteration: in that case there's a good
chance that there are many more statements for this key. This should be
fine, since a lookup in a memory tree is pretty cheap.
---
https://github.com/tarantool/tarantool/commits/dv/vy-optimize-next-key

 src/box/vy_mem.c | 80 ++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 57 insertions(+), 23 deletions(-)

diff --git a/src/box/vy_mem.c b/src/box/vy_mem.c
index cff1d978..9c4448eb 100644
--- a/src/box/vy_mem.c
+++ b/src/box/vy_mem.c
@@ -310,6 +310,7 @@ vy_mem_iterator_step(struct vy_mem_iterator *itr)
 static int
 vy_mem_iterator_find_lsn(struct vy_mem_iterator *itr)
 {
+	/* Skip to the first statement visible in the read view. */
 	assert(!vy_mem_tree_iterator_is_invalid(&itr->curr_pos));
 	assert(itr->curr_stmt == vy_mem_iterator_curr_stmt(itr));
 	struct key_def *cmp_def = itr->mem->cmp_def;
@@ -322,25 +323,52 @@ vy_mem_iterator_find_lsn(struct vy_mem_iterator *itr)
 			return 1;
 		}
 	}
-	if (itr->iterator_type == ITER_LE || itr->iterator_type == ITER_LT) {
-		struct vy_mem_tree_iterator prev_pos = itr->curr_pos;
-		vy_mem_tree_iterator_prev(&itr->mem->tree, &prev_pos);
+	if (iterator_direction(itr->iterator_type) > 0)
+		return 0;
+	/*
+	 * Since statements are sorted by LSN in descending order,
+	 * for an LE/LT iterator we must skip to the statement with
+	 * the max LSN visible in the read view.
+	 */
+	struct vy_mem_tree_iterator prev_pos = itr->curr_pos;
+	vy_mem_tree_iterator_prev(&itr->mem->tree, &prev_pos);
+	if (vy_mem_tree_iterator_is_invalid(&prev_pos)) {
+		/* No more statements. */
+		return 0;
+	}
+	const struct tuple *prev_stmt;
+	prev_stmt = *vy_mem_tree_iterator_get_elem(&itr->mem->tree, &prev_pos);
+	if (vy_stmt_lsn(prev_stmt) > (**itr->read_view).vlsn ||
+	    vy_tuple_compare(itr->curr_stmt, prev_stmt, cmp_def) != 0) {
+		/*
+		 * The next statement is either invisible in
+		 * the read view or belongs to another key.
+		 */
+		return 0;
+	}
+	/*
+	 * We could iterate linearly until a statement invisible
+	 * in the read view is found, but there's a good chance
+	 * that this key is frequently updated and so the iteration
+	 * is going to take a while. So instead we look the key
+	 * up - it's pretty cheap anyway.
+	 */
+	struct tree_mem_key tree_key;
+	tree_key.stmt = itr->curr_stmt;
+	tree_key.lsn = (**itr->read_view).vlsn;
+	itr->curr_pos = vy_mem_tree_lower_bound(&itr->mem->tree,
+						&tree_key, NULL);
+	assert(!vy_mem_tree_iterator_is_invalid(&itr->curr_pos));
+	itr->curr_stmt = *vy_mem_tree_iterator_get_elem(&itr->mem->tree,
+							&itr->curr_pos);
-	while (!vy_mem_tree_iterator_is_invalid(&prev_pos)) {
-		const struct tuple *prev_stmt =
-			*vy_mem_tree_iterator_get_elem(&itr->mem->tree,
-						       &prev_pos);
-		if (vy_stmt_lsn(prev_stmt) > (**itr->read_view).vlsn ||
-		    vy_stmt_flags(prev_stmt) & VY_STMT_SKIP_READ ||
-		    vy_tuple_compare(itr->curr_stmt, prev_stmt,
-				     cmp_def) != 0)
-			break;
-		itr->curr_pos = prev_pos;
-		itr->curr_stmt = prev_stmt;
-		vy_mem_tree_iterator_prev(&itr->mem->tree, &prev_pos);
-	}
+	/* Skip VY_STMT_SKIP_READ statements, if any. */
+	while (vy_stmt_flags(itr->curr_stmt) & VY_STMT_SKIP_READ) {
+		vy_mem_tree_iterator_next(&itr->mem->tree, &itr->curr_pos);
+		assert(!vy_mem_tree_iterator_is_invalid(&itr->curr_pos));
+		itr->curr_stmt = *vy_mem_tree_iterator_get_elem(&itr->mem->tree,
+								&itr->curr_pos);
 	}
-	assert(itr->curr_stmt != NULL);
 	return 0;
 }
 
@@ -450,12 +478,18 @@ vy_mem_iterator_next_key(struct vy_mem_iterator *itr)
 	struct key_def *cmp_def = itr->mem->cmp_def;
 	const struct tuple *prev_stmt = itr->curr_stmt;
 
-	do {
-		if (vy_mem_iterator_step(itr) != 0) {
-			itr->curr_stmt = NULL;
-			return 1;
-		}
-	} while (vy_tuple_compare(prev_stmt, itr->curr_stmt, cmp_def) == 0);
+	if (vy_mem_iterator_step(itr) != 0) {
+		itr->curr_stmt = NULL;
+		return 1;
+	}
+	/*
+	 * If we are still on the same key after making a step,
+	 * there's a good chance there are a lot of statements
+	 * for this key, so instead of iterating further we simply
+	 * look up the next key - it's pretty cheap anyway.
+	 */
+	if (vy_tuple_compare(prev_stmt, itr->curr_stmt, cmp_def) == 0)
+		return vy_mem_iterator_seek(itr, itr->curr_stmt);
 
 	if (itr->iterator_type == ITER_EQ &&
 	    vy_stmt_compare(itr->key, itr->curr_stmt, cmp_def) != 0) {
-- 
2.11.0
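
P.S. For readers not familiar with the vinyl memory tree, here is a rough
standalone sketch of the pattern the patch applies. This is not Tarantool
code - struct stmt, upper_bound() and next_key() below are made-up names
for illustration only. It models the memory level as an array sorted by
(key asc, LSN desc) and shows the "one linear step, then fall back to a
logarithmic lookup" idea:

#include <stdio.h>

/* Stand-in for a vinyl statement: sorted by key asc, then LSN desc. */
struct stmt {
	int key;
	long lsn;
};

/* First index whose key is greater than the given key (binary search). */
static int
upper_bound(const struct stmt *a, int n, int key)
{
	int lo = 0, hi = n;
	while (lo < hi) {
		int mid = lo + (hi - lo) / 2;
		if (a[mid].key <= key)
			lo = mid + 1;
		else
			hi = mid;
	}
	return lo;
}

/*
 * Advance from pos to the first statement of the next key.
 * Take one linear step; if it lands on the same key, assume the
 * key is hot (has many versions) and switch to a lookup instead
 * of skipping versions one by one.
 */
static int
next_key(const struct stmt *a, int n, int pos)
{
	int key = a[pos].key;
	if (++pos >= n)
		return -1; /* no more statements */
	if (a[pos].key != key)
		return pos; /* common case: one step is enough */
	pos = upper_bound(a, n, key); /* hot key: O(log n) fallback */
	return pos < n ? pos : -1;
}

int
main(void)
{
	/* Key 1 is "frequently updated": five versions with distinct LSNs. */
	struct stmt mem[] = {
		{1, 105}, {1, 104}, {1, 103}, {1, 102}, {1, 101},
		{2, 50}, {3, 42},
	};
	int n = sizeof(mem) / sizeof(mem[0]);
	for (int pos = 0; pos >= 0; pos = next_key(mem, n, pos))
		printf("key=%d lsn=%ld\n", mem[pos].key, mem[pos].lsn);
	return 0;
}

The point of stepping once before falling back is that the common case
(a key with one or two versions) stays a single pointer move, while a
hot key costs one extra comparison plus an O(log n) lookup instead of
an O(number-of-versions) scan.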