Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH 10/12] vinyl: refactor vy_read_iterator_next
Date: Sun, 15 Apr 2018 22:55:23 +0300	[thread overview]
Message-ID: <23d53017ad7228dc5b34463c9bfd6ceff1d5638e.1523820298.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1523820298.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1523820298.git.vdavydov.dev@gmail.com>

The function looks rather messy now. Let's rearrange its code to make it
look better and facilitate further development:

 - Move the debug checks making sure that the next key meets search
   criteria and doesn't violate the iteration order to the place where
   we look up the next key, i.e. to vy_read_iterator_next_key.

 - Move result tracking in tx read set from vy_read_iterator_next_key
   to vy_read_iterator_next so that the former now just advances the
   iterator to the next key and does nothing else.

 - Remove vy_read_iterator::search_started. We don't really need it as
   we can use (vy_read_iterator::last_stmt == NULL) condition instead.

 - Rearrange the code of vy_read_iterator_next to reduce indentation
   level.

 - Remove/rename some variables and labels. Add some comments.
---
 src/box/vy_read_iterator.c | 195 +++++++++++++++++++++------------------------
 src/box/vy_read_iterator.h |   2 -
 2 files changed, 90 insertions(+), 107 deletions(-)

diff --git a/src/box/vy_read_iterator.c b/src/box/vy_read_iterator.c
index 548cf234..0695eedb 100644
--- a/src/box/vy_read_iterator.c
+++ b/src/box/vy_read_iterator.c
@@ -443,9 +443,6 @@ vy_read_iterator_restore(struct vy_read_iterator *itr);
 static void
 vy_read_iterator_next_range(struct vy_read_iterator *itr);
 
-static int
-vy_read_iterator_track_read(struct vy_read_iterator *itr, struct tuple *stmt);
-
 /**
  * Iterate to the next key
  * @retval 0 success or EOF (*ret == NULL)
@@ -454,9 +451,6 @@ vy_read_iterator_track_read(struct vy_read_iterator *itr, struct tuple *stmt);
 static NODISCARD int
 vy_read_iterator_next_key(struct vy_read_iterator *itr, struct tuple **ret)
 {
-	uint32_t i;
-	bool stop = false;
-
 	if (itr->last_stmt != NULL && (itr->iterator_type == ITER_EQ ||
 				       itr->iterator_type == ITER_REQ) &&
 	    tuple_field_count(itr->key) >= itr->lsm->cmp_def->part_count) {
@@ -469,9 +463,10 @@ vy_read_iterator_next_key(struct vy_read_iterator *itr, struct tuple **ret)
 	}
 	/*
 	 * Restore the iterator position if the LSM tree has changed
-	 * since the last iteration.
+	 * since the last iteration or this is the first iteration.
 	 */
-	if (itr->mem_list_version != itr->lsm->mem_list_version ||
+	if (itr->last_stmt == NULL ||
+	    itr->mem_list_version != itr->lsm->mem_list_version ||
 	    itr->range_tree_version != itr->lsm->range_tree_version ||
 	    itr->range_version != itr->curr_range->version) {
 		vy_read_iterator_restore(itr);
@@ -487,6 +482,7 @@ restart:
 	 * Look up the next key in read sources starting
 	 * from the one that stores newest data.
 	 */
+	bool stop = false;
 	vy_read_iterator_scan_txw(itr, &stop);
 	if (stop)
 		goto done;
@@ -494,7 +490,7 @@ restart:
 	if (stop)
 		goto done;
 
-	for (i = itr->mem_src; i < itr->disk_src; i++) {
+	for (uint32_t i = itr->mem_src; i < itr->disk_src; i++) {
 		if (vy_read_iterator_scan_mem(itr, i, &stop) != 0)
 			return -1;
 		if (stop)
@@ -503,9 +499,11 @@ restart:
 rescan_disk:
 	/* The following code may yield as it needs to access disk. */
 	vy_read_iterator_pin_slices(itr);
-	for (i = itr->disk_src; i < itr->src_count; i++) {
-		if (vy_read_iterator_scan_disk(itr, i, &stop) != 0)
-			goto err_disk;
+	for (uint32_t i = itr->disk_src; i < itr->src_count; i++) {
+		if (vy_read_iterator_scan_disk(itr, i, &stop) != 0) {
+			vy_read_iterator_unpin_slices(itr);
+			return -1;
+		}
 		if (stop)
 			break;
 	}
@@ -539,24 +537,34 @@ rescan_disk:
 		goto rescan_disk;
 	}
 done:
-	if (itr->last_stmt != NULL && itr->curr_stmt != NULL)
+#ifndef NDEBUG
+	/* Check that the statement meets search criteria. */
+	if (itr->curr_stmt != NULL) {
+		int cmp = vy_stmt_compare(itr->curr_stmt, itr->key,
+					  itr->lsm->cmp_def);
+		cmp *= iterator_direction(itr->iterator_type);
+		if (itr->iterator_type == ITER_GT ||
+		    itr->iterator_type == ITER_LT)
+			assert(cmp > 0);
+		else
+			assert(cmp >= 0);
+	}
+	/*
+	 * Ensure the read iterator does not return duplicates
+	 * and respects statement order.
+	 */
+	if (itr->last_stmt != NULL && itr->curr_stmt != NULL) {
 	       assert(vy_read_iterator_cmp_stmt(itr, itr->curr_stmt,
 						itr->last_stmt) > 0);
-
+	}
+#endif
 	if (itr->need_check_eq && itr->curr_stmt != NULL &&
 	    vy_stmt_compare(itr->curr_stmt, itr->key,
 			    itr->lsm->cmp_def) != 0)
 		itr->curr_stmt = NULL;
 
-	if (vy_read_iterator_track_read(itr, itr->curr_stmt) != 0)
-		return -1;
-
 	*ret = itr->curr_stmt;
 	return 0;
-
-err_disk:
-	vy_read_iterator_unpin_slices(itr);
-	return -1;
 }
 
 /**
@@ -960,99 +968,70 @@ vy_read_iterator_next(struct vy_read_iterator *itr, struct tuple **result)
 {
 	ev_tstamp start_time = ev_monotonic_now(loop());
 
-	*result = NULL;
-
-	if (!itr->search_started) {
-		itr->search_started = true;
-		itr->lsm->stat.lookup++;
-		vy_read_iterator_restore(itr);
-	}
-
-	struct tuple *prev_key = itr->last_stmt;
-	if (prev_key != NULL)
-		tuple_ref(prev_key);
-	bool skipped_txw_delete = false;
-
-	struct tuple *t = NULL;
 	struct vy_lsm *lsm = itr->lsm;
-	int rc = 0;
-	while (true) {
-		rc = vy_read_iterator_next_key(itr, &t);
-		if (rc != 0)
-			goto clear;
-		if (t == NULL) {
-			if (itr->last_stmt != NULL)
-				tuple_unref(itr->last_stmt);
-			itr->last_stmt = NULL;
-			rc = 0; /* No more data. */
-			break;
-		}
-		rc = vy_read_iterator_squash_upsert(itr, &t);
-		if (rc != 0)
-			goto clear;
-		if (itr->last_stmt != NULL)
-			tuple_unref(itr->last_stmt);
-		itr->last_stmt = t;
-		if (vy_stmt_type(t) == IPROTO_INSERT ||
-		    vy_stmt_type(t) == IPROTO_REPLACE)
-			break;
-		assert(vy_stmt_type(t) == IPROTO_DELETE);
-		if (vy_stmt_lsn(t) == INT64_MAX) /* t is from write set */
-			skipped_txw_delete = true;
-	}
+	struct tuple *stmt, *prev_stmt;
 
-	*result = itr->last_stmt;
-	assert(*result == NULL ||
-	       vy_stmt_type(*result) == IPROTO_INSERT ||
-	       vy_stmt_type(*result) == IPROTO_REPLACE);
-	if (*result != NULL)
-		vy_stmt_counter_acct_tuple(&lsm->stat.get, *result);
-
-#ifndef NDEBUG
-	/* Check constraints. */
-	int dir = iterator_direction(itr->iterator_type);
 	/*
-	 * Each result statement with iterator type GE/GT must
-	 * be >= iterator key. And with LT/LE must
-	 * be <= iterator_key. @sa gh-2614.
+	 * Remember the statement returned by the last iteration.
+	 * We will need it to update the cache.
 	 */
-	if (itr->last_stmt != NULL && tuple_field_count(itr->key) > 0) {
-		int cmp = dir * vy_stmt_compare(*result, itr->key,
-						itr->lsm->cmp_def);
-		assert(cmp >= 0);
-	}
+	prev_stmt = itr->last_stmt;
+	if (prev_stmt != NULL)
+		tuple_ref(prev_stmt);
+	else /* first iteration */
+		lsm->stat.lookup++;
+next_key:
+	if (vy_read_iterator_next_key(itr, &stmt) != 0)
+		goto err;
+
 	/*
-	 * Ensure the read iterator does not return duplicates
-	 * and respects statements order (lsm->cmp_def includes
-	 * primary parts, so prev_key != itr->last_stmt for any
-	 * LSM tree).
+	 * Fetching an older statement of the current key may yield
+	 * so we must track the read before applying UPSERTs.
 	 */
-	if (prev_key != NULL && itr->last_stmt != NULL) {
-		assert(dir * vy_tuple_compare(prev_key, itr->last_stmt,
-					      lsm->cmp_def) < 0);
+	if (vy_read_iterator_track_read(itr, stmt) != 0)
+		goto err;
+
+	if (stmt != NULL &&
+	    vy_read_iterator_squash_upsert(itr, &stmt) != 0)
+		goto err;
+
+	if (itr->last_stmt != NULL)
+		tuple_unref(itr->last_stmt);
+	itr->last_stmt = stmt;
+
+	if (stmt != NULL && vy_stmt_type(stmt) == IPROTO_DELETE) {
+		/*
+		 * We don't return DELETEs so skip to the next key.
+		 * If the DELETE was read from TX write set, there
+		 * is a good chance that the space actually has
+		 * the deleted key and hence we must not consider
+		 * previous + current tuple as an unbroken chain.
+		 */
+		if (vy_stmt_lsn(stmt) == INT64_MAX) {
+			if (prev_stmt != NULL)
+				tuple_unref(prev_stmt);
+			prev_stmt = NULL;
+		}
+		goto next_key;
 	}
-#endif
+	assert(stmt == NULL ||
+	       vy_stmt_type(stmt) == IPROTO_INSERT ||
+	       vy_stmt_type(stmt) == IPROTO_REPLACE);
 
-	/**
-	 * Add a statement to the cache
+	/*
+	 * Store the result in the cache provided we are reading
+	 * the latest data.
 	 */
-	if ((**itr->read_view).vlsn == INT64_MAX) { /* Do not store non-latest data */
-		struct tuple *cache_prev = prev_key;
-		if (skipped_txw_delete) {
-			/*
-			 * If we skipped DELETE that was read from TX write
-			 * set, there is a chance that the database actually
-			 * has the deleted key and we must not consider
-			 * previous+current tuple as an unbroken chain.
-			 */
-			cache_prev = NULL;
-		}
-		vy_cache_add(&itr->lsm->cache, *result, cache_prev,
+	if ((**itr->read_view).vlsn == INT64_MAX) {
+		vy_cache_add(&lsm->cache, stmt, prev_stmt,
 			     itr->key, itr->iterator_type);
 	}
-clear:
-	if (prev_key != NULL)
-		tuple_unref(prev_key);
+	if (prev_stmt != NULL)
+		tuple_unref(prev_stmt);
+
+	/* Update LSM tree stats. */
+	if (stmt != NULL)
+		vy_stmt_counter_acct_tuple(&lsm->stat.get, stmt);
 
 	ev_tstamp latency = ev_monotonic_now(loop()) - start_time;
 	latency_collect(&lsm->stat.latency, latency);
@@ -1061,9 +1040,15 @@ clear:
 		say_warn("%s: select(%s, %s) => %s took too long: %.3f sec",
 			 vy_lsm_name(lsm), tuple_str(itr->key),
 			 iterator_type_strs[itr->iterator_type],
-			 vy_stmt_str(itr->last_stmt), latency);
+			 vy_stmt_str(stmt), latency);
 	}
-	return rc;
+
+	*result = stmt;
+	return 0;
+err:
+	if (prev_stmt != NULL)
+		tuple_unref(prev_stmt);
+	return -1;
 }
 
 /**
diff --git a/src/box/vy_read_iterator.h b/src/box/vy_read_iterator.h
index 4f9d3d4b..6aa84540 100644
--- a/src/box/vy_read_iterator.h
+++ b/src/box/vy_read_iterator.h
@@ -62,8 +62,6 @@ struct vy_read_iterator {
 	 * checked to match the search key.
 	 */
 	bool need_check_eq;
-	/** Set on the first call to vy_read_iterator_next(). */
-	bool search_started;
 	/** Last statement returned by vy_read_iterator_next(). */
 	struct tuple *last_stmt;
 	/**
-- 
2.11.0

  parent reply	other threads:[~2018-04-15 19:55 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-15 19:55 [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladimir Davydov
2018-04-15 19:55 ` [PATCH 01/12] vinyl: rename vy_stmt_history to vy_history Vladimir Davydov
2018-04-15 19:55 ` [PATCH 02/12] vinyl: factor out vy_history_apply from vy_point_lookup_apply_history Vladimir Davydov
2018-05-14 18:19   ` [tarantool-patches] " Vladislav Shpilevoy
2018-04-15 19:55 ` [PATCH 03/12] vinyl: add vy_history_append_stmt helper Vladimir Davydov
2018-04-15 19:55 ` [PATCH 04/12] vinyl: zap iterator_src_type enum Vladimir Davydov
2018-04-15 19:55 ` [PATCH 05/12] vinyl: encapsulate key history with struct Vladimir Davydov
2018-04-15 19:55 ` [PATCH 06/12] vinyl: refine vy_history_cleanup Vladimir Davydov
2018-04-15 19:55 ` [PATCH 07/12] vinyl: move vy_history to its own source file Vladimir Davydov
2018-04-15 19:55 ` [PATCH 08/12] vinyl: use mempool for vy_history_node allocations Vladimir Davydov
2018-04-15 19:55 ` [PATCH 09/12] vinyl: consolidate skip optimization checks in read iterator Vladimir Davydov
2018-05-14 18:25   ` [tarantool-patches] " Vladislav Shpilevoy
2018-05-15 15:00     ` Vladimir Davydov
2018-04-15 19:55 ` Vladimir Davydov [this message]
2018-04-15 19:55 ` [PATCH 11/12] vinyl: make read iterator always return newest tuple version Vladimir Davydov
2018-04-15 19:55 ` [PATCH 12/12] vinyl: zap vy_read_iterator::curr_stmt Vladimir Davydov
2018-05-04 18:05 ` [tarantool-patches] Re: [PATCH 00/12] vinyl: prepare read iterator for index rebuild Vladislav Shpilevoy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=23d53017ad7228dc5b34463c9bfd6ceff1d5638e.1523820298.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH 10/12] vinyl: refactor vy_read_iterator_next' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox