Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH 09/25] vinyl: store full tuples in secondary index cache
Date: Fri, 27 Jul 2018 14:29:49 +0300	[thread overview]
Message-ID: <e4ab0ed7f077a8ea36f36c9e3ee278e7959b6fe9.1532689066.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1532689065.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1532689065.git.vdavydov.dev@gmail.com>

Currently, both vy_read_iterator_next() and vy_point_lookup() add the
returned tuple to the tuple cache. As a result, we store partial tuples
in a secondary index tuple cache although we could store full tuples
(we have to retrieve them anyway when reading a secondary index). This
means wasting memory. Besides, when the #2129 gets implemented, there
will be tuples in a secondary index that have to be skipped as they have
been overwritten in the primary index. Caching them would be inefficient
and error prone. So let's call vy_cache_add() from the upper level and
add only full tuples to the cache.

Closes #3478
Needed for #2129
---
 src/box/vinyl.c            | 11 +++++++++
 src/box/vy_point_lookup.c  |  5 +---
 src/box/vy_read_iterator.c | 61 ++++++++++++++++++++++------------------------
 src/box/vy_read_iterator.h | 24 ++++++++++++++++++
 4 files changed, 65 insertions(+), 36 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index c29f139e..f2f93736 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -1290,6 +1290,10 @@ vy_get_by_secondary_tuple(struct vy_lsm *lsm, struct vy_tx *tx,
 		say_warn("%s: key %s missing in primary index",
 			 vy_lsm_name(lsm), vy_stmt_str(tuple));
 	}
+
+	if ((*rv)->vlsn == INT64_MAX)
+		vy_cache_add(&lsm->pk->cache, *result, NULL, tuple, ITER_EQ);
+
 	return 0;
 }
 
@@ -1336,6 +1340,8 @@ vy_get(struct vy_lsm *lsm, struct vy_tx *tx,
 		} else {
 			*result = tuple;
 		}
+		if ((*rv)->vlsn == INT64_MAX)
+			vy_cache_add(&lsm->cache, *result, NULL, key, ITER_EQ);
 		return 0;
 	}
 
@@ -1352,6 +1358,8 @@ vy_get(struct vy_lsm *lsm, struct vy_tx *tx,
 		if (rc != 0 || *result != NULL)
 			break;
 	}
+	if (rc == 0)
+		vy_read_iterator_cache_add(&itr, *result);
 	vy_read_iterator_close(&itr);
 	return rc;
 }
@@ -3707,6 +3715,7 @@ vinyl_iterator_primary_next(struct iterator *base, struct tuple **ret)
 		goto fail;
 	if (vy_read_iterator_next(&it->iterator, ret) != 0)
 		goto fail;
+	vy_read_iterator_cache_add(&it->iterator, *ret);
 	if (*ret == NULL) {
 		/* EOF. Close the iterator immediately. */
 		vinyl_iterator_close(it);
@@ -3736,6 +3745,7 @@ next:
 
 	if (tuple == NULL) {
 		/* EOF. Close the iterator immediately. */
+		vy_read_iterator_cache_add(&it->iterator, NULL);
 		vinyl_iterator_close(it);
 		*ret = NULL;
 		return 0;
@@ -3755,6 +3765,7 @@ next:
 		goto fail;
 	if (*ret == NULL)
 		goto next;
+	vy_read_iterator_cache_add(&it->iterator, *ret);
 	tuple_bless(*ret);
 	tuple_unref(*ret);
 	return 0;
diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index f2261fdf..5e43340b 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -280,11 +280,8 @@ done:
 	if (rc != 0)
 		return -1;
 
-	if (*ret != NULL) {
+	if (*ret != NULL)
 		vy_stmt_counter_acct_tuple(&lsm->stat.get, *ret);
-		if ((*rv)->vlsn == INT64_MAX)
-			vy_cache_add(&lsm->cache, *ret, NULL, key, ITER_EQ);
-	}
 
 	double latency = ev_monotonic_now(loop()) - start_time;
 	latency_collect(&lsm->stat.latency, latency);
diff --git a/src/box/vy_read_iterator.c b/src/box/vy_read_iterator.c
index 160bb899..954fc0df 100644
--- a/src/box/vy_read_iterator.c
+++ b/src/box/vy_read_iterator.c
@@ -845,24 +845,17 @@ vy_read_iterator_next(struct vy_read_iterator *itr, struct tuple **result)
 	ev_tstamp start_time = ev_monotonic_now(loop());
 
 	struct vy_lsm *lsm = itr->lsm;
-	struct tuple *stmt, *prev_stmt;
+	struct tuple *stmt;
 
-	/*
-	 * Remember the statement returned by the last iteration.
-	 * We will need it to update the cache.
-	 */
-	prev_stmt = itr->last_stmt;
-	if (prev_stmt != NULL)
-		tuple_ref(prev_stmt);
-	else /* first iteration */
-		lsm->stat.lookup++;
+	if (itr->last_stmt == NULL)
+		lsm->stat.lookup++; /* first iteration */
 next_key:
 	if (vy_read_iterator_advance(itr) != 0)
-		goto err;
+		return -1;
 	if (vy_read_iterator_apply_history(itr, &stmt) != 0)
-		goto err;
+		return -1;
 	if (vy_read_iterator_track_read(itr, stmt) != 0)
-		goto err;
+		return -1;
 
 	if (itr->last_stmt != NULL)
 		tuple_unref(itr->last_stmt);
@@ -877,9 +870,9 @@ next_key:
 		 * previous + current tuple as an unbroken chain.
 		 */
 		if (vy_stmt_lsn(stmt) == INT64_MAX) {
-			if (prev_stmt != NULL)
-				tuple_unref(prev_stmt);
-			prev_stmt = NULL;
+			if (itr->last_cached_stmt != NULL)
+				tuple_unref(itr->last_cached_stmt);
+			itr->last_cached_stmt = NULL;
 		}
 		goto next_key;
 	}
@@ -887,18 +880,6 @@ next_key:
 	       vy_stmt_type(stmt) == IPROTO_INSERT ||
 	       vy_stmt_type(stmt) == IPROTO_REPLACE);
 
-	/*
-	 * Store the result in the cache provided we are reading
-	 * the latest data.
-	 */
-	if ((**itr->read_view).vlsn == INT64_MAX) {
-		vy_cache_add(&lsm->cache, stmt, prev_stmt,
-			     itr->key, itr->iterator_type);
-	}
-	if (prev_stmt != NULL)
-		tuple_unref(prev_stmt);
-
-	/* Update LSM tree stats. */
 	if (stmt != NULL)
 		vy_stmt_counter_acct_tuple(&lsm->stat.get, stmt);
 
@@ -914,10 +895,24 @@ next_key:
 
 	*result = stmt;
 	return 0;
-err:
-	if (prev_stmt != NULL)
-		tuple_unref(prev_stmt);
-	return -1;
+}
+
+void
+vy_read_iterator_cache_add(struct vy_read_iterator *itr, struct tuple *stmt)
+{
+	if ((**itr->read_view).vlsn != INT64_MAX) {
+		if (itr->last_cached_stmt != NULL)
+			tuple_unref(itr->last_cached_stmt);
+		itr->last_cached_stmt = NULL;
+		return;
+	}
+	vy_cache_add(&itr->lsm->cache, stmt, itr->last_cached_stmt,
+		     itr->key, itr->iterator_type);
+	if (stmt != NULL)
+		tuple_ref(stmt);
+	if (itr->last_cached_stmt != NULL)
+		tuple_unref(itr->last_cached_stmt);
+	itr->last_cached_stmt = stmt;
 }
 
 /**
@@ -928,6 +923,8 @@ vy_read_iterator_close(struct vy_read_iterator *itr)
 {
 	if (itr->last_stmt != NULL)
 		tuple_unref(itr->last_stmt);
+	if (itr->last_cached_stmt != NULL)
+		tuple_unref(itr->last_cached_stmt);
 	vy_read_iterator_cleanup(itr);
 	free(itr->src);
 	TRASH(itr);
diff --git a/src/box/vy_read_iterator.h b/src/box/vy_read_iterator.h
index 2cac1087..baab8859 100644
--- a/src/box/vy_read_iterator.h
+++ b/src/box/vy_read_iterator.h
@@ -65,6 +65,11 @@ struct vy_read_iterator {
 	/** Last statement returned by vy_read_iterator_next(). */
 	struct tuple *last_stmt;
 	/**
+	 * Last statement added to the tuple cache by
+	 * vy_read_iterator_cache_add().
+	 */
+	struct tuple *last_cached_stmt;
+	/**
 	 * Copy of lsm->range_tree_version.
 	 * Used for detecting range tree changes.
 	 */
@@ -142,6 +147,25 @@ NODISCARD int
 vy_read_iterator_next(struct vy_read_iterator *itr, struct tuple **result);
 
 /**
+ * Add the last tuple returned by the read iterator to the cache.
+ * @param itr  Read iterator
+ * @param stmt Last tuple returned by the iterator.
+ *
+ * We use a separate function for populating the cache rather than
+ * doing that right in vy_read_iterator_next() so that we can store
+ * full tuples in a secondary index cache, thus saving some memory.
+ *
+ * Usage pattern:
+ * - Call vy_read_iterator_next() to get a partial tuple.
+ * - Call vy_point_lookup() to get the full tuple corresponding
+ *   to the partial tuple returned by the iterator.
+ * - Call vy_read_iterator_cache_add() on the full tuple to add
+ *   the result to the cache.
+ */
+void
+vy_read_iterator_cache_add(struct vy_read_iterator *itr, struct tuple *stmt);
+
+/**
  * Close the iterator and free resources.
  */
 void
-- 
2.11.0

  parent reply	other threads:[~2018-07-27 11:29 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-27 11:29 [PATCH 00/25] vinyl: eliminate disk read on REPLACE/DELETE Vladimir Davydov
2018-07-27 11:29 ` [PATCH 01/25] vinyl: make point lookup always return the latest tuple version Vladimir Davydov
2018-07-27 11:29 ` [PATCH 02/25] vinyl: simplify vy_squash_process Vladimir Davydov
2018-07-27 11:29 ` [PATCH 03/25] vinyl: always get full tuple from pk after reading from secondary index Vladimir Davydov
2018-07-27 11:29 ` [PATCH 04/25] vinyl: fold vy_replace_one and vy_replace_impl Vladimir Davydov
2018-07-27 11:29 ` [PATCH 05/25] vinyl: fold vy_delete_impl Vladimir Davydov
2018-07-27 11:29 ` [PATCH 06/25] vinyl: refactor unique check Vladimir Davydov
2018-07-27 11:29 ` [PATCH 07/25] vinyl: check key uniqueness before modifying tx write set Vladimir Davydov
2018-07-27 11:29 ` [PATCH 08/25] vinyl: remove env argument of vy_check_is_unique_{primary,secondary} Vladimir Davydov
2018-07-31 20:45   ` [tarantool-patches] " Konstantin Osipov
2018-07-27 11:29 ` Vladimir Davydov [this message]
2018-07-31 20:47   ` [PATCH 09/25] vinyl: store full tuples in secondary index cache Konstantin Osipov
2018-07-27 11:29 ` [PATCH 10/25] vinyl: do not free pending tasks on shutdown Vladimir Davydov
2018-07-31 20:48   ` Konstantin Osipov
2018-07-27 11:29 ` [PATCH 11/25] vinyl: store pointer to scheduler in struct vy_task Vladimir Davydov
2018-07-31 20:49   ` Konstantin Osipov
2018-07-27 11:29 ` [PATCH 12/25] vinyl: rename some members of vy_scheduler and vy_task struct Vladimir Davydov
2018-07-27 11:29 ` [PATCH 13/25] vinyl: use cbus for communication between scheduler and worker threads Vladimir Davydov
2018-07-27 11:29 ` [PATCH 14/25] vinyl: zap vy_scheduler::is_worker_pool_running Vladimir Davydov
2018-07-27 11:29 ` [PATCH 15/25] vinyl: rename vy_task::status to is_failed Vladimir Davydov
2018-07-27 11:29 ` [PATCH 16/25] xrow: allow to store flags in DML requests Vladimir Davydov
2018-07-27 11:29 ` [PATCH 17/25] vinyl: pin last statement returned by write iterator explicitly Vladimir Davydov
2018-07-27 11:29 ` [PATCH 18/25] vinyl: teach write iterator to return overwritten tuples Vladimir Davydov
2018-07-27 11:29 ` [PATCH 19/25] vinyl: prepare write iterator heap comparator for deferred DELETEs Vladimir Davydov
2018-07-27 11:30 ` [PATCH 20/25] vinyl: allow to skip certain statements on read Vladimir Davydov
2018-07-27 11:30 ` [PATCH 21/25] vinyl: add function to create surrogate deletes from raw msgpack Vladimir Davydov
2018-07-27 11:30 ` [PATCH 22/25] vinyl: remove pointless assertion from vy_stmt_new_surrogate_delete Vladimir Davydov
2018-07-27 11:30 ` [PATCH 23/25] txn: add helper to detect transaction boundaries Vladimir Davydov
2018-07-31 20:52   ` [tarantool-patches] " Konstantin Osipov
2018-07-27 11:30 ` [PATCH 24/25] Introduce _vinyl_deferred_delete system space Vladimir Davydov
2018-07-31 20:54   ` Konstantin Osipov
2018-08-01 14:00     ` Vladimir Davydov
2018-08-01 20:25       ` [tarantool-patches] " Konstantin Osipov
2018-08-02  9:43         ` Vladimir Davydov
2018-08-06  8:42           ` Vladimir Davydov
2018-07-27 11:30 ` [PATCH 25/25] vinyl: eliminate disk read on REPLACE/DELETE Vladimir Davydov
2018-07-31 20:55   ` Konstantin Osipov
2018-08-01 16:03     ` Vladimir Davydov
2018-08-01 16:51     ` Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e4ab0ed7f077a8ea36f36c9e3ee278e7959b6fe9.1532689066.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH 09/25] vinyl: store full tuples in secondary index cache' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox