[PATCH 3/3] vinyl: generate deferred DELETEs on tx commit

Vladimir Davydov vdavydov.dev at gmail.com
Fri Jul 13 13:53:54 MSK 2018


We don't need to postpone generation of secondary index DELETEs until
compaction in case the overwritten tuple is present in memory or in
cache. Instead we can produce the DELETEs when the transaction is
committed. This should significantly decrease the number of deferred
DELETEs and hence speed up lookups in secondary indexes.

Follow-up #2129
---
 src/box/vy_point_lookup.c | 32 ++++++++++++++++
 src/box/vy_point_lookup.h | 18 +++++++++
 src/box/vy_tx.c           | 97 +++++++++++++++++++++++++++++++++++++++++++++++
 test/vinyl/quota.result   |  2 +-
 4 files changed, 148 insertions(+), 1 deletion(-)

diff --git a/src/box/vy_point_lookup.c b/src/box/vy_point_lookup.c
index 5e43340b..7b704b84 100644
--- a/src/box/vy_point_lookup.c
+++ b/src/box/vy_point_lookup.c
@@ -293,3 +293,35 @@ done:
 	}
 	return 0;
 }
+
+int
+vy_point_lookup_mem(struct vy_lsm *lsm, const struct vy_read_view **rv,
+		    struct tuple *key, struct tuple **ret)
+{
+	assert(tuple_field_count(key) >= lsm->cmp_def->part_count);
+
+	int rc;
+	struct vy_history history;
+	vy_history_create(&history, &lsm->env->history_node_pool);
+
+	rc = vy_point_lookup_scan_cache(lsm, rv, key, &history);
+	if (rc != 0 || vy_history_is_terminal(&history))
+		goto done;
+
+	rc = vy_point_lookup_scan_mems(lsm, rv, key, &history);
+	if (rc != 0 || vy_history_is_terminal(&history))
+		goto done;
+
+	*ret = NULL;
+	goto out;
+done:
+	if (rc == 0) {
+		int upserts_applied;
+		rc = vy_history_apply(&history, lsm->cmp_def, lsm->mem_format,
+				      true, &upserts_applied, ret);
+		lsm->stat.upsert.applied += upserts_applied;
+	}
+out:
+	vy_history_cleanup(&history);
+	return rc;
+}
diff --git a/src/box/vy_point_lookup.h b/src/box/vy_point_lookup.h
index 3b7c5a04..6d77ce9c 100644
--- a/src/box/vy_point_lookup.h
+++ b/src/box/vy_point_lookup.h
@@ -71,6 +71,24 @@ vy_point_lookup(struct vy_lsm *lsm, struct vy_tx *tx,
 		const struct vy_read_view **rv,
 		struct tuple *key, struct tuple **ret);
 
+/**
+ * Look up a tuple by key in memory.
+ *
+ * This function works just like vy_point_lookup() except:
+ *
+ * - It only scans in-memory level and cache and hence doesn't yield.
+ * - It doesn't turn DELETE into NULL so it returns NULL if and only
+ *   if no terminal statement matching the key is present in memory
+ *   (there still may be statements stored on disk though).
+ * - It doesn't account the lookup to LSM tree stats (as it never
+ *   descends to lower levels).
+ *
+ * The function returns 0 on success, -1 on memory allocation error.
+ */
+int
+vy_point_lookup_mem(struct vy_lsm *lsm, const struct vy_read_view **rv,
+		    struct tuple *key, struct tuple **ret);
+
 #if defined(__cplusplus)
 } /* extern "C" */
 #endif /* defined(__cplusplus) */
diff --git a/src/box/vy_tx.c b/src/box/vy_tx.c
index bfef1ada..1421cb84 100644
--- a/src/box/vy_tx.c
+++ b/src/box/vy_tx.c
@@ -58,6 +58,7 @@
 #include "vy_history.h"
 #include "vy_read_set.h"
 #include "vy_read_view.h"
+#include "vy_point_lookup.h"
 
 int
 write_set_cmp(struct txv *a, struct txv *b)
@@ -483,6 +484,97 @@ vy_tx_write(struct vy_lsm *lsm, struct vy_mem *mem,
 	return vy_lsm_set(lsm, mem, stmt, region_stmt);
 }
 
+/**
+ * Try to generate a deferred DELETE statement on tx commit.
+ *
+ * This function is supposed to be called for a primary index
+ * statement which was executed without deletion of the overwritten
+ * tuple from secondary indexes. It looks up the overwritten tuple
+ * in memory and, if found, produces the deferred DELETEs and
+ * inserts them into the transaction log.
+ *
+ * Affects @tx->log, @v->stmt.
+ *
+ * Returns 0 on success, -1 on memory allocation error.
+ */
+static int
+vy_tx_handle_deferred_delete(struct vy_tx *tx, struct txv *v)
+{
+	struct vy_lsm *pk = v->lsm;
+	struct tuple *stmt = v->stmt;
+	uint8_t flags = vy_stmt_flags(stmt);
+
+	assert(pk->index_id == 0);
+	assert(flags & VY_STMT_DEFERRED_DELETE);
+
+	/* Look up the tuple overwritten by this statement. */
+	struct tuple *tuple;
+	if (vy_point_lookup_mem(pk, &tx->xm->p_global_read_view,
+				stmt, &tuple) != 0)
+		return -1;
+
+	if (tuple == NULL) {
+		/*
+		 * Nothing's found, but there still may be
+		 * matching statements stored on disk so we
+		 * have to defer generation of DELETE until
+		 * compaction.
+		 */
+		return 0;
+	}
+
+	/*
+	 * If a terminal statement is found, we can produce
+	 * DELETE right away so clear the flag now.
+	 */
+	vy_stmt_set_flags(stmt, flags & ~VY_STMT_DEFERRED_DELETE);
+
+	if (vy_stmt_type(tuple) == IPROTO_DELETE) {
+		/* The tuple's already deleted, nothing to do. */
+		tuple_unref(tuple);
+		return 0;
+	}
+
+	struct tuple *delete_stmt;
+	delete_stmt = vy_stmt_new_surrogate_delete(pk->mem_format, tuple);
+	tuple_unref(tuple);
+	if (delete_stmt == NULL)
+		return -1;
+
+	if (vy_stmt_type(stmt) == IPROTO_DELETE) {
+		/*
+		 * Since primary and secondary indexes of the
+		 * same space share in-memory statements, we
+		 * need to use the new DELETE in the primary
+		 * index, because the original DELETE doesn't
+		 * contain secondary key parts.
+		 */
+		vy_stmt_counter_acct_tuple(&pk->stat.txw.count, delete_stmt);
+		vy_stmt_counter_unacct_tuple(&pk->stat.txw.count, stmt);
+		v->stmt = delete_stmt;
+		tuple_ref(delete_stmt);
+		tuple_unref(stmt);
+	}
+
+	/*
+	 * Make DELETE statements for secondary indexes and
+	 * insert them into the transaction log.
+	 */
+	int rc = 0;
+	struct vy_lsm *lsm;
+	rlist_foreach_entry(lsm, &pk->list, list) {
+		struct txv *delete_txv = txv_new(tx, lsm, delete_stmt);
+		if (delete_txv == NULL) {
+			rc = -1;
+			break;
+		}
+		stailq_insert_entry(&tx->log, delete_txv, v, next_in_log);
+		vy_stmt_counter_acct_tuple(&lsm->stat.txw.count, delete_stmt);
+	}
+	tuple_unref(delete_stmt);
+	return rc;
+}
+
 int
 vy_tx_prepare(struct vy_tx *tx)
 {
@@ -591,6 +683,11 @@ vy_tx_prepare(struct vy_tx *tx)
 			return -1;
 		assert(v->mem != NULL);
 
+		if (lsm->index_id == 0 &&
+		    vy_stmt_flags(v->stmt) & VY_STMT_DEFERRED_DELETE &&
+		    vy_tx_handle_deferred_delete(tx, v) != 0)
+			return -1;
+
 		/* In secondary indexes only REPLACE/DELETE can be written. */
 		vy_stmt_set_lsn(v->stmt, MAX_LSN + tx->psn);
 		const struct tuple **region_stmt =
diff --git a/test/vinyl/quota.result b/test/vinyl/quota.result
index e323bc4e..48042185 100644
--- a/test/vinyl/quota.result
+++ b/test/vinyl/quota.result
@@ -89,7 +89,7 @@ _ = space:replace{1, 1, string.rep('a', 1024 * 1024 * 5)}
 ...
 box.stat.vinyl().quota.used
 ---
-- 5341228
+- 5341267
 ...
 space:drop()
 ---
-- 
2.11.0




More information about the Tarantool-patches mailing list