[PATCH] vinyl: fix assertion while recovering dumped statement

Vladimir Davydov vdavydov.dev at gmail.com
Tue May 21 15:16:10 MSK 2019


Certain kinds of DML requests don't update secondary indexes, e.g.
UPDATE that doesn't touch secondary index parts or DELETE for which
generation of secondary index statements is deferred. For such a request
vy_is_committed(env, space) may return false on recovery even if the
statement has actually been dumped: since the statement is never written
to secondary indexes, a secondary index's vy_lsm::dump_lsn may be less
than the statement's signature, which makes vy_is_committed() assume
that the statement hasn't been dumped. Further in the code we have
assertions ensuring that a request executed on recovery has not been
dumped for the primary index (the primary index is always dumped after
secondary indexes for the sake of recovery), and they fail in this case.

To fix that, let's refactor the code based on the following two facts:
 - Primary index is always updated by a DML request.
 - Primary index may only be dumped after secondary indexes.

Closes #4222
---
https://github.com/tarantool/tarantool/issues/4222

 src/box/vinyl.c             | 89 +++++++++++++++++++--------------------------
 test/vinyl/recover.result   | 60 ++++++++++++++++++++++++++++++
 test/vinyl/recover.test.lua | 27 ++++++++++++++
 3 files changed, 125 insertions(+), 51 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index d4929a37..9372e5f7 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -1277,9 +1277,17 @@ vinyl_index_compact(struct index *index)
  * If the LSM tree is going to be dropped or truncated on WAL
  * recovery, there's no point in replaying statements for it,
  * either.
+ *
+ * Note, although we may skip secondary index update in certain
+ * cases (e.g.  UPDATE that doesn't touch secondary index parts
+ * or DELETE for which generation of secondary index statement
+ * is deferred), a DML request of any kind always updates the
+ * primary index. Also, we always dump the primary index after
+ * secondary indexes. So we may skip recovery of a DML request
+ * if it has been dumped for the primary index.
  */
 static inline bool
-vy_is_committed_one(struct vy_env *env, struct vy_lsm *lsm)
+vy_is_committed(struct vy_env *env, struct vy_lsm *lsm)
 {
 	if (likely(env->status != VINYL_FINAL_RECOVERY_LOCAL))
 		return false;
@@ -1291,23 +1299,6 @@ vy_is_committed_one(struct vy_env *env, struct vy_lsm *lsm)
 }
 
 /**
- * Check if a request has already been committed to a space.
- * See also vy_is_committed_one().
- */
-static inline bool
-vy_is_committed(struct vy_env *env, struct space *space)
-{
-	if (likely(env->status != VINYL_FINAL_RECOVERY_LOCAL))
-		return false;
-	for (uint32_t iid = 0; iid < space->index_count; iid++) {
-		struct vy_lsm *lsm = vy_lsm(space->index[iid]);
-		if (!vy_is_committed_one(env, lsm))
-			return false;
-	}
-	return true;
-}
-
-/**
  * Get a full tuple by a tuple read from a secondary index.
  * @param lsm         LSM tree from which the tuple was read.
  * @param tx          Current transaction.
@@ -1772,11 +1763,11 @@ static int
 vy_delete(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
 	  struct space *space, struct request *request)
 {
-	if (vy_is_committed(env, space))
-		return 0;
 	struct vy_lsm *pk = vy_lsm_find(space, 0);
 	if (pk == NULL)
 		return -1;
+	if (vy_is_committed(env, pk))
+		return 0;
 	struct vy_lsm *lsm = vy_lsm_find_unique(space, request->index_id);
 	if (lsm == NULL)
 		return -1;
@@ -1807,7 +1798,7 @@ vy_delete(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
 			return -1;
 		for (uint32_t i = 0; i < space->index_count; i++) {
 			struct vy_lsm *lsm = vy_lsm(space->index[i]);
-			if (vy_is_committed_one(env, lsm))
+			if (vy_is_committed(env, lsm))
 				continue;
 			rc = vy_tx_set(tx, lsm, delete);
 			if (rc != 0)
@@ -1891,7 +1882,7 @@ vy_perform_update(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
 
 	for (uint32_t i = 1; i < space->index_count; ++i) {
 		struct vy_lsm *lsm = vy_lsm(space->index[i]);
-		if (vy_is_committed_one(env, lsm))
+		if (vy_is_committed(env, lsm))
 			continue;
 		if (vy_tx_set_with_colmask(tx, lsm, delete, column_mask) != 0)
 			goto error;
@@ -1924,7 +1915,10 @@ vy_update(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
 	  struct space *space, struct request *request)
 {
 	assert(tx != NULL && tx->state == VINYL_TX_READY);
-	if (vy_is_committed(env, space))
+	struct vy_lsm *pk = vy_lsm_find(space, 0);
+	if (pk == NULL)
+		return -1;
+	if (vy_is_committed(env, pk))
 		return 0;
 	struct vy_lsm *lsm = vy_lsm_find_unique(space, request->index_id);
 	if (lsm == NULL)
@@ -1942,11 +1936,6 @@ vy_update(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
 		return 0;
 
 	/* Apply update operations. */
-	struct vy_lsm *pk = vy_lsm(space->index[0]);
-	assert(pk != NULL);
-	assert(pk->index_id == 0);
-	/* Primary key is dumped last. */
-	assert(!vy_is_committed_one(env, pk));
 	uint64_t column_mask = 0;
 	const char *new_tuple, *new_tuple_end;
 	uint32_t new_size, old_size;
@@ -2126,7 +2115,10 @@ vy_upsert(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
 	  struct space *space, struct request *request)
 {
 	assert(tx != NULL && tx->state == VINYL_TX_READY);
-	if (vy_is_committed(env, space))
+	struct vy_lsm *pk = vy_lsm_find(space, 0);
+	if (pk == NULL)
+		return -1;
+	if (vy_is_committed(env, pk))
 		return 0;
 	/* Check update operations. */
 	if (tuple_update_check_ops(region_aligned_alloc_cb, &fiber()->gc,
@@ -2143,11 +2135,6 @@ vy_upsert(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
 	const char *tuple_end = request->tuple_end;
 	const char *ops = request->ops;
 	const char *ops_end = request->ops_end;
-	struct vy_lsm *pk = vy_lsm_find(space, 0);
-	if (pk == NULL)
-		return -1;
-	/* Primary key is dumped last. */
-	assert(!vy_is_committed_one(env, pk));
 	if (tuple_validate_raw(pk->mem_format, tuple))
 		return -1;
 
@@ -2240,14 +2227,12 @@ static int
 vy_insert(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
 	  struct space *space, struct request *request)
 {
-	assert(stmt != NULL);
+	assert(tx != NULL && tx->state == VINYL_TX_READY);
 	struct vy_lsm *pk = vy_lsm_find(space, 0);
 	if (pk == NULL)
-		/* The space hasn't the primary index. */
 		return -1;
-	assert(pk->index_id == 0);
-	/* Primary key is dumped last. */
-	assert(!vy_is_committed_one(env, pk));
+	if (vy_is_committed(env, pk))
+		return 0;
 	if (tuple_validate_raw(pk->mem_format, request->tuple))
 		return -1;
 	/* First insert into the primary index. */
@@ -2263,7 +2248,7 @@ vy_insert(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
 
 	for (uint32_t iid = 1; iid < space->index_count; ++iid) {
 		struct vy_lsm *lsm = vy_lsm(space->index[iid]);
-		if (vy_is_committed_one(env, lsm))
+		if (vy_is_committed(env, lsm))
 			continue;
 		if (vy_tx_set(tx, lsm, stmt->new_tuple) != 0)
 			return -1;
@@ -2290,16 +2275,11 @@ vy_replace(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
 	   struct space *space, struct request *request)
 {
 	assert(tx != NULL && tx->state == VINYL_TX_READY);
-	if (vy_is_committed(env, space))
-		return 0;
-	if (request->type == IPROTO_INSERT)
-		return vy_insert(env, tx, stmt, space, request);
-
 	struct vy_lsm *pk = vy_lsm_find(space, 0);
 	if (pk == NULL)
 		return -1;
-	/* Primary key is dumped last. */
-	assert(!vy_is_committed_one(env, pk));
+	if (vy_is_committed(env, pk))
+		return 0;
 
 	/* Validate and create a statement for the new tuple. */
 	if (tuple_validate_raw(pk->mem_format, request->tuple))
@@ -2352,7 +2332,7 @@ vy_replace(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
 	}
 	for (uint32_t i = 1; i < space->index_count; i++) {
 		struct vy_lsm *lsm = vy_lsm(space->index[i]);
-		if (vy_is_committed_one(env, lsm))
+		if (vy_is_committed(env, lsm))
 			continue;
 		if (delete != NULL) {
 			rc = vy_tx_set(tx, lsm, delete);
@@ -2376,7 +2356,12 @@ vinyl_space_execute_replace(struct space *space, struct txn *txn,
 	struct vy_env *env = vy_env(space->engine);
 	struct vy_tx *tx = txn->engine_tx;
 	struct txn_stmt *stmt = txn_current_stmt(txn);
-	if (vy_replace(env, tx, stmt, space, request))
+	int rc;
+	if (request->type == IPROTO_INSERT)
+		rc = vy_insert(env, tx, stmt, space, request);
+	else
+		rc = vy_replace(env, tx, stmt, space, request);
+	if (rc != 0)
 		return -1;
 	*result = stmt->new_tuple;
 	return 0;
@@ -3300,6 +3285,8 @@ vinyl_space_apply_initial_join_row(struct space *space, struct request *request)
 	int rc = -1;
 	switch (request->type) {
 	case IPROTO_INSERT:
+		rc = vy_insert(env, tx, &stmt, space, request);
+		break;
 	case IPROTO_REPLACE:
 		rc = vy_replace(env, tx, &stmt, space, request);
 		break;
@@ -4488,7 +4475,7 @@ vy_deferred_delete_on_rollback(struct trigger *trigger, void *event)
  * to restore deferred DELETE statements that haven't been dumped
  * to disk. To skip deferred DELETEs that have been dumped, we
  * use the same technique we employ for normal WAL statements,
- * i.e. we filter them by LSN, see vy_is_committed_one(). To do
+ * i.e. we filter them by LSN, see vy_is_committed(). To do
  * that, we need to account the LSN of a WAL row that generated
  * a deferred DELETE statement to vy_lsm::dump_lsn, so we install
  * an on_commit trigger that propagates the LSN of the WAL row to
@@ -4575,7 +4562,7 @@ vy_deferred_delete_on_replace(struct trigger *trigger, void *event)
 	struct tuple *region_stmt = NULL;
 	for (uint32_t i = 1; i < space->index_count; i++) {
 		struct vy_lsm *lsm = vy_lsm(space->index[i]);
-		if (vy_is_committed_one(env, lsm))
+		if (vy_is_committed(env, lsm))
 			continue;
 		/*
 		 * As usual, rotate the active in-memory index if
diff --git a/test/vinyl/recover.result b/test/vinyl/recover.result
index a6313f22..345a019d 100644
--- a/test/vinyl/recover.result
+++ b/test/vinyl/recover.result
@@ -479,3 +479,63 @@ test_run:cmd('cleanup server force_recovery')
 ---
 - true
 ...
+--
+-- gh-4222: assertion failure while recovering dumped statement
+-- that isn't present in secondary index.
+--
+test_run:cmd('create server test with script = "vinyl/low_quota.lua"')
+---
+- true
+...
+test_run:cmd('start server test with args="1048576"')
+---
+- true
+...
+test_run:cmd('switch test')
+---
+- true
+...
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+pk = s:create_index('primary')
+---
+...
+sk = s:create_index('secondary', {unique = false, parts = {2, 'unsigned'}})
+---
+...
+s:insert{1, 1, 1}
+---
+- [1, 1, 1]
+...
+box.snapshot()
+---
+- ok
+...
+for i = 1, 1024 do s:update(1, {{'=', 3, string.rep('x', 1024)}}) end
+---
+...
+test_run:wait_cond(function() return pk:stat().disk.dump.count == 2 end)
+---
+- true
+...
+sk:stat().disk.dump.count -- 1
+---
+- 1
+...
+test_run:cmd('restart server test with args="1048576"')
+box.space.test:drop()
+---
+...
+test_run:cmd('switch default')
+---
+- true
+...
+test_run:cmd('stop server test')
+---
+- true
+...
+test_run:cmd('cleanup server test')
+---
+- true
+...
diff --git a/test/vinyl/recover.test.lua b/test/vinyl/recover.test.lua
index 1ed266a1..7bdba178 100644
--- a/test/vinyl/recover.test.lua
+++ b/test/vinyl/recover.test.lua
@@ -164,3 +164,30 @@ sum
 test_run:cmd('switch default')
 test_run:cmd('stop server force_recovery')
 test_run:cmd('cleanup server force_recovery')
+
+--
+-- gh-4222: assertion failure while recovering dumped statement
+-- that isn't present in secondary index.
+--
+test_run:cmd('create server test with script = "vinyl/low_quota.lua"')
+test_run:cmd('start server test with args="1048576"')
+test_run:cmd('switch test')
+
+s = box.schema.space.create('test', {engine = 'vinyl'})
+pk = s:create_index('primary')
+sk = s:create_index('secondary', {unique = false, parts = {2, 'unsigned'}})
+
+s:insert{1, 1, 1}
+box.snapshot()
+
+for i = 1, 1024 do s:update(1, {{'=', 3, string.rep('x', 1024)}}) end
+test_run:wait_cond(function() return pk:stat().disk.dump.count == 2 end)
+sk:stat().disk.dump.count -- 1
+
+test_run:cmd('restart server test with args="1048576"')
+
+box.space.test:drop()
+
+test_run:cmd('switch default')
+test_run:cmd('stop server test')
+test_run:cmd('cleanup server test')
-- 
2.11.0




More information about the Tarantool-patches mailing list