Lgtm On Tue, May 21, 2019, 15:16 Vladimir Davydov wrote: > Certain kinds of DML requests don't update secondary indexes, e.g. > UPDATE that doesn't touch secondary index parts or DELETE for which > generation of secondary index statements is deferred. For such a request > vy_is_committed(env, space) may return false on recovery even if it has > actually been dumped: since such a statement is not dumped for secondary > indexes, secondary index's vy_lsm::dump_lsn may be less than such > statement's signature, which makes vy_is_committed() assume that the > statement hasn't been dumped. Further in the code we have checks that > ensure that if we execute a request on recovery, it must not be dumped > for the primary index (as the primary index is always dumped after > secondary indexes for the sake of recovery), which fires in this case. > > To fix that, let's refactor the code basing on the following two facts: > - Primary index is always updated by a DML request. > - Primary index may only be dumped after secondary indexes. > > Closes #4222 > --- > https://github.com/tarantool/tarantool/issues/4222 > > src/box/vinyl.c | 89 > +++++++++++++++++++-------------------------- > test/vinyl/recover.result | 60 ++++++++++++++++++++++++++++++ > test/vinyl/recover.test.lua | 27 ++++++++++++++ > 3 files changed, 125 insertions(+), 51 deletions(-) > > diff --git a/src/box/vinyl.c b/src/box/vinyl.c > index d4929a37..9372e5f7 100644 > --- a/src/box/vinyl.c > +++ b/src/box/vinyl.c > @@ -1277,9 +1277,17 @@ vinyl_index_compact(struct index *index) > * If the LSM tree is going to be dropped or truncated on WAL > * recovery, there's no point in replaying statements for it, > * either. > + * > + * Note, although we may skip secondary index update in certain > + * cases (e.g. UPDATE that doesn't touch secondary index parts > + * or DELETE for which generation of secondary index statement > + * is deferred), a DML request of any kind always updates the > + * primary index. Also, we always dump the primary index after > + * secondary indexes. So we may skip recovery of a DML request > + * if it has been dumped for the primary index. > */ > static inline bool > -vy_is_committed_one(struct vy_env *env, struct vy_lsm *lsm) > +vy_is_committed(struct vy_env *env, struct vy_lsm *lsm) > { > if (likely(env->status != VINYL_FINAL_RECOVERY_LOCAL)) > return false; > @@ -1291,23 +1299,6 @@ vy_is_committed_one(struct vy_env *env, struct > vy_lsm *lsm) > } > > /** > - * Check if a request has already been committed to a space. > - * See also vy_is_committed_one(). > - */ > -static inline bool > -vy_is_committed(struct vy_env *env, struct space *space) > -{ > - if (likely(env->status != VINYL_FINAL_RECOVERY_LOCAL)) > - return false; > - for (uint32_t iid = 0; iid < space->index_count; iid++) { > - struct vy_lsm *lsm = vy_lsm(space->index[iid]); > - if (!vy_is_committed_one(env, lsm)) > - return false; > - } > - return true; > -} > - > -/** > * Get a full tuple by a tuple read from a secondary index. > * @param lsm LSM tree from which the tuple was read. > * @param tx Current transaction. > @@ -1772,11 +1763,11 @@ static int > vy_delete(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt, > struct space *space, struct request *request) > { > - if (vy_is_committed(env, space)) > - return 0; > struct vy_lsm *pk = vy_lsm_find(space, 0); > if (pk == NULL) > return -1; > + if (vy_is_committed(env, pk)) > + return 0; > struct vy_lsm *lsm = vy_lsm_find_unique(space, request->index_id); > if (lsm == NULL) > return -1; > @@ -1807,7 +1798,7 @@ vy_delete(struct vy_env *env, struct vy_tx *tx, > struct txn_stmt *stmt, > return -1; > for (uint32_t i = 0; i < space->index_count; i++) { > struct vy_lsm *lsm = vy_lsm(space->index[i]); > - if (vy_is_committed_one(env, lsm)) > + if (vy_is_committed(env, lsm)) > continue; > rc = vy_tx_set(tx, lsm, delete); > if (rc != 0) > @@ -1891,7 +1882,7 @@ vy_perform_update(struct vy_env *env, struct vy_tx > *tx, struct txn_stmt *stmt, > > for (uint32_t i = 1; i < space->index_count; ++i) { > struct vy_lsm *lsm = vy_lsm(space->index[i]); > - if (vy_is_committed_one(env, lsm)) > + if (vy_is_committed(env, lsm)) > continue; > if (vy_tx_set_with_colmask(tx, lsm, delete, column_mask) > != 0) > goto error; > @@ -1924,7 +1915,10 @@ vy_update(struct vy_env *env, struct vy_tx *tx, > struct txn_stmt *stmt, > struct space *space, struct request *request) > { > assert(tx != NULL && tx->state == VINYL_TX_READY); > - if (vy_is_committed(env, space)) > + struct vy_lsm *pk = vy_lsm_find(space, 0); > + if (pk == NULL) > + return -1; > + if (vy_is_committed(env, pk)) > return 0; > struct vy_lsm *lsm = vy_lsm_find_unique(space, request->index_id); > if (lsm == NULL) > @@ -1942,11 +1936,6 @@ vy_update(struct vy_env *env, struct vy_tx *tx, > struct txn_stmt *stmt, > return 0; > > /* Apply update operations. */ > - struct vy_lsm *pk = vy_lsm(space->index[0]); > - assert(pk != NULL); > - assert(pk->index_id == 0); > - /* Primary key is dumped last. */ > - assert(!vy_is_committed_one(env, pk)); > uint64_t column_mask = 0; > const char *new_tuple, *new_tuple_end; > uint32_t new_size, old_size; > @@ -2126,7 +2115,10 @@ vy_upsert(struct vy_env *env, struct vy_tx *tx, > struct txn_stmt *stmt, > struct space *space, struct request *request) > { > assert(tx != NULL && tx->state == VINYL_TX_READY); > - if (vy_is_committed(env, space)) > + struct vy_lsm *pk = vy_lsm_find(space, 0); > + if (pk == NULL) > + return -1; > + if (vy_is_committed(env, pk)) > return 0; > /* Check update operations. */ > if (tuple_update_check_ops(region_aligned_alloc_cb, &fiber()->gc, > @@ -2143,11 +2135,6 @@ vy_upsert(struct vy_env *env, struct vy_tx *tx, > struct txn_stmt *stmt, > const char *tuple_end = request->tuple_end; > const char *ops = request->ops; > const char *ops_end = request->ops_end; > - struct vy_lsm *pk = vy_lsm_find(space, 0); > - if (pk == NULL) > - return -1; > - /* Primary key is dumped last. */ > - assert(!vy_is_committed_one(env, pk)); > if (tuple_validate_raw(pk->mem_format, tuple)) > return -1; > > @@ -2240,14 +2227,12 @@ static int > vy_insert(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt, > struct space *space, struct request *request) > { > - assert(stmt != NULL); > + assert(tx != NULL && tx->state == VINYL_TX_READY); > struct vy_lsm *pk = vy_lsm_find(space, 0); > if (pk == NULL) > - /* The space hasn't the primary index. */ > return -1; > - assert(pk->index_id == 0); > - /* Primary key is dumped last. */ > - assert(!vy_is_committed_one(env, pk)); > + if (vy_is_committed(env, pk)) > + return 0; > if (tuple_validate_raw(pk->mem_format, request->tuple)) > return -1; > /* First insert into the primary index. */ > @@ -2263,7 +2248,7 @@ vy_insert(struct vy_env *env, struct vy_tx *tx, > struct txn_stmt *stmt, > > for (uint32_t iid = 1; iid < space->index_count; ++iid) { > struct vy_lsm *lsm = vy_lsm(space->index[iid]); > - if (vy_is_committed_one(env, lsm)) > + if (vy_is_committed(env, lsm)) > continue; > if (vy_tx_set(tx, lsm, stmt->new_tuple) != 0) > return -1; > @@ -2290,16 +2275,11 @@ vy_replace(struct vy_env *env, struct vy_tx *tx, > struct txn_stmt *stmt, > struct space *space, struct request *request) > { > assert(tx != NULL && tx->state == VINYL_TX_READY); > - if (vy_is_committed(env, space)) > - return 0; > - if (request->type == IPROTO_INSERT) > - return vy_insert(env, tx, stmt, space, request); > - > struct vy_lsm *pk = vy_lsm_find(space, 0); > if (pk == NULL) > return -1; > - /* Primary key is dumped last. */ > - assert(!vy_is_committed_one(env, pk)); > + if (vy_is_committed(env, pk)) > + return 0; > > /* Validate and create a statement for the new tuple. */ > if (tuple_validate_raw(pk->mem_format, request->tuple)) > @@ -2352,7 +2332,7 @@ vy_replace(struct vy_env *env, struct vy_tx *tx, > struct txn_stmt *stmt, > } > for (uint32_t i = 1; i < space->index_count; i++) { > struct vy_lsm *lsm = vy_lsm(space->index[i]); > - if (vy_is_committed_one(env, lsm)) > + if (vy_is_committed(env, lsm)) > continue; > if (delete != NULL) { > rc = vy_tx_set(tx, lsm, delete); > @@ -2376,7 +2356,12 @@ vinyl_space_execute_replace(struct space *space, > struct txn *txn, > struct vy_env *env = vy_env(space->engine); > struct vy_tx *tx = txn->engine_tx; > struct txn_stmt *stmt = txn_current_stmt(txn); > - if (vy_replace(env, tx, stmt, space, request)) > + int rc; > + if (request->type == IPROTO_INSERT) > + rc = vy_insert(env, tx, stmt, space, request); > + else > + rc = vy_replace(env, tx, stmt, space, request); > + if (rc != 0) > return -1; > *result = stmt->new_tuple; > return 0; > @@ -3300,6 +3285,8 @@ vinyl_space_apply_initial_join_row(struct space > *space, struct request *request) > int rc = -1; > switch (request->type) { > case IPROTO_INSERT: > + rc = vy_insert(env, tx, &stmt, space, request); > + break; > case IPROTO_REPLACE: > rc = vy_replace(env, tx, &stmt, space, request); > break; > @@ -4488,7 +4475,7 @@ vy_deferred_delete_on_rollback(struct trigger > *trigger, void *event) > * to restore deferred DELETE statements that haven't been dumped > * to disk. To skip deferred DELETEs that have been dumped, we > * use the same technique we employ for normal WAL statements, > - * i.e. we filter them by LSN, see vy_is_committed_one(). To do > + * i.e. we filter them by LSN, see vy_is_committed(). To do > * that, we need to account the LSN of a WAL row that generated > * a deferred DELETE statement to vy_lsm::dump_lsn, so we install > * an on_commit trigger that propagates the LSN of the WAL row to > @@ -4575,7 +4562,7 @@ vy_deferred_delete_on_replace(struct trigger > *trigger, void *event) > struct tuple *region_stmt = NULL; > for (uint32_t i = 1; i < space->index_count; i++) { > struct vy_lsm *lsm = vy_lsm(space->index[i]); > - if (vy_is_committed_one(env, lsm)) > + if (vy_is_committed(env, lsm)) > continue; > /* > * As usual, rotate the active in-memory index if > diff --git a/test/vinyl/recover.result b/test/vinyl/recover.result > index a6313f22..345a019d 100644 > --- a/test/vinyl/recover.result > +++ b/test/vinyl/recover.result > @@ -479,3 +479,63 @@ test_run:cmd('cleanup server force_recovery') > --- > - true > ... > +-- > +-- gh-4222: assertion failure while recovering dumped statement > +-- that isn't present in secondary index. > +-- > +test_run:cmd('create server test with script = "vinyl/low_quota.lua"') > +--- > +- true > +... > +test_run:cmd('start server test with args="1048576"') > +--- > +- true > +... > +test_run:cmd('switch test') > +--- > +- true > +... > +s = box.schema.space.create('test', {engine = 'vinyl'}) > +--- > +... > +pk = s:create_index('primary') > +--- > +... > +sk = s:create_index('secondary', {unique = false, parts = {2, > 'unsigned'}}) > +--- > +... > +s:insert{1, 1, 1} > +--- > +- [1, 1, 1] > +... > +box.snapshot() > +--- > +- ok > +... > +for i = 1, 1024 do s:update(1, {{'=', 3, string.rep('x', 1024)}}) end > +--- > +... > +test_run:wait_cond(function() return pk:stat().disk.dump.count == 2 end) > +--- > +- true > +... > +sk:stat().disk.dump.count -- 1 > +--- > +- 1 > +... > +test_run:cmd('restart server test with args="1048576"') > +box.space.test:drop() > +--- > +... > +test_run:cmd('switch default') > +--- > +- true > +... > +test_run:cmd('stop server test') > +--- > +- true > +... > +test_run:cmd('cleanup server test') > +--- > +- true > +... > diff --git a/test/vinyl/recover.test.lua b/test/vinyl/recover.test.lua > index 1ed266a1..7bdba178 100644 > --- a/test/vinyl/recover.test.lua > +++ b/test/vinyl/recover.test.lua > @@ -164,3 +164,30 @@ sum > test_run:cmd('switch default') > test_run:cmd('stop server force_recovery') > test_run:cmd('cleanup server force_recovery') > + > +-- > +-- gh-4222: assertion failure while recovering dumped statement > +-- that isn't present in secondary index. > +-- > +test_run:cmd('create server test with script = "vinyl/low_quota.lua"') > +test_run:cmd('start server test with args="1048576"') > +test_run:cmd('switch test') > + > +s = box.schema.space.create('test', {engine = 'vinyl'}) > +pk = s:create_index('primary') > +sk = s:create_index('secondary', {unique = false, parts = {2, > 'unsigned'}}) > + > +s:insert{1, 1, 1} > +box.snapshot() > + > +for i = 1, 1024 do s:update(1, {{'=', 3, string.rep('x', 1024)}}) end > +test_run:wait_cond(function() return pk:stat().disk.dump.count == 2 end) > +sk:stat().disk.dump.count -- 1 > + > +test_run:cmd('restart server test with args="1048576"') > + > +box.space.test:drop() > + > +test_run:cmd('switch default') > +test_run:cmd('stop server test') > +test_run:cmd('cleanup server test') > -- > 2.11.0 > > >