Hi! Thanks for the patch, LGTM. -- Serge Petrenko     >Вторник, 11 августа 2020, 1:18 +03:00 от Vladislav Shpilevoy : >  >Box.snapshot() could include rolled back data in case synchronous >transaction ROLLBACK arrived during WAL rotation in preparation of >a checkpoint. > >More specifically, snapshot consists of fixating the engines' >content (creation of a read-view), doing WAL rotation, and writing >the snapshot itself. All data changes after content fixation won't >go into the snap. So if ROLLBACK arrives during WAL rotation, the >fixated content will have rolled back data, not present in the >newest dataset. > >The patch makes it fail if during WAL rotation anything was rolled >back. The bug sometimes appeared in an existing test about qsync >snapshots, but with a very poor reproducibility. In a new test >file it is reproduced 100% without the patch. > >Closes #5167 >--- >Branch: http://github.com/tarantool/tarantool/tree/gerold103/gh-5167-flaky-qsync-snapshots >Issue: https://github.com/tarantool/tarantool/issues/5167 > >@ChangeLog >* Snapshot could contain changes from a rolled back synchronous transaction (gh-5167). > > src/box/gc.c | 12 +- > src/box/wal.c | 4 + > src/lib/core/errinj.h | 1 + > test/box/errinj.result | 1 + > .../gh-5167-qsync-rollback-snap.result | 165 ++++++++++++++++++ > .../gh-5167-qsync-rollback-snap.test.lua | 67 +++++++ > test/replication/suite.ini | 2 +- > 7 files changed, 250 insertions(+), 2 deletions(-) > create mode 100644 test/replication/gh-5167-qsync-rollback-snap.result > create mode 100644 test/replication/gh-5167-qsync-rollback-snap.test.lua > >diff --git a/src/box/gc.c b/src/box/gc.c >index d50a64d66..76f7c6325 100644 >--- a/src/box/gc.c >+++ b/src/box/gc.c >@@ -382,6 +382,7 @@ gc_do_checkpoint(bool is_scheduled) > { >  int rc; >  struct wal_checkpoint checkpoint; >+ int64_t limbo_rollback_count = txn_limbo.rollback_count; >  >  assert(!gc.checkpoint_is_in_progress); >  gc.checkpoint_is_in_progress = true; >@@ -396,7 +397,16 @@ gc_do_checkpoint(bool is_scheduled) >  rc = wal_begin_checkpoint(&checkpoint); >  if (rc != 0) >  goto out; >- >+ /* >+ * Check if the checkpoint contains rolled back data. That makes the >+ * checkpoint not self-sufficient - it needs the xlog file with >+ * ROLLBACK. Drop it. >+ */ >+ if (txn_limbo.rollback_count != limbo_rollback_count) { >+ rc = -1; >+ diag_set(ClientError, ER_SYNC_ROLLBACK); >+ goto out; >+ } >  /* >  * Wait the confirms on all "sync" transactions before >  * create a snapshot. >diff --git a/src/box/wal.c b/src/box/wal.c >index 220e68245..d8c92aa36 100644 >--- a/src/box/wal.c >+++ b/src/box/wal.c >@@ -662,6 +662,7 @@ wal_begin_checkpoint_f(struct cbus_call_msg *data) >  } >  vclock_copy(&msg->vclock, &writer->vclock); >  msg->wal_size = writer->checkpoint_wal_size; >+ ERROR_INJECT_SLEEP(ERRINJ_WAL_DELAY); >  return 0; > } >  >@@ -1272,6 +1273,9 @@ wal_write_async(struct journal *journal, struct journal_entry *entry) >  writer->last_entry = entry; >  batch->approx_len += entry->approx_len; >  writer->wal_pipe.n_input += entry->n_rows * XROW_IOVMAX; >+#ifndef NDEBUG >+ ++errinj(ERRINJ_WAL_WRITE_COUNT, ERRINJ_INT)->iparam; >+#endif >  cpipe_flush_input(&writer->wal_pipe); >  return 0; >  >diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h >index aace8736f..814c57c2e 100644 >--- a/src/lib/core/errinj.h >+++ b/src/lib/core/errinj.h >@@ -82,6 +82,7 @@ struct errinj { >  _(ERRINJ_WAL_DELAY, ERRINJ_BOOL, {.bparam = false}) \ >  _(ERRINJ_WAL_DELAY_COUNTDOWN, ERRINJ_INT, {.iparam = -1}) \ >  _(ERRINJ_WAL_FALLOCATE, ERRINJ_INT, {.iparam = 0}) \ >+ _(ERRINJ_WAL_WRITE_COUNT, ERRINJ_INT, {.iparam = 0}) \ >  _(ERRINJ_INDEX_ALLOC, ERRINJ_BOOL, {.bparam = false}) \ >  _(ERRINJ_TUPLE_ALLOC, ERRINJ_BOOL, {.bparam = false}) \ >  _(ERRINJ_TUPLE_FIELD, ERRINJ_BOOL, {.bparam = false}) \ >diff --git a/test/box/errinj.result b/test/box/errinj.result >index 4bea0f46f..613d22c51 100644 >--- a/test/box/errinj.result >+++ b/test/box/errinj.result >@@ -114,6 +114,7 @@ evals >   - ERRINJ_WAL_ROTATE: false >   - ERRINJ_WAL_SYNC: false >   - ERRINJ_WAL_WRITE: false >+ - ERRINJ_WAL_WRITE_COUNT: 3 >   - ERRINJ_WAL_WRITE_DISK: false >   - ERRINJ_WAL_WRITE_EOF: false >   - ERRINJ_WAL_WRITE_PARTIAL: -1 >diff --git a/test/replication/gh-5167-qsync-rollback-snap.result b/test/replication/gh-5167-qsync-rollback-snap.result >new file mode 100644 >index 000000000..06f58526c >--- /dev/null >+++ b/test/replication/gh-5167-qsync-rollback-snap.result >@@ -0,0 +1,165 @@ >+-- test-run result file version 2 >+test_run = require('test_run').new() >+ | --- >+ | ... >+engine = test_run:get_cfg('engine') >+ | --- >+ | ... >+ >+orig_synchro_quorum = box.cfg.replication_synchro_quorum >+ | --- >+ | ... >+orig_synchro_timeout = box.cfg.replication_synchro_timeout >+ | --- >+ | ... >+box.schema.user.grant('guest', 'super') >+ | --- >+ | ... >+ >+test_run:cmd('create server replica with rpl_master=default,\ >+ script="replication/replica.lua"') >+ | --- >+ | - true >+ | ... >+test_run:cmd('start server replica with wait=True, wait_load=True') >+ | --- >+ | - true >+ | ... >+ >+-- >+-- gh-5167: >+-- >+fiber = require('fiber') >+ | --- >+ | ... >+box.cfg{replication_synchro_quorum = 2, replication_synchro_timeout = 1000} >+ | --- >+ | ... >+_ = box.schema.space.create('sync', {is_sync = true, engine = engine}) >+ | --- >+ | ... >+_ = box.space.sync:create_index('pk') >+ | --- >+ | ... >+-- Write something to flush the current master's state to replica. >+_ = box.space.sync:insert{1} >+ | --- >+ | ... >+_ = box.space.sync:delete{1} >+ | --- >+ | ... >+ >+box.cfg{replication_synchro_quorum = 3} >+ | --- >+ | ... >+ok, err = nil >+ | --- >+ | ... >+f = fiber.create(function() \ >+ ok, err = pcall(box.space.sync.insert, box.space.sync, {1}) \ >+end) >+ | --- >+ | ... >+ >+test_run:switch('replica') >+ | --- >+ | - true >+ | ... >+fiber = require('fiber') >+ | --- >+ | ... >+test_run:wait_cond(function() return box.space.sync:count() == 1 end) >+ | --- >+ | - true >+ | ... >+-- Snapshot will stuck in WAL thread on rotation before starting wait on the >+-- limbo. >+box.error.injection.set("ERRINJ_WAL_DELAY", true) >+ | --- >+ | - ok >+ | ... >+wal_write_count = box.error.injection.get("ERRINJ_WAL_WRITE_COUNT") >+ | --- >+ | ... >+ok, err = nil >+ | --- >+ | ... >+f = fiber.create(function() ok, err = pcall(box.snapshot) end) >+ | --- >+ | ... >+ >+test_run:switch('default') >+ | --- >+ | - true >+ | ... >+box.cfg{replication_synchro_timeout = 0.0001} >+ | --- >+ | ... >+test_run:wait_cond(function() return f:status() == 'dead' end) >+ | --- >+ | - true >+ | ... >+ok, err >+ | --- >+ | - false >+ | - Quorum collection for a synchronous transaction is timed out >+ | ... >+box.space.sync:select{} >+ | --- >+ | - [] >+ | ... >+ >+test_run:switch('replica') >+ | --- >+ | - true >+ | ... >+-- Rollback was received. Note, it is not legit to look for space:count() == 0. >+-- Because ideally ROLLBACK should not be applied before written to WAL. That >+-- means count() will be > 0 until WAL write succeeds. >+test_run:wait_cond(function() \ >+ return box.error.injection.get("ERRINJ_WAL_WRITE_COUNT") > wal_write_count \ >+end) >+ | --- >+ | - true >+ | ... >+-- Now WAL rotation is done. Snapshot will fail, because will see that a >+-- rollback happened during that. Meaning that the rotated WAL contains >+-- not confirmed data, and it can't be used as a checkpoint. >+box.error.injection.set("ERRINJ_WAL_DELAY", false) >+ | --- >+ | - ok >+ | ... >+test_run:wait_cond(function() return f:status() == 'dead' end) >+ | --- >+ | - true >+ | ... >+ok, err >+ | --- >+ | - false >+ | - A rollback for a synchronous transaction is received >+ | ... >+ >+test_run:switch('default') >+ | --- >+ | - true >+ | ... >+box.space.sync:drop() >+ | --- >+ | ... >+test_run:cmd('stop server replica') >+ | --- >+ | - true >+ | ... >+test_run:cmd('delete server replica') >+ | --- >+ | - true >+ | ... >+box.schema.user.revoke('guest', 'super') >+ | --- >+ | ... >+box.cfg{ \ >+ replication_synchro_quorum = orig_synchro_quorum, \ >+ replication_synchro_timeout = orig_synchro_timeout, \ >+} >+ | --- >+ | ... >diff --git a/test/replication/gh-5167-qsync-rollback-snap.test.lua b/test/replication/gh-5167-qsync-rollback-snap.test.lua >new file mode 100644 >index 000000000..475727e61 >--- /dev/null >+++ b/test/replication/gh-5167-qsync-rollback-snap.test.lua >@@ -0,0 +1,67 @@ >+test_run = require('test_run').new() >+engine = test_run:get_cfg('engine') >+ >+orig_synchro_quorum = box.cfg.replication_synchro_quorum >+orig_synchro_timeout = box.cfg.replication_synchro_timeout >+box.schema.user.grant('guest', 'super') >+ >+test_run:cmd('create server replica with rpl_master=default,\ >+ script="replication/replica.lua"') >+test_run:cmd('start server replica with wait=True, wait_load=True') >+ >+-- >+-- gh-5167: >+-- >+fiber = require('fiber') >+box.cfg{replication_synchro_quorum = 2, replication_synchro_timeout = 1000} >+_ = box.schema.space.create('sync', {is_sync = true, engine = engine}) >+_ = box.space.sync:create_index('pk') >+-- Write something to flush the current master's state to replica. >+_ = box.space.sync:insert{1} >+_ = box.space.sync:delete{1} >+ >+box.cfg{replication_synchro_quorum = 3} >+ok, err = nil >+f = fiber.create(function() \ >+ ok, err = pcall(box.space.sync.insert, box.space.sync, {1}) \ >+end) >+ >+test_run:switch('replica') >+fiber = require('fiber') >+test_run:wait_cond(function() return box.space.sync:count() == 1 end) >+-- Snapshot will stuck in WAL thread on rotation before starting wait on the >+-- limbo. >+box.error.injection.set("ERRINJ_WAL_DELAY", true) >+wal_write_count = box.error.injection.get("ERRINJ_WAL_WRITE_COUNT") >+ok, err = nil >+f = fiber.create(function() ok, err = pcall(box.snapshot) end) >+ >+test_run:switch('default') >+box.cfg{replication_synchro_timeout = 0.0001} >+test_run:wait_cond(function() return f:status() == 'dead' end) >+ok, err >+box.space.sync:select{} >+ >+test_run:switch('replica') >+-- Rollback was received. Note, it is not legit to look for space:count() == 0. >+-- Because ideally ROLLBACK should not be applied before written to WAL. That >+-- means count() will be > 0 until WAL write succeeds. >+test_run:wait_cond(function() \ >+ return box.error.injection.get("ERRINJ_WAL_WRITE_COUNT") > wal_write_count \ >+end) >+-- Now WAL rotation is done. Snapshot will fail, because will see that a >+-- rollback happened during that. Meaning that the rotated WAL contains >+-- not confirmed data, and it can't be used as a checkpoint. >+box.error.injection.set("ERRINJ_WAL_DELAY", false) >+test_run:wait_cond(function() return f:status() == 'dead' end) >+ok, err >+ >+test_run:switch('default') >+box.space.sync:drop() >+test_run:cmd('stop server replica') >+test_run:cmd('delete server replica') >+box.schema.user.revoke('guest', 'super') >+box.cfg{ \ >+ replication_synchro_quorum = orig_synchro_quorum, \ >+ replication_synchro_timeout = orig_synchro_timeout, \ >+} >diff --git a/test/replication/suite.ini b/test/replication/suite.ini >index 73f73eb89..247289d58 100644 >--- a/test/replication/suite.ini >+++ b/test/replication/suite.ini >@@ -3,7 +3,7 @@ core = tarantool > script = master.lua > description = tarantool/box, replication > disabled = consistent.test.lua >-release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua qsync_advanced.test.lua qsync_errinj.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua gh-4739-vclock-assert.test.lua gh-4730-applier-rollback.test.lua gh-5140-qsync-casc-rollback.test.lua gh-5144-qsync-dup-confirm.test.lua >+release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua qsync_advanced.test.lua qsync_errinj.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua gh-4739-vclock-assert.test.lua gh-4730-applier-rollback.test.lua gh-5140-qsync-casc-rollback.test.lua gh-5144-qsync-dup-confirm.test.lua gh-5167-rollback-snap.test.lua > config = suite.cfg > lua_libs = lua/fast_replica.lua lua/rlimit.lua > use_unix_sockets = True >-- >2.21.1 (Apple Git-122.3) >