[Tarantool-patches] [PATCH 1/1] applier: process synchro rows after WAL write

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Thu Apr 8 01:47:39 MSK 2021


The applier used to process the synchronous rows CONFIRM and
ROLLBACK right after receipt, before they were written to WAL.

That led to a bug: the confirmed data became visible and could be
accessed by user requests, then the node restarted before the CONFIRM
finished its WAL write, and the data was not visible anymore. That is
effectively a rollback of confirmed data, which is not acceptable.

Another case: the CONFIRM WAL write could simply fail for any reason
(no disk space, OOM), but the transactions would remain confirmed
anyway.

That also produced some hacks in the limbo code to support
confirmation and rollback of transactions not yet written to WAL.

The patch makes the synchro rows processed only after they are
written to WAL. The 'rollback' case above might still happen if the
xlogs were in the kernel caches and the machine was powered off
before they were flushed to disk, but that is not related to qsync
specifically.

To handle the synchro rows after the WAL write, the patch makes them
go to WAL in a blocking way (journal_write() instead of
journal_write_try_async()). Otherwise it could happen that a
CONFIRM/ROLLBACK is still being written to WAL (and would clear the
limbo once finished), while a new transaction with a different owner
arrives in the meantime and conflicts with the current limbo owner.
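
A condensed sketch of the new flow implemented by the applier hunk
below (error handling, the on_wal_write trigger and the struct
definition are trimmed; see the diff for the exact code):

static void
apply_synchro_row_cb(struct journal_entry *entry)
{
	struct synchro_entry *se =
		(struct synchro_entry *)entry->complete_data;
	if (entry->res >= 0) {
		/* The limbo sees the request only after the WAL write. */
		txn_limbo_process(&txn_limbo, se->req);
	}
	/* The applier fiber is parked inside journal_write(). */
	if (se->owner != fiber())
		fiber_wakeup(se->owner);
}

static int
apply_synchro_row(struct xrow_header *row)
{
	struct synchro_request req;
	if (xrow_decode_synchro(row, &req) != 0)
		return -1;
	/* The entry lives on the applier fiber stack: the write is blocking. */
	struct synchro_entry entry;
	entry.base.rows[0] = row;
	journal_entry_create(&entry.base, 1, xrow_approx_len(row),
			     apply_synchro_row_cb, &entry);
	entry.req = &req;
	entry.owner = fiber();
	/*
	 * journal_write() does not return until the WAL thread is done, so
	 * no new transactions can reach the limbo while a CONFIRM/ROLLBACK
	 * write is in flight.
	 */
	if (journal_write(&entry.base) != 0 || entry.base.res < 0) {
		diag_set(ClientError, ER_WAL_IO);
		return -1;
	}
	return 0;
}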

Closes #5213
---
Branch: http://github.com/tarantool/tarantool/tree/gerold103/gh-5213-applier-qsync-write-before
Issue: https://github.com/tarantool/tarantool/issues/5213

 .../unreleased/applier-qsync-write-order.md   |   6 +
 src/box/applier.cc                            | 107 +++---
 src/box/txn_limbo.c                           |  48 +--
 .../gh-5213-qsync-applier-order.result        | 329 ++++++++++++++++++
 .../gh-5213-qsync-applier-order.test.lua      | 129 +++++++
 test/replication/gh-5213-replica.lua          |  10 +
 test/replication/suite.cfg                    |   1 +
 test/replication/suite.ini                    |   2 +-
 8 files changed, 529 insertions(+), 103 deletions(-)
 create mode 100644 changelogs/unreleased/applier-qsync-write-order.md
 create mode 100644 test/replication/gh-5213-qsync-applier-order.result
 create mode 100644 test/replication/gh-5213-qsync-applier-order.test.lua
 create mode 100644 test/replication/gh-5213-replica.lua

diff --git a/changelogs/unreleased/applier-qsync-write-order.md b/changelogs/unreleased/applier-qsync-write-order.md
new file mode 100644
index 000000000..10b9ade80
--- /dev/null
+++ b/changelogs/unreleased/applier-qsync-write-order.md
@@ -0,0 +1,6 @@
+## bugfix/replication
+
+* Fixed a bug when a synchronous transaction could be confirmed and visible on
+  a replica, but then became not confirmed and invisible again after restart.
+  It was more likely to happen on memtx spaces with `memtx_use_mvcc_engine`
+  enabled (gh-5213).
diff --git a/src/box/applier.cc b/src/box/applier.cc
index 971b2e64c..f7c82548c 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -762,26 +762,21 @@ applier_txn_wal_write_cb(struct trigger *trigger, void *event)
 }
 
 struct synchro_entry {
-	/** Encoded form of a synchro record. */
-	struct synchro_body_bin	body_bin;
-
-	/** xrow to write, used by the journal engine. */
-	struct xrow_header row;
-
+	/** Request to process when WAL write is done. */
+	struct synchro_request *req;
+	/** Fiber that created the entry. To wake up when WAL write is done. */
+	struct fiber *owner;
 	/**
-	 * The journal entry itself. Note since
-	 * it has unsized array it must be the
-	 * last entry in the structure.
+	 * The base journal entry. It has an unsized array and thus must be
+	 * the last member of the structure. That is worked around via a union
+	 * adding the needed tail as char[].
 	 */
-	struct journal_entry journal_entry;
+	union {
+		struct journal_entry base;
+		char base_buf[sizeof(base) + sizeof(base.rows[0])];
+	};
 };
 
-static void
-synchro_entry_delete(struct synchro_entry *entry)
-{
-	free(entry);
-}
-
 /**
  * Async write journal completion.
  */
@@ -791,50 +786,15 @@ apply_synchro_row_cb(struct journal_entry *entry)
 	assert(entry->complete_data != NULL);
 	struct synchro_entry *synchro_entry =
 		(struct synchro_entry *)entry->complete_data;
-	if (entry->res < 0)
+	if (entry->res < 0) {
 		applier_rollback_by_wal_io();
-	else
+	} else {
+		txn_limbo_process(&txn_limbo, synchro_entry->req);
 		trigger_run(&replicaset.applier.on_wal_write, NULL);
-
-	synchro_entry_delete(synchro_entry);
-}
-
-/**
- * Allocate a new synchro_entry to be passed to
- * the journal engine in async write way.
- */
-static struct synchro_entry *
-synchro_entry_new(struct xrow_header *applier_row,
-		  struct synchro_request *req)
-{
-	struct synchro_entry *entry;
-	size_t size = sizeof(*entry) + sizeof(struct xrow_header *);
-
-	/*
-	 * For simplicity we use malloc here but
-	 * probably should provide some cache similar
-	 * to txn cache.
-	 */
-	entry = (struct synchro_entry *)malloc(size);
-	if (entry == NULL) {
-		diag_set(OutOfMemory, size, "malloc", "synchro_entry");
-		return NULL;
 	}
-
-	struct journal_entry *journal_entry = &entry->journal_entry;
-	struct synchro_body_bin *body_bin = &entry->body_bin;
-	struct xrow_header *row = &entry->row;
-
-	journal_entry->rows[0] = row;
-
-	xrow_encode_synchro(row, body_bin, req);
-
-	row->lsn = applier_row->lsn;
-	row->replica_id = applier_row->replica_id;
-
-	journal_entry_create(journal_entry, 1, xrow_approx_len(row),
-			     apply_synchro_row_cb, entry);
-	return entry;
+	/* The fiber is the same on final join. */
+	if (synchro_entry->owner != fiber())
+		fiber_wakeup(synchro_entry->owner);
 }
 
 /** Process a synchro request. */
@@ -847,14 +807,31 @@ apply_synchro_row(struct xrow_header *row)
 	if (xrow_decode_synchro(row, &req) != 0)
 		goto err;
 
-	txn_limbo_process(&txn_limbo, &req);
-
-	struct synchro_entry *entry;
-	entry = synchro_entry_new(row, &req);
-	if (entry == NULL)
-		goto err;
-
-	if (journal_write_try_async(&entry->journal_entry) != 0) {
+	struct synchro_entry entry;
+	entry.base.rows[0] = row;
+	journal_entry_create(&entry.base, 1, xrow_approx_len(row),
+			     apply_synchro_row_cb, &entry);
+	entry.req = &req;
+	entry.owner = fiber();
+	/*
+	 * The WAL write is blocking. Otherwise it might happen that a CONFIRM
+	 * or ROLLBACK is sent to WAL, and it would empty the limbo, but before
+	 * it is written, more transactions arrive with a different owner. They
+	 * won't be able to enter the limbo due to owner ID mismatch. Hence the
+	 * synchro rows must block receipt of new transactions.
+	 *
+	 * Don't forget to return -1 both if the journal write failed right
+	 * away, and if it failed inside of WAL thread (res < 0). Otherwise the
+	 * caller would propagate committed vclock to this row thinking it was
+	 * a success.
+	 *
+	 * XXX: in theory it could be done vice-versa. The write could be made
+	 * non-blocking, and instead the potentially conflicting transactions
+	 * could try to wait for all the current synchro WAL writes to end
+	 * before trying to commit. But that requires extra steps from the
+	 * transactions side, including the async ones.
+	 */
+	if (journal_write(&entry.base) != 0 || entry.base.res < 0) {
 		diag_set(ClientError, ER_WAL_IO);
 		goto err;
 	}
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index cf0ad9350..c63211191 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -400,20 +400,11 @@ txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn)
 		txn_limbo_remove(limbo, e);
 		txn_clear_flags(e->txn, TXN_WAIT_SYNC | TXN_WAIT_ACK);
 		/*
-		 * If already written to WAL by now, finish tx processing.
-		 * Otherwise just clear the sync flags. Tx procesing will finish
-		 * automatically once the tx is written to WAL.
-		 *
-		 * XXX: Normally at this point all transactions covered by this
-		 * CONFIRM should be in WAL already, but there is a bug, that
-		 * replica always processes received synchro requests *before*
-		 * writing them to WAL. So it can happen, that a CONFIRM is
-		 * 'read', but the transaction is not written yet. Should be
-		 * fixed when the replica will behave properly, and then this
-		 * branch won't exist.
+		 * Should be written to WAL by now. Confirm is always written
+		 * after the affected transactions.
 		 */
-		if (e->txn->signature >= 0)
-			txn_complete_success(e->txn);
+		assert(e->txn->signature >= 0);
+		txn_complete_success(e->txn);
 	}
 	/* Update is_ro once the limbo is clear. */
 	if (txn_limbo_is_empty(limbo))
@@ -455,30 +446,13 @@ txn_limbo_read_rollback(struct txn_limbo *limbo, int64_t lsn)
 	rlist_foreach_entry_safe_reverse(e, &limbo->queue, in_queue, tmp) {
 		txn_limbo_abort(limbo, e);
 		txn_clear_flags(e->txn, TXN_WAIT_SYNC | TXN_WAIT_ACK);
-		if (e->txn->signature >= 0) {
-			/* Rollback the transaction. */
-			e->txn->signature = TXN_SIGNATURE_SYNC_ROLLBACK;
-			txn_complete_fail(e->txn);
-		} else {
-			/*
-			 * Rollback the transaction, but don't free it yet. It
-			 * will be freed after its WAL write is completed.
-			 *
-			 * XXX: Normally at this point all transactions covered
-			 * by this ROLLBACK should be in WAL already, but there
-			 * is a bug, that replica always processes received
-			 * synchro requests *before* writing them to WAL. So it
-			 * can happen, that a ROLLBACK is 'read', but the
-			 * transaction is not written yet. Should be fixed when
-			 * the replica will behave properly, and then this
-			 * branch won't exist.
-			 */
-			e->txn->signature = TXN_SIGNATURE_SYNC_ROLLBACK;
-			struct fiber *fiber = e->txn->fiber;
-			e->txn->fiber = fiber();
-			txn_complete_fail(e->txn);
-			e->txn->fiber = fiber;
-		}
+		/*
+		 * Should be written to WAL by now. Rollback is always written
+		 * after the affected transactions.
+		 */
+		assert(e->txn->signature >= 0);
+		e->txn->signature = TXN_SIGNATURE_SYNC_ROLLBACK;
+		txn_complete_fail(e->txn);
 		if (e == last_rollback)
 			break;
 	}
diff --git a/test/replication/gh-5213-qsync-applier-order.result b/test/replication/gh-5213-qsync-applier-order.result
new file mode 100644
index 000000000..cab8c588f
--- /dev/null
+++ b/test/replication/gh-5213-qsync-applier-order.result
@@ -0,0 +1,329 @@
+-- test-run result file version 2
+--
+-- gh-5213: applier used to process CONFIRM/ROLLBACK before writing them to WAL.
+-- As a result it could happen that the transactions became visible on CONFIRM,
+-- then somehow weren't written to WAL, and on restart the data might not be
+-- visible again. That means a rollback of confirmed data, which is not
+-- acceptable (unlike the opposite situation, a commit after a rollback).
+--
+test_run = require('test_run').new()
+ | ---
+ | ...
+fiber = require('fiber')
+ | ---
+ | ...
+old_synchro_quorum = box.cfg.replication_synchro_quorum
+ | ---
+ | ...
+old_synchro_timeout = box.cfg.replication_synchro_timeout
+ | ---
+ | ...
+
+box.schema.user.grant('guest', 'super')
+ | ---
+ | ...
+
+s = box.schema.space.create('test', {is_sync = true})
+ | ---
+ | ...
+_ = s:create_index('pk')
+ | ---
+ | ...
+
+test_run:cmd('create server replica with rpl_master=default,\
+              script="replication/gh-5213-replica.lua"')
+ | ---
+ | - true
+ | ...
+test_run:cmd('start server replica')
+ | ---
+ | - true
+ | ...
+
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+assert(box.info.id == 2)
+ | ---
+ | - true
+ | ...
+lsn = box.info.vclock[1]
+ | ---
+ | ...
+
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+fiber = require('fiber')
+ | ---
+ | ...
+box.cfg{                                                                        \
+    replication_synchro_quorum = 3,                                             \
+    replication_synchro_timeout = 1000,                                         \
+}
+ | ---
+ | ...
+f = fiber.new(function() s:replace{1} end)
+ | ---
+ | ...
+
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+-- Wait when the transaction is written to WAL.
+test_run:wait_cond(function() return box.info.vclock[1] == lsn + 1 end)
+ | ---
+ | - true
+ | ...
+s = box.space.test
+ | ---
+ | ...
+-- But not visible yet. Because MVCC is on, and no CONFIRM yet.
+assert(s:get({1}) == nil)
+ | ---
+ | - true
+ | ...
+-- Block the incoming CONFIRM to be able to ensure the data is not visible until
+-- WAL write ends.
+box.error.injection.set('ERRINJ_WAL_DELAY_COUNTDOWN', 0)
+ | ---
+ | - ok
+ | ...
+
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+box.cfg{replication_synchro_quorum = 2}
+ | ---
+ | ...
+test_run:wait_cond(function() return f:status() == 'dead' end)
+ | ---
+ | - true
+ | ...
+
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+-- Wait when CONFIRM is in the WAL thread.
+test_run:wait_cond(function() return box.error.injection.get('ERRINJ_WAL_DELAY') end)
+ | ---
+ | - true
+ | ...
+assert(s:get({1}) == nil)
+ | ---
+ | - true
+ | ...
+box.error.injection.set("ERRINJ_WAL_DELAY", false)
+ | ---
+ | - ok
+ | ...
+-- After CONFIRM is in WAL, the transaction is committed and its data is
+-- visible.
+test_run:wait_cond(function() return s:get({1}) ~= nil end)
+ | ---
+ | - true
+ | ...
+
+--
+-- Ensure a CONFIRM WAL write failure is also handled fine when the WAL write
+-- couldn't even start. The data remains not confirmed until a WAL write succeeds.
+--
+lsn = box.info.vclock[1]
+ | ---
+ | ...
+
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+box.cfg{replication_synchro_quorum = 3}
+ | ---
+ | ...
+f = fiber.new(function() s:replace{2} end)
+ | ---
+ | ...
+
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+test_run:wait_cond(function() return box.info.vclock[1] == lsn + 1 end)
+ | ---
+ | - true
+ | ...
+assert(s:get({2}) == nil)
+ | ---
+ | - true
+ | ...
+-- Make journal write fail immediately.
+box.error.injection.set('ERRINJ_WAL_IO', true)
+ | ---
+ | - ok
+ | ...
+
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+box.cfg{replication_synchro_quorum = 2}
+ | ---
+ | ...
+test_run:wait_cond(function() return f:status() == 'dead' end)
+ | ---
+ | - true
+ | ...
+
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+test_run:wait_upstream(1, {status = 'stopped'})
+ | ---
+ | - true
+ | ...
+box.error.injection.set('ERRINJ_WAL_IO', false)
+ | ---
+ | - ok
+ | ...
+assert(s:get({2}) == nil)
+ | ---
+ | - true
+ | ...
+-- Re-subscribe.
+replication = box.cfg.replication
+ | ---
+ | ...
+box.cfg{replication = {}}
+ | ---
+ | ...
+box.cfg{replication = replication}
+ | ---
+ | ...
+test_run:wait_cond(function() return box.info.vclock[1] == lsn + 2 end)
+ | ---
+ | - true
+ | ...
+test_run:wait_upstream(1, {status = 'follow'})
+ | ---
+ | - true
+ | ...
+assert(s:get({2}) ~= nil)
+ | ---
+ | - true
+ | ...
+
+--
+-- Ensure a CONFIRM WAL write failure is handled fine when an error happens
+-- inside the WAL thread. The data remains not confirmed until a WAL write succeeds.
+--
+lsn = box.info.vclock[1]
+ | ---
+ | ...
+
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+box.cfg{replication_synchro_quorum = 3}
+ | ---
+ | ...
+f = fiber.new(function() s:replace{3} end)
+ | ---
+ | ...
+
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+test_run:wait_cond(function() return box.info.vclock[1] == lsn + 1 end)
+ | ---
+ | - true
+ | ...
+assert(s:get({3}) == nil)
+ | ---
+ | - true
+ | ...
+-- Journal write start is going to succeed, but it will fail later on return
+-- from the WAL thread.
+box.error.injection.set('ERRINJ_WAL_ROTATE', true)
+ | ---
+ | - ok
+ | ...
+
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+box.cfg{replication_synchro_quorum = 2}
+ | ---
+ | ...
+test_run:wait_cond(function() return f:status() == 'dead' end)
+ | ---
+ | - true
+ | ...
+
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+test_run:wait_upstream(1, {status = 'stopped'})
+ | ---
+ | - true
+ | ...
+box.error.injection.set('ERRINJ_WAL_ROTATE', false)
+ | ---
+ | - ok
+ | ...
+assert(s:get({3}) == nil)
+ | ---
+ | - true
+ | ...
+box.cfg{replication = {}}
+ | ---
+ | ...
+box.cfg{replication = replication}
+ | ---
+ | ...
+test_run:wait_cond(function() return box.info.vclock[1] == lsn + 2 end)
+ | ---
+ | - true
+ | ...
+test_run:wait_upstream(1, {status = 'follow'})
+ | ---
+ | - true
+ | ...
+assert(s:get({3}) ~= nil)
+ | ---
+ | - true
+ | ...
+
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+test_run:cmd('stop server replica')
+ | ---
+ | - true
+ | ...
+test_run:cmd('delete server replica')
+ | ---
+ | - true
+ | ...
+
+s:drop()
+ | ---
+ | ...
+box.schema.user.revoke('guest', 'super')
+ | ---
+ | ...
+box.cfg{                                                                        \
+    replication_synchro_quorum = old_synchro_quorum,                            \
+    replication_synchro_timeout = old_synchro_timeout,                          \
+}
+ | ---
+ | ...
diff --git a/test/replication/gh-5213-qsync-applier-order.test.lua b/test/replication/gh-5213-qsync-applier-order.test.lua
new file mode 100644
index 000000000..3a32626f2
--- /dev/null
+++ b/test/replication/gh-5213-qsync-applier-order.test.lua
@@ -0,0 +1,129 @@
+--
+-- gh-5213: applier used to process CONFIRM/ROLLBACK before writing them to WAL.
+-- As a result it could happen that the transactions became visible on CONFIRM,
+-- then somehow weren't written to WAL, and on restart the data might not be
+-- visible again. That means a rollback of confirmed data, which is not
+-- acceptable (unlike the opposite situation, a commit after a rollback).
+--
+test_run = require('test_run').new()
+fiber = require('fiber')
+old_synchro_quorum = box.cfg.replication_synchro_quorum
+old_synchro_timeout = box.cfg.replication_synchro_timeout
+
+box.schema.user.grant('guest', 'super')
+
+s = box.schema.space.create('test', {is_sync = true})
+_ = s:create_index('pk')
+
+test_run:cmd('create server replica with rpl_master=default,\
+              script="replication/gh-5213-replica.lua"')
+test_run:cmd('start server replica')
+
+test_run:switch('replica')
+assert(box.info.id == 2)
+lsn = box.info.vclock[1]
+
+test_run:switch('default')
+fiber = require('fiber')
+box.cfg{                                                                        \
+    replication_synchro_quorum = 3,                                             \
+    replication_synchro_timeout = 1000,                                         \
+}
+f = fiber.new(function() s:replace{1} end)
+
+test_run:switch('replica')
+-- Wait when the transaction is written to WAL.
+test_run:wait_cond(function() return box.info.vclock[1] == lsn + 1 end)
+s = box.space.test
+-- But not visible yet. Because MVCC is on, and no CONFIRM yet.
+assert(s:get({1}) == nil)
+-- Block the incoming CONFIRM to be able to ensure the data is not visible until
+-- WAL write ends.
+box.error.injection.set('ERRINJ_WAL_DELAY_COUNTDOWN', 0)
+
+test_run:switch('default')
+box.cfg{replication_synchro_quorum = 2}
+test_run:wait_cond(function() return f:status() == 'dead' end)
+
+test_run:switch('replica')
+-- Wait when CONFIRM is in the WAL thread.
+test_run:wait_cond(function() return box.error.injection.get('ERRINJ_WAL_DELAY') end)
+assert(s:get({1}) == nil)
+box.error.injection.set("ERRINJ_WAL_DELAY", false)
+-- After CONFIRM is in WAL, the transaction is committed and its data is
+-- visible.
+test_run:wait_cond(function() return s:get({1}) ~= nil end)
+
+--
+-- Ensure a CONFIRM WAL write failure is also handled fine when the WAL write
+-- couldn't even start. The data remains not confirmed until a WAL write succeeds.
+--
+lsn = box.info.vclock[1]
+
+test_run:switch('default')
+box.cfg{replication_synchro_quorum = 3}
+f = fiber.new(function() s:replace{2} end)
+
+test_run:switch('replica')
+test_run:wait_cond(function() return box.info.vclock[1] == lsn + 1 end)
+assert(s:get({2}) == nil)
+-- Make journal write fail immediately.
+box.error.injection.set('ERRINJ_WAL_IO', true)
+
+test_run:switch('default')
+box.cfg{replication_synchro_quorum = 2}
+test_run:wait_cond(function() return f:status() == 'dead' end)
+
+test_run:switch('replica')
+test_run:wait_upstream(1, {status = 'stopped'})
+box.error.injection.set('ERRINJ_WAL_IO', false)
+assert(s:get({2}) == nil)
+-- Re-subscribe.
+replication = box.cfg.replication
+box.cfg{replication = {}}
+box.cfg{replication = replication}
+test_run:wait_cond(function() return box.info.vclock[1] == lsn + 2 end)
+test_run:wait_upstream(1, {status = 'follow'})
+assert(s:get({2}) ~= nil)
+
+--
+-- Ensure a CONFIRM WAL write failure is handled fine when an error happens
+-- inside the WAL thread. The data remains not confirmed until a WAL write succeeds.
+--
+lsn = box.info.vclock[1]
+
+test_run:switch('default')
+box.cfg{replication_synchro_quorum = 3}
+f = fiber.new(function() s:replace{3} end)
+
+test_run:switch('replica')
+test_run:wait_cond(function() return box.info.vclock[1] == lsn + 1 end)
+assert(s:get({3}) == nil)
+-- Journal write start is going to succeed, but it will fail later on return
+-- from the WAL thread.
+box.error.injection.set('ERRINJ_WAL_ROTATE', true)
+
+test_run:switch('default')
+box.cfg{replication_synchro_quorum = 2}
+test_run:wait_cond(function() return f:status() == 'dead' end)
+
+test_run:switch('replica')
+test_run:wait_upstream(1, {status = 'stopped'})
+box.error.injection.set('ERRINJ_WAL_ROTATE', false)
+assert(s:get({3}) == nil)
+box.cfg{replication = {}}
+box.cfg{replication = replication}
+test_run:wait_cond(function() return box.info.vclock[1] == lsn + 2 end)
+test_run:wait_upstream(1, {status = 'follow'})
+assert(s:get({3}) ~= nil)
+
+test_run:switch('default')
+test_run:cmd('stop server replica')
+test_run:cmd('delete server replica')
+
+s:drop()
+box.schema.user.revoke('guest', 'super')
+box.cfg{                                                                        \
+    replication_synchro_quorum = old_synchro_quorum,                            \
+    replication_synchro_timeout = old_synchro_timeout,                          \
+}
diff --git a/test/replication/gh-5213-replica.lua b/test/replication/gh-5213-replica.lua
new file mode 100644
index 000000000..3ba90d7ab
--- /dev/null
+++ b/test/replication/gh-5213-replica.lua
@@ -0,0 +1,10 @@
+#!/usr/bin/env tarantool
+
+box.cfg({
+    listen = os.getenv("LISTEN"),
+    replication = os.getenv("MASTER"),
+    read_only = true,
+    memtx_use_mvcc_engine = true,
+})
+
+require('console').listen(os.getenv('ADMIN'))
diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg
index aff5fda26..ab25fa331 100644
--- a/test/replication/suite.cfg
+++ b/test/replication/suite.cfg
@@ -15,6 +15,7 @@
     "gh-3760-misc-return-on-quorum-0.test.lua": {},
     "gh-4399-misc-no-failure-on-error-reading-wal.test.lua": {},
     "gh-4424-misc-orphan-on-reconfiguration-error.test.lua": {},
+    "gh-5213-qsync-applier-order.test.lua": {},
     "gh-5426-election-on-off.test.lua": {},
     "gh-5433-election-restart-recovery.test.lua": {},
     "gh-5506-election-on-off.test.lua": {},
diff --git a/test/replication/suite.ini b/test/replication/suite.ini
index a9e44e8cf..a7d5f6129 100644
--- a/test/replication/suite.ini
+++ b/test/replication/suite.ini
@@ -3,7 +3,7 @@ core = tarantool
 script =  master.lua
 description = tarantool/box, replication
 disabled = consistent.test.lua
-release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua qsync_advanced.test.lua qsync_errinj.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua gh-4739-vclock-assert.test.lua gh-4730-applier-rollback.test.lua gh-5140-qsync-casc-rollback.test.lua gh-5144-qsync-dup-confirm.test.lua gh-5167-qsync-rollback-snap.test.lua gh-5506-election-on-off.test.lua gh-5536-wal-limit.test.lua hang_on_synchro_fail.test.lua anon_register_gap.test.lua
+release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua qsync_advanced.test.lua qsync_errinj.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua gh-4739-vclock-assert.test.lua gh-4730-applier-rollback.test.lua gh-5140-qsync-casc-rollback.test.lua gh-5144-qsync-dup-confirm.test.lua gh-5167-qsync-rollback-snap.test.lua gh-5506-election-on-off.test.lua gh-5536-wal-limit.test.lua hang_on_synchro_fail.test.lua anon_register_gap.test.lua gh-5213-qsync-applier-order.test.lua
 config = suite.cfg
 lua_libs = lua/fast_replica.lua lua/rlimit.lua
 use_unix_sockets = True
-- 
2.24.3 (Apple Git-128)


