[Tarantool-patches] [PATCH v2 20/19] replication: add test for quorum 1

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Wed Jul 1 02:00:40 MSK 2020


When synchro quorum is 1, the final commit and confirmation write
are done by the fiber that created the transaction, right after WAL
write. This case got special handling in the previous patches,
and this commit adds a test for that.

Closes #5123
---
 test/replication/qsync_basic.result    |  33 +++++++
 test/replication/qsync_basic.test.lua  |  12 +++
 test/replication/qsync_errinj.result   | 114 +++++++++++++++++++++++++
 test/replication/qsync_errinj.test.lua |  45 ++++++++++
 test/replication/suite.ini             |   2 +-
 5 files changed, 205 insertions(+), 1 deletion(-)
 create mode 100644 test/replication/qsync_errinj.result
 create mode 100644 test/replication/qsync_errinj.test.lua

diff --git a/test/replication/qsync_basic.result b/test/replication/qsync_basic.result
index f713d4b08..cdecf00e8 100644
--- a/test/replication/qsync_basic.result
+++ b/test/replication/qsync_basic.result
@@ -299,6 +299,39 @@ box.space.sync:select{6}
  | - []
  | ...
 
+--
+-- gh-5123: quorum 1 still should write CONFIRM.
+--
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+box.cfg{replication_synchro_quorum = 1, replication_synchro_timeout = 5}
+ | ---
+ | ...
+oldlsn = box.info.lsn
+ | ---
+ | ...
+box.space.sync:replace{7}
+ | ---
+ | - [7]
+ | ...
+newlsn = box.info.lsn
+ | ---
+ | ...
+assert(newlsn >= oldlsn + 2)
+ | ---
+ | - true
+ | ...
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+box.space.sync:select{7}
+ | ---
+ | - - [7]
+ | ...
+
 -- Cleanup.
 test_run:cmd('switch default')
  | ---
diff --git a/test/replication/qsync_basic.test.lua b/test/replication/qsync_basic.test.lua
index f84b6ee19..361f22bc3 100644
--- a/test/replication/qsync_basic.test.lua
+++ b/test/replication/qsync_basic.test.lua
@@ -118,6 +118,18 @@ test_run:switch('replica')
 box.space.test:select{6}
 box.space.sync:select{6}
 
+--
+-- gh-5123: quorum 1 still should write CONFIRM.
+--
+test_run:switch('default')
+box.cfg{replication_synchro_quorum = 1, replication_synchro_timeout = 5}
+oldlsn = box.info.lsn
+box.space.sync:replace{7}
+newlsn = box.info.lsn
+assert(newlsn >= oldlsn + 2)
+test_run:switch('replica')
+box.space.sync:select{7}
+
 -- Cleanup.
 test_run:cmd('switch default')
 
diff --git a/test/replication/qsync_errinj.result b/test/replication/qsync_errinj.result
new file mode 100644
index 000000000..1d2945761
--- /dev/null
+++ b/test/replication/qsync_errinj.result
@@ -0,0 +1,114 @@
+-- test-run result file version 2
+test_run = require('test_run').new()
+ | ---
+ | ...
+engine = test_run:get_cfg('engine')
+ | ---
+ | ...
+
+old_synchro_quorum = box.cfg.replication_synchro_quorum
+ | ---
+ | ...
+old_synchro_timeout = box.cfg.replication_synchro_timeout
+ | ---
+ | ...
+box.schema.user.grant('guest', 'super')
+ | ---
+ | ...
+
+test_run:cmd('create server replica with rpl_master=default,\
+             script="replication/replica.lua"')
+ | ---
+ | - true
+ | ...
+test_run:cmd('start server replica with wait=True, wait_load=True')
+ | ---
+ | - true
+ | ...
+
+_ = box.schema.space.create('sync', {is_sync = true, engine = engine})
+ | ---
+ | ...
+_ = box.space.sync:create_index('pk')
+ | ---
+ | ...
+
+--
+-- gh-5123: replica WAL fail shouldn't crash with quorum 1.
+--
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+box.cfg{replication_synchro_quorum = 1, replication_synchro_timeout = 5}
+ | ---
+ | ...
+box.space.sync:insert{1}
+ | ---
+ | - [1]
+ | ...
+
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+box.error.injection.set('ERRINJ_WAL_IO', true)
+ | ---
+ | - ok
+ | ...
+
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+box.space.sync:insert{2}
+ | ---
+ | - [2]
+ | ...
+
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+test_run:wait_upstream(1, {status='stopped'})
+ | ---
+ | - true
+ | ...
+box.error.injection.set('ERRINJ_WAL_IO', false)
+ | ---
+ | - ok
+ | ...
+
+test_run:cmd('restart server replica')
+ | 
+box.space.sync:select{2}
+ | ---
+ | - - [2]
+ | ...
+
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+
+box.cfg{                                                                        \
+    replication_synchro_quorum = old_synchro_quorum,                            \
+    replication_synchro_timeout = old_synchro_timeout,                          \
+}
+ | ---
+ | ...
+test_run:cmd('stop server replica')
+ | ---
+ | - true
+ | ...
+test_run:cmd('delete server replica')
+ | ---
+ | - true
+ | ...
+
+box.space.sync:drop()
+ | ---
+ | ...
+box.schema.user.revoke('guest', 'super')
+ | ---
+ | ...
diff --git a/test/replication/qsync_errinj.test.lua b/test/replication/qsync_errinj.test.lua
new file mode 100644
index 000000000..96495ae6c
--- /dev/null
+++ b/test/replication/qsync_errinj.test.lua
@@ -0,0 +1,45 @@
+test_run = require('test_run').new()
+engine = test_run:get_cfg('engine')
+
+old_synchro_quorum = box.cfg.replication_synchro_quorum
+old_synchro_timeout = box.cfg.replication_synchro_timeout
+box.schema.user.grant('guest', 'super')
+
+test_run:cmd('create server replica with rpl_master=default,\
+             script="replication/replica.lua"')
+test_run:cmd('start server replica with wait=True, wait_load=True')
+
+_ = box.schema.space.create('sync', {is_sync = true, engine = engine})
+_ = box.space.sync:create_index('pk')
+
+--
+-- gh-5123: replica WAL fail shouldn't crash with quorum 1.
+--
+test_run:switch('default')
+box.cfg{replication_synchro_quorum = 1, replication_synchro_timeout = 5}
+box.space.sync:insert{1}
+
+test_run:switch('replica')
+box.error.injection.set('ERRINJ_WAL_IO', true)
+
+test_run:switch('default')
+box.space.sync:insert{2}
+
+test_run:switch('replica')
+test_run:wait_upstream(1, {status='stopped'})
+box.error.injection.set('ERRINJ_WAL_IO', false)
+
+test_run:cmd('restart server replica')
+box.space.sync:select{2}
+
+test_run:cmd('switch default')
+
+box.cfg{                                                                        \
+    replication_synchro_quorum = old_synchro_quorum,                            \
+    replication_synchro_timeout = old_synchro_timeout,                          \
+}
+test_run:cmd('stop server replica')
+test_run:cmd('delete server replica')
+
+box.space.sync:drop()
+box.schema.user.revoke('guest', 'super')
diff --git a/test/replication/suite.ini b/test/replication/suite.ini
index 6119a264b..11f8d4e20 100644
--- a/test/replication/suite.ini
+++ b/test/replication/suite.ini
@@ -3,7 +3,7 @@ core = tarantool
 script =  master.lua
 description = tarantool/box, replication
 disabled = consistent.test.lua
-release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua gh-4739-vclock-assert.test.lua gh-4730-applier-rollback.test.lua
+release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua qsync_errinj.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua gh-4739-vclock-assert.test.lua gh-4730-applier-rollback.test.lua
 config = suite.cfg
 lua_libs = lua/fast_replica.lua lua/rlimit.lua
 use_unix_sockets = True
-- 
2.21.1 (Apple Git-122.3)



More information about the Tarantool-patches mailing list