[Tarantool-patches] [PATCH v6 5/5] test: add replication/applier-rollback

Cyrill Gorcunov gorcunov at gmail.com
Tue Jan 28 11:26:44 MSK 2020


The test forces the ERRINJ_REPLICA_TXN_WRITE error injection to
trigger, which initiates an applier transaction rollback.
Without the fix this causes a SIGSEGV due to the lack of error
propagation.

Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---

I've updated the test a bit since it didn't pass on travis due
to redundant mutable information being printed. Let's see if it
passes the tests on gitlab.

 test/replication/applier-rollback-slave.lua |  16 ++
 test/replication/applier-rollback.result    | 166 ++++++++++++++++++++
 test/replication/applier-rollback.test.lua  |  81 ++++++++++
 3 files changed, 263 insertions(+)
 create mode 100644 test/replication/applier-rollback-slave.lua
 create mode 100644 test/replication/applier-rollback.result
 create mode 100644 test/replication/applier-rollback.test.lua

diff --git a/test/replication/applier-rollback-slave.lua b/test/replication/applier-rollback-slave.lua
new file mode 100644
index 000000000..26fb10055
--- /dev/null
+++ b/test/replication/applier-rollback-slave.lua
@@ -0,0 +1,16 @@
+--
+-- vim: ts=4 sw=4 et
+-- Replica instance script used by replication/applier-rollback.test.lua.
+
+print('arg', arg)
+
+box.cfg({
+    replication                 = os.getenv("MASTER"), -- upstream URI from env
+    listen                      = os.getenv("LISTEN"), -- listen URI from env
+    memtx_memory                = 107374182, -- NOTE(review): presumably bytes, confirm
+    replication_timeout         = 0.1, -- NOTE(review): presumably short to keep the test fast
+    replication_connect_timeout = 0.5, -- NOTE(review): presumably seconds, confirm
+    read_only                   = true, -- the replica is configured read-only
+})
+
+require('console').listen(os.getenv('ADMIN')) -- console address from env
diff --git a/test/replication/applier-rollback.result b/test/replication/applier-rollback.result
new file mode 100644
index 000000000..0349d5723
--- /dev/null
+++ b/test/replication/applier-rollback.result
@@ -0,0 +1,166 @@
+-- test-run result file version 2
+#!/usr/bin/env tarantool
+ | ---
+ | ...
+--
+-- vim: ts=4 sw=4 et
+-- Inject ERRINJ_REPLICA_TXN_WRITE on a replica, then restart it and resync.
+
+test_run = require('test_run').new()
+ | ---
+ | ...
+
+vclock_diff = require('fast_replica').vclock_diff
+ | ---
+ | ...
+errinj = box.error.injection
+ | ---
+ | ...
+engine = test_run:get_cfg('engine')
+ | ---
+ | ...
+
+--
+-- Allow the replica to connect to us
+box.schema.user.grant('guest', 'replication')
+ | ---
+ | ...
+
+--
+-- Create the replica instance (we're the master) and
+-- start it; there is no data to sync yet
+test_run:cmd("create server replica_slave with rpl_master=default, script='replication/applier-rollback-slave.lua'")
+ | ---
+ | - true
+ | ...
+test_run:cmd("start server replica_slave")
+ | ---
+ | - true
+ | ...
+
+--
+-- Fill in the initial data on the master instance
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+
+_ = box.schema.space.create('test', {engine=engine})
+ | ---
+ | ...
+s = box.space.test
+ | ---
+ | ...
+
+s:format({{name = 'id', type = 'unsigned'}, {name = 'band_name', type = 'string'}})
+ | ---
+ | ...
+
+_ = s:create_index('primary', {type = 'tree', parts = {'id'}})
+ | ---
+ | ...
+s:insert({1, '1'})
+ | ---
+ | - [1, '1']
+ | ...
+s:insert({2, '2'})
+ | ---
+ | - [2, '2']
+ | ...
+s:insert({3, '3'})
+ | ---
+ | - [3, '3']
+ | ...
+
+--
+-- Make sure we're running
+box.info.status
+ | ---
+ | - running
+ | ...
+
+--
+-- Wait for the data from master to get propagated
+test_run:wait_lsn('replica_slave', 'default')
+ | ---
+ | ...
+
+--
+-- Now inject an error into the slave instance
+test_run:cmd('switch replica_slave')
+ | ---
+ | - true
+ | ...
+
+box.info.status
+ | ---
+ | - running
+ | ...
+
+fiber = require('fiber')
+ | ---
+ | ...
+errinj = box.error.injection
+ | ---
+ | ...
+errinj.set('ERRINJ_REPLICA_TXN_WRITE', true)
+ | ---
+ | - ok
+ | ...
+
+--
+-- Jump back to the master node and write a new
+-- entry which should cause the error to happen
+-- on the slave instance
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+s:insert({4, '4'})
+ | ---
+ | - [4, '4']
+ | ...
+
+--
+-- Wait for the error to trigger
+test_run:cmd('switch replica_slave')
+ | ---
+ | - true
+ | ...
+while test_run:grep_log('replica_slave', 'ER_INJECTION:[^\n]*') == nil do fiber.sleep(1) end
+ | ---
+ | ...
+
+--
+-- Such an error causes the applier to be
+-- cancelled and reaped, thus stop the
+-- slave node and start it again
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+test_run:cmd("stop server replica_slave")
+ | ---
+ | - true
+ | ...
+test_run:cmd("start server replica_slave")
+ | ---
+ | - true
+ | ...
+
+--
+-- Wait until the new data is fetched and synced
+test_run:wait_lsn('replica_slave', 'default')
+ | ---
+ | ...
+
+--
+-- Cleanup
+test_run:cmd("stop server replica_slave")
+ | ---
+ | - true
+ | ...
+test_run:cmd("cleanup server replica_slave")
+ | ---
+ | - true
+ | ...
diff --git a/test/replication/applier-rollback.test.lua b/test/replication/applier-rollback.test.lua
new file mode 100644
index 000000000..d74f8df0e
--- /dev/null
+++ b/test/replication/applier-rollback.test.lua
@@ -0,0 +1,81 @@
+#!/usr/bin/env tarantool
+--
+-- vim: ts=4 sw=4 et
+-- Inject ERRINJ_REPLICA_TXN_WRITE on a replica, then restart it and resync.
+
+test_run = require('test_run').new()
+
+vclock_diff = require('fast_replica').vclock_diff
+errinj = box.error.injection
+engine = test_run:get_cfg('engine')
+
+--
+-- Allow the replica to connect to us
+box.schema.user.grant('guest', 'replication')
+
+--
+-- Create the replica instance (we're the master) and
+-- start it; there is no data to sync yet
+test_run:cmd("create server replica_slave with rpl_master=default, script='replication/applier-rollback-slave.lua'")
+test_run:cmd("start server replica_slave")
+
+--
+-- Fill in the initial data on the master instance
+test_run:cmd('switch default')
+
+_ = box.schema.space.create('test', {engine=engine})
+s = box.space.test
+
+s:format({{name = 'id', type = 'unsigned'}, {name = 'band_name', type = 'string'}})
+
+_ = s:create_index('primary', {type = 'tree', parts = {'id'}})
+s:insert({1, '1'})
+s:insert({2, '2'})
+s:insert({3, '3'})
+
+--
+-- Make sure we're running
+box.info.status
+
+--
+-- Wait for the data from master to get propagated
+test_run:wait_lsn('replica_slave', 'default')
+
+--
+-- Now inject an error into the slave instance
+test_run:cmd('switch replica_slave')
+
+box.info.status
+
+fiber = require('fiber')
+errinj = box.error.injection
+errinj.set('ERRINJ_REPLICA_TXN_WRITE', true)
+
+--
+-- Jump back to the master node and write a new
+-- entry which should cause the error to happen
+-- on the slave instance
+test_run:cmd('switch default')
+s:insert({4, '4'})
+
+--
+-- Wait for the error to trigger
+test_run:cmd('switch replica_slave')
+while test_run:grep_log('replica_slave', 'ER_INJECTION:[^\n]*') == nil do fiber.sleep(1) end
+
+--
+-- Such an error causes the applier to be
+-- cancelled and reaped, thus stop the
+-- slave node and start it again
+test_run:cmd('switch default')
+test_run:cmd("stop server replica_slave")
+test_run:cmd("start server replica_slave")
+
+--
+-- Wait until the new data is fetched and synced
+test_run:wait_lsn('replica_slave', 'default')
+
+--
+-- Cleanup
+test_run:cmd("stop server replica_slave")
+test_run:cmd("cleanup server replica_slave")
-- 
2.20.1



More information about the Tarantool-patches mailing list