Tarantool development patches archive
 help / color / mirror / Atom feed
From: Cyrill Gorcunov <gorcunov@gmail.com>
To: tml <tarantool-patches@dev.tarantool.org>
Subject: [Tarantool-patches] [PATCH v10 4/4] test: add replication/applier-rollback
Date: Fri, 14 Feb 2020 17:03:39 +0300	[thread overview]
Message-ID: <20200214140339.4085-5-gorcunov@gmail.com> (raw)
In-Reply-To: <20200214140339.4085-1-gorcunov@gmail.com>

In the test force error injection ERRINJ_REPLICA_TXN_WRITE
to happen which will initiate applier transaction rollback.
Without the fix it will cause SIGSEGV due to lack of error
propagation.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 src/box/applier.cc                          |  12 ++
 src/lib/core/errinj.h                       |   1 +
 test/box/errinj.result                      |   1 +
 test/replication/applier-rollback-slave.lua |  16 ++
 test/replication/applier-rollback.result    | 160 ++++++++++++++++++++
 test/replication/applier-rollback.test.lua  |  79 ++++++++++
 test/replication/suite.ini                  |   2 +-
 7 files changed, 270 insertions(+), 1 deletion(-)
 create mode 100644 test/replication/applier-rollback-slave.lua
 create mode 100644 test/replication/applier-rollback.result
 create mode 100644 test/replication/applier-rollback.test.lua

diff --git a/src/box/applier.cc b/src/box/applier.cc
index a4a73d383..574d086e0 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -51,6 +51,7 @@
 #include "txn.h"
 #include "box.h"
 #include "scoped_guard.h"
+#include "errinj.h"
 
 STRS(applier_state, applier_STATE);
 
@@ -832,6 +833,17 @@ applier_apply_tx(struct stailq *rows)
 	trigger_create(on_commit, applier_txn_commit_cb, NULL, NULL);
 	txn_on_commit(txn, on_commit);
 
+	/*
+	 * FIXME: Move this injection somewhere inside
+	 * txn_write, but since it has own bug (gh 4776)
+	 * we will cure it a bit later.
+	 */
+	ERROR_INJECT(ERRINJ_REPLICA_TXN_WRITE, {
+		diag_set(ClientError, ER_INJECTION,
+			 "replica txn write injection");
+		goto rollback;
+	});
+
 	if (txn_write(txn) < 0)
 		goto fail;
 
diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h
index 672da2119..da6adfe97 100644
--- a/src/lib/core/errinj.h
+++ b/src/lib/core/errinj.h
@@ -135,6 +135,7 @@ struct errinj {
 	_(ERRINJ_COIO_SENDFILE_CHUNK, ERRINJ_INT, {.iparam = -1}) \
 	_(ERRINJ_SWIM_FD_ONLY, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_DYN_MODULE_COUNT, ERRINJ_INT, {.iparam = 0}) \
+	_(ERRINJ_REPLICA_TXN_WRITE, ERRINJ_BOOL, {.bparam = false}) \
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/test/box/errinj.result b/test/box/errinj.result
index f043c6689..60db28938 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -63,6 +63,7 @@ evals
   - ERRINJ_RELAY_SEND_DELAY: false
   - ERRINJ_RELAY_TIMEOUT: 0
   - ERRINJ_REPLICA_JOIN_DELAY: false
+  - ERRINJ_REPLICA_TXN_WRITE: false
   - ERRINJ_SIO_READ_MAX: -1
   - ERRINJ_SNAP_COMMIT_DELAY: false
   - ERRINJ_SNAP_WRITE_DELAY: false
diff --git a/test/replication/applier-rollback-slave.lua b/test/replication/applier-rollback-slave.lua
new file mode 100644
index 000000000..26fb10055
--- /dev/null
+++ b/test/replication/applier-rollback-slave.lua
@@ -0,0 +1,16 @@
+--
+-- vim: ts=4 sw=4 et
+--
+
+print('arg', arg)
+
+box.cfg({
+    replication                 = os.getenv("MASTER"),
+    listen                      = os.getenv("LISTEN"),
+    memtx_memory                = 107374182,
+    replication_timeout         = 0.1,
+    replication_connect_timeout = 0.5,
+    read_only                   = true,
+})
+
+require('console').listen(os.getenv('ADMIN'))
diff --git a/test/replication/applier-rollback.result b/test/replication/applier-rollback.result
new file mode 100644
index 000000000..3209fc7fd
--- /dev/null
+++ b/test/replication/applier-rollback.result
@@ -0,0 +1,160 @@
+-- test-run result file version 2
+#!/usr/bin/env tarantool
+ | ---
+ | ...
+--
+-- vim: ts=4 sw=4 et
+--
+
+test_run = require('test_run').new()
+ | ---
+ | ...
+
+errinj = box.error.injection
+ | ---
+ | ...
+engine = test_run:get_cfg('engine')
+ | ---
+ | ...
+
+--
+-- Allow replica to connect to us
+box.schema.user.grant('guest', 'replication')
+ | ---
+ | ...
+
+--
+-- Create replica instance, we're the master and
+-- start it, no data to sync yet though
+test_run:cmd("create server replica_slave with rpl_master=default, script='replication/applier-rollback-slave.lua'")
+ | ---
+ | - true
+ | ...
+test_run:cmd("start server replica_slave")
+ | ---
+ | - true
+ | ...
+
+--
+-- Fill initial data on the master instance
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+
+_ = box.schema.space.create('test', {engine=engine})
+ | ---
+ | ...
+s = box.space.test
+ | ---
+ | ...
+
+s:format({{name = 'id', type = 'unsigned'}, {name = 'band_name', type = 'string'}})
+ | ---
+ | ...
+
+_ = s:create_index('primary', {type = 'tree', parts = {'id'}})
+ | ---
+ | ...
+s:insert({1, '1'})
+ | ---
+ | - [1, '1']
+ | ...
+s:insert({2, '2'})
+ | ---
+ | - [2, '2']
+ | ...
+s:insert({3, '3'})
+ | ---
+ | - [3, '3']
+ | ...
+
+--
+-- To make sure we're running
+box.info.status
+ | ---
+ | - running
+ | ...
+
+--
+-- Wait for data from master get propagated
+test_run:wait_lsn('replica_slave', 'default')
+ | ---
+ | ...
+
+--
+-- Now inject error into slave instance
+test_run:cmd('switch replica_slave')
+ | ---
+ | - true
+ | ...
+
+--
+-- To make sure we're running
+box.info.status
+ | ---
+ | - running
+ | ...
+
+errinj = box.error.injection
+ | ---
+ | ...
+errinj.set('ERRINJ_REPLICA_TXN_WRITE', true)
+ | ---
+ | - ok
+ | ...
+
+--
+-- Jump back to master node and write new
+-- entry which should cause error to happen
+-- on slave instance
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+s:insert({4, '4'})
+ | ---
+ | - [4, '4']
+ | ...
+
+--
+-- Wait for error to trigger
+test_run:cmd('switch replica_slave')
+ | ---
+ | - true
+ | ...
+fiber = require('fiber')
+ | ---
+ | ...
+while test_run:grep_log('replica_slave', 'ER_INJECTION:[^\n]*') == nil do fiber.sleep(0.1) end
+ | ---
+ | ...
+
+----
+---- Such error cause the applier to be
+---- cancelled and reaped, thus stop the
+---- slave node and cleanup
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+
+--
+-- Cleanup
+test_run:cmd("stop server replica_slave")
+ | ---
+ | - true
+ | ...
+test_run:cmd("delete server replica_slave")
+ | ---
+ | - true
+ | ...
+box.cfg{replication=""}
+ | ---
+ | ...
+box.space.test:drop()
+ | ---
+ | ...
+box.schema.user.revoke('guest', 'replication')
+ | ---
+ | ...
diff --git a/test/replication/applier-rollback.test.lua b/test/replication/applier-rollback.test.lua
new file mode 100644
index 000000000..d31eff9f0
--- /dev/null
+++ b/test/replication/applier-rollback.test.lua
@@ -0,0 +1,79 @@
+#!/usr/bin/env tarantool
+--
+-- vim: ts=4 sw=4 et
+--
+
+test_run = require('test_run').new()
+
+errinj = box.error.injection
+engine = test_run:get_cfg('engine')
+
+--
+-- Allow replica to connect to us
+box.schema.user.grant('guest', 'replication')
+
+--
+-- Create replica instance, we're the master and
+-- start it, no data to sync yet though
+test_run:cmd("create server replica_slave with rpl_master=default, script='replication/applier-rollback-slave.lua'")
+test_run:cmd("start server replica_slave")
+
+--
+-- Fill initial data on the master instance
+test_run:cmd('switch default')
+
+_ = box.schema.space.create('test', {engine=engine})
+s = box.space.test
+
+s:format({{name = 'id', type = 'unsigned'}, {name = 'band_name', type = 'string'}})
+
+_ = s:create_index('primary', {type = 'tree', parts = {'id'}})
+s:insert({1, '1'})
+s:insert({2, '2'})
+s:insert({3, '3'})
+
+--
+-- To make sure we're running
+box.info.status
+
+--
+-- Wait for data from master get propagated
+test_run:wait_lsn('replica_slave', 'default')
+
+--
+-- Now inject error into slave instance
+test_run:cmd('switch replica_slave')
+
+--
+-- To make sure we're running
+box.info.status
+
+errinj = box.error.injection
+errinj.set('ERRINJ_REPLICA_TXN_WRITE', true)
+
+--
+-- Jump back to master node and write new
+-- entry which should cause error to happen
+-- on slave instance
+test_run:cmd('switch default')
+s:insert({4, '4'})
+
+--
+-- Wait for error to trigger
+test_run:cmd('switch replica_slave')
+fiber = require('fiber')
+while test_run:grep_log('replica_slave', 'ER_INJECTION:[^\n]*') == nil do fiber.sleep(0.1) end
+
+----
+---- Such error cause the applier to be
+---- cancelled and reaped, thus stop the
+---- slave node and cleanup
+test_run:cmd('switch default')
+
+--
+-- Cleanup
+test_run:cmd("stop server replica_slave")
+test_run:cmd("delete server replica_slave")
+box.cfg{replication=""}
+box.space.test:drop()
+box.schema.user.revoke('guest', 'replication')
diff --git a/test/replication/suite.ini b/test/replication/suite.ini
index ed1de3140..b804b85f6 100644
--- a/test/replication/suite.ini
+++ b/test/replication/suite.ini
@@ -3,7 +3,7 @@ core = tarantool
 script =  master.lua
 description = tarantool/box, replication
 disabled = consistent.test.lua
-release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua
+release_disabled = catch.test.lua errinj.test.lua gc.test.lua gc_no_space.test.lua before_replace.test.lua quorum.test.lua recover_missing_xlog.test.lua sync.test.lua long_row_timeout.test.lua applier-rollback.test.lua
 config = suite.cfg
 lua_libs = lua/fast_replica.lua lua/rlimit.lua
 use_unix_sockets = True
-- 
2.20.1

  parent reply	other threads:[~2020-02-14 14:04 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-14 14:03 [Tarantool-patches] [PATCH v10 0/4] box/replication: add missing diag set and fix sigsegv Cyrill Gorcunov
2020-02-14 14:03 ` [Tarantool-patches] [PATCH v10 1/4] box/request: add missing OutOfMemory diag_set Cyrill Gorcunov
2020-02-15 17:33   ` Konstantin Osipov
2020-02-14 14:03 ` [Tarantool-patches] [PATCH v10 2/4] box/applier: add missing diag_set on region_alloc failure Cyrill Gorcunov
2020-02-15 17:34   ` Konstantin Osipov
2020-02-14 14:03 ` [Tarantool-patches] [PATCH v10 3/4] box/applier: prevent nil dereference on applier rollback Cyrill Gorcunov
2020-02-15 17:37   ` Konstantin Osipov
2020-02-15 18:24     ` Cyrill Gorcunov
2020-02-15 18:51       ` Konstantin Osipov
2020-02-14 14:03 ` Cyrill Gorcunov [this message]
2020-02-15 17:38   ` [Tarantool-patches] [PATCH v10 4/4] test: add replication/applier-rollback Konstantin Osipov
2020-02-15 17:59     ` Cyrill Gorcunov
2020-02-14 14:12 ` [Tarantool-patches] [PATCH v10 0/4] box/replication: add missing diag set and fix sigsegv Cyrill Gorcunov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200214140339.4085-5-gorcunov@gmail.com \
    --to=gorcunov@gmail.com \
    --cc=tarantool-patches@dev.tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v10 4/4] test: add replication/applier-rollback' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox