[PATCH v3] replication: fix exit with ER_NO_SUCH_USER during bootstrap

Serge Petrenko sergepetrenko at tarantool.org
Fri Aug 24 14:56:45 MSK 2018


When replication is configured via some user created in a box.once()
function and box.once() takes more than replication_timeout seconds
to execute, appliers receive an ER_NO_SUCH_USER error, which they don't
handle. This leads to occasional test failures in the replication suite.
Fix this by handling the aforementioned case in applier_f() and add a
test case.

Closes #3637
---
https://github.com/tarantool/tarantool/issues/3637
https://github.com/tarantool/tarantool/tree/sp/gh-3637-replication-tests-fix

Changes in v3: 
  - rewrite test case to be more versatile.
  - go back to old comments in applier_f().

Changes in v2: 
  - add a test and ensure new relevant
    lines are covered.
  - merge ER_NO_SUCH_USER case with
    ER_ACCESS_DENIED due to similarity.

 src/box/applier.cc                      |  3 +-
 test/replication/autobootstrap.result   | 58 +++++++++++++++++++++++++++++++++
 test/replication/autobootstrap.test.lua | 28 ++++++++++++++++
 test/replication/replica_auth.lua       | 14 ++++++++
 4 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 test/replication/replica_auth.lua

diff --git a/src/box/applier.cc b/src/box/applier.cc
index dbb4d05f9..28df8f7ca 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -602,7 +602,8 @@ applier_f(va_list ap)
 				applier_log_error(applier, e);
 				applier_disconnect(applier, APPLIER_LOADING);
 				goto reconnect;
-			} else if (e->errcode() == ER_ACCESS_DENIED) {
+			} else if (e->errcode() == ER_ACCESS_DENIED ||
+				   e->errcode() == ER_NO_SUCH_USER) {
 				/* Invalid configuration */
 				applier_log_error(applier, e);
 				applier_disconnect(applier, APPLIER_DISCONNECTED);
diff --git a/test/replication/autobootstrap.result b/test/replication/autobootstrap.result
index 91badc1f1..ed904672d 100644
--- a/test/replication/autobootstrap.result
+++ b/test/replication/autobootstrap.result
@@ -231,3 +231,61 @@ _ = test_run:cmd("switch default")
 test_run:drop_cluster(SERVERS)
 ---
 ...
+--
+-- Test case for gh-3637. Before the fix replica would exit with
+-- an error. Now check that we don't hang and successfully connect.
+--
+fiber = require("fiber")
+---
+...
+test_run:cmd("setopt delimiter ';'")
+---
+- true
+...
+function wait_replica()
+    while box.info.replication[2] == nil do
+        fiber.sleep(0.01)
+    end
+end;
+---
+...
+test_run:cmd("setopt delimiter ''");
+---
+- true
+...
+test_run:cmd("create server replica_auth with rpl_master=default, script='replication/replica_auth.lua'")
+---
+- true
+...
+test_run:cmd("start server replica_auth with wait=False, wait_load=False, args='cluster:pass 0.1'")
+---
+- true
+...
+-- Wait a bit to make sure replica waits till user is created.
+fiber.sleep(0.1)
+---
+...
+box.schema.user.create('cluster', {password='pass'})
+---
+...
+box.schema.user.grant('cluster', 'replication')
+---
+...
+wait_replica()
+---
+...
+test_run:cmd("stop server replica_auth")
+---
+- true
+...
+test_run:cmd("cleanup server replica_auth")
+---
+- true
+...
+test_run:cmd("delete server replica_auth")
+---
+- true
+...
+box.schema.user.drop('cluster')
+---
+...
diff --git a/test/replication/autobootstrap.test.lua b/test/replication/autobootstrap.test.lua
index 752d5f317..21417a738 100644
--- a/test/replication/autobootstrap.test.lua
+++ b/test/replication/autobootstrap.test.lua
@@ -108,3 +108,31 @@ _ = test_run:cmd("switch default")
 -- Stop servers
 --
 test_run:drop_cluster(SERVERS)
+
+--
+-- Test case for gh-3637. Before the fix replica would exit with
+-- an error. Now check that we don't hang and successfully connect.
+--
+fiber = require("fiber")
+
+test_run:cmd("setopt delimiter ';'")
+function wait_replica()
+    while box.info.replication[2] == nil do
+        fiber.sleep(0.01)
+    end
+end;
+test_run:cmd("setopt delimiter ''");
+
+test_run:cmd("create server replica_auth with rpl_master=default, script='replication/replica_auth.lua'")
+test_run:cmd("start server replica_auth with wait=False, wait_load=False, args='cluster:pass 0.1'")
+-- Wait a bit to make sure replica waits till user is created.
+fiber.sleep(0.1)
+box.schema.user.create('cluster', {password='pass'})
+box.schema.user.grant('cluster', 'replication')
+wait_replica()
+
+test_run:cmd("stop server replica_auth")
+test_run:cmd("cleanup server replica_auth")
+test_run:cmd("delete server replica_auth")
+
+box.schema.user.drop('cluster')
diff --git a/test/replication/replica_auth.lua b/test/replication/replica_auth.lua
new file mode 100644
index 000000000..22ba9146c
--- /dev/null
+++ b/test/replication/replica_auth.lua
@@ -0,0 +1,14 @@
+#!/usr/bin/env tarantool
+
+local USER_PASS = arg[1]
+local TIMEOUT = arg[2] and tonumber(arg[2]) or 0.1
+local CON_TIMEOUT = arg[3] and tonumber(arg[3]) or 30.0
+
+require('console').listen(os.getenv('ADMIN'))
+
+box.cfg({
+    listen = os.getenv("LISTEN"),
+    replication = USER_PASS .. "@" .. os.getenv("MASTER"),
+    replication_timeout = TIMEOUT,
+    replication_connect_timeout = CON_TIMEOUT
+})
-- 
2.15.2 (Apple Git-101.1)




More information about the Tarantool-patches mailing list