From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Serge Petrenko Subject: [PATCH v2] replication: fix exit with ER_NO_SUCH_USER during bootstrap Date: Thu, 23 Aug 2018 17:56:16 +0300 Message-Id: <20180823145616.97810-1-sergepetrenko@tarantool.org> To: vdavydov.dev@gmail.com Cc: tarantool-patches@freelists.org, Serge Petrenko List-ID: When replication is configured via some user created in box.once() function and box.once() takes more than replication_timeout seconds to execute, appliers receive ER_NO_SUCH_USER error, which they don't handle. This leads to occasional test failures in replication suite. Fix this by handling the aforementioned case in applier_f() and add a test case. Closes #3637 --- https://github.com/tarantool/tarantool/issues/3637 https://github.com/tarantool/tarantool/tree/sp/gh-3637-replication-tests-fix Changes in v2: - add a test and ensure new relevant lines are covered. - merge ER_NO_SUCH_USER case with ER_ACCESS_DENIED due to similarity. src/box/applier.cc | 8 ++++++-- test/replication/autobootstrap.result | 19 +++++++++++++++++++ test/replication/autobootstrap.test.lua | 10 ++++++++++ test/replication/replica_bootstrap.lua | 30 ++++++++++++++++++++++++++++++ test/replication/replica_bootstrap1.lua | 1 + test/replication/replica_bootstrap2.lua | 1 + 6 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 test/replication/replica_bootstrap.lua create mode 120000 test/replication/replica_bootstrap1.lua create mode 120000 test/replication/replica_bootstrap2.lua diff --git a/src/box/applier.cc b/src/box/applier.cc index dbb4d05f9..02ec729bc 100644 --- a/src/box/applier.cc +++ b/src/box/applier.cc @@ -602,8 +602,12 @@ applier_f(va_list ap) applier_log_error(applier, e); applier_disconnect(applier, APPLIER_LOADING); goto reconnect; - } else if (e->errcode() == ER_ACCESS_DENIED) { - /* Invalid configuration */ + } else if (e->errcode() == ER_ACCESS_DENIED || + e->errcode() == ER_NO_SUCH_USER) { + /* + * Invalid configuration or 
box.once() + * hasn't finished on bootstrap master. + */ applier_log_error(applier, e); applier_disconnect(applier, APPLIER_DISCONNECTED); goto reconnect; diff --git a/test/replication/autobootstrap.result b/test/replication/autobootstrap.result index 91badc1f1..9837aafb8 100644 --- a/test/replication/autobootstrap.result +++ b/test/replication/autobootstrap.result @@ -231,3 +231,22 @@ _ = test_run:cmd("switch default") test_run:drop_cluster(SERVERS) --- ... +-- +-- Test case for gh-3637. Before the fix replica would exit with +-- an error. Now check that we don't hang and successfully connect. +-- +SERVERS = {'replica_bootstrap1', 'replica_bootstrap2'} +--- +... +test_run:create_cluster(SERVERS, "replication", {args="0.1"}) +--- +... +test_run:wait_fullmesh(SERVERS) +--- +... +print("ok") +--- +... +test_run:drop_cluster(SERVERS) +--- +... diff --git a/test/replication/autobootstrap.test.lua b/test/replication/autobootstrap.test.lua index 752d5f317..bbf6f6512 100644 --- a/test/replication/autobootstrap.test.lua +++ b/test/replication/autobootstrap.test.lua @@ -108,3 +108,13 @@ _ = test_run:cmd("switch default") -- Stop servers -- test_run:drop_cluster(SERVERS) + +-- +-- Test case for gh-3637. Before the fix replica would exit with +-- an error. Now check that we don't hang and successfully connect. 
+-- +SERVERS = {'replica_bootstrap1', 'replica_bootstrap2'} +test_run:create_cluster(SERVERS, "replication", {args="0.1"}) +test_run:wait_fullmesh(SERVERS) +print("ok") +test_run:drop_cluster(SERVERS) diff --git a/test/replication/replica_bootstrap.lua b/test/replication/replica_bootstrap.lua new file mode 100644 index 000000000..9a6c5f4a0 --- /dev/null +++ b/test/replication/replica_bootstrap.lua @@ -0,0 +1,30 @@ +#!/usr/bin/env tarantool + +local INSTANCE_ID = string.match(arg[0], "%d") +local USER = 'cluster' +local PASSWORD = 'pass' +local SOCKET_DIR = require('fio').cwd() +local TIMEOUT = tonumber(arg[1]) +local CON_TIMEOUT = arg[2] and tonumber(arg[2]) or 100 * 365 * 86400 + +local function instance_uri(instance_id) + return SOCKET_DIR..'/replica_bootstrap'..instance_id..'.sock' +end + +require('console').listen(os.getenv('ADMIN')) + +box.cfg({ + listen=instance_uri(INSTANCE_ID), + replication={ + USER..':'..PASSWORD..'@'..instance_uri(1), + USER..':'..PASSWORD..'@'..instance_uri(2) + }, + replication_timeout=TIMEOUT, + replication_connect_timeout=CON_TIMEOUT +}) + +box.once('bootstrap', function() + require('fiber').sleep(3 * TIMEOUT) + box.schema.user.create(USER, {password=PASSWORD}) + box.schema.user.grant(USER, 'replication') +end) diff --git a/test/replication/replica_bootstrap1.lua b/test/replication/replica_bootstrap1.lua new file mode 120000 index 000000000..2f3645a72 --- /dev/null +++ b/test/replication/replica_bootstrap1.lua @@ -0,0 +1 @@ +replica_bootstrap.lua \ No newline at end of file diff --git a/test/replication/replica_bootstrap2.lua b/test/replication/replica_bootstrap2.lua new file mode 120000 index 000000000..2f3645a72 --- /dev/null +++ b/test/replication/replica_bootstrap2.lua @@ -0,0 +1 @@ +replica_bootstrap.lua \ No newline at end of file -- 2.15.2 (Apple Git-101.1)