From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Serge Petrenko Subject: [PATCH v3] replication: fix exit with ER_NO_SUCH_USER during bootstrap Date: Fri, 24 Aug 2018 14:56:45 +0300 Message-Id: <20180824115645.43531-1-sergepetrenko@tarantool.org> To: vdavydov.dev@gmail.com Cc: tarantool-patches@freelists.org, Serge Petrenko List-ID: When replication is configured via some user created in the box.once() function and box.once() takes more than replication_timeout seconds to execute, appliers receive an ER_NO_SUCH_USER error, which they don't handle. This leads to occasional test failures in the replication suite. Fix this by handling the aforementioned case in applier_f() and add a test case. Closes #3637 --- https://github.com/tarantool/tarantool/issues/3637 https://github.com/tarantool/tarantool/tree/sp/gh-3637-replication-tests-fix Changes in v3: - rewrite test case to be more versatile. - go back to old comments in applier_f(). Changes in v2: - add a test and ensure new relevant lines are covered. - merge ER_NO_SUCH_USER case with ER_ACCESS_DENIED due to similarity. 
src/box/applier.cc | 3 +- test/replication/autobootstrap.result | 58 +++++++++++++++++++++++++++++++++ test/replication/autobootstrap.test.lua | 28 ++++++++++++++++ test/replication/replica_auth.lua | 14 ++++++++ 4 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 test/replication/replica_auth.lua diff --git a/src/box/applier.cc b/src/box/applier.cc index dbb4d05f9..28df8f7ca 100644 --- a/src/box/applier.cc +++ b/src/box/applier.cc @@ -602,7 +602,8 @@ applier_f(va_list ap) applier_log_error(applier, e); applier_disconnect(applier, APPLIER_LOADING); goto reconnect; - } else if (e->errcode() == ER_ACCESS_DENIED) { + } else if (e->errcode() == ER_ACCESS_DENIED || + e->errcode() == ER_NO_SUCH_USER) { /* Invalid configuration */ applier_log_error(applier, e); applier_disconnect(applier, APPLIER_DISCONNECTED); diff --git a/test/replication/autobootstrap.result b/test/replication/autobootstrap.result index 91badc1f1..ed904672d 100644 --- a/test/replication/autobootstrap.result +++ b/test/replication/autobootstrap.result @@ -231,3 +231,61 @@ _ = test_run:cmd("switch default") test_run:drop_cluster(SERVERS) --- ... +-- +-- Test case for gh-3637. Before the fix replica would exit with +-- an error. Now check that we don't hang and successfully connect. +-- +fiber = require("fiber") +--- +... +test_run:cmd("setopt delimiter ';'") +--- +- true +... +function wait_replica() + while box.info.replication[2] == nil do + fiber.sleep(0.01) + end +end; +--- +... +test_run:cmd("setopt delimiter ''"); +--- +- true +... +test_run:cmd("create server replica_auth with rpl_master=default, script='replication/replica_auth.lua'") +--- +- true +... +test_run:cmd("start server replica_auth with wait=False, wait_load=False, args='cluster:pass 0.1'") +--- +- true +... +-- Wait a bit to make sure replica waits till user is created. +fiber.sleep(0.1) +--- +... +box.schema.user.create('cluster', {password='pass'}) +--- +... +box.schema.user.grant('cluster', 'replication') +--- +... 
+wait_replica() +--- +... +test_run:cmd("stop server replica_auth") +--- +- true +... +test_run:cmd("cleanup server replica_auth") +--- +- true +... +test_run:cmd("delete server replica_auth") +--- +- true +... +box.schema.user.drop('cluster') +--- +... diff --git a/test/replication/autobootstrap.test.lua b/test/replication/autobootstrap.test.lua index 752d5f317..21417a738 100644 --- a/test/replication/autobootstrap.test.lua +++ b/test/replication/autobootstrap.test.lua @@ -108,3 +108,31 @@ _ = test_run:cmd("switch default") -- Stop servers -- test_run:drop_cluster(SERVERS) + +-- +-- Test case for gh-3637. Before the fix replica would exit with +-- an error. Now check that we don't hang and successfully connect. +-- +fiber = require("fiber") + +test_run:cmd("setopt delimiter ';'") +function wait_replica() + while box.info.replication[2] == nil do + fiber.sleep(0.01) + end +end; +test_run:cmd("setopt delimiter ''"); + +test_run:cmd("create server replica_auth with rpl_master=default, script='replication/replica_auth.lua'") +test_run:cmd("start server replica_auth with wait=False, wait_load=False, args='cluster:pass 0.1'") +-- Wait a bit to make sure replica waits till user is created. +fiber.sleep(0.1) +box.schema.user.create('cluster', {password='pass'}) +box.schema.user.grant('cluster', 'replication') +wait_replica() + +test_run:cmd("stop server replica_auth") +test_run:cmd("cleanup server replica_auth") +test_run:cmd("delete server replica_auth") + +box.schema.user.drop('cluster') diff --git a/test/replication/replica_auth.lua b/test/replication/replica_auth.lua new file mode 100644 index 000000000..22ba9146c --- /dev/null +++ b/test/replication/replica_auth.lua @@ -0,0 +1,14 @@ +#!/usr/bin/env tarantool + +local USER_PASS = arg[1] +local TIMEOUT = arg[2] and tonumber(arg[2]) or 0.1 +local CON_TIMEOUT = arg[3] and tonumber(arg[3]) or 30.0 + +require('console').listen(os.getenv('ADMIN')) + +box.cfg({ + listen = os.getenv("LISTEN"), + replication = USER_PASS .. 
"@" .. os.getenv("MASTER"), + replication_timeout = TIMEOUT, + replication_connect_timeout = CON_TIMEOUT +}) -- 2.15.2 (Apple Git-101.1)