Date: Fri, 24 Aug 2018 19:32:09 +0300
From: Vladimir Davydov
Subject: Re: [PATCH v3] replication: fix exit with ER_NO_SUCH_USER during bootstrap
Message-ID: <20180824163209.2agiibwwtowoizzw@esperanza>
References: <20180824115645.43531-1-sergepetrenko@tarantool.org>
In-Reply-To: <20180824115645.43531-1-sergepetrenko@tarantool.org>
To: Serge Petrenko
Cc: tarantool-patches@freelists.org

Pushed to 1.9, here's the final version:

From 33950162f3e766d413567cd75aaa7e6c384831bd Mon Sep 17 00:00:00 2001
From: Serge Petrenko
Date: Thu, 23 Aug 2018 14:08:51 +0300
Subject: [PATCH] replication: fix exit with ER_NO_SUCH_USER during bootstrap

When replication is configured via a user created in a box.once()
function and box.once() takes more than replication_timeout seconds to
execute, appliers receive an ER_NO_SUCH_USER error, which they don't
handle. This leads to occasional test failures in the replication
suite. Fix this by handling the aforementioned case in applier_f(),
and add a test case.

Closes #3637

diff --git a/src/box/applier.cc b/src/box/applier.cc
index b9f041d8..16a87389 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -596,7 +596,8 @@ applier_f(va_list ap)
 			applier_log_error(applier, e);
 			applier_disconnect(applier, APPLIER_LOADING);
 			goto reconnect;
-		} else if (e->errcode() == ER_ACCESS_DENIED) {
+		} else if (e->errcode() == ER_ACCESS_DENIED ||
+			   e->errcode() == ER_NO_SUCH_USER) {
 			/* Invalid configuration */
 			applier_log_error(applier, e);
 			applier_disconnect(applier, APPLIER_DISCONNECTED);
diff --git a/test/replication/misc.result b/test/replication/misc.result
index 9df2a2c4..76e7fd5e 100644
--- a/test/replication/misc.result
+++ b/test/replication/misc.result
@@ -232,3 +232,55 @@ test_run:drop_cluster(SERVERS)
 box.schema.user.revoke('guest', 'replication')
 ---
 ...
+--
+-- Test case for gh-3637. Before the fix replica would exit with
+-- an error. Now check that we don't hang and successfully connect.
+--
+fiber = require('fiber')
+---
+...
+test_run:cleanup_cluster()
+---
+...
+test_run:cmd("create server replica_auth with rpl_master=default, script='replication/replica_auth.lua'")
+---
+- true
+...
+test_run:cmd("start server replica_auth with wait=False, wait_load=False, args='cluster:pass 0.05'")
+---
+- true
+...
+-- Wait a bit to make sure replica waits till user is created.
+fiber.sleep(0.1)
+---
+...
+box.schema.user.create('cluster', {password='pass'})
+---
+...
+box.schema.user.grant('cluster', 'replication')
+---
+...
+while box.info.replication[2] == nil do fiber.sleep(0.01) end
+---
+...
+vclock = test_run:get_vclock('default')
+---
+...
+_ = test_run:wait_vclock('replica_auth', vclock)
+---
+...
+test_run:cmd("stop server replica_auth")
+---
+- true
+...
+test_run:cmd("cleanup server replica_auth")
+---
+- true
+...
+test_run:cmd("delete server replica_auth")
+---
+- true
+...
+box.schema.user.drop('cluster')
+---
+...
diff --git a/test/replication/misc.test.lua b/test/replication/misc.test.lua
index 979c5d58..c60adf5a 100644
--- a/test/replication/misc.test.lua
+++ b/test/replication/misc.test.lua
@@ -91,3 +91,28 @@ test_run:cmd("switch default")
 test_run:drop_cluster(SERVERS)
 
 box.schema.user.revoke('guest', 'replication')
+
+--
+-- Test case for gh-3637. Before the fix replica would exit with
+-- an error. Now check that we don't hang and successfully connect.
+--
+fiber = require('fiber')
+
+test_run:cleanup_cluster()
+
+test_run:cmd("create server replica_auth with rpl_master=default, script='replication/replica_auth.lua'")
+test_run:cmd("start server replica_auth with wait=False, wait_load=False, args='cluster:pass 0.05'")
+-- Wait a bit to make sure replica waits till user is created.
+fiber.sleep(0.1)
+box.schema.user.create('cluster', {password='pass'})
+box.schema.user.grant('cluster', 'replication')
+
+while box.info.replication[2] == nil do fiber.sleep(0.01) end
+vclock = test_run:get_vclock('default')
+_ = test_run:wait_vclock('replica_auth', vclock)
+
+test_run:cmd("stop server replica_auth")
+test_run:cmd("cleanup server replica_auth")
+test_run:cmd("delete server replica_auth")
+
+box.schema.user.drop('cluster')
diff --git a/test/replication/replica_auth.lua b/test/replication/replica_auth.lua
new file mode 100644
index 00000000..22ba9146
--- /dev/null
+++ b/test/replication/replica_auth.lua
@@ -0,0 +1,14 @@
+#!/usr/bin/env tarantool
+
+local USER_PASS = arg[1]
+local TIMEOUT = arg[2] and tonumber(arg[2]) or 0.1
+local CON_TIMEOUT = arg[3] and tonumber(arg[3]) or 30.0
+
+require('console').listen(os.getenv('ADMIN'))
+
+box.cfg({
+    listen = os.getenv("LISTEN"),
+    replication = USER_PASS .. "@" .. os.getenv("MASTER"),
+    replication_timeout = TIMEOUT,
+    replication_connect_timeout = CON_TIMEOUT
+})
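
For reference, the race is easiest to see from the master side. Below is a
minimal sketch, not part of the patch: the instance file, the "bootstrap"
key, the one-second delay and the 'cluster'/'pass' credentials are purely
illustrative. It shows a master whose box.once() block creates the
replication user only after some slow one-time work. A replica configured
with replication = 'cluster:pass@<master>' and a small replication_timeout,
like replica_auth.lua above, used to get ER_NO_SUCH_USER from such a master
and exit; with this patch the applier treats that error like
ER_ACCESS_DENIED, logs it and retries, which is what the new test checks.

#!/usr/bin/env tarantool
-- Illustrative master instance file (an assumption, not shipped with
-- the patch): the replication user is created inside box.once(),
-- after a slow one-time initialization step.

box.cfg({ listen = os.getenv("LISTEN") })

box.once("bootstrap", function()
    local fiber = require('fiber')
    -- Simulate slow bootstrap work; anything longer than the replica's
    -- replication_timeout is enough to trigger the race.
    fiber.sleep(1)
    box.schema.user.create('cluster', {password = 'pass'})
    box.schema.user.grant('cluster', 'replication')
end)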