From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Content-Type: text/plain; charset=utf-8 Mime-Version: 1.0 (Mac OS X Mail 11.5 \(3445.9.1\)) Subject: Re: [tarantool-patches] [PATCH] replication: fix a failing assert in replica_on_applier_disconnect() From: Sergey Petrenko In-Reply-To: <20180807165031.e2avj7waurok3zpk@esperanza> Date: Wed, 8 Aug 2018 13:10:30 +0300 Content-Transfer-Encoding: quoted-printable Message-Id: <51F21380-A795-4F40-B3DE-A9770B190BD6@tarantool.org> References: <20180803055905.69108-1-sergepetrenko@tarantool.org> <20180803155745.tmndjr52n6igtdno@tarantool.org> <81999702-C603-423E-92C9-199CE605FED4@tarantool.org> <20180807165031.e2avj7waurok3zpk@esperanza> To: Vladimir Davydov Cc: Kirill Yukhin , tarantool-patches@freelists.org List-ID: > 7 =D0=B0=D0=B2=D0=B3. 2018 =D0=B3., =D0=B2 19:50, Vladimir Davydov = =D0=BD=D0=B0=D0=BF=D0=B8=D1=81=D0=B0=D0=BB(=D0=B0= ): >=20 > On Mon, Aug 06, 2018 at 05:14:05PM +0300, Sergey Petrenko wrote: >> diff --git a/test/replication/misc.test.lua = b/test/replication/misc.test.lua >> index c05e52165..27c1a4821 100644 >> --- a/test/replication/misc.test.lua >> +++ b/test/replication/misc.test.lua >> @@ -81,3 +81,15 @@ test_run:cmd("switch default") >> test_run:drop_cluster(SERVERS) >>=20 >> box.schema.user.revoke('guest', 'replication') >> + >> +-- gh-3510 assertion failure in replica_on_applier_disconnect() >> +test_run:cmd('create server er_load1 with = script=3D"replication/er_load1.lua"') >> +test_run:cmd('create server er_load2 with = script=3D"replication/er_load2.lua"') >> +test_run:cmd('start server er_load1 with wait=3DFalse, = wait_load=3DFalse') >> +test_run:cmd('start server er_load2 with wait=3DFalse, = wait_load=3DFalse') >> +require('fiber').sleep(0.5) >> +test_run:cmd('stop server er_load1') >> +require('fiber').sleep(1) >=20 > > These sleep()'s are way too long. They will make our tests run = forever. > Please change to 0.00001 or smaller or rewrite the test so that you = do > not need them. > Hi! 
Thank you for the review. Fixed. Here's the new diff: src/box/replication.cc | 4 ++++ test/replication/er_load.lua | 25 +++++++++++++++++++++++++ test/replication/er_load1.lua | 1 + test/replication/er_load2.lua | 1 + test/replication/misc.result | 32 ++++++++++++++++++++++++++++++++ test/replication/misc.test.lua | 12 ++++++++++++ 6 files changed, 75 insertions(+) create mode 100644 test/replication/er_load.lua create mode 120000 test/replication/er_load1.lua create mode 120000 test/replication/er_load2.lua diff --git a/src/box/replication.cc b/src/box/replication.cc index 26bbbe32a..0efbd7c0e 100644 --- a/src/box/replication.cc +++ b/src/box/replication.cc @@ -350,6 +350,10 @@ replica_on_applier_disconnect(struct replica = *replica) assert(replicaset.applier.connected > 0); replicaset.applier.connected--; break; + case APPLIER_LOADING: + assert(replicaset.applier.loading > 0); + replicaset.applier.loading--; + break; case APPLIER_DISCONNECTED: break; default: diff --git a/test/replication/er_load.lua b/test/replication/er_load.lua new file mode 100644 index 000000000..0515b3cce --- /dev/null +++ b/test/replication/er_load.lua @@ -0,0 +1,25 @@ +#!/usr/bin/env tarantool + +-- get instance id from filename (er_load1.lua =3D> 1) +local INSTANCE_ID =3D string.match(arg[0], '%d') + +local SOCKET_DIR =3D require('fio').cwd() +local function instance_uri(instance_id) + return SOCKET_DIR..'/er_load'..instance_id..'.sock' +end + +require('console').listen(os.getenv('ADMIN')) + +box.cfg{ + listen =3D instance_uri(INSTANCE_ID); + replication =3D { + instance_uri(INSTANCE_ID), + instance_uri(INSTANCE_ID % 2 + 1) + }, + replication_timeout =3D 0.01, + read_only =3D INSTANCE_ID =3D=3D '2' +} +box.once('bootstrap', function() + box.schema.user.grant('guest', 'replication') + box.space._cluster:delete(2) +end) diff --git a/test/replication/er_load1.lua = b/test/replication/er_load1.lua new file mode 120000 index 000000000..18f7ffa5a --- /dev/null +++ 
b/test/replication/er_load1.lua @@ -0,0 +1 @@ +er_load.lua \ No newline at end of file diff --git a/test/replication/er_load2.lua = b/test/replication/er_load2.lua new file mode 120000 index 000000000..18f7ffa5a --- /dev/null +++ b/test/replication/er_load2.lua @@ -0,0 +1 @@ +er_load.lua \ No newline at end of file diff --git a/test/replication/misc.result b/test/replication/misc.result index ff0dbf549..75316933f 100644 --- a/test/replication/misc.result +++ b/test/replication/misc.result @@ -208,3 +208,35 @@ test_run:drop_cluster(SERVERS) box.schema.user.revoke('guest', 'replication') --- ... +-- gh-3510 assertion failure in replica_on_applier_disconnect() +test_run:cmd('create server er_load1 with = script=3D"replication/er_load1.lua"') +--- +- true +... +test_run:cmd('create server er_load2 with = script=3D"replication/er_load2.lua"') +--- +- true +... +test_run:cmd('start server er_load1 with wait=3DFalse, = wait_load=3DFalse') +--- +- true +... +-- instance er_load2 will fail with error ER_READONLY. this is ok. +-- We only test here that er_load1 doesn't assert. +test_run:cmd('start server er_load2 with wait=3DTrue, wait_load=3DTrue, = crash_expected =3D True') +--- +- false +... +test_run:cmd('stop server er_load1') +--- +- true +... +-- er_load2 exits automatically. +test_run:cmd('cleanup server er_load1') +--- +- true +... +test_run:cmd('cleanup server er_load2') +--- +- true +... 
diff --git a/test/replication/misc.test.lua = b/test/replication/misc.test.lua index c05e52165..acdfa737e 100644 --- a/test/replication/misc.test.lua +++ b/test/replication/misc.test.lua @@ -81,3 +81,15 @@ test_run:cmd("switch default") test_run:drop_cluster(SERVERS) =20 box.schema.user.revoke('guest', 'replication') + +-- gh-3510 assertion failure in replica_on_applier_disconnect() +test_run:cmd('create server er_load1 with = script=3D"replication/er_load1.lua"') +test_run:cmd('create server er_load2 with = script=3D"replication/er_load2.lua"') +test_run:cmd('start server er_load1 with wait=3DFalse, = wait_load=3DFalse') +-- instance er_load2 will fail with error ER_READONLY. this is ok. +-- We only test here that er_load1 doesn't assert. +test_run:cmd('start server er_load2 with wait=3DTrue, wait_load=3DTrue, = crash_expected =3D True') +test_run:cmd('stop server er_load1') +-- er_load2 exits automatically. +test_run:cmd('cleanup server er_load1') +test_run:cmd('cleanup server er_load2') --=20 2.15.2 (Apple Git-101.1)