From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Thu, 30 Aug 2018 19:11:36 +0300 From: Vladimir Davydov Subject: Re: [tarantool-patches] [PATCH v2 2/3] box: add replication_sync_timeout Message-ID: <20180830161136.xhvok7vgkei6hxip@esperanza> References: <20180830141114.4531-1-krishtal.olja@gmail.com> <20180830141114.4531-2-krishtal.olja@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20180830141114.4531-2-krishtal.olja@gmail.com> To: Olga Arkhangelskaia Cc: tarantool-patches@freelists.org List-ID: On Thu, Aug 30, 2018 at 05:11:13PM +0300, Olga Arkhangelskaia wrote: > diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua > index f803d8987..213904458 100644 > --- a/src/box/lua/load_cfg.lua > +++ b/src/box/lua/load_cfg.lua > @@ -200,6 +202,7 @@ local dynamic_cfg = { > replication_connect_timeout = private.cfg_set_replication_connect_timeout, > replication_connect_quorum = private.cfg_set_replication_connect_quorum, > replication_sync_lag = private.cfg_set_replication_sync_lag, > + replication_sync_timeout = private.cfg_set_replication_sync_timeout, ^^^ Nit: three extra spaces. > instance_uuid = function() > if box.cfg.instance_uuid ~= box.info.uuid then > box.error(box.error.CFG, 'instance_uuid', > diff --git a/src/box/replication.cc b/src/box/replication.cc > index 861ce34ea..be58b0225 100644 > --- a/src/box/replication.cc > +++ b/src/box/replication.cc > @@ -49,7 +49,7 @@ double replication_timeout = 1.0; /* seconds */ > double replication_connect_timeout = 30.0; /* seconds */ > int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL; > double replication_sync_lag = 10.0; /* seconds */ > - > +double replication_sync_timeout = 300.0; /* seconds */ > struct replicaset replicaset; Nit: an empty line was here for a reason - it separated configuration options from the replicaset definition. Please leave it. 
> > static int > @@ -673,12 +673,20 @@ replicaset_sync(void) > > /* > * Wait until all connected replicas synchronize up to > - * replication_sync_lag > + * replication_sync_lag or return on replication_sync_timeout > */ > + double start_time = ev_monotonic_now(loop()); > + double deadline = start_time + replication_sync_timeout; > while (replicaset.applier.synced < quorum && > replicaset.applier.connected + > - replicaset.applier.loading >= quorum) > - fiber_cond_wait(&replicaset.applier.cond); > + replicaset.applier.loading >= quorum) { > + if (fiber_cond_wait_deadline(&replicaset.applier.cond, > + deadline) != 0) { > + say_crit("replication_sync_timeout fired, entering orphan mode"); This message is not needed, because "entering orphan mode" is printed right below in this case. > + break; > + } > + > + } > > if (replicaset.applier.synced < quorum) { > /*