From: Sergey Ostanevich <sergos@tarantool.org> To: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> Cc: tarantool-patches@dev.tarantool.org Subject: Re: [Tarantool-patches] [PATCH v2 02/19] replication: introduce replication_synchro_* cfg options Date: Wed, 1 Jul 2020 19:05:32 +0300 [thread overview] Message-ID: <20200701160532.GB45053@tarantool.org> (raw) In-Reply-To: <9ece5bf917856b1b86e6309504e95f55b7c2c6ca.1593472477.git.v.shpilevoy@tarantool.org> Hi! Thanks for the patch! LGTM, just one nit in desc. Regards, Sergos On 30 Jun 01:15, Vladislav Shpilevoy wrote: > Synchronous transactions are supposed to be replicated on a > specified number of replicas before committed on master. The > number of replicas can be specified using > replication_synchro_quorum option. It is 1 by default, so sync > transactions work like asynchronous when not configured anyhow. > 1 means successful WAL write on master is enough for commit. > > When replication_synchro_quorum is greater than 1, an instance has to > wait for the specified number of replicas to reply with success. If [reviewer's inline nit, pointing at the quoted line just above: "double space here"] > enough replies aren't collected during replication_synchro_timeout, > the instance rolls back the tx in question. 
> > Part of #4844 > Part of #5073 > --- > src/box/box.cc | 53 +++++++++++++++++++++++++++++++++ > src/box/box.h | 2 ++ > src/box/lua/cfg.cc | 18 +++++++++++ > src/box/lua/load_cfg.lua | 10 +++++++ > src/box/replication.cc | 2 ++ > src/box/replication.h | 12 ++++++++ > test/app-tap/init_script.result | 2 ++ > test/box/admin.result | 4 +++ > test/box/cfg.result | 8 +++++ > 9 files changed, 111 insertions(+) > > diff --git a/src/box/box.cc b/src/box/box.cc > index 871b0d976..0821ea0a3 100644 > --- a/src/box/box.cc > +++ b/src/box/box.cc > @@ -476,6 +476,31 @@ box_check_replication_sync_lag(void) > return lag; > } > > +static int > +box_check_replication_synchro_quorum(void) > +{ > + int quorum = cfg_geti("replication_synchro_quorum"); > + if (quorum <= 0 || quorum > VCLOCK_MAX) { > + diag_set(ClientError, ER_CFG, "replication_synchro_quorum", > + "the value must be greater than zero and less than " > + "maximal number of replicas"); > + return -1; > + } > + return quorum; > +} > + > +static double > +box_check_replication_synchro_timeout(void) > +{ > + double timeout = cfg_getd("replication_synchro_timeout"); > + if (timeout <= 0) { > + diag_set(ClientError, ER_CFG, "replication_synchro_timeout", > + "the value must be greater than zero"); > + return -1; > + } > + return timeout; > +} > + > static double > box_check_replication_sync_timeout(void) > { > @@ -658,6 +683,10 @@ box_check_config() > box_check_replication_connect_timeout(); > box_check_replication_connect_quorum(); > box_check_replication_sync_lag(); > + if (box_check_replication_synchro_quorum() < 0) > + diag_raise(); > + if (box_check_replication_synchro_timeout() < 0) > + diag_raise(); > box_check_replication_sync_timeout(); > box_check_readahead(cfg_geti("readahead")); > box_check_checkpoint_count(cfg_geti("checkpoint_count")); > @@ -777,6 +806,26 @@ box_set_replication_sync_lag(void) > replication_sync_lag = box_check_replication_sync_lag(); > } > > +int > +box_set_replication_synchro_quorum(void) > +{ 
> + int value = box_check_replication_synchro_quorum(); > + if (value < 0) > + return -1; > + replication_synchro_quorum = value; > + return 0; > +} > + > +int > +box_set_replication_synchro_timeout(void) > +{ > + double value = box_check_replication_synchro_timeout(); > + if (value < 0) > + return -1; > + replication_synchro_timeout = value; > + return 0; > +} > + > void > box_set_replication_sync_timeout(void) > { > @@ -2417,6 +2466,10 @@ box_cfg_xc(void) > box_set_replication_connect_timeout(); > box_set_replication_connect_quorum(); > box_set_replication_sync_lag(); > + if (box_set_replication_synchro_quorum() != 0) > + diag_raise(); > + if (box_set_replication_synchro_timeout() != 0) > + diag_raise(); > box_set_replication_sync_timeout(); > box_set_replication_skip_conflict(); > box_set_replication_anon(); > diff --git a/src/box/box.h b/src/box/box.h > index 557542a83..f9789154e 100644 > --- a/src/box/box.h > +++ b/src/box/box.h > @@ -243,6 +243,8 @@ void box_set_replication_timeout(void); > void box_set_replication_connect_timeout(void); > void box_set_replication_connect_quorum(void); > void box_set_replication_sync_lag(void); > +int box_set_replication_synchro_quorum(void); > +int box_set_replication_synchro_timeout(void); > void box_set_replication_sync_timeout(void); > void box_set_replication_skip_conflict(void); > void box_set_replication_anon(void); > diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc > index a5b15e527..d481155cd 100644 > --- a/src/box/lua/cfg.cc > +++ b/src/box/lua/cfg.cc > @@ -313,6 +313,22 @@ lbox_cfg_set_replication_sync_lag(struct lua_State *L) > return 0; > } > > +static int > +lbox_cfg_set_replication_synchro_quorum(struct lua_State *L) > +{ > + if (box_set_replication_synchro_quorum() != 0) > + luaT_error(L); > + return 0; > +} > + > +static int > +lbox_cfg_set_replication_synchro_timeout(struct lua_State *L) > +{ > + if (box_set_replication_synchro_timeout() != 0) > + luaT_error(L); > + return 0; > +} > + > static int > 
lbox_cfg_set_replication_sync_timeout(struct lua_State *L) > { > @@ -370,6 +386,8 @@ box_lua_cfg_init(struct lua_State *L) > {"cfg_set_replication_connect_quorum", lbox_cfg_set_replication_connect_quorum}, > {"cfg_set_replication_connect_timeout", lbox_cfg_set_replication_connect_timeout}, > {"cfg_set_replication_sync_lag", lbox_cfg_set_replication_sync_lag}, > + {"cfg_set_replication_synchro_quorum", lbox_cfg_set_replication_synchro_quorum}, > + {"cfg_set_replication_synchro_timeout", lbox_cfg_set_replication_synchro_timeout}, > {"cfg_set_replication_sync_timeout", lbox_cfg_set_replication_sync_timeout}, > {"cfg_set_replication_skip_conflict", lbox_cfg_set_replication_skip_conflict}, > {"cfg_set_replication_anon", lbox_cfg_set_replication_anon}, > diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua > index f2f2df6f8..a7f03c7d6 100644 > --- a/src/box/lua/load_cfg.lua > +++ b/src/box/lua/load_cfg.lua > @@ -89,6 +89,8 @@ local default_cfg = { > replication_timeout = 1, > replication_sync_lag = 10, > replication_sync_timeout = 300, > + replication_synchro_quorum = 1, > + replication_synchro_timeout = 5, > replication_connect_timeout = 30, > replication_connect_quorum = nil, -- connect all > replication_skip_conflict = false, > @@ -164,6 +166,8 @@ local template_cfg = { > replication_timeout = 'number', > replication_sync_lag = 'number', > replication_sync_timeout = 'number', > + replication_synchro_quorum = 'number', > + replication_synchro_timeout = 'number', > replication_connect_timeout = 'number', > replication_connect_quorum = 'number', > replication_skip_conflict = 'boolean', > @@ -280,6 +284,8 @@ local dynamic_cfg = { > replication_connect_quorum = private.cfg_set_replication_connect_quorum, > replication_sync_lag = private.cfg_set_replication_sync_lag, > replication_sync_timeout = private.cfg_set_replication_sync_timeout, > + replication_synchro_quorum = private.cfg_set_replication_synchro_quorum, > + replication_synchro_timeout = 
private.cfg_set_replication_synchro_timeout, > replication_skip_conflict = private.cfg_set_replication_skip_conflict, > replication_anon = private.cfg_set_replication_anon, > instance_uuid = check_instance_uuid, > @@ -313,6 +319,8 @@ local dynamic_cfg_order = { > replication_timeout = 150, > replication_sync_lag = 150, > replication_sync_timeout = 150, > + replication_synchro_quorum = 150, > + replication_synchro_timeout = 150, > replication_connect_timeout = 150, > replication_connect_quorum = 150, > replication = 200, > @@ -348,6 +356,8 @@ local dynamic_cfg_skip_at_load = { > replication_connect_quorum = true, > replication_sync_lag = true, > replication_sync_timeout = true, > + replication_synchro_quorum = true, > + replication_synchro_timeout = true, > replication_skip_conflict = true, > replication_anon = true, > wal_dir_rescan_delay = true, > diff --git a/src/box/replication.cc b/src/box/replication.cc > index 273a7cb66..01e9e876a 100644 > --- a/src/box/replication.cc > +++ b/src/box/replication.cc > @@ -51,6 +51,8 @@ double replication_timeout = 1.0; /* seconds */ > double replication_connect_timeout = 30.0; /* seconds */ > int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL; > double replication_sync_lag = 10.0; /* seconds */ > +int replication_synchro_quorum = 1; > +double replication_synchro_timeout = 5.0; /* seconds */ > double replication_sync_timeout = 300.0; /* seconds */ > bool replication_skip_conflict = false; > bool replication_anon = false; > diff --git a/src/box/replication.h b/src/box/replication.h > index 93a25c8a7..a081870f9 100644 > --- a/src/box/replication.h > +++ b/src/box/replication.h > @@ -125,6 +125,18 @@ extern int replication_connect_quorum; > */ > extern double replication_sync_lag; > > +/** > + * Minimal number of replicas which should ACK a synchronous > + * transaction to be able to confirm it and commit. 
> + */ > +extern int replication_synchro_quorum; > + > +/** > + * Time in seconds which the master node is able to wait for ACKs > + * for a synchronous transaction until it is rolled back. > + */ > +extern double replication_synchro_timeout; > + > /** > * Max time to wait for appliers to synchronize before entering > * the orphan mode. > diff --git a/test/app-tap/init_script.result b/test/app-tap/init_script.result > index 7c4454285..857f0c95f 100644 > --- a/test/app-tap/init_script.result > +++ b/test/app-tap/init_script.result > @@ -30,6 +30,8 @@ replication_connect_timeout:30 > replication_skip_conflict:false > replication_sync_lag:10 > replication_sync_timeout:300 > +replication_synchro_quorum:1 > +replication_synchro_timeout:5 > replication_timeout:1 > slab_alloc_factor:1.05 > sql_cache_size:5242880 > diff --git a/test/box/admin.result b/test/box/admin.result > index d94da8c5d..ab3e80a97 100644 > --- a/test/box/admin.result > +++ b/test/box/admin.result > @@ -81,6 +81,10 @@ cfg_filter(box.cfg) > - 10 > - - replication_sync_timeout > - 300 > + - - replication_synchro_quorum > + - 1 > + - - replication_synchro_timeout > + - 5 > - - replication_timeout > - 1 > - - slab_alloc_factor > diff --git a/test/box/cfg.result b/test/box/cfg.result > index b41d54599..bdd210b09 100644 > --- a/test/box/cfg.result > +++ b/test/box/cfg.result > @@ -69,6 +69,10 @@ cfg_filter(box.cfg) > | - 10 > | - - replication_sync_timeout > | - 300 > + | - - replication_synchro_quorum > + | - 1 > + | - - replication_synchro_timeout > + | - 5 > | - - replication_timeout > | - 1 > | - - slab_alloc_factor > @@ -172,6 +176,10 @@ cfg_filter(box.cfg) > | - 10 > | - - replication_sync_timeout > | - 300 > + | - - replication_synchro_quorum > + | - 1 > + | - - replication_synchro_timeout > + | - 5 > | - - replication_timeout > | - 1 > | - - slab_alloc_factor > -- > 2.21.1 (Apple Git-122.3) >
next prev parent reply other threads:[~2020-07-01 16:05 UTC|newest] Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top [not found] <cover.1593723973.git.sergeyb@tarantool.org> 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 00/19] Sync replication Vladislav Shpilevoy 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 01/19] replication: introduce space.is_sync option Vladislav Shpilevoy 2020-06-30 23:00 ` Vladislav Shpilevoy 2020-07-01 15:55 ` Sergey Ostanevich 2020-07-01 23:46 ` Vladislav Shpilevoy 2020-07-02 8:25 ` Serge Petrenko 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 10/19] txn_limbo: add ROLLBACK processing Vladislav Shpilevoy 2020-07-05 15:29 ` Vladislav Shpilevoy 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 11/19] box: rework local_recovery to use async txn_commit Vladislav Shpilevoy 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 12/19] replication: support ROLLBACK and CONFIRM during recovery Vladislav Shpilevoy 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 13/19] replication: add test for synchro CONFIRM/ROLLBACK Vladislav Shpilevoy 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 14/19] applier: remove writer_cond Vladislav Shpilevoy 2020-07-02 9:13 ` Serge Petrenko 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 15/19] applier: send heartbeat not only on commit, but on any write Vladislav Shpilevoy 2020-07-01 23:55 ` Vladislav Shpilevoy 2020-07-03 12:23 ` Serge Petrenko 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 16/19] txn_limbo: add diag_set in txn_limbo_wait_confirm Vladislav Shpilevoy 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 17/19] replication: delay initial join until confirmation Vladislav Shpilevoy 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 18/19] replication: only send confirmed data during final join Vladislav Shpilevoy 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 19/19] replication: block async transactions when not empty limbo Vladislav Shpilevoy 2020-07-01 17:12 ` Sergey 
Ostanevich 2020-07-01 23:47 ` Vladislav Shpilevoy 2020-07-03 12:28 ` Serge Petrenko 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 02/19] replication: introduce replication_synchro_* cfg options Vladislav Shpilevoy 2020-07-01 16:05 ` Sergey Ostanevich [this message] 2020-07-01 23:46 ` Vladislav Shpilevoy 2020-07-02 8:29 ` Serge Petrenko 2020-07-02 23:36 ` Vladislav Shpilevoy 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 03/19] txn: add TXN_WAIT_ACK flag Vladislav Shpilevoy 2020-07-01 17:14 ` Sergey Ostanevich 2020-07-01 23:46 ` Vladislav Shpilevoy 2020-07-02 8:30 ` Serge Petrenko 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 04/19] replication: make sync transactions wait quorum Vladislav Shpilevoy 2020-06-30 23:00 ` Vladislav Shpilevoy 2020-07-02 8:48 ` Serge Petrenko 2020-07-03 21:16 ` Vladislav Shpilevoy 2020-07-05 16:05 ` Vladislav Shpilevoy 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 05/19] xrow: introduce CONFIRM and ROLLBACK entries Vladislav Shpilevoy 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 06/19] txn: introduce various reasons for txn rollback Vladislav Shpilevoy 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 07/19] replication: write and read CONFIRM entries Vladislav Shpilevoy 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 08/19] replication: add support of qsync to the snapshot machinery Vladislav Shpilevoy 2020-07-02 8:52 ` Serge Petrenko 2020-07-08 11:43 ` Leonid Vasiliev 2020-06-29 23:15 ` [Tarantool-patches] [PATCH v2 09/19] txn_limbo: add timeout when waiting for acks Vladislav Shpilevoy 2020-06-29 23:22 ` [Tarantool-patches] [PATCH v2 00/19] Sync replication Vladislav Shpilevoy 2020-06-30 23:00 ` [Tarantool-patches] [PATCH v2 20/19] replication: add test for quorum 1 Vladislav Shpilevoy 2020-07-03 12:32 ` Serge Petrenko 2020-07-02 21:13 ` [Tarantool-patches] [PATCH 1/4] replication: regression test on gh-5119 [not fixed] sergeyb 2020-07-02 21:13 ` [Tarantool-patches] [PATCH 2/4] replication: add advanced tests for sync 
replication sergeyb 2020-07-02 22:46 ` Sergey Bronnikov 2020-07-02 23:20 ` Vladislav Shpilevoy 2020-07-06 12:30 ` Sergey Bronnikov 2020-07-06 23:31 ` Vladislav Shpilevoy 2020-07-07 12:12 ` Sergey Bronnikov 2020-07-07 20:57 ` Vladislav Shpilevoy 2020-07-08 12:07 ` Sergey Bronnikov 2020-07-08 22:13 ` Vladislav Shpilevoy 2020-07-09 9:39 ` Sergey Bronnikov 2020-07-02 21:13 ` [Tarantool-patches] [PATCH 3/4] replication: add tests for sync replication with anon replica sergeyb 2020-07-06 23:31 ` Vladislav Shpilevoy 2020-07-02 21:13 ` [Tarantool-patches] [PATCH 4/4] replication: add tests for sync replication with snapshots sergeyb 2020-07-02 22:46 ` Sergey Bronnikov 2020-07-02 23:20 ` Vladislav Shpilevoy 2020-07-06 23:31 ` Vladislav Shpilevoy 2020-07-07 16:00 ` Sergey Bronnikov 2020-07-06 23:31 ` [Tarantool-patches] [PATCH] Add new error injection constant ERRINJ_SYNC_TIMEOUT Vladislav Shpilevoy 2020-07-10 0:50 ` [Tarantool-patches] [PATCH v2 00/19] Sync replication Vladislav Shpilevoy 2020-07-10 7:40 ` Kirill Yukhin
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20200701160532.GB45053@tarantool.org \ --to=sergos@tarantool.org \ --cc=tarantool-patches@dev.tarantool.org \ --cc=v.shpilevoy@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH v2 02/19] replication: introduce replication_synchro_* cfg options' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox