Tarantool development patches archive
 help / color / mirror / Atom feed
From: Serge Petrenko <sergepetrenko@tarantool.org>
To: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>,
	sergos@tarantool.org, gorcunov@gmail.com,
	Leonid Vasiliev <lvasiliev@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: Re: [Tarantool-patches] [PATCH 2/8] replication: introduce replication_sync_quorum cfg
Date: Fri, 19 Jun 2020 20:45:44 +0300	[thread overview]
Message-ID: <f789d77c-239e-d7cc-d051-04e284b00fa5@tarantool.org> (raw)
In-Reply-To: <969b1847-08cb-5370-ec5d-a07a52397b48@tarantool.org>


16.06.2020 02:05, Vladislav Shpilevoy пишет:
> I appended a new commit on top of this one on the
> branch. In the quorum commit I renamed the
> option to
>
>      replication_synchro_quorum
>
> ====================
> commit fc19662ec528c5217c7b611ae16d417497d9fe35
> Author: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
> Date:   Tue Jun 16 00:47:24 2020 +0200
>
>      replication: introduce replication_synchro_timeout cfg
>      
>      [TO BE SQUASHED INTO THE PREVIOUS COMMIT]
>      
>      Part of #4844
>      Part of #5073
>
> diff --git a/src/box/box.cc b/src/box/box.cc
> index c7a5f2e3c..9db55e05a 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -489,6 +489,18 @@ box_check_replication_synchro_quorum(void)
>   	return quorum;
>   }
>   
> +static double
> +box_check_replication_synchro_timeout(void)
> +{
> +	double timeout = cfg_getd("replication_synchro_timeout");
> +	if (timeout <= 0) {
> +		diag_set(ClientError, ER_CFG, "replication_synchro_timeout",
> +			 "the value must be greater than zero");
> +		return -1;
> +	}
> +	return timeout;
> +}
> +
>   static double
>   box_check_replication_sync_timeout(void)
>   {
> @@ -673,6 +685,8 @@ box_check_config()
>   	box_check_replication_sync_lag();
>   	if (box_check_replication_synchro_quorum() < 0)
>   		diag_raise();
> +	if (box_check_replication_synchro_timeout() < 0)
> +		diag_raise();
>   	box_check_replication_sync_timeout();
>   	box_check_readahead(cfg_geti("readahead"));
>   	box_check_checkpoint_count(cfg_geti("checkpoint_count"));
> @@ -802,6 +816,16 @@ box_set_replication_synchro_quorum(void)
>   	return 0;
>   }
>   
> +int
> +box_set_replication_synchro_timeout(void)
> +{
> +	double value = box_check_replication_synchro_timeout();
> +	if (value < 0)
> +		return -1;
> +	replication_synchro_timeout = value;
> +	return 0;
> +}
> +
>   void
>   box_set_replication_sync_timeout(void)
>   {
> @@ -2444,6 +2468,8 @@ box_cfg_xc(void)
>   	box_set_replication_sync_lag();
>   	if (box_set_replication_synchro_quorum() != 0)
>   		diag_raise();
> +	if (box_set_replication_synchro_timeout() != 0)
> +		diag_raise();
>   	box_set_replication_sync_timeout();
>   	box_set_replication_skip_conflict();
>   	box_set_replication_anon();
> diff --git a/src/box/box.h b/src/box/box.h
> index 24802d0f1..f9789154e 100644
> --- a/src/box/box.h
> +++ b/src/box/box.h
> @@ -244,6 +244,7 @@ void box_set_replication_connect_timeout(void);
>   void box_set_replication_connect_quorum(void);
>   void box_set_replication_sync_lag(void);
>   int box_set_replication_synchro_quorum(void);
> +int box_set_replication_synchro_timeout(void);
>   void box_set_replication_sync_timeout(void);
>   void box_set_replication_skip_conflict(void);
>   void box_set_replication_anon(void);
> diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc
> index 01e8958cd..d481155cd 100644
> --- a/src/box/lua/cfg.cc
> +++ b/src/box/lua/cfg.cc
> @@ -321,6 +321,14 @@ lbox_cfg_set_replication_synchro_quorum(struct lua_State *L)
>   	return 0;
>   }
>   
> +static int
> +lbox_cfg_set_replication_synchro_timeout(struct lua_State *L)
> +{
> +	if (box_set_replication_synchro_timeout() != 0)
> +		luaT_error(L);
> +	return 0;
> +}
> +
>   static int
>   lbox_cfg_set_replication_sync_timeout(struct lua_State *L)
>   {
> @@ -379,6 +387,7 @@ box_lua_cfg_init(struct lua_State *L)
>   		{"cfg_set_replication_connect_timeout", lbox_cfg_set_replication_connect_timeout},
>   		{"cfg_set_replication_sync_lag", lbox_cfg_set_replication_sync_lag},
>   		{"cfg_set_replication_synchro_quorum", lbox_cfg_set_replication_synchro_quorum},
> +		{"cfg_set_replication_synchro_timeout", lbox_cfg_set_replication_synchro_timeout},
>   		{"cfg_set_replication_sync_timeout", lbox_cfg_set_replication_sync_timeout},
>   		{"cfg_set_replication_skip_conflict", lbox_cfg_set_replication_skip_conflict},
>   		{"cfg_set_replication_anon", lbox_cfg_set_replication_anon},
> diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua
> index 991e919e4..1155248a5 100644
> --- a/src/box/lua/load_cfg.lua
> +++ b/src/box/lua/load_cfg.lua
> @@ -90,6 +90,7 @@ local default_cfg = {
>       replication_sync_lag = 10,
>       replication_sync_timeout = 300,
>       replication_synchro_quorum = 1,
> +    replication_synchro_timeout = 5,
>       replication_connect_timeout = 30,
>       replication_connect_quorum = nil, -- connect all
>       replication_skip_conflict = false,
> @@ -166,6 +167,7 @@ local template_cfg = {
>       replication_sync_lag = 'number',
>       replication_sync_timeout = 'number',
>       replication_synchro_quorum = 'number',
> +    replication_synchro_timeout = 'number',
>       replication_connect_timeout = 'number',
>       replication_connect_quorum = 'number',
>       replication_skip_conflict = 'boolean',
> @@ -278,6 +280,7 @@ local dynamic_cfg = {
>       replication_sync_lag    = private.cfg_set_replication_sync_lag,
>       replication_sync_timeout = private.cfg_set_replication_sync_timeout,
>       replication_synchro_quorum = private.cfg_set_replication_synchro_quorum,
> +    replication_synchro_timeout = private.cfg_set_replication_synchro_timeout,
>       replication_skip_conflict = private.cfg_set_replication_skip_conflict,
>       replication_anon        = private.cfg_set_replication_anon,
>       instance_uuid           = check_instance_uuid,
> @@ -312,6 +315,7 @@ local dynamic_cfg_order = {
>       replication_sync_lag    = 150,
>       replication_sync_timeout    = 150,
>       replication_synchro_quorum  = 150,
> +    replication_synchro_timeout = 150,
>       replication_connect_timeout = 150,
>       replication_connect_quorum  = 150,
>       replication             = 200,
> @@ -348,6 +352,7 @@ local dynamic_cfg_skip_at_load = {
>       replication_sync_lag    = true,
>       replication_sync_timeout = true,
>       replication_synchro_quorum = true,
> +    replication_synchro_timeout = true,
>       replication_skip_conflict = true,
>       replication_anon        = true,
>       wal_dir_rescan_delay    = true,
> diff --git a/src/box/replication.cc b/src/box/replication.cc
> index 5b52f3864..01e9e876a 100644
> --- a/src/box/replication.cc
> +++ b/src/box/replication.cc
> @@ -52,6 +52,7 @@ double replication_connect_timeout = 30.0; /* seconds */
>   int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL;
>   double replication_sync_lag = 10.0; /* seconds */
>   int replication_synchro_quorum = 1;
> +double replication_synchro_timeout = 5.0; /* seconds */
>   double replication_sync_timeout = 300.0; /* seconds */
>   bool replication_skip_conflict = false;
>   bool replication_anon = false;
> diff --git a/src/box/replication.h b/src/box/replication.h
> index 05e3eb943..a081870f9 100644
> --- a/src/box/replication.h
> +++ b/src/box/replication.h
> @@ -131,6 +131,12 @@ extern double replication_sync_lag;
>    */
>   extern int replication_synchro_quorum;
>   
> +/**
> + * Time in seconds which the master node is able to wait for ACKs
> + * for a synchronous transaction until it is rolled back.
> + */
> +extern double replication_synchro_timeout;
> +
>   /**
>    * Max time to wait for appliers to synchronize before entering
>    * the orphan mode.
> diff --git a/test/app-tap/init_script.result b/test/app-tap/init_script.result
> index 2987b60b9..857f0c95f 100644
> --- a/test/app-tap/init_script.result
> +++ b/test/app-tap/init_script.result
> @@ -31,6 +31,7 @@ replication_skip_conflict:false
>   replication_sync_lag:10
>   replication_sync_timeout:300
>   replication_synchro_quorum:1
> +replication_synchro_timeout:5
>   replication_timeout:1
>   slab_alloc_factor:1.05
>   sql_cache_size:5242880
> diff --git a/test/box/admin.result b/test/box/admin.result
> index 35ecc7617..ab3e80a97 100644
> --- a/test/box/admin.result
> +++ b/test/box/admin.result
> @@ -83,6 +83,8 @@ cfg_filter(box.cfg)
>       - 300
>     - - replication_synchro_quorum
>       - 1
> +  - - replication_synchro_timeout
> +    - 5
>     - - replication_timeout
>       - 1
>     - - slab_alloc_factor
> diff --git a/test/box/cfg.result b/test/box/cfg.result
> index cdc0773f2..bdd210b09 100644
> --- a/test/box/cfg.result
> +++ b/test/box/cfg.result
> @@ -71,6 +71,8 @@ cfg_filter(box.cfg)
>    |     - 300
>    |   - - replication_synchro_quorum
>    |     - 1
> + |   - - replication_synchro_timeout
> + |     - 5
>    |   - - replication_timeout
>    |     - 1
>    |   - - slab_alloc_factor
> @@ -176,6 +178,8 @@ cfg_filter(box.cfg)
>    |     - 300
>    |   - - replication_synchro_quorum
>    |     - 1
> + |   - - replication_synchro_timeout
> + |     - 5
>    |   - - replication_timeout
>    |     - 1
>    |   - - slab_alloc_factor

Thanks! Looks good. I squashed the patch into the previous commit.

Here's the new commit message I came up with:


     replication: introduce replication_synchro_quorum and replication_synchro_timeout cfg options

     Synchronous transactions are supposed to be replicated on a
     specified number of replicas before being committed on master.
     The number of replicas can be specified using the
     replication_synchro_quorum option. It is 1 by default, so
     synchronous transactions behave like asynchronous ones unless
     configured otherwise. 1 means a successful WAL write on master
     is enough for commit.

     When replication_synchro_quorum is greater than 1, an instance has to
     wait for the specified number of replicas to reply with success. If
     enough replies aren't collected within replication_synchro_timeout,
     the instance rolls back the transaction in question.

     Part of #4844
     Part of #5073


-- 
Serge Petrenko

  parent reply	other threads:[~2020-06-19 17:45 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-09 12:20 [Tarantool-patches] [PATCH 0/8] wait for lsn and confirm Serge Petrenko
2020-06-09 12:20 ` [Tarantool-patches] [PATCH 1/8] replication: introduce space.is_sync option Serge Petrenko
2020-06-10 23:51   ` Vladislav Shpilevoy
2020-06-18 22:27     ` Leonid Vasiliev
2020-06-21 16:24       ` Vladislav Shpilevoy
2020-06-09 12:20 ` [Tarantool-patches] [PATCH 2/8] replication: introduce replication_sync_quorum cfg Serge Petrenko
2020-06-10 23:51   ` Vladislav Shpilevoy
2020-06-15 23:05   ` Vladislav Shpilevoy
2020-06-18 22:54     ` Leonid Vasiliev
2020-06-19 17:45     ` Serge Petrenko [this message]
2020-06-21 16:25       ` Vladislav Shpilevoy
2020-06-09 12:20 ` [Tarantool-patches] [PATCH 3/8] txn: add TXN_WAIT_ACK flag Serge Petrenko
2020-06-18 23:12   ` Leonid Vasiliev
2020-06-21 16:25     ` Vladislav Shpilevoy
2020-06-22  9:44       ` Serge Petrenko
2020-06-23 22:13         ` Vladislav Shpilevoy
2020-06-09 12:20 ` [Tarantool-patches] [PATCH 4/8] replication: make sync transactions wait quorum Serge Petrenko
2020-06-10 23:51   ` Vladislav Shpilevoy
2020-06-11 14:57   ` Vladislav Shpilevoy
2020-06-15 23:05     ` Vladislav Shpilevoy
2020-06-19 12:39   ` Leonid Vasiliev
2020-06-25 21:48   ` Vladislav Shpilevoy
2020-06-09 12:20 ` [Tarantool-patches] [PATCH 5/8] txn_limbo: follow-up fixes Serge Petrenko
2020-06-10 23:51   ` Vladislav Shpilevoy
2020-06-11  8:46     ` Serge Petrenko
2020-06-11 13:01       ` Vladislav Shpilevoy
2020-06-09 12:20 ` [Tarantool-patches] [PATCH 6/8] txn_limbo: fix instance id assignment Serge Petrenko
2020-06-10 23:51   ` Vladislav Shpilevoy
2020-06-09 12:20 ` [Tarantool-patches] [PATCH 7/8] xrow: introduce CONFIRM entry Serge Petrenko
2020-06-19 15:18   ` Leonid Vasiliev
2020-06-22 10:14     ` Serge Petrenko
2020-06-23  8:33   ` Serge Petrenko
2020-06-09 12:20 ` [Tarantool-patches] [PATCH 8/8] replication: write and read CONFIRM entries Serge Petrenko
2020-06-10 23:51   ` Vladislav Shpilevoy
2020-06-11  8:56     ` Serge Petrenko
2020-06-11 13:04       ` Vladislav Shpilevoy
2020-06-11 14:57   ` Vladislav Shpilevoy
2020-06-15 23:05     ` Vladislav Shpilevoy
2020-06-18 11:32       ` Leonid Vasiliev
2020-06-18 21:49         ` Vladislav Shpilevoy
2020-06-19 17:48         ` Serge Petrenko
2020-06-19 17:50   ` Serge Petrenko
2020-06-23  8:35     ` Serge Petrenko
2020-06-20 15:06   ` Leonid Vasiliev
2020-06-22 10:34     ` Serge Petrenko
2020-06-23  8:34   ` Serge Petrenko
2020-06-25 22:04   ` Vladislav Shpilevoy
2020-06-25 22:31     ` Vladislav Shpilevoy
2020-06-26 10:58       ` Serge Petrenko
2020-06-09 12:53 ` [Tarantool-patches] [PATCH 0/2] A few fixes for building Cyrill Gorcunov
2020-06-09 12:53 ` [Tarantool-patches] [PATCH 1/2] box/applier: fix typo Cyrill Gorcunov
2020-06-10  9:18   ` Sergey Ostanevich
2020-06-09 12:53 ` [Tarantool-patches] [PATCH 2/2] box: use tnt_raise for quorum check Cyrill Gorcunov
2020-06-10  9:17   ` Sergey Ostanevich
2020-06-10 10:45   ` Serge Petrenko
2020-06-22 21:51 ` [Tarantool-patches] [PATCH 0/8] wait for lsn and confirm Vladislav Shpilevoy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f789d77c-239e-d7cc-d051-04e284b00fa5@tarantool.org \
    --to=sergepetrenko@tarantool.org \
    --cc=gorcunov@gmail.com \
    --cc=lvasiliev@tarantool.org \
    --cc=sergos@tarantool.org \
    --cc=tarantool-patches@dev.tarantool.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH 2/8] replication: introduce replication_sync_quorum cfg' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox