[Tarantool-patches] [PATCH v2 02/19] replication: introduce replication_synchro_* cfg options
Serge Petrenko
sergepetrenko at tarantool.org
Thu Jul 2 11:29:02 MSK 2020
30.06.2020 02:15, Vladislav Shpilevoy пишет:
> Synchronous transactions are supposed to be replicated on a
> specified number of replicas before being committed on master. The
> number of replicas can be specified using
> replication_synchro_quorum option. It is 1 by default, so sync
> transactions work like asynchronous ones when not configured at all.
> 1 means successful WAL write on master is enough for commit.
>
> When replication_synchro_quorum is greater than 1, an instance has to
> wait for the specified number of replicas to reply with success. If
> enough replies aren't collected during replication_synchro_timeout,
> the instance rolls back the tx in question.
>
> Part of #4844
> Part of #5073
Thanks for the patch!
LGTM with 1 comment below.
> ---
> src/box/box.cc | 53 +++++++++++++++++++++++++++++++++
> src/box/box.h | 2 ++
> src/box/lua/cfg.cc | 18 +++++++++++
> src/box/lua/load_cfg.lua | 10 +++++++
> src/box/replication.cc | 2 ++
> src/box/replication.h | 12 ++++++++
> test/app-tap/init_script.result | 2 ++
> test/box/admin.result | 4 +++
> test/box/cfg.result | 8 +++++
> 9 files changed, 111 insertions(+)
>
> diff --git a/src/box/box.cc b/src/box/box.cc
> index 871b0d976..0821ea0a3 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -476,6 +476,31 @@ box_check_replication_sync_lag(void)
> return lag;
> }
>
> +static int
> +box_check_replication_synchro_quorum(void)
> +{
> + int quorum = cfg_geti("replication_synchro_quorum");
> + if (quorum <= 0 || quorum > VCLOCK_MAX) {
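It should be `quorum >= VCLOCK_MAX`, because you can't have VCLOCK_MAX (32)
instances in a cluster, only 31: id 0 is used by anonymous replicas. This would
also match the error message below, which says the value must be "less than"
the maximal number of replicas.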
> + diag_set(ClientError, ER_CFG, "replication_synchro_quorum",
> + "the value must be greater than zero and less than "
> + "maximal number of replicas");
> + return -1;
> + }
> + return quorum;
> +}
> +
> +static double
> +box_check_replication_synchro_timeout(void)
> +{
> + double timeout = cfg_getd("replication_synchro_timeout");
> + if (timeout <= 0) {
> + diag_set(ClientError, ER_CFG, "replication_synchro_timeout",
> + "the value must be greater than zero");
> + return -1;
> + }
> + return timeout;
> +}
> +
> static double
> box_check_replication_sync_timeout(void)
> {
> @@ -658,6 +683,10 @@ box_check_config()
> box_check_replication_connect_timeout();
> box_check_replication_connect_quorum();
> box_check_replication_sync_lag();
> + if (box_check_replication_synchro_quorum() < 0)
> + diag_raise();
> + if (box_check_replication_synchro_timeout() < 0)
> + diag_raise();
> box_check_replication_sync_timeout();
> box_check_readahead(cfg_geti("readahead"));
> box_check_checkpoint_count(cfg_geti("checkpoint_count"));
> @@ -777,6 +806,26 @@ box_set_replication_sync_lag(void)
> replication_sync_lag = box_check_replication_sync_lag();
> }
>
> +int
> +box_set_replication_synchro_quorum(void)
> +{
> + int value = box_check_replication_synchro_quorum();
> + if (value < 0)
> + return -1;
> + replication_synchro_quorum = value;
> + return 0;
> +}
> +
> +int
> +box_set_replication_synchro_timeout(void)
> +{
> + double value = box_check_replication_synchro_timeout();
> + if (value < 0)
> + return -1;
> + replication_synchro_timeout = value;
> + return 0;
> +}
> +
> void
> box_set_replication_sync_timeout(void)
> {
> @@ -2417,6 +2466,10 @@ box_cfg_xc(void)
> box_set_replication_connect_timeout();
> box_set_replication_connect_quorum();
> box_set_replication_sync_lag();
> + if (box_set_replication_synchro_quorum() != 0)
> + diag_raise();
> + if (box_set_replication_synchro_timeout() != 0)
> + diag_raise();
> box_set_replication_sync_timeout();
> box_set_replication_skip_conflict();
> box_set_replication_anon();
> diff --git a/src/box/box.h b/src/box/box.h
> index 557542a83..f9789154e 100644
> --- a/src/box/box.h
> +++ b/src/box/box.h
> @@ -243,6 +243,8 @@ void box_set_replication_timeout(void);
> void box_set_replication_connect_timeout(void);
> void box_set_replication_connect_quorum(void);
> void box_set_replication_sync_lag(void);
> +int box_set_replication_synchro_quorum(void);
> +int box_set_replication_synchro_timeout(void);
> void box_set_replication_sync_timeout(void);
> void box_set_replication_skip_conflict(void);
> void box_set_replication_anon(void);
> diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc
> index a5b15e527..d481155cd 100644
> --- a/src/box/lua/cfg.cc
> +++ b/src/box/lua/cfg.cc
> @@ -313,6 +313,22 @@ lbox_cfg_set_replication_sync_lag(struct lua_State *L)
> return 0;
> }
>
> +static int
> +lbox_cfg_set_replication_synchro_quorum(struct lua_State *L)
> +{
> + if (box_set_replication_synchro_quorum() != 0)
> + luaT_error(L);
> + return 0;
> +}
> +
> +static int
> +lbox_cfg_set_replication_synchro_timeout(struct lua_State *L)
> +{
> + if (box_set_replication_synchro_timeout() != 0)
> + luaT_error(L);
> + return 0;
> +}
> +
> static int
> lbox_cfg_set_replication_sync_timeout(struct lua_State *L)
> {
> @@ -370,6 +386,8 @@ box_lua_cfg_init(struct lua_State *L)
> {"cfg_set_replication_connect_quorum", lbox_cfg_set_replication_connect_quorum},
> {"cfg_set_replication_connect_timeout", lbox_cfg_set_replication_connect_timeout},
> {"cfg_set_replication_sync_lag", lbox_cfg_set_replication_sync_lag},
> + {"cfg_set_replication_synchro_quorum", lbox_cfg_set_replication_synchro_quorum},
> + {"cfg_set_replication_synchro_timeout", lbox_cfg_set_replication_synchro_timeout},
> {"cfg_set_replication_sync_timeout", lbox_cfg_set_replication_sync_timeout},
> {"cfg_set_replication_skip_conflict", lbox_cfg_set_replication_skip_conflict},
> {"cfg_set_replication_anon", lbox_cfg_set_replication_anon},
> diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua
> index f2f2df6f8..a7f03c7d6 100644
> --- a/src/box/lua/load_cfg.lua
> +++ b/src/box/lua/load_cfg.lua
> @@ -89,6 +89,8 @@ local default_cfg = {
> replication_timeout = 1,
> replication_sync_lag = 10,
> replication_sync_timeout = 300,
> + replication_synchro_quorum = 1,
> + replication_synchro_timeout = 5,
> replication_connect_timeout = 30,
> replication_connect_quorum = nil, -- connect all
> replication_skip_conflict = false,
> @@ -164,6 +166,8 @@ local template_cfg = {
> replication_timeout = 'number',
> replication_sync_lag = 'number',
> replication_sync_timeout = 'number',
> + replication_synchro_quorum = 'number',
> + replication_synchro_timeout = 'number',
> replication_connect_timeout = 'number',
> replication_connect_quorum = 'number',
> replication_skip_conflict = 'boolean',
> @@ -280,6 +284,8 @@ local dynamic_cfg = {
> replication_connect_quorum = private.cfg_set_replication_connect_quorum,
> replication_sync_lag = private.cfg_set_replication_sync_lag,
> replication_sync_timeout = private.cfg_set_replication_sync_timeout,
> + replication_synchro_quorum = private.cfg_set_replication_synchro_quorum,
> + replication_synchro_timeout = private.cfg_set_replication_synchro_timeout,
> replication_skip_conflict = private.cfg_set_replication_skip_conflict,
> replication_anon = private.cfg_set_replication_anon,
> instance_uuid = check_instance_uuid,
> @@ -313,6 +319,8 @@ local dynamic_cfg_order = {
> replication_timeout = 150,
> replication_sync_lag = 150,
> replication_sync_timeout = 150,
> + replication_synchro_quorum = 150,
> + replication_synchro_timeout = 150,
> replication_connect_timeout = 150,
> replication_connect_quorum = 150,
> replication = 200,
> @@ -348,6 +356,8 @@ local dynamic_cfg_skip_at_load = {
> replication_connect_quorum = true,
> replication_sync_lag = true,
> replication_sync_timeout = true,
> + replication_synchro_quorum = true,
> + replication_synchro_timeout = true,
> replication_skip_conflict = true,
> replication_anon = true,
> wal_dir_rescan_delay = true,
> diff --git a/src/box/replication.cc b/src/box/replication.cc
> index 273a7cb66..01e9e876a 100644
> --- a/src/box/replication.cc
> +++ b/src/box/replication.cc
> @@ -51,6 +51,8 @@ double replication_timeout = 1.0; /* seconds */
> double replication_connect_timeout = 30.0; /* seconds */
> int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL;
> double replication_sync_lag = 10.0; /* seconds */
> +int replication_synchro_quorum = 1;
> +double replication_synchro_timeout = 5.0; /* seconds */
> double replication_sync_timeout = 300.0; /* seconds */
> bool replication_skip_conflict = false;
> bool replication_anon = false;
> diff --git a/src/box/replication.h b/src/box/replication.h
> index 93a25c8a7..a081870f9 100644
> --- a/src/box/replication.h
> +++ b/src/box/replication.h
> @@ -125,6 +125,18 @@ extern int replication_connect_quorum;
> */
> extern double replication_sync_lag;
>
> +/**
> + * Minimal number of replicas which should ACK a synchronous
> + * transaction to be able to confirm it and commit.
> + */
> +extern int replication_synchro_quorum;
> +
> +/**
> + * Time in seconds which the master node is able to wait for ACKs
> + * for a synchronous transaction until it is rolled back.
> + */
> +extern double replication_synchro_timeout;
> +
> /**
> * Max time to wait for appliers to synchronize before entering
> * the orphan mode.
> diff --git a/test/app-tap/init_script.result b/test/app-tap/init_script.result
> index 7c4454285..857f0c95f 100644
> --- a/test/app-tap/init_script.result
> +++ b/test/app-tap/init_script.result
> @@ -30,6 +30,8 @@ replication_connect_timeout:30
> replication_skip_conflict:false
> replication_sync_lag:10
> replication_sync_timeout:300
> +replication_synchro_quorum:1
> +replication_synchro_timeout:5
> replication_timeout:1
> slab_alloc_factor:1.05
> sql_cache_size:5242880
> diff --git a/test/box/admin.result b/test/box/admin.result
> index d94da8c5d..ab3e80a97 100644
> --- a/test/box/admin.result
> +++ b/test/box/admin.result
> @@ -81,6 +81,10 @@ cfg_filter(box.cfg)
> - 10
> - - replication_sync_timeout
> - 300
> + - - replication_synchro_quorum
> + - 1
> + - - replication_synchro_timeout
> + - 5
> - - replication_timeout
> - 1
> - - slab_alloc_factor
> diff --git a/test/box/cfg.result b/test/box/cfg.result
> index b41d54599..bdd210b09 100644
> --- a/test/box/cfg.result
> +++ b/test/box/cfg.result
> @@ -69,6 +69,10 @@ cfg_filter(box.cfg)
> | - 10
> | - - replication_sync_timeout
> | - 300
> + | - - replication_synchro_quorum
> + | - 1
> + | - - replication_synchro_timeout
> + | - 5
> | - - replication_timeout
> | - 1
> | - - slab_alloc_factor
> @@ -172,6 +176,10 @@ cfg_filter(box.cfg)
> | - 10
> | - - replication_sync_timeout
> | - 300
> + | - - replication_synchro_quorum
> + | - 1
> + | - - replication_synchro_timeout
> + | - 5
> | - - replication_timeout
> | - 1
> | - - slab_alloc_factor
--
Serge Petrenko
More information about the Tarantool-patches
mailing list