From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 0E16D2C68E for ; Fri, 12 Oct 2018 15:46:52 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id Tc17KvqzPSKL for ; Fri, 12 Oct 2018 15:46:51 -0400 (EDT) Received: from smtp52.i.mail.ru (smtp52.i.mail.ru [94.100.177.112]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 3A5022C580 for ; Fri, 12 Oct 2018 15:46:51 -0400 (EDT) From: Olga Arkhangelskaia Subject: [tarantool-patches] [PATCH v2 1/2] box: added replication_dead/rw_gap options Date: Fri, 12 Oct 2018 22:45:56 +0300 Message-Id: <20181012194557.7445-2-arkholga@tarantool.org> In-Reply-To: <20181012194557.7445-1-arkholga@tarantool.org> References: <20181012194557.7445-1-arkholga@tarantool.org> Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: Olga Arkhangelskaia In scope of gh-3110 we need options that store time periods to be compared with the time of the last activity of the relay and the applier. This patch introduces the replication_dead_gap and replication_rw_gap options. replication_dead_gap is configured in box.cfg, with a default value of 0. If the time elapsed since the last reader/writer activity of a given replica exceeds replication_dead_gap, the replica is suspected to be dead. replication_dead_gap is measured in hours. replication_rw_gap is configured in box.cfg, with a default value of 0. 
If the time difference between the last reader activity and the last writer activity of a given replica exceeds replication_rw_gap, the replica is suspected to be dead. replication_rw_gap is measured in hours. --- src/box/box.cc | 34 ++++++++++++++++++++++++++++++++++ src/box/box.h | 2 ++ src/box/lua/cfg.cc | 24 ++++++++++++++++++++++++ src/box/lua/load_cfg.lua | 8 ++++++++ src/box/replication.cc | 3 ++- src/box/replication.h | 12 ++++++++++++ test/box/admin.result | 4 ++++ test/box/cfg.result | 8 ++++++++ 8 files changed, 94 insertions(+), 1 deletion(-) diff --git a/src/box/box.cc b/src/box/box.cc index 7e32b9fc7..f74e012f7 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -465,6 +465,28 @@ box_check_replication_sync_timeout(void) return timeout; } +static double +box_check_replication_dead_gap(void) +{ + double gap = cfg_getd("replication_dead_gap"); + if (gap <= 0) { + tnt_raise(ClientError, ER_CFG, "replication_dead_gap", + "the value must be greater than 0"); + } + return gap; +} + +static double +box_check_replication_rw_gap(void) +{ + double gap = cfg_getd("replication_rw_gap"); + if (gap <= 0) { + tnt_raise(ClientError, ER_CFG, "replication_rw_gap", + "the value must be greater than 0"); + } + return gap; +} + static void box_check_instance_uuid(struct tt_uuid *uuid) { @@ -739,6 +761,18 @@ box_set_replication_sync_timeout(void) replication_sync_timeout = box_check_replication_sync_timeout(); } +void +box_set_replication_dead_gap(void) +{ + replication_dead_gap = box_check_replication_dead_gap(); +} + +void +box_set_replication_rw_gap(void) +{ + replication_rw_gap = box_check_replication_rw_gap(); +} + void box_set_replication_skip_conflict(void) { diff --git a/src/box/box.h b/src/box/box.h index 9930d4a1a..bfb5bb873 100644 --- a/src/box/box.h +++ b/src/box/box.h @@ -198,6 +198,8 @@ void box_set_replication_connect_quorum(void); void box_set_replication_sync_lag(void); void box_set_replication_sync_timeout(void); void box_set_replication_skip_conflict(void); +void 
box_set_replication_dead_gap(void); +void box_set_replication_rw_gap(void); void box_set_net_msg_max(void); extern "C" { diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc index c3825591c..f34b34bee 100644 --- a/src/box/lua/cfg.cc +++ b/src/box/lua/cfg.cc @@ -316,6 +316,28 @@ lbox_cfg_set_replication_sync_timeout(struct lua_State *L) return 0; } +static int +lbox_cfg_set_replication_dead_gap(struct lua_State *L) +{ + try { + box_set_replication_dead_gap(); + } catch (Exception *) { + luaT_error(L); + } + return 0; +} + +static int +lbox_cfg_set_replication_rw_gap(struct lua_State *L) +{ + try { + box_set_replication_rw_gap(); + } catch (Exception *) { + luaT_error(L); + } + return 0; +} + static int lbox_cfg_set_replication_skip_conflict(struct lua_State *L) { @@ -353,6 +375,8 @@ box_lua_cfg_init(struct lua_State *L) {"cfg_set_replication_sync_lag", lbox_cfg_set_replication_sync_lag}, {"cfg_set_replication_sync_timeout", lbox_cfg_set_replication_sync_timeout}, {"cfg_set_replication_skip_conflict", lbox_cfg_set_replication_skip_conflict}, + {"cfg_set_replication_dead_gap", lbox_cfg_set_replication_dead_gap}, + {"cfg_set_replication_rw_gap", lbox_cfg_set_replication_rw_gap}, {"cfg_set_net_msg_max", lbox_cfg_set_net_msg_max}, {NULL, NULL} }; diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua index f62f4dc1e..c15769dfe 100644 --- a/src/box/lua/load_cfg.lua +++ b/src/box/lua/load_cfg.lua @@ -76,6 +76,8 @@ local default_cfg = { replication_connect_timeout = 30, replication_connect_quorum = nil, -- connect all replication_skip_conflict = false, + replication_dead_gap = 0, + replication_rw_gap = 0, feedback_enabled = true, feedback_host = "https://feedback.tarantool.io", feedback_interval = 3600, @@ -138,6 +140,8 @@ local template_cfg = { replication_connect_timeout = 'number', replication_connect_quorum = 'number', replication_skip_conflict = 'boolean', + replication_dead_gap = 'number', + replication_rw_gap = 'number', feedback_enabled = 'boolean', 
feedback_host = 'string', feedback_interval = 'number', @@ -232,6 +236,8 @@ local dynamic_cfg = { replication_sync_lag = private.cfg_set_replication_sync_lag, replication_sync_timeout = private.cfg_set_replication_sync_timeout, replication_skip_conflict = private.cfg_set_replication_skip_conflict, + replication_dead_gap = private.cfg_set_replication_dead_gap, + replication_rw_gap = private.cfg_set_replication_rw_gap, instance_uuid = check_instance_uuid, replicaset_uuid = check_replicaset_uuid, net_msg_max = private.cfg_set_net_msg_max, @@ -248,6 +254,8 @@ local dynamic_cfg_skip_at_load = { replication_connect_quorum = true, replication_sync_lag = true, replication_sync_timeout = true, + replication_dead_gap = true, + replication_rw_gap = true, wal_dir_rescan_delay = true, custom_proc_title = true, force_recovery = true, diff --git a/src/box/replication.cc b/src/box/replication.cc index 2cb4ec0f8..392f8d9fd 100644 --- a/src/box/replication.cc +++ b/src/box/replication.cc @@ -53,7 +53,8 @@ int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL; double replication_sync_lag = 10.0; /* seconds */ double replication_sync_timeout = 300.0; /* seconds */ bool replication_skip_conflict = false; - +double replication_dead_gap = 0.0; /* hours */ +double replication_rw_gap = 0.0; /* hours */ struct replicaset replicaset; static int diff --git a/src/box/replication.h b/src/box/replication.h index 2ac620d86..bf0c99fb7 100644 --- a/src/box/replication.h +++ b/src/box/replication.h @@ -137,6 +137,18 @@ extern double replication_sync_timeout; */ extern bool replication_skip_conflict; +/* + * If replica is not active during time that exceeds replication_dead_gap it + * is considered as dead replica and can be thrown out from system space. + */ +extern double replication_dead_gap; + +/* + * If replica has both roles and gap between read/write activities exceeds this + * value it can be considered as dead one. 
+ */ +extern double replication_rw_gap; + /** * Wait for the given period of time before trying to reconnect * to a master. diff --git a/test/box/admin.result b/test/box/admin.result index 8048460a1..5341e6f78 100644 --- a/test/box/admin.result +++ b/test/box/admin.result @@ -64,6 +64,10 @@ cfg_filter(box.cfg) - 16320 - - replication_connect_timeout - 30 + - - replication_dead_gap + - 0 + - - replication_rw_gap + - 0 - - replication_skip_conflict - false - - replication_sync_lag diff --git a/test/box/cfg.result b/test/box/cfg.result index 515033754..2f6bcd788 100644 --- a/test/box/cfg.result +++ b/test/box/cfg.result @@ -60,6 +60,10 @@ cfg_filter(box.cfg) - 16320 - - replication_connect_timeout - 30 + - - replication_dead_gap + - 0 + - - replication_rw_gap + - 0 - - replication_skip_conflict - false - - replication_sync_lag @@ -161,6 +165,10 @@ cfg_filter(box.cfg) - 16320 - - replication_connect_timeout - 30 + - - replication_dead_gap + - 0 + - - replication_rw_gap + - 0 - - replication_skip_conflict - false - - replication_sync_lag -- 2.14.3 (Apple Git-98)