From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id 8ED3F29FD2 for ; Wed, 29 Aug 2018 14:57:05 -0400 (EDT) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id 56F3JCPIUovx for ; Wed, 29 Aug 2018 14:57:05 -0400 (EDT) Received: from mail-lf1-f51.google.com (mail-lf1-f51.google.com [209.85.167.51]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 1895F29F97 for ; Wed, 29 Aug 2018 14:57:05 -0400 (EDT) Received: by mail-lf1-f51.google.com with SMTP id r4-v6so5148195lff.12 for ; Wed, 29 Aug 2018 11:57:04 -0700 (PDT) From: Olga Arkhangelskaia Subject: [tarantool-patches] [PATCH 2/3] box: add replication_sync_lag_timeout Date: Wed, 29 Aug 2018 21:56:41 +0300 Message-Id: <20180829185642.49479-2-krishtal.olja@gmail.com> In-Reply-To: <20180829185642.49479-1-krishtal.olja@gmail.com> References: <20180829185642.49479-1-krishtal.olja@gmail.com> Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: Olga Arkhangelskaia In scope of gh-3427 we need timeout in case if replicaset will wait for synchronization for too long, or even forever. Default value is TIMEOUT_INFINITY. @TarantoolBot document Title: Introduce new option replication_sync_lag_timeout. After initial bootstrap or after replication configuration changes we need to sync up with replication quorum. 
Sometimes the sync can take too long, or replication_sync_lag can be smaller than the network latency; in that case the replica gets stuck in a sync loop that can't be cancelled. To avoid this situation, replication_sync_lag_timeout can be used. When the time set in replication_sync_lag_timeout has passed, the replica enters the orphan state. Can be set dynamically. Default value is TIMEOUT_INFINITY. Closes #3674 --- https://github.com/tarantool/tarantool/issues/3647 https://github.com/tarantool/tarantool/tree/OKriw/gh-3427-replication-no-sync-1.9 src/box/box.cc | 19 +++++++++++++++++++ src/box/box.h | 1 + src/box/lua/cfg.cc | 12 ++++++++++++ src/box/lua/load_cfg.lua | 4 ++++ src/box/replication.cc | 14 ++++++++++---- src/box/replication.h | 6 ++++++ test/box-tap/cfg.test.lua | 9 ++++++++- test/box/admin.result | 2 ++ test/box/cfg.result | 4 ++++ 9 files changed, 66 insertions(+), 5 deletions(-) diff --git a/src/box/box.cc b/src/box/box.cc index 7155ad085..0f8364ebc 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -420,6 +420,17 @@ box_check_replication_sync_lag(void) return lag; } +static double +box_check_replication_sync_lag_timeout(void) +{ + double timeout = cfg_getd_default("replication_sync_lag_timeout", TIMEOUT_INFINITY); + if (timeout <= 0) { + tnt_raise(ClientError, ER_CFG, "replication_sync_lag_timeout", + "the value must be greater than 0"); + } + return timeout; +} + static void box_check_instance_uuid(struct tt_uuid *uuid) { @@ -546,6 +557,7 @@ box_check_config() box_check_replication_connect_timeout(); box_check_replication_connect_quorum(); box_check_replication_sync_lag(); + box_check_replication_sync_lag_timeout(); box_check_readahead(cfg_geti("readahead")); box_check_checkpoint_count(cfg_geti("checkpoint_count")); box_check_wal_max_rows(cfg_geti64("rows_per_wal")); @@ -662,6 +674,12 @@ box_set_replication_sync_lag(void) replication_sync_lag = box_check_replication_sync_lag(); } +void +box_set_replication_sync_lag_timeout(void) +{ + replication_sync_lag_timeout = 
box_check_replication_sync_lag_timeout(); +} + void box_bind(void) { @@ -1754,6 +1772,7 @@ box_cfg_xc(void) box_set_replication_connect_timeout(); box_set_replication_connect_quorum(); box_set_replication_sync_lag(); + box_set_replication_sync_lag_timeout(); xstream_create(&join_stream, apply_initial_join_row); xstream_create(&subscribe_stream, apply_row); diff --git a/src/box/box.h b/src/box/box.h index 3090fdcdb..f30d0e4cf 100644 --- a/src/box/box.h +++ b/src/box/box.h @@ -177,6 +177,7 @@ void box_set_replication_timeout(void); void box_set_replication_connect_timeout(void); void box_set_replication_connect_quorum(void); void box_set_replication_sync_lag(void); +void box_set_replication_sync_lag_timeout(void); extern "C" { #endif /* defined(__cplusplus) */ diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc index 5442723b5..bda36a2b9 100644 --- a/src/box/lua/cfg.cc +++ b/src/box/lua/cfg.cc @@ -273,6 +273,17 @@ lbox_cfg_set_replication_sync_lag(struct lua_State *L) return 0; } +static int +lbox_cfg_set_replication_sync_lag_timeout(struct lua_State *L) +{ + try { + box_set_replication_sync_lag_timeout(); + } catch (Exception *) { + luaT_error(L); + } + return 0; +} + void box_lua_cfg_init(struct lua_State *L) { @@ -298,6 +309,7 @@ box_lua_cfg_init(struct lua_State *L) {"cfg_set_replication_connect_timeout", lbox_cfg_set_replication_connect_timeout}, {"cfg_set_replication_connect_quorum", lbox_cfg_set_replication_connect_quorum}, {"cfg_set_replication_sync_lag", lbox_cfg_set_replication_sync_lag}, + {"cfg_set_replication_sync_lag_timeout", lbox_cfg_set_replication_sync_lag_timeout}, {NULL, NULL} }; diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua index f803d8987..f77a86cdd 100644 --- a/src/box/lua/load_cfg.lua +++ b/src/box/lua/load_cfg.lua @@ -72,6 +72,7 @@ local default_cfg = { worker_pool_threads = 4, replication_timeout = 1, replication_sync_lag = 10, + replication_sync_lag_timeout = 500 * 365 * 86400, replication_connect_timeout = 30, 
replication_connect_quorum = nil, -- connect all } @@ -128,6 +129,7 @@ local template_cfg = { worker_pool_threads = 'number', replication_timeout = 'number', replication_sync_lag = 'number', + replication_sync_lag_timeout = 'number', replication_connect_timeout = 'number', replication_connect_quorum = 'number', } @@ -200,6 +202,7 @@ local dynamic_cfg = { replication_connect_timeout = private.cfg_set_replication_connect_timeout, replication_connect_quorum = private.cfg_set_replication_connect_quorum, replication_sync_lag = private.cfg_set_replication_sync_lag, + replication_sync_lag_timeout = private.cfg_set_replication_sync_lag_timeout, instance_uuid = function() if box.cfg.instance_uuid ~= box.info.uuid then box.error(box.error.CFG, 'instance_uuid', @@ -222,6 +225,7 @@ local dynamic_cfg_skip_at_load = { replication_connect_timeout = true, replication_connect_quorum = true, replication_sync_lag = true, + replication_sync_lag_timeout = true, wal_dir_rescan_delay = true, custom_proc_title = true, force_recovery = true, diff --git a/src/box/replication.cc b/src/box/replication.cc index 861ce34ea..731b05faf 100644 --- a/src/box/replication.cc +++ b/src/box/replication.cc @@ -49,7 +49,7 @@ double replication_timeout = 1.0; /* seconds */ double replication_connect_timeout = 30.0; /* seconds */ int replication_connect_quorum = REPLICATION_CONNECT_QUORUM_ALL; double replication_sync_lag = 10.0; /* seconds */ - +double replication_sync_lag_timeout = TIMEOUT_INFINITY; struct replicaset replicaset; static int @@ -673,12 +673,18 @@ replicaset_sync(void) /* * Wait until all connected replicas synchronize up to - * replication_sync_lag + * replication_sync_lag or return on replication_sync_lag_timeout */ while (replicaset.applier.synced < quorum && replicaset.applier.connected + - replicaset.applier.loading >= quorum) - fiber_cond_wait(&replicaset.applier.cond); + replicaset.applier.loading >= quorum) { + if (fiber_cond_wait_timeout(&replicaset.applier.cond, + 
replication_sync_lag_timeout) != 0) { + say_crit("replication_sync_lag_timeout fired, entering orphan mode"); + return; + } + + } if (replicaset.applier.synced < quorum) { /* diff --git a/src/box/replication.h b/src/box/replication.h index 06a2867b6..71c17dc8e 100644 --- a/src/box/replication.h +++ b/src/box/replication.h @@ -126,6 +126,12 @@ extern int replication_connect_quorum; */ extern double replication_sync_lag; +/** + * Time to wait before enter orphan state in case of unsuccessful + * synchronization. + */ +extern double replication_sync_lag_timeout; + /** * Wait for the given period of time before trying to reconnect * to a master. diff --git a/test/box-tap/cfg.test.lua b/test/box-tap/cfg.test.lua index d315346de..dd883a020 100755 --- a/test/box-tap/cfg.test.lua +++ b/test/box-tap/cfg.test.lua @@ -6,7 +6,7 @@ local socket = require('socket') local fio = require('fio') local uuid = require('uuid') local msgpack = require('msgpack') -test:plan(91) +test:plan(94) -------------------------------------------------------------------------------- -- Invalid values @@ -29,6 +29,8 @@ invalid('replication_timeout', -1) invalid('replication_timeout', 0) invalid('replication_sync_lag', -1) invalid('replication_sync_lag', 0) +invalid('replication_sync_lag_timeout', -1) +invalid('replication_sync_lag_timeout', 0) invalid('replication_connect_timeout', -1) invalid('replication_connect_timeout', 0) invalid('replication_connect_quorum', -1) @@ -100,6 +102,11 @@ status, result = pcall(box.cfg, {replication_sync_lag = 1}) test:ok(status, "dynamic replication_sync_lag") pcall(box.cfg, {repliction_sync_lag = lag}) +timeout = box.cfg.replication_sync_lag_timeout +status, result = pcall(box.cfg, {replication_sync_lag_timeout = 10}) +test:ok(status, "dynamic replication_sync_lag_timeout") +pcall(box.cfg, {repliction_sync_lag_timeout = timeout}) + -------------------------------------------------------------------------------- -- gh-534: Segmentation fault after two bad wal_mode 
settings -------------------------------------------------------------------------------- diff --git a/test/box/admin.result b/test/box/admin.result index c3e318a6a..d7205b088 100644 --- a/test/box/admin.result +++ b/test/box/admin.result @@ -58,6 +58,8 @@ cfg_filter(box.cfg) - 30 - - replication_sync_lag - 10 + - - replication_sync_lag_timeout + - 15768000000 - - replication_timeout - 1 - - rows_per_wal diff --git a/test/box/cfg.result b/test/box/cfg.result index a2df83310..20a8e0384 100644 --- a/test/box/cfg.result +++ b/test/box/cfg.result @@ -54,6 +54,8 @@ cfg_filter(box.cfg) - 30 - - replication_sync_lag - 10 + - - replication_sync_lag_timeout + - 15768000000 - - replication_timeout - 1 - - rows_per_wal @@ -143,6 +145,8 @@ cfg_filter(box.cfg) - 30 - - replication_sync_lag - 10 + - - replication_sync_lag_timeout + - 15768000000 - - replication_timeout - 1 - - rows_per_wal -- 2.14.3 (Apple Git-98)