* [tarantool-patches] [PATCH vshard 1/1] storage: do read_only + sync on master demotion before box.cfg
@ 2018-03-22 22:11 Vladislav Shpilevoy
2018-03-26 10:38 ` [tarantool-patches] " Georgy Kirichenko
0 siblings, 1 reply; 2+ messages in thread
From: Vladislav Shpilevoy @ 2018-03-22 22:11 UTC (permalink / raw)
To: tarantool-patches; +Cc: georgy, Vladislav Shpilevoy
Before a master is demoted and updated its box.cfg it must turn
read_only mode on and try to sync with slaves. With no sync and
read_only before box.cfg it is possible, that some new data will
appear during box reconfiguration, and it will not be synced.
Closes #84
---
Branch: https://github.com/tarantool/vshard/tree/gh-84-sync-before-cfg
Issue: https://github.com/tarantool/vshard/issues/84
test/misc/reconfigure.result | 14 ++++
test/misc/reconfigure.test.lua | 4 ++
test/storage/demote_sync_errinj.result | 110 +++++++++++++++++++++++++++++++
test/storage/demote_sync_errinj.test.lua | 39 +++++++++++
test/storage/read_only_slave.result | 27 +++++---
test/storage/read_only_slave.test.lua | 16 +++--
test/unit/config.result | 23 +++++++
test/unit/config.test.lua | 11 ++++
vshard/cfg.lua | 7 +-
vshard/consts.lua | 3 +-
vshard/storage/init.lua | 76 +++++++++++++++++++--
11 files changed, 303 insertions(+), 27 deletions(-)
create mode 100644 test/storage/demote_sync_errinj.result
create mode 100644 test/storage/demote_sync_errinj.test.lua
diff --git a/test/misc/reconfigure.result b/test/misc/reconfigure.result
index 0a014a3..c7960b3 100644
--- a/test/misc/reconfigure.result
+++ b/test/misc/reconfigure.result
@@ -100,6 +100,13 @@ not vshard.storage.internal.collect_lua_garbage
---
- true
...
+vshard.storage.internal.sync_timeout
+---
+- 1
+...
+cfg.sync_timeout = 100
+---
+...
cfg.collect_lua_garbage = true
---
...
@@ -120,6 +127,10 @@ not vshard.storage.internal.collect_lua_garbage
---
- true
...
+vshard.storage.internal.sync_timeout
+---
+- 1
+...
vshard.storage.internal.rebalancer_max_receiving ~= 1000
---
- true
@@ -128,6 +139,9 @@ vshard.storage.internal.collect_bucket_garbage_interval ~= 100
---
- true
...
+cfg.sync_timeout = nil
+---
+...
cfg.collect_lua_garbage = nil
---
...
diff --git a/test/misc/reconfigure.test.lua b/test/misc/reconfigure.test.lua
index 033214a..25dc2ca 100644
--- a/test/misc/reconfigure.test.lua
+++ b/test/misc/reconfigure.test.lua
@@ -41,14 +41,18 @@ util.check_error(vshard.storage.cfg, cfg, 'unknow uuid')
-- changed.
--
not vshard.storage.internal.collect_lua_garbage
+vshard.storage.internal.sync_timeout
+cfg.sync_timeout = 100
cfg.collect_lua_garbage = true
cfg.rebalancer_max_receiving = 1000
cfg.collect_bucket_garbage_interval = 100
cfg.invalid_option = 'kek'
vshard.storage.cfg(cfg, names.storage_1_a)
not vshard.storage.internal.collect_lua_garbage
+vshard.storage.internal.sync_timeout
vshard.storage.internal.rebalancer_max_receiving ~= 1000
vshard.storage.internal.collect_bucket_garbage_interval ~= 100
+cfg.sync_timeout = nil
cfg.collect_lua_garbage = nil
cfg.rebalancer_max_receiving = nil
cfg.collect_bucket_garbage_interval = nil
diff --git a/test/storage/demote_sync_errinj.result b/test/storage/demote_sync_errinj.result
new file mode 100644
index 0000000..2d14504
--- /dev/null
+++ b/test/storage/demote_sync_errinj.result
@@ -0,0 +1,110 @@
+test_run = require('test_run').new()
+---
+...
+REPLICASET_1 = { 'storage_1_a', 'storage_1_b' }
+---
+...
+REPLICASET_2 = { 'storage_2_a', 'storage_2_b' }
+---
+...
+test_run:create_cluster(REPLICASET_1, 'storage')
+---
+...
+test_run:create_cluster(REPLICASET_2, 'storage')
+---
+...
+util = require('util')
+---
+...
+util.wait_master(test_run, REPLICASET_1, 'storage_1_a')
+---
+...
+util.wait_master(test_run, REPLICASET_2, 'storage_2_a')
+---
+...
+test_run:switch('storage_1_a')
+---
+- true
+...
+fiber = require('fiber')
+---
+...
+s = box.schema.create_space('test')
+---
+...
+pk = s:create_index('pk')
+---
+...
+vshard.storage.internal.errinj.ERRINJ_CFG_DELAY = true
+---
+...
+cfg.sharding[replicasets[1]].replicas[names.storage_1_b].master = true
+---
+...
+cfg.sharding[replicasets[1]].replicas[names.storage_1_a].master = false
+---
+...
+f = fiber.create(function() vshard.storage.cfg(cfg, names.storage_1_a) end)
+---
+...
+f:status()
+---
+- suspended
+...
+-- Can not write - read only mode is already on.
+s:replace{1}
+---
+- error: Can't modify data because this instance is in read-only mode.
+...
+test_run:switch('storage_1_b')
+---
+- true
+...
+cfg.sharding[replicasets[1]].replicas[names.storage_1_b].master = true
+---
+...
+cfg.sharding[replicasets[1]].replicas[names.storage_1_a].master = false
+---
+...
+vshard.storage.cfg(cfg, names.storage_1_b)
+---
+...
+box.space.test:select{}
+---
+- []
+...
+test_run:switch('storage_1_a')
+---
+- true
+...
+vshard.storage.internal.errinj.ERRINJ_CFG_DELAY = false
+---
+...
+while f:status() ~= 'dead' do fiber.sleep(0.1) end
+---
+...
+s:select{}
+---
+- []
+...
+test_run:switch('storage_1_b')
+---
+- true
+...
+box.space.test:select{}
+---
+- []
+...
+box.space.test:drop()
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+test_run:drop_cluster(REPLICASET_2)
+---
+...
+test_run:drop_cluster(REPLICASET_1)
+---
+...
diff --git a/test/storage/demote_sync_errinj.test.lua b/test/storage/demote_sync_errinj.test.lua
new file mode 100644
index 0000000..f12ab60
--- /dev/null
+++ b/test/storage/demote_sync_errinj.test.lua
@@ -0,0 +1,39 @@
+test_run = require('test_run').new()
+REPLICASET_1 = { 'storage_1_a', 'storage_1_b' }
+REPLICASET_2 = { 'storage_2_a', 'storage_2_b' }
+test_run:create_cluster(REPLICASET_1, 'storage')
+test_run:create_cluster(REPLICASET_2, 'storage')
+util = require('util')
+util.wait_master(test_run, REPLICASET_1, 'storage_1_a')
+util.wait_master(test_run, REPLICASET_2, 'storage_2_a')
+
+test_run:switch('storage_1_a')
+fiber = require('fiber')
+s = box.schema.create_space('test')
+pk = s:create_index('pk')
+vshard.storage.internal.errinj.ERRINJ_CFG_DELAY = true
+cfg.sharding[replicasets[1]].replicas[names.storage_1_b].master = true
+cfg.sharding[replicasets[1]].replicas[names.storage_1_a].master = false
+f = fiber.create(function() vshard.storage.cfg(cfg, names.storage_1_a) end)
+f:status()
+-- Can not write - read only mode is already on.
+s:replace{1}
+
+test_run:switch('storage_1_b')
+cfg.sharding[replicasets[1]].replicas[names.storage_1_b].master = true
+cfg.sharding[replicasets[1]].replicas[names.storage_1_a].master = false
+vshard.storage.cfg(cfg, names.storage_1_b)
+box.space.test:select{}
+
+test_run:switch('storage_1_a')
+vshard.storage.internal.errinj.ERRINJ_CFG_DELAY = false
+while f:status() ~= 'dead' do fiber.sleep(0.1) end
+s:select{}
+
+test_run:switch('storage_1_b')
+box.space.test:select{}
+box.space.test:drop()
+
+test_run:cmd("switch default")
+test_run:drop_cluster(REPLICASET_2)
+test_run:drop_cluster(REPLICASET_1)
diff --git a/test/storage/read_only_slave.result b/test/storage/read_only_slave.result
index 9d5c85e..07d3653 100644
--- a/test/storage/read_only_slave.result
+++ b/test/storage/read_only_slave.result
@@ -36,10 +36,17 @@ s = box.schema.create_space('test')
pk = s:create_index('pk')
---
...
+ok = nil
+---
+...
+err = nil
+---
+...
function on_master_enable() s:replace{1} end
---
...
-function on_master_disable() s:replace{2} end
+-- Test, that in disable trigger already can not write.
+function on_master_disable() ok, err = pcall(s.replace, s, {2}) end
---
...
vshard.storage.on_master_enable(on_master_enable)
@@ -85,8 +92,8 @@ vshard.storage.on_master_enable(on_master_enable)
vshard.storage.on_master_disable(on_master_disable)
---
...
--- Yes, there is no 3 or 4, because trigger is set after a replica
--- becames slave.
+-- Yes, there is no 3 or 4, because a trigger on disable always
+-- works in readonly.
s:select{}
---
- - [1]
@@ -128,28 +135,28 @@ box.cfg.read_only
---
- true
...
+ok, err
+---
+- false
+- Can't modify data because this instance is in read-only mode.
+...
fiber = require('fiber')
---
...
-while s:count() ~= 3 do fiber.sleep(0.1) end
+while s:count() ~= 2 do fiber.sleep(0.1) end
---
...
s:select{}
---
- - [1]
- - [2]
- [3]
...
test_run:switch('storage_1_b')
---
- true
...
--- Yes, there is no {2}, because replication source is unset
--- already.
-s:select{}
+s:drop()
---
-- - [1]
- - [3]
...
test_run:cmd("switch default")
---
diff --git a/test/storage/read_only_slave.test.lua b/test/storage/read_only_slave.test.lua
index 3253ad7..d49e6f2 100644
--- a/test/storage/read_only_slave.test.lua
+++ b/test/storage/read_only_slave.test.lua
@@ -11,8 +11,11 @@ test_run:switch('storage_1_a')
box.cfg.read_only
s = box.schema.create_space('test')
pk = s:create_index('pk')
+ok = nil
+err = nil
function on_master_enable() s:replace{1} end
-function on_master_disable() s:replace{2} end
+-- Test, that in disable trigger already can not write.
+function on_master_disable() ok, err = pcall(s.replace, s, {2}) end
vshard.storage.on_master_enable(on_master_enable)
vshard.storage.on_master_disable(on_master_disable)
s:select{}
@@ -27,8 +30,8 @@ function on_master_enable() s:replace{3} end
function on_master_disable() if not box.cfg.read_only then s:replace{4} end end
vshard.storage.on_master_enable(on_master_enable)
vshard.storage.on_master_disable(on_master_disable)
--- Yes, there is no 3 or 4, because trigger is set after a replica
--- becames slave.
+-- Yes, there is no 3 or 4, because a trigger on disable always
+-- works in readonly.
s:select{}
-- Check that after master change the read_only is updated, and
@@ -44,14 +47,13 @@ cfg.sharding[replicasets[1]].replicas[names.storage_1_b].master = true
cfg.sharding[replicasets[1]].replicas[names.storage_1_a].master = false
vshard.storage.cfg(cfg, names.storage_1_a)
box.cfg.read_only
+ok, err
fiber = require('fiber')
-while s:count() ~= 3 do fiber.sleep(0.1) end
+while s:count() ~= 2 do fiber.sleep(0.1) end
s:select{}
test_run:switch('storage_1_b')
--- Yes, there is no {2}, because replication source is unset
--- already.
-s:select{}
+s:drop()
test_run:cmd("switch default")
test_run:drop_cluster(REPLICASET_2)
diff --git a/test/unit/config.result b/test/unit/config.result
index 2a5473c..8b098fb 100644
--- a/test/unit/config.result
+++ b/test/unit/config.result
@@ -419,3 +419,26 @@ cfg.collect_lua_garbage = false
_ = lcfg.check(cfg)
---
...
+--
+-- gh-84: sync before master demotion, and allow to configure
+-- sync timeout.
+--
+cfg.sync_timeout = -100
+---
+...
+check(cfg)
+---
+- Sync timeout must be non-negative number
+...
+cfg.sync_timeout = 0
+---
+...
+_ = lcfg.check(cfg)
+---
+...
+cfg.sync_timeout = 10.5
+---
+...
+_ = lcfg.check(cfg)
+---
+...
diff --git a/test/unit/config.test.lua b/test/unit/config.test.lua
index 140c38c..0990d0e 100644
--- a/test/unit/config.test.lua
+++ b/test/unit/config.test.lua
@@ -166,3 +166,14 @@ cfg.collect_lua_garbage = true
_ = lcfg.check(cfg)
cfg.collect_lua_garbage = false
_ = lcfg.check(cfg)
+
+--
+-- gh-84: sync before master demotion, and allow to configure
+-- sync timeout.
+--
+cfg.sync_timeout = -100
+check(cfg)
+cfg.sync_timeout = 0
+_ = lcfg.check(cfg)
+cfg.sync_timeout = 10.5
+_ = lcfg.check(cfg)
diff --git a/vshard/cfg.lua b/vshard/cfg.lua
index c578af2..9860676 100644
--- a/vshard/cfg.lua
+++ b/vshard/cfg.lua
@@ -198,7 +198,11 @@ local cfg_template = {
{'collect_lua_garbage', {
type = 'boolean', name = 'Garbage Lua collect necessity',
is_optional = true, default = false
- }}
+ }},
+ {'sync_timeout', {
+ type = 'non-negative number', name = 'Sync timeout', is_optional = true,
+ default = consts.DEFAULT_SYNC_TIMEOUT
+ }},
}
--
@@ -227,6 +231,7 @@ local function remove_non_box_options(cfg)
cfg.shard_index = nil
cfg.collect_bucket_garbage_interval = nil
cfg.collect_lua_garbage = nil
+ cfg.sync_timeout = nil
end
return {
diff --git a/vshard/consts.lua b/vshard/consts.lua
index 47651d7..1a0e4ab 100644
--- a/vshard/consts.lua
+++ b/vshard/consts.lua
@@ -20,7 +20,6 @@ return {
REPLICATION_THRESHOLD_FAIL = 10,
DEFAULT_BUCKET_COUNT = 3000;
- BUCKET_SYNC_TIMEOUT = 0.1;
BUCKET_SENT_GARBAGE_DELAY = 0.5;
DEFAULT_REBALANCER_DISBALANCE_THRESHOLD = 1;
REBALANCER_IDLE_INTERVAL = 60 * 60;
@@ -30,7 +29,7 @@ return {
CALL_TIMEOUT_MAX = 64;
FAILOVER_UP_TIMEOUT = 5;
FAILOVER_DOWN_TIMEOUT = 1;
- SYNC_TIMEOUT = 1;
+ DEFAULT_SYNC_TIMEOUT = 1;
RECONNECT_TIMEOUT = 0.5;
DEFAULT_COLLECT_BUCKET_GARBAGE_INTERVAL = 0.5;
RECOVERY_INTERVAL = 5;
diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua
index a0f216e..6d66b05 100644
--- a/vshard/storage/init.lua
+++ b/vshard/storage/init.lua
@@ -35,10 +35,12 @@ if not M then
errinj = {
ERRINJ_BUCKET_FIND_GARBAGE_DELAY = false,
ERRINJ_RELOAD = false,
+ ERRINJ_CFG_DELAY = false,
},
-- This counter is used to restart background fibers with
-- new reloaded code.
module_version = 0,
+ sync_timeout = consts.DEFAULT_SYNC_TIMEOUT,
------------------- Garbage collection -------------------
-- Fiber to remove garbage buckets data.
@@ -305,7 +307,7 @@ local function sync(timeout)
end
log.debug("Synchronizing replicaset...")
- timeout = timeout or consts.SYNC_TIMEOUT
+ timeout = timeout or M.sync_timeout
local vclock = box.info.vclock
local tstart = lfiber.time()
repeat
@@ -539,14 +541,31 @@ local function buckets_discovery()
return ret
end
+--
+-- The only thing, that must be done to abort a master demote is
+-- a reset of read_only.
+--
+local function local_on_master_disable_abort()
+ box.cfg{read_only = false}
+end
+
+--
+-- Prepare to a master demotion. Before it, a master must stop
+-- accept writes, and try to wait until all of its data is
+-- replicated to each slave.
+--
+local function local_on_master_disable_prepare()
+ log.info("Resigning from the replicaset master role...")
+ box.cfg({read_only = true})
+ sync(M.sync_timeout)
+end
+
--
-- This function executes when a master role is removed from local
-- instance during configuration
--
local function local_on_master_disable()
M._on_master_disable:run()
- box.cfg({read_only = true})
- log.verbose("Resigning from the replicaset master role...")
-- Stop garbage collecting
if M.collect_bucket_garbage_fiber ~= nil then
M.collect_bucket_garbage_fiber:cancel()
@@ -558,13 +577,29 @@ local function local_on_master_disable()
M.recovery_fiber = nil
log.info('Recovery stopped')
end
- -- Wait until replicas are synchronized before one another become a new master
- sync(consts.SYNC_TIMEOUT)
log.info("Resigned from the replicaset master role")
end
local collect_garbage_f
+--
+-- The only thing, that must be done to abort a master promotion
+-- is a set read_only back to true.
+--
+local function local_on_master_enable_abort()
+ box.cfg({read_only = true})
+end
+
+--
+-- Promote does not require sync, because a replica can not have a
+-- data, that is not on a current master - the replica is read
+-- only. But read_only can not be set to false here, because
+-- until box.cfg is called, it can not be guaranteed, that the
+-- promotion will be successful.
+--
+local function local_on_master_enable_prepare()
+ log.info("Taking on replicaset master role...")
+end
--
-- This function executes whan a master role is added to local
-- instance during configuration
@@ -572,7 +607,6 @@ local collect_garbage_f
local function local_on_master_enable()
box.cfg({read_only = false})
M._on_master_enable:run()
- log.verbose("Taking on replicaset master role...")
recovery_garbage_receiving_buckets()
-- Start background process to collect garbage.
M.collect_bucket_garbage_fiber =
@@ -1348,9 +1382,37 @@ local function storage_cfg(cfg, this_replica_uuid)
local shard_index = cfg.shard_index
local collect_bucket_garbage_interval = cfg.collect_bucket_garbage_interval
local collect_lua_garbage = cfg.collect_lua_garbage
+ --
+ -- Sync timeout is a special case - it must be updated before
+ -- all other options to allow a user to demote a master with
+ -- a new sync timeout.
+ --
+ local old_sync_timeout = M.sync_timeout
+ M.sync_timeout = cfg.sync_timeout
lcfg.remove_non_box_options(cfg)
- box.cfg(cfg)
+ if was_master and not is_master then
+ local_on_master_disable_prepare()
+ end
+ if not was_master and is_master then
+ local_on_master_enable_prepare()
+ end
+
+ local ok, err = pcall(box.cfg, cfg)
+ while M.errinj.ERRINJ_CFG_DELAY do
+ lfiber.sleep(0.01)
+ end
+ if not ok then
+ M.sync_timeout = old_sync_timeout
+ if was_master and not is_master then
+ local_on_master_disable_abort()
+ end
+ if not was_master and is_master then
+ local_on_master_enable_abort()
+ end
+ error(err)
+ end
+
log.info("Box has been configured")
local uri = luri.parse(this_replica.uri)
box.once("vshard:storage:1", storage_schema_v1, uri.login, uri.password)
--
2.14.3 (Apple Git-98)
^ permalink raw reply [flat|nested] 2+ messages in thread
* [tarantool-patches] Re: [PATCH vshard 1/1] storage: do read_only + sync on master demotion before box.cfg
2018-03-22 22:11 [tarantool-patches] [PATCH vshard 1/1] storage: do read_only + sync on master demotion before box.cfg Vladislav Shpilevoy
@ 2018-03-26 10:38 ` Georgy Kirichenko
0 siblings, 0 replies; 2+ messages in thread
From: Georgy Kirichenko @ 2018-03-26 10:38 UTC (permalink / raw)
To: tarantool-patches; +Cc: Vladislav Shpilevoy
[-- Attachment #1: Type: text/plain, Size: 18355 bytes --]
Likes Ok for me
On Friday, March 23, 2018 1:11:45 AM MSK Vladislav Shpilevoy wrote:
> Before a master is demoted and updated its box.cfg it must turn
> read_only mode on and try to sync with slaves. With no sync and
> read_only before box.cfg it is possible, that some new data will
> appear during box reconfiguration, and it will not be synced.
>
> Closes #84
> ---
>
> Branch: https://github.com/tarantool/vshard/tree/gh-84-sync-before-cfg
> Issue: https://github.com/tarantool/vshard/issues/84
>
> test/misc/reconfigure.result | 14 ++++
> test/misc/reconfigure.test.lua | 4 ++
> test/storage/demote_sync_errinj.result | 110
> +++++++++++++++++++++++++++++++ test/storage/demote_sync_errinj.test.lua |
> 39 +++++++++++
> test/storage/read_only_slave.result | 27 +++++---
> test/storage/read_only_slave.test.lua | 16 +++--
> test/unit/config.result | 23 +++++++
> test/unit/config.test.lua | 11 ++++
> vshard/cfg.lua | 7 +-
> vshard/consts.lua | 3 +-
> vshard/storage/init.lua | 76 +++++++++++++++++++--
> 11 files changed, 303 insertions(+), 27 deletions(-)
> create mode 100644 test/storage/demote_sync_errinj.result
> create mode 100644 test/storage/demote_sync_errinj.test.lua
>
> diff --git a/test/misc/reconfigure.result b/test/misc/reconfigure.result
> index 0a014a3..c7960b3 100644
> --- a/test/misc/reconfigure.result
> +++ b/test/misc/reconfigure.result
> @@ -100,6 +100,13 @@ not vshard.storage.internal.collect_lua_garbage
> ---
> - true
> ...
> +vshard.storage.internal.sync_timeout
> +---
> +- 1
> +...
> +cfg.sync_timeout = 100
> +---
> +...
> cfg.collect_lua_garbage = true
> ---
> ...
> @@ -120,6 +127,10 @@ not vshard.storage.internal.collect_lua_garbage
> ---
> - true
> ...
> +vshard.storage.internal.sync_timeout
> +---
> +- 1
> +...
> vshard.storage.internal.rebalancer_max_receiving ~= 1000
> ---
> - true
> @@ -128,6 +139,9 @@ vshard.storage.internal.collect_bucket_garbage_interval
> ~= 100 ---
> - true
> ...
> +cfg.sync_timeout = nil
> +---
> +...
> cfg.collect_lua_garbage = nil
> ---
> ...
> diff --git a/test/misc/reconfigure.test.lua b/test/misc/reconfigure.test.lua
> index 033214a..25dc2ca 100644
> --- a/test/misc/reconfigure.test.lua
> +++ b/test/misc/reconfigure.test.lua
> @@ -41,14 +41,18 @@ util.check_error(vshard.storage.cfg, cfg, 'unknow uuid')
> -- changed.
> --
> not vshard.storage.internal.collect_lua_garbage
> +vshard.storage.internal.sync_timeout
> +cfg.sync_timeout = 100
> cfg.collect_lua_garbage = true
> cfg.rebalancer_max_receiving = 1000
> cfg.collect_bucket_garbage_interval = 100
> cfg.invalid_option = 'kek'
> vshard.storage.cfg(cfg, names.storage_1_a)
> not vshard.storage.internal.collect_lua_garbage
> +vshard.storage.internal.sync_timeout
> vshard.storage.internal.rebalancer_max_receiving ~= 1000
> vshard.storage.internal.collect_bucket_garbage_interval ~= 100
> +cfg.sync_timeout = nil
> cfg.collect_lua_garbage = nil
> cfg.rebalancer_max_receiving = nil
> cfg.collect_bucket_garbage_interval = nil
> diff --git a/test/storage/demote_sync_errinj.result
> b/test/storage/demote_sync_errinj.result new file mode 100644
> index 0000000..2d14504
> --- /dev/null
> +++ b/test/storage/demote_sync_errinj.result
> @@ -0,0 +1,110 @@
> +test_run = require('test_run').new()
> +---
> +...
> +REPLICASET_1 = { 'storage_1_a', 'storage_1_b' }
> +---
> +...
> +REPLICASET_2 = { 'storage_2_a', 'storage_2_b' }
> +---
> +...
> +test_run:create_cluster(REPLICASET_1, 'storage')
> +---
> +...
> +test_run:create_cluster(REPLICASET_2, 'storage')
> +---
> +...
> +util = require('util')
> +---
> +...
> +util.wait_master(test_run, REPLICASET_1, 'storage_1_a')
> +---
> +...
> +util.wait_master(test_run, REPLICASET_2, 'storage_2_a')
> +---
> +...
> +test_run:switch('storage_1_a')
> +---
> +- true
> +...
> +fiber = require('fiber')
> +---
> +...
> +s = box.schema.create_space('test')
> +---
> +...
> +pk = s:create_index('pk')
> +---
> +...
> +vshard.storage.internal.errinj.ERRINJ_CFG_DELAY = true
> +---
> +...
> +cfg.sharding[replicasets[1]].replicas[names.storage_1_b].master = true
> +---
> +...
> +cfg.sharding[replicasets[1]].replicas[names.storage_1_a].master = false
> +---
> +...
> +f = fiber.create(function() vshard.storage.cfg(cfg, names.storage_1_a) end)
> +---
> +...
> +f:status()
> +---
> +- suspended
> +...
> +-- Can not write - read only mode is already on.
> +s:replace{1}
> +---
> +- error: Can't modify data because this instance is in read-only mode.
> +...
> +test_run:switch('storage_1_b')
> +---
> +- true
> +...
> +cfg.sharding[replicasets[1]].replicas[names.storage_1_b].master = true
> +---
> +...
> +cfg.sharding[replicasets[1]].replicas[names.storage_1_a].master = false
> +---
> +...
> +vshard.storage.cfg(cfg, names.storage_1_b)
> +---
> +...
> +box.space.test:select{}
> +---
> +- []
> +...
> +test_run:switch('storage_1_a')
> +---
> +- true
> +...
> +vshard.storage.internal.errinj.ERRINJ_CFG_DELAY = false
> +---
> +...
> +while f:status() ~= 'dead' do fiber.sleep(0.1) end
> +---
> +...
> +s:select{}
> +---
> +- []
> +...
> +test_run:switch('storage_1_b')
> +---
> +- true
> +...
> +box.space.test:select{}
> +---
> +- []
> +...
> +box.space.test:drop()
> +---
> +...
> +test_run:cmd("switch default")
> +---
> +- true
> +...
> +test_run:drop_cluster(REPLICASET_2)
> +---
> +...
> +test_run:drop_cluster(REPLICASET_1)
> +---
> +...
> diff --git a/test/storage/demote_sync_errinj.test.lua
> b/test/storage/demote_sync_errinj.test.lua new file mode 100644
> index 0000000..f12ab60
> --- /dev/null
> +++ b/test/storage/demote_sync_errinj.test.lua
> @@ -0,0 +1,39 @@
> +test_run = require('test_run').new()
> +REPLICASET_1 = { 'storage_1_a', 'storage_1_b' }
> +REPLICASET_2 = { 'storage_2_a', 'storage_2_b' }
> +test_run:create_cluster(REPLICASET_1, 'storage')
> +test_run:create_cluster(REPLICASET_2, 'storage')
> +util = require('util')
> +util.wait_master(test_run, REPLICASET_1, 'storage_1_a')
> +util.wait_master(test_run, REPLICASET_2, 'storage_2_a')
> +
> +test_run:switch('storage_1_a')
> +fiber = require('fiber')
> +s = box.schema.create_space('test')
> +pk = s:create_index('pk')
> +vshard.storage.internal.errinj.ERRINJ_CFG_DELAY = true
> +cfg.sharding[replicasets[1]].replicas[names.storage_1_b].master = true
> +cfg.sharding[replicasets[1]].replicas[names.storage_1_a].master = false
> +f = fiber.create(function() vshard.storage.cfg(cfg, names.storage_1_a) end)
> +f:status()
> +-- Can not write - read only mode is already on.
> +s:replace{1}
> +
> +test_run:switch('storage_1_b')
> +cfg.sharding[replicasets[1]].replicas[names.storage_1_b].master = true
> +cfg.sharding[replicasets[1]].replicas[names.storage_1_a].master = false
> +vshard.storage.cfg(cfg, names.storage_1_b)
> +box.space.test:select{}
> +
> +test_run:switch('storage_1_a')
> +vshard.storage.internal.errinj.ERRINJ_CFG_DELAY = false
> +while f:status() ~= 'dead' do fiber.sleep(0.1) end
> +s:select{}
> +
> +test_run:switch('storage_1_b')
> +box.space.test:select{}
> +box.space.test:drop()
> +
> +test_run:cmd("switch default")
> +test_run:drop_cluster(REPLICASET_2)
> +test_run:drop_cluster(REPLICASET_1)
> diff --git a/test/storage/read_only_slave.result
> b/test/storage/read_only_slave.result index 9d5c85e..07d3653 100644
> --- a/test/storage/read_only_slave.result
> +++ b/test/storage/read_only_slave.result
> @@ -36,10 +36,17 @@ s = box.schema.create_space('test')
> pk = s:create_index('pk')
> ---
> ...
> +ok = nil
> +---
> +...
> +err = nil
> +---
> +...
> function on_master_enable() s:replace{1} end
> ---
> ...
> -function on_master_disable() s:replace{2} end
> +-- Test, that in disable trigger already can not write.
> +function on_master_disable() ok, err = pcall(s.replace, s, {2}) end
> ---
> ...
> vshard.storage.on_master_enable(on_master_enable)
> @@ -85,8 +92,8 @@ vshard.storage.on_master_enable(on_master_enable)
> vshard.storage.on_master_disable(on_master_disable)
> ---
> ...
> --- Yes, there is no 3 or 4, because trigger is set after a replica
> --- becames slave.
> +-- Yes, there is no 3 or 4, because a trigger on disable always
> +-- works in readonly.
> s:select{}
> ---
> - - [1]
> @@ -128,28 +135,28 @@ box.cfg.read_only
> ---
> - true
> ...
> +ok, err
> +---
> +- false
> +- Can't modify data because this instance is in read-only mode.
> +...
> fiber = require('fiber')
> ---
> ...
> -while s:count() ~= 3 do fiber.sleep(0.1) end
> +while s:count() ~= 2 do fiber.sleep(0.1) end
> ---
> ...
> s:select{}
> ---
> - - [1]
> - - [2]
> - [3]
> ...
> test_run:switch('storage_1_b')
> ---
> - true
> ...
> --- Yes, there is no {2}, because replication source is unset
> --- already.
> -s:select{}
> +s:drop()
> ---
> -- - [1]
> - - [3]
> ...
> test_run:cmd("switch default")
> ---
> diff --git a/test/storage/read_only_slave.test.lua
> b/test/storage/read_only_slave.test.lua index 3253ad7..d49e6f2 100644
> --- a/test/storage/read_only_slave.test.lua
> +++ b/test/storage/read_only_slave.test.lua
> @@ -11,8 +11,11 @@ test_run:switch('storage_1_a')
> box.cfg.read_only
> s = box.schema.create_space('test')
> pk = s:create_index('pk')
> +ok = nil
> +err = nil
> function on_master_enable() s:replace{1} end
> -function on_master_disable() s:replace{2} end
> +-- Test, that in disable trigger already can not write.
> +function on_master_disable() ok, err = pcall(s.replace, s, {2}) end
> vshard.storage.on_master_enable(on_master_enable)
> vshard.storage.on_master_disable(on_master_disable)
> s:select{}
> @@ -27,8 +30,8 @@ function on_master_enable() s:replace{3} end
> function on_master_disable() if not box.cfg.read_only then s:replace{4} end
> end vshard.storage.on_master_enable(on_master_enable)
> vshard.storage.on_master_disable(on_master_disable)
> --- Yes, there is no 3 or 4, because trigger is set after a replica
> --- becames slave.
> +-- Yes, there is no 3 or 4, because a trigger on disable always
> +-- works in readonly.
> s:select{}
>
> -- Check that after master change the read_only is updated, and
> @@ -44,14 +47,13 @@
> cfg.sharding[replicasets[1]].replicas[names.storage_1_b].master = true
> cfg.sharding[replicasets[1]].replicas[names.storage_1_a].master = false
> vshard.storage.cfg(cfg, names.storage_1_a)
> box.cfg.read_only
> +ok, err
> fiber = require('fiber')
> -while s:count() ~= 3 do fiber.sleep(0.1) end
> +while s:count() ~= 2 do fiber.sleep(0.1) end
> s:select{}
>
> test_run:switch('storage_1_b')
> --- Yes, there is no {2}, because replication source is unset
> --- already.
> -s:select{}
> +s:drop()
>
> test_run:cmd("switch default")
> test_run:drop_cluster(REPLICASET_2)
> diff --git a/test/unit/config.result b/test/unit/config.result
> index 2a5473c..8b098fb 100644
> --- a/test/unit/config.result
> +++ b/test/unit/config.result
> @@ -419,3 +419,26 @@ cfg.collect_lua_garbage = false
> _ = lcfg.check(cfg)
> ---
> ...
> +--
> +-- gh-84: sync before master demotion, and allow to configure
> +-- sync timeout.
> +--
> +cfg.sync_timeout = -100
> +---
> +...
> +check(cfg)
> +---
> +- Sync timeout must be non-negative number
> +...
> +cfg.sync_timeout = 0
> +---
> +...
> +_ = lcfg.check(cfg)
> +---
> +...
> +cfg.sync_timeout = 10.5
> +---
> +...
> +_ = lcfg.check(cfg)
> +---
> +...
> diff --git a/test/unit/config.test.lua b/test/unit/config.test.lua
> index 140c38c..0990d0e 100644
> --- a/test/unit/config.test.lua
> +++ b/test/unit/config.test.lua
> @@ -166,3 +166,14 @@ cfg.collect_lua_garbage = true
> _ = lcfg.check(cfg)
> cfg.collect_lua_garbage = false
> _ = lcfg.check(cfg)
> +
> +--
> +-- gh-84: sync before master demotion, and allow to configure
> +-- sync timeout.
> +--
> +cfg.sync_timeout = -100
> +check(cfg)
> +cfg.sync_timeout = 0
> +_ = lcfg.check(cfg)
> +cfg.sync_timeout = 10.5
> +_ = lcfg.check(cfg)
> diff --git a/vshard/cfg.lua b/vshard/cfg.lua
> index c578af2..9860676 100644
> --- a/vshard/cfg.lua
> +++ b/vshard/cfg.lua
> @@ -198,7 +198,11 @@ local cfg_template = {
> {'collect_lua_garbage', {
> type = 'boolean', name = 'Garbage Lua collect necessity',
> is_optional = true, default = false
> - }}
> + }},
> + {'sync_timeout', {
> + type = 'non-negative number', name = 'Sync timeout', is_optional =
> true, + default = consts.DEFAULT_SYNC_TIMEOUT
> + }},
> }
>
> --
> @@ -227,6 +231,7 @@ local function remove_non_box_options(cfg)
> cfg.shard_index = nil
> cfg.collect_bucket_garbage_interval = nil
> cfg.collect_lua_garbage = nil
> + cfg.sync_timeout = nil
> end
>
> return {
> diff --git a/vshard/consts.lua b/vshard/consts.lua
> index 47651d7..1a0e4ab 100644
> --- a/vshard/consts.lua
> +++ b/vshard/consts.lua
> @@ -20,7 +20,6 @@ return {
> REPLICATION_THRESHOLD_FAIL = 10,
>
> DEFAULT_BUCKET_COUNT = 3000;
> - BUCKET_SYNC_TIMEOUT = 0.1;
> BUCKET_SENT_GARBAGE_DELAY = 0.5;
> DEFAULT_REBALANCER_DISBALANCE_THRESHOLD = 1;
> REBALANCER_IDLE_INTERVAL = 60 * 60;
> @@ -30,7 +29,7 @@ return {
> CALL_TIMEOUT_MAX = 64;
> FAILOVER_UP_TIMEOUT = 5;
> FAILOVER_DOWN_TIMEOUT = 1;
> - SYNC_TIMEOUT = 1;
> + DEFAULT_SYNC_TIMEOUT = 1;
> RECONNECT_TIMEOUT = 0.5;
> DEFAULT_COLLECT_BUCKET_GARBAGE_INTERVAL = 0.5;
> RECOVERY_INTERVAL = 5;
> diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua
> index a0f216e..6d66b05 100644
> --- a/vshard/storage/init.lua
> +++ b/vshard/storage/init.lua
> @@ -35,10 +35,12 @@ if not M then
> errinj = {
> ERRINJ_BUCKET_FIND_GARBAGE_DELAY = false,
> ERRINJ_RELOAD = false,
> + ERRINJ_CFG_DELAY = false,
> },
> -- This counter is used to restart background fibers with
> -- new reloaded code.
> module_version = 0,
> + sync_timeout = consts.DEFAULT_SYNC_TIMEOUT,
>
> ------------------- Garbage collection -------------------
> -- Fiber to remove garbage buckets data.
> @@ -305,7 +307,7 @@ local function sync(timeout)
> end
>
> log.debug("Synchronizing replicaset...")
> - timeout = timeout or consts.SYNC_TIMEOUT
> + timeout = timeout or M.sync_timeout
> local vclock = box.info.vclock
> local tstart = lfiber.time()
> repeat
> @@ -539,14 +541,31 @@ local function buckets_discovery()
> return ret
> end
>
> +--
> +-- The only thing, that must be done to abort a master demote is
> +-- a reset of read_only.
> +--
> +local function local_on_master_disable_abort()
> + box.cfg{read_only = false}
> +end
> +
> +--
> +-- Prepare to a master demotion. Before it, a master must stop
> +-- accept writes, and try to wait until all of its data is
> +-- replicated to each slave.
> +--
> +local function local_on_master_disable_prepare()
> + log.info("Resigning from the replicaset master role...")
> + box.cfg({read_only = true})
> + sync(M.sync_timeout)
> +end
> +
> --
> -- This function executes when a master role is removed from local
> -- instance during configuration
> --
> local function local_on_master_disable()
> M._on_master_disable:run()
> - box.cfg({read_only = true})
> - log.verbose("Resigning from the replicaset master role...")
> -- Stop garbage collecting
> if M.collect_bucket_garbage_fiber ~= nil then
> M.collect_bucket_garbage_fiber:cancel()
> @@ -558,13 +577,29 @@ local function local_on_master_disable()
> M.recovery_fiber = nil
> log.info('Recovery stopped')
> end
> - -- Wait until replicas are synchronized before one another become a new
> master - sync(consts.SYNC_TIMEOUT)
> log.info("Resigned from the replicaset master role")
> end
>
> local collect_garbage_f
>
> +--
> +-- The only thing, that must be done to abort a master promotion
> +-- is a set read_only back to true.
> +--
> +local function local_on_master_enable_abort()
> + box.cfg({read_only = true})
> +end
> +
> +--
> +-- Promote does not require sync, because a replica can not have a
> +-- data, that is not on a current master - the replica is read
> +-- only. But read_only can not be set to false here, because
> +-- until box.cfg is called, it can not be guaranteed, that the
> +-- promotion will be successful.
> +--
> +local function local_on_master_enable_prepare()
> + log.info("Taking on replicaset master role...")
> +end
> --
> -- This function executes whan a master role is added to local
> -- instance during configuration
> @@ -572,7 +607,6 @@ local collect_garbage_f
> local function local_on_master_enable()
> box.cfg({read_only = false})
> M._on_master_enable:run()
> - log.verbose("Taking on replicaset master role...")
> recovery_garbage_receiving_buckets()
> -- Start background process to collect garbage.
> M.collect_bucket_garbage_fiber =
> @@ -1348,9 +1382,37 @@ local function storage_cfg(cfg, this_replica_uuid)
> local shard_index = cfg.shard_index
> local collect_bucket_garbage_interval =
> cfg.collect_bucket_garbage_interval local collect_lua_garbage =
> cfg.collect_lua_garbage
> + --
> + -- Sync timeout is a special case - it must be updated before
> + -- all other options to allow a user to demote a master with
> + -- a new sync timeout.
> + --
> + local old_sync_timeout = M.sync_timeout
> + M.sync_timeout = cfg.sync_timeout
> lcfg.remove_non_box_options(cfg)
>
> - box.cfg(cfg)
> + if was_master and not is_master then
> + local_on_master_disable_prepare()
> + end
> + if not was_master and is_master then
> + local_on_master_enable_prepare()
> + end
> +
> + local ok, err = pcall(box.cfg, cfg)
> + while M.errinj.ERRINJ_CFG_DELAY do
> + lfiber.sleep(0.01)
> + end
> + if not ok then
> + M.sync_timeout = old_sync_timeout
> + if was_master and not is_master then
> + local_on_master_disable_abort()
> + end
> + if not was_master and is_master then
> + local_on_master_enable_abort()
> + end
> + error(err)
> + end
> +
> log.info("Box has been configured")
> local uri = luri.parse(this_replica.uri)
> box.once("vshard:storage:1", storage_schema_v1, uri.login,
> uri.password)
[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2018-03-26 10:38 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-22 22:11 [tarantool-patches] [PATCH vshard 1/1] storage: do read_only + sync on master demotion before box.cfg Vladislav Shpilevoy
2018-03-26 10:38 ` [tarantool-patches] " Georgy Kirichenko
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox