[Tarantool-patches] [PATCH 1/1] router: update known bucket count when rs removed
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Fri Aug 7 01:11:46 MSK 2020
When replicaset was removed, router didn't update
router.known_bucket_count value. It is used by discovery to decide
when to turn on aggressive discovery. And by router.info() to
provide alerts, change status.
As a result, in case of replicaset removal, known_bucket_count
was left bigger than it actually is, and it could be fixed only
by restart.
Discovery could become non-agressive too early, and router.info()
could show weird alerts about bucke_count being configured
differently on storages and router. Besides, router.info().bucket
was showing not realistic numbers.
---
Branch: http://github.com/tarantool/tarantool/tree/gerold103/router-known-buckets
test/router/router2.result | 61 ++++++++++++++++++++++++++++++++++++
test/router/router2.test.lua | 24 ++++++++++++++
vshard/router/init.lua | 3 ++
3 files changed, 88 insertions(+)
diff --git a/test/router/router2.result b/test/router/router2.result
index 40d6bff..492421b 100644
--- a/test/router/router2.result
+++ b/test/router/router2.result
@@ -250,6 +250,67 @@ vshard.router.static.discovery_fiber
| - null
| ...
+--
+-- Known bucket count should be updated properly when replicaset
+-- is removed from the config.
+--
+vshard.router.info().bucket
+ | ---
+ | - unreachable: 0
+ | available_ro: 0
+ | unknown: 0
+ | available_rw: 3000
+ | ...
+rs1_uuid = util.replicasets[1]
+ | ---
+ | ...
+rs1 = cfg.sharding[rs1_uuid]
+ | ---
+ | ...
+cfg.sharding[rs1_uuid] = nil
+ | ---
+ | ...
+vshard.router.cfg(cfg)
+ | ---
+ | ...
+vshard.router.info().bucket
+ | ---
+ | - unreachable: 0
+ | available_ro: 0
+ | unknown: 1500
+ | available_rw: 1500
+ | ...
+cfg.sharding[rs1_uuid] = rs1
+ | ---
+ | ...
+vshard.router.cfg(cfg)
+ | ---
+ | ...
+vshard.router.discovery_set('on')
+ | ---
+ | ...
+function wait_all_rw() \
+ local total = vshard.router.bucket_count() \
+ local res = vshard.router.info().bucket.available_rw == total \
+ if not res then \
+ vshard.router.discovery_wakeup() \
+ end \
+ return res \
+end
+ | ---
+ | ...
+test_run:wait_cond(wait_all_rw)
+ | ---
+ | - true
+ | ...
+vshard.router.info().bucket
+ | ---
+ | - unreachable: 0
+ | available_ro: 0
+ | unknown: 0
+ | available_rw: 3000
+ | ...
+
_ = test_run:switch("default")
| ---
| ...
diff --git a/test/router/router2.test.lua b/test/router/router2.test.lua
index afdf4e1..72c99e6 100644
--- a/test/router/router2.test.lua
+++ b/test/router/router2.test.lua
@@ -95,6 +95,30 @@ vshard.router.static.discovery_fiber
vshard.router.discovery_set('once')
vshard.router.static.discovery_fiber
+--
+-- Known bucket count should be updated properly when replicaset
+-- is removed from the config.
+--
+vshard.router.info().bucket
+rs1_uuid = util.replicasets[1]
+rs1 = cfg.sharding[rs1_uuid]
+cfg.sharding[rs1_uuid] = nil
+vshard.router.cfg(cfg)
+vshard.router.info().bucket
+cfg.sharding[rs1_uuid] = rs1
+vshard.router.cfg(cfg)
+vshard.router.discovery_set('on')
+function wait_all_rw() \
+ local total = vshard.router.bucket_count() \
+ local res = vshard.router.info().bucket.available_rw == total \
+ if not res then \
+ vshard.router.discovery_wakeup() \
+ end \
+ return res \
+end
+test_run:wait_cond(wait_all_rw)
+vshard.router.info().bucket
+
_ = test_run:switch("default")
_ = test_run:cmd("stop server router_1")
_ = test_run:cmd("cleanup server router_1")
diff --git a/vshard/router/init.lua b/vshard/router/init.lua
index d8c9246..0d94210 100644
--- a/vshard/router/init.lua
+++ b/vshard/router/init.lua
@@ -908,14 +908,17 @@ local function router_cfg(router, cfg, is_reload)
router.failover_ping_timeout = vshard_cfg.failover_ping_timeout
router.sync_timeout = vshard_cfg.sync_timeout
local old_route_map = router.route_map
+ local known_bucket_count = 0
router.route_map = table_new(router.total_bucket_count, 0)
for bucket, rs in pairs(old_route_map) do
local new_rs = router.replicasets[rs.uuid]
if new_rs then
router.route_map[bucket] = new_rs
new_rs.bucket_count = new_rs.bucket_count + 1
+ known_bucket_count = known_bucket_count + 1
end
end
+ router.known_bucket_count = known_bucket_count
if router.failover_fiber == nil then
router.failover_fiber = util.reloadable_fiber_create(
'vshard.failover.' .. router.name, M, 'failover_f', router)
--
2.21.1 (Apple Git-122.3)
More information about the Tarantool-patches
mailing list