[Tarantool-patches] [PATCH vshard 7/7] router: introduce discovery mode 'once'

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Fri May 1 03:16:34 MSK 2020


Closes #210

@TarantoolBot document
Title: vshard.router.discovery_set() and new config option

```Lua
vshard.router.discovery_set(mode)
```
Turns on/off the background discovery fiber used by the router to
find buckets.

When `mode` is `"on"`, the discovery fiber works during the whole
lifetime of the router. Even after all buckets are discovered, it
will still go to the storages and download their buckets, just
with a long period. This is useful if the bucket topology changes
often and the bucket count is not big. The router will keep its
route table up to date even when no requests are processed. This
is the default value.

When `mode` is `"off"`, discovery is disabled completely.

When `mode` is `"once"`, discovery will start, find locations of
all the buckets, and then the discovery fiber is terminated. This
is good for a large bucket count and for clusters where
rebalancing happens rarely.

The method is useful for enabling/disabling discovery after the
router is already started. Note, however, that discovery is
enabled by default. If you never want it enabled, even for a
short time, specify the `discovery_mode` option in the
configuration. It takes the same values as
`vshard.router.discovery_set(mode)`.

You may decide to turn off discovery or make it 'once' if you have
many routers, or tons of buckets (hundreds of thousands and more),
and you see that the discovery process consumes a notable share of
CPU on routers and storages. In that case it may be wise to turn
off discovery when there is no rebalancing in the cluster, and to
turn it on for new routers, as well as for all routers when
rebalancing is started.
---
 test/router/router2.result   | 80 +++++++++++++++++++++++++++++++++++-
 test/router/router2.test.lua | 34 ++++++++++++++-
 vshard/cfg.lua               |  6 +--
 vshard/router/init.lua       | 15 ++++++-
 4 files changed, 129 insertions(+), 6 deletions(-)

diff --git a/test/router/router2.result b/test/router/router2.result
index 556f749..0f93fc4 100644
--- a/test/router/router2.result
+++ b/test/router/router2.result
@@ -113,16 +113,94 @@ vshard.router.static.discovery_fiber:status()
  | - suspended
  | ...
 
-f1:status(), f2, f3:status(), f4:status(), f5, f6:status()
+cfg.discovery_mode = 'once'
+ | ---
+ | ...
+vshard.router.cfg(cfg)
+ | ---
+ | ...
+f7 = vshard.router.static.discovery_fiber
+ | ---
+ | ...
+vshard.router.static.discovery_fiber:status()
+ | ---
+ | - suspended
+ | ...
+
+f1:status(), f2, f3:status(), f4:status(), f5, f6:status(), f7:status()
  | ---
  | - dead
  | - null
  | - dead
  | - dead
  | - null
+ | - dead
  | - suspended
  | ...
 
+vshard.router.bootstrap()
+ | ---
+ | - true
+ | ...
+function continue_discovery()                                                   \
+    local res = vshard.router.info().bucket.unknown == 0                        \
+    if not res then                                                             \
+        vshard.router.discovery_wakeup()                                        \
+    end                                                                         \
+    return res                                                                  \
+end
+ | ---
+ | ...
+
+-- With 'on' discovery works infinitely.
+vshard.router._route_map_clear()
+ | ---
+ | ...
+vshard.router.discovery_set('on')
+ | ---
+ | ...
+test_run:wait_cond(continue_discovery)
+ | ---
+ | - true
+ | ...
+vshard.router.info().bucket.unknown
+ | ---
+ | - 0
+ | ...
+vshard.router.static.discovery_fiber:status()
+ | ---
+ | - suspended
+ | ...
+
+-- With 'once' discovery mode the discovery fiber deletes self
+-- after full discovery.
+vshard.router._route_map_clear()
+ | ---
+ | ...
+vshard.router.discovery_set('once')
+ | ---
+ | ...
+test_run:wait_cond(continue_discovery)
+ | ---
+ | - true
+ | ...
+vshard.router.info().bucket.unknown
+ | ---
+ | - 0
+ | ...
+vshard.router.static.discovery_fiber
+ | ---
+ | - null
+ | ...
+-- Second set won't do anything.
+vshard.router.discovery_set('once')
+ | ---
+ | ...
+vshard.router.static.discovery_fiber
+ | ---
+ | - null
+ | ...
+
 _ = test_run:switch("default")
  | ---
  | ...
diff --git a/test/router/router2.test.lua b/test/router/router2.test.lua
index 33f4d3e..10f82fd 100644
--- a/test/router/router2.test.lua
+++ b/test/router/router2.test.lua
@@ -41,7 +41,39 @@ vshard.router.discovery_set('on')
 f6 = vshard.router.static.discovery_fiber
 vshard.router.static.discovery_fiber:status()
 
-f1:status(), f2, f3:status(), f4:status(), f5, f6:status()
+cfg.discovery_mode = 'once'
+vshard.router.cfg(cfg)
+f7 = vshard.router.static.discovery_fiber
+vshard.router.static.discovery_fiber:status()
+
+f1:status(), f2, f3:status(), f4:status(), f5, f6:status(), f7:status()
+
+vshard.router.bootstrap()
+function continue_discovery()                                                   \
+    local res = vshard.router.info().bucket.unknown == 0                        \
+    if not res then                                                             \
+        vshard.router.discovery_wakeup()                                        \
+    end                                                                         \
+    return res                                                                  \
+end
+
+-- With 'on' discovery works infinitely.
+vshard.router._route_map_clear()
+vshard.router.discovery_set('on')
+test_run:wait_cond(continue_discovery)
+vshard.router.info().bucket.unknown
+vshard.router.static.discovery_fiber:status()
+
+-- With 'once' discovery mode the discovery fiber deletes self
+-- after full discovery.
+vshard.router._route_map_clear()
+vshard.router.discovery_set('once')
+test_run:wait_cond(continue_discovery)
+vshard.router.info().bucket.unknown
+vshard.router.static.discovery_fiber
+-- Second set won't do anything.
+vshard.router.discovery_set('once')
+vshard.router.static.discovery_fiber
 
 _ = test_run:switch("default")
 _ = test_run:cmd("stop server router_1")
diff --git a/vshard/cfg.lua b/vshard/cfg.lua
index 8a3e812..1ef1899 100644
--- a/vshard/cfg.lua
+++ b/vshard/cfg.lua
@@ -152,8 +152,8 @@ local function cfg_check_weights(weights)
 end
 
 local function check_discovery_mode(value)
-    if value ~= 'on' and value ~= 'off' then
-        error("Expected 'on' or 'off' for discovery_mode")
+    if value ~= 'on' and value ~= 'off' and value ~= 'once' then
+        error("Expected 'on', 'off', or 'once' for discovery_mode")
     end
 end
 
@@ -262,7 +262,7 @@ local cfg_template = {
         is_optional = true, default = consts.DEFAULT_FAILOVER_PING_TIMEOUT
     },
     discovery_mode = {
-        type = 'string', name = 'Discovery mode: on, off',
+        type = 'string', name = 'Discovery mode: on, off, once',
         is_optional = true, default = 'on', check = check_discovery_mode
     },
 }
diff --git a/vshard/router/init.lua b/vshard/router/init.lua
index 26ea85b..927a38e 100644
--- a/vshard/router/init.lua
+++ b/vshard/router/init.lua
@@ -249,7 +249,7 @@ if util.version_is_at_least(1, 10, 0) then
 --
 discovery_f = function(router)
     local module_version = M.module_version
-    assert(router.discovery_mode == 'on')
+    assert(router.discovery_mode == 'on' or router.discovery_mode == 'once')
     local iterators = {}
     local opts = {is_async = true}
     local mode
@@ -342,6 +342,13 @@ discovery_f = function(router)
             unknown_bucket_count =
                 router.total_bucket_count - router.known_bucket_count
             if unknown_bucket_count == 0 then
+                if router.discovery_mode == 'once' then
+                    log.info("Discovery mode is 'once', and all is "..
+                             "discovered - shut down the discovery process")
+                    router.discovery_fiber = nil
+                    lfiber.self():cancel()
+                    return
+                end
                 if mode ~= 'idle' then
                     log.info('Discovery enters idle mode, all buckets are '..
                              'known. Discovery works with %s seconds '..
@@ -437,6 +444,12 @@ local function discovery_set(router, new_mode)
     if new_mode == 'off' then
         return
     end
+    if new_mode == 'once' and
+       router.total_bucket_count == router.known_bucket_count then
+        -- 'Once' discovery is supposed to stop working when all
+        -- is found. But it is the case already. So nothing to do.
+        return
+    end
     router.discovery_fiber = util.reloadable_fiber_create(
         'vshard.discovery.' .. router.name, M, 'discovery_f', router)
 end
-- 
2.21.1 (Apple Git-122.3)



More information about the Tarantool-patches mailing list