[Tarantool-patches] [PATCH vshard 05/11] util: introduce safe fiber_cond_wait()

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Tue Feb 23 03:15:43 MSK 2021


Original fiber_cond:wait() has a few issues:

- Raises exception when fiber is canceled, which makes it
  inapplicable in exception-intolerant code;

- Raises an ugly misleading usage exception when timeout is
  negative which easily can happen if the caller's code calls
  wait() multiple times retrying something and does not want to
  bother with doing a part of cond's job;

- When fails, pushes an error of type 'TimedOut' which is not the
  same as 'ClientError' with box.error.TIMEOUT code. The latter is
  used wider, at least in vshard.

The patch introduces util.fiber_cond_wait() function which fixes
the mentioned issues.

It is needed in the future map-reduce subsystem modules revolving
around waiting on various conditions.

Part of #147
---
 test/unit/util.result   | 82 +++++++++++++++++++++++++++++++++++++++++
 test/unit/util.test.lua | 31 ++++++++++++++++
 vshard/util.lua         | 35 ++++++++++++++++++
 3 files changed, 148 insertions(+)

diff --git a/test/unit/util.result b/test/unit/util.result
index 42a361a..679c087 100644
--- a/test/unit/util.result
+++ b/test/unit/util.result
@@ -184,3 +184,85 @@ t ~= res
 ---
 - true
 ...
+--
+-- Exception-safe cond wait.
+--
+cond_wait = util.fiber_cond_wait
+---
+...
+cond = fiber.cond()
+---
+...
+ok, err = cond_wait(cond, -1)
+---
+...
+assert(not ok and err.message)
+---
+- Timeout exceeded
+...
+-- Ensure it does not return 'false' like pcall(). It must conform to nil,err
+-- signature.
+assert(type(ok) == 'nil')
+---
+- true
+...
+ok, err = cond_wait(cond, 0)
+---
+...
+assert(not ok and err.message)
+---
+- Timeout exceeded
+...
+ok, err = cond_wait(cond, 0.000001)
+---
+...
+assert(not ok and err.message)
+---
+- Timeout exceeded
+...
+ok, err = nil
+---
+...
+_ = fiber.create(function() ok, err = cond_wait(cond, 1000000) end)
+---
+...
+fiber.yield()
+---
+...
+cond:signal()
+---
+...
+_ = test_run:wait_cond(function() return ok or err end)
+---
+...
+assert(ok and not err)
+---
+- true
+...
+ok, err = nil
+---
+...
+f = fiber.create(function() ok, err = cond_wait(cond, 1000000) end)
+---
+...
+fiber.yield()
+---
+...
+f:cancel()
+---
+...
+_ = test_run:wait_cond(function() return ok or err end)
+---
+...
+assert(not ok)
+---
+- true
+...
+err.message
+---
+- fiber is cancelled
+...
+assert(type(err) == 'table')
+---
+- true
+...
diff --git a/test/unit/util.test.lua b/test/unit/util.test.lua
index 9550a95..df3db6f 100644
--- a/test/unit/util.test.lua
+++ b/test/unit/util.test.lua
@@ -76,3 +76,34 @@ yield_count
 t
 res
 t ~= res
+
+--
+-- Exception-safe cond wait.
+--
+cond_wait = util.fiber_cond_wait
+cond = fiber.cond()
+ok, err = cond_wait(cond, -1)
+assert(not ok and err.message)
+-- Ensure it does not return 'false' like pcall(). It must conform to nil,err
+-- signature.
+assert(type(ok) == 'nil')
+ok, err = cond_wait(cond, 0)
+assert(not ok and err.message)
+ok, err = cond_wait(cond, 0.000001)
+assert(not ok and err.message)
+
+ok, err = nil
+_ = fiber.create(function() ok, err = cond_wait(cond, 1000000) end)
+fiber.yield()
+cond:signal()
+_ = test_run:wait_cond(function() return ok or err end)
+assert(ok and not err)
+
+ok, err = nil
+f = fiber.create(function() ok, err = cond_wait(cond, 1000000) end)
+fiber.yield()
+f:cancel()
+_ = test_run:wait_cond(function() return ok or err end)
+assert(not ok)
+err.message
+assert(type(err) == 'table')
diff --git a/vshard/util.lua b/vshard/util.lua
index 2362607..d78f3a5 100644
--- a/vshard/util.lua
+++ b/vshard/util.lua
@@ -1,6 +1,7 @@
 -- vshard.util
 local log = require('log')
 local fiber = require('fiber')
+local lerror = require('vshard.error')
 
 local MODULE_INTERNALS = '__module_vshard_util'
 local M = rawget(_G, MODULE_INTERNALS)
@@ -191,6 +192,39 @@ local function table_minus_yield(dst, src, interval)
     return dst
 end
 
+local function fiber_cond_wait_xc(cond, timeout)
+    -- Handle negative timeout specifically - otherwise wait() will throw an
+    -- ugly usage error.
+    -- Don't trust this check to the caller's code, because often it just calls
+    -- wait many times until it fails or the condition is met. Code looks much
+    -- cleaner when it does not need to check the timeout sign. On the other
+    -- hand, perf is not important here - anyway wait() yields which is slow on
+    -- its own, but also breaks JIT trace recording which makes pcall() in the
+    -- non-xc version of this function inconsiderable.
+    if timeout < 0 or not cond:wait(timeout) then
+        -- Don't use the original error if cond sets it. Because it sets
+        -- TimedOut error. It does not have a proper error code, and may not be
+        -- detected by router as a special timeout case if necessary. Or at
+        -- least would complicate the handling in future. Instead, try to use a
+        -- unified timeout error where possible.
+        error(lerror.timeout())
+    end
+    -- Still possible though that the fiber is canceled and cond:wait() throws.
+    -- This is why the _xc() version of this function throws even the timeout -
+    -- anyway pcall() is inevitable.
+end
+
+--
+-- Exception-safe cond wait with unified errors in vshard format.
+--
+local function fiber_cond_wait(cond, timeout)
+    local ok, err = pcall(fiber_cond_wait_xc, cond, timeout)
+    if ok then
+        return true
+    end
+    return nil, lerror.make(err)
+end
+
 return {
     tuple_extract_key = tuple_extract_key,
     reloadable_fiber_create = reloadable_fiber_create,
@@ -200,4 +234,5 @@ return {
     version_is_at_least = version_is_at_least,
     table_copy_yield = table_copy_yield,
     table_minus_yield = table_minus_yield,
+    fiber_cond_wait = fiber_cond_wait,
 }
-- 
2.24.3 (Apple Git-128)



More information about the Tarantool-patches mailing list