[Tarantool-patches] [PATCH 5/9] util: introduce yielding table functions

Oleg Babin olegrok at tarantool.org
Thu Feb 11 09:50:33 MSK 2021


Thanks for your fixes! LGTM.

On 11/02/2021 01:34, Vladislav Shpilevoy wrote:
> Thanks for the review!
>
>>> diff --git a/test/unit/util.result b/test/unit/util.result
>>> index 096e36f..c4fd84d 100644
>>> --- a/test/unit/util.result
>>> +++ b/test/unit/util.result
>>> @@ -71,3 +71,116 @@ test_run:grep_log('default', 'reloadable_function has been started', 1000)
>>> +do                                                                              \
>>> +    t = {k1 = 1, k2 = 2, k3 = 3, k4 = 4}                                        \
>>> +    f = fiber.create(function()                                                 \
>>> +        minus_yield(t, {k2 = 2, k3 = 3, k5 = 5, k4 = 444}, 2)                   \
>>> +    end)                                                                        \
>>> +    yield_count = 0                                                             \
>>> +    while f:status() ~= 'dead' do                                               \
>>> +        yield_count = yield_count + 1                                           \
>>> +        fiber.yield()                                                           \
>>> +    end                                                                         \
>>> +end
>>> +---
>> Why can't you use "csw" of fiber.self() instead? Also it's it reliable enough to simply count yields?
> Yup, will work too. See the diff below.
>
> ====================
> diff --git a/test/unit/util.result b/test/unit/util.result
> index c4fd84d..42a361a 100644
> --- a/test/unit/util.result
> +++ b/test/unit/util.result
> @@ -111,14 +111,14 @@ minus_yield({k1 = 1, k2 = 2, k3 = 3}, {k1 = 1, k2 = 222}, 10)
>   ...
>   do                                                                              \
>       t = {k1 = 1, k2 = 2, k3 = 3, k4 = 4}                                        \
> +    yield_count = 0                                                             \
>       f = fiber.create(function()                                                 \
> +        local csw1 = fiber.info()[fiber.id()].csw                               \
>           minus_yield(t, {k2 = 2, k3 = 3, k5 = 5, k4 = 444}, 2)                   \
> +        local csw2 = fiber.info()[fiber.id()].csw                               \
> +        yield_count = csw2 - csw1                                               \
>       end)                                                                        \
> -    yield_count = 0                                                             \
> -    while f:status() ~= 'dead' do                                               \
> -        yield_count = yield_count + 1                                           \
> -        fiber.yield()                                                           \
> -    end                                                                         \
> +    test_run:wait_cond(function() return f:status() == 'dead' end)              \
>   end
>   ---
>   ...
> @@ -151,14 +151,14 @@ copy_yield({k1 = 1, k2 = 2}, 1)
>   do                                                                              \
>       t = {k1 = 1, k2 = 2, k3 = 3, k4 = 4}                                        \
>       res = nil                                                                   \
> +    yield_count = 0                                                             \
>       f = fiber.create(function()                                                 \
> +        local csw1 = fiber.info()[fiber.id()].csw                               \
>           res = copy_yield(t, 2)                                                  \
> +        local csw2 = fiber.info()[fiber.id()].csw                               \
> +        yield_count = csw2 - csw1                                               \
>       end)                                                                        \
> -    yield_count = 0                                                             \
> -    while f:status() ~= 'dead' do                                               \
> -        yield_count = yield_count + 1                                           \
> -        fiber.yield()                                                           \
> -    end                                                                         \
> +    test_run:wait_cond(function() return f:status() == 'dead' end)              \
>   end
>   ---
>   ...
> diff --git a/test/unit/util.test.lua b/test/unit/util.test.lua
> index 4d6cbe9..9550a95 100644
> --- a/test/unit/util.test.lua
> +++ b/test/unit/util.test.lua
> @@ -42,14 +42,14 @@ minus_yield({k1 = 1, k2 = 2, k3 = 3}, {k1 = 1, k2 = 222}, 10)
>   
>   do                                                                              \
>       t = {k1 = 1, k2 = 2, k3 = 3, k4 = 4}                                        \
> +    yield_count = 0                                                             \
>       f = fiber.create(function()                                                 \
> +        local csw1 = fiber.info()[fiber.id()].csw                               \
>           minus_yield(t, {k2 = 2, k3 = 3, k5 = 5, k4 = 444}, 2)                   \
> +        local csw2 = fiber.info()[fiber.id()].csw                               \
> +        yield_count = csw2 - csw1                                               \
>       end)                                                                        \
> -    yield_count = 0                                                             \
> -    while f:status() ~= 'dead' do                                               \
> -        yield_count = yield_count + 1                                           \
> -        fiber.yield()                                                           \
> -    end                                                                         \
> +    test_run:wait_cond(function() return f:status() == 'dead' end)              \
>   end
>   yield_count
>   t
> @@ -63,14 +63,14 @@ copy_yield({k1 = 1, k2 = 2}, 1)
>   do                                                                              \
>       t = {k1 = 1, k2 = 2, k3 = 3, k4 = 4}                                        \
>       res = nil                                                                   \
> +    yield_count = 0                                                             \
>       f = fiber.create(function()                                                 \
> +        local csw1 = fiber.info()[fiber.id()].csw                               \
>           res = copy_yield(t, 2)                                                  \
> +        local csw2 = fiber.info()[fiber.id()].csw                               \
> +        yield_count = csw2 - csw1                                               \
>       end)                                                                        \
> -    yield_count = 0                                                             \
> -    while f:status() ~= 'dead' do                                               \
> -        yield_count = yield_count + 1                                           \
> -        fiber.yield()                                                           \
> -    end                                                                         \
> +    test_run:wait_cond(function() return f:status() == 'dead' end)              \
>   end
>   yield_count
>   t
> ====================
>
>> Could scheduler skip this fiber at some loop iteration? In other words, won't this test be flaky?
> Nope. Unless the fiber is sleeping on some condition or for a timeout, a plain
> sleep(0) also known as fiber.yield() won't skip this fiber on the next
> iteration of the loop. But does not matter if csw is used to count the yields.
>
> Full new patch below.
>
> ====================
>      util: introduce yielding table functions
>      
>      The patch adds functions table_copy_yield and table_minus_yield.
>      
>      Yielding copy creates a duplicate of a table but yields every
>      specified number of keys copied.
>      
>      Yielding minus removes matching key-value pairs specified in one
>      table from another table. It yields every specified number of keys
>      passed.
>      
>      The functions should help to process huge Lua tables (millions of
>      elements and more). These are going to be used on the storage in
>      the new GC algorithm.
>      
>      The algorithm will need to keep a route table on the storage, just
>      like on the router, but with expiration time for the routes. Since
>      bucket count can be millions, it means GC will potentially operate
>      on a huge Lua table and could use some yields so as not to block
>      TX thread for long.
>      
>      Needed for #147
>
> diff --git a/test/unit/util.result b/test/unit/util.result
> index 096e36f..42a361a 100644
> --- a/test/unit/util.result
> +++ b/test/unit/util.result
> @@ -71,3 +71,116 @@ test_run:grep_log('default', 'reloadable_function has been started', 1000)
>   fib:cancel()
>   ---
>   ...
> +-- Yielding table minus.
> +minus_yield = util.table_minus_yield
> +---
> +...
> +minus_yield({}, {}, 1)
> +---
> +- []
> +...
> +minus_yield({}, {k = 1}, 1)
> +---
> +- []
> +...
> +minus_yield({}, {k = 1}, 0)
> +---
> +- []
> +...
> +minus_yield({k = 1}, {k = 1}, 0)
> +---
> +- []
> +...
> +minus_yield({k1 = 1, k2 = 2}, {k1 = 1, k3 = 3}, 10)
> +---
> +- k2: 2
> +...
> +minus_yield({k1 = 1, k2 = 2}, {k1 = 1, k2 = 2}, 10)
> +---
> +- []
> +...
> +-- Mismatching values are not deleted.
> +minus_yield({k1 = 1}, {k1 = 2}, 10)
> +---
> +- k1: 1
> +...
> +minus_yield({k1 = 1, k2 = 2, k3 = 3}, {k1 = 1, k2 = 222}, 10)
> +---
> +- k3: 3
> +  k2: 2
> +...
> +do                                                                              \
> +    t = {k1 = 1, k2 = 2, k3 = 3, k4 = 4}                                        \
> +    yield_count = 0                                                             \
> +    f = fiber.create(function()                                                 \
> +        local csw1 = fiber.info()[fiber.id()].csw                               \
> +        minus_yield(t, {k2 = 2, k3 = 3, k5 = 5, k4 = 444}, 2)                   \
> +        local csw2 = fiber.info()[fiber.id()].csw                               \
> +        yield_count = csw2 - csw1                                               \
> +    end)                                                                        \
> +    test_run:wait_cond(function() return f:status() == 'dead' end)              \
> +end
> +---
> +...
> +yield_count
> +---
> +- 2
> +...
> +t
> +---
> +- k4: 4
> +  k1: 1
> +...
> +-- Yielding table copy.
> +copy_yield = util.table_copy_yield
> +---
> +...
> +copy_yield({}, 1)
> +---
> +- []
> +...
> +copy_yield({k = 1}, 1)
> +---
> +- k: 1
> +...
> +copy_yield({k1 = 1, k2 = 2}, 1)
> +---
> +- k1: 1
> +  k2: 2
> +...
> +do                                                                              \
> +    t = {k1 = 1, k2 = 2, k3 = 3, k4 = 4}                                        \
> +    res = nil                                                                   \
> +    yield_count = 0                                                             \
> +    f = fiber.create(function()                                                 \
> +        local csw1 = fiber.info()[fiber.id()].csw                               \
> +        res = copy_yield(t, 2)                                                  \
> +        local csw2 = fiber.info()[fiber.id()].csw                               \
> +        yield_count = csw2 - csw1                                               \
> +    end)                                                                        \
> +    test_run:wait_cond(function() return f:status() == 'dead' end)              \
> +end
> +---
> +...
> +yield_count
> +---
> +- 2
> +...
> +t
> +---
> +- k3: 3
> +  k4: 4
> +  k1: 1
> +  k2: 2
> +...
> +res
> +---
> +- k3: 3
> +  k4: 4
> +  k1: 1
> +  k2: 2
> +...
> +t ~= res
> +---
> +- true
> +...
> diff --git a/test/unit/util.test.lua b/test/unit/util.test.lua
> index 5f39e06..9550a95 100644
> --- a/test/unit/util.test.lua
> +++ b/test/unit/util.test.lua
> @@ -27,3 +27,52 @@ fib = util.reloadable_fiber_create('Worker_name', fake_M, 'reloadable_function')
>   while not test_run:grep_log('default', 'module is reloaded, restarting') do fiber.sleep(0.01) end
>   test_run:grep_log('default', 'reloadable_function has been started', 1000)
>   fib:cancel()
> +
> +-- Yielding table minus.
> +minus_yield = util.table_minus_yield
> +minus_yield({}, {}, 1)
> +minus_yield({}, {k = 1}, 1)
> +minus_yield({}, {k = 1}, 0)
> +minus_yield({k = 1}, {k = 1}, 0)
> +minus_yield({k1 = 1, k2 = 2}, {k1 = 1, k3 = 3}, 10)
> +minus_yield({k1 = 1, k2 = 2}, {k1 = 1, k2 = 2}, 10)
> +-- Mismatching values are not deleted.
> +minus_yield({k1 = 1}, {k1 = 2}, 10)
> +minus_yield({k1 = 1, k2 = 2, k3 = 3}, {k1 = 1, k2 = 222}, 10)
> +
> +do                                                                              \
> +    t = {k1 = 1, k2 = 2, k3 = 3, k4 = 4}                                        \
> +    yield_count = 0                                                             \
> +    f = fiber.create(function()                                                 \
> +        local csw1 = fiber.info()[fiber.id()].csw                               \
> +        minus_yield(t, {k2 = 2, k3 = 3, k5 = 5, k4 = 444}, 2)                   \
> +        local csw2 = fiber.info()[fiber.id()].csw                               \
> +        yield_count = csw2 - csw1                                               \
> +    end)                                                                        \
> +    test_run:wait_cond(function() return f:status() == 'dead' end)              \
> +end
> +yield_count
> +t
> +
> +-- Yielding table copy.
> +copy_yield = util.table_copy_yield
> +copy_yield({}, 1)
> +copy_yield({k = 1}, 1)
> +copy_yield({k1 = 1, k2 = 2}, 1)
> +
> +do                                                                              \
> +    t = {k1 = 1, k2 = 2, k3 = 3, k4 = 4}                                        \
> +    res = nil                                                                   \
> +    yield_count = 0                                                             \
> +    f = fiber.create(function()                                                 \
> +        local csw1 = fiber.info()[fiber.id()].csw                               \
> +        res = copy_yield(t, 2)                                                  \
> +        local csw2 = fiber.info()[fiber.id()].csw                               \
> +        yield_count = csw2 - csw1                                               \
> +    end)                                                                        \
> +    test_run:wait_cond(function() return f:status() == 'dead' end)              \
> +end
> +yield_count
> +t
> +res
> +t ~= res
> diff --git a/vshard/util.lua b/vshard/util.lua
> index d3b4e67..2362607 100644
> --- a/vshard/util.lua
> +++ b/vshard/util.lua
> @@ -153,6 +153,44 @@ local function version_is_at_least(major_need, middle_need, minor_need)
>       return minor >= minor_need
>   end
>   
> +--
> +-- Copy @a src table. Fiber yields every @a interval keys copied.
> +--
> +local function table_copy_yield(src, interval)
> +    local res = {}
> +    -- Time-To-Yield.
> +    local tty = interval
> +    for k, v in pairs(src) do
> +        res[k] = v
> +        tty = tty - 1
> +        if tty <= 0 then
> +            fiber.yield()
> +            tty = interval
> +        end
> +    end
> +    return res
> +end
> +
> +--
> +-- Remove @a src keys from @a dst if their values match. Fiber yields every
> +-- @a interval iterations.
> +--
> +local function table_minus_yield(dst, src, interval)
> +    -- Time-To-Yield.
> +    local tty = interval
> +    for k, srcv in pairs(src) do
> +        if dst[k] == srcv then
> +            dst[k] = nil
> +        end
> +        tty = tty - 1
> +        if tty <= 0 then
> +            fiber.yield()
> +            tty = interval
> +        end
> +    end
> +    return dst
> +end
> +
>   return {
>       tuple_extract_key = tuple_extract_key,
>       reloadable_fiber_create = reloadable_fiber_create,
> @@ -160,4 +198,6 @@ return {
>       async_task = async_task,
>       internal = M,
>       version_is_at_least = version_is_at_least,
> +    table_copy_yield = table_copy_yield,
> +    table_minus_yield = table_minus_yield,
>   }
>


More information about the Tarantool-patches mailing list