[patches] [fio 1/1] fio: Fix race condition in fio.read

Konstantin Osipov kostja at tarantool.org
Thu Mar 1 23:23:20 MSK 2018


* imarkov <imarkov at tarantool.org> [18/02/27 11:16]:
> When read followed by yield from multiple fibers is performed,
> ibuf shared among several fibers is corrupted and read data is
> mixed.

I think this is not an accurate description of the issue. 

fio.read() can yield because it uses coio_read(), and performs a
read in a worker pool thread. coio_read yields, and 
another fiber can start another fio.read() which will try to
read into the same buffer. 

In other words, yield does not follow fio.read(). It doesn't
happen during fio.read(), it happens *before* the actual read
is done. 

Based on what is really happening here, you can create a much
smaller test case - you don't need to write a large file any more,
a couple of files of any size (e.g. 100 bytes) will do.

Please update the comment and the test.

Please see also one more comment inline.

> 
> The fix is to create new ibuf for each fio.read call in case
> buffer is not specified.
> 
> Closes #3187
> ---
>  src/lua/buffer.lua    |  6 +++++
>  src/lua/fio.lua       |  5 ++--
>  test/app/fio.result   | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  test/app/fio.test.lua | 33 +++++++++++++++++++++++
>  4 files changed, 116 insertions(+), 2 deletions(-)
> 
> diff --git a/src/lua/buffer.lua b/src/lua/buffer.lua
> index a72d8d1..29be015 100644
> --- a/src/lua/buffer.lua
> +++ b/src/lua/buffer.lua
> @@ -128,6 +128,11 @@ local function ibuf_read(buf, size)
>      return rpos
>  end
>  
> +local function ibuf_destroy(buf)
> +    checkibuf(buf, "destroy")
> +    ffi.C.ibuf_destroy(buf)
> +end
> +
>  local function ibuf_serialize(buf)
>      local properties = { rpos = buf.rpos, wpos = buf.wpos }
>      return { ibuf = properties }
> @@ -136,6 +141,7 @@ end
>  local ibuf_methods = {
>      recycle = ibuf_recycle;
>      reset = ibuf_reset;
> +    destroy = ibuf_destroy,
>  
>      reserve = ibuf_reserve;
>      alloc = ibuf_alloc;
> diff --git a/src/lua/fio.lua b/src/lua/fio.lua
> index d48c709..8797fba 100644
> --- a/src/lua/fio.lua
> +++ b/src/lua/fio.lua
> @@ -40,14 +40,14 @@ fio_methods.read = function(self, buf, size)
>      end
>      if not ffi.istype(const_char_ptr_t, buf) then
>          size = buf or size
> -        tmpbuf = buffer.IBUF_SHARED
> -        tmpbuf:reset()
> +        tmpbuf = buffer.ibuf()
>          buf = tmpbuf:reserve(size)
>      end
>      local res, err = internal.read(self.fh, buf, size)
>      if res == nil then
>          if tmpbuf ~= nil then
>              tmpbuf:recycle()
> +            tmpbuf:destroy()
>          end
>          return nil, err
>      end
> @@ -55,6 +55,7 @@ fio_methods.read = function(self, buf, size)
>          tmpbuf:alloc(res)
>          res = ffi.string(tmpbuf.rpos, tmpbuf:size())
>          tmpbuf:recycle()
> +        tmpbuf:destroy()
>      end
>      return res
>  end
> diff --git a/test/app/fio.result b/test/app/fio.result
> index 0fc8582..1f12384 100644
> --- a/test/app/fio.result
> +++ b/test/app/fio.result
> @@ -1067,6 +1067,80 @@ fio.unlink(tmpfile)
>  ---
>  - true
>  ...
> +tmp1 = fio.pathjoin(tmpdir, "tmp1")
> +---
> +...
> +tmp2= fio.pathjoin(tmpdir, "tmp2")
> +---
> +...
> +test_run:cmd("setopt delimiter ';'")
> +---
> +- true
> +...
> +function write_big_file(name, odd)
> +    local fh = fio.open(name, { 'O_RDWR', 'O_TRUNC', 'O_CREAT' }, 0777)
> +    if odd then
> +        for i = 1, 100000, 2 do
> +            fh:write(i)
> +        end

It doesn't have to be so complex and slow. You can have a file 
with all ones (string.rep('1', 100)) and another file with all
'2's.

Usually you should avoid making things more complex than they need
to be, please always challenge yourself at that.

> +    else
> +        for i = 2, 100000,2 do
> +            fh:write(i)
> +        end
> +    end
> +    fh:write(name)
> +    fh:seek(0)
> +    return fh
> +end;
> +---
> +...
> +test_run:cmd("setopt delimiter ''");
> +---
> +- true
> +...
> +fh1 = write_big_file(tmp1)
> +---
> +...
> +fh2 = write_big_file(tmp2)
> +---
> +...
> +fiber = require('fiber')
> +---
> +...
> +digest = require('digest')
> +---
> +...
> +str = fh1:read()
> +---
> +...
> +fh1:seek(0)
> +---
> +- 0
> +...
> +hash = digest.crc32(str)
> +---
> +...
> +ch = fiber.channel(1)
> +---
> +...
> +f1 = fiber.create(function() str = fh1:read() ch:put(digest.crc32(str)) end)
> +---
> +...
> +f2 = fiber.create(function() str = fh2:read() end)
> +---
> +...
> +ch:get() == hash
> +---
> +- true
> +...
> +fio.unlink(tmp1)
> +---
> +- true
> +...
> +fio.unlink(tmp2)
> +---
> +- true
> +...
>  fio.rmdir(tmpdir)
>  ---
>  - true
> diff --git a/test/app/fio.test.lua b/test/app/fio.test.lua
> index cd0bc16..8c37baa 100644
> --- a/test/app/fio.test.lua
> +++ b/test/app/fio.test.lua
> @@ -342,4 +342,37 @@ fio.path.lexists(link)
>  
>  fio.unlink(link)
>  fio.unlink(tmpfile)
> +tmp1 = fio.pathjoin(tmpdir, "tmp1")
> +tmp2= fio.pathjoin(tmpdir, "tmp2")
> +test_run:cmd("setopt delimiter ';'")
> +function write_big_file(name, odd)
> +    local fh = fio.open(name, { 'O_RDWR', 'O_TRUNC', 'O_CREAT' }, 0777)
> +    if odd then
> +        for i = 1, 100000, 2 do
> +            fh:write(i)
> +        end
> +    else
> +        for i = 2, 100000,2 do
> +            fh:write(i)
> +        end
> +    end
> +    fh:write(name)
> +    fh:seek(0)
> +    return fh
> +end;
> +test_run:cmd("setopt delimiter ''");
> +fh1 = write_big_file(tmp1)
> +fh2 = write_big_file(tmp2)
> +fiber = require('fiber')
> +digest = require('digest')
> +str = fh1:read()
> +fh1:seek(0)
> +hash = digest.crc32(str)
> +ch = fiber.channel(1)
> +f1 = fiber.create(function() str = fh1:read() ch:put(digest.crc32(str)) end)
> +f2 = fiber.create(function() str = fh2:read() end)
> +ch:get() == hash
> +
> +fio.unlink(tmp1)
> +fio.unlink(tmp2)
>  fio.rmdir(tmpdir)
> -- 
> 2.7.4

-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.org - www.twitter.com/kostja_osipov



More information about the Tarantool-patches mailing list