[Tarantool-patches] [PATCH v3] lua: fix running init lua script

Artem artemreyt at tarantool.org
Mon Dec 21 17:23:40 MSK 2020


Hello, Sergey!

Thanks for your review! See my answers below.

Branch is updated.

18.12.2020 15:59, Sergey Bronnikov пишет:
> Hello, Artem
>
> thanks for the patch! See 4 minor comments below.
>
> On 16.12.2020 18:57, Artem Starshov wrote:
>> When tarantool launched with -e flag and in
>> script after there is an error, programm hangs.
> 1. "program"
1. Done.
>> This happens because shed fiber launches separate
>> fiber for init user script and starts auxiliary
>> event loop. It's supposed that fiber will stop
>> this loop, but in case of error in script, fiber
>> tries to stop a loop when the last one isn't
>> started yet.
>>
>> Added a flag, which will watch is loop started and
>> when fiber tries to call `ev_break()` we can be sure
>> that loop is running already.
>>
>> Fixes #4983
>> ---
>> @ChangeLog: Fixed a hang when tarantool launched with the -e option 
>> and users
>> script throws an error (`tarantool -e 'assert(false)'`) (gh-4983).
>>
>> Changes in v2:
>>      - Test commit squashed with code fix commit;
>>      - Fixed comments in test;
>>      - Fixed commit message (no stairs);
>>      - Closes -> Fixes;
>>      - Removed small bugs in test;
>>      - Moved test to tap.test(name, func).
>>
>> Changes in v3:
>>      - Added more information to change log;
>>      - 'src/lua` -> 'lua' for patch name;
>>      - Code-style fixes in test.
>>
>> Branch: 
>> https://github.com/tarantool/tarantool/tree/artemreyt/gh-4983-tnt-e-assert-false-hangs
>> Issue: https://github.com/tarantool/tarantool/issues/4983
>>
>>   src/lua/init.c                                |  9 ++
>>   .../gh-4983-tnt-e-assert-false-hangs.test.lua | 92 +++++++++++++++++++
>>   2 files changed, 101 insertions(+)
>>   create mode 100755 
>> test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua
>>
>> diff --git a/src/lua/init.c b/src/lua/init.c
>> index a0b2fc775..92bdb82c0 100644
>> --- a/src/lua/init.c
>> +++ b/src/lua/init.c
>> @@ -569,6 +569,7 @@ run_script_f(va_list ap)
>>        * never really dead. It never returns from its function.
>>        */
>>       struct diag *diag = va_arg(ap, struct diag *);
>> +    bool aux_loop_is_run = false;
>>         /*
>>        * Load libraries and execute chunks passed by -l and -e
>> @@ -611,6 +612,7 @@ run_script_f(va_list ap)
>>        * loop and re-schedule this fiber.
>>        */
>>       fiber_sleep(0.0);
>> +    aux_loop_is_run = true;
>>         if (path && strcmp(path, "-") != 0 && access(path, F_OK) == 0) {
>>           /* Execute script. */
>> @@ -650,6 +652,13 @@ run_script_f(va_list ap)
>>        * return control back to tarantool_lua_run_script.
>>        */
>>   end:
>> +    /*
>> +     * Auxiliary loop in tarantool_lua_run_script
>> +     * should start (ev_run()) before this fiber
>> +     * invokes ev_break().
>> +     */
>> +    if (!aux_loop_is_run)
>> +        fiber_sleep(0.0);
>>       ev_break(loop(), EVBREAK_ALL);
>>       return 0;
>>   diff --git a/test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua 
>> b/test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua
>> new file mode 100755
>> index 000000000..378ac229c
>> --- /dev/null
>> +++ b/test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua
>
> 2. test always failed when it is running with high concurrency:
>
> ../../test/test-run.py 
> --builddir=/home/s.bronnikov/work/tarantool/build 
> --vardir=/home/s.bronnikov/work/tantool/build/test/var \
>
> -j100 $(yes app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua | head 
> -n 1000)
>
2. Fixed by increasing timeout on waiting for process, opening log file 
and reading the last one.

(From 2 seconds to 5).

>> @@ -0,0 +1,92 @@
>> +#!/usr/bin/env tarantool
>> +
>> +local fiber = require('fiber')
>> +local clock = require('clock')
>> +local ffi = require('ffi')
>> +local fio = require('fio')
>> +local errno = require('errno')
>> +local tap = require('tap')
>> +
>> +--
>> +-- gh-4983: tarantool -e 'assert(false)' hangs
>> +--
>> +
>> +-- For process_is_alive.
>> +ffi.cdef([[
>> +    int kill(pid_t pid, int sig);
>> +]])
>> +
>> +---  Verify whether a process is alive.
>> +local function process_is_alive(pid)
>> +    local rc = ffi.C.kill(pid, 0)
>> +    return rc == 0 or errno() ~= errno.ESRCH
>> +end
>> +
>> +--- Returns true if process completed before timeout, false otherwise.
>> +local function wait_process_completion(pid, timeout)
>> +    local start_time = clock.monotonic()
>> +    local process_completed = false
>> +    while clock.monotonic() - start_time < timeout do
>> +        if not process_is_alive(pid) then
>> +            process_completed = true
>> +            break
>> +        end
>> +    end
>> +    return process_completed
>> +end
>> +
>> +--- Open file on reading with timeout.
>> +local function open_with_timeout(filename, timeout)
>> +    local fh
>> +    local start_time = clock.monotonic()
>> +    while not fh and clock.monotonic() - start_time < timeout do
>> +        fh = fio.open(filename, {'O_RDONLY'})
>> +    end
>> +    return fh
>> +end
>> +
>> +--- Try to read from file with timeout with interval.
>> +local function read_with_timeout(fh, timeout, interval)
>> +    local data = ''
>> +    local start_time = clock.monotonic()
>> +    while #data == 0 and clock.monotonic() - start_time < timeout do
>> +        data = fh:read()
>> +        if #data == 0 then fiber.sleep(interval) end
>> +    end
>> +    return data
>> +end
>> +
>> +local TARANTOOL_PATH = arg[-1]
>> +local output_file = fio.abspath('out.txt')
>> +local line = ('%s -e "assert(false)" > %s 2>&1 & echo $!'):
>> +                        format(TARANTOOL_PATH, output_file)
>> +local process_waiting_timeout = 2.0
>> +local file_read_timeout = 2.0
>> +local file_read_interval = 0.2
>> +local file_open_timeout = 2.0
>> +
>> +local res = tap.test('gh-4983-tnt-e-assert-false-hangs', function(test)
>> +    test:plan(2)
>> +
>> +    local pid = tonumber(io.popen(line):read("*line"))
>
> 3. Why are you using double quotes here if above you are using single 
> quotes?
>
> Make using quotes consistent in a whole test. See below too.
>
3. Done.
>> +    assert(pid, "pid of proccess can't be recieved")
>> +
>> +    local process_completed = wait_process_completion(pid,
>> +            process_waiting_timeout)
>> +
>> +    test:ok(process_completed,
>> +            ('tarantool process with pid = %d completed'):format(pid))
>> +
>> +    -- Kill process if hangs.
>> +    if not process_completed then ffi.C.kill(pid, 9) end
>> +
>> +    local fh = open_with_timeout(output_file, file_open_timeout)
>> +    assert(fh, 'error while opening ' .. output_file)
>> +
>> +    local data = read_with_timeout(fh, file_read_timeout, 
>> file_read_interval)
>> +    test:like(data, "assertion failed", "assertion failure is 
>> displayed")
>> +
>> +    fh:close()
>> +end)
>> +
>> +os.exit(res and 0 or 1)
>> 4. add a newline at the end of file
4. Done.


More information about the Tarantool-patches mailing list