[Tarantool-patches] [PATCH v4] lua: fix running init lua script

Sergey Bronnikov sergeyb at tarantool.org
Fri Dec 25 15:44:03 MSK 2020


Artem,

thanks for the patch!

LGTM

On 24.12.2020 23:41, Artem Starshov wrote:
> When tarantool launched with -e flag and in
> script after there is an error, program hangs.
> This happens because shed fiber launches separate
> fiber for init user script and starts auxiliary
> event loop. It's supposed that fiber will stop
> this loop, but in case of error in script, fiber
> tries to stop a loop when the last one isn't
> started yet.
>
> Added a flag, which will watch is loop started and
> when fiber tries to call `ev_break()` we can be sure
> that loop is running already.
>
> Fixes #4983
> ---
> @ChangeLog: Fixed a hang when tarantool launched with the -e option and users
> script throws an error (`tarantool -e 'assert(false)'`) (gh-4983).
>
> Changes in v2:
>      - Test commit squashed with code fix commit;
>      - Fixed comments in test;
>      - Fixed commit message (no stairs);
>      - Closes -> Fixes;
>      - Removed small bugs in test;
>      - Moved test to tap.test(name, func).
>
> Changes in v3:
>      - Added more information to change log;
>      - 'src/lua` -> 'lua' for patch name;
>      - Code-style fixes in test.
>
> Changes in v4:
>      - Set timeout for operations like for cond_var wait in test-run
>      - Added module `process_timeout` in test/app-tap/lua for further using
>
> Branch: https://github.com/tarantool/tarantool/tree/artemreyt/gh-4983-tnt-e-assert-false-hangs
> Issue: https://github.com/tarantool/tarantool/issues/4983
>
>   src/lua/init.c                                |  9 +++
>   .../gh-4983-tnt-e-assert-false-hangs.test.lua | 46 +++++++++++++++
>   test/app-tap/lua/process_timeout.lua          | 57 +++++++++++++++++++
>   test/app-tap/suite.ini                        |  2 +-
>   4 files changed, 113 insertions(+), 1 deletion(-)
>   create mode 100755 test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua
>   create mode 100644 test/app-tap/lua/process_timeout.lua
>
> diff --git a/src/lua/init.c b/src/lua/init.c
> index a0b2fc775..92bdb82c0 100644
> --- a/src/lua/init.c
> +++ b/src/lua/init.c
> @@ -569,6 +569,7 @@ run_script_f(va_list ap)
>   	 * never really dead. It never returns from its function.
>   	 */
>   	struct diag *diag = va_arg(ap, struct diag *);
> +	bool aux_loop_is_run = false;
>   
>   	/*
>   	 * Load libraries and execute chunks passed by -l and -e
> @@ -611,6 +612,7 @@ run_script_f(va_list ap)
>   	 * loop and re-schedule this fiber.
>   	 */
>   	fiber_sleep(0.0);
> +	aux_loop_is_run = true;
>   
>   	if (path && strcmp(path, "-") != 0 && access(path, F_OK) == 0) {
>   		/* Execute script. */
> @@ -650,6 +652,13 @@ run_script_f(va_list ap)
>   	 * return control back to tarantool_lua_run_script.
>   	 */
>   end:
> +	/*
> +	 * Auxiliary loop in tarantool_lua_run_script
> +	 * should start (ev_run()) before this fiber
> +	 * invokes ev_break().
> +	 */
> +	if (!aux_loop_is_run)
> +		fiber_sleep(0.0);
>   	ev_break(loop(), EVBREAK_ALL);
>   	return 0;
>   
> diff --git a/test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua b/test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua
> new file mode 100755
> index 000000000..891a56ade
> --- /dev/null
> +++ b/test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua
> @@ -0,0 +1,46 @@
> +#!/usr/bin/env tarantool
> +
> +local common = require('process_timeout')
> +local tap = require('tap')
> +local fio = require('fio')
> +
> +--
> +-- gh-4983: tarantool -e 'assert(false)' hangs
> +--
> +
> +local TARANTOOL_PATH = arg[-1]
> +local output_file = fio.abspath('out.txt')
> +local line = ('%s -e "assert(false)" > %s 2>&1 & echo $!'):
> +        format(TARANTOOL_PATH, output_file)
> +
> +-- Like a default timeout for `cond_wait` in test-run
> +local process_waiting_timeout = 60.0
> +local file_read_timeout = 60.0
> +local file_open_timeout = 60.0
> +local file_read_interval = 0.2
> +
> +local res = tap.test('gh-4983-tnt-e-assert-false-hangs', function(test)
> +    test:plan(2)
> +
> +    local pid = tonumber(io.popen(line):read('*line'))
> +    assert(pid, 'pid of proccess can\'t be recieved')
> +
> +    local process_completed = common.wait_process_completion(pid,
> +            process_waiting_timeout)
> +
> +    test:ok(process_completed,
> +            ('tarantool process with pid = %d completed'):format(pid))
> +
> +    -- Kill process if hangs.
> +    if not process_completed then ffi.C.kill(pid, 9) end
> +
> +    local fh = common.open_with_timeout(output_file, file_open_timeout)
> +    assert(fh, 'error while opening ' .. output_file)
> +
> +    local data = common.read_with_timeout(fh, file_read_timeout, file_read_interval)
> +    test:like(data, 'assertion failed', 'assertion failure is displayed')
> +
> +    fh:close()
> +end)
> +
> +os.exit(res and 0 or 1)
> diff --git a/test/app-tap/lua/process_timeout.lua b/test/app-tap/lua/process_timeout.lua
> new file mode 100644
> index 000000000..141fb0e43
> --- /dev/null
> +++ b/test/app-tap/lua/process_timeout.lua
> @@ -0,0 +1,57 @@
> +local fiber = require('fiber')
> +local clock = require('clock')
> +local ffi = require('ffi')
> +local fio = require('fio')
> +local errno = require('errno')
> +
> +-- For process_is_alive.
> +ffi.cdef([[
> +    int kill(pid_t pid, int sig);
> +]])
> +
> +---  Verify whether a process is alive.
> +local function process_is_alive(pid)
> +    local rc = ffi.C.kill(pid, 0)
> +    return rc == 0 or errno() ~= errno.ESRCH
> +end
> +
> +--- Returns true if process completed before timeout, false otherwise.
> +local function wait_process_completion(pid, timeout)
> +    local start_time = clock.monotonic()
> +    local process_completed = false
> +    while clock.monotonic() - start_time < timeout do
> +        if not process_is_alive(pid) then
> +            process_completed = true
> +            break
> +        end
> +    end
> +    return process_completed
> +end
> +
> +--- Open file on reading with timeout.
> +local function open_with_timeout(filename, timeout)
> +    local fh
> +    local start_time = clock.monotonic()
> +    while not fh and clock.monotonic() - start_time < timeout do
> +        fh = fio.open(filename, {'O_RDONLY'})
> +    end
> +    return fh
> +end
> +
> +--- Try to read from file with timeout with interval.
> +local function read_with_timeout(fh, timeout, interval)
> +    local data = ''
> +    local start_time = clock.monotonic()
> +    while #data == 0 and clock.monotonic() - start_time < timeout do
> +        data = fh:read()
> +        if #data == 0 then fiber.sleep(interval) end
> +    end
> +    return data
> +end
> +
> +return {
> +    process_is_alive = process_is_alive,
> +    wait_process_completion = wait_process_completion,
> +    open_with_timeout = open_with_timeout,
> +    read_with_timeout = read_with_timeout
> +}
> diff --git a/test/app-tap/suite.ini b/test/app-tap/suite.ini
> index 2f16128f9..6e27f88d6 100644
> --- a/test/app-tap/suite.ini
> +++ b/test/app-tap/suite.ini
> @@ -1,7 +1,7 @@
>   [default]
>   core = app
>   description = application server tests (TAP)
> -lua_libs = lua/require_mod.lua lua/serializer_test.lua
> +lua_libs = lua/require_mod.lua lua/serializer_test.lua lua/process_timeout.lua
>   is_parallel = True
>   pretest_clean = True
>   use_unix_sockets_iproto = True


More information about the Tarantool-patches mailing list