From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp46.i.mail.ru (smtp46.i.mail.ru [94.100.177.106]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 207A84765E0 for ; Mon, 21 Dec 2020 17:23:42 +0300 (MSK) References: <27bae760920c0fe2647dff3b537f320f9d59c488.1608134096.git.artemreyt@tarantool.org> From: Artem Message-ID: Date: Mon, 21 Dec 2020 17:23:40 +0300 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset="utf-8"; format="flowed" Content-Transfer-Encoding: 8bit Content-Language: ru Subject: Re: [Tarantool-patches] [PATCH v3] lua: fix running init lua script List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Sergey Bronnikov , Alexander Turenko Cc: tarantool-patches@dev.tarantool.org Hello, Sergey! Thanks for your review! See my answers below. Branch is updated. 18.12.2020 15:59, Sergey Bronnikov пишет: > Hello, Artem > > thanks for the patch! See 4 minor comments below. > > On 16.12.2020 18:57, Artem Starshov wrote: >> When tarantool launched with -e flag and in >> script after there is an error, programm hangs. > 1. "program" 1. Done. >> This happens because shed fiber launches separate >> fiber for init user script and starts auxiliary >> event loop. It's supposed that fiber will stop >> this loop, but in case of error in script, fiber >> tries to stop a loop when the last one isn't >> started yet. >> >> Added a flag, which will watch is loop started and >> when fiber tries to call `ev_break()` we can be sure >> that loop is running already. >> >> Fixes #4983 >> --- >> @ChangeLog: Fixed a hang when tarantool launched with the -e option >> and users >> script throws an error (`tarantool -e 'assert(false)'`) (gh-4983). >> >> Changes in v2: >>      - Test commit squashed with code fix commit; >>      - Fixed comments in test; >>      - Fixed commit message (no stairs); >>      - Closes -> Fixes; >>      - Removed small bugs in test; >>      - Moved test to tap.test(name, func). >> >> Changes in v3: >>      - Added more information to change log; >>      - 'src/lua` -> 'lua' for patch name; >>      - Code-style fixes in test. >> >> Branch: >> https://github.com/tarantool/tarantool/tree/artemreyt/gh-4983-tnt-e-assert-false-hangs >> Issue: https://github.com/tarantool/tarantool/issues/4983 >> >>   src/lua/init.c                                |  9 ++ >>   .../gh-4983-tnt-e-assert-false-hangs.test.lua | 92 +++++++++++++++++++ >>   2 files changed, 101 insertions(+) >>   create mode 100755 >> test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua >> >> diff --git a/src/lua/init.c b/src/lua/init.c >> index a0b2fc775..92bdb82c0 100644 >> --- a/src/lua/init.c >> +++ b/src/lua/init.c >> @@ -569,6 +569,7 @@ run_script_f(va_list ap) >>        * never really dead. It never returns from its function. >>        */ >>       struct diag *diag = va_arg(ap, struct diag *); >> +    bool aux_loop_is_run = false; >>         /* >>        * Load libraries and execute chunks passed by -l and -e >> @@ -611,6 +612,7 @@ run_script_f(va_list ap) >>        * loop and re-schedule this fiber. >>        */ >>       fiber_sleep(0.0); >> +    aux_loop_is_run = true; >>         if (path && strcmp(path, "-") != 0 && access(path, F_OK) == 0) { >>           /* Execute script. */ >> @@ -650,6 +652,13 @@ run_script_f(va_list ap) >>        * return control back to tarantool_lua_run_script. >>        */ >>   end: >> +    /* >> +     * Auxiliary loop in tarantool_lua_run_script >> +     * should start (ev_run()) before this fiber >> +     * invokes ev_break(). >> +     */ >> +    if (!aux_loop_is_run) >> +        fiber_sleep(0.0); >>       ev_break(loop(), EVBREAK_ALL); >>       return 0; >>   diff --git a/test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua >> b/test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua >> new file mode 100755 >> index 000000000..378ac229c >> --- /dev/null >> +++ b/test/app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua > > 2. test always failed when it is running with high concurrency: > > ../../test/test-run.py > --builddir=/home/s.bronnikov/work/tarantool/build > --vardir=/home/s.bronnikov/work/tantool/build/test/var \ > > -j100 $(yes app-tap/gh-4983-tnt-e-assert-false-hangs.test.lua | head > -n 1000) > 2. Fixed by increasing timeout on waiting for process, opening log file and reading the last one. (From 2 seconds to 5). >> @@ -0,0 +1,92 @@ >> +#!/usr/bin/env tarantool >> + >> +local fiber = require('fiber') >> +local clock = require('clock') >> +local ffi = require('ffi') >> +local fio = require('fio') >> +local errno = require('errno') >> +local tap = require('tap') >> + >> +-- >> +-- gh-4983: tarantool -e 'assert(false)' hangs >> +-- >> + >> +-- For process_is_alive. >> +ffi.cdef([[ >> +    int kill(pid_t pid, int sig); >> +]]) >> + >> +---  Verify whether a process is alive. >> +local function process_is_alive(pid) >> +    local rc = ffi.C.kill(pid, 0) >> +    return rc == 0 or errno() ~= errno.ESRCH >> +end >> + >> +--- Returns true if process completed before timeout, false otherwise. >> +local function wait_process_completion(pid, timeout) >> +    local start_time = clock.monotonic() >> +    local process_completed = false >> +    while clock.monotonic() - start_time < timeout do >> +        if not process_is_alive(pid) then >> +            process_completed = true >> +            break >> +        end >> +    end >> +    return process_completed >> +end >> + >> +--- Open file on reading with timeout. >> +local function open_with_timeout(filename, timeout) >> +    local fh >> +    local start_time = clock.monotonic() >> +    while not fh and clock.monotonic() - start_time < timeout do >> +        fh = fio.open(filename, {'O_RDONLY'}) >> +    end >> +    return fh >> +end >> + >> +--- Try to read from file with timeout with interval. >> +local function read_with_timeout(fh, timeout, interval) >> +    local data = '' >> +    local start_time = clock.monotonic() >> +    while #data == 0 and clock.monotonic() - start_time < timeout do >> +        data = fh:read() >> +        if #data == 0 then fiber.sleep(interval) end >> +    end >> +    return data >> +end >> + >> +local TARANTOOL_PATH = arg[-1] >> +local output_file = fio.abspath('out.txt') >> +local line = ('%s -e "assert(false)" > %s 2>&1 & echo $!'): >> +                        format(TARANTOOL_PATH, output_file) >> +local process_waiting_timeout = 2.0 >> +local file_read_timeout = 2.0 >> +local file_read_interval = 0.2 >> +local file_open_timeout = 2.0 >> + >> +local res = tap.test('gh-4983-tnt-e-assert-false-hangs', function(test) >> +    test:plan(2) >> + >> +    local pid = tonumber(io.popen(line):read("*line")) > > 3. Why are you using double quotes here if above you are using single > quotes? > > Make using quotes consistent in a whole test. See below too. > 3. Done. >> +    assert(pid, "pid of proccess can't be recieved") >> + >> +    local process_completed = wait_process_completion(pid, >> +            process_waiting_timeout) >> + >> +    test:ok(process_completed, >> +            ('tarantool process with pid = %d completed'):format(pid)) >> + >> +    -- Kill process if hangs. >> +    if not process_completed then ffi.C.kill(pid, 9) end >> + >> +    local fh = open_with_timeout(output_file, file_open_timeout) >> +    assert(fh, 'error while opening ' .. output_file) >> + >> +    local data = read_with_timeout(fh, file_read_timeout, >> file_read_interval) >> +    test:like(data, "assertion failed", "assertion failure is >> displayed") >> + >> +    fh:close() >> +end) >> + >> +os.exit(res and 0 or 1) >> 4. add a newline at the end of file 4. Done.