Hi, Sergey, On 19.02.2025 12:34, Sergey Bronnikov via Tarantool-patches wrote: > > Hi, Sergey, > > thanks for review! > > On 18.02.2025 18:43, Sergey Kaplun via Tarantool-patches wrote: >> Hi, Sergey! >> Thanks for the fixes! >> After refactoring the code become more readable, thanks! >> Now I have a few ideas, see my comments below. >> >> On 13.02.25, Sergey Bronnikov wrote: >>> sysprof has a number of options and with any incorrect option it >>> returns `false` and error message "profiler misuse". This message >>> discourage sysprof users and make using sysprof more complicated. >> Typo: s/discourage/discourages/ >> Typo: s/make/makes/ > Fixed. >>> The patch sets default profiling mode ("D", that shows only >> Typo: s/default/the default/ > Fixed. >>> virtual machine state counters) if it was not passed and adds >>> details to the error message with possible reasons of misuse. >>> --- >>> src/lib_misc.c | 80 +++++++++++++------ >>> src/lj_errmsg.h | 5 ++ >>> .../profilers/misclib-sysprof-lapi.test.lua | 48 +++++++++-- >>> 3 files changed, 100 insertions(+), 33 deletions(-) >>> >>> diff --git a/src/lib_misc.c b/src/lib_misc.c >>> index 5b7a4b62..d71904e4 100644 >>> --- a/src/lib_misc.c >>> +++ b/src/lib_misc.c >>> @@ -163,6 +163,7 @@ static int on_stop_cb_default(void *opt, uint8_t *buf) >>> >>> /* The default profiling interval equals to 10 ms. */ >>> #define SYSPROF_DEFAULT_INTERVAL 10 >>> +#define SYSPROF_DEFAULT_MODE "D" >>> #define SYSPROF_DEFAULT_OUTPUT "sysprof.bin" >>> >>> static int set_output_path(const char *path, struct luam_Sysprof_Options *opt) { >>> @@ -177,21 +178,41 @@ static int set_output_path(const char *path, struct luam_Sysprof_Options *opt) { >>> return PROFILE_SUCCESS; >>> } >>> >>> -static int parse_sysprof_opts(lua_State *L, struct luam_Sysprof_Options *opt, int idx) { >>> - GCtab *options = lj_lib_checktab(L, idx); >>> +static int parse_sysprof_opts(lua_State *L, struct luam_Sysprof_Options *opt, >>> + const char **err_details) { >>> + int n = (int)(L->top - L->base); >>> + if (n != 1) { >> I suppose this should be `n == 0`. Otherwise we will observe the >> following behaviour: >> | src/luajit -e 'print(misc.sysprof.start(1, 2, 3))' >> | true > Right, fixed. >>> + opt->mode = LUAM_SYSPROF_DEFAULT; >>> + opt->interval = SYSPROF_DEFAULT_INTERVAL; >>> + goto set_path; >> I suppose it is better to set path explicitly here and goto ctx_allocate; > > The code that make a jump to the middle of basic block smells bad. > > It makes control flow more complicated, and benefits are not obvious. > Verbally decided to fix as the following: --- a/src/lib_misc.c +++ b/src/lib_misc.c @@ -184,7 +184,7 @@ static int parse_sysprof_opts(lua_State *L, struct luam_Sysprof_Options *opt,    if (n == 0) {      opt->mode = LUAM_SYSPROF_DEFAULT;      opt->interval = SYSPROF_DEFAULT_INTERVAL; -    goto set_path; +    return PROFILE_SUCCESS;    }    if (!lua_istable(L, 1)) { @@ -244,8 +244,6 @@ static int parse_sysprof_opts(lua_State *L, struct luam_Sysprof_Options *opt,      }    } -set_path: -    /* Get output path. */    if (opt->mode != LUAM_SYSPROF_DEFAULT)    { >>> + } >>> + >>> + if (!lua_istable(L, 1)) { >> The more I think about it, the more it looks like a bug -- all library >> functions raise an error when they get a bad parameter type. >> >> Maybe we should just use `lj_lib_checktab()` here? This will help the >> user to find an error and be consistent with memprof behaviour. So, >> bugfix isn't breaking change, since something is working incorrectly. >> Plus, as a bonus we don't need to introduce the new error with the same >> meaning we have already. >> >> OTOH, if you are against it, we may leave it as is, but check it via >> `tvistab()` instead. >> >> What do you think? >> >> And the same should be done with the parameters in table content -- >> their type should be checked at once and the corresponding error should >> be raised. I suppose we can introduce a local (inside this translation >> unit) helper `key_opt_type()` here -- we will check a type for the keys >> `path`, `mode`, `interval` and raise the user-friendly error like: > > What for? The reason of this patch series is bad error handling in > profilers. > > The value behind this refactoring is not obvious. Please elaborate. > > This code was imperfect for about 2-3 years and this was ok for everyone > > who added it initially. I don't want to fix all bad places here, just > improve error messages visible by end users. > Verbally discussed making as the following: --- a/src/lib_misc.c +++ b/src/lib_misc.c @@ -188,11 +188,6 @@ static int parse_sysprof_opts(lua_State *L, struct luam_Sysprof_Options *opt,      return PROFILE_SUCCESS;    } -  if (!lua_istable(L, 1)) { -    *err_details = err2msg(LJ_ERR_PROF_DETAILS_BADTABLE); -    return PROFILE_ERRUSE; -  } -    GCtab *options = lj_lib_checktab(L, 1);    /* Get profiling mode. */ >> `bad key 'mode' in 'start' argument (string expected, got table)`. >> See how it is done in `lj_lib_check*()` for inspiration. >> >>> + *err_details = err2msg(LJ_ERR_PROF_DETAILS_BADTABLE); >>> + return PROFILE_ERRUSE; >>> + } >>> + >>> + GCtab *options = lj_lib_checktab(L, 1); >> This check is excess, let's move it above, as I suggested. > > it is not a check, it's a function that retrieves a table from the stack. > >>> >>> /* Get profiling mode. */ >>> { >>> const char *mode = NULL; >>> >>> cTValue *mode_opt = lj_tab_getstr(options, lj_str_newlit(L, "mode")); >>> - if (!mode_opt || !tvisstr(mode_opt)) { >>> - return PROFILE_ERRUSE; >>> + if (mode_opt) { >>> + if (!tvisstr(mode_opt)) { >>> + *err_details = err2msg(LJ_ERR_PROF_DETAILS_BADMODE); >>> + return PROFILE_ERRUSE; >>> + } >>> + mode = strVdata(mode_opt); >>> + if (strlen(mode) > 0 && mode[1] != '\0') { >> Minor: It's better to use `(strlen(mode) == 0) || mode[1] != '\0'` -- we >> don't got further in code below, just early return here. > > Fixed. > > >>> + *err_details = err2msg(LJ_ERR_PROF_DETAILS_BADMODE); >>> + return PROFILE_ERRUSE; >>> + } >>> } >>> >> >> >>> +set_path: >>> + >>> /* Get output path. */ >>> if (opt->mode != LUAM_SYSPROF_DEFAULT) >>> { >>> @@ -230,8 +256,10 @@ static int parse_sysprof_opts(lua_State *L, struct luam_Sysprof_Options *opt, in >>> cTValue *pathtv = lj_tab_getstr(options, lj_str_newlit(L, "path")); >>> if (!pathtv) >>> path = SYSPROF_DEFAULT_OUTPUT; >>> - else if (!tvisstr(pathtv)) >>> + else if (!tvisstr(pathtv)) { >> It is better to wrap if and else branches too like the following: >> | if () { >> | } else if () { >> | } else { >> | } > > It's funny, on the previous review brackets were excess [1]. > > 1. > https://lists.tarantool.org/tarantool-patches/Z6XlUsKqu92__8fL@root/T/#t > > Brackets are not needed in the first and third blocks, if we're > following a rule "brackets for blocks with more than one statements". >>> + *err_details = err2msg(LJ_ERR_PROF_DETAILS_BADPATH); >>> return PROFILE_ERRUSE; >>> + } >>> else >>> path = strVdata(pathtv); >>> >>> @@ -251,29 +279,28 @@ static int parse_sysprof_opts(lua_State *L, struct luam_Sysprof_Options *opt, in >>> return PROFILE_SUCCESS; >>> } >>> >>> -static int parse_options(lua_State *L, struct luam_Sysprof_Options *opt) >>> -{ >>> - if (lua_gettop(L) != 1) >>> - return PROFILE_ERRUSE; >>> - >>> - if (!lua_istable(L, 1)) >>> - return PROFILE_ERRUSE; >>> - >>> - return parse_sysprof_opts(L, opt, 1); >>> -} >>> - >>> -static int sysprof_error(lua_State *L, int status) >>> +static int sysprof_error(lua_State *L, int status, const char *err_details) >>> { >>> switch (status) { >>> case PROFILE_ERRUSE: >>> lua_pushnil(L); >>> lua_pushstring(L, err2msg(LJ_ERR_PROF_MISUSE)); >>> + if (err_details) { >>> + lua_pushstring(L, ": "); >> I suppose we may use string formatting here instead of concatenation Lua >> C API call: >> See usage of `lj_strfmt_pushf()` for the details. > > Sure, we may, but what for? > > Previous functions are a part of Lua C API (lua_pushnil, lua_pushstring), > > why do we need to mix Lua C functions with this LJ-specific function? > Verbally decided to do as the following: @@ -283,22 +282,20 @@ static int sysprof_error(lua_State *L, int status, const char *err_details)    switch (status) {      case PROFILE_ERRUSE:        lua_pushnil(L); -      lua_pushstring(L, err2msg(LJ_ERR_PROF_MISUSE));        if (err_details) { -        lua_pushstring(L, ": "); -        lua_pushstring(L, err_details); -        lua_concat(L, 3); +        lj_strfmt_pushf(L, "%s: %s", err2msg(LJ_ERR_PROF_MISUSE), err_details); +      } else { +        lua_pushstring(L, err2msg(LJ_ERR_PROF_MISUSE));        }        lua_pushinteger(L, EINVAL);        return 3;  #if LJ_HASSYSPROF      case PROFILE_ERRRUN:        lua_pushnil(L); -      lua_pushstring(L, err2msg(LJ_ERR_PROF_ISRUNNING));        if (err_details) { -        lua_pushstring(L, ": "); -        lua_pushstring(L, err_details); -        lua_concat(L, 3); +        lj_strfmt_pushf(L, "%s: %s", err2msg(LJ_ERR_PROF_MISUSE), err_details); +      } else { +        lua_pushstring(L, err2msg(LJ_ERR_PROF_ISRUNNING));        }        lua_pushinteger(L, EINVAL);        return 3; >> Also, we may use nice helper to avoid code copy-pasting. >> >>> + lua_pushstring(L, err_details); >>> + lua_concat(L, 3); >>> + } >>> lua_pushinteger(L, EINVAL); >>> return 3; >>> #if LJ_HASSYSPROF >>> case PROFILE_ERRRUN: >>> lua_pushnil(L); >>> lua_pushstring(L, err2msg(LJ_ERR_PROF_ISRUNNING)); >>> + if (err_details) { >>> + lua_pushstring(L, ": "); >>> + lua_pushstring(L, err_details); >>> + lua_concat(L, 3); >>> + } >>> lua_pushinteger(L, EINVAL); >>> return 3; >>> case PROFILE_ERRIO: >>> @@ -291,15 +318,16 @@ LJLIB_CF(misc_sysprof_start) >> >> >>> diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h >>> index 19c41f0b..b5c3a275 100644 >>> --- a/src/lj_errmsg.h >>> +++ b/src/lj_errmsg.h >>> @@ -188,6 +188,11 @@ ERRDEF(PROF_ISRUNNING, "profiler is running already") >>> ERRDEF(PROF_NOTRUNNING, "profiler is not running") >>> #endif >>> >>> +ERRDEF(PROF_DETAILS_BADMODE, "profiler mode must be 'D', 'L' or 'C'") >>> +ERRDEF(PROF_DETAILS_BADINTERVAL, "profiler interval must be greater than 1") >>> +ERRDEF(PROF_DETAILS_BADPATH, "profiler path does not exist") >>> +ERRDEF(PROF_DETAILS_BADTABLE, "profiler expects a table with parameters") >>> + >> These changes should be under the LJ_HASSYSPROF ifdef. >> Also, I suggest the following naming: >> | SYSPROF_BADMODE >> | SYSPROF_BADINTERVAL >> With this it will be obviour that they are sysprof-specific and not too >> long. > > Imagine you then introduce a table and interval number for memprof. > > Will you rename constants back? I dislike your suggestion. > >>> #undef ERRDEF >>> >>> /* Detecting unused error messages: >>> diff --git a/test/tarantool-tests/profilers/misclib-sysprof-lapi.test.lua b/test/tarantool-tests/profilers/misclib-sysprof-lapi.test.lua >>> index 32fa384c..7622323a 100644 >>> --- a/test/tarantool-tests/profilers/misclib-sysprof-lapi.test.lua >>> +++ b/test/tarantool-tests/profilers/misclib-sysprof-lapi.test.lua >>> @@ -10,7 +10,7 @@ local test = tap.test("misclib-sysprof-lapi"):skipcond({ >>> ["Disabled due to #10803"] = os.getenv("LUAJIT_TEST_USE_VALGRIND"), >>> }) >>> >>> -test:plan(19) >>> +test:plan(33) >>> >>> jit.off() >>> -- XXX: Run JIT tuning functions in a safe frame to avoid errors >>> @@ -65,10 +65,25 @@ end >>> >>> -- Wrong profiling mode. >>> local res, err, errno = misc.sysprof.start{ mode = "A" } >>> -test:ok(res == nil anderr:match("profiler misuse"), >>> - "result status with wrong profiling mode") >>> +test:ok(res == nil, "result status with wrong profiling mode") >>> +test:ok(err:match("profiler mode must be 'D', 'L' or 'C'"), >>> + "error with wrong profiling mode") >> I would rather still check matching with `profiler misuse:` error (as a >> separate testcase). Here and below. But it's up to you. Feel free to >> ignore. > I think that checking specific part of error message is enough. >>> test:ok(type(errno) == "number", "errno with wrong profiling mode") >>> >>> +-- Missed profiling mode. >>> +res, err, errno = misc.sysprof.start{} >>> +test:is(res, true, "res with missed profiling mode") >>> +test:is(err, nil, "no error with missed profiling mode") >>> +test:is(errno, nil, "no errno with missed profiling mode") >>> +assert(misc.sysprof.stop()) >>> + >>> +-- Not a table. >> This should be changed to the pcall() after the changes I suggested. >> >>> +res, err, errno = misc.sysprof.start("NOT A TABLE") >>> +test:ok(res == nil, "res with not a table") >>> +test:ok(err:match("profiler expects a table with parameters"), >>> + "error with not a table") >>> +test:ok(type(errno) == "number", "errno with not a table") >> Please also check the case with multiple arguments (2 cases: all >> invalid, and only first valid). > > Added: > > 254: ok - res with all invalid parameters > 254: ok - error with all invalid parameters > 254: ok - errno with all invalid parameters > 254: ok - res with all invalid parameters except the first one > 254: ok - error with all invalid parameters except the first one > 254: ok - errno with all invalid parameters except the first one > > >>> + >>> -- Already running. >>> res, err = misc.sysprof.start{ mode = "D" } >>> assert(res, err) >>> @@ -92,11 +107,30 @@test:ok(res == nil anderr:match("No such file or directory"), >>> "result status and error with bad path") >>> test:ok(type(errno) == "number", "errno with bad path") >>> >>> --- Bad interval. >>> +-- Bad interval (-1). >>> res, err, errno = misc.sysprof.start{ mode = "C", interval = -1 } >>> -test:ok(res == nil anderr:match("profiler misuse"), >>> - "result status and error with bad interval") >>> -test:ok(type(errno) == "number", "errno with bad interval") >>> +test:is(res, nil, "result status and error with bad interval -1") >>> +test:ok(err:match("profiler interval must be greater than 1"), >>> + "error with bad interval -1") >>> +test:ok(type(errno) == "number", "errno with bad interval -1") >>> + >>> +-- Bad interval (0). >>> +res, err, errno = misc.sysprof.start{ mode = "C", interval = 0 } >>> +test:ok(res == nil, "res with bad interval 0") >>> +test:ok(err:match("profiler interval must be greater than 1"), >>> + "error with bad interval 0") >>> +test:ok(type(errno) == "number", "errno with bad interval 0") >>> + >>> +-- Good interval (1). >>> +res, err, errno = misc.sysprof.start{ >> Minor: Please use brackets for functions call. > > Case without brackets is more popular: > >   local res, err = misc.sysprof.start(opts) >   res,err = misc.sysprof.stop() > local res, err, errno = misc.sysprof.start{ mode = "A" } > res, err, errno = misc.sysprof.start{} > assert(misc.sysprof.stop()) > res, err, errno = misc.sysprof.start("NOT A TABLE") > res, err = misc.sysprof.start{ mode = "D" } > res, err, errno = misc.sysprof.start{ mode = "D" } > res, err = misc.sysprof.stop() > res, err, errno = misc.sysprof.stop() > res, err, errno = misc.sysprof.start({ mode = "C", path = BAD_PATH }) > res, err, errno = misc.sysprof.start{ mode = "C", interval = -1 } > res, err, errno = misc.sysprof.start{ mode = "C", interval = 0 } > res, err, errno = misc.sysprof.start{ > misc.sysprof.stop() > local report = misc.sysprof.report() > report = misc.sysprof.report() > > proposed change will make coding style inconsistent in the file > >>> + mode = "C", >>> + interval = 1, >>> + path = "/dev/null", >>> +} >>> +test:is(res, true, "res with good interval 1") >>> +test:is(err, nil, "no error with good interval 1") >>> +test:is(errno, nil, "no errno with good interval 1") >>> +misc.sysprof.stop() >>> >>> -- DEFAULT MODE >>> >>> -- >>> 2.34.1 >>>