From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Tue, 18 Jun 2019 19:23:09 +0300 From: Vladimir Davydov Subject: Re: [PATCH v3 6/6] box: introduce Lua persistent functions Message-ID: <20190618162309.agkhc7lp6i4c2wsx@esperanza> References: <0b5ebd5a39cfe1e2d964c1e29ee4c9f09fe1a751.1560433806.git.kshcherbatov@tarantool.org> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <0b5ebd5a39cfe1e2d964c1e29ee4c9f09fe1a751.1560433806.git.kshcherbatov@tarantool.org> To: Kirill Shcherbatov Cc: tarantool-patches@freelists.org List-ID: On Thu, Jun 13, 2019 at 05:08:26PM +0300, Kirill Shcherbatov wrote: > This patch changes a format of the _func system space > to support persistent Lua functions. The format of the > system space is > > [ UINT, UINT, STR, UINT, > STR, STR, BOOL, > , MAP] I guess this part could be dropped as it's covered by the docbot request right below. > > Closes #4182 > Needed for #1260 > > @TarantoolBot document > Title: Persistent Lua functions > > Now Tarantool supports 'persistent' Lua functions. > Such functions are stored in snapshoot and are available after snapshot > restart. > To create a persistent Lua function, specify a function body > in box.schema.func.create call: > e.g. body = "function(a, b) return a + b end" > > A Lua persistent function may be 'sandboxed'. The 'sandboxed' > function is executed in isolated environment: > a. only limited set of Lua functions and modules are available: > -assert -error -pairs -ipairs -next -pcall -xpcall -type > -print -select -string -tonumber -tostring -unpack -math -utf8; > b. global variables are forbidden > > Finally, the new 'is_deterministic' flag allows to mark a > registered function as deterministic, i.e. the function that > can produce only one result for a given list of parameters. > > The new box.schema.func.create interface is: > box.schema.func.create('funcname', , > , , > , , > ) Why is FALSE written in CAPS? :) > diff --git a/src/box/alter.cc b/src/box/alter.cc > index 081786820..5232a56c1 100644 > --- a/src/box/alter.cc > +++ b/src/box/alter.cc > @@ -2569,6 +2587,17 @@ func_def_new_from_tuple(struct tuple *tuple) > /* Lua is the default. */ > def->language = FUNC_LANGUAGE_LUA; > } > + if (field_count > BOX_FUNC_FIELD_BODY) { > + assert(field_count > BOX_FUNC_FIELD_OPTS); One can try to insert a tuple that has 'body', but doesn't have 'opts' field. We shouldn't crash in this case, but rather fail gracefully. So please remove this assertion. > + def->is_deterministic = > + tuple_field_bool_xc(tuple, > + BOX_FUNC_FIELD_IS_DETERMINISTIC); > + def->is_sandboxed = > + tuple_field_bool_xc(tuple, > + BOX_FUNC_FIELD_IS_SANDBOXED); > + } else { > + def->is_deterministic = false; > + } > def_guard.is_active = false; > return def; > } > diff --git a/src/box/func_def.h b/src/box/func_def.h > index 7df6678d8..4886d5440 100644 > --- a/src/box/func_def.h > +++ b/src/box/func_def.h > @@ -58,11 +58,24 @@ struct func_def { > uint32_t fid; > /** Owner of the function. */ > uint32_t uid; > + /** Definition of the persistent function. */ > + char *body; > /** > * True if the function requires change of user id before > * invocation. > */ > bool setuid; > + /** > + * Whether this function is deterministic (can produce > + * only one result for a given list of parameters). > + */ > + bool is_deterministic; > + /** > + * Whether the routine mast be initialized with isolated s/mast/must > + * sandbox where only a limited number if functions is > + * available. > + */ > + bool is_sandboxed; > /** > * The language of the stored function. > */ > diff --git a/src/box/lua/call.c b/src/box/lua/call.c > index 90c28a160..43445373f 100644 > --- a/src/box/lua/call.c > +++ b/src/box/lua/call.c > @@ -573,22 +573,142 @@ box_lua_eval(const char *expr, uint32_t expr_len, > struct func_lua { > /** Function object base class. */ > struct func base; > + /** > + * The reference index of Lua function object. > + * Is equal to LUA_REFNIL when undefined. IMO better say: For a persistent function: a reference to the function body. Otherwise LUA_REFNIL. > + */ > + int lua_ref; > }; > > static struct func_vtab func_lua_vtab; > +static struct func_vtab func_persistent_lua_vtab; > + > +static const char *default_sandbox_exports[] = { > + "assert", "error", "ipairs", "math", "next", "pairs", "pcall", "print", > + "select", "string", "table", "tonumber", "tostring", "type", "unpack", > + "xpcall", "utf8", > +}; > + > +/** > + * Assemble a new sandbox with given exports table on top of the > + * Lua stack. All modules in exports list are copying deeply > + * to ensure the immutablility of this system object. > + */ > +static int > +luaT_prepare_sandbox(struct lua_State *L, const char *exports[], Let's call this function simply 'prepare_lua_sandbox'. > + uint32_t exports_count) Should be named 'export_count'. Also, please use 'int'. > +{ > + int count, rc = -1; > + const char *deepcopy = "table.deepcopy"; > + int luaL_deepcopy_func_ref = LUA_REFNIL; > + if (luaT_func_find(L, deepcopy, deepcopy + strlen(deepcopy), > + &count) != 0) > + goto end; > + luaL_deepcopy_func_ref = luaL_ref(L, LUA_REGISTRYINDEX); > + assert(luaL_deepcopy_func_ref != LUA_REFNIL); > + lua_createtable(L, exports_count, 0); > + for (unsigned i = 0; i < exports_count; i++) { > + int count; > + uint32_t name_len = strlen(exports[i]); > + if (luaT_func_find(L, exports[i], exports[i] + name_len, > + &count) != 0) > + goto end; > + switch (lua_type(L, -1)) { > + case LUA_TTABLE: > + lua_rawgeti(L, LUA_REGISTRYINDEX, > + luaL_deepcopy_func_ref); > + lua_insert(L, -2); > + lua_call(L, 1, LUA_MULTRET); > + FALLTHROUGH; You don't really need to FALLTHROUGH here. Let's please use 'break'. > + case LUA_TFUNCTION: > + break; > + default: > + unreachable(); > + } > + lua_setfield(L, -2, exports[i]); > + } > + rc = 0; > +end: > + luaL_unref(tarantool_L, LUA_REGISTRYINDEX, luaL_deepcopy_func_ref); > + return rc; > +} > + > +static int > +func_persistent_lua_load(struct lua_State *L) A comment would be welcome. > +{ > + struct func_lua *func = (struct func_lua *) lua_topointer(L, 1); > + assert(func->base.def->body != NULL); > + lua_settop(L, 0); > + > + struct region *region = &fiber()->gc; > + size_t region_svp = region_used(region); > + const char *load_pref = "return "; > + uint32_t load_str_sz = > + strlen(load_pref) + strlen(func->base.def->body) + 1; > + char *load_str = region_alloc(region, load_str_sz); > + if (load_str == NULL) { > + diag_set(OutOfMemory, load_str_sz, "region", "load_str"); > + goto error; > + } > + sprintf(load_str, "%s%s", load_pref, func->base.def->body); > + if (luaL_loadstring(L, load_str) != 0 || > + luaT_call(L, 0, 1) != 0 || !lua_isfunction(L, -1)) { > + diag_set(ClientError, ER_LOAD_FUNCTION, func->base.def->name, > + func->base.def->body); Should be an error message here, not a function body. I think we should pass diag_last_error()->errmsg. > + goto error; > + } > + if (func->base.def->is_sandboxed) { > + if (luaT_prepare_sandbox(L, default_sandbox_exports, > + nelem(default_sandbox_exports)) != 0) { > + diag_set(ClientError, ER_LOAD_FUNCTION, > + func->base.def->name, func->base.def->body); Ditto. > + goto error; > + } > + lua_setfenv(L, -2); > + } > + func->lua_ref = luaL_ref(L, LUA_REGISTRYINDEX); > + region_truncate(region, region_svp); > + return lua_gettop(L); > +error: > + region_truncate(region, region_svp); > + return luaT_error(L); > +} > > struct func * > func_lua_new(struct func_def *def) > { > - (void) def; > assert(def->language == FUNC_LANGUAGE_LUA); > + if (def->is_sandboxed && def->body == NULL) { > + diag_set(ClientError, ER_CREATE_FUNCTION, def->name, > + "is_sandboxed option may be set only for persistent " > + "Lua function (when body option is set)"); > + return NULL; > + } > struct func_lua *func = > (struct func_lua *) malloc(sizeof(struct func_lua)); > if (func == NULL) { > diag_set(OutOfMemory, sizeof(*func), "malloc", "func"); > return NULL; > } > - func->base.vtab = &func_lua_vtab; > + if (def->body != NULL) { > + /** Load persistent Lua function. */ > + struct lua_State *L = lua_newthread(tarantool_L); > + int coro_ref = luaL_ref(tarantool_L, LUA_REGISTRYINDEX); > + lua_pushcfunction(L, func_persistent_lua_load); > + func->base.def = def; > + lua_pushlightuserdata(L, func); > + int rc = luaT_call(L, 1, LUA_MULTRET); Is this really necessary? Can't you call func_persistent_lua_load directly? AFAICS it doesn't throw any exceptions. > + luaL_unref(tarantool_L, LUA_REGISTRYINDEX, coro_ref); > + if (rc != 0) { > + free(func); > + return NULL; > + } > + assert(func->lua_ref != LUA_REFNIL); > + func->base.vtab = &func_persistent_lua_vtab; > + } else { > + func->lua_ref = LUA_REFNIL; > + func->base.vtab = &func_lua_vtab; > + } > return &func->base; > } > > @@ -614,6 +734,53 @@ static struct func_vtab func_lua_vtab = { > .destroy = func_lua_unload, > }; > > +static void > +func_persistent_lua_unload(struct func *base) func_persistent_lua_load takes lua_State while this function takes struct func. Looks inconsistent. > +{ > + assert(base != NULL && base->def->language == FUNC_LANGUAGE_LUA && > + base->def->body != NULL); > + assert(base->vtab == &func_persistent_lua_vtab); > + struct func_lua *func = (struct func_lua *) base; > + luaL_unref(tarantool_L, LUA_REGISTRYINDEX, func->lua_ref); > + func->lua_ref = LUA_REFNIL; > +} > + > +static int > +func_persistent_lua_call_cb(lua_State *L) I'd try to make this function more generic: name it execute_lua_call_by_ref and pass lua_ref to it instead of func_lua. We could probably reuse the same context we use for executing CALL/EVAL (lua_exec_ctx). > +{ > + struct port *in_port = (struct port *) lua_topointer(L, 1); > + struct func_lua *func = (struct func_lua *) lua_topointer(L, 2); > + lua_settop(L, 0); /* clear the stack to simplify the logic below */ > + > + assert(func->lua_ref != LUA_REFNIL); > + lua_rawgeti(L, LUA_REGISTRYINDEX, func->lua_ref); > + > + /* Push the rest of args (a tuple). */ > + int top = lua_gettop(L); > + port_dump_lua(in_port, L, true); > + int arg_count = lua_gettop(L) - top; > + > + lua_call(L, arg_count, LUA_MULTRET); > + return lua_gettop(L); > +} > + > +static inline int > +func_persistent_lua_call(struct func *base, struct port *in_port, > + struct port *out_port) > +{ > + assert(base != NULL && base->def->language == FUNC_LANGUAGE_LUA && > + base->def->body != NULL); > + assert(base->vtab == &func_persistent_lua_vtab); > + return box_process_lua(func_persistent_lua_call_cb, base, > + in_port, out_port); > + > +} > + > +static struct func_vtab func_persistent_lua_vtab = { > + .call = func_persistent_lua_call, > + .destroy = func_persistent_lua_unload, > +}; > + > static int > lbox_module_reload(lua_State *L) > { > diff --git a/src/box/lua/schema.lua b/src/box/lua/schema.lua > index 9c3ee063c..9d8df54dc 100644 > --- a/src/box/lua/schema.lua > +++ b/src/box/lua/schema.lua > @@ -820,10 +821,32 @@ local function create_vcollation_space() > box.space[box.schema.VCOLLATION_ID]:format(format) > end > > +local function upgrade_func_to_2_2_1() > + log.info("Update _func format") > + local _func = box.space[box.schema.FUNC_ID] > + local format = {} > + format[1] = {name='id', type='unsigned'} > + format[2] = {name='owner', type='unsigned'} > + format[3] = {name='name', type='string'} > + format[4] = {name='setuid', type='unsigned'} > + format[5] = {name='language', type='string'} > + format[6] = {name='body', type='string'} > + format[7] = {name='is_deterministic', type='boolean'} > + format[8] = {name='is_sandboxed', type='boolean'} > + format[9] = {name='opts', type='map'} > + for _, v in box.space._func:pairs() do > + _ = box.space._func:replace({v.id, v.owner, v.name, v.setuid, > + v[5] or 'LUA', '', false, false, Why not v.language? > + setmap({})}) > + end > + _func:format(format) > +end > + > local function upgrade_to_2_2_1() > upgrade_sequence_to_2_2_1() > upgrade_ck_constraint_to_2_2_1() > create_vcollation_space() > + upgrade_func_to_2_2_1() > end > > -------------------------------------------------------------------------------- > diff --git a/test/box/function1.result b/test/box/function1.result > index 331bd466a..172f56082 100644 > --- a/test/box/function1.result > +++ b/test/box/function1.result > @@ -504,3 +510,120 @@ box.schema.func.drop('secret_leak') > box.schema.func.drop('secret') > --- > ... > +-- > +-- gh-4182: Introduce persistent Lua functions. > +-- > +test_run:cmd("setopt delimiter ';'") > +--- > +- true > +... > +body = [[function(tuple) > + if type(tuple.address) ~= 'string' then > + return nil, 'Invalid field type' > + end > + local t = tuple.address:upper():split() > + for k,v in pairs(t) do t[k] = v end > + return t > + end > +]] > +test_run:cmd("setopt delimiter ''"); > +--- > +... > +box.schema.func.create('addrsplit', {body = body, language = "C"}) > +--- > +- error: 'Failed to create function ''addrsplit'': body and is_sandboxed options are > + not compatible with C language' > +... > +box.schema.func.create('addrsplit', {is_sandboxed = true, language = "C"}) > +--- > +- error: 'Failed to create function ''addrsplit'': body and is_sandboxed options are > + not compatible with C language' > +... > +box.schema.func.create('addrsplit', {is_sandboxed = true}) > +--- > +- error: 'Failed to create function ''addrsplit'': is_sandboxed option may be set > + only for persistent Lua function (when body option is set)' > +... > +box.schema.func.create('invalid', {body = "function(tuple) ret tuple"}) > +--- > +- error: 'Failed to dynamically load function ''invalid'': function(tuple) ret tuple' > +... Please also check what happens when body is loaded okay, but happens to be not a function.