[PATCH v3 6/6] box: introduce Lua persistent functions
Vladimir Davydov
vdavydov.dev at gmail.com
Tue Jun 18 19:23:09 MSK 2019
On Thu, Jun 13, 2019 at 05:08:26PM +0300, Kirill Shcherbatov wrote:
> This patch changes a format of the _func system space
> to support persistent Lua functions. The format of the
> system space is
>
> [<id> UINT, <owner> UINT, <name> STR, <setuid> UINT,
> <language> STR, <body> STR, <is_deterministic> BOOL,
> <is_sandboxed>, <opts> MAP]
I guess this part could be dropped as it's covered by the docbot request
right below.
>
> Closes #4182
> Needed for #1260
>
> @TarantoolBot document
> Title: Persistent Lua functions
>
> Now Tarantool supports 'persistent' Lua functions.
> Such functions are stored in snapshoot and are available after
snapshot
> restart.
> To create a persistent Lua function, specify a function body
> in box.schema.func.create call:
> e.g. body = "function(a, b) return a + b end"
>
> A Lua persistent function may be 'sandboxed'. The 'sandboxed'
> function is executed in isolated environment:
> a. only limited set of Lua functions and modules are available:
> -assert -error -pairs -ipairs -next -pcall -xpcall -type
> -print -select -string -tonumber -tostring -unpack -math -utf8;
> b. global variables are forbidden
>
> Finally, the new 'is_deterministic' flag allows to mark a
> registered function as deterministic, i.e. the function that
> can produce only one result for a given list of parameters.
>
> The new box.schema.func.create interface is:
> box.schema.func.create('funcname', <setuid = true|FALSE>,
> <if_not_exists = true|FALSE>, <language = LUA|c>,
> <body = string ('')>, <is_deterministic = true|FALSE>,
> <is_sandboxed = true|FALSE>)
Why is FALSE written in CAPS? :)
> diff --git a/src/box/alter.cc b/src/box/alter.cc
> index 081786820..5232a56c1 100644
> --- a/src/box/alter.cc
> +++ b/src/box/alter.cc
> @@ -2569,6 +2587,17 @@ func_def_new_from_tuple(struct tuple *tuple)
> /* Lua is the default. */
> def->language = FUNC_LANGUAGE_LUA;
> }
> + if (field_count > BOX_FUNC_FIELD_BODY) {
> + assert(field_count > BOX_FUNC_FIELD_OPTS);
One can try to insert a tuple that has 'body', but doesn't have 'opts'
field. We shouldn't crash in this case, but rather fail gracefully. So
please remove this assertion.
> + def->is_deterministic =
> + tuple_field_bool_xc(tuple,
> + BOX_FUNC_FIELD_IS_DETERMINISTIC);
> + def->is_sandboxed =
> + tuple_field_bool_xc(tuple,
> + BOX_FUNC_FIELD_IS_SANDBOXED);
> + } else {
> + def->is_deterministic = false;
> + }
> def_guard.is_active = false;
> return def;
> }
> diff --git a/src/box/func_def.h b/src/box/func_def.h
> index 7df6678d8..4886d5440 100644
> --- a/src/box/func_def.h
> +++ b/src/box/func_def.h
> @@ -58,11 +58,24 @@ struct func_def {
> uint32_t fid;
> /** Owner of the function. */
> uint32_t uid;
> + /** Definition of the persistent function. */
> + char *body;
> /**
> * True if the function requires change of user id before
> * invocation.
> */
> bool setuid;
> + /**
> + * Whether this function is deterministic (can produce
> + * only one result for a given list of parameters).
> + */
> + bool is_deterministic;
> + /**
> + * Whether the routine mast be initialized with isolated
s/mast/must
> + * sandbox where only a limited number if functions is
> + * available.
> + */
> + bool is_sandboxed;
> /**
> * The language of the stored function.
> */
> diff --git a/src/box/lua/call.c b/src/box/lua/call.c
> index 90c28a160..43445373f 100644
> --- a/src/box/lua/call.c
> +++ b/src/box/lua/call.c
> @@ -573,22 +573,142 @@ box_lua_eval(const char *expr, uint32_t expr_len,
> struct func_lua {
> /** Function object base class. */
> struct func base;
> + /**
> + * The reference index of Lua function object.
> + * Is equal to LUA_REFNIL when undefined.
IMO better say:
For a persistent function: a reference to the function body.
Otherwise LUA_REFNIL.
> + */
> + int lua_ref;
> };
>
> static struct func_vtab func_lua_vtab;
> +static struct func_vtab func_persistent_lua_vtab;
> +
> +static const char *default_sandbox_exports[] = {
> + "assert", "error", "ipairs", "math", "next", "pairs", "pcall", "print",
> + "select", "string", "table", "tonumber", "tostring", "type", "unpack",
> + "xpcall", "utf8",
> +};
> +
> +/**
> + * Assemble a new sandbox with given exports table on top of the
> + * Lua stack. All modules in exports list are copying deeply
> + * to ensure the immutablility of this system object.
> + */
> +static int
> +luaT_prepare_sandbox(struct lua_State *L, const char *exports[],
Let's call this function simply 'prepare_lua_sandbox'.
> + uint32_t exports_count)
Should be named 'export_count'. Also, please use 'int'.
> +{
> + int count, rc = -1;
> + const char *deepcopy = "table.deepcopy";
> + int luaL_deepcopy_func_ref = LUA_REFNIL;
> + if (luaT_func_find(L, deepcopy, deepcopy + strlen(deepcopy),
> + &count) != 0)
> + goto end;
> + luaL_deepcopy_func_ref = luaL_ref(L, LUA_REGISTRYINDEX);
> + assert(luaL_deepcopy_func_ref != LUA_REFNIL);
> + lua_createtable(L, exports_count, 0);
> + for (unsigned i = 0; i < exports_count; i++) {
> + int count;
> + uint32_t name_len = strlen(exports[i]);
> + if (luaT_func_find(L, exports[i], exports[i] + name_len,
> + &count) != 0)
> + goto end;
> + switch (lua_type(L, -1)) {
> + case LUA_TTABLE:
> + lua_rawgeti(L, LUA_REGISTRYINDEX,
> + luaL_deepcopy_func_ref);
> + lua_insert(L, -2);
> + lua_call(L, 1, LUA_MULTRET);
> + FALLTHROUGH;
You don't really need to FALLTHROUGH here. Let's please use 'break'.
> + case LUA_TFUNCTION:
> + break;
> + default:
> + unreachable();
> + }
> + lua_setfield(L, -2, exports[i]);
> + }
> + rc = 0;
> +end:
> + luaL_unref(tarantool_L, LUA_REGISTRYINDEX, luaL_deepcopy_func_ref);
> + return rc;
> +}
> +
> +static int
> +func_persistent_lua_load(struct lua_State *L)
A comment would be welcome.
> +{
> + struct func_lua *func = (struct func_lua *) lua_topointer(L, 1);
> + assert(func->base.def->body != NULL);
> + lua_settop(L, 0);
> +
> + struct region *region = &fiber()->gc;
> + size_t region_svp = region_used(region);
> + const char *load_pref = "return ";
> + uint32_t load_str_sz =
> + strlen(load_pref) + strlen(func->base.def->body) + 1;
> + char *load_str = region_alloc(region, load_str_sz);
> + if (load_str == NULL) {
> + diag_set(OutOfMemory, load_str_sz, "region", "load_str");
> + goto error;
> + }
> + sprintf(load_str, "%s%s", load_pref, func->base.def->body);
> + if (luaL_loadstring(L, load_str) != 0 ||
> + luaT_call(L, 0, 1) != 0 || !lua_isfunction(L, -1)) {
> + diag_set(ClientError, ER_LOAD_FUNCTION, func->base.def->name,
> + func->base.def->body);
Should be an error message here, not a function body. I think we should
pass diag_last_error()->errmsg.
> + goto error;
> + }
> + if (func->base.def->is_sandboxed) {
> + if (luaT_prepare_sandbox(L, default_sandbox_exports,
> + nelem(default_sandbox_exports)) != 0) {
> + diag_set(ClientError, ER_LOAD_FUNCTION,
> + func->base.def->name, func->base.def->body);
Ditto.
> + goto error;
> + }
> + lua_setfenv(L, -2);
> + }
> + func->lua_ref = luaL_ref(L, LUA_REGISTRYINDEX);
> + region_truncate(region, region_svp);
> + return lua_gettop(L);
> +error:
> + region_truncate(region, region_svp);
> + return luaT_error(L);
> +}
>
> struct func *
> func_lua_new(struct func_def *def)
> {
> - (void) def;
> assert(def->language == FUNC_LANGUAGE_LUA);
> + if (def->is_sandboxed && def->body == NULL) {
> + diag_set(ClientError, ER_CREATE_FUNCTION, def->name,
> + "is_sandboxed option may be set only for persistent "
> + "Lua function (when body option is set)");
> + return NULL;
> + }
> struct func_lua *func =
> (struct func_lua *) malloc(sizeof(struct func_lua));
> if (func == NULL) {
> diag_set(OutOfMemory, sizeof(*func), "malloc", "func");
> return NULL;
> }
> - func->base.vtab = &func_lua_vtab;
> + if (def->body != NULL) {
> + /** Load persistent Lua function. */
> + struct lua_State *L = lua_newthread(tarantool_L);
> + int coro_ref = luaL_ref(tarantool_L, LUA_REGISTRYINDEX);
> + lua_pushcfunction(L, func_persistent_lua_load);
> + func->base.def = def;
> + lua_pushlightuserdata(L, func);
> + int rc = luaT_call(L, 1, LUA_MULTRET);
Is this really necessary? Can't you call func_persistent_lua_load
directly? AFAICS it doesn't throw any exceptions.
> + luaL_unref(tarantool_L, LUA_REGISTRYINDEX, coro_ref);
> + if (rc != 0) {
> + free(func);
> + return NULL;
> + }
> + assert(func->lua_ref != LUA_REFNIL);
> + func->base.vtab = &func_persistent_lua_vtab;
> + } else {
> + func->lua_ref = LUA_REFNIL;
> + func->base.vtab = &func_lua_vtab;
> + }
> return &func->base;
> }
>
> @@ -614,6 +734,53 @@ static struct func_vtab func_lua_vtab = {
> .destroy = func_lua_unload,
> };
>
> +static void
> +func_persistent_lua_unload(struct func *base)
func_persistent_lua_load takes lua_State while this function takes
struct func. Looks inconsistent.
> +{
> + assert(base != NULL && base->def->language == FUNC_LANGUAGE_LUA &&
> + base->def->body != NULL);
> + assert(base->vtab == &func_persistent_lua_vtab);
> + struct func_lua *func = (struct func_lua *) base;
> + luaL_unref(tarantool_L, LUA_REGISTRYINDEX, func->lua_ref);
> + func->lua_ref = LUA_REFNIL;
> +}
> +
> +static int
> +func_persistent_lua_call_cb(lua_State *L)
I'd try to make this function more generic: name it
execute_lua_call_by_ref and pass lua_ref to it instead of
func_lua. We could probably reuse the same context we use
for executing CALL/EVAL (lua_exec_ctx).
> +{
> + struct port *in_port = (struct port *) lua_topointer(L, 1);
> + struct func_lua *func = (struct func_lua *) lua_topointer(L, 2);
> + lua_settop(L, 0); /* clear the stack to simplify the logic below */
> +
> + assert(func->lua_ref != LUA_REFNIL);
> + lua_rawgeti(L, LUA_REGISTRYINDEX, func->lua_ref);
> +
> + /* Push the rest of args (a tuple). */
> + int top = lua_gettop(L);
> + port_dump_lua(in_port, L, true);
> + int arg_count = lua_gettop(L) - top;
> +
> + lua_call(L, arg_count, LUA_MULTRET);
> + return lua_gettop(L);
> +}
> +
> +static inline int
> +func_persistent_lua_call(struct func *base, struct port *in_port,
> + struct port *out_port)
> +{
> + assert(base != NULL && base->def->language == FUNC_LANGUAGE_LUA &&
> + base->def->body != NULL);
> + assert(base->vtab == &func_persistent_lua_vtab);
> + return box_process_lua(func_persistent_lua_call_cb, base,
> + in_port, out_port);
> +
> +}
> +
> +static struct func_vtab func_persistent_lua_vtab = {
> + .call = func_persistent_lua_call,
> + .destroy = func_persistent_lua_unload,
> +};
> +
> static int
> lbox_module_reload(lua_State *L)
> {
> diff --git a/src/box/lua/schema.lua b/src/box/lua/schema.lua
> index 9c3ee063c..9d8df54dc 100644
> --- a/src/box/lua/schema.lua
> +++ b/src/box/lua/schema.lua
> @@ -820,10 +821,32 @@ local function create_vcollation_space()
> box.space[box.schema.VCOLLATION_ID]:format(format)
> end
>
> +local function upgrade_func_to_2_2_1()
> + log.info("Update _func format")
> + local _func = box.space[box.schema.FUNC_ID]
> + local format = {}
> + format[1] = {name='id', type='unsigned'}
> + format[2] = {name='owner', type='unsigned'}
> + format[3] = {name='name', type='string'}
> + format[4] = {name='setuid', type='unsigned'}
> + format[5] = {name='language', type='string'}
> + format[6] = {name='body', type='string'}
> + format[7] = {name='is_deterministic', type='boolean'}
> + format[8] = {name='is_sandboxed', type='boolean'}
> + format[9] = {name='opts', type='map'}
> + for _, v in box.space._func:pairs() do
> + _ = box.space._func:replace({v.id, v.owner, v.name, v.setuid,
> + v[5] or 'LUA', '', false, false,
Why not v.language?
> + setmap({})})
> + end
> + _func:format(format)
> +end
> +
> local function upgrade_to_2_2_1()
> upgrade_sequence_to_2_2_1()
> upgrade_ck_constraint_to_2_2_1()
> create_vcollation_space()
> + upgrade_func_to_2_2_1()
> end
>
> --------------------------------------------------------------------------------
> diff --git a/test/box/function1.result b/test/box/function1.result
> index 331bd466a..172f56082 100644
> --- a/test/box/function1.result
> +++ b/test/box/function1.result
> @@ -504,3 +510,120 @@ box.schema.func.drop('secret_leak')
> box.schema.func.drop('secret')
> ---
> ...
> +--
> +-- gh-4182: Introduce persistent Lua functions.
> +--
> +test_run:cmd("setopt delimiter ';'")
> +---
> +- true
> +...
> +body = [[function(tuple)
> + if type(tuple.address) ~= 'string' then
> + return nil, 'Invalid field type'
> + end
> + local t = tuple.address:upper():split()
> + for k,v in pairs(t) do t[k] = v end
> + return t
> + end
> +]]
> +test_run:cmd("setopt delimiter ''");
> +---
> +...
> +box.schema.func.create('addrsplit', {body = body, language = "C"})
> +---
> +- error: 'Failed to create function ''addrsplit'': body and is_sandboxed options are
> + not compatible with C language'
> +...
> +box.schema.func.create('addrsplit', {is_sandboxed = true, language = "C"})
> +---
> +- error: 'Failed to create function ''addrsplit'': body and is_sandboxed options are
> + not compatible with C language'
> +...
> +box.schema.func.create('addrsplit', {is_sandboxed = true})
> +---
> +- error: 'Failed to create function ''addrsplit'': is_sandboxed option may be set
> + only for persistent Lua function (when body option is set)'
> +...
> +box.schema.func.create('invalid', {body = "function(tuple) ret tuple"})
> +---
> +- error: 'Failed to dynamically load function ''invalid'': function(tuple) ret tuple'
> +...
Please also check what happens when body is loaded okay, but happens to
be not a function.
More information about the Tarantool-patches
mailing list