[Tarantool-patches] [PATCH v4 5/6] box/func: implement cfunc Lua module

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Sat Oct 3 16:55:26 MSK 2020


Thanks for the patch!

See 20 comments below.

On 01.10.2020 15:51, Cyrill Gorcunov wrote:
> Currently to run "C" function from some external module
> one have to register it first in "_func" system space. This
> is a problem if node is in read-only mode (replica).
> 
> Still people would like to have a way to run such functions
> even in ro mode. For this sake we implement "cfunc" lua
> module.
> 
> Fixes #4692
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
> 
> @TarantoolBot document
> Title: cfunc module

1. The documentation will go to a github issue, where it looks super
broken as I checked. Please, try to use github markdown here.

> .. _cfunc-list:
> 
> .. function:: list()
> 
>     List created functions.
> 
>     **Example**
> 
>     .. code-block:: lua
> 
>         require('cfunc').list()
>         ---
>         - - easy
>         ...

2. Lets make the API minimal for now, without exists(), list(). I can't
think of why are they needed in an application. We can add more methods
if they are obviously needed for something, or when users ask.

> ---
>  src/box/CMakeLists.txt |   1 +
>  src/box/func.c         |  10 ++
>  src/box/lua/cfunc.c    | 362 +++++++++++++++++++++++++++++++++++++++++
>  src/box/lua/cfunc.h    |  55 +++++++
>  src/box/lua/init.c     |   2 +
>  5 files changed, 430 insertions(+)
>  create mode 100644 src/box/lua/cfunc.c
>  create mode 100644 src/box/lua/cfunc.h
> 
> diff --git a/src/box/func.c b/src/box/func.c
> index ba98f0f9e..032b6062c 100644
> --- a/src/box/func.c
> +++ b/src/box/func.c
> @@ -34,6 +34,7 @@
>  #include "assoc.h"
>  #include "lua/utils.h"
>  #include "lua/call.h"
> +#include "lua/cfunc.h"

3. Why do you need cfunc here? Also it looks like a cyclic
dependency - cfunc depends on box/func and vise versa.

>  #include "error.h"
>  #include "errinj.h"
>  #include "diag.h"
> @@ -152,6 +153,14 @@ module_init(void)
>  			  "modules hash table");
>  		return -1;
>  	}
> +	/*
> +	 * cfunc depends on module engine,
> +	 * so initialize them together.
> +	 */
> +	if (cfunc_init() != 0) {
> +		module_free();
> +		return -1;
> +	}

4. Better not enclose modules. We have lots of modules depending
on each other but still it is much more clear when they are all
initialized in one place. So I would move it to box_init() and
box_free().

For example, func_c depend on port, but we don't call port_init()
in module_init(). Actually, port is needed in several modules. The
same for tuple module, tuple format, fiber. I wouldn't consider
cfunc as something different.


5. The more I look at the names, the more I think that we probably
should rename cfunc to cmod, or cmodule, or boxmod, or cbox, or
modbox, or another name.

Because we don't allow to load and call any function. Only module API
functions, which have strict signature - box_function_f.

Also cfunc looks too similar to func_c, while they don't have much
in common. func_c is a helper for _func space, and cfunc has nothing
to do with spaces and schema.

>  	return 0;
>  }
> diff --git a/src/box/lua/cfunc.c b/src/box/lua/cfunc.c
> new file mode 100644
> index 000000000..d57ed7d1b
> --- /dev/null
> +++ b/src/box/lua/cfunc.c
> @@ -0,0 +1,362 @@
> +#include <string.h>
> +
> +#include <lua.h>
> +#include <lauxlib.h>
> +#include <lualib.h>
> +
> +#include "assoc.h"
> +#include "lua/utils.h"
> +#include "trivia/util.h"
> +#include "box/func.h"
> +#include "box/call.h"
> +#include "box/port.h"
> +#include "box/identifier.h"
> +#include "box/session.h"
> +#include "box/user.h"

6. Why do you need box/func.h, box/call.h, box/session.h,
box/user.h? All these files are about schema and credentials.
cfunc does not need any of this. If anything from them does
not depend on schema, and you need it, then you probably
should move these code pieces out first.

For instance, module_sym does not have intersections with
func_c, but somewhy is stored in box/func.h and .c.

Also why do you need box/identifier.h? We don't store the
names in any schema, so we don't need them to be valid
identifiers. Let dlsym() decide what names are valid.

> +#include "say.h"
> +#include "diag.h"
> +#include "tt_static.h"
> +
> +#include "box/lua/cfunc.h"
> +#include "box/lua/call.h"

7. Why do you need box/lua/call.h? You are not calling Lua
functions here.

> +
> +static struct mh_strnptr_t *cfunc_hash;
> +static unsigned int nr_cfunc = 0;

8. What is 'nr'? If it is a number of functions, then why
couldn't you use mh_size()?

> +
> +struct cfunc {
> +	struct module_sym	mod_sym;
> +	size_t			name_len;
> +	char			name[0];
> +};
> +
> +struct cfunc *
> +cfunc_lookup(const char *name, size_t len)
> +{
> +	if (name != NULL && len > 0) {
> +		mh_int_t e = mh_strnptr_find_inp(cfunc_hash, name, len);
> +		if (e != mh_end(cfunc_hash))
> +			return mh_strnptr_node(cfunc_hash, e)->val;
> +	}
> +	return NULL;
> +}
> +
> +static int
> +cfunc_add(struct cfunc *cfunc)
> +{
> +	const struct mh_strnptr_node_t nd = {
> +		.str	= cfunc->name,
> +		.len	= cfunc->name_len,
> +		.hash	= mh_strn_hash(cfunc->name, cfunc->name_len),
> +		.val	= cfunc,
> +	};
> +
> +	mh_int_t h = mh_strnptr_put(cfunc_hash, &nd, NULL, NULL);
> +	if (h == mh_end(cfunc_hash)) {
> +		diag_set(OutOfMemory, sizeof(nd), "cfunc_hash_add", "h");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +static void
> +luaT_param_type_error(struct lua_State *L, int idx, const char *func_name,
> +		      const char *param, const char *exp)
> +{
> +	const char *typename = idx == 0 ?
> +		"<unknown>" : lua_typename(L, lua_type(L, idx));
> +	static const char *fmt =
> +		"%s: wrong parameter \"%s\": expected %s, got %s";
> +	diag_set(IllegalParams, fmt, func_name, param, exp, typename);
> +}
> +
> +static int
> +lbox_cfunc_create(struct lua_State *L)

9. Better not use lbox_ prefix. Because this is not box.cfunc,
it is just cfunc.

> +{
> +	static const char method[] = "cfunc.create";

10. Why so complex? And why static? Why not

	const char *method = "cfunc.create";

like we do everywhere else?

> +	struct cfunc *cfunc = NULL;
> +
> +	if (lua_gettop(L) != 1) {
> +		static const char *fmt =
> +			"%s: expects %s(\'name\')";

11. Lets not use 'static' when it does not change anything.
The same for all other places where 'static' can be removed
without any changes.

> +		diag_set(IllegalParams, fmt, method, method);
> +		goto out;
> +	}
> +
> +	if (lua_type(L, 1) != LUA_TSTRING) {
> +		luaT_param_type_error(L, 1, method,
> +				      lua_tostring(L, 1),
> +				      "function name");
> +		goto out;
> +	}
> +
> +	size_t name_len;
> +	const char *name = lua_tolstring(L, 1, &name_len);
> +
> +	if (identifier_check(name, name_len) != 0)
> +		goto out;
> +
> +	if (cfunc_lookup(name, name_len) != NULL) {
> +		const char *fmt = tnt_errcode_desc(ER_FUNCTION_EXISTS);
> +		diag_set(IllegalParams, fmt, name);
> +		goto out;
> +	}
> +
> +	cfunc = malloc(sizeof(*cfunc) + name_len + 1);
> +	if (cfunc == NULL) {
> +		diag_set(OutOfMemory, sizeof(*cfunc), "malloc", "cfunc");

12. You allocated 'sizeof(*cfunc) + name_len + 1)', not only sizeof.

> +		goto out;
> +	}
> +
> +	cfunc->mod_sym.addr	= NULL;
> +	cfunc->mod_sym.module	= NULL;
> +	cfunc->mod_sym.name	= cfunc->name;
> +	cfunc->name_len		= name_len;
> +
> +	memcpy(cfunc->name, name, name_len);
> +	cfunc->name[name_len] = '\0';
> +
> +	if (cfunc_add(cfunc) != 0)
> +		goto out;
> +
> +	nr_cfunc++;
> +	return 0;
> +
> +out:
> +	free(cfunc);
> +	return luaT_error(L);

13. Our agreement for the new code is that it should not throw. It
should return nil + error. This was somewhere in the documentation.
The same for all other functions.

Although I don't remember what is the agreement to return success.
If you return nothing in case of success, and nil+error on an error,
then you need to write code like this:

	unused, err = func()
	if not err then
		-- Success
	else
		-- Error
	end

The first value becomes never used, in both cases. Maybe it is worth
returning 'true', so at least I could write something like:

	ok, err = func()
	if ok then
		-- Success
	else
		-- Error
	end

But I don't know the officient agreement here. In SWIM I used the
second way, for example.

> +}
> +
> +static int
> +lbox_cfunc_drop(struct lua_State *L)
> +{
> +	static const char method[] = "cfunc.drop";
> +	const char *name = NULL;
> +
> +	if (lua_gettop(L) != 1) {
> +		static const char *fmt =
> +			"%s: expects %s(\'name\')";
> +		diag_set(IllegalParams, fmt, method, method);
> +		return luaT_error(L);
> +	}
> +
> +	if (lua_type(L, 1) != LUA_TSTRING) {
> +		luaT_param_type_error(L, 1, method,
> +				      lua_tostring(L, 1),
> +				      "function name or id");
> +		return luaT_error(L);
> +	}
> +
> +	size_t name_len;
> +	name = lua_tolstring(L, 1, &name_len);
> +
> +	mh_int_t e = mh_strnptr_find_inp(cfunc_hash, name, name_len);
> +	if (e == mh_end(cfunc_hash)) {
> +		const char *fmt = tnt_errcode_desc(ER_NO_SUCH_FUNCTION);
> +		diag_set(IllegalParams, fmt, name);
> +		return luaT_error(L);
> +	}
> +
> +	struct cfunc *cfunc = mh_strnptr_node(cfunc_hash, e)->val;
> +	mh_strnptr_del(cfunc_hash, e, NULL);
> +
> +	nr_cfunc--;
> +	free(cfunc);
> +
> +	return 0;
> +}
> +
> +static int
> +lbox_cfunc_exists(struct lua_State *L)
> +{
> +	static const char method[] = "cfunc.exists";
> +	if (lua_gettop(L) != 1) {
> +		static const char *fmt =
> +			"%s: expects %s(\'name\') but no name passed";
> +		diag_set(IllegalParams, fmt, method, method);
> +		return luaT_error(L);
> +	}
> +
> +	size_t name_len;
> +	const char *name = lua_tolstring(L, 1, &name_len);
> +
> +	struct cfunc *cfunc = cfunc_lookup(name, name_len);
> +	lua_pushboolean(L, cfunc != NULL ? true : false);

14. Result of comparison is boolean anyway. No need to use '?'
and these constants.

> +
> +	return 1;
> +}
> +
> +static int
> +lbox_cfunc_reload(struct lua_State *L)
> +{
> +	static const char method[] = "cfunc.reload";
> +	if (lua_gettop(L) != 1 || !lua_isstring(L, 1)) {
> +		static const char *fmt =
> +			"%s: expects %s(\'name\') but no name passed";
> +		diag_set(IllegalParams, fmt, method, method);
> +		return luaT_error(L);
> +	}
> +
> +	size_t name_len;
> +	const char *name = lua_tolstring(L, 1, &name_len);
> +
> +	/*
> +	 * Since we use module engine do not allow to
> +	 * access arbitrary names only the ones we've
> +	 * really created.
> +	 */
> +	struct cfunc *cfunc = cfunc_lookup(name, name_len);
> +	if (cfunc == NULL) {
> +		const char *fmt = tnt_errcode_desc(ER_NO_SUCH_FUNCTION);
> +		diag_set(IllegalParams, fmt, name);
> +		return luaT_error(L);
> +	}
> +
> +	struct module *module = NULL;
> +	struct func_name n;
> +
> +	func_split_name(name, &n);
> +	if (module_reload(n.package, n.package_end, &module) == 0) {

15. The API to reload a single function looks not so good. You can't
reload one function and not reload the others anyway. Or can you?

For instance, we have box.schema.func.reload(). It takes a module
name.

In that way the idea to rename cfunc to cbox or something like that looks
even better. We could provide methods to operate on entire modules. So it
would be

	f = cbox.load_func() -- Create a function.
	f:unload()           -- Unload a function.
	cbox.reload_module() -- Reload module and all functions in it.
	cbox.unload_module() -- Unload the module from the memory.

We need to put more thoughts into the API design.

> +		if (module != NULL)
> +			return 0;
> +		diag_set(ClientError, ER_NO_SUCH_MODULE, name);
> +	}
> +
> +	return luaT_error(L);
> +}
> +
> +static int
> +lbox_cfunc_list(struct lua_State *L)
> +{
> +	if (nr_cfunc == 0)
> +		return 0;

16. So the function either returns nil or a table? This
looks bad. So a user can't just write

	for i, func in pairs(cfunc.list()) do
		...
	end

In your case he needs to check for nil. Does not look
like a good idea. Maybe worth returning an empty table.
Or just drop this function, because I don't know why
would a user need it. In an application you anyway know
all the functions you use.

> +
> +	lua_createtable(L, nr_cfunc, 0);
> +
> +	int nr = 1;
> +	mh_int_t i;
> +	mh_foreach(cfunc_hash, i) {
> +		struct cfunc *c = mh_strnptr_node(cfunc_hash, i)->val;
> +		lua_pushstring(L, c->name);
> +		lua_rawseti(L, -2, nr++);
> +	}
> +
> +	return 1;
> +}
> +
> +static int
> +lbox_cfunc_call(struct lua_State *L)
> +{
> +	static const char method[] = "cfunc.call";
> +	if (lua_gettop(L) < 1 || !lua_isstring(L, 1)) {
> +		static const char *fmt =
> +			"%s: expects %s(\'name\')";
> +		diag_set(IllegalParams, fmt, method, method);

17. I suppose you forgot a return here. And also to cover it
with a test.

> +	}
> +
> +	size_t name_len;
> +	const char *name = lua_tolstring(L, 1, &name_len);
> +
> +	struct cfunc *cfunc = cfunc_lookup(name, name_len);
> +	if (cfunc == NULL) {
> +		const char *fmt = tnt_errcode_desc(ER_NO_SUCH_FUNCTION);
> +		diag_set(IllegalParams, fmt, name);
> +		return luaT_error(L);
> +	}
> +
> +	lua_State *args_L = luaT_newthread(tarantool_L);

18. Wtf? Why can't you use L? Why do you even move anything here
between the stacks?

> +	if (args_L == NULL)
> +		return luaT_error(L);
> +
> +	int coro_ref = luaL_ref(tarantool_L, LUA_REGISTRYINDEX);
> +	lua_xmove(L, args_L, lua_gettop(L) - 1);
> +
> +	struct port args;
> +	port_lua_create(&args, args_L);
> +	((struct port_lua *)&args)->ref = coro_ref;
> +
> +	struct port ret;
> +	if (module_sym_call(&cfunc->mod_sym,  &args, &ret) != 0) {
> +		port_destroy(&args);
> +		return luaT_error(L);
> +	}
> +
> +	int top = lua_gettop(L);
> +	port_dump_lua(&ret, L, true);
> +	int cnt = lua_gettop(L) - top;
> +
> +	port_destroy(&ret);
> +	port_destroy(&args);
> +	return cnt;
> +}
> +
> +int
> +cfunc_init(void)
> +{
> +	cfunc_hash = mh_strnptr_new();
> +	if (cfunc_hash == NULL) {
> +		diag_set(OutOfMemory, sizeof(*cfunc_hash), "malloc",
> +			  "cfunc hash table");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +void
> +cfunc_free(void)
> +{
> +	while (mh_size(cfunc_hash) > 0) {
> +		mh_int_t e = mh_first(cfunc_hash);
> +		struct cfunc *c = mh_strnptr_node(cfunc_hash, e)->val;
> +		module_sym_unload(&c->mod_sym);
> +		mh_strnptr_del(cfunc_hash, e, NULL);
> +	}
> +	mh_strnptr_delete(cfunc_hash);
> +	cfunc_hash = NULL;
> +}
> +
> +void
> +box_lua_cfunc_init(struct lua_State *L)
> +{
> +	static const struct luaL_Reg cfunclib[] = {
> +		{ "create",	lbox_cfunc_create },
> +		{ "drop",	lbox_cfunc_drop },
> +		{ "exists",	lbox_cfunc_exists },
> +		{ "call",	lbox_cfunc_call },
> +		{ "reload",	lbox_cfunc_reload },
> +		{ "list",	lbox_cfunc_list },
> +		{ }
> +	};
> +
> +	luaL_register_module(L, "cfunc", cfunclib);
> +	lua_pop(L, 1);
> +}
> diff --git a/src/box/lua/cfunc.h b/src/box/lua/cfunc.h
> new file mode 100644
> index 000000000..51cec28b8
> --- /dev/null
> +++ b/src/box/lua/cfunc.h
> @@ -0,0 +1,55 @@
> +#ifndef INCLUDES_TARANTOOL_MOD_BOX_LUA_CFUNC_H
> +#define INCLUDES_TARANTOOL_MOD_BOX_LUA_CFUNC_H

19. In the new code we agreed to use #pragma once.

> +
> +#include <stdint.h>
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif /* defined(__cplusplus) */
> +
> +struct lua_State;
> +
> +void
> +box_lua_cfunc_init(struct lua_State *L);
> +
> +int
> +cfunc_init(void);

20. Why do you need 2 inits? All Lua modules always
have a single init and a single free.

> +
> +void
> +cfunc_free(void);
> +


More information about the Tarantool-patches mailing list