[Tarantool-patches] [PATCH v19 4/6] box/func: switch to module_cache interface

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Tue Mar 9 01:47:58 MSK 2021


Thanks for the patch!

See 10 comments below.

>  src/box/func.c     | 492 ++++++++++++++++++---------------------------
>  src/box/func.h     |  19 +-
>  src/box/func_def.h |  14 --
>  3 files changed, 200 insertions(+), 325 deletions(-)
> 
> diff --git a/src/box/func.c b/src/box/func.c
> index 88903f40e..a8401c6ed 100644
> --- a/src/box/func.c
> +++ b/src/box/func.c
> @@ -216,10 +134,15 @@ cache_find(const char *name, const char *name_end)
>  static int
>  cache_put(struct box_module *bxmod)
>  {
> -	size_t package_len = strlen(bxmod->package);
> -	uint32_t name_hash = mh_strn_hash(bxmod->package, package_len);
> +	const char *package = bxmod->module->package;
> +	size_t package_len = bxmod->module->package_len;
> +
>  	const struct mh_strnptr_node_t strnode = {
> -		bxmod->package, package_len, name_hash, bxmod};
> +		.str = package,
> +		.len = package_len,
> +		.hash = mh_strn_hash(package, package_len),
> +		.val = bxmod
> +	};

1. How do you decide when to align and when not align the
assignments?

>  
>  	mh_int_t i = mh_strnptr_put(box_module_hash, &strnode, NULL, NULL);
>  	if (i == mh_end(box_module_hash)) {
> @@ -231,150 +154,175 @@ cache_put(struct box_module *bxmod)
>  }
>  
>  /**
> - * Delete a module from the module cache
> + * Update module in module cache.
>   */
>  static void
> -cache_del(const char *name, const char *name_end)
> +cache_update(struct box_module *bxmod)
>  {
> -	mh_int_t i = mh_strnptr_find_inp(box_module_hash, name,
> -					 name_end - name);
> -	if (i == mh_end(box_module_hash))
> -		return;
> -	mh_strnptr_del(box_module_hash, i, NULL);
> +	const char *str = bxmod->module->package;
> +	size_t len = bxmod->module->package_len;
> +
> +	mh_int_t e = mh_strnptr_find_inp(box_module_hash, str, len);
> +	if (e == mh_end(box_module_hash))
> +		panic("func: failed to update cache: %s", str);
> +
> +	mh_strnptr_node(box_module_hash, e)->str = str;
> +	mh_strnptr_node(box_module_hash, e)->val = bxmod;
>  }
>  
> -/*
> - * Load a dso.
> - * Create a new symlink based on temporary directory and try to
> - * load via this symink to load a dso twice for cases of a function
> - * reload.
> +/**
> + * Delete a module from the module cache
>   */
> +static void
> +cache_del(struct box_module *bxmod)
> +{
> +	const char *str = bxmod->module->package;
> +	size_t len = bxmod->module->package_len;
> +
> +	mh_int_t e = mh_strnptr_find_inp(box_module_hash, str, len);
> +	if (e != mh_end(box_module_hash)) {
> +		struct box_module *v;
> +		v = mh_strnptr_node(box_module_hash, e)->val;
> +		if (v == bxmod)
> +			mh_strnptr_del(box_module_hash, e, NULL);

2. The same comment as in the other patch about the other
cache_del. You need to describe when the pointer might be
not the same.

> +	}
> +}
> +
> +/** Increment reference to a module. */
> +static inline void
> +box_module_ref(struct box_module *bxmod)
> +{
> +	assert(bxmod->refs >= 0);
> +	++bxmod->refs;
> +}
> +
> +/** Low-level module loader. */
>  static struct box_module *
> -box_module_load(const char *package, const char *package_end)
> +box_module_ld(const char *package, size_t package_len,
> +	      struct module *(ld)(const char *package,
> +				  size_t package_len))

3. Too complex. I propose to call module_load/module_load_force in
the upper code, and add box_module_new(struct module *) which
wrapps the returned struct module * into struct box_module *.

>  {
> -	char path[PATH_MAX];
> -	if (module_find(package, package_end, path, sizeof(path)) != 0)
> +	struct module *m = ld(package, package_len);
> +	if (m == NULL)
>  		return NULL;

Below this line is box_module_new(). Above this line you move to the
upper level. To box_module_load() and box_module_load_force().

>  
> -	int package_len = package_end - package;
> -	struct box_module *bxmod = malloc(sizeof(*bxmod) + package_len + 1);
> +	struct box_module *bxmod = malloc(sizeof(*bxmod));
>  	if (bxmod == NULL) {
> +		module_unload(m);
>  		diag_set(OutOfMemory, sizeof(*bxmod) + package_len + 1,
>  			 "malloc", "struct box_module");
>  		return NULL;
>  	}
> -	memcpy(bxmod->package, package, package_len);
> -	bxmod->package[package_len] = 0;
> -	rlist_create(&bxmod->funcs);
> -	bxmod->calls = 0;
> -
> -	const char *tmpdir = getenv("TMPDIR");
> -	if (tmpdir == NULL)
> -		tmpdir = "/tmp";
> -	char dir_name[PATH_MAX];
> -	int rc = snprintf(dir_name, sizeof(dir_name), "%s/tntXXXXXX", tmpdir);
> -	if (rc < 0 || (size_t) rc >= sizeof(dir_name)) {
> -		diag_set(SystemError, "failed to generate path to tmp dir");
> -		goto error;
> -	}
> -
> -	if (mkdtemp(dir_name) == NULL) {
> -		diag_set(SystemError, "failed to create unique dir name: %s",
> -			 dir_name);
> -		goto error;
> -	}
> -	char load_name[PATH_MAX];
> -	rc = snprintf(load_name, sizeof(load_name), "%s/%.*s." TARANTOOL_LIBEXT,
> -		      dir_name, package_len, package);
> -	if (rc < 0 || (size_t) rc >= sizeof(dir_name)) {
> -		diag_set(SystemError, "failed to generate path to DSO");
> -		goto error;
> -	}
>  
> -	struct stat st;
> -	if (stat(path, &st) < 0) {
> -		diag_set(SystemError, "failed to stat() module %s", path);
> -		goto error;
> -	}
> +	bxmod->refs = 0;
> +	bxmod->module = m;
> +	rlist_create(&bxmod->funcs);
>  
> -	int source_fd = open(path, O_RDONLY);
> -	if (source_fd < 0) {
> -		diag_set(SystemError, "failed to open module %s file for" \
> -			 " reading", path);
> -		goto error;
> -	}
> -	int dest_fd = open(load_name, O_WRONLY|O_CREAT|O_TRUNC,
> -			   st.st_mode & 0777);
> -	if (dest_fd < 0) {
> -		diag_set(SystemError, "failed to open file %s for writing ",
> -			 load_name);
> -		close(source_fd);
> -		goto error;
> -	}
> +	box_module_ref(bxmod);
> +	return bxmod;
> +}
>  
> -	off_t ret = eio_sendfile_sync(dest_fd, source_fd, 0, st.st_size);
> -	close(source_fd);
> -	close(dest_fd);
> -	if (ret != st.st_size) {
> -		diag_set(SystemError, "failed to copy DSO %s to %s",
> -			 path, load_name);
> -		goto error;
> -	}
> +/** Load a new module. */
> +static struct box_module *
> +box_module_load(const char *package, size_t package_len)
> +{
> +	return box_module_ld(package, package_len,
> +			     module_load);
> +}
>  
> -	bxmod->handle = dlopen(load_name, RTLD_NOW | RTLD_LOCAL);
> -	if (unlink(load_name) != 0)
> -		say_warn("failed to unlink dso link %s", load_name);
> -	if (rmdir(dir_name) != 0)
> -		say_warn("failed to delete temporary dir %s", dir_name);
> -	if (bxmod->handle == NULL) {
> -		diag_set(ClientError, ER_LOAD_MODULE, package_len,
> -			  package, dlerror());
> -		goto error;
> -	}
> -	struct errinj *e = errinj(ERRINJ_DYN_MODULE_COUNT, ERRINJ_INT);
> -	if (e != NULL)
> -		++e->iparam;

4. After you removed this injection, the tests don't pass.

> -	return bxmod;
> -error:
> -	free(bxmod);
> -	return NULL;
> +/** Load a new module with force cache invalidation. */
> +static struct box_module *
> +box_module_load_force(const char *package, size_t package_len)
> +{
> +	return box_module_ld(package, package_len,
> +			     module_load_force);
>  }
>  
> +/** Delete a module. */
>  static void
>  box_module_delete(struct box_module *bxmod)
>  {
>  	struct errinj *e = errinj(ERRINJ_DYN_MODULE_COUNT, ERRINJ_INT);
>  	if (e != NULL)
>  		--e->iparam;

5. Add an assertion that ref count is 0.

> -	dlclose(bxmod->handle);
> +	module_unload(bxmod->module);
>  	TRASH(bxmod);
>  	free(bxmod);
>  }
>  
> -/*
> - * Check if a dso is unused and can be closed.
> - */
> -static void
> -box_module_gc(struct box_module *bxmod)
> +/** Decrement reference to a module and delete it if last one. */
> +static inline void
> +box_module_unref(struct box_module *bxmod)
>  {
> -	if (rlist_empty(&bxmod->funcs) && bxmod->calls == 0)
> +	assert(bxmod->refs > 0);
> +	if (--bxmod->refs == 0) {
> +		cache_del(bxmod);
>  		box_module_delete(bxmod);
> +	}
>  }
>  
> -/*
> - * Import a function from the module.
> - */
> -static box_function_f
> -box_module_sym(struct box_module *bxmod, const char *name)
> +/** Free box modules subsystem. */
> +void
> +box_module_free(void)
>  {
> -	box_function_f f = (box_function_f)dlsym(bxmod->handle, name);
> -	if (f == NULL) {
> -		diag_set(ClientError, ER_LOAD_FUNCTION, name, dlerror());
> -		return NULL;
> +	while (mh_size(box_module_hash) > 0) {
> +		struct box_module *bxmod;
> +
> +		mh_int_t i = mh_first(box_module_hash);
> +		bxmod = mh_strnptr_node(box_module_hash, i)->val;
> +		/*
> +		 * Module won't be deleted if it has
> +		 * active functions bound.
> +		 */
> +		box_module_unref(bxmod);
>  	}
> -	return f;
> +	mh_strnptr_delete(box_module_hash);
> +}
> +
> +static struct func_vtab func_c_vtab;
> +
> +/** Create new box function. */
> +static inline void
> +func_c_create(struct func_c *func_c)
> +{
> +	func_c->bxmod = NULL;
> +	func_c->base.vtab = &func_c_vtab;
> +	rlist_create(&func_c->item);
> +	module_func_create(&func_c->mf);
> +}
> +
> +/** Test if function is not resolved. */
> +static inline bool
> +is_func_c_emtpy(struct func_c *func_c)

6. The same comment as about module_cache patch. It should be
func_c_is_empty().

> +{
> +	return is_module_func_empty(&func_c->mf);
>  }
>  
> +/** Load a new function. */
> +static inline int
> +func_c_load(struct box_module *bxmod, const char *func_name,
> +	      struct func_c *func_c)
> +{
> +	int rc = module_func_load(bxmod->module, func_name, &func_c->mf);
> +	if (rc == 0) {
> +		rlist_move(&bxmod->funcs, &func_c->item);
> +		func_c->bxmod = bxmod;
> +		box_module_ref(bxmod);
> +	}
> +	return rc;
> +}
> +
> +/** Unload a function. */
> +static inline void
> +func_c_unload(struct func_c *func_c)
> +{
> +	module_func_unload(&func_c->mf);
> +	rlist_del(&func_c->item);
> +	box_module_unref(func_c->bxmod);
> +	func_c_create(func_c);
> +}
> +
> +/** Reload module in a force way. */
>  int
>  box_module_reload(const char *package, const char *package_end)
>  {
> @@ -385,25 +333,24 @@ box_module_reload(const char *package, const char *package_end)
>  		return -1;
>  	}
>  
> -	struct box_module *bxmod = box_module_load(package, package_end);
> +	size_t len = package_end - package;
> +	struct box_module *bxmod = box_module_load_force(package, len);
>  	if (bxmod == NULL)
>  		return -1;
>  
> -	struct func_c *func, *tmp_func;
> -	rlist_foreach_entry_safe(func, &bxmod_old->funcs, item, tmp_func) {
> +	struct func_c *func, *tmp;

7. You didn't really need to rename tmp_func. Try not to make so much
unnecessary changes. It does not make it easier to find the functional
changes you made, nor read the history when it is needed occasionally.

> +	rlist_foreach_entry_safe(func, &bxmod_old->funcs, item, tmp) {
>  		struct func_name name;
>  		func_split_name(func->base.def->name, &name);
> -		func->func = box_module_sym(bxmod, name.sym);
> -		if (func->func == NULL)
> +		func_c_unload(func);
> +		if (func_c_load(bxmod, name.sym, func) != 0)
>  			goto restore;
> -		func->bxmod = bxmod;
> -		rlist_move(&bxmod->funcs, &func->item);
>  	}
> -	cache_del(package, package_end);
> -	if (cache_put(bxmod) != 0)
> -		goto restore;
> -	box_module_gc(bxmod_old);
> +
> +	cache_update(bxmod);
> +	box_module_unref(bxmod_old);
>  	return 0;
> +
> @@ -474,67 +420,51 @@ func_new(struct func_def *def)
>  	return func;
>  }
>  
> -static struct func_vtab func_c_vtab;
> -
> +/** Create new C function. */
>  static struct func *
>  func_c_new(MAYBE_UNUSED struct func_def *def)
>  {
>  	assert(def->language == FUNC_LANGUAGE_C);
>  	assert(def->body == NULL && !def->is_sandboxed);
> -	struct func_c *func = (struct func_c *) malloc(sizeof(struct func_c));
> -	if (func == NULL) {
> -		diag_set(OutOfMemory, sizeof(*func), "malloc", "func");
> +	struct func_c *func_c = malloc(sizeof(struct func_c));

8. You didn't need to change this line. The old name was just fine. It
wouldn't be fine if both 'func' and 'func_c' were used in here. But
there was only one name. The same in many other places in this patch.

Please, go through git diff and keep only the relevant changes. Even if
we would allow to do refactoring for the sake of refactoring, this diff
wouldn't fit here, because rename of 'func' to 'func_c' variables is not
relevant to `switch to module_cache interface` commit title. Both names
existed before the patch. The same can be argued about many other places.
So just don't make unnecessary/irrelevant diff.

Unless you absolutely want to fight over it, and only in a separate patch.
Don't mix it with functional changes.

> +	if (func_c == NULL) {
> +		diag_set(OutOfMemory, sizeof(*func_c), "malloc", "func_c");
>  		return NULL;
>  	}
> -	func->base.vtab = &func_c_vtab;
> -	func->func = NULL;
> -	func->bxmod = NULL;
> -	return &func->base;
> -}
> -
> -static void
> -func_c_unload(struct func_c *func)
> -{
> -	if (func->bxmod) {
> -		rlist_del(&func->item);
> -		if (rlist_empty(&func->bxmod->funcs)) {
> -			struct func_name name;
> -			func_split_name(func->base.def->name, &name);
> -			cache_del(name.package, name.package_end);
> -		}
> -		box_module_gc(func->bxmod);
> -	}
> -	func->bxmod = NULL;
> -	func->func = NULL;
> +	func_c_create(func_c);
> +	return &func_c->base;
>  }
>  
> +/** Destroy C function. */
>  static void
>  func_c_destroy(struct func *base)
>  {
>  	assert(base->vtab == &func_c_vtab);
>  	assert(base != NULL && base->def->language == FUNC_LANGUAGE_C);
> -	struct func_c *func = (struct func_c *) base;
> -	func_c_unload(func);
> +	struct func_c *func_c = (struct func_c *) base;
> +	box_module_unref(func_c->bxmod);
> +	func_c_unload(func_c);
>  	TRASH(base);
> -	free(func);
> +	free(func_c);
>  }
>  
>  /**
> - * Resolve func->func (find the respective DLL and fetch the
> + * Resolve func->func (find the respective share library and fetch the

9. The comment was just fine. Please. I beg you. Stop doing unnecessary
changes. Regardless of how many and often you do them, I will continue
asking to stop it. It does not make better literally anything. Not the
code except subjectively for you maybe, not the patch, not the history,
not the perf, not anything,

>   * symbol from it).
>   */
>  static int
> -func_c_load(struct func_c *func)
> +func_c_resolve(struct func_c *func_c)
>  {
> -	assert(func->func == NULL);
> +	assert(is_func_c_emtpy(func_c));
>  
>  	struct func_name name;
> -	func_split_name(func->base.def->name, &name);
> +	func_split_name(func_c->base.def->name, &name);
>  
>  	struct box_module *cached, *bxmod;
>  	cached = cache_find(name.package, name.package_end);
>  	if (cached == NULL) {
> -		bxmod = box_module_load(name.package, name.package_end);
> +		size_t len = name.package_end - name.package;
> +		bxmod = box_module_load(name.package, len);

10. You changed these lines literally in the previous patch by
another refactoring. Please, stop. Don't change code unless it
is necessary for consistency, bug fixing, or feature development.
Or serious refactoring, or optimizations. Here you just changed
`const char *, const char *` to `const char *, size_t`. It did
absolutely nothing except increasing the patch size and masking
the really mattering functional changes in this ocean of diff
hunks about endless renames, code block moves, arguments mixing
back and forth, and so on.

It is also totally not related to `switch to module_cache interface`
commit title.

Fitting into the same diff hunk as some really needed change,
does not make the other change atomic nor relevant. It is not
about changing things while you go alongside, like huricane.

Без негатива.


More information about the Tarantool-patches mailing list