[Tarantool-patches] [PATCH v19 3/6] box/module_cache: introduce modules subsystem

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Tue Mar 9 01:45:02 MSK 2021


Thanks for the patch!

See 10 comments below.

> diff --git a/src/box/module_cache.c b/src/box/module_cache.c
> new file mode 100644
> index 000000000..bc2184e5f
> --- /dev/null
> +++ b/src/box/module_cache.c
> @@ -0,0 +1,476 @@
> +/*
> + * SPDX-License-Identifier: BSD-2-Clause
> + *
> + * Copyright 2010-2021, Tarantool AUTHORS, please see AUTHORS file.
> + */
> +
> +#include <unistd.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <dlfcn.h>
> +#include <lua.h>
> +
> +#include "assoc.h"
> +#include "diag.h"
> +#include "fiber.h"
> +#include "module_cache.h"
> +
> +#include "box/error.h"
> +#include "box/port.h"
> +
> +#include "lua/utils.h"
> +#include "libeio/eio.h"
> +
> +static struct mh_strnptr_t *module_cache = NULL;
> +
> +/**
> + * Helpers for cache manipulations.
> + */
> +static void *
> +cache_find(const char *str, size_t len)
> +{
> +	mh_int_t e = mh_strnptr_find_inp(module_cache, str, len);
> +	if (e == mh_end(module_cache))
> +		return NULL;
> +	return mh_strnptr_node(module_cache, e)->val;
> +}
> +
> +static void
> +cache_update(struct module *m)
> +{
> +	const char *str = m->package;
> +	size_t len = m->package_len;
> +
> +	mh_int_t e = mh_strnptr_find_inp(module_cache, str, len);
> +	if (e == mh_end(module_cache))
> +		panic("module: failed to update cache: %s", str);
> +
> +	mh_strnptr_node(module_cache, e)->str = m->package;
> +	mh_strnptr_node(module_cache, e)->val = m;
> +}
> +
> +static int
> +cache_add(struct module *m)

1. In func.c it is called `cache_put`, but does the same. What is the
difference between `put` and `add` then? If none, then why not use
only one verb?

> +{
> +	const struct mh_strnptr_node_t nd = {
> +		.str	= m->package,
> +		.len	= m->package_len,
> +		.hash	= mh_strn_hash(m->package, m->package_len),
> +		.val	= m,
> +	};
> +
> +	mh_int_t e = mh_strnptr_put(module_cache, &nd, NULL, NULL);
> +	if (e == mh_end(module_cache)) {
> +		diag_set(OutOfMemory, sizeof(nd), "malloc",
> +			 "module_cache node");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +static void
> +cache_del(struct module *m)
> +{
> +	const char *str = m->package;
> +	size_t len = m->package_len;
> +
> +	mh_int_t e = mh_strnptr_find_inp(module_cache, str, len);
> +	if (e != mh_end(module_cache)) {
> +		struct module *v = mh_strnptr_node(module_cache, e)->val;
> +		if (v == m)
> +			mh_strnptr_del(module_cache, e, NULL);

2. Please, add a comment about why the module pointer in the cache
might be different.

> +	}
> +}
> +
> +struct module *
> +module_load_force(const char *package, size_t package_len)
> +{
> +	char path[PATH_MAX];
> +	size_t size = sizeof(path);
> +
> +	if (find_package(package, package_len, path, size) != 0)
> +		return NULL;
> +
> +	struct module *m = module_new(package, package_len, path);
> +	if (m == NULL)
> +		return NULL;
> +
> +	struct module *c = cache_find(package, package_len);
> +	if (c != NULL) {
> +		cache_update(m);
> +		say_info("module_cache: force load: %s", package);

3. That is not very helpful for a user - he does not know what
'module_cache' is, and what is 'force load'. Try to make a more
user-friendly message. It should not use any internal names.

> +	} else {
> +		if (cache_add(m) != 0) {
> +			module_unload(m);
> +			return NULL;
> +		}
> +	}
> +
> +	return m;
> +}
> +
> +struct module *
> +module_load(const char *package, size_t package_len)
> +{
> +	char path[PATH_MAX];
> +
> +	if (find_package(package, package_len, path, sizeof(path)) != 0)
> +		return NULL;
> +
> +	struct module *m = cache_find(package, package_len);
> +	if (m != NULL) {
> +		struct module_attr attr;
> +		struct stat st;
> +		if (stat(path, &st) != 0) {
> +			diag_set(SystemError, "failed to stat() %s", path);
> +			return NULL;
> +		}
> +
> +		/*
> +		 * In case of cache hit we may reuse existing
> +		 * module which speedup load procedure.
> +		 */
> +		module_attr_fill(&attr, &st);
> +		if (memcmp(&attr, &m->attr, sizeof(attr)) == 0) {

4. That is not safe in case the struct has padding between
members and/or in the end. You either must compare the members
manually, or must ensure there is no any padding. For instance,
reorder the members not to have the padding in between, add a dummy
member in the end to fill the end-padding if necessary and fill it
with 0 always, and add a static_assert its size is equal to the sum
of member sizes. Then memcmp should be safe. Otherwise you risk to
compare garbage.

> +			module_ref(m);
> +			return m;
> +		}
> +
> +		/*
> +		 * Module has been updated on a storage device,
> +		 * so load a new instance and update the cache,
> +		 * old entry get evicted but continue residing
> +		 * in memory, fully functional, until last
> +		 * function is unloaded.
> +		 */
> +		m = module_new(package, package_len, path);
> +		if (m != NULL) {
> +			cache_update(m);
> +			say_info("module_cache: attr changed: %s",
> +				 package);
> +		}
> +	} else {
> +		m = module_new(package, package_len, path);
> +		if (m != NULL) {
> +			if (cache_add(m) != 0) {

5. You could improve readability a bit if you would do early
returns on errors (return NULL right away if m == NULL), or
reduce the indentation. For instance, by using a single `if`
with the condition `m != NULL && cache_add(m) != 0` instead
of 2 `if`s.

> +				module_unload(m);
> +				return NULL;
> +			}
> +		}
> +	}
> +
> +	return m;
> +}
> +
> +void
> +module_unload(struct module *m)
> +{
> +	module_unref(m);
> +}
> +
> +void
> +module_free(void)
> +{
> +	size_t nr_busy = 0;
> +	mh_int_t e;
> +
> +	mh_foreach(module_cache, e) {
> +		struct module *m = mh_strnptr_node(module_cache, e)->val;
> +		if (m->refs > 1)
> +			nr_busy++;
> +		module_unload(m);
> +	}
> +
> +	mh_strnptr_delete(module_cache);
> +	module_cache = NULL;
> +
> +	if (nr_busy > 0)
> +		say_info("module_cache: %zu entries left", nr_busy);

6. This probably must be panic. If there are modules, they are going to
try to delete themselves from the cache later, which is NULL, so it
would crash anyway but without any message.

`module_cache` must be empty when module_free() is called.

> +}
> diff --git a/src/box/module_cache.h b/src/box/module_cache.h
> new file mode 100644
> index 000000000..78dd5c7ad
> --- /dev/null
> +++ b/src/box/module_cache.h
> @@ -0,0 +1,206 @@
> +/*
> + * SPDX-License-Identifier: BSD-2-Clause
> + *
> + * Copyright 2010-2021, Tarantool AUTHORS, please see AUTHORS file.
> + */
> +
> +#pragma once
> +
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +
> +#include "trivia/config.h"
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif /* defined(__cplusplus) */
> +
> +/**
> + * API of C stored function.
> + */
> +
> +struct port;
> +
> +struct box_function_ctx {
> +	struct port *port;
> +};
> +
> +typedef struct box_function_ctx box_function_ctx_t;
> +typedef int (*box_function_t)(box_function_ctx_t *ctx,
> +			      const char *args,
> +			      const char *args_end);
> +
> +/**
> + * Shared library file attributes for
> + * module cache invalidation.
> + */
> +struct module_attr {
> +#ifdef TARGET_OS_DARWIN
> +	struct timespec st_mtimespec;
> +#else
> +	struct timespec st_mtim;
> +#endif
> +	dev_t st_dev;
> +	ino_t st_ino;
> +	off_t st_size;

7. For the sake of easier memcmp() you could use platform-agnostic types.
For instance, u/int64_t or u/int32_t.

> +};
> +
> +/**
> + * Dynamic shared module.
> + */
> +struct module {
> +	/**
> +	 * Module handle, dlopen() result.
> +	 */
> +	void *handle;
> +	/**
> +	 * File attributes.
> +	 */
> +	struct module_attr attr;
> +	/**
> +	 * Count of active references.
> +	 */
> +	int64_t refs;
> +	/**
> +	 * Length of @a package.
> +	 */
> +	size_t package_len;
> +	/**
> +	 * Module's name without file extension.
> +	 */
> +	char package[0];
> +};
> +
> +/**
> + * Module function.
> + */
> +struct module_func {
> +	/**
> +	 * Function's address, iow dlsym() result.
> +	 */
> +	 box_function_t func;
> +	/**
> +	 * Function's module.
> +	 */
> +	struct module *module;
> +};
> +
> +/**
> + * Load a module.
> + *
> + * Lookup for a module instance in cache and if not found
> + * the module is loaded from a storage device.

8. It also checks for the attributes being changed.

> + *
> + * @param package module package (without file extension).
> + * @param package_len length of @a package.
> + *
> + * Possible errors:
> + * ClientError: the package is not found on a storage device.
> + * ClientError: an error happened when been loading the package.
> + * SystemError: a system error happened during procedure.
> + * OutOfMemory: unable to allocate new memory for module instance.
> + *
> + * @return a module instance on success, NULL otherwise (diag is set)
> + */
> +struct module *
> +module_load(const char *package, size_t package_len);
> +
> +/**
> + * Force load a module.
> + *
> + * Load a module from a storage device and invalidate
> + * an associated cache entry.

9. Reload, not invalidate. It stays cached, but with a new
pointer.

> + *
> + * @param package module package (without file extension).
> + * @param package_len length of @a package.
> + *
> + * Possible errors:
> + * ClientError: the package is not found on a storage device.
> + * ClientError: an error happened when been loading the package.
> + * SystemError: a system error happened during procedure.
> + * OutOfMemory: unable to allocate new memory for module instance.
> + *
> + * @return a module instance on success, NULL otherwise (diag is set)
> + */
> +struct module *
> +module_load_force(const char *package, size_t package_len);
> +
> +/**
> + * Unload a module instance.
> + *
> + * @param m a module to unload.
> + */
> +void
> +module_unload(struct module *m);
> +
> +/** Test if module function is empty. */
> +static inline bool
> +is_module_func_empty(struct module_func *mf)

10. Struct prefix is "stronger" than 'is' prefix. Otherwise
we would end up with tons of 'is' checkers in the code not
related to each other but having the same prefix, and
conflicting with variable names.

Here it should be module_func_is_empty().

> +{
> +	return mf->module == NULL;
> +}


More information about the Tarantool-patches mailing list