[Tarantool-patches] [PATCH v20 4/7] box/module_cache: introduce modules subsystem

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Mon Apr 5 18:52:47 MSK 2021


Thanks for the patch!

See 6 comments below.

> diff --git a/src/box/module_cache.c b/src/box/module_cache.c
> new file mode 100644
> index 000000000..2cd2f2e8b
> --- /dev/null
> +++ b/src/box/module_cache.c
> @@ -0,0 +1,474 @@
> +/*
> + * SPDX-License-Identifier: BSD-2-Clause
> + *
> + * Copyright 2010-2021, Tarantool AUTHORS, please see AUTHORS file.
> + */
> +
> +#include <unistd.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <dlfcn.h>
> +#include <lua.h>
> +
> +#include "assoc.h"
> +#include "diag.h"
> +#include "fiber.h"
> +#include "module_cache.h"
> +
> +#include "box/error.h"
> +#include "box/port.h"
> +
> +#include "lua/utils.h"
> +#include "libeio/eio.h"
> +
> +static struct mh_strnptr_t *module_cache = NULL;
> +
> +/**
> + * Helpers for cache manipulations.
> + */
> +static void *

1. It returns struct module, therefore the return type must be
'struct module *', not 'void *'. The same for cache_find() in box.lib
implementation.

> +cache_find(const char *str, size_t len)
> +{
> +	mh_int_t e = mh_strnptr_find_inp(module_cache, str, len);
> +	if (e == mh_end(module_cache))
> +		return NULL;
> +	return mh_strnptr_node(module_cache, e)->val;
> +}
> +
> +static void
> +cache_update(struct module *m)
> +{
> +	const char *str = m->package;
> +	size_t len = m->package_len;
> +
> +	mh_int_t e = mh_strnptr_find_inp(module_cache, str, len);
> +	if (e == mh_end(module_cache))
> +		panic("module: failed to update cache: %s", str);
> +
> +	mh_strnptr_node(module_cache, e)->str = m->package;
> +	mh_strnptr_node(module_cache, e)->val = m;
> +}
> +
> +static int
> +cache_put(struct module *m)
> +{
> +	const struct mh_strnptr_node_t nd = {
> +		.str	= m->package,
> +		.len	= m->package_len,
> +		.hash	= mh_strn_hash(m->package, m->package_len),
> +		.val	= m,
> +	};
> +
> +	mh_int_t e = mh_strnptr_put(module_cache, &nd, NULL, NULL);

2. Put() silently replaces the old value if it is present. I would
recommend to use the next to the last argument to get the old value
and ensure it is mh_end() via an assertion/panic. The same for the other
new put() functions in the other commits.

> +	if (e == mh_end(module_cache)) {
> +		diag_set(OutOfMemory, sizeof(nd), "malloc",
> +			 "module_cache node");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +static void
> +cache_del(struct module *m)
> +{
> +	const char *str = m->package;
> +	size_t len = m->package_len;
> +
> +	mh_int_t e = mh_strnptr_find_inp(module_cache, str, len);
> +	if (e != mh_end(module_cache)) {

3. Maybe this must be an assertion/panic. I don't see a valid case when
del() is called on an already deleted module. The same for the other
new del() functions in the other commits.

> +		struct module *v = mh_strnptr_node(module_cache, e)->val;
> +		if (v == m) {
> +			/*
> +			 * The module in cache might be updated
> +			 * via force load and old instance is kept
> +			 * by a reference only.
> +			 */
> +			mh_strnptr_del(module_cache, e, NULL);
> +		}
> +	}
> +}

<...>

> +
> +struct module *
> +module_load(const char *package, size_t package_len)
> +{
> +	char path[PATH_MAX];
> +
> +	if (find_package(package, package_len, path, sizeof(path)) != 0)
> +		return NULL;
> +
> +	struct module *m = cache_find(package, package_len);
> +	if (m != NULL) {
> +		struct module_attr attr;
> +		struct stat st;
> +		if (stat(path, &st) != 0) {
> +			diag_set(SystemError, "failed to stat() %s", path);
> +			return NULL;
> +		}
> +
> +		/*
> +		 * In case of cache hit we may reuse existing
> +		 * module which speedup load procedure.
> +		 */
> +		module_attr_fill(&attr, &st);
> +		if (memcmp(&attr, &m->attr, sizeof(attr)) == 0) {

4. Please, add a static assertion, that sizeof(module_attr) == 40.
Otherwise somebody might add a new attribute, which won't be uint64_t,
and would break the comparison without noticing. Also you can make the
attributes be stored as a byte array char[40] to make it impossible to
add any padding into it. Also you can compare the attributes one by
one.

> +			module_ref(m);
> +			return m;
> +		}
> +
> +		/*
> +		 * Module has been updated on a storage device,
> +		 * so load a new instance and update the cache,
> +		 * old entry get evicted but continue residing
> +		 * in memory, fully functional, until last
> +		 * function is unloaded.
> +		 */
> +		m = module_new(package, package_len, path);
> +		if (m != NULL)
> +			cache_update(m);
> +	} else {
> +		m = module_new(package, package_len, path);
> +		if (m != NULL && cache_put(m) != 0) {
> +			module_unload(m);
> +			return NULL;
> +		}
> +	}
> +
> +	return m;
> +}
> +
> +void
> +module_unload(struct module *m)
> +{
> +	module_unref(m);
> +}
> +
> +void
> +module_free(void)
> +{
> +	mh_int_t e;
> +
> +	mh_foreach(module_cache, e) {
> +		struct module *m = mh_strnptr_node(module_cache, e)->val;
> +		module_unload(m);

5. As I said in the previous review, it does not make much sense.
If there are any not unloaded modules, and they try to unload later,
they will see module_cache == NULL and will crash.

Also you can't do unload here, because the module_cache itself does
not keep any references. All the unloads must be done by the module
objects owners. Not by module_cache on its own. For example, if there
is a module having a single reference and used in some other subsystem,
your unload will free it and make it memory invalid. That will crash
in case the module owner will try to access it again.

There should be a panic-check that the module cache is empty already.

> +	}
> +
> +	mh_strnptr_delete(module_cache);
> +	module_cache = NULL;
> +}
> diff --git a/src/box/module_cache.h b/src/box/module_cache.h
> new file mode 100644
> index 000000000..18eb3866a
> --- /dev/null
> +++ b/src/box/module_cache.h
> @@ -0,0 +1,208 @@
> +/*
> + * SPDX-License-Identifier: BSD-2-Clause
> + *
> + * Copyright 2010-2021, Tarantool AUTHORS, please see AUTHORS file.
> + */
> +
> +#pragma once
> +
> +#include <stdint.h>
> +
> +#include <sys/types.h>
> +#include <sys/stat.h>

6. You don't need these headers in module_cache.h. They are
needed only in the .c file.


More information about the Tarantool-patches mailing list