Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladislav Shpilevoy via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Cyrill Gorcunov <gorcunov@gmail.com>,
	tml <tarantool-patches@dev.tarantool.org>
Subject: Re: [Tarantool-patches] [PATCH v19 3/6] box/module_cache: introduce modules subsystem
Date: Mon, 8 Mar 2021 23:45:02 +0100	[thread overview]
Message-ID: <e10fe8bf-e99a-8ca9-85bd-e698abd8af43@tarantool.org> (raw)
In-Reply-To: <20210301212343.422372-4-gorcunov@gmail.com>

Thanks for the patch!

See 10 comments below.

> diff --git a/src/box/module_cache.c b/src/box/module_cache.c
> new file mode 100644
> index 000000000..bc2184e5f
> --- /dev/null
> +++ b/src/box/module_cache.c
> @@ -0,0 +1,476 @@
> +/*
> + * SPDX-License-Identifier: BSD-2-Clause
> + *
> + * Copyright 2010-2021, Tarantool AUTHORS, please see AUTHORS file.
> + */
> +
> +#include <unistd.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <dlfcn.h>
> +#include <lua.h>
> +
> +#include "assoc.h"
> +#include "diag.h"
> +#include "fiber.h"
> +#include "module_cache.h"
> +
> +#include "box/error.h"
> +#include "box/port.h"
> +
> +#include "lua/utils.h"
> +#include "libeio/eio.h"
> +
> +static struct mh_strnptr_t *module_cache = NULL;
> +
> +/**
> + * Helpers for cache manipulations.
> + */
> +static void *
> +cache_find(const char *str, size_t len)
> +{
> +	mh_int_t e = mh_strnptr_find_inp(module_cache, str, len);
> +	if (e == mh_end(module_cache))
> +		return NULL;
> +	return mh_strnptr_node(module_cache, e)->val;
> +}
> +
> +static void
> +cache_update(struct module *m)
> +{
> +	const char *str = m->package;
> +	size_t len = m->package_len;
> +
> +	mh_int_t e = mh_strnptr_find_inp(module_cache, str, len);
> +	if (e == mh_end(module_cache))
> +		panic("module: failed to update cache: %s", str);
> +
> +	mh_strnptr_node(module_cache, e)->str = m->package;
> +	mh_strnptr_node(module_cache, e)->val = m;
> +}
> +
> +static int
> +cache_add(struct module *m)

1. In func.c it is called `cache_put`, but does the same. What is the
difference between `put` and `add` then? If none, then why not use
only one verb?

> +{
> +	const struct mh_strnptr_node_t nd = {
> +		.str	= m->package,
> +		.len	= m->package_len,
> +		.hash	= mh_strn_hash(m->package, m->package_len),
> +		.val	= m,
> +	};
> +
> +	mh_int_t e = mh_strnptr_put(module_cache, &nd, NULL, NULL);
> +	if (e == mh_end(module_cache)) {
> +		diag_set(OutOfMemory, sizeof(nd), "malloc",
> +			 "module_cache node");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +static void
> +cache_del(struct module *m)
> +{
> +	const char *str = m->package;
> +	size_t len = m->package_len;
> +
> +	mh_int_t e = mh_strnptr_find_inp(module_cache, str, len);
> +	if (e != mh_end(module_cache)) {
> +		struct module *v = mh_strnptr_node(module_cache, e)->val;
> +		if (v == m)
> +			mh_strnptr_del(module_cache, e, NULL);

2. Please, add a comment about why the module pointer in the cache
might be different.

> +	}
> +}
> +
> +struct module *
> +module_load_force(const char *package, size_t package_len)
> +{
> +	char path[PATH_MAX];
> +	size_t size = sizeof(path);
> +
> +	if (find_package(package, package_len, path, size) != 0)
> +		return NULL;
> +
> +	struct module *m = module_new(package, package_len, path);
> +	if (m == NULL)
> +		return NULL;
> +
> +	struct module *c = cache_find(package, package_len);
> +	if (c != NULL) {
> +		cache_update(m);
> +		say_info("module_cache: force load: %s", package);

3. That is not very helpful for a user - he does not know what
'module_cache' is, and what is 'force load'. Try to make a more
user-friendly message. It should not use any internal names.

> +	} else {
> +		if (cache_add(m) != 0) {
> +			module_unload(m);
> +			return NULL;
> +		}
> +	}
> +
> +	return m;
> +}
> +
> +struct module *
> +module_load(const char *package, size_t package_len)
> +{
> +	char path[PATH_MAX];
> +
> +	if (find_package(package, package_len, path, sizeof(path)) != 0)
> +		return NULL;
> +
> +	struct module *m = cache_find(package, package_len);
> +	if (m != NULL) {
> +		struct module_attr attr;
> +		struct stat st;
> +		if (stat(path, &st) != 0) {
> +			diag_set(SystemError, "failed to stat() %s", path);
> +			return NULL;
> +		}
> +
> +		/*
> +		 * In case of cache hit we may reuse existing
> +		 * module which speedup load procedure.
> +		 */
> +		module_attr_fill(&attr, &st);
> +		if (memcmp(&attr, &m->attr, sizeof(attr)) == 0) {

4. That is not safe in case the struct has padding between
members and/or in the end. You either must compare the members
manually, or must ensure there is no any padding. For instance,
reorder the members not to have the padding in between, add a dummy
member in the end to fill the end-padding if necessary and fill it
with 0 always, and add a static_assert its size is equal to the sum
of member sizes. Then memcmp should be safe. Otherwise you risk to
compare garbage.

> +			module_ref(m);
> +			return m;
> +		}
> +
> +		/*
> +		 * Module has been updated on a storage device,
> +		 * so load a new instance and update the cache,
> +		 * old entry get evicted but continue residing
> +		 * in memory, fully functional, until last
> +		 * function is unloaded.
> +		 */
> +		m = module_new(package, package_len, path);
> +		if (m != NULL) {
> +			cache_update(m);
> +			say_info("module_cache: attr changed: %s",
> +				 package);
> +		}
> +	} else {
> +		m = module_new(package, package_len, path);
> +		if (m != NULL) {
> +			if (cache_add(m) != 0) {

5. You could improve readability a bit if you would do early
returns on errors (return NULL right away if m == NULL), or
reduce the indentation. For instance, by using a single `if`
with the condition `m != NULL && cache_add(m) != 0` instead
of 2 `if`s.

> +				module_unload(m);
> +				return NULL;
> +			}
> +		}
> +	}
> +
> +	return m;
> +}
> +
> +void
> +module_unload(struct module *m)
> +{
> +	module_unref(m);
> +}
> +
> +void
> +module_free(void)
> +{
> +	size_t nr_busy = 0;
> +	mh_int_t e;
> +
> +	mh_foreach(module_cache, e) {
> +		struct module *m = mh_strnptr_node(module_cache, e)->val;
> +		if (m->refs > 1)
> +			nr_busy++;
> +		module_unload(m);
> +	}
> +
> +	mh_strnptr_delete(module_cache);
> +	module_cache = NULL;
> +
> +	if (nr_busy > 0)
> +		say_info("module_cache: %zu entries left", nr_busy);

6. This probably must be panic. If there are modules, they are going to
try to delete themselves from the cache later, which is NULL, so it
would crash anyway but without any message.

`module_cache` must be empty when module_free() is called.

> +}
> diff --git a/src/box/module_cache.h b/src/box/module_cache.h
> new file mode 100644
> index 000000000..78dd5c7ad
> --- /dev/null
> +++ b/src/box/module_cache.h
> @@ -0,0 +1,206 @@
> +/*
> + * SPDX-License-Identifier: BSD-2-Clause
> + *
> + * Copyright 2010-2021, Tarantool AUTHORS, please see AUTHORS file.
> + */
> +
> +#pragma once
> +
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +
> +#include "trivia/config.h"
> +
> +#if defined(__cplusplus)
> +extern "C" {
> +#endif /* defined(__cplusplus) */
> +
> +/**
> + * API of C stored function.
> + */
> +
> +struct port;
> +
> +struct box_function_ctx {
> +	struct port *port;
> +};
> +
> +typedef struct box_function_ctx box_function_ctx_t;
> +typedef int (*box_function_t)(box_function_ctx_t *ctx,
> +			      const char *args,
> +			      const char *args_end);
> +
> +/**
> + * Shared library file attributes for
> + * module cache invalidation.
> + */
> +struct module_attr {
> +#ifdef TARGET_OS_DARWIN
> +	struct timespec st_mtimespec;
> +#else
> +	struct timespec st_mtim;
> +#endif
> +	dev_t st_dev;
> +	ino_t st_ino;
> +	off_t st_size;

7. For the sake of easier memcmp() you could use platform-agnostic types.
For instance, u/int64_t or u/int32_t.

> +};
> +
> +/**
> + * Dynamic shared module.
> + */
> +struct module {
> +	/**
> +	 * Module handle, dlopen() result.
> +	 */
> +	void *handle;
> +	/**
> +	 * File attributes.
> +	 */
> +	struct module_attr attr;
> +	/**
> +	 * Count of active references.
> +	 */
> +	int64_t refs;
> +	/**
> +	 * Length of @a package.
> +	 */
> +	size_t package_len;
> +	/**
> +	 * Module's name without file extension.
> +	 */
> +	char package[0];
> +};
> +
> +/**
> + * Module function.
> + */
> +struct module_func {
> +	/**
> +	 * Function's address, iow dlsym() result.
> +	 */
> +	 box_function_t func;
> +	/**
> +	 * Function's module.
> +	 */
> +	struct module *module;
> +};
> +
> +/**
> + * Load a module.
> + *
> + * Lookup for a module instance in cache and if not found
> + * the module is loaded from a storage device.

8. It also checks for the attributes being changed.

> + *
> + * @param package module package (without file extension).
> + * @param package_len length of @a package.
> + *
> + * Possible errors:
> + * ClientError: the package is not found on a storage device.
> + * ClientError: an error happened when been loading the package.
> + * SystemError: a system error happened during procedure.
> + * OutOfMemory: unable to allocate new memory for module instance.
> + *
> + * @return a module instance on success, NULL otherwise (diag is set)
> + */
> +struct module *
> +module_load(const char *package, size_t package_len);
> +
> +/**
> + * Force load a module.
> + *
> + * Load a module from a storage device and invalidate
> + * an associated cache entry.

9. Reload, not invalidate. It stays cached, but with a new
pointer.

> + *
> + * @param package module package (without file extension).
> + * @param package_len length of @a package.
> + *
> + * Possible errors:
> + * ClientError: the package is not found on a storage device.
> + * ClientError: an error happened when been loading the package.
> + * SystemError: a system error happened during procedure.
> + * OutOfMemory: unable to allocate new memory for module instance.
> + *
> + * @return a module instance on success, NULL otherwise (diag is set)
> + */
> +struct module *
> +module_load_force(const char *package, size_t package_len);
> +
> +/**
> + * Unload a module instance.
> + *
> + * @param m a module to unload.
> + */
> +void
> +module_unload(struct module *m);
> +
> +/** Test if module function is empty. */
> +static inline bool
> +is_module_func_empty(struct module_func *mf)

10. Struct prefix is "stronger" than 'is' prefix. Otherwise
we would end up with tons of 'is' checkers in the code not
related to each other but having the same prefix, and
conflicting with variable names.

Here it should be module_func_is_empty().

> +{
> +	return mf->module == NULL;
> +}

  reply	other threads:[~2021-03-08 22:45 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-01 21:23 [Tarantool-patches] [PATCH v19 0/6] box: implement cmod Lua module Cyrill Gorcunov via Tarantool-patches
2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 1/6] box/func: module_reload -- drop redundant argument Cyrill Gorcunov via Tarantool-patches
2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 2/6] box/func: prepare for transition to modules subsystem Cyrill Gorcunov via Tarantool-patches
2021-03-08 22:44   ` Vladislav Shpilevoy via Tarantool-patches
2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 3/6] box/module_cache: introduce " Cyrill Gorcunov via Tarantool-patches
2021-03-08 22:45   ` Vladislav Shpilevoy via Tarantool-patches [this message]
2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 4/6] box/func: switch to module_cache interface Cyrill Gorcunov via Tarantool-patches
2021-03-08 22:47   ` Vladislav Shpilevoy via Tarantool-patches
2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 5/6] box/cmod: implement cmod Lua module Cyrill Gorcunov via Tarantool-patches
2021-03-08 22:51   ` Vladislav Shpilevoy via Tarantool-patches
2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 6/6] test: box/cfunc -- add cmod test Cyrill Gorcunov via Tarantool-patches
2021-03-08 22:51   ` Vladislav Shpilevoy via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e10fe8bf-e99a-8ca9-85bd-e698abd8af43@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=gorcunov@gmail.com \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v19 3/6] box/module_cache: introduce modules subsystem' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox