From: Vladislav Shpilevoy via Tarantool-patches <tarantool-patches@dev.tarantool.org> To: Cyrill Gorcunov <gorcunov@gmail.com>, tml <tarantool-patches@dev.tarantool.org> Subject: Re: [Tarantool-patches] [PATCH v19 3/6] box/module_cache: introduce modules subsystem Date: Mon, 8 Mar 2021 23:45:02 +0100 [thread overview] Message-ID: <e10fe8bf-e99a-8ca9-85bd-e698abd8af43@tarantool.org> (raw) In-Reply-To: <20210301212343.422372-4-gorcunov@gmail.com> Thanks for the patch! See 10 comments below. > diff --git a/src/box/module_cache.c b/src/box/module_cache.c > new file mode 100644 > index 000000000..bc2184e5f > --- /dev/null > +++ b/src/box/module_cache.c > @@ -0,0 +1,476 @@ > +/* > + * SPDX-License-Identifier: BSD-2-Clause > + * > + * Copyright 2010-2021, Tarantool AUTHORS, please see AUTHORS file. > + */ > + > +#include <unistd.h> > +#include <string.h> > +#include <fcntl.h> > +#include <dlfcn.h> > +#include <lua.h> > + > +#include "assoc.h" > +#include "diag.h" > +#include "fiber.h" > +#include "module_cache.h" > + > +#include "box/error.h" > +#include "box/port.h" > + > +#include "lua/utils.h" > +#include "libeio/eio.h" > + > +static struct mh_strnptr_t *module_cache = NULL; > + > +/** > + * Helpers for cache manipulations. > + */ > +static void * > +cache_find(const char *str, size_t len) > +{ > + mh_int_t e = mh_strnptr_find_inp(module_cache, str, len); > + if (e == mh_end(module_cache)) > + return NULL; > + return mh_strnptr_node(module_cache, e)->val; > +} > + > +static void > +cache_update(struct module *m) > +{ > + const char *str = m->package; > + size_t len = m->package_len; > + > + mh_int_t e = mh_strnptr_find_inp(module_cache, str, len); > + if (e == mh_end(module_cache)) > + panic("module: failed to update cache: %s", str); > + > + mh_strnptr_node(module_cache, e)->str = m->package; > + mh_strnptr_node(module_cache, e)->val = m; > +} > + > +static int > +cache_add(struct module *m) 1. In func.c it is called `cache_put`, but does the same. What is the difference between `put` and `add` then? If none, then why not use only one verb? > +{ > + const struct mh_strnptr_node_t nd = { > + .str = m->package, > + .len = m->package_len, > + .hash = mh_strn_hash(m->package, m->package_len), > + .val = m, > + }; > + > + mh_int_t e = mh_strnptr_put(module_cache, &nd, NULL, NULL); > + if (e == mh_end(module_cache)) { > + diag_set(OutOfMemory, sizeof(nd), "malloc", > + "module_cache node"); > + return -1; > + } > + return 0; > +} > + > +static void > +cache_del(struct module *m) > +{ > + const char *str = m->package; > + size_t len = m->package_len; > + > + mh_int_t e = mh_strnptr_find_inp(module_cache, str, len); > + if (e != mh_end(module_cache)) { > + struct module *v = mh_strnptr_node(module_cache, e)->val; > + if (v == m) > + mh_strnptr_del(module_cache, e, NULL); 2. Please, add a comment about why the module pointer in the cache might be different. > + } > +} > + > +struct module * > +module_load_force(const char *package, size_t package_len) > +{ > + char path[PATH_MAX]; > + size_t size = sizeof(path); > + > + if (find_package(package, package_len, path, size) != 0) > + return NULL; > + > + struct module *m = module_new(package, package_len, path); > + if (m == NULL) > + return NULL; > + > + struct module *c = cache_find(package, package_len); > + if (c != NULL) { > + cache_update(m); > + say_info("module_cache: force load: %s", package); 3. That is not very helpful for a user - he does not know what 'module_cache' is, and what is 'force load'. Try to make a more user-friendly message. It should not use any internal names. > + } else { > + if (cache_add(m) != 0) { > + module_unload(m); > + return NULL; > + } > + } > + > + return m; > +} > + > +struct module * > +module_load(const char *package, size_t package_len) > +{ > + char path[PATH_MAX]; > + > + if (find_package(package, package_len, path, sizeof(path)) != 0) > + return NULL; > + > + struct module *m = cache_find(package, package_len); > + if (m != NULL) { > + struct module_attr attr; > + struct stat st; > + if (stat(path, &st) != 0) { > + diag_set(SystemError, "failed to stat() %s", path); > + return NULL; > + } > + > + /* > + * In case of cache hit we may reuse existing > + * module which speedup load procedure. > + */ > + module_attr_fill(&attr, &st); > + if (memcmp(&attr, &m->attr, sizeof(attr)) == 0) { 4. That is not safe in case the struct has padding between members and/or in the end. You either must compare the members manually, or must ensure there is no any padding. For instance, reorder the members not to have the padding in between, add a dummy member in the end to fill the end-padding if necessary and fill it with 0 always, and add a static_assert its size is equal to the sum of member sizes. Then memcmp should be safe. Otherwise you risk to compare garbage. > + module_ref(m); > + return m; > + } > + > + /* > + * Module has been updated on a storage device, > + * so load a new instance and update the cache, > + * old entry get evicted but continue residing > + * in memory, fully functional, until last > + * function is unloaded. > + */ > + m = module_new(package, package_len, path); > + if (m != NULL) { > + cache_update(m); > + say_info("module_cache: attr changed: %s", > + package); > + } > + } else { > + m = module_new(package, package_len, path); > + if (m != NULL) { > + if (cache_add(m) != 0) { 5. You could improve readability a bit if you would do early returns on errors (return NULL right away if m == NULL), or reduce the indentation. For instance, by using a single `if` with the condition `m != NULL && cache_add(m) != 0` instead of 2 `if`s. > + module_unload(m); > + return NULL; > + } > + } > + } > + > + return m; > +} > + > +void > +module_unload(struct module *m) > +{ > + module_unref(m); > +} > + > +void > +module_free(void) > +{ > + size_t nr_busy = 0; > + mh_int_t e; > + > + mh_foreach(module_cache, e) { > + struct module *m = mh_strnptr_node(module_cache, e)->val; > + if (m->refs > 1) > + nr_busy++; > + module_unload(m); > + } > + > + mh_strnptr_delete(module_cache); > + module_cache = NULL; > + > + if (nr_busy > 0) > + say_info("module_cache: %zu entries left", nr_busy); 6. This probably must be panic. If there are modules, they are going to try to delete themselves from the cache later, which is NULL, so it would crash anyway but without any message. `module_cache` must be empty when module_free() is called. > +} > diff --git a/src/box/module_cache.h b/src/box/module_cache.h > new file mode 100644 > index 000000000..78dd5c7ad > --- /dev/null > +++ b/src/box/module_cache.h > @@ -0,0 +1,206 @@ > +/* > + * SPDX-License-Identifier: BSD-2-Clause > + * > + * Copyright 2010-2021, Tarantool AUTHORS, please see AUTHORS file. > + */ > + > +#pragma once > + > +#include <sys/types.h> > +#include <sys/stat.h> > + > +#include "trivia/config.h" > + > +#if defined(__cplusplus) > +extern "C" { > +#endif /* defined(__cplusplus) */ > + > +/** > + * API of C stored function. > + */ > + > +struct port; > + > +struct box_function_ctx { > + struct port *port; > +}; > + > +typedef struct box_function_ctx box_function_ctx_t; > +typedef int (*box_function_t)(box_function_ctx_t *ctx, > + const char *args, > + const char *args_end); > + > +/** > + * Shared library file attributes for > + * module cache invalidation. > + */ > +struct module_attr { > +#ifdef TARGET_OS_DARWIN > + struct timespec st_mtimespec; > +#else > + struct timespec st_mtim; > +#endif > + dev_t st_dev; > + ino_t st_ino; > + off_t st_size; 7. For the sake of easier memcmp() you could use platform-agnostic types. For instance, u/int64_t or u/int32_t. > +}; > + > +/** > + * Dynamic shared module. > + */ > +struct module { > + /** > + * Module handle, dlopen() result. > + */ > + void *handle; > + /** > + * File attributes. > + */ > + struct module_attr attr; > + /** > + * Count of active references. > + */ > + int64_t refs; > + /** > + * Length of @a package. > + */ > + size_t package_len; > + /** > + * Module's name without file extension. > + */ > + char package[0]; > +}; > + > +/** > + * Module function. > + */ > +struct module_func { > + /** > + * Function's address, iow dlsym() result. > + */ > + box_function_t func; > + /** > + * Function's module. > + */ > + struct module *module; > +}; > + > +/** > + * Load a module. > + * > + * Lookup for a module instance in cache and if not found > + * the module is loaded from a storage device. 8. It also checks for the attributes being changed. > + * > + * @param package module package (without file extension). > + * @param package_len length of @a package. > + * > + * Possible errors: > + * ClientError: the package is not found on a storage device. > + * ClientError: an error happened when been loading the package. > + * SystemError: a system error happened during procedure. > + * OutOfMemory: unable to allocate new memory for module instance. > + * > + * @return a module instance on success, NULL otherwise (diag is set) > + */ > +struct module * > +module_load(const char *package, size_t package_len); > + > +/** > + * Force load a module. > + * > + * Load a module from a storage device and invalidate > + * an associated cache entry. 9. Reload, not invalidate. It stays cached, but with a new pointer. > + * > + * @param package module package (without file extension). > + * @param package_len length of @a package. > + * > + * Possible errors: > + * ClientError: the package is not found on a storage device. > + * ClientError: an error happened when been loading the package. > + * SystemError: a system error happened during procedure. > + * OutOfMemory: unable to allocate new memory for module instance. > + * > + * @return a module instance on success, NULL otherwise (diag is set) > + */ > +struct module * > +module_load_force(const char *package, size_t package_len); > + > +/** > + * Unload a module instance. > + * > + * @param m a module to unload. > + */ > +void > +module_unload(struct module *m); > + > +/** Test if module function is empty. */ > +static inline bool > +is_module_func_empty(struct module_func *mf) 10. Struct prefix is "stronger" than 'is' prefix. Otherwise we would end up with tons of 'is' checkers in the code not related to each other but having the same prefix, and conflicting with variable names. Here it should be module_func_is_empty(). > +{ > + return mf->module == NULL; > +}
next prev parent reply other threads:[~2021-03-08 22:45 UTC|newest] Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-03-01 21:23 [Tarantool-patches] [PATCH v19 0/6] box: implement cmod Lua module Cyrill Gorcunov via Tarantool-patches 2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 1/6] box/func: module_reload -- drop redundant argument Cyrill Gorcunov via Tarantool-patches 2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 2/6] box/func: prepare for transition to modules subsystem Cyrill Gorcunov via Tarantool-patches 2021-03-08 22:44 ` Vladislav Shpilevoy via Tarantool-patches 2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 3/6] box/module_cache: introduce " Cyrill Gorcunov via Tarantool-patches 2021-03-08 22:45 ` Vladislav Shpilevoy via Tarantool-patches [this message] 2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 4/6] box/func: switch to module_cache interface Cyrill Gorcunov via Tarantool-patches 2021-03-08 22:47 ` Vladislav Shpilevoy via Tarantool-patches 2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 5/6] box/cmod: implement cmod Lua module Cyrill Gorcunov via Tarantool-patches 2021-03-08 22:51 ` Vladislav Shpilevoy via Tarantool-patches 2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 6/6] test: box/cfunc -- add cmod test Cyrill Gorcunov via Tarantool-patches 2021-03-08 22:51 ` Vladislav Shpilevoy via Tarantool-patches
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=e10fe8bf-e99a-8ca9-85bd-e698abd8af43@tarantool.org \ --to=tarantool-patches@dev.tarantool.org \ --cc=gorcunov@gmail.com \ --cc=v.shpilevoy@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH v19 3/6] box/module_cache: introduce modules subsystem' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox