From: Cyrill Gorcunov via Tarantool-patches <tarantool-patches@dev.tarantool.org> To: tml <tarantool-patches@dev.tarantool.org> Cc: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> Subject: [Tarantool-patches] [PATCH v19 3/6] box/module_cache: introduce modules subsystem Date: Tue, 2 Mar 2021 00:23:40 +0300 [thread overview] Message-ID: <20210301212343.422372-4-gorcunov@gmail.com> (raw) In-Reply-To: <20210301212343.422372-1-gorcunov@gmail.com> The modules subsystem hides some low-level operations under API. In particular the modules subsystem is responsible for - modules lookup in Lua's "package.search" storage - modules caching to eliminate expensive load procedure - function symbol resolving Part-of #4642 Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com> --- src/box/CMakeLists.txt | 1 + src/box/module_cache.c | 476 +++++++++++++++++++++++++++++++++++++++++ src/box/module_cache.h | 206 ++++++++++++++++++ src/main.cc | 3 + 4 files changed, 686 insertions(+) create mode 100644 src/box/module_cache.c create mode 100644 src/box/module_cache.h diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt index 19203f770..cc2e17e94 100644 --- a/src/box/CMakeLists.txt +++ b/src/box/CMakeLists.txt @@ -126,6 +126,7 @@ add_library(box STATIC memtx_rtree.c memtx_bitset.c memtx_tx.c + module_cache.c engine.c memtx_engine.c memtx_space.c diff --git a/src/box/module_cache.c b/src/box/module_cache.c new file mode 100644 index 000000000..bc2184e5f --- /dev/null +++ b/src/box/module_cache.c @@ -0,0 +1,476 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright 2010-2021, Tarantool AUTHORS, please see AUTHORS file. 
+ */ + +#include <unistd.h> +#include <string.h> +#include <fcntl.h> +#include <dlfcn.h> +#include <lua.h> + +#include "assoc.h" +#include "diag.h" +#include "fiber.h" +#include "module_cache.h" + +#include "box/error.h" +#include "box/port.h" + +#include "lua/utils.h" +#include "libeio/eio.h" + +static struct mh_strnptr_t *module_cache = NULL; + +/** + * Helpers for cache manipulations. + */ +static void * +cache_find(const char *str, size_t len) +{ + mh_int_t e = mh_strnptr_find_inp(module_cache, str, len); + if (e == mh_end(module_cache)) + return NULL; + return mh_strnptr_node(module_cache, e)->val; +} + +static void +cache_update(struct module *m) +{ + const char *str = m->package; + size_t len = m->package_len; + + mh_int_t e = mh_strnptr_find_inp(module_cache, str, len); + if (e == mh_end(module_cache)) + panic("module: failed to update cache: %s", str); + + mh_strnptr_node(module_cache, e)->str = m->package; + mh_strnptr_node(module_cache, e)->val = m; +} + +static int +cache_add(struct module *m) +{ + const struct mh_strnptr_node_t nd = { + .str = m->package, + .len = m->package_len, + .hash = mh_strn_hash(m->package, m->package_len), + .val = m, + }; + + mh_int_t e = mh_strnptr_put(module_cache, &nd, NULL, NULL); + if (e == mh_end(module_cache)) { + diag_set(OutOfMemory, sizeof(nd), "malloc", + "module_cache node"); + return -1; + } + return 0; +} + +static void +cache_del(struct module *m) +{ + const char *str = m->package; + size_t len = m->package_len; + + mh_int_t e = mh_strnptr_find_inp(module_cache, str, len); + if (e != mh_end(module_cache)) { + struct module *v = mh_strnptr_node(module_cache, e)->val; + if (v == m) + mh_strnptr_del(module_cache, e, NULL); + } +} + +/** Arguments for lpackage_search. */ +struct find_ctx { + const char *package; + size_t package_len; + char *path; + size_t path_len; +}; + +/** A helper for find_package(). 
*/ +static int +lpackage_search(lua_State *L) +{ + struct find_ctx *ctx = (void *)lua_topointer(L, 1); + + lua_getglobal(L, "package"); + lua_getfield(L, -1, "search"); + lua_pushlstring(L, ctx->package, ctx->package_len); + + lua_call(L, 1, 1); + if (lua_isnil(L, -1)) + return luaL_error(L, "module not found"); + + char resolved[PATH_MAX]; + if (realpath(lua_tostring(L, -1), resolved) == NULL) { + diag_set(SystemError, "realpath"); + return luaT_error(L); + } + + /* + * No need for result being trimmed test, it + * is guaranteed by realpath call. + */ + snprintf(ctx->path, ctx->path_len, "%s", resolved); + return 0; +} + +/** Find package in Lua's "package.search". */ +static int +find_package(const char *package, size_t package_len, + char *path, size_t path_len) +{ + struct find_ctx ctx = { + .package = package, + .package_len = package_len, + .path = path, + .path_len = path_len, + }; + + lua_State *L = tarantool_L; + int top = lua_gettop(L); + if (luaT_cpcall(L, lpackage_search, &ctx) != 0) { + diag_set(ClientError, ER_LOAD_MODULE, ctx.package_len, + ctx.package, lua_tostring(L, -1)); + lua_settop(L, top); + return -1; + } + assert(top == lua_gettop(L)); + return 0; +} + +void +module_ref(struct module *m) +{ + assert(m->refs >= 0); + ++m->refs; +} + +void +module_unref(struct module *m) +{ + assert(m->refs > 0); + if (--m->refs == 0) { + cache_del(m); + dlclose(m->handle); + TRASH(m); + free(m); + } +} + +int +module_func_load(struct module *m, const char *func_name, + struct module_func *mf) +{ + void *sym = dlsym(m->handle, func_name); + if (sym == NULL) { + diag_set(ClientError, ER_LOAD_FUNCTION, + func_name, dlerror()); + return -1; + } + + mf->func = sym; + mf->module = m; + module_ref(m); + + return 0; +} + +void +module_func_unload(struct module_func *mf) +{ + module_unref(mf->module); + /* + * Strictly speaking there is no need + * for implicit creation, it is up to + * the caller to clear the module function, + * but since it is cheap, lets prevent 
from + * even potential use after free. + */ + module_func_create(mf); +} + +int +module_func_call(struct module_func *mf, struct port *args, + struct port *ret) +{ + struct region *region = &fiber()->gc; + size_t region_svp = region_used(region); + + uint32_t data_sz; + const char *data = port_get_msgpack(args, &data_sz); + if (data == NULL) + return -1; + + port_c_create(ret); + box_function_ctx_t ctx = { + .port = ret, + }; + + /* + * We don't know what exactly the callee + * gonna do during the execution, it may + * even try to unload itself, thus we make + * sure the dso won't be unloaded until + * execution is complete. + * + * Moreover the callee might release the memory + * associated with the module_func pointer itself + * so keep the address of the module locally. + */ + struct module *m = mf->module; + module_ref(m); + int rc = mf->func(&ctx, data, data + data_sz); + module_unref(m); + + region_truncate(region, region_svp); + + if (rc != 0) { + if (diag_last_error(&fiber()->diag) == NULL) + diag_set(ClientError, ER_PROC_C, "unknown error"); + port_destroy(ret); + return -1; + } + + return 0; +} + +/** Fill attributes from stat. */ +static void +module_attr_fill(struct module_attr *attr, struct stat *st) +{ + attr->st_dev = st->st_dev; + attr->st_ino = st->st_ino; + attr->st_size = st->st_size; +#ifdef TARGET_OS_DARWIN + attr->st_mtimespec = st->st_mtimespec; +#else + attr->st_mtim = st->st_mtim; +#endif +} + +/** + * Copy shared library to temp directory and load from there, + * then remove it from this temp place leaving in memory. This + * is because there was a bug in libc which broke detection of + * file updates, such that the next dlopen call simply returned + * a cached version instead of rereading a library from the disk. + * + * We keep own copy of file attributes and reload the library + * on demand. 
+ */ +static struct module * +module_new(const char *package, size_t package_len, + const char *source_path) +{ + size_t size = sizeof(struct module) + package_len + 1; + struct module *m = malloc(size); + if (m == NULL) { + diag_set(OutOfMemory, size, "malloc", "module"); + return NULL; + } + + m->package_len = package_len; + m->refs = 0; + + memcpy(m->package, package, package_len); + m->package[package_len] = 0; + + const char *tmpdir = getenv("TMPDIR"); + if (tmpdir == NULL) + tmpdir = "/tmp"; + + char dir_name[PATH_MAX]; + int rc = snprintf(dir_name, sizeof(dir_name), + "%s/tntXXXXXX", tmpdir); + if (rc < 0 || (size_t)rc >= sizeof(dir_name)) { + diag_set(SystemError, "failed to generate path to tmp dir"); + goto error; + } + + if (mkdtemp(dir_name) == NULL) { + diag_set(SystemError, "failed to create unique dir name: %s", + dir_name); + goto error; + } + + char load_name[PATH_MAX]; + rc = snprintf(load_name, sizeof(load_name), + "%s/%.*s." TARANTOOL_LIBEXT, + dir_name, (int)package_len, package); + if (rc < 0 || (size_t)rc >= sizeof(dir_name)) { + diag_set(SystemError, "failed to generate path to dso"); + goto error; + } + + struct stat st; + if (stat(source_path, &st) < 0) { + diag_set(SystemError, "failed to stat() module: %s", + source_path); + goto error; + } + module_attr_fill(&m->attr, &st); + + int source_fd = open(source_path, O_RDONLY); + if (source_fd < 0) { + diag_set(SystemError, "failed to open module %s " + "file for reading", source_path); + goto error; + } + int dest_fd = open(load_name, O_WRONLY | O_CREAT | O_TRUNC, + st.st_mode & 0777); + if (dest_fd < 0) { + diag_set(SystemError, "failed to open file %s " + "for writing ", load_name); + close(source_fd); + goto error; + } + + off_t ret = eio_sendfile_sync(dest_fd, source_fd, 0, st.st_size); + close(source_fd); + close(dest_fd); + if (ret != st.st_size) { + diag_set(SystemError, "failed to copy dso %s to %s", + source_path, load_name); + goto error; + } + + m->handle = dlopen(load_name, 
RTLD_NOW | RTLD_LOCAL); + if (unlink(load_name) != 0) + say_warn("failed to unlink dso link: %s", load_name); + if (rmdir(dir_name) != 0) + say_warn("failed to delete temporary dir: %s", dir_name); + if (m->handle == NULL) { + diag_set(ClientError, ER_LOAD_MODULE, package_len, + package, dlerror()); + goto error; + } + + module_ref(m); + return m; + +error: + free(m); + return NULL; +} + +struct module * +module_load_force(const char *package, size_t package_len) +{ + char path[PATH_MAX]; + size_t size = sizeof(path); + + if (find_package(package, package_len, path, size) != 0) + return NULL; + + struct module *m = module_new(package, package_len, path); + if (m == NULL) + return NULL; + + struct module *c = cache_find(package, package_len); + if (c != NULL) { + cache_update(m); + say_info("module_cache: force load: %s", package); + } else { + if (cache_add(m) != 0) { + module_unload(m); + return NULL; + } + } + + return m; +} + +struct module * +module_load(const char *package, size_t package_len) +{ + char path[PATH_MAX]; + + if (find_package(package, package_len, path, sizeof(path)) != 0) + return NULL; + + struct module *m = cache_find(package, package_len); + if (m != NULL) { + struct module_attr attr; + struct stat st; + if (stat(path, &st) != 0) { + diag_set(SystemError, "failed to stat() %s", path); + return NULL; + } + + /* + * In case of cache hit we may reuse existing + * module which speedup load procedure. + */ + module_attr_fill(&attr, &st); + if (memcmp(&attr, &m->attr, sizeof(attr)) == 0) { + module_ref(m); + return m; + } + + /* + * Module has been updated on a storage device, + * so load a new instance and update the cache, + * old entry get evicted but continue residing + * in memory, fully functional, until last + * function is unloaded. 
+ */ + m = module_new(package, package_len, path); + if (m != NULL) { + cache_update(m); + say_info("module_cache: attr changed: %s", + package); + } + } else { + m = module_new(package, package_len, path); + if (m != NULL) { + if (cache_add(m) != 0) { + module_unload(m); + return NULL; + } + } + } + + return m; +} + +void +module_unload(struct module *m) +{ + module_unref(m); +} + +void +module_free(void) +{ + size_t nr_busy = 0; + mh_int_t e; + + mh_foreach(module_cache, e) { + struct module *m = mh_strnptr_node(module_cache, e)->val; + if (m->refs > 1) + nr_busy++; + module_unload(m); + } + + mh_strnptr_delete(module_cache); + module_cache = NULL; + + if (nr_busy > 0) + say_info("module_cache: %zu entries left", nr_busy); +} + +int +module_init(void) +{ + module_cache = mh_strnptr_new(); + if (module_cache == NULL) { + diag_set(OutOfMemory, sizeof(*module_cache), + "malloc", "module_cache"); + return -1; + } + return 0; +} diff --git a/src/box/module_cache.h b/src/box/module_cache.h new file mode 100644 index 000000000..78dd5c7ad --- /dev/null +++ b/src/box/module_cache.h @@ -0,0 +1,206 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright 2010-2021, Tarantool AUTHORS, please see AUTHORS file. + */ + +#pragma once + +#include <sys/types.h> +#include <sys/stat.h> + +#include "trivia/config.h" + +#if defined(__cplusplus) +extern "C" { +#endif /* defined(__cplusplus) */ + +/** + * API of C stored function. + */ + +struct port; + +struct box_function_ctx { + struct port *port; +}; + +typedef struct box_function_ctx box_function_ctx_t; +typedef int (*box_function_t)(box_function_ctx_t *ctx, + const char *args, + const char *args_end); + +/** + * Shared library file attributes for + * module cache invalidation. + */ +struct module_attr { +#ifdef TARGET_OS_DARWIN + struct timespec st_mtimespec; +#else + struct timespec st_mtim; +#endif + dev_t st_dev; + ino_t st_ino; + off_t st_size; +}; + +/** + * Dynamic shared module. 
+ */ +struct module { + /** + * Module handle, dlopen() result. + */ + void *handle; + /** + * File attributes. + */ + struct module_attr attr; + /** + * Count of active references. + */ + int64_t refs; + /** + * Length of @a package. + */ + size_t package_len; + /** + * Module's name without file extension. + */ + char package[0]; +}; + +/** + * Module function. + */ +struct module_func { + /** + * Function's address, iow dlsym() result. + */ + box_function_t func; + /** + * Function's module. + */ + struct module *module; +}; + +/** + * Load a module. + * + * Look up a module instance in the cache and, if not found, + * the module is loaded from a storage device. + * + * @param package module package (without file extension). + * @param package_len length of @a package. + * + * Possible errors: + * ClientError: the package is not found on a storage device. + * ClientError: an error happened while loading the package. + * SystemError: a system error happened during procedure. + * OutOfMemory: unable to allocate new memory for module instance. + * + * @return a module instance on success, NULL otherwise (diag is set) + */ +struct module * +module_load(const char *package, size_t package_len); + +/** + * Force load a module. + * + * Load a module from a storage device and invalidate + * an associated cache entry. + * + * @param package module package (without file extension). + * @param package_len length of @a package. + * + * Possible errors: + * ClientError: the package is not found on a storage device. + * ClientError: an error happened while loading the package. + * SystemError: a system error happened during procedure. + * OutOfMemory: unable to allocate new memory for module instance. + * + * @return a module instance on success, NULL otherwise (diag is set) + */ +struct module * +module_load_force(const char *package, size_t package_len); + +/** + * Unload a module instance. + * + * @param m a module to unload. 
+ */ +void +module_unload(struct module *m); + +/** Test if module function is empty. */ +static inline bool +is_module_func_empty(struct module_func *mf) +{ + return mf->module == NULL; +} + +/** Create new empty module function. */ +static inline void +module_func_create(struct module_func *mf) +{ + mf->module = NULL; + mf->func = NULL; +} + +/** + * Load a new function. + * + * @param m a module to load a function from. + * @param func_name function name. + * @param mf[out] function instance. + * + * Possible errors: + * ClientError: no such function in a module. + * + * @return 0 on success, -1 otherwise (diag is set). + */ +int +module_func_load(struct module *m, const char *func_name, + struct module_func *mf); + +/** + * Unload a function. + * + * @param mf module function. + */ +void +module_func_unload(struct module_func *mf); + +/** + * Execute a function. + * + * @param mf a function to execute. + * @param args function arguments. + * @param ret[out] execution results. + * + * @return 0 on success, -1 otherwise (diag is set). + */ +int +module_func_call(struct module_func *mf, struct port *args, + struct port *ret); + +/** Increment reference to a module. */ +void +module_ref(struct module *m); + +/** Decrement reference of a module. */ +void +module_unref(struct module *m); + +/** Initialize modules subsystem. */ +int +module_init(void); + +/** Free modules subsystem. */ +void +module_free(void); + +#if defined(__cplusplus) +} +#endif /* defined(__cplusplus) */ diff --git a/src/main.cc b/src/main.cc index 2fce81bb3..df4ccf35a 100644 --- a/src/main.cc +++ b/src/main.cc @@ -76,6 +76,7 @@ #include "box/lua/init.h" /* box_lua_init() */ #include "box/session.h" #include "box/memtx_tx.h" +#include "box/module_cache.h" #include "systemd.h" #include "crypto/crypto.h" #include "core/popen.h" @@ -519,6 +520,7 @@ tarantool_free(void) title_free(main_argc, main_argv); popen_free(); + module_free(); /* unlink pidfile. 
*/ if (pid_file_handle != NULL && pidfile_remove(pid_file_handle) == -1) @@ -708,6 +710,7 @@ main(int argc, char **argv) cbus_init(); coll_init(); memtx_tx_manager_init(); + module_init(); crypto_init(); systemd_init(); tarantool_lua_init(tarantool_bin, main_argc, main_argv); -- 2.29.2
next prev parent reply other threads:[~2021-03-01 21:25 UTC|newest] Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-03-01 21:23 [Tarantool-patches] [PATCH v19 0/6] box: implement cmod Lua module Cyrill Gorcunov via Tarantool-patches 2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 1/6] box/func: module_reload -- drop redundant argument Cyrill Gorcunov via Tarantool-patches 2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 2/6] box/func: prepare for transition to modules subsystem Cyrill Gorcunov via Tarantool-patches 2021-03-08 22:44 ` Vladislav Shpilevoy via Tarantool-patches 2021-03-01 21:23 ` Cyrill Gorcunov via Tarantool-patches [this message] 2021-03-08 22:45 ` [Tarantool-patches] [PATCH v19 3/6] box/module_cache: introduce " Vladislav Shpilevoy via Tarantool-patches 2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 4/6] box/func: switch to module_cache interface Cyrill Gorcunov via Tarantool-patches 2021-03-08 22:47 ` Vladislav Shpilevoy via Tarantool-patches 2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 5/6] box/cmod: implement cmod Lua module Cyrill Gorcunov via Tarantool-patches 2021-03-08 22:51 ` Vladislav Shpilevoy via Tarantool-patches 2021-03-01 21:23 ` [Tarantool-patches] [PATCH v19 6/6] test: box/cfunc -- add cmod test Cyrill Gorcunov via Tarantool-patches 2021-03-08 22:51 ` Vladislav Shpilevoy via Tarantool-patches
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20210301212343.422372-4-gorcunov@gmail.com \ --to=tarantool-patches@dev.tarantool.org \ --cc=gorcunov@gmail.com \ --cc=v.shpilevoy@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH v19 3/6] box/module_cache: introduce modules subsystem' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox