[Tarantool-patches] [PATCH v19 3/6] box/module_cache: introduce modules subsystem
Cyrill Gorcunov
gorcunov at gmail.com
Tue Mar 2 00:23:40 MSK 2021
The modules subsystem hides some low-level operations behind an API.
In particular, the modules subsystem is responsible for
- module lookup in Lua's "package.search" storage
- module caching to eliminate the expensive load procedure
- function symbol resolving
Part-of #4642
Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---
src/box/CMakeLists.txt | 1 +
src/box/module_cache.c | 476 +++++++++++++++++++++++++++++++++++++++++
src/box/module_cache.h | 206 ++++++++++++++++++
src/main.cc | 3 +
4 files changed, 686 insertions(+)
create mode 100644 src/box/module_cache.c
create mode 100644 src/box/module_cache.h
diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index 19203f770..cc2e17e94 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -126,6 +126,7 @@ add_library(box STATIC
memtx_rtree.c
memtx_bitset.c
memtx_tx.c
+ module_cache.c
engine.c
memtx_engine.c
memtx_space.c
diff --git a/src/box/module_cache.c b/src/box/module_cache.c
new file mode 100644
index 000000000..bc2184e5f
--- /dev/null
+++ b/src/box/module_cache.c
@@ -0,0 +1,476 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2010-2021, Tarantool AUTHORS, please see AUTHORS file.
+ */
+
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <dlfcn.h>
+#include <lua.h>
+
+#include "assoc.h"
+#include "diag.h"
+#include "fiber.h"
+#include "module_cache.h"
+
+#include "box/error.h"
+#include "box/port.h"
+
+#include "lua/utils.h"
+#include "libeio/eio.h"
+
+static struct mh_strnptr_t *module_cache = NULL;
+
+/**
+ * Helpers for cache manipulations.
+ */
+/**
+ * Look up a cache entry by package name.
+ *
+ * @param str package name.
+ * @param len length of @a str.
+ *
+ * @return the value stored for the name, NULL if there is no entry.
+ */
+static void *
+cache_find(const char *str, size_t len)
+{
+	mh_int_t pos = mh_strnptr_find_inp(module_cache, str, len);
+	if (pos != mh_end(module_cache))
+		return mh_strnptr_node(module_cache, pos)->val;
+	return NULL;
+}
+
+/**
+ * Make an existing cache entry point to the module @a m.
+ * The entry is looked up by the module's package name and
+ * must exist, otherwise the program panics.
+ */
+static void
+cache_update(struct module *m)
+{
+	mh_int_t pos = mh_strnptr_find_inp(module_cache, m->package,
+					   m->package_len);
+	if (pos == mh_end(module_cache))
+		panic("module: failed to update cache: %s", m->package);
+
+	/* The key must reference the memory owned by the new value. */
+	struct mh_strnptr_node_t *node = mh_strnptr_node(module_cache, pos);
+	node->str = m->package;
+	node->val = m;
+}
+
+/**
+ * Insert the module @a m into the cache, keyed by its package name.
+ *
+ * @return 0 on success, -1 otherwise (diag is set).
+ */
+static int
+cache_add(struct module *m)
+{
+	const struct mh_strnptr_node_t node = {
+		.str = m->package,
+		.len = m->package_len,
+		.hash = mh_strn_hash(m->package, m->package_len),
+		.val = m,
+	};
+
+	/* The end marker is read only after the insertion is done. */
+	mh_int_t pos = mh_strnptr_put(module_cache, &node, NULL, NULL);
+	if (pos != mh_end(module_cache))
+		return 0;
+
+	diag_set(OutOfMemory, sizeof(node), "malloc",
+		 "module_cache node");
+	return -1;
+}
+
+/**
+ * Remove the module @a m from the cache. The entry is dropped
+ * only if it still refers to this very instance: an entry which
+ * points to another instance of the same package is left intact.
+ */
+static void
+cache_del(struct module *m)
+{
+	mh_int_t pos = mh_strnptr_find_inp(module_cache, m->package,
+					   m->package_len);
+	if (pos == mh_end(module_cache))
+		return;
+
+	struct module *cached = mh_strnptr_node(module_cache, pos)->val;
+	if (cached != m)
+		return;
+
+	mh_strnptr_del(module_cache, pos, NULL);
+}
+
+/**
+ * Arguments for lpackage_search.
+ *
+ * @a package and @a package_len describe the package name which
+ * is passed to "package.search"; @a path and @a path_len describe
+ * the output buffer which receives the resolved path.
+ */
+struct find_ctx {
+ const char *package;
+ size_t package_len;
+ char *path;
+ size_t path_len;
+};
+
+/**
+ * A helper for find_package().
+ *
+ * Takes a pointer to struct find_ctx from stack index 1, calls
+ * Lua's "package.search" with the package name and writes the
+ * realpath() of the result to the context's path buffer.
+ *
+ * Raises a Lua error "module not found" if the search returns nil,
+ * and raises the SystemError diag if realpath() fails. Returns no
+ * values to Lua on success.
+ */
+static int
+lpackage_search(lua_State *L)
+{
+ struct find_ctx *ctx = (void *)lua_topointer(L, 1);
+
+ lua_getglobal(L, "package");
+ lua_getfield(L, -1, "search");
+ lua_pushlstring(L, ctx->package, ctx->package_len);
+
+ lua_call(L, 1, 1);
+ if (lua_isnil(L, -1))
+ return luaL_error(L, "module not found");
+
+ char resolved[PATH_MAX];
+ if (realpath(lua_tostring(L, -1), resolved) == NULL) {
+ diag_set(SystemError, "realpath");
+ return luaT_error(L);
+ }
+
+ /*
+ * There is no need to test whether the result
+ * got trimmed: the original author relies on the
+ * realpath() call to guarantee that it fits.
+ */
+ snprintf(ctx->path, ctx->path_len, "%s", resolved);
+ return 0;
+}
+
+/**
+ * Find package in Lua's "package.search".
+ *
+ * @param package package name, not necessarily NUL-terminated.
+ * @param package_len length of @a package.
+ * @param path[out] buffer for the resolved path of the package.
+ * @param path_len size of @a path.
+ *
+ * @return 0 on success, -1 otherwise (ClientError diag is set).
+ */
+static int
+find_package(const char *package, size_t package_len,
+	     char *path, size_t path_len)
+{
+	struct find_ctx ctx = {
+		.package = package,
+		.package_len = package_len,
+		.path = path,
+		.path_len = path_len,
+	};
+
+	lua_State *L = tarantool_L;
+	int top = lua_gettop(L);
+	if (luaT_cpcall(L, lpackage_search, &ctx) != 0) {
+		/*
+		 * The length is presumably consumed by a "%.*s"
+		 * specifier of the error message (the same way the
+		 * dso path is formatted in module_new()), and such
+		 * a precision argument must be an int, not a size_t.
+		 */
+		diag_set(ClientError, ER_LOAD_MODULE, (int)ctx.package_len,
+			 ctx.package, lua_tostring(L, -1));
+		/* Drop the error message left on the stack. */
+		lua_settop(L, top);
+		return -1;
+	}
+	assert(top == lua_gettop(L));
+	return 0;
+}
+
+/** Take one more reference to the module @a m. */
+void
+module_ref(struct module *m)
+{
+	assert(m->refs >= 0);
+	m->refs += 1;
+}
+
+/**
+ * Drop one reference to the module @a m. Once the last reference
+ * is gone the module is removed from the cache, its shared library
+ * handle is closed and the memory is released.
+ */
+void
+module_unref(struct module *m)
+{
+	assert(m->refs > 0);
+	m->refs--;
+	if (m->refs != 0)
+		return;
+
+	cache_del(m);
+	dlclose(m->handle);
+	TRASH(m);
+	free(m);
+}
+
+/**
+ * Resolve the symbol @a func_name in the module @a m and bind it
+ * to @a mf. On success the function holds its own reference to
+ * the module.
+ *
+ * @return 0 on success, -1 otherwise (ClientError diag is set).
+ */
+int
+module_func_load(struct module *m, const char *func_name,
+		 struct module_func *mf)
+{
+	void *addr = dlsym(m->handle, func_name);
+	if (addr != NULL) {
+		mf->func = addr;
+		mf->module = m;
+		module_ref(m);
+		return 0;
+	}
+
+	diag_set(ClientError, ER_LOAD_FUNCTION,
+		 func_name, dlerror());
+	return -1;
+}
+
+/**
+ * Release a function: drop the reference it holds on its module
+ * and reset the descriptor to the empty state.
+ */
+void
+module_func_unload(struct module_func *mf)
+{
+	struct module *owner = mf->module;
+	module_unref(owner);
+	/*
+	 * Strictly speaking, resetting the descriptor here is not
+	 * required: it is up to the caller to clear the module
+	 * function. But it is cheap, so let's prevent even
+	 * a potential use after free.
+	 */
+	module_func_create(mf);
+}
+
+/**
+ * Execute the function @a mf with arguments taken from the port
+ * @a args and collect the results into the port @a ret.
+ *
+ * @return 0 on success, -1 otherwise (diag is set).
+ */
+int
+module_func_call(struct module_func *mf, struct port *args,
+		 struct port *ret)
+{
+	struct region *gc = &fiber()->gc;
+	size_t gc_svp = region_used(gc);
+
+	uint32_t args_sz;
+	const char *args_begin = port_get_msgpack(args, &args_sz);
+	if (args_begin == NULL)
+		return -1;
+	const char *args_end = args_begin + args_sz;
+
+	port_c_create(ret);
+	box_function_ctx_t ctx = { .port = ret };
+
+	/*
+	 * There is no way to know what the callee does while it
+	 * runs: it may even try to unload itself. So pin the dso
+	 * with an extra reference until the execution completes.
+	 *
+	 * Moreover the callee might release the memory behind the
+	 * module_func pointer itself, hence the module address is
+	 * kept in a local variable.
+	 */
+	struct module *pinned = mf->module;
+	module_ref(pinned);
+	int rc = mf->func(&ctx, args_begin, args_end);
+	module_unref(pinned);
+
+	region_truncate(gc, gc_svp);
+
+	if (rc == 0)
+		return 0;
+
+	/* The callee reported a failure but did not say why. */
+	if (diag_last_error(&fiber()->diag) == NULL)
+		diag_set(ClientError, ER_PROC_C, "unknown error");
+	port_destroy(ret);
+	return -1;
+}
+
+/**
+ * Fill attributes from stat.
+ *
+ * The attributes are compared as raw memory (see the memcmp()
+ * call in module_load()), so the whole structure is zeroed first:
+ * otherwise padding bytes between the members, if the platform
+ * has any, would carry garbage and make equal attributes look
+ * different.
+ *
+ * @param attr[out] attributes to fill.
+ * @param st result of a stat() call on the module file.
+ */
+static void
+module_attr_fill(struct module_attr *attr, struct stat *st)
+{
+	memset(attr, 0, sizeof(*attr));
+
+	attr->st_dev = st->st_dev;
+	attr->st_ino = st->st_ino;
+	attr->st_size = st->st_size;
+#ifdef TARGET_OS_DARWIN
+	attr->st_mtimespec = st->st_mtimespec;
+#else
+	attr->st_mtim = st->st_mtim;
+#endif
+}
+
+/**
+ * Create a new module instance from a shared library file.
+ *
+ * The shared library is copied to a temporary directory and
+ * loaded from there, then the copy and the directory are removed
+ * while the library stays in memory. This is a workaround for
+ * a bug in libc which broke detection of file updates, such that
+ * the next dlopen() call simply returned a cached version instead
+ * of rereading the library from the disk.
+ *
+ * We keep our own copy of the file attributes and reload the
+ * library on demand.
+ *
+ * @param package package name, not necessarily NUL-terminated.
+ * @param package_len length of @a package.
+ * @param source_path path to the shared library file.
+ *
+ * @return a new module holding one reference on success,
+ *         NULL otherwise (diag is set).
+ */
+static struct module *
+module_new(const char *package, size_t package_len,
+	   const char *source_path)
+{
+	size_t size = sizeof(struct module) + package_len + 1;
+	struct module *m = malloc(size);
+	if (m == NULL) {
+		diag_set(OutOfMemory, size, "malloc", "module");
+		return NULL;
+	}
+
+	m->package_len = package_len;
+	m->refs = 0;
+
+	memcpy(m->package, package, package_len);
+	m->package[package_len] = 0;
+
+	const char *tmpdir = getenv("TMPDIR");
+	if (tmpdir == NULL)
+		tmpdir = "/tmp";
+
+	char dir_name[PATH_MAX];
+	int rc = snprintf(dir_name, sizeof(dir_name),
+			  "%s/tntXXXXXX", tmpdir);
+	if (rc < 0 || (size_t)rc >= sizeof(dir_name)) {
+		diag_set(SystemError, "failed to generate path to tmp dir");
+		goto error;
+	}
+
+	if (mkdtemp(dir_name) == NULL) {
+		diag_set(SystemError, "failed to create unique dir name: %s",
+			 dir_name);
+		goto error;
+	}
+
+	/*
+	 * From now on the temporary directory exists and has to be
+	 * removed on every failure path. The diag is always set
+	 * before the cleanup, so errno changes made by the cleanup
+	 * calls do not affect the reported error.
+	 */
+	char load_name[PATH_MAX];
+	rc = snprintf(load_name, sizeof(load_name),
+		      "%s/%.*s." TARANTOOL_LIBEXT,
+		      dir_name, (int)package_len, package);
+	if (rc < 0 || (size_t)rc >= sizeof(load_name)) {
+		diag_set(SystemError, "failed to generate path to dso");
+		goto error_rmdir;
+	}
+
+	struct stat st;
+	if (stat(source_path, &st) < 0) {
+		diag_set(SystemError, "failed to stat() module: %s",
+			 source_path);
+		goto error_rmdir;
+	}
+	module_attr_fill(&m->attr, &st);
+
+	int source_fd = open(source_path, O_RDONLY);
+	if (source_fd < 0) {
+		diag_set(SystemError, "failed to open module %s "
+			 "file for reading", source_path);
+		goto error_rmdir;
+	}
+	int dest_fd = open(load_name, O_WRONLY | O_CREAT | O_TRUNC,
+			   st.st_mode & 0777);
+	if (dest_fd < 0) {
+		diag_set(SystemError, "failed to open file %s "
+			 "for writing ", load_name);
+		close(source_fd);
+		goto error_rmdir;
+	}
+
+	off_t ret = eio_sendfile_sync(dest_fd, source_fd, 0, st.st_size);
+	close(source_fd);
+	close(dest_fd);
+	if (ret != st.st_size) {
+		diag_set(SystemError, "failed to copy dso %s to %s",
+			 source_path, load_name);
+		goto error_unlink;
+	}
+
+	/*
+	 * The copy and the directory are not needed anymore no
+	 * matter whether dlopen() succeeded or not.
+	 */
+	m->handle = dlopen(load_name, RTLD_NOW | RTLD_LOCAL);
+	if (unlink(load_name) != 0)
+		say_warn("failed to unlink dso link: %s", load_name);
+	if (rmdir(dir_name) != 0)
+		say_warn("failed to delete temporary dir: %s", dir_name);
+	if (m->handle == NULL) {
+		/* A "%.*s" precision argument must be an int. */
+		diag_set(ClientError, ER_LOAD_MODULE, (int)package_len,
+			 package, dlerror());
+		goto error;
+	}
+
+	module_ref(m);
+	return m;
+
+error_unlink:
+	if (unlink(load_name) != 0)
+		say_warn("failed to unlink dso link: %s", load_name);
+error_rmdir:
+	if (rmdir(dir_name) != 0)
+		say_warn("failed to delete temporary dir: %s", dir_name);
+error:
+	free(m);
+	return NULL;
+}
+
+/**
+ * Load a module from a storage device regardless of the cache
+ * state. If the cache already has an entry for the package, the
+ * entry is switched to the new instance, otherwise a new entry
+ * is added.
+ *
+ * @param package package name, not necessarily NUL-terminated.
+ * @param package_len length of @a package.
+ *
+ * @return a module instance on success, NULL otherwise (diag is set).
+ */
+struct module *
+module_load_force(const char *package, size_t package_len)
+{
+	char path[PATH_MAX];
+	size_t size = sizeof(path);
+
+	if (find_package(package, package_len, path, size) != 0)
+		return NULL;
+
+	struct module *m = module_new(package, package_len, path);
+	if (m == NULL)
+		return NULL;
+
+	struct module *c = cache_find(package, package_len);
+	if (c != NULL) {
+		cache_update(m);
+		/*
+		 * @a package is delimited by @a package_len and may
+		 * be followed by other characters, so log the
+		 * NUL-terminated copy kept in the module instead.
+		 */
+		say_info("module_cache: force load: %s", m->package);
+	} else {
+		if (cache_add(m) != 0) {
+			module_unload(m);
+			return NULL;
+		}
+	}
+
+	return m;
+}
+
+/**
+ * Load a module, reusing a cached instance when possible.
+ *
+ * A cached instance is reused only if the attributes of the file
+ * on a storage device still match the ones saved at load time.
+ * Otherwise a new instance is loaded and put into the cache.
+ *
+ * @param package package name, not necessarily NUL-terminated.
+ * @param package_len length of @a package.
+ *
+ * @return a referenced module instance on success,
+ *         NULL otherwise (diag is set).
+ */
+struct module *
+module_load(const char *package, size_t package_len)
+{
+	char path[PATH_MAX];
+
+	if (find_package(package, package_len, path, sizeof(path)) != 0)
+		return NULL;
+
+	struct module *m = cache_find(package, package_len);
+	if (m != NULL) {
+		struct module_attr attr;
+		struct stat st;
+		if (stat(path, &st) != 0) {
+			diag_set(SystemError, "failed to stat() %s", path);
+			return NULL;
+		}
+
+		/*
+		 * In case of a cache hit we may reuse the existing
+		 * module, which speeds up the load procedure.
+		 */
+		module_attr_fill(&attr, &st);
+		if (memcmp(&attr, &m->attr, sizeof(attr)) == 0) {
+			module_ref(m);
+			return m;
+		}
+
+		/*
+		 * The module has been updated on a storage device,
+		 * so load a new instance and update the cache. The
+		 * old instance gets evicted from the cache but keeps
+		 * residing in memory, fully functional, until its
+		 * last function is unloaded.
+		 */
+		m = module_new(package, package_len, path);
+		if (m != NULL) {
+			cache_update(m);
+			/*
+			 * @a package is delimited by @a package_len,
+			 * so log the NUL-terminated copy kept in the
+			 * module instead.
+			 */
+			say_info("module_cache: attr changed: %s",
+				 m->package);
+		}
+	} else {
+		m = module_new(package, package_len, path);
+		if (m != NULL) {
+			if (cache_add(m) != 0) {
+				module_unload(m);
+				return NULL;
+			}
+		}
+	}
+
+	return m;
+}
+
+/**
+ * Unload a module instance: give up the reference obtained from
+ * module_load() or module_load_force().
+ */
+void
+module_unload(struct module *m)
+{
+	/* Unloading is nothing but dropping a reference. */
+	module_unref(m);
+}
+
+/**
+ * Free the modules subsystem: drop one reference from every
+ * cached module and destroy the cache itself. The number of
+ * modules which had more than one reference is logged.
+ */
+void
+module_free(void)
+{
+	size_t busy = 0;
+	mh_int_t pos;
+
+	mh_foreach(module_cache, pos) {
+		struct module *mod = mh_strnptr_node(module_cache, pos)->val;
+		/* Somebody else still holds the module. */
+		if (mod->refs > 1)
+			busy++;
+		module_unload(mod);
+	}
+
+	mh_strnptr_delete(module_cache);
+	module_cache = NULL;
+
+	if (busy != 0)
+		say_info("module_cache: %zu entries left", busy);
+}
+
+/**
+ * Initialize the modules subsystem: create an empty cache.
+ *
+ * @return 0 on success, -1 otherwise (OutOfMemory diag is set).
+ */
+int
+module_init(void)
+{
+	module_cache = mh_strnptr_new();
+	if (module_cache != NULL)
+		return 0;
+
+	diag_set(OutOfMemory, sizeof(*module_cache),
+		 "malloc", "module_cache");
+	return -1;
+}
diff --git a/src/box/module_cache.h b/src/box/module_cache.h
new file mode 100644
index 000000000..78dd5c7ad
--- /dev/null
+++ b/src/box/module_cache.h
@@ -0,0 +1,206 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2010-2021, Tarantool AUTHORS, please see AUTHORS file.
+ */
+
+#pragma once
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "trivia/config.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+/**
+ * API of C stored function.
+ *
+ * struct box_function_ctx is the context passed to a function,
+ * it carries the port which collects the results.
+ *
+ * box_function_t is the signature of a function: it takes the
+ * context and the arguments as a [args, args_end) memory range
+ * (module_func_call() obtains it from port_get_msgpack()), and
+ * returns zero on success; any other value is treated as an
+ * error by module_func_call().
+ */
+
+struct port;
+
+struct box_function_ctx {
+ struct port *port;
+};
+
+typedef struct box_function_ctx box_function_ctx_t;
+typedef int (*box_function_t)(box_function_ctx_t *ctx,
+ const char *args,
+ const char *args_end);
+
+/**
+ * Shared library file attributes for
+ * module cache invalidation.
+ *
+ * This is a snapshot of stat() results: modification time,
+ * device, inode and size of the file. module_load() compares
+ * the saved snapshot with a fresh one via memcmp() to find out
+ * whether a cached module is stale.
+ */
+struct module_attr {
+#ifdef TARGET_OS_DARWIN
+ struct timespec st_mtimespec;
+#else
+ struct timespec st_mtim;
+#endif
+ dev_t st_dev;
+ ino_t st_ino;
+ off_t st_size;
+};
+
+/**
+ * Dynamic shared module.
+ *
+ * An instance is allocated as a single memory chunk together
+ * with the package name, and is released when the last
+ * reference is dropped.
+ */
+struct module {
+ /**
+ * Module handle, dlopen() result.
+ */
+ void *handle;
+ /**
+ * File attributes saved when the module was loaded.
+ */
+ struct module_attr attr;
+ /**
+ * Count of active references.
+ */
+ int64_t refs;
+ /**
+ * Length of @a package, without the terminating zero.
+ */
+ size_t package_len;
+ /**
+ * Module's name without file extension, NUL-terminated.
+ */
+ char package[0];
+};
+
+/**
+ * Module function.
+ *
+ * A loaded function holds a reference to its module; an empty
+ * function has both members set to NULL.
+ */
+struct module_func {
+ /**
+ * Function's address, iow dlsym() result.
+ */
+ box_function_t func;
+ /**
+ * Function's module, NULL if the function is empty.
+ */
+ struct module *module;
+};
+
+/**
+ * Load a module.
+ *
+ * Look up a module instance in the cache; if it is not found,
+ * or the file attributes have changed since it was cached,
+ * the module is loaded from a storage device.
+ *
+ * @param package module package (without file extension).
+ * @param package_len length of @a package.
+ *
+ * Possible errors:
+ * ClientError: the package is not found on a storage device.
+ * ClientError: an error happened while loading the package.
+ * SystemError: a system error happened during the procedure.
+ * OutOfMemory: unable to allocate new memory for module instance.
+ *
+ * @return a module instance on success, NULL otherwise (diag is set)
+ */
+struct module *
+module_load(const char *package, size_t package_len);
+
+/**
+ * Force load a module.
+ *
+ * Load a module from a storage device and make the associated
+ * cache entry, if there is one, refer to the new instance.
+ *
+ * @param package module package (without file extension).
+ * @param package_len length of @a package.
+ *
+ * Possible errors:
+ * ClientError: the package is not found on a storage device.
+ * ClientError: an error happened while loading the package.
+ * SystemError: a system error happened during the procedure.
+ * OutOfMemory: unable to allocate new memory for module instance.
+ *
+ * @return a module instance on success, NULL otherwise (diag is set)
+ */
+struct module *
+module_load_force(const char *package, size_t package_len);
+
+/**
+ * Unload a module instance.
+ *
+ * Drops one reference to the module; the module is destroyed
+ * once the last reference is gone.
+ *
+ * @param m a module to unload.
+ */
+void
+module_unload(struct module *m);
+
+/**
+ * Test if module function is empty, i.e. it is not
+ * bound to any module.
+ */
+static inline bool
+is_module_func_empty(struct module_func *mf)
+{
+	const struct module *owner = mf->module;
+	return owner == NULL;
+}
+
+/**
+ * Create new empty module function: neither a symbol
+ * nor a module is bound to it.
+ */
+static inline void
+module_func_create(struct module_func *mf)
+{
+	mf->func = NULL;
+	mf->module = NULL;
+}
+
+/**
+ * Load a new function.
+ *
+ * Resolves the function symbol in the module; on success the
+ * function takes its own reference to the module.
+ *
+ * @param m a module to load a function from.
+ * @param func_name function name.
+ * @param mf[out] function instance.
+ *
+ * Possible errors:
+ * ClientError: no such function in a module.
+ *
+ * @return 0 on success, -1 otherwise (diag is set).
+ */
+int
+module_func_load(struct module *m, const char *func_name,
+ struct module_func *mf);
+
+/**
+ * Unload a function.
+ *
+ * Drops the function's reference to its module and resets
+ * the function to the empty state.
+ *
+ * @param mf module function.
+ */
+void
+module_func_unload(struct module_func *mf);
+
+/**
+ * Execute a function.
+ *
+ * The module is kept referenced for the duration of the call.
+ * If the function itself returns non-zero, @a ret is destroyed
+ * before returning.
+ *
+ * @param mf a function to execute.
+ * @param args function arguments.
+ * @param ret[out] execution results.
+ *
+ * @return 0 on success, -1 otherwise (diag is set).
+ */
+int
+module_func_call(struct module_func *mf, struct port *args,
+ struct port *ret);
+
+/** Increment the reference counter of a module. */
+void
+module_ref(struct module *m);
+
+/**
+ * Decrement the reference counter of a module. When the counter
+ * drops to zero the module is removed from the cache, its handle
+ * is closed and its memory is freed.
+ */
+void
+module_unref(struct module *m);
+
+/**
+ * Initialize modules subsystem.
+ *
+ * @return 0 on success, -1 otherwise (diag is set).
+ */
+int
+module_init(void);
+
+/**
+ * Free modules subsystem: drop one reference from each
+ * cached module and destroy the cache.
+ */
+void
+module_free(void);
+
+#if defined(__cplusplus)
+}
+#endif /* defined(__cplusplus) */
diff --git a/src/main.cc b/src/main.cc
index 2fce81bb3..df4ccf35a 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -76,6 +76,7 @@
#include "box/lua/init.h" /* box_lua_init() */
#include "box/session.h"
#include "box/memtx_tx.h"
+#include "box/module_cache.h"
#include "systemd.h"
#include "crypto/crypto.h"
#include "core/popen.h"
@@ -519,6 +520,7 @@ tarantool_free(void)
title_free(main_argc, main_argv);
popen_free();
+ module_free();
/* unlink pidfile. */
if (pid_file_handle != NULL && pidfile_remove(pid_file_handle) == -1)
@@ -708,6 +710,7 @@ main(int argc, char **argv)
cbus_init();
coll_init();
memtx_tx_manager_init();
+ module_init();
crypto_init();
systemd_init();
tarantool_lua_init(tarantool_bin, main_argc, main_argv);
--
2.29.2
More information about the Tarantool-patches
mailing list