[Tarantool-patches] [PATCH v19 3/6] box/module_cache: introduce modules subsystem

Cyrill Gorcunov gorcunov at gmail.com
Tue Mar 2 00:23:40 MSK 2021


The modules subsystem hides some low-level operations under API.
In particular the modules subsystem is responsible for

 - modules lookup in Lua's "package.search" storage
 - modules caching to eliminate expencive load procedure
 - function symbol resolving

Part-of #4642

Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---
 src/box/CMakeLists.txt |   1 +
 src/box/module_cache.c | 476 +++++++++++++++++++++++++++++++++++++++++
 src/box/module_cache.h | 206 ++++++++++++++++++
 src/main.cc            |   3 +
 4 files changed, 686 insertions(+)
 create mode 100644 src/box/module_cache.c
 create mode 100644 src/box/module_cache.h

diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt
index 19203f770..cc2e17e94 100644
--- a/src/box/CMakeLists.txt
+++ b/src/box/CMakeLists.txt
@@ -126,6 +126,7 @@ add_library(box STATIC
     memtx_rtree.c
     memtx_bitset.c
     memtx_tx.c
+    module_cache.c
     engine.c
     memtx_engine.c
     memtx_space.c
diff --git a/src/box/module_cache.c b/src/box/module_cache.c
new file mode 100644
index 000000000..bc2184e5f
--- /dev/null
+++ b/src/box/module_cache.c
@@ -0,0 +1,476 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2010-2021, Tarantool AUTHORS, please see AUTHORS file.
+ */
+
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <dlfcn.h>
+#include <lua.h>
+
+#include "assoc.h"
+#include "diag.h"
+#include "fiber.h"
+#include "module_cache.h"
+
+#include "box/error.h"
+#include "box/port.h"
+
+#include "lua/utils.h"
+#include "libeio/eio.h"
+
+static struct mh_strnptr_t *module_cache = NULL;
+
+/**
+ * Helpers for cache manipulations.
+ */
+static void *
+cache_find(const char *str, size_t len)
+{
+	mh_int_t e = mh_strnptr_find_inp(module_cache, str, len);
+	if (e == mh_end(module_cache))
+		return NULL;
+	return mh_strnptr_node(module_cache, e)->val;
+}
+
+static void
+cache_update(struct module *m)
+{
+	const char *str = m->package;
+	size_t len = m->package_len;
+
+	mh_int_t e = mh_strnptr_find_inp(module_cache, str, len);
+	if (e == mh_end(module_cache))
+		panic("module: failed to update cache: %s", str);
+
+	mh_strnptr_node(module_cache, e)->str = m->package;
+	mh_strnptr_node(module_cache, e)->val = m;
+}
+
+static int
+cache_add(struct module *m)
+{
+	const struct mh_strnptr_node_t nd = {
+		.str	= m->package,
+		.len	= m->package_len,
+		.hash	= mh_strn_hash(m->package, m->package_len),
+		.val	= m,
+	};
+
+	mh_int_t e = mh_strnptr_put(module_cache, &nd, NULL, NULL);
+	if (e == mh_end(module_cache)) {
+		diag_set(OutOfMemory, sizeof(nd), "malloc",
+			 "module_cache node");
+		return -1;
+	}
+	return 0;
+}
+
+static void
+cache_del(struct module *m)
+{
+	const char *str = m->package;
+	size_t len = m->package_len;
+
+	mh_int_t e = mh_strnptr_find_inp(module_cache, str, len);
+	if (e != mh_end(module_cache)) {
+		struct module *v = mh_strnptr_node(module_cache, e)->val;
+		if (v == m)
+			mh_strnptr_del(module_cache, e, NULL);
+	}
+}
+
+/** Arguments for lpackage_search. */
+struct find_ctx {
+	const char *package;
+	size_t package_len;
+	char *path;
+	size_t path_len;
+};
+
+/** A helper for find_package(). */
+static int
+lpackage_search(lua_State *L)
+{
+	struct find_ctx *ctx = (void *)lua_topointer(L, 1);
+
+	lua_getglobal(L, "package");
+	lua_getfield(L, -1, "search");
+	lua_pushlstring(L, ctx->package, ctx->package_len);
+
+	lua_call(L, 1, 1);
+	if (lua_isnil(L, -1))
+		return luaL_error(L, "module not found");
+
+	char resolved[PATH_MAX];
+	if (realpath(lua_tostring(L, -1), resolved) == NULL) {
+		diag_set(SystemError, "realpath");
+		return luaT_error(L);
+	}
+
+	/*
+	 * No need for result being trimmed test, it
+	 * is guaranteed by realpath call.
+	 */
+	snprintf(ctx->path, ctx->path_len, "%s", resolved);
+	return 0;
+}
+
+/** Find package in Lua's "package.search". */
+static int
+find_package(const char *package, size_t package_len,
+	     char *path, size_t path_len)
+{
+	struct find_ctx ctx = {
+		.package	= package,
+		.package_len	= package_len,
+		.path		= path,
+		.path_len	= path_len,
+	};
+
+	lua_State *L = tarantool_L;
+	int top = lua_gettop(L);
+	if (luaT_cpcall(L, lpackage_search, &ctx) != 0) {
+		diag_set(ClientError, ER_LOAD_MODULE, ctx.package_len,
+			 ctx.package, lua_tostring(L, -1));
+		lua_settop(L, top);
+		return -1;
+	}
+	assert(top == lua_gettop(L));
+	return 0;
+}
+
+void
+module_ref(struct module *m)
+{
+	assert(m->refs >= 0);
+	++m->refs;
+}
+
+void
+module_unref(struct module *m)
+{
+	assert(m->refs > 0);
+	if (--m->refs == 0) {
+		cache_del(m);
+		dlclose(m->handle);
+		TRASH(m);
+		free(m);
+	}
+}
+
+int
+module_func_load(struct module *m, const char *func_name,
+		 struct module_func *mf)
+{
+	void *sym = dlsym(m->handle, func_name);
+	if (sym == NULL) {
+		diag_set(ClientError, ER_LOAD_FUNCTION,
+			 func_name, dlerror());
+		return -1;
+	}
+
+	mf->func = sym;
+	mf->module = m;
+	module_ref(m);
+
+	return 0;
+}
+
+void
+module_func_unload(struct module_func *mf)
+{
+	module_unref(mf->module);
+	/*
+	 * Strictly speaking there is no need
+	 * for implicit creation, it is up to
+	 * the caller to clear the module function,
+	 * but since it is cheap, lets prevent from
+	 * even potential use after free.
+	 */
+	module_func_create(mf);
+}
+
+int
+module_func_call(struct module_func *mf, struct port *args,
+		 struct port *ret)
+{
+	struct region *region = &fiber()->gc;
+	size_t region_svp = region_used(region);
+
+	uint32_t data_sz;
+	const char *data = port_get_msgpack(args, &data_sz);
+	if (data == NULL)
+		return -1;
+
+	port_c_create(ret);
+	box_function_ctx_t ctx = {
+		.port = ret,
+	};
+
+	/*
+	 * We don't know what exactly the callee
+	 * gonna do during the execution, it may
+	 * even try to unload itself, thus we make
+	 * sure the dso won't be unloaded until
+	 * execution is complete.
+	 *
+	 * Moreover the callee might release the memory
+	 * associated with the module_func pointer itself
+	 * so keep the address of the module locally.
+	 */
+	struct module *m = mf->module;
+	module_ref(m);
+	int rc = mf->func(&ctx, data, data + data_sz);
+	module_unref(m);
+
+	region_truncate(region, region_svp);
+
+	if (rc != 0) {
+		if (diag_last_error(&fiber()->diag) == NULL)
+			diag_set(ClientError, ER_PROC_C, "unknown error");
+		port_destroy(ret);
+		return -1;
+	}
+
+	return 0;
+}
+
+/** Fill attributes from stat. */
+static void
+module_attr_fill(struct module_attr *attr, struct stat *st)
+{
+	attr->st_dev = st->st_dev;
+	attr->st_ino = st->st_ino;
+	attr->st_size = st->st_size;
+#ifdef TARGET_OS_DARWIN
+	attr->st_mtimespec = st->st_mtimespec;
+#else
+	attr->st_mtim = st->st_mtim;
+#endif
+}
+
+/**
+ * Copy shared library to temp directory and load from there,
+ * then remove it from this temp place leaving in memory. This
+ * is because there was a bug in libc which screw file updates
+ * detection properly such that next dlopen call simply return
+ * a cached version instead of rereading a library from the disk.
+ *
+ * We keep own copy of file attributes and reload the library
+ * on demand.
+ */
+static struct module *
+module_new(const char *package, size_t package_len,
+	   const char *source_path)
+{
+	size_t size = sizeof(struct module) + package_len + 1;
+	struct module *m = malloc(size);
+	if (m == NULL) {
+		diag_set(OutOfMemory, size, "malloc", "module");
+		return NULL;
+	}
+
+	m->package_len = package_len;
+	m->refs = 0;
+
+	memcpy(m->package, package, package_len);
+	m->package[package_len] = 0;
+
+	const char *tmpdir = getenv("TMPDIR");
+	if (tmpdir == NULL)
+		tmpdir = "/tmp";
+
+	char dir_name[PATH_MAX];
+	int rc = snprintf(dir_name, sizeof(dir_name),
+			  "%s/tntXXXXXX", tmpdir);
+	if (rc < 0 || (size_t)rc >= sizeof(dir_name)) {
+		diag_set(SystemError, "failed to generate path to tmp dir");
+		goto error;
+	}
+
+	if (mkdtemp(dir_name) == NULL) {
+		diag_set(SystemError, "failed to create unique dir name: %s",
+			 dir_name);
+		goto error;
+	}
+
+	char load_name[PATH_MAX];
+	rc = snprintf(load_name, sizeof(load_name),
+		      "%s/%.*s." TARANTOOL_LIBEXT,
+		      dir_name, (int)package_len, package);
+	if (rc < 0 || (size_t)rc >= sizeof(dir_name)) {
+		diag_set(SystemError, "failed to generate path to dso");
+		goto error;
+	}
+
+	struct stat st;
+	if (stat(source_path, &st) < 0) {
+		diag_set(SystemError, "failed to stat() module: %s",
+			 source_path);
+		goto error;
+	}
+	module_attr_fill(&m->attr, &st);
+
+	int source_fd = open(source_path, O_RDONLY);
+	if (source_fd < 0) {
+		diag_set(SystemError, "failed to open module %s "
+			 "file for reading", source_path);
+		goto error;
+	}
+	int dest_fd = open(load_name, O_WRONLY | O_CREAT | O_TRUNC,
+			   st.st_mode & 0777);
+	if (dest_fd < 0) {
+		diag_set(SystemError, "failed to open file %s "
+			 "for writing ", load_name);
+		close(source_fd);
+		goto error;
+	}
+
+	off_t ret = eio_sendfile_sync(dest_fd, source_fd, 0, st.st_size);
+	close(source_fd);
+	close(dest_fd);
+	if (ret != st.st_size) {
+		diag_set(SystemError, "failed to copy dso %s to %s",
+			 source_path, load_name);
+		goto error;
+	}
+
+	m->handle = dlopen(load_name, RTLD_NOW | RTLD_LOCAL);
+	if (unlink(load_name) != 0)
+		say_warn("failed to unlink dso link: %s", load_name);
+	if (rmdir(dir_name) != 0)
+		say_warn("failed to delete temporary dir: %s", dir_name);
+	if (m->handle == NULL) {
+		diag_set(ClientError, ER_LOAD_MODULE, package_len,
+			  package, dlerror());
+		goto error;
+	}
+
+	module_ref(m);
+	return m;
+
+error:
+	free(m);
+	return NULL;
+}
+
+struct module *
+module_load_force(const char *package, size_t package_len)
+{
+	char path[PATH_MAX];
+	size_t size = sizeof(path);
+
+	if (find_package(package, package_len, path, size) != 0)
+		return NULL;
+
+	struct module *m = module_new(package, package_len, path);
+	if (m == NULL)
+		return NULL;
+
+	struct module *c = cache_find(package, package_len);
+	if (c != NULL) {
+		cache_update(m);
+		say_info("module_cache: force load: %s", package);
+	} else {
+		if (cache_add(m) != 0) {
+			module_unload(m);
+			return NULL;
+		}
+	}
+
+	return m;
+}
+
+struct module *
+module_load(const char *package, size_t package_len)
+{
+	char path[PATH_MAX];
+
+	if (find_package(package, package_len, path, sizeof(path)) != 0)
+		return NULL;
+
+	struct module *m = cache_find(package, package_len);
+	if (m != NULL) {
+		struct module_attr attr;
+		struct stat st;
+		if (stat(path, &st) != 0) {
+			diag_set(SystemError, "failed to stat() %s", path);
+			return NULL;
+		}
+
+		/*
+		 * In case of cache hit we may reuse existing
+		 * module which speedup load procedure.
+		 */
+		module_attr_fill(&attr, &st);
+		if (memcmp(&attr, &m->attr, sizeof(attr)) == 0) {
+			module_ref(m);
+			return m;
+		}
+
+		/*
+		 * Module has been updated on a storage device,
+		 * so load a new instance and update the cache,
+		 * old entry get evicted but continue residing
+		 * in memory, fully functional, until last
+		 * function is unloaded.
+		 */
+		m = module_new(package, package_len, path);
+		if (m != NULL) {
+			cache_update(m);
+			say_info("module_cache: attr changed: %s",
+				 package);
+		}
+	} else {
+		m = module_new(package, package_len, path);
+		if (m != NULL) {
+			if (cache_add(m) != 0) {
+				module_unload(m);
+				return NULL;
+			}
+		}
+	}
+
+	return m;
+}
+
+void
+module_unload(struct module *m)
+{
+	module_unref(m);
+}
+
+void
+module_free(void)
+{
+	size_t nr_busy = 0;
+	mh_int_t e;
+
+	mh_foreach(module_cache, e) {
+		struct module *m = mh_strnptr_node(module_cache, e)->val;
+		if (m->refs > 1)
+			nr_busy++;
+		module_unload(m);
+	}
+
+	mh_strnptr_delete(module_cache);
+	module_cache = NULL;
+
+	if (nr_busy > 0)
+		say_info("module_cache: %zu entries left", nr_busy);
+}
+
+int
+module_init(void)
+{
+	module_cache = mh_strnptr_new();
+	if (module_cache == NULL) {
+		diag_set(OutOfMemory, sizeof(*module_cache),
+			 "malloc", "module_cache");
+		return -1;
+	}
+	return 0;
+}
diff --git a/src/box/module_cache.h b/src/box/module_cache.h
new file mode 100644
index 000000000..78dd5c7ad
--- /dev/null
+++ b/src/box/module_cache.h
@@ -0,0 +1,206 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2010-2021, Tarantool AUTHORS, please see AUTHORS file.
+ */
+
+#pragma once
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "trivia/config.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+/**
+ * API of C stored function.
+ */
+
+struct port;
+
+struct box_function_ctx {
+	struct port *port;
+};
+
+typedef struct box_function_ctx box_function_ctx_t;
+typedef int (*box_function_t)(box_function_ctx_t *ctx,
+			      const char *args,
+			      const char *args_end);
+
+/**
+ * Shared library file attributes for
+ * module cache invalidation.
+ */
+struct module_attr {
+#ifdef TARGET_OS_DARWIN
+	struct timespec st_mtimespec;
+#else
+	struct timespec st_mtim;
+#endif
+	dev_t st_dev;
+	ino_t st_ino;
+	off_t st_size;
+};
+
+/**
+ * Dynamic shared module.
+ */
+struct module {
+	/**
+	 * Module handle, dlopen() result.
+	 */
+	void *handle;
+	/**
+	 * File attributes.
+	 */
+	struct module_attr attr;
+	/**
+	 * Count of active references.
+	 */
+	int64_t refs;
+	/**
+	 * Length of @a package.
+	 */
+	size_t package_len;
+	/**
+	 * Module's name without file extension.
+	 */
+	char package[0];
+};
+
+/**
+ * Module function.
+ */
+struct module_func {
+	/**
+	 * Function's address, iow dlsym() result.
+	 */
+	 box_function_t func;
+	/**
+	 * Function's module.
+	 */
+	struct module *module;
+};
+
+/**
+ * Load a module.
+ *
+ * Lookup for a module instance in cache and if not found
+ * the module is loaded from a storage device.
+ *
+ * @param package module package (without file extension).
+ * @param package_len length of @a package.
+ *
+ * Possible errors:
+ * ClientError: the package is not found on a storage device.
+ * ClientError: an error happened when been loading the package.
+ * SystemError: a system error happened during procedure.
+ * OutOfMemory: unable to allocate new memory for module instance.
+ *
+ * @return a module instance on success, NULL otherwise (diag is set)
+ */
+struct module *
+module_load(const char *package, size_t package_len);
+
+/**
+ * Force load a module.
+ *
+ * Load a module from a storage device and invalidate
+ * an associated cache entry.
+ *
+ * @param package module package (without file extension).
+ * @param package_len length of @a package.
+ *
+ * Possible errors:
+ * ClientError: the package is not found on a storage device.
+ * ClientError: an error happened when been loading the package.
+ * SystemError: a system error happened during procedure.
+ * OutOfMemory: unable to allocate new memory for module instance.
+ *
+ * @return a module instance on success, NULL otherwise (diag is set)
+ */
+struct module *
+module_load_force(const char *package, size_t package_len);
+
+/**
+ * Unload a module instance.
+ *
+ * @param m a module to unload.
+ */
+void
+module_unload(struct module *m);
+
+/** Test if module function is empty. */
+static inline bool
+is_module_func_empty(struct module_func *mf)
+{
+	return mf->module == NULL;
+}
+
+/** Create new empty module function. */
+static inline void
+module_func_create(struct module_func *mf)
+{
+	mf->module = NULL;
+	mf->func = NULL;
+}
+
+/**
+ * Load a new function.
+ *
+ * @param m a module to load a function from.
+ * @param func_name function name.
+ * @param mf[out] function instance.
+ *
+ * Possible errors:
+ * ClientError: no such function in a module.
+ *
+ * @return 0 on success, -1 otherwise (diag is set).
+ */
+int
+module_func_load(struct module *m, const char *func_name,
+		 struct module_func *mf);
+
+/**
+ * Unload a function.
+ *
+ * @param mf module function.
+ */
+void
+module_func_unload(struct module_func *mf);
+
+/**
+ * Execute a function.
+ *
+ * @param mf a function to execute.
+ * @param args function arguments.
+ * @param ret[out] execution results.
+ *
+ * @return 0 on success, -1 otherwise (diag is set).
+ */
+int
+module_func_call(struct module_func *mf, struct port *args,
+		 struct port *ret);
+
+/** Increment reference to a module. */
+void
+module_ref(struct module *m);
+
+/** Decrement reference of a module. */
+void
+module_unref(struct module *m);
+
+/** Initialize modules subsystem. */
+int
+module_init(void);
+
+/** Free modules subsystem. */
+void
+module_free(void);
+
+#if defined(__cplusplus)
+}
+#endif /* defined(__plusplus) */
diff --git a/src/main.cc b/src/main.cc
index 2fce81bb3..df4ccf35a 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -76,6 +76,7 @@
 #include "box/lua/init.h" /* box_lua_init() */
 #include "box/session.h"
 #include "box/memtx_tx.h"
+#include "box/module_cache.h"
 #include "systemd.h"
 #include "crypto/crypto.h"
 #include "core/popen.h"
@@ -519,6 +520,7 @@ tarantool_free(void)
 	title_free(main_argc, main_argv);
 
 	popen_free();
+	module_free();
 
 	/* unlink pidfile. */
 	if (pid_file_handle != NULL && pidfile_remove(pid_file_handle) == -1)
@@ -708,6 +710,7 @@ main(int argc, char **argv)
 	cbus_init();
 	coll_init();
 	memtx_tx_manager_init();
+	module_init();
 	crypto_init();
 	systemd_init();
 	tarantool_lua_init(tarantool_bin, main_argc, main_argv);
-- 
2.29.2



More information about the Tarantool-patches mailing list