[Tarantool-patches] [PATCH] Add new 'allocator' option in box.cfg
mechanik20051988
mechanik20.05.1988 at gmail.com
Fri Dec 18 16:58:04 MSK 2020
The slab allocator, which is used for tuple allocation, has a certain
disadvantage: it tends toward unresolvable fragmentation on certain
workloads (size migration). The new option allows selecting
the appropriate allocator if necessary.
@TarantoolBot document
Title: Add new 'allocator' option
Add a new 'allocator' option which allows selecting
the appropriate allocator for memtx tuples if necessary.
Use box.cfg{allocator="small"}, or omit the option, to use the default
small allocator; use box.cfg{allocator="system"} to use
libc malloc.
Closes #5419
---
CMakeLists.txt | 11 ++
perf/allocator_perf.test.lua | 34 ++++
src/box/allocator.h | 200 ++++++++++++++++++++
src/box/box.cc | 1 +
src/box/lua/load_cfg.lua | 2 +
src/box/lua/slab.c | 39 +++-
src/box/memtx_engine.c | 53 ++++--
src/box/memtx_engine.h | 41 +++-
src/box/system_allocator.h | 226 +++++++++++++++++++++++
src/trivia/config.h.cmake | 3 +
test/app-tap/init_script.result | 1 +
test/box/admin.result | 4 +-
test/box/cfg.result | 8 +-
test/box/choose_memtx_allocator.lua | 9 +
test/box/choose_memtx_allocator.result | 135 ++++++++++++++
test/box/choose_memtx_allocator.test.lua | 43 +++++
16 files changed, 776 insertions(+), 34 deletions(-)
create mode 100755 perf/allocator_perf.test.lua
create mode 100644 src/box/allocator.h
create mode 100644 src/box/system_allocator.h
create mode 100644 test/box/choose_memtx_allocator.lua
create mode 100644 test/box/choose_memtx_allocator.result
create mode 100644 test/box/choose_memtx_allocator.test.lua
diff --git a/CMakeLists.txt b/CMakeLists.txt
index fa6818f8e..290cd535a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -92,6 +92,17 @@ check_symbol_exists(posix_fadvise fcntl.h HAVE_POSIX_FADVISE)
check_symbol_exists(fallocate fcntl.h HAVE_FALLOCATE)
check_symbol_exists(mremap sys/mman.h HAVE_MREMAP)
+# Detect a routine that reports the usable size of a malloc()'ed block.
+check_function_exists(malloc_usable_size HAVE_MALLOC_USABLE_SIZE)
+check_symbol_exists(malloc_size malloc/malloc.h HAVE_MALLOC_SIZE_DARWIN)
+
+# When malloc_usable_size() exists, record which flavour it is: the Linux
+# one, or the fallback ("BSD") one for every other target.
+if (HAVE_MALLOC_USABLE_SIZE)
+ if (TARGET_OS_LINUX)
+ set(HAVE_MALLOC_USABLE_SIZE_LINUX 1)
+ else ()
+ set(HAVE_MALLOC_USABLE_SIZE_BSD 1)
+ endif ()
+endif ()
+
check_function_exists(sync_file_range HAVE_SYNC_FILE_RANGE)
check_function_exists(memmem HAVE_MEMMEM)
check_function_exists(memrchr HAVE_MEMRCHR)
diff --git a/perf/allocator_perf.test.lua b/perf/allocator_perf.test.lua
new file mode 100755
index 000000000..be270379b
--- /dev/null
+++ b/perf/allocator_perf.test.lua
@@ -0,0 +1,34 @@
+#!/usr/bin/env ../src/tarantool
+-- Micro-benchmark for the memtx tuple allocator.
+-- The allocator name is taken from the first command-line argument and
+-- passed to box.cfg as the 'allocator' option.
+-- Prints the average duration (in seconds) of one insert, replace and
+-- delete pass as a JSON object on stdout.
+os.execute('rm -rf *.snap *.xlog *.vylog ./512 ./513 ./514 ./515 ./516 ./517 ./518 ./519 ./520 ./521')
+local clock = require('clock')
+box.cfg{listen = 3301, wal_mode='none', allocator=arg[1]}
+local space = box.schema.space.create('test')
+space:format({ {name = 'id', type = 'unsigned'}, {name = 'year', type = 'unsigned'} })
+space:create_index('primary', { parts = {'id'} })
+
+local PASSES = 20
+local OPS_PER_PASS = 2500000
+local NSEC_PER_SEC = 1.0e9
+
+-- Run fn() once and return the elapsed clock.monotonic64() time.
+local function elapsed(fn)
+    local started = clock.monotonic64()
+    fn()
+    return clock.monotonic64() - started
+end
+
+local time_insert, time_replace, time_delete = 0, 0, 0
+for _ = 1, PASSES do
+    time_insert = time_insert + elapsed(function()
+        for key = 1, OPS_PER_PASS do space:insert({key, key + 1000}) end
+    end)
+    time_replace = time_replace + elapsed(function()
+        for key = 1, OPS_PER_PASS do space:replace({key, key + 5000}) end
+    end)
+    time_delete = time_delete + elapsed(function()
+        for key = 1, OPS_PER_PASS do space:delete(key) end
+    end)
+end
+
+-- Average duration of a single pass, converted to seconds.
+local function avg_sec(total)
+    return tonumber(total) / (NSEC_PER_SEC * PASSES)
+end
+
+-- Fields are separated by commas so that the report is valid JSON.
+io.write("{\n")
+io.write(string.format(" \"alloc time\": \"%.3f\",\n", avg_sec(time_insert)))
+io.write(string.format(" \"replace time\": \"%.3f\",\n", avg_sec(time_replace)))
+io.write(string.format(" \"delete time\": \"%.3f\"\n}\n", avg_sec(time_delete)))
+os.exit()
diff --git a/src/box/allocator.h b/src/box/allocator.h
new file mode 100644
index 000000000..3bea67f50
--- /dev/null
+++ b/src/box/allocator.h
@@ -0,0 +1,200 @@
+#pragma once
+/*
+ * Copyright 2010-2020, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <small/small.h>
+#include <trivia/util.h>
+#include <stdarg.h>
+
+#include "memtx_engine.h"
+#include "system_allocator.h"
+#include "tuple.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+/*
+ * Function-like macros that expand to nothing. They are used as a
+ * placeholder where a macro expects a function name but no action is
+ * required (see MEM_CHECK_FUNC for the system allocator).
+ */
+#define noop_one_arg(a)
+#define noop_two_arg(a, b)
+
+/**
+ * Allocator-independent memory usage summary. It is filled in by the
+ * generated <prefix>_allocator_stats() functions of this header.
+ */
+struct allocator_stats {
+ /** Copy of the `used` field of the allocator-specific statistics. */
+ size_t used;
+ /** Copy of the `total` field of the allocator-specific statistics. */
+ size_t total;
+};
+
+/**
+ * Create the small allocator of @a memtx on top of memtx->slab_cache.
+ * Two variadic arguments are read from @a argptr, in this order:
+ * the minimal object size (uint32_t) and the allocation factor
+ * (double, narrowed to float for small_alloc_create()).
+ */
+static inline void
+_small_allocator_create(struct memtx_engine *memtx, va_list argptr)
+{
+	uint32_t objsize_min = va_arg(argptr, uint32_t);
+	double alloc_factor = va_arg(argptr, double);
+	/* Plain call: a void function must not `return` an expression. */
+	small_alloc_create(memtx->alloc, &memtx->slab_cache,
+			   objsize_min, (float)alloc_factor);
+}
+
+/**
+ * Create the system allocator of @a memtx, charging memtx->quota.
+ * @a argptr is accepted only for signature parity with
+ * _small_allocator_create() and is not read.
+ */
+static inline void
+_system_allocator_create(struct memtx_engine *memtx, MAYBE_UNUSED va_list argptr)
+{
+	/* Plain call: a void function must not `return` an expression. */
+	system_alloc_create(memtx->alloc, &memtx->quota);
+}
+
+/**
+ * Collect statistics of the small allocator @a alloc into @a stats.
+ * Two variadic arguments are read from @a argptr, in this order:
+ * the per-mempool callback (mempool_stats_cb) and its context (void *);
+ * both are passed through to small_stats().
+ */
+static inline void
+_small_allocator_stats(struct small_alloc *alloc, struct small_stats *stats,
+		       va_list argptr)
+{
+	mempool_stats_cb stats_cb = va_arg(argptr, mempool_stats_cb);
+	void *cb_ctx = va_arg(argptr, void *);
+	/* Plain call: a void function must not `return` an expression. */
+	small_stats(alloc, stats, stats_cb, cb_ctx);
+}
+
+/**
+ * Collect statistics of the system allocator @a alloc into @a stats.
+ * @a argptr is accepted only for signature parity with
+ * _small_allocator_stats() and is not read.
+ */
+static inline void
+_system_allocator_stats(struct system_alloc *alloc, struct system_stats *stats,
+			MAYBE_UNUSED va_list argptr)
+{
+	/* Plain call: a void function must not `return` an expression. */
+	system_stats(alloc, stats);
+}
+
+/**
+ * Generate prefix##_mem_check(alloc), whose body is func(alloc->param).
+ * For the small allocator this calls slab_cache_check() on alloc->cache.
+ * For the system allocator `func` is noop_one_arg, which expands to
+ * nothing, so the `noop` member name is discarded by the preprocessor
+ * and the generated function has an empty body.
+ */
+#define MEM_CHECK_FUNC(prefix, func, param) \
+static inline void \
+prefix##_mem_check(MAYBE_UNUSED struct prefix##_alloc *alloc) \
+{ \
+ func(alloc->param); \
+}
+MEM_CHECK_FUNC(small, slab_cache_check, cache)
+MEM_CHECK_FUNC(system, noop_one_arg, noop)
+
+/**
+ * Pointer to the allocation routine of the selected allocator.
+ * Assigned by the generated <prefix>_memtx_allocator_choice() function.
+ * NOTE(review): the variable is `static` in a header, so every source
+ * file that includes this header gets its own copy; only the copy in
+ * the file that calls the choice function is assigned - confirm that
+ * this is intended.
+ */
+typedef void *(*global_alloc)(void *alloc, size_t bytes);
+static global_alloc memtx_global_alloc;
+
+/**
+ * Pointer to the immediate free routine of the selected allocator.
+ * Assigned by the generated <prefix>_memtx_allocator_choice() function;
+ * the per-file `static` note above applies here as well.
+ */
+typedef void (*global_free)(void *alloc, void *ptr, size_t bytes);
+static global_free memtx_global_free;
+
+/**
+ * Pointer to the delayed free routine of the selected allocator.
+ * Assigned by the generated <prefix>_memtx_allocator_choice() function;
+ * the per-file `static` note above applies here as well.
+ */
+typedef void (*global_free_delayed)(void *alloc, void *ptr, size_t bytes);
+static global_free_delayed memtx_global_free_delayed;
+
+/**
+ * Generate prefix##_allocator_destroy(memtx), which passes memtx->alloc
+ * to prefix##_alloc_destroy(). Instantiated for the small and the
+ * system allocators.
+ */
+#define DECLARE_MEMTX_ALLOCATOR_DESTROY(prefix) \
+static inline void \
+prefix##_allocator_destroy(struct memtx_engine *memtx) \
+{ \
+ prefix##_alloc_destroy(memtx->alloc); \
+}
+DECLARE_MEMTX_ALLOCATOR_DESTROY(small)
+DECLARE_MEMTX_ALLOCATOR_DESTROY(system)
+
+/**
+ * Generate prefix##_allocator_create(memtx, ...): a variadic front end
+ * that wraps its extra arguments into a va_list and forwards them to
+ * _##prefix##_allocator_create(), which knows how many arguments to
+ * read and of which types.
+ */
+#define DECLARE_MEMTX_ALLOCATOR_CREATE(prefix) \
+static inline void \
+prefix##_allocator_create(struct memtx_engine *memtx, ...) \
+{ \
+ va_list argptr; \
+ va_start(argptr, memtx); \
+ _##prefix##_allocator_create(memtx, argptr); \
+ va_end(argptr); \
+}
+DECLARE_MEMTX_ALLOCATOR_CREATE(small)
+DECLARE_MEMTX_ALLOCATOR_CREATE(system)
+
+/**
+ * Generate prefix##_allocator_enter_delayed_free_mode(memtx), which sets
+ * the PREFIX##_DELAYED_FREE_MODE option of memtx->alloc to true.
+ * The setopt call is a plain statement: the generated function returns
+ * void and therefore must not `return` an expression.
+ */
+#define DECLARE_MEMTX_ALLOCATOR_ENTER_DELAYED_FREE_MODE(prefix, PREFIX) \
+static inline void \
+prefix##_allocator_enter_delayed_free_mode(struct memtx_engine *memtx) \
+{ \
+	prefix##_##alloc_setopt(memtx->alloc, \
+				PREFIX##_##DELAYED_FREE_MODE, true); \
+}
+DECLARE_MEMTX_ALLOCATOR_ENTER_DELAYED_FREE_MODE(small, SMALL)
+DECLARE_MEMTX_ALLOCATOR_ENTER_DELAYED_FREE_MODE(system, SYSTEM)
+
+/**
+ * Generate prefix##_allocator_leave_delayed_free_mode(memtx), which sets
+ * the PREFIX##_DELAYED_FREE_MODE option of memtx->alloc to false.
+ * The setopt call is a plain statement: the generated function returns
+ * void and therefore must not `return` an expression.
+ */
+#define DECLARE_MEMTX_ALLOCATOR_LEAVE_DELAYED_FREE_MODE(prefix, PREFIX) \
+static inline void \
+prefix##_allocator_leave_delayed_free_mode(struct memtx_engine *memtx) \
+{ \
+	prefix##_##alloc_setopt(memtx->alloc, \
+				PREFIX##_##DELAYED_FREE_MODE, false); \
+}
+DECLARE_MEMTX_ALLOCATOR_LEAVE_DELAYED_FREE_MODE(small, SMALL)
+DECLARE_MEMTX_ALLOCATOR_LEAVE_DELAYED_FREE_MODE(system, SYSTEM)
+
+/**
+ * Generate prefix##_allocator_stats(memtx, stats, ...): collect the
+ * allocator-specific statistics via _##prefix##_allocator_stats(),
+ * forwarding the extra arguments as a va_list, and copy the `used` and
+ * `total` fields into the allocator-independent @a stats.
+ */
+#define DECLARE_MEMTX_ALLOCATOR_STATS(prefix) \
+static inline void \
+prefix##_allocator_stats(struct memtx_engine *memtx, \
+ struct allocator_stats *stats, ...) \
+{ \
+ va_list argptr; \
+ va_start(argptr, stats); \
+ struct prefix##_stats data_stats; \
+ _##prefix##_allocator_stats(memtx->alloc, &data_stats, argptr); \
+ va_end(argptr); \
+ stats->used = data_stats.used; \
+ stats->total = data_stats.total; \
+}
+DECLARE_MEMTX_ALLOCATOR_STATS(small)
+DECLARE_MEMTX_ALLOCATOR_STATS(system)
+
+/**
+ * Generate prefix##_allocator_mem_check(memtx), which casts memtx->alloc
+ * to the concrete allocator type and passes it to prefix##_mem_check().
+ */
+#define DECLARE_MEMTX_MEM_CHECK(prefix) \
+static inline void \
+prefix##_allocator_mem_check(struct memtx_engine *memtx) \
+{ \
+ prefix##_mem_check((struct prefix##_alloc *)(memtx->alloc)); \
+}
+DECLARE_MEMTX_MEM_CHECK(small)
+DECLARE_MEMTX_MEM_CHECK(system)
+
+/**
+ * Generate prefix##_memtx_allocator_choice(memtx), which selects the
+ * allocator `prefix` for @a memtx:
+ *  - stores alloc_func / free_func / free_delayed_func in the global
+ *    memtx_global_* function pointers;
+ *  - points memtx->alloc at the embedded memtx->prefix##_alloc object;
+ *  - fills the allocator method pointers of @a memtx with the
+ *    prefix##_allocator_* functions generated above.
+ *
+ * The routines are cast to the matching function pointer typedefs
+ * rather than to `void *`: conversion between function pointers and
+ * object pointers is not guaranteed by the C standard.
+ */
+#define DECLARE_MEMTX_ALLOCATOR_CHOICE(prefix, alloc_func, free_func, \
+				       free_delayed_func) \
+static inline void \
+prefix##_memtx_allocator_choice(struct memtx_engine *memtx) \
+{ \
+	memtx_global_alloc = (global_alloc)alloc_func; \
+	memtx_global_free = (global_free)free_func; \
+	memtx_global_free_delayed = (global_free_delayed)free_delayed_func; \
+	memtx->alloc = &memtx->prefix##_alloc; \
+	memtx->memtx_allocator_create = prefix##_allocator_create; \
+	memtx->memtx_allocator_destroy = prefix##_allocator_destroy; \
+	memtx->memtx_enter_delayed_free_mode = \
+		prefix##_allocator_enter_delayed_free_mode; \
+	memtx->memtx_leave_delayed_free_mode = \
+		prefix##_allocator_leave_delayed_free_mode; \
+	memtx->memtx_allocator_stats = prefix##_allocator_stats; \
+	memtx->memtx_mem_check = prefix##_allocator_mem_check; \
+}
+DECLARE_MEMTX_ALLOCATOR_CHOICE(small, smalloc, smfree, smfree_delayed)
+DECLARE_MEMTX_ALLOCATOR_CHOICE(system, sysalloc, sysfree, sysfree_delayed)
+
+#if defined(__cplusplus)
+} /* extern "C" */
+#endif /* defined(__cplusplus) */
diff --git a/src/box/box.cc b/src/box/box.cc
index a8bc3471d..66f6030df 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -2250,6 +2250,7 @@ engine_init()
cfg_getd("memtx_memory"),
cfg_geti("memtx_min_tuple_size"),
cfg_geti("strip_core"),
+ cfg_gets("allocator"),
cfg_getd("slab_alloc_factor"));
engine_register((struct engine *)memtx);
box_set_memtx_max_tuple_size();
diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua
index 770442052..817b8dbd5 100644
--- a/src/box/lua/load_cfg.lua
+++ b/src/box/lua/load_cfg.lua
@@ -43,6 +43,7 @@ local default_cfg = {
memtx_min_tuple_size = 16,
memtx_max_tuple_size = 1024 * 1024,
slab_alloc_factor = 1.05,
+ allocator = "small",
work_dir = nil,
memtx_dir = ".",
wal_dir = ".",
@@ -123,6 +124,7 @@ local template_cfg = {
memtx_min_tuple_size = 'number',
memtx_max_tuple_size = 'number',
slab_alloc_factor = 'number',
+ allocator = 'string',
work_dir = 'string',
memtx_dir = 'string',
wal_dir = 'string',
diff --git a/src/box/lua/slab.c b/src/box/lua/slab.c
index 9f5e7e95c..e3f140570 100644
--- a/src/box/lua/slab.c
+++ b/src/box/lua/slab.c
@@ -43,6 +43,7 @@
#include "memory.h"
#include "box/engine.h"
#include "box/memtx_engine.h"
+#include "box/allocator.h"
static int
small_stats_noop_cb(const struct mempool_stats *stats, void *cb_ctx)
@@ -108,17 +109,31 @@ lbox_slab_stats(struct lua_State *L)
struct memtx_engine *memtx;
memtx = (struct memtx_engine *)engine_by_name("memtx");
- struct small_stats totals;
+ struct allocator_stats totals;
lua_newtable(L);
/*
* List all slabs used for tuples and slabs used for
* indexes, with their stats.
*/
- small_stats(&memtx->alloc, &totals, small_stats_lua_cb, L);
+ if (memtx->alloc == &memtx->small_alloc) {
+ memtx->memtx_allocator_stats(memtx, &totals, small_stats_lua_cb, L);
+ } else {
+ memtx->memtx_allocator_stats(memtx, &totals);
+ lua_pushnumber(L, lua_objlen(L, -1) + 1);
+ lua_newtable(L);
+ luaL_setmaphint(L, -1);
+ lua_pushstring(L, "mem_used");
+ luaL_pushuint64(L, totals.used);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "mem_free");
+ luaL_pushuint64(L, totals.total - totals.used);
+ lua_settable(L, -3);
+ lua_settable(L, -3);
+ }
struct mempool_stats index_stats;
mempool_stats(&memtx->index_extent_pool, &index_stats);
small_stats_lua_cb(&index_stats, L);
-
return 1;
}
@@ -128,14 +143,21 @@ lbox_slab_info(struct lua_State *L)
struct memtx_engine *memtx;
memtx = (struct memtx_engine *)engine_by_name("memtx");
- struct small_stats totals;
+ struct allocator_stats totals;
+ bool is_small_alloc;
/*
* List all slabs used for tuples and slabs used for
* indexes, with their stats.
*/
lua_newtable(L);
- small_stats(&memtx->alloc, &totals, small_stats_noop_cb, L);
+ if (memtx->alloc == &memtx->small_alloc) {
+ memtx->memtx_allocator_stats(memtx, &totals, small_stats_noop_cb, L);
+ is_small_alloc = true;
+ } else {
+ memtx->memtx_allocator_stats(memtx, &totals);
+ is_small_alloc = false;
+ }
struct mempool_stats index_stats;
mempool_stats(&memtx->index_extent_pool, &index_stats);
@@ -187,10 +209,10 @@ lbox_slab_info(struct lua_State *L)
* data (tuples and indexes).
*/
lua_pushstring(L, "arena_used");
- luaL_pushuint64(L, totals.used + index_stats.totals.used);
+ luaL_pushuint64(L, (is_small_alloc ? totals.used : 0) + index_stats.totals.used);
lua_settable(L, -3);
- ratio = 100 * ((double) (totals.used + index_stats.totals.used)
+ ratio = 100 * ((double) ((is_small_alloc ? totals.used : 0) + index_stats.totals.used)
/ (double) arena_size);
snprintf(ratio_buf, sizeof(ratio_buf), "%0.1lf%%", ratio);
@@ -227,7 +249,6 @@ lbox_slab_info(struct lua_State *L)
lua_pushstring(L, "quota_used_ratio");
lua_pushstring(L, ratio_buf);
lua_settable(L, -3);
-
return 1;
}
@@ -259,7 +280,7 @@ lbox_slab_check(MAYBE_UNUSED struct lua_State *L)
{
struct memtx_engine *memtx;
memtx = (struct memtx_engine *)engine_by_name("memtx");
- slab_cache_check(memtx->alloc.cache);
+ memtx->memtx_mem_check(memtx);
return 0;
}
diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index db2bb2333..e70cfc35a 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -50,6 +50,7 @@
#include "schema.h"
#include "gc.h"
#include "raft.h"
+#include "allocator.h"
/* sync snapshot every 16MB */
#define SNAP_SYNC_INTERVAL (1 << 24)
@@ -141,8 +142,9 @@ memtx_engine_shutdown(struct engine *engine)
mempool_destroy(&memtx->rtree_iterator_pool);
mempool_destroy(&memtx->index_extent_pool);
slab_cache_destroy(&memtx->index_slab_cache);
- small_alloc_destroy(&memtx->alloc);
- slab_cache_destroy(&memtx->slab_cache);
+ memtx->memtx_allocator_destroy(memtx);
+ if (memtx->alloc == &memtx->small_alloc)
+ slab_cache_destroy(&memtx->slab_cache);
tuple_arena_destroy(&memtx->arena);
xdir_destroy(&memtx->snap_dir);
free(memtx);
@@ -971,10 +973,13 @@ static void
memtx_engine_memory_stat(struct engine *engine, struct engine_memory_stat *stat)
{
struct memtx_engine *memtx = (struct memtx_engine *)engine;
- struct small_stats data_stats;
+ struct allocator_stats data_stats;
struct mempool_stats index_stats;
mempool_stats(&memtx->index_extent_pool, &index_stats);
- small_stats(&memtx->alloc, &data_stats, small_stats_noop_cb, NULL);
+ if (memtx->alloc == &memtx->small_alloc)
+ memtx->memtx_allocator_stats(memtx, &data_stats, small_stats_noop_cb, NULL);
+ else
+ memtx->memtx_allocator_stats(memtx, &data_stats);
stat->data += data_stats.used;
stat->index += index_stats.totals.used;
}
@@ -1052,7 +1057,7 @@ memtx_engine_gc_f(va_list va)
struct memtx_engine *
memtx_engine_new(const char *snap_dirname, bool force_recovery,
uint64_t tuple_arena_max_size, uint32_t objsize_min,
- bool dontdump, float alloc_factor)
+ bool dontdump, const char *allocator, float alloc_factor)
{
struct memtx_engine *memtx = calloc(1, sizeof(*memtx));
if (memtx == NULL) {
@@ -1061,6 +1066,18 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery,
return NULL;
}
+ assert(allocator != NULL);
+ /* Default allocator */
+ if(!strcmp(allocator, "small")) {
+ small_memtx_allocator_choice(memtx);
+ } else if (!strcmp(allocator, "system")) {
+ system_memtx_allocator_choice(memtx);
+ } else {
+ diag_set(IllegalParams, "Bad memory allocator name");
+ free(memtx);
+ return NULL;
+ }
+
xdir_create(&memtx->snap_dir, snap_dirname, SNAP, &INSTANCE_UUID,
&xlog_opts_default);
memtx->snap_dir.force_recovery = force_recovery;
@@ -1108,9 +1125,12 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery,
quota_init(&memtx->quota, tuple_arena_max_size);
tuple_arena_create(&memtx->arena, &memtx->quota, tuple_arena_max_size,
SLAB_SIZE, dontdump, "memtx");
- slab_cache_create(&memtx->slab_cache, &memtx->arena);
- small_alloc_create(&memtx->alloc, &memtx->slab_cache,
- objsize_min, alloc_factor);
+ if (memtx->alloc == &memtx->small_alloc) {
+ slab_cache_create(&memtx->slab_cache, &memtx->arena);
+ memtx->memtx_allocator_create(memtx, objsize_min, (double)alloc_factor);
+ } else {
+ memtx->memtx_allocator_create(memtx);
+ }
/* Initialize index extent allocator. */
slab_cache_create(&memtx->index_slab_cache, &memtx->arena);
@@ -1175,7 +1195,7 @@ memtx_enter_delayed_free_mode(struct memtx_engine *memtx)
{
memtx->snapshot_version++;
if (memtx->delayed_free_mode++ == 0)
- small_alloc_setopt(&memtx->alloc, SMALL_DELAYED_FREE_MODE, true);
+ memtx->memtx_enter_delayed_free_mode(memtx);
}
void
@@ -1183,7 +1203,7 @@ memtx_leave_delayed_free_mode(struct memtx_engine *memtx)
{
assert(memtx->delayed_free_mode > 0);
if (--memtx->delayed_free_mode == 0)
- small_alloc_setopt(&memtx->alloc, SMALL_DELAYED_FREE_MODE, false);
+ memtx->memtx_leave_delayed_free_mode(memtx);
}
struct tuple *
@@ -1225,7 +1245,7 @@ memtx_tuple_new(struct tuple_format *format, const char *data, const char *end)
}
struct memtx_tuple *memtx_tuple;
- while ((memtx_tuple = smalloc(&memtx->alloc, total)) == NULL) {
+ while ((memtx_tuple = memtx_global_alloc(memtx->alloc, total)) == NULL) {
bool stop;
memtx_engine_run_gc(memtx, &stop);
if (stop)
@@ -1262,12 +1282,11 @@ memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple)
struct memtx_tuple *memtx_tuple =
container_of(tuple, struct memtx_tuple, base);
size_t total = tuple_size(tuple) + offsetof(struct memtx_tuple, base);
- if (memtx->alloc.free_mode != SMALL_DELAYED_FREE ||
- memtx_tuple->version == memtx->snapshot_version ||
+ if (memtx_tuple->version == memtx->snapshot_version ||
format->is_temporary)
- smfree(&memtx->alloc, memtx_tuple, total);
+ memtx_global_free(memtx->alloc, memtx_tuple, total);
else
- smfree_delayed(&memtx->alloc, memtx_tuple, total);
+ memtx_global_free_delayed(memtx->alloc, memtx_tuple, total);
tuple_format_unref(format);
}
@@ -1279,7 +1298,7 @@ metmx_tuple_chunk_delete(struct tuple_format *format, const char *data)
container_of((const char (*)[0])data,
struct tuple_chunk, data);
uint32_t sz = tuple_chunk_sz(tuple_chunk->data_sz);
- smfree(&memtx->alloc, tuple_chunk, sz);
+ memtx_global_free(memtx->alloc, tuple_chunk, sz);
}
const char *
@@ -1289,7 +1308,7 @@ memtx_tuple_chunk_new(struct tuple_format *format, struct tuple *tuple,
struct memtx_engine *memtx = (struct memtx_engine *)format->engine;
uint32_t sz = tuple_chunk_sz(data_sz);
struct tuple_chunk *tuple_chunk =
- (struct tuple_chunk *) smalloc(&memtx->alloc, sz);
+ (struct tuple_chunk *) memtx_global_alloc(memtx->alloc, sz);
if (tuple == NULL) {
diag_set(OutOfMemory, sz, "smalloc", "tuple");
return NULL;
diff --git a/src/box/memtx_engine.h b/src/box/memtx_engine.h
index 8b380bf3c..cb7d4c1ce 100644
--- a/src/box/memtx_engine.h
+++ b/src/box/memtx_engine.h
@@ -40,6 +40,7 @@
#include "engine.h"
#include "xlog.h"
#include "salad/stailq.h"
+#include "system_allocator.h"
#if defined(__cplusplus)
extern "C" {
@@ -49,6 +50,7 @@ struct index;
struct fiber;
struct tuple;
struct tuple_format;
+struct allocator_stats;
/**
* The state of memtx recovery process.
@@ -135,8 +137,12 @@ struct memtx_engine {
struct slab_arena arena;
/** Slab cache for allocating tuples. */
struct slab_cache slab_cache;
- /** Tuple allocator. */
- struct small_alloc alloc;
+ /** Small tuple allocator. */
+ struct small_alloc small_alloc;
+ /** System tuple allocator */
+ struct system_alloc system_alloc;
+ /** Tuple allocator currently used */
+ void *alloc;
/** Slab cache for allocating index extents. */
struct slab_cache index_slab_cache;
/** Index extent allocator. */
@@ -178,6 +184,31 @@ struct memtx_engine {
* memtx_gc_task::link.
*/
struct stailq gc_queue;
+ /**
+ * Method to create memtx allocator
+ */
+ void (*memtx_allocator_create)(struct memtx_engine *memtx, ...);
+ /**
+ * Method to destroy memtx allocator
+ */
+ void (*memtx_allocator_destroy)(struct memtx_engine *memtx);
+ /**
+ * Method to enter delayed free mode
+ */
+ void (*memtx_enter_delayed_free_mode)(struct memtx_engine *memtx);
+ /**
+ * Method to leave delayed free mode
+ */
+ void (*memtx_leave_delayed_free_mode)(struct memtx_engine *memtx);
+ /**
+ * Method to get allocation statistic
+ */
+ void (*memtx_allocator_stats)(struct memtx_engine *memtx,
+ struct allocator_stats *stats, ...);
+ /**
+ * Method to memtx memory check
+ */
+ void (*memtx_mem_check)(struct memtx_engine *memtx);
};
struct memtx_gc_task;
@@ -213,7 +244,7 @@ struct memtx_engine *
memtx_engine_new(const char *snap_dirname, bool force_recovery,
uint64_t tuple_arena_max_size,
uint32_t objsize_min, bool dontdump,
- float alloc_factor);
+ const char *allocator, float alloc_factor);
int
memtx_engine_recover_snapshot(struct memtx_engine *memtx,
@@ -299,13 +330,13 @@ static inline struct memtx_engine *
memtx_engine_new_xc(const char *snap_dirname, bool force_recovery,
uint64_t tuple_arena_max_size,
uint32_t objsize_min, bool dontdump,
- float alloc_factor)
+ const char *allocator, float alloc_factor)
{
struct memtx_engine *memtx;
memtx = memtx_engine_new(snap_dirname, force_recovery,
tuple_arena_max_size,
objsize_min, dontdump,
- alloc_factor);
+ allocator, alloc_factor);
if (memtx == NULL)
diag_raise();
return memtx;
diff --git a/src/box/system_allocator.h b/src/box/system_allocator.h
new file mode 100644
index 000000000..8e039f8e4
--- /dev/null
+++ b/src/box/system_allocator.h
@@ -0,0 +1,226 @@
+#pragma once
+/*
+ * Copyright 2010-2020, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the
+ * following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <stdlib.h>
+#include <trivia/util.h>
+#include <trivia/config.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif /* defined(__cplusplus) */
+
+/*
+ * portable_malloc_usable_size(p) wraps the platform routine that reports
+ * the size of a heap block: malloc_size() when HAVE_MALLOC_SIZE_DARWIN
+ * is set, otherwise malloc_usable_size() declared in <malloc_np.h>
+ * (HAVE_MALLOC_USABLE_SIZE_BSD) or <malloc.h>
+ * (HAVE_MALLOC_USABLE_SIZE_LINUX). Compilation fails with #error when
+ * none of the three configuration macros is set.
+ */
+#if HAVE_MALLOC_SIZE_DARWIN
+#include <malloc/malloc.h>
+static inline size_t
+portable_malloc_usable_size(void *p)
+{
+ return malloc_size(p);
+}
+#elif HAVE_MALLOC_USABLE_SIZE_BSD
+#include <malloc_np.h>
+static inline size_t
+portable_malloc_usable_size(void *p)
+{
+ return malloc_usable_size(p);
+}
+#elif HAVE_MALLOC_USABLE_SIZE_LINUX
+#include <malloc.h>
+static inline size_t
+portable_malloc_usable_size(void *p)
+{
+ return malloc_usable_size(p);
+}
+#else
+#error "Undefined system type"
+#endif
+
+/**
+ * Free mode of the system allocator. The mode is changed by
+ * system_alloc_setopt() and, once the delayed list has been drained,
+ * by system_collect_garbage().
+ */
+enum system_free_mode {
+ /** Free objects immediately. */
+ SYSTEM_FREE,
+ /** Collect garbage after delayed free. */
+ SYSTEM_COLLECT_GARBAGE,
+ /** Postpone deletion of objects. */
+ SYSTEM_DELAYED_FREE,
+};
+
+/**
+ * State of the malloc-based tuple allocator.
+ */
+struct system_alloc {
+ /**
+ * Bytes allocated by system allocator
+ */
+ uint64_t used_bytes;
+ /**
+ * Allocator quota
+ */
+ struct quota *quota;
+ /**
+ * Free mode.
+ */
+ enum system_free_mode free_mode;
+ /**
+ * List of pointers for delayed free.
+ */
+ struct lifo delayed;
+ /**
+ * Set to true by system_alloc_create() and to false by
+ * system_alloc_destroy(); the other functions of this header
+ * assert that it is true.
+ */
+ bool init;
+};
+
+/**
+ * Statistics reported by the system_stats() function.
+ */
+struct system_stats {
+ /** Value of system_alloc::used_bytes at the time of the call. */
+ size_t used;
+ /** Result of quota_total() for the allocator quota. */
+ size_t total;
+};
+
+/**
+ * Options accepted by system_alloc_setopt().
+ */
+enum system_opt {
+ /** Boolean option: enter (true) or leave (false) delayed free mode. */
+ SYSTEM_DELAYED_FREE_MODE
+};
+
+/**
+ * Free @a ptr immediately and give the memory back to the quota.
+ * The size is taken from portable_malloc_usable_size(), so @a bytes is
+ * not used. Quota is released in whole QUOTA_UNIT_SIZE units: one unit
+ * per full unit of the block, plus one more when dropping the
+ * remainder moves the unit-aligned usage down.
+ */
+static inline void
+sysfree(struct system_alloc *alloc, void *ptr, MAYBE_UNUSED size_t bytes)
+{
+	assert(alloc->init == true);
+	size_t block_size = portable_malloc_usable_size(ptr);
+	uint32_t release_units = block_size / QUOTA_UNIT_SIZE;
+	uint32_t tail = block_size % QUOTA_UNIT_SIZE;
+	/* Value handed back by the atomic subtraction on the counter. */
+	size_t counter = pm_atomic_fetch_sub(&alloc->used_bytes, block_size);
+	bool crosses_unit = small_align(counter, QUOTA_UNIT_SIZE) >
+			    small_align(counter - tail, QUOTA_UNIT_SIZE);
+	if (crosses_unit)
+		release_units++;
+	if (release_units != 0)
+		quota_release(alloc->quota, release_units * QUOTA_UNIT_SIZE);
+	free(ptr);
+}
+
+/**
+ * Do one step of garbage collection. Does nothing unless the allocator
+ * is in SYSTEM_COLLECT_GARBAGE mode. In that mode it frees at most
+ * BATCH blocks from the delayed list, or, when the list is already
+ * empty, switches the allocator back to SYSTEM_FREE mode.
+ */
+static inline void
+system_collect_garbage(struct system_alloc *alloc)
+{
+	assert(alloc->init == true);
+	if (alloc->free_mode != SYSTEM_COLLECT_GARBAGE)
+		return;
+
+	if (lifo_is_empty(&alloc->delayed)) {
+		/* Finish garbage collection and switch to regular mode */
+		alloc->free_mode = SYSTEM_FREE;
+		return;
+	}
+
+	const int BATCH = 100;
+	int freed = 0;
+	void *item;
+	while (freed < BATCH && (item = lifo_pop(&alloc->delayed)) != NULL) {
+		sysfree(alloc, item, 0 /* unused parameter */);
+		freed++;
+	}
+}
+
+/**
+ * Set an allocator option. The only known option is
+ * SYSTEM_DELAYED_FREE_MODE: true switches the allocator to
+ * SYSTEM_DELAYED_FREE, false to SYSTEM_COLLECT_GARBAGE.
+ * Any other option value trips an assertion.
+ */
+static inline void
+system_alloc_setopt(struct system_alloc *alloc, enum system_opt opt, bool val)
+{
+	assert(alloc->init == true);
+	if (opt != SYSTEM_DELAYED_FREE_MODE) {
+		assert(false);
+		return;
+	}
+	if (val)
+		alloc->free_mode = SYSTEM_DELAYED_FREE;
+	else
+		alloc->free_mode = SYSTEM_COLLECT_GARBAGE;
+}
+
+/**
+ * Fill @a totals: `used` gets a relaxed atomic read of the byte
+ * counter, `total` gets quota_total() of the allocator quota.
+ */
+static inline void
+system_stats(struct system_alloc *alloc, struct system_stats *totals)
+{
+	assert(alloc->init == true);
+	size_t in_use = pm_atomic_load_explicit(&alloc->used_bytes,
+						pm_memory_order_relaxed);
+	totals->used = in_use;
+	totals->total = quota_total(alloc->quota);
+}
+
+/**
+ * Initialize the system allocator @a alloc so that it charges @a quota.
+ * Every field is set explicitly, including the free mode, so the
+ * function does not depend on the caller having zeroed the structure.
+ */
+static inline void
+system_alloc_create(struct system_alloc *alloc, struct quota *quota)
+{
+	alloc->used_bytes = 0;
+	alloc->quota = quota;
+	/* Start in the regular mode: objects are freed immediately. */
+	alloc->free_mode = SYSTEM_FREE;
+	lifo_init(&alloc->delayed);
+	alloc->init = true;
+}
+
+/**
+ * Destroy the system allocator @a alloc. Blocks still waiting on the
+ * delayed list are freed first; otherwise they would be leaked and
+ * their quota would never be released. Afterwards the allocator is
+ * marked as not initialized.
+ */
+static inline void
+system_alloc_destroy(struct system_alloc *alloc)
+{
+	void *item;
+	/* lifo_pop() yields NULL once the delayed list is empty. */
+	while ((item = lifo_pop(&alloc->delayed)) != NULL)
+		sysfree(alloc, item, 0 /* unused parameter */);
+	alloc->init = false;
+}
+
+/**
+ * Free @a ptr, honouring delayed free mode: a non-NULL pointer is put
+ * on the delayed list while the allocator is in SYSTEM_DELAYED_FREE
+ * mode; in every other case it is passed straight to sysfree().
+ */
+static inline void
+sysfree_delayed(struct system_alloc *alloc, void *ptr, size_t bytes)
+{
+	assert(alloc->init == true);
+	if (alloc->free_mode != SYSTEM_DELAYED_FREE || !ptr) {
+		sysfree(alloc, ptr, bytes);
+		return;
+	}
+	lifo_push(&alloc->delayed, ptr);
+}
+
+/**
+ * Allocate @a bytes with malloc() and charge the quota.
+ *
+ * A garbage collection step is done first. The amount charged is based
+ * on portable_malloc_usable_size() of the new block and is accounted in
+ * whole QUOTA_UNIT_SIZE units: one unit per full unit of the block,
+ * plus one more when adding the remainder moves the unit-aligned usage
+ * up.
+ *
+ * The byte counter is updated with a compare-and-swap loop. The number
+ * of units is recomputed from scratch on every iteration, so a failed
+ * attempt (whose quota is released again) cannot inflate the amount
+ * charged by the next attempt.
+ *
+ * @retval pointer to the block on success
+ * @retval NULL if malloc() failed or the quota is exceeded
+ */
+static inline void *
+sysalloc(struct system_alloc *alloc, size_t bytes)
+{
+	assert(alloc->init == true);
+	system_collect_garbage(alloc);
+
+	void *ptr = malloc(bytes);
+	if (!ptr)
+		return NULL;
+	size_t size = portable_malloc_usable_size(ptr);
+	uint32_t tail = size % QUOTA_UNIT_SIZE;
+	uint32_t whole_units = size / QUOTA_UNIT_SIZE;
+	while (1) {
+		size_t used_bytes = pm_atomic_load(&alloc->used_bytes);
+		/* Start from the base value on every attempt. */
+		uint32_t units = whole_units;
+		if (small_align(used_bytes, QUOTA_UNIT_SIZE) <
+		    small_align(used_bytes + tail, QUOTA_UNIT_SIZE))
+			units++;
+		if (units > 0) {
+			if (quota_use(alloc->quota,
+				      units * QUOTA_UNIT_SIZE) < 0) {
+				free(ptr);
+				return NULL;
+			}
+		}
+		if (pm_atomic_compare_exchange_strong(&alloc->used_bytes,
+				&used_bytes, used_bytes + size))
+			break;
+		/* The counter changed meanwhile: undo the charge, retry. */
+		if (units > 0)
+			quota_release(alloc->quota, units * QUOTA_UNIT_SIZE);
+	}
+	return ptr;
+}
+
+#if defined(__cplusplus)
+} /* extern "C" */
+#endif /* defined(__cplusplus) */
diff --git a/src/trivia/config.h.cmake b/src/trivia/config.h.cmake
index 89e0d39c6..107cd8049 100644
--- a/src/trivia/config.h.cmake
+++ b/src/trivia/config.h.cmake
@@ -169,6 +169,9 @@
#cmakedefine HAVE_POSIX_FADVISE 1
#cmakedefine HAVE_FALLOCATE 1
#cmakedefine HAVE_MREMAP 1
+#cmakedefine HAVE_MALLOC_USABLE_SIZE_LINUX 1
+#cmakedefine HAVE_MALLOC_USABLE_SIZE_BSD 1
+#cmakedefine HAVE_MALLOC_SIZE_DARWIN 1
#cmakedefine HAVE_SYNC_FILE_RANGE 1
#cmakedefine HAVE_MSG_NOSIGNAL 1
diff --git a/test/app-tap/init_script.result b/test/app-tap/init_script.result
index 72aa67db2..3b5211a90 100644
--- a/test/app-tap/init_script.result
+++ b/test/app-tap/init_script.result
@@ -3,6 +3,7 @@
--
box.cfg
+allocator:small
background:false
checkpoint_count:2
checkpoint_interval:3600
diff --git a/test/box/admin.result b/test/box/admin.result
index e05440f66..9e4813133 100644
--- a/test/box/admin.result
+++ b/test/box/admin.result
@@ -27,7 +27,9 @@ help()
...
cfg_filter(box.cfg)
---
-- - - background
+- - - allocator
+ - small
+ - - background
- false
- - checkpoint_count
- 2
diff --git a/test/box/cfg.result b/test/box/cfg.result
index 10fef006c..d23255872 100644
--- a/test/box/cfg.result
+++ b/test/box/cfg.result
@@ -15,7 +15,9 @@ box.cfg.nosuchoption = 1
| ...
cfg_filter(box.cfg)
| ---
- | - - - background
+ | - - - allocator
+ | - small
+ | - - background
| - false
| - - checkpoint_count
| - 2
@@ -128,7 +130,9 @@ box.cfg()
| ...
cfg_filter(box.cfg)
| ---
- | - - - background
+ | - - - allocator
+ | - small
+ | - - background
| - false
| - - checkpoint_count
| - 2
diff --git a/test/box/choose_memtx_allocator.lua b/test/box/choose_memtx_allocator.lua
new file mode 100644
index 000000000..77a0ec638
--- /dev/null
+++ b/test/box/choose_memtx_allocator.lua
@@ -0,0 +1,9 @@
+#!/usr/bin/env tarantool
+
+-- Instance file for the choose_memtx_allocator test. The allocator name
+-- comes from the first command-line argument.
+local console = require('console')
+console.listen(os.getenv('ADMIN'))
+
+local listen_uri = os.getenv("LISTEN")
+local allocator_name = arg[1]
+box.cfg{
+    listen = listen_uri,
+    allocator = allocator_name,
+    checkpoint_interval = 10,
+}
diff --git a/test/box/choose_memtx_allocator.result b/test/box/choose_memtx_allocator.result
new file mode 100644
index 000000000..dab316b93
--- /dev/null
+++ b/test/box/choose_memtx_allocator.result
@@ -0,0 +1,135 @@
+-- test-run result file version 2
+
+-- write data recover from latest snapshot
+env = require('test_run')
+ | ---
+ | ...
+test_run = env.new()
+ | ---
+ | ...
+test_run:cmd('create server test with script="box/choose_memtx_allocator.lua"')
+ | ---
+ | - true
+ | ...
+--test small allocator
+test_run:cmd('start server test with args="small"')
+ | ---
+ | - true
+ | ...
+test_run:cmd('switch test')
+ | ---
+ | - true
+ | ...
+space = box.schema.space.create('test')
+ | ---
+ | ...
+space:format({ {name = 'id', type = 'unsigned'}, {name = 'year', type = 'unsigned'} })
+ | ---
+ | ...
+s = space:create_index('primary', { parts = {'id'} })
+ | ---
+ | ...
+for key = 1, 1000 do space:insert({key, key + 1000}) end
+ | ---
+ | ...
+for key = 1, 1000 do space:replace({key, key + 5000}) end
+ | ---
+ | ...
+for key = 1, 1000 do space:delete(key) end
+ | ---
+ | ...
+space:drop()
+ | ---
+ | ...
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+test_run:cmd('stop server test')
+ | ---
+ | - true
+ | ...
+--test system(malloc) allocator
+test_run:cmd('start server test with args="system"')
+ | ---
+ | - true
+ | ...
+test_run:cmd('switch test')
+ | ---
+ | - true
+ | ...
+space = box.schema.space.create('test')
+ | ---
+ | ...
+space:format({ {name = 'id', type = 'unsigned'}, {name = 'year', type = 'unsigned'} })
+ | ---
+ | ...
+s = space:create_index('primary', { parts = {'id'} })
+ | ---
+ | ...
+for key = 1, 500000 do space:insert({key, key + 1000}) end
+ | ---
+ | ...
+for key = 1, 500000 do space:replace({key, key + 5000}) end
+ | ---
+ | ...
+for key = 1, 500000 do space:delete(key) end
+ | ---
+ | ...
+space:drop()
+ | ---
+ | ...
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+test_run:cmd('stop server test')
+ | ---
+ | - true
+ | ...
+--test default (small) allocator
+test_run:cmd('start server test')
+ | ---
+ | - true
+ | ...
+test_run:cmd('switch test')
+ | ---
+ | - true
+ | ...
+space = box.schema.space.create('test')
+ | ---
+ | ...
+space:format({ {name = 'id', type = 'unsigned'}, {name = 'year', type = 'unsigned'} })
+ | ---
+ | ...
+s = space:create_index('primary', { parts = {'id'} })
+ | ---
+ | ...
+for key = 1, 1000 do space:insert({key, key + 1000}) end
+ | ---
+ | ...
+for key = 1, 1000 do space:replace({key, key + 5000}) end
+ | ---
+ | ...
+for key = 1, 1000 do space:delete(key) end
+ | ---
+ | ...
+space:drop()
+ | ---
+ | ...
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+test_run:cmd('stop server test')
+ | ---
+ | - true
+ | ...
+test_run:cmd('cleanup server test')
+ | ---
+ | - true
+ | ...
+test_run:cmd('delete server test')
+ | ---
+ | - true
+ | ...
diff --git a/test/box/choose_memtx_allocator.test.lua b/test/box/choose_memtx_allocator.test.lua
new file mode 100644
index 000000000..007b01d80
--- /dev/null
+++ b/test/box/choose_memtx_allocator.test.lua
@@ -0,0 +1,43 @@
+
+-- write data recover from latest snapshot
+env = require('test_run')
+test_run = env.new()
+test_run:cmd('create server test with script="box/choose_memtx_allocator.lua"')
+--test small allocator
+test_run:cmd('start server test with args="small"')
+test_run:cmd('switch test')
+space = box.schema.space.create('test')
+space:format({ {name = 'id', type = 'unsigned'}, {name = 'year', type = 'unsigned'} })
+s = space:create_index('primary', { parts = {'id'} })
+for key = 1, 1000 do space:insert({key, key + 1000}) end
+for key = 1, 1000 do space:replace({key, key + 5000}) end
+for key = 1, 1000 do space:delete(key) end
+space:drop()
+test_run:cmd('switch default')
+test_run:cmd('stop server test')
+--test system(malloc) allocator
+test_run:cmd('start server test with args="system"')
+test_run:cmd('switch test')
+space = box.schema.space.create('test')
+space:format({ {name = 'id', type = 'unsigned'}, {name = 'year', type = 'unsigned'} })
+s = space:create_index('primary', { parts = {'id'} })
+for key = 1, 500000 do space:insert({key, key + 1000}) end
+for key = 1, 500000 do space:replace({key, key + 5000}) end
+for key = 1, 500000 do space:delete(key) end
+space:drop()
+test_run:cmd('switch default')
+test_run:cmd('stop server test')
+--test default (small) allocator
+test_run:cmd('start server test')
+test_run:cmd('switch test')
+space = box.schema.space.create('test')
+space:format({ {name = 'id', type = 'unsigned'}, {name = 'year', type = 'unsigned'} })
+s = space:create_index('primary', { parts = {'id'} })
+for key = 1, 1000 do space:insert({key, key + 1000}) end
+for key = 1, 1000 do space:replace({key, key + 5000}) end
+for key = 1, 1000 do space:delete(key) end
+space:drop()
+test_run:cmd('switch default')
+test_run:cmd('stop server test')
+test_run:cmd('cleanup server test')
+test_run:cmd('delete server test')
--
2.20.1
More information about the Tarantool-patches
mailing list