From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp56.i.mail.ru (smtp56.i.mail.ru [217.69.128.36]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id EDF6D4765E2 for ; Tue, 29 Dec 2020 14:03:41 +0300 (MSK) From: mechanik20051988 Date: Tue, 29 Dec 2020 14:03:31 +0300 Message-Id: <88afdf49f30ccd53898a8925868534479d4836d1.1609239402.git.mechanik20051988@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH 3/4] memtx: implement api for memory allocator selection List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: v.shpilevoy@tarantool.org, alyapunov@tarantool.org Cc: mechanik20051988 , tarantool-patches@dev.tarantool.org From: mechanik20051988 The slab allocator, which is used for tuple allocation, has a certain disadvantage: on some workloads (size migration) it suffers from unresolvable fragmentation. The new option allows selecting the appropriate allocator if necessary. @TarantoolBot document Title: Add new 'allocator' option Add a new 'allocator' option which allows selecting the appropriate allocator for memtx tuples if necessary. Closes #5419 --- src/box/CMakeLists.txt | 2 + src/box/box.cc | 3 + src/box/lua/init.c | 2 +- src/box/lua/load_cfg.lua | 2 + src/box/lua/slab.c | 214 +---------------------- src/box/lua/slab.cc | 292 ++++++++++++++++++++++++++++++++ src/box/lua/slab.h | 1 + src/box/memtx_engine.cc | 113 ++++++++---- src/box/memtx_engine.h | 53 ++++-- src/box/memtx_space.cc | 75 ++++---- src/box/small_allocator.cc | 74 ++++++++ src/box/small_allocator.h | 58 +++++++ test/app-tap/init_script.result | 1 + test/box/admin.result | 4 +- test/box/cfg.result | 8 +- 15 files changed, 606 insertions(+), 296 deletions(-) create mode 100644 src/box/lua/slab.cc create mode 100644 src/box/small_allocator.cc create mode 100644 src/box/small_allocator.h diff --git a/src/box/CMakeLists.txt b/src/box/CMakeLists.txt index d2af89d05..aebf76bd4 100644 --- a/src/box/CMakeLists.txt +++ b/src/box/CMakeLists.txt @@ -129,6 +129,7 @@ add_library(box STATIC engine.c memtx_engine.cc memtx_space.cc + small_allocator.cc sysview.c blackhole.c service_engine.c @@ -198,6 +199,7 @@ add_library(box STATIC lua/serialize_lua.c lua/tuple.c lua/slab.c + lua/slab.cc lua/index.c lua/space.cc lua/sequence.c diff --git a/src/box/box.cc b/src/box/box.cc index 26cbe8aab..7e1b9d207 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -33,6 +33,7 @@ #include "trivia/config.h" #include "lua/utils.h" /* lua_hash() */ +#include "lua/slab.h" /* box_lua_slab_init */ #include "fiber_pool.h" #include #include @@ -2541,6 +2542,7 @@ engine_init() cfg_getd("memtx_memory"), cfg_geti("memtx_min_tuple_size"), cfg_geti("strip_core"), + cfg_gets("allocator"), cfg_getd("slab_alloc_factor")); engine_register((struct engine *)memtx); box_set_memtx_max_tuple_size(); @@ -2947,6 +2949,7 @@ box_cfg_xc(void) gc_init(); engine_init(); + box_lua_slab_init(tarantool_L); schema_init(); replication_init(); port_init(); diff --git a/src/box/lua/init.c b/src/box/lua/init.c index fbcdfb20b..480176f7a 100644 --- a/src/box/lua/init.c +++ b/src/box/lua/init.c @@ -465,7 +465,7 @@ box_lua_init(struct lua_State *L) box_lua_tuple_init(L); box_lua_call_init(L); box_lua_cfg_init(L); - box_lua_slab_init(L); + box_lua_slab_runtime_init(L); box_lua_index_init(L); box_lua_space_init(L); box_lua_sequence_init(L); diff --git
a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua index 574c8bef4..2fe8a5b6c 100644 --- a/src/box/lua/load_cfg.lua +++ b/src/box/lua/load_cfg.lua @@ -43,6 +43,7 @@ local default_cfg = { memtx_min_tuple_size = 16, memtx_max_tuple_size = 1024 * 1024, slab_alloc_factor = 1.05, + allocator = "small", work_dir = nil, memtx_dir = ".", wal_dir = ".", @@ -124,6 +125,7 @@ local template_cfg = { memtx_min_tuple_size = 'number', memtx_max_tuple_size = 'number', slab_alloc_factor = 'number', + allocator = 'string', work_dir = 'string', memtx_dir = 'string', wal_dir = 'string', diff --git a/src/box/lua/slab.c b/src/box/lua/slab.c index 9f5e7e95c..b9565e768 100644 --- a/src/box/lua/slab.c +++ b/src/box/lua/slab.c @@ -44,193 +44,6 @@ #include "box/engine.h" #include "box/memtx_engine.h" -static int -small_stats_noop_cb(const struct mempool_stats *stats, void *cb_ctx) -{ - (void) stats; - (void) cb_ctx; - return 0; -} - -static int -small_stats_lua_cb(const struct mempool_stats *stats, void *cb_ctx) -{ - /** Don't publish information about empty slabs. */ - if (stats->slabcount == 0) - return 0; - - struct lua_State *L = (struct lua_State *) cb_ctx; - - /* - * Create a Lua table for every slab class. A class is - * defined by its item size. - */ - /** Assign next slab size to the next member of an array. */ - lua_pushnumber(L, lua_objlen(L, -1) + 1); - lua_newtable(L); - /** - * This is in fact only to force YaML flow "compact" for this - * table. - */ - luaL_setmaphint(L, -1); - - lua_pushstring(L, "mem_used"); - luaL_pushuint64(L, stats->totals.used); - lua_settable(L, -3); - - lua_pushstring(L, "slab_size"); - luaL_pushuint64(L, stats->slabsize); - lua_settable(L, -3); - - lua_pushstring(L, "mem_free"); - luaL_pushuint64(L, stats->totals.total - stats->totals.used); - lua_settable(L, -3); - - lua_pushstring(L, "item_size"); - luaL_pushuint64(L, stats->objsize); - lua_settable(L, -3); - - lua_pushstring(L, "slab_count"); - luaL_pushuint64(L, stats->slabcount); - lua_settable(L, -3); - - lua_pushstring(L, "item_count"); - luaL_pushuint64(L, stats->objcount); - lua_settable(L, -3); - - lua_settable(L, -3); - return 0; -} - -static int -lbox_slab_stats(struct lua_State *L) -{ - struct memtx_engine *memtx; - memtx = (struct memtx_engine *)engine_by_name("memtx"); - - struct small_stats totals; - lua_newtable(L); - /* - * List all slabs used for tuples and slabs used for - * indexes, with their stats. - */ - small_stats(&memtx->alloc, &totals, small_stats_lua_cb, L); - struct mempool_stats index_stats; - mempool_stats(&memtx->index_extent_pool, &index_stats); - small_stats_lua_cb(&index_stats, L); - - return 1; -} - -static int -lbox_slab_info(struct lua_State *L) -{ - struct memtx_engine *memtx; - memtx = (struct memtx_engine *)engine_by_name("memtx"); - - struct small_stats totals; - - /* - * List all slabs used for tuples and slabs used for - * indexes, with their stats. - */ - lua_newtable(L); - small_stats(&memtx->alloc, &totals, small_stats_noop_cb, L); - struct mempool_stats index_stats; - mempool_stats(&memtx->index_extent_pool, &index_stats); - - double ratio; - char ratio_buf[32]; - - ratio = 100 * ((double) totals.used - / ((double) totals.total + 0.0001)); - snprintf(ratio_buf, sizeof(ratio_buf), "%0.2lf%%", ratio); - - /** How much address space has been already touched */ - lua_pushstring(L, "items_size"); - luaL_pushuint64(L, totals.total); - lua_settable(L, -3); - /** - * How much of this formatted address space is used for - * actual data. 
- */ - lua_pushstring(L, "items_used"); - luaL_pushuint64(L, totals.used); - lua_settable(L, -3); - - /* - * Fragmentation factor for tuples. Don't account indexes, - * even if they are fragmented, there is nothing people - * can do about it. - */ - lua_pushstring(L, "items_used_ratio"); - lua_pushstring(L, ratio_buf); - lua_settable(L, -3); - - /** How much address space has been already touched - * (tuples and indexes) */ - lua_pushstring(L, "arena_size"); - /* - * We could use totals.total + index_stats.total here, - * but this would not account for slabs which are sitting - * in slab cache or in the arena, available for reuse. - * Make sure a simple formula: - * items_used_ratio > 0.9 && arena_used_ratio > 0.9 && - * quota_used_ratio > 0.9 work as an indicator - * for reaching Tarantool memory limit. - */ - size_t arena_size = memtx->arena.used; - luaL_pushuint64(L, arena_size); - lua_settable(L, -3); - /** - * How much of this formatted address space is used for - * data (tuples and indexes). - */ - lua_pushstring(L, "arena_used"); - luaL_pushuint64(L, totals.used + index_stats.totals.used); - lua_settable(L, -3); - - ratio = 100 * ((double) (totals.used + index_stats.totals.used) - / (double) arena_size); - snprintf(ratio_buf, sizeof(ratio_buf), "%0.1lf%%", ratio); - - lua_pushstring(L, "arena_used_ratio"); - lua_pushstring(L, ratio_buf); - lua_settable(L, -3); - - /* - * This is pretty much the same as - * box.cfg.slab_alloc_arena, but in bytes - */ - lua_pushstring(L, "quota_size"); - luaL_pushuint64(L, quota_total(&memtx->quota)); - lua_settable(L, -3); - - /* - * How much quota has been booked - reflects the total - * size of slabs in various slab caches. - */ - lua_pushstring(L, "quota_used"); - luaL_pushuint64(L, quota_used(&memtx->quota)); - lua_settable(L, -3); - - /** - * This should be the same as arena_size/arena_used, however, - * don't trust totals in the most important monitoring - * factor, it's the quota that give you OOM error in the - * end of the day. - */ - ratio = 100 * ((double) quota_used(&memtx->quota) / - ((double) quota_total(&memtx->quota) + 0.0001)); - snprintf(ratio_buf, sizeof(ratio_buf), "%0.2lf%%", ratio); - - lua_pushstring(L, "quota_used_ratio"); - lua_pushstring(L, ratio_buf); - lua_settable(L, -3); - - return 1; -} - static int lbox_runtime_info(struct lua_State *L) { @@ -254,36 +67,11 @@ lbox_runtime_info(struct lua_State *L) return 1; } -static int -lbox_slab_check(MAYBE_UNUSED struct lua_State *L) -{ - struct memtx_engine *memtx; - memtx = (struct memtx_engine *)engine_by_name("memtx"); - slab_cache_check(memtx->alloc.cache); - return 0; -} - /** Initialize box.slab package. */ void -box_lua_slab_init(struct lua_State *L) +box_lua_slab_runtime_init(struct lua_State *L) { lua_getfield(L, LUA_GLOBALSINDEX, "box"); - lua_pushstring(L, "slab"); - lua_newtable(L); - - lua_pushstring(L, "info"); - lua_pushcfunction(L, lbox_slab_info); - lua_settable(L, -3); - - lua_pushstring(L, "stats"); - lua_pushcfunction(L, lbox_slab_stats); - lua_settable(L, -3); - - lua_pushstring(L, "check"); - lua_pushcfunction(L, lbox_slab_check); - lua_settable(L, -3); - - lua_settable(L, -3); /* box.slab */ lua_pushstring(L, "runtime"); lua_newtable(L); diff --git a/src/box/lua/slab.cc b/src/box/lua/slab.cc new file mode 100644 index 000000000..4b247885f --- /dev/null +++ b/src/box/lua/slab.cc @@ -0,0 +1,292 @@ +/* + * Copyright 2010-2020, Tarantool AUTHORS, please see AUTHORS file. 
+ * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "trivia/util.h" + +#include "box/lua/slab.h" +#include "lua/utils.h" + +#include <lua.h> +#include <lauxlib.h> +#include <lualib.h> +#include <lj_obj.h> /* internals: lua in box.runtime.info() */ + +#include "small/small.h" +#include "small/quota.h" +#include "memory.h" +#include "box/engine.h" +#include "box/memtx_engine.h" +#include "box/small_allocator.h" + +static int +small_stats_noop_cb(const struct mempool_stats *stats, void *cb_ctx) +{ + (void) stats; + (void) cb_ctx; + return 0; +} + +static int +small_stats_lua_cb(const struct mempool_stats *stats, void *cb_ctx) +{ + /** Don't publish information about empty slabs. */ + if (stats->slabcount == 0) + return 0; + + struct lua_State *L = (struct lua_State *) cb_ctx; + + /* + * Create a Lua table for every slab class. A class is + * defined by its item size. + */ + /** Assign next slab size to the next member of an array. */ + lua_pushnumber(L, lua_objlen(L, -1) + 1); + lua_newtable(L); + /** + * This is in fact only to force YaML flow "compact" for this + * table. + */ + luaL_setmaphint(L, -1); + + lua_pushstring(L, "mem_used"); + luaL_pushuint64(L, stats->totals.used); + lua_settable(L, -3); + + lua_pushstring(L, "slab_size"); + luaL_pushuint64(L, stats->slabsize); + lua_settable(L, -3); + + lua_pushstring(L, "mem_free"); + luaL_pushuint64(L, stats->totals.total - stats->totals.used); + lua_settable(L, -3); + + lua_pushstring(L, "item_size"); + luaL_pushuint64(L, stats->objsize); + lua_settable(L, -3); + + lua_pushstring(L, "slab_count"); + luaL_pushuint64(L, stats->slabcount); + lua_settable(L, -3); + + lua_pushstring(L, "item_count"); + luaL_pushuint64(L, stats->objcount); + lua_settable(L, -3); + + lua_settable(L, -3); + return 0; +} + +template <class allocator_stats, class Allocator, + int (*stats_cb)(const struct mempool_stats *stats, void *cb_ctx)> +static int +lbox_slab_stats(struct lua_State *L) +{ + struct memtx_engine *memtx; + memtx = (struct memtx_engine *)engine_by_name("memtx"); + + allocator_stats totals; + lua_newtable(L); + /* + * List all slabs used for tuples and slabs used for + * indexes, with their stats.
*/ + Allocator::stats(&totals, stats_cb, L); + struct mempool_stats index_stats; + mempool_stats(&memtx->index_extent_pool, &index_stats); + stats_cb(&index_stats, L); + + return 1; +} + +template <class allocator_stats, class Allocator, + int (*stats_cb)(const struct mempool_stats *stats, void *cb_ctx)> +static int +lbox_slab_info(struct lua_State *L) +{ + struct memtx_engine *memtx; + memtx = (struct memtx_engine *)engine_by_name("memtx"); + + allocator_stats totals; + + /* + * List all slabs used for tuples and slabs used for + * indexes, with their stats. + */ + lua_newtable(L); + Allocator::stats(&totals, stats_cb, L); + struct mempool_stats index_stats; + mempool_stats(&memtx->index_extent_pool, &index_stats); + + double ratio; + char ratio_buf[32]; + + ratio = 100 * ((double) totals.used + / ((double) totals.total + 0.0001)); + snprintf(ratio_buf, sizeof(ratio_buf), "%0.2lf%%", ratio); + + /** How much address space has been already touched */ + lua_pushstring(L, "items_size"); + luaL_pushuint64(L, totals.total); + lua_settable(L, -3); + /** + * How much of this formatted address space is used for + * actual data. + */ + lua_pushstring(L, "items_used"); + luaL_pushuint64(L, totals.used); + lua_settable(L, -3); + + /* + * Fragmentation factor for tuples. Don't account indexes, + * even if they are fragmented, there is nothing people + * can do about it. + */ + lua_pushstring(L, "items_used_ratio"); + lua_pushstring(L, ratio_buf); + lua_settable(L, -3); + + /** How much address space has been already touched + * (tuples and indexes) */ + lua_pushstring(L, "arena_size"); + /* + * We could use totals.total + index_stats.total here, + * but this would not account for slabs which are sitting + * in slab cache or in the arena, available for reuse. + * Make sure a simple formula: + * items_used_ratio > 0.9 && arena_used_ratio > 0.9 && + * quota_used_ratio > 0.9 work as an indicator + * for reaching Tarantool memory limit. + */ + size_t arena_size = memtx->arena.used; + luaL_pushuint64(L, arena_size); + lua_settable(L, -3); + /** + * How much of this formatted address space is used for + * data (tuples and indexes). + */ + lua_pushstring(L, "arena_used"); + luaL_pushuint64(L, totals.used + index_stats.totals.used); + lua_settable(L, -3); + + ratio = 100 * ((double) (totals.used + index_stats.totals.used) + / (double) arena_size); + snprintf(ratio_buf, sizeof(ratio_buf), "%0.1lf%%", ratio); + + lua_pushstring(L, "arena_used_ratio"); + lua_pushstring(L, ratio_buf); + lua_settable(L, -3); + + /* + * This is pretty much the same as + * box.cfg.slab_alloc_arena, but in bytes + */ + lua_pushstring(L, "quota_size"); + luaL_pushuint64(L, quota_total(&memtx->quota)); + lua_settable(L, -3); + + /* + * How much quota has been booked - reflects the total + * size of slabs in various slab caches. + */ + lua_pushstring(L, "quota_used"); + luaL_pushuint64(L, quota_used(&memtx->quota)); + lua_settable(L, -3); + + /** + * This should be the same as arena_size/arena_used, however, + * don't trust totals in the most important monitoring + * factor, it's the quota that give you OOM error in the + * end of the day.
+ */ + ratio = 100 * ((double) quota_used(&memtx->quota) / + ((double) quota_total(&memtx->quota) + 0.0001)); + snprintf(ratio_buf, sizeof(ratio_buf), "%0.2lf%%", ratio); + + lua_pushstring(L, "quota_used_ratio"); + lua_pushstring(L, ratio_buf); + lua_settable(L, -3); + + return 1; +} + +template <class Allocator> +static int +lbox_slab_check(MAYBE_UNUSED struct lua_State *L) +{ + Allocator::memory_check(); + return 0; +} + +template <class allocator_stats, class Allocator, + int (*stats_cb)(const struct mempool_stats *stats, void *cb_ctx)> +static void +box_lua_slab_init(struct lua_State *L) +{ + lua_pushstring(L, "info"); + lua_pushcfunction(L, (lbox_slab_info<allocator_stats, Allocator, stats_cb>)); + lua_settable(L, -3); + + lua_pushstring(L, "stats"); + lua_pushcfunction(L, (lbox_slab_stats<allocator_stats, Allocator, stats_cb>)); + lua_settable(L, -3); + + lua_pushstring(L, "check"); + lua_pushcfunction(L, lbox_slab_check<Allocator>); + lua_settable(L, -3); +} + +/** Initialize box.slab package. */ +void +box_lua_slab_init(struct lua_State *L) +{ + struct memtx_engine *memtx; + memtx = (struct memtx_engine *)engine_by_name("memtx"); + + lua_getfield(L, LUA_GLOBALSINDEX, "box"); + lua_pushstring(L, "slab"); + lua_newtable(L); + + switch(memtx->allocator_type) { + case MEMTX_SMALL_ALLOCATOR: + box_lua_slab_init<struct small_stats, SmallAllocator, + small_stats_lua_cb>(L); + break; + default: + ; + } + + lua_settable(L, -3); /* box.slab */ + + lua_pop(L, 1); /* box. */ +} diff --git a/src/box/lua/slab.h b/src/box/lua/slab.h index fd4ef8893..41280343f 100644 --- a/src/box/lua/slab.h +++ b/src/box/lua/slab.h @@ -35,6 +35,7 @@ extern "C" { #endif /* defined(__cplusplus) */ struct lua_State; +void box_lua_slab_runtime_init(struct lua_State *L); void box_lua_slab_init(struct lua_State *L); #if defined(__cplusplus) diff --git a/src/box/memtx_engine.cc b/src/box/memtx_engine.cc index 520a221dd..48c0f13d0 100644 --- a/src/box/memtx_engine.cc +++ b/src/box/memtx_engine.cc @@ -50,10 +50,20 @@ #include "schema.h" #include "gc.h" #include "raft.h" +#include "small_allocator.h" /* sync snapshot every 16MB */ #define SNAP_SYNC_INTERVAL (1 << 24) +#define MEMTX_TUPLE_FORMAT_VTAB(Allocator) \ +memtx_tuple_format_vtab.tuple_delete = memtx_tuple_delete<Allocator>; \ +memtx_tuple_format_vtab.tuple_new = memtx_tuple_new<Allocator>; \ +memtx_tuple_format_vtab.tuple_chunk_delete = \ + metmx_tuple_chunk_delete<Allocator>; \ +memtx_tuple_format_vtab.tuple_chunk_new = \ + memtx_tuple_chunk_new<Allocator>; + + static void checkpoint_cancel(struct checkpoint *ckpt); @@ -141,8 +151,13 @@ memtx_engine_shutdown(struct engine *engine) mempool_destroy(&memtx->rtree_iterator_pool); mempool_destroy(&memtx->index_extent_pool); slab_cache_destroy(&memtx->index_slab_cache); - small_alloc_destroy(&memtx->alloc); - slab_cache_destroy(&memtx->slab_cache); + switch (memtx->allocator_type) { + case MEMTX_SMALL_ALLOCATOR: + SmallAllocator::destroy(); + break; + default: + ; + } tuple_arena_destroy(&memtx->arena); xdir_destroy(&memtx->snap_dir); free(memtx); @@ -979,19 +994,21 @@ small_stats_noop_cb(const struct mempool_stats *stats, void *cb_ctx) return 0; } +template <class allocator_stats, class Allocator, + int (*stats_cb)(const struct mempool_stats *stats, void *cb_ctx)> static void memtx_engine_memory_stat(struct engine *engine, struct engine_memory_stat *stat) { struct memtx_engine *memtx = (struct memtx_engine *)engine; - struct small_stats data_stats; + allocator_stats data_stats; struct mempool_stats index_stats; mempool_stats(&memtx->index_extent_pool, &index_stats); - small_stats(&memtx->alloc, &data_stats, small_stats_noop_cb, NULL); + Allocator::stats(&data_stats, stats_cb, NULL); stat->data += data_stats.used; stat->index += index_stats.totals.used; } -static const struct engine_vtab memtx_engine_vtab = { +static struct engine_vtab memtx_engine_vtab = { /* .shutdown = */ memtx_engine_shutdown, /* .create_space = */
memtx_engine_create_space, /* .prepare_join = */ memtx_engine_prepare_join, @@ -1014,7 +1031,7 @@ static const struct engine_vtab memtx_engine_vtab = { /* .abort_checkpoint = */ memtx_engine_abort_checkpoint, /* .collect_garbage = */ memtx_engine_collect_garbage, /* .backup = */ memtx_engine_backup, - /* .memory_stat = */ memtx_engine_memory_stat, + /* .memory_stat = */ nullptr, /* .reset_stat = */ generic_engine_reset_stat, /* .check_space_def = */ generic_engine_check_space_def, }; @@ -1064,7 +1081,7 @@ memtx_engine_gc_f(va_list va) struct memtx_engine * memtx_engine_new(const char *snap_dirname, bool force_recovery, uint64_t tuple_arena_max_size, uint32_t objsize_min, - bool dontdump, float alloc_factor) + bool dontdump, const char *allocator, float alloc_factor) { int64_t snap_signature; struct memtx_engine *memtx = (struct memtx_engine *)calloc(1, sizeof(*memtx)); @@ -1074,6 +1091,21 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery, return NULL; } + assert(allocator != NULL); + if (!strcmp(allocator, "small")) { + memtx->allocator_type = MEMTX_SMALL_ALLOCATOR; + MEMTX_TUPLE_FORMAT_VTAB(SmallAllocator) + memtx_engine_vtab.memory_stat = + memtx_engine_memory_stat<struct small_stats, + SmallAllocator, small_stats_noop_cb>; + } else { + diag_set(IllegalParams, "Invalid memory allocator name"); + free(memtx); + return NULL; + } + xdir_create(&memtx->snap_dir, snap_dirname, SNAP, &INSTANCE_UUID, &xlog_opts_default); memtx->snap_dir.force_recovery = force_recovery; @@ -1131,12 +1163,17 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery, quota_init(&memtx->quota, tuple_arena_max_size); tuple_arena_create(&memtx->arena, &memtx->quota, tuple_arena_max_size, SLAB_SIZE, dontdump, "memtx"); - slab_cache_create(&memtx->slab_cache, &memtx->arena); - float actual_alloc_factor; - small_alloc_create(&memtx->alloc, &memtx->slab_cache, - objsize_min, alloc_factor, &actual_alloc_factor); - say_info("Actual slab_alloc_factor calculated on the basis of desired " - "slab_alloc_factor = %f", actual_alloc_factor); + + switch (memtx->allocator_type) { + case MEMTX_SMALL_ALLOCATOR: + float actual_alloc_factor; + SmallAllocator::create(&memtx->arena, objsize_min, alloc_factor, &actual_alloc_factor); + say_info("Actual slab_alloc_factor calculated on the basis of desired " + "slab_alloc_factor = %f", actual_alloc_factor); + break; + default: + ; + } /* Initialize index extent allocator.
*/ slab_cache_create(&memtx->index_slab_cache, &memtx->arena); @@ -1200,18 +1237,33 @@ void memtx_enter_delayed_free_mode(struct memtx_engine *memtx) { memtx->snapshot_version++; - if (memtx->delayed_free_mode++ == 0) - small_alloc_setopt(&memtx->alloc, SMALL_DELAYED_FREE_MODE, true); + if (memtx->delayed_free_mode++ == 0) { + switch (memtx->allocator_type) { + case MEMTX_SMALL_ALLOCATOR: + SmallAllocator::enter_delayed_free_mode(); + break; + default: + ; + } + } } void memtx_leave_delayed_free_mode(struct memtx_engine *memtx) { assert(memtx->delayed_free_mode > 0); - if (--memtx->delayed_free_mode == 0) { + if (--memtx->delayed_free_mode == 0) { + switch (memtx->allocator_type) { + case MEMTX_SMALL_ALLOCATOR: + SmallAllocator::leave_delayed_free_mode(); + break; + default: + ; + } + } } +template <class Allocator> struct tuple * memtx_tuple_new(struct tuple_format *format, const char *data, const char *end) { @@ -1256,7 +1308,7 @@ memtx_tuple_new(struct tuple_format *format, const char *data, const char *end) struct memtx_tuple *memtx_tuple; while ((memtx_tuple = (struct memtx_tuple *) - smalloc(&memtx->alloc, total)) == NULL) { + Allocator::alloc(total)) == NULL) { bool stop; memtx_engine_run_gc(memtx, &stop); if (stop) @@ -1284,6 +1336,7 @@ end: return tuple; } +template <class Allocator> void memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple) { @@ -1293,34 +1346,35 @@ memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple) struct memtx_tuple *memtx_tuple = container_of(tuple, struct memtx_tuple, base); size_t total = tuple_size(tuple) + offsetof(struct memtx_tuple, base); - if (memtx->alloc.free_mode != SMALL_DELAYED_FREE || - memtx_tuple->version == memtx->snapshot_version || + if (memtx_tuple->version == memtx->snapshot_version || format->is_temporary) - smfree(&memtx->alloc, memtx_tuple, total); + Allocator::free(memtx_tuple, total); else - smfree_delayed(&memtx->alloc, memtx_tuple, total); + Allocator::free_delayed(memtx_tuple, total); tuple_format_unref(format); } +template <class Allocator> void metmx_tuple_chunk_delete(struct tuple_format *format, const char *data) { - struct memtx_engine *memtx = (struct memtx_engine *)format->engine; + (void)format; struct tuple_chunk *tuple_chunk = container_of((const char (*)[0])data, struct tuple_chunk, data); uint32_t sz = tuple_chunk_sz(tuple_chunk->data_sz); - smfree(&memtx->alloc, tuple_chunk, sz); + Allocator::free(tuple_chunk, sz); } +template <class Allocator> const char * memtx_tuple_chunk_new(struct tuple_format *format, struct tuple *tuple, const char *data, uint32_t data_sz) { - struct memtx_engine *memtx = (struct memtx_engine *)format->engine; + (void)format; uint32_t sz = tuple_chunk_sz(data_sz); struct tuple_chunk *tuple_chunk = - (struct tuple_chunk *) smalloc(&memtx->alloc, sz); + (struct tuple_chunk *) Allocator::alloc(sz); if (tuple == NULL) { diag_set(OutOfMemory, sz, "smalloc", "tuple"); return NULL; @@ -1330,12 +1384,7 @@ memtx_tuple_chunk_new(struct tuple_format *format, struct tuple *tuple, return tuple_chunk->data; } -struct tuple_format_vtab memtx_tuple_format_vtab = { - memtx_tuple_delete, - memtx_tuple_new, - metmx_tuple_chunk_delete, - memtx_tuple_chunk_new, -}; +struct tuple_format_vtab memtx_tuple_format_vtab; /** * Allocate a block of size MEMTX_EXTENT_SIZE for memtx index diff --git a/src/box/memtx_engine.h b/src/box/memtx_engine.h index 8b380bf3c..6edb8b373 100644 --- a/src/box/memtx_engine.h +++ b/src/box/memtx_engine.h @@ -99,6 +99,11 @@ enum memtx_reserve_extents_num {
RESERVE_EXTENTS_BEFORE_REPLACE = 16 }; +enum memtx_allocator_type { + MEMTX_SMALL_ALLOCATOR, + MEMTX_SYSTEM_ALLOCATOR, +}; + /** * The size of the biggest memtx iterator. Used with * mempool_create. This is the size of the block that will be @@ -133,10 +138,6 @@ struct memtx_engine { * is reflected in box.slab.info(), @sa lua/slab.c. */ struct slab_arena arena; - /** Slab cache for allocating tuples. */ - struct slab_cache slab_cache; - /** Tuple allocator. */ - struct small_alloc alloc; /** Slab cache for allocating index extents. */ struct slab_cache index_slab_cache; /** Index extent allocator. */ @@ -178,6 +179,10 @@ struct memtx_engine { * memtx_gc_task::link. */ struct stailq gc_queue; + /** + * Type of memtx allocator. + */ + enum memtx_allocator_type allocator_type; }; struct memtx_gc_task; @@ -213,7 +218,7 @@ struct memtx_engine * memtx_engine_new(const char *snap_dirname, bool force_recovery, uint64_t tuple_arena_max_size, uint32_t objsize_min, bool dontdump, - float alloc_factor) + const char *allocator, float alloc_factor); int memtx_engine_recover_snapshot(struct memtx_engine *memtx, @@ -238,6 +243,9 @@ memtx_engine_set_max_tuple_size(struct memtx_engine *memtx, size_t max_size); void memtx_enter_delayed_free_mode(struct memtx_engine *memtx); +/** Tuple format vtab for memtx engine. */ +extern struct tuple_format_vtab memtx_tuple_format_vtab; + /** * Leave tuple delayed free mode. This function undoes the effect * of memtx_enter_delayed_free_mode(). @@ -245,17 +253,6 @@ memtx_enter_delayed_free_mode(struct memtx_engine *memtx); void memtx_leave_delayed_free_mode(struct memtx_engine *memtx); -/** Allocate a memtx tuple. @sa tuple_new(). */ -struct tuple * -memtx_tuple_new(struct tuple_format *format, const char *data, const char *end); - -/** Free a memtx tuple. @sa tuple_delete(). */ -void -memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple); - -/** Tuple format vtab for memtx engine. */ -extern struct tuple_format_vtab memtx_tuple_format_vtab; - enum { MEMTX_EXTENT_SIZE = 16 * 1024, MEMTX_SLAB_SIZE = 4 * 1024 * 1024 @@ -294,18 +291,38 @@ memtx_index_def_change_requires_rebuild(struct index *index, } /* extern "C" */ #include "diag.h" +#include "tuple_format.h" + +/** Allocate a memtx tuple. @sa tuple_new(). */ +template <class Allocator> +struct tuple * +memtx_tuple_new(struct tuple_format *format, const char *data, const char *end); + +/** Free a memtx tuple. @sa tuple_delete(). */ +template <class Allocator> +void +memtx_tuple_delete(struct tuple_format *format, struct tuple *tuple); + +template <class Allocator> +const char * +memtx_tuple_chunk_new(MAYBE_UNUSED struct tuple_format *format, struct tuple *tuple, + const char *data, uint32_t data_sz); + +template <class Allocator> +void +metmx_tuple_chunk_delete(MAYBE_UNUSED struct tuple_format *format, const char *data); static inline struct memtx_engine * memtx_engine_new_xc(const char *snap_dirname, bool force_recovery, uint64_t tuple_arena_max_size, uint32_t objsize_min, bool dontdump, - float alloc_factor) + const char *allocator, float alloc_factor) { struct memtx_engine *memtx; memtx = memtx_engine_new(snap_dirname, force_recovery, tuple_arena_max_size, objsize_min, dontdump, - alloc_factor); + allocator, alloc_factor); if (memtx == NULL) diag_raise(); return memtx; diff --git a/src/box/memtx_space.cc b/src/box/memtx_space.cc index e46e4eaeb..932b3af16 100644 --- a/src/box/memtx_space.cc +++ b/src/box/memtx_space.cc @@ -320,6 +320,7 @@ dup_replace_mode(uint32_t op) return op == IPROTO_INSERT ?
DUP_INSERT : DUP_REPLACE_OR_INSERT; } +template <class Allocator> static int memtx_space_execute_replace(struct space *space, struct txn *txn, struct request *request, struct tuple **result) @@ -327,8 +328,8 @@ memtx_space_execute_replace(struct space *space, struct txn *txn, struct memtx_space *memtx_space = (struct memtx_space *)space; struct txn_stmt *stmt = txn_current_stmt(txn); enum dup_replace_mode mode = dup_replace_mode(request->type); - stmt->new_tuple = memtx_tuple_new(space->format, request->tuple, - request->tuple_end); + stmt->new_tuple = memtx_tuple_new<Allocator>(space->format, + request->tuple, request->tuple_end); if (stmt->new_tuple == NULL) return -1; tuple_ref(stmt->new_tuple); @@ -378,6 +379,7 @@ memtx_space_execute_delete(struct space *space, struct txn *txn, return 0; } +template <class Allocator> static int memtx_space_execute_update(struct space *space, struct txn *txn, struct request *request, struct tuple **result) @@ -412,7 +414,7 @@ memtx_space_execute_update(struct space *space, struct txn *txn, if (new_data == NULL) return -1; - stmt->new_tuple = memtx_tuple_new(format, new_data, + stmt->new_tuple = memtx_tuple_new<Allocator>(format, new_data, new_data + new_size); if (stmt->new_tuple == NULL) return -1; @@ -428,6 +430,7 @@ memtx_space_execute_update(struct space *space, struct txn *txn, return 0; } +template <class Allocator> static int memtx_space_execute_upsert(struct space *space, struct txn *txn, struct request *request) @@ -483,7 +486,7 @@ memtx_space_execute_upsert(struct space *space, struct txn *txn, format, request->index_base) != 0) { return -1; } - stmt->new_tuple = memtx_tuple_new(format, request->tuple, + stmt->new_tuple = memtx_tuple_new<Allocator>(format, request->tuple, request->tuple_end); if (stmt->new_tuple == NULL) return -1; @@ -507,7 +510,7 @@ memtx_space_execute_upsert(struct space *space, struct txn *txn, if (new_data == NULL) return -1; - stmt->new_tuple = memtx_tuple_new(format, new_data, + stmt->new_tuple = memtx_tuple_new<Allocator>(format, new_data, new_data + new_size); if (stmt->new_tuple == NULL) return -1; @@ -554,19 +557,20 @@ memtx_space_execute_upsert(struct space *space, struct txn *txn, * destroyed space may lead to undefined behaviour. For this reason it * doesn't take txn as an argument.
*/ +template <class Allocator> static int memtx_space_ephemeral_replace(struct space *space, const char *tuple, const char *tuple_end) { struct memtx_space *memtx_space = (struct memtx_space *)space; - struct tuple *new_tuple = memtx_tuple_new(space->format, tuple, + struct tuple *new_tuple = memtx_tuple_new<Allocator>(space->format, tuple, tuple_end); if (new_tuple == NULL) return -1; struct tuple *old_tuple; if (memtx_space->replace(space, NULL, new_tuple, DUP_REPLACE_OR_INSERT, &old_tuple) != 0) { - memtx_tuple_delete(space->format, new_tuple); + memtx_tuple_delete<Allocator>(space->format, new_tuple); return -1; } if (old_tuple != NULL) @@ -1166,28 +1170,31 @@ memtx_space_prepare_alter(struct space *old_space, struct space *new_space) /* }}} DDL */ -static const struct space_vtab memtx_space_vtab = { - /* .destroy = */ memtx_space_destroy, - /* .bsize = */ memtx_space_bsize, - /* .execute_replace = */ memtx_space_execute_replace, - /* .execute_delete = */ memtx_space_execute_delete, - /* .execute_update = */ memtx_space_execute_update, - /* .execute_upsert = */ memtx_space_execute_upsert, - /* .ephemeral_replace = */ memtx_space_ephemeral_replace, - /* .ephemeral_delete = */ memtx_space_ephemeral_delete, - /* .ephemeral_rowid_next = */ memtx_space_ephemeral_rowid_next, - /* .init_system_space = */ memtx_init_system_space, - /* .init_ephemeral_space = */ memtx_init_ephemeral_space, - /* .check_index_def = */ memtx_space_check_index_def, - /* .create_index = */ memtx_space_create_index, - /* .add_primary_key = */ memtx_space_add_primary_key, - /* .drop_primary_key = */ memtx_space_drop_primary_key, - /* .check_format = */ memtx_space_check_format, - /* .build_index = */ memtx_space_build_index, - /* .swap_index = */ generic_space_swap_index, - /* .prepare_alter = */ memtx_space_prepare_alter, - /* .invalidate = */ generic_space_invalidate, +struct SmallAllocator; +#define MEMTX_SPACE_VTAB(Allocator, allocator) \ +static const struct space_vtab memtx_space_vtab_##allocator = { \ + /* .destroy = */ memtx_space_destroy, \ + /* .bsize = */ memtx_space_bsize, \ + /* .execute_replace = */ memtx_space_execute_replace<Allocator>, \ + /* .execute_delete = */ memtx_space_execute_delete, \ + /* .execute_update = */ memtx_space_execute_update<Allocator>, \ + /* .execute_upsert = */ memtx_space_execute_upsert<Allocator>, \ + /* .ephemeral_replace = */ memtx_space_ephemeral_replace<Allocator>, \ + /* .ephemeral_delete = */ memtx_space_ephemeral_delete, \ + /* .ephemeral_rowid_next = */ memtx_space_ephemeral_rowid_next, \ + /* .init_system_space = */ memtx_init_system_space, \ + /* .init_ephemeral_space = */ memtx_init_ephemeral_space, \ + /* .check_index_def = */ memtx_space_check_index_def, \ + /* .create_index = */ memtx_space_create_index, \ + /* .add_primary_key = */ memtx_space_add_primary_key, \ + /* .drop_primary_key = */ memtx_space_drop_primary_key, \ + /* .check_format = */ memtx_space_check_format, \ + /* .build_index = */ memtx_space_build_index, \ + /* .swap_index = */ generic_space_swap_index, \ + /* .prepare_alter = */ memtx_space_prepare_alter, \ + /* .invalidate = */ generic_space_invalidate, \ }; +MEMTX_SPACE_VTAB(SmallAllocator, small) struct space * memtx_space_new(struct memtx_engine *memtx, @@ -1219,8 +1226,18 @@ memtx_space_new(struct memtx_engine *memtx, } tuple_format_ref(format); + const struct space_vtab *vtab; + switch (memtx->allocator_type) { + case MEMTX_SMALL_ALLOCATOR: + vtab = &memtx_space_vtab_small; + break; + default: + tuple_format_unref(format); + free(memtx_space); + return NULL; + } if (space_create((struct space *)memtx_space, (struct engine
*)memtx, - &memtx_space_vtab, def, key_list, format) != 0) { + vtab, def, key_list, format) != 0) { tuple_format_unref(format); free(memtx_space); return NULL; diff --git a/src/box/small_allocator.cc b/src/box/small_allocator.cc new file mode 100644 index 000000000..e6b21c355 --- /dev/null +++ b/src/box/small_allocator.cc @@ -0,0 +1,74 @@ +/* + * Copyright 2010-2020, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "small_allocator.h" + +void +SmallAllocator::create(struct slab_arena *arena, + uint32_t objsize_min, float alloc_factor, float *actual_alloc_factor) +{ + slab_cache_create(&slab_cache, arena); + small_alloc_create(&small_alloc, &slab_cache, + objsize_min, alloc_factor, actual_alloc_factor); +} + +void +SmallAllocator::destroy(void) +{ + small_alloc_destroy(&small_alloc); + slab_cache_destroy(&slab_cache); +} + +void +SmallAllocator::enter_delayed_free_mode(void) +{ + small_alloc_setopt(&small_alloc, SMALL_DELAYED_FREE_MODE, true); +} + +void +SmallAllocator::leave_delayed_free_mode(void) +{ + small_alloc_setopt(&small_alloc, SMALL_DELAYED_FREE_MODE, false); +} + +void +SmallAllocator::stats(struct small_stats *stats, mempool_stats_cb cb, void *cb_ctx) +{ + small_stats(&small_alloc, stats, cb, cb_ctx); +} + +void +SmallAllocator::memory_check(void) +{ + slab_cache_check(&slab_cache); +} + +struct small_alloc SmallAllocator::small_alloc; +struct slab_cache SmallAllocator::slab_cache; diff --git a/src/box/small_allocator.h b/src/box/small_allocator.h new file mode 100644 index 000000000..f6aa5a069 --- /dev/null +++ b/src/box/small_allocator.h @@ -0,0 +1,58 @@ +#pragma once +/* + * Copyright 2010-2020, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include <small/small.h> + +struct SmallAllocator +{ + static void create(struct slab_arena *arena, + uint32_t objsize_min, float alloc_factor, + float *actual_alloc_factor); + static void destroy(void); + static void enter_delayed_free_mode(void); + static void leave_delayed_free_mode(void); + static void stats(struct small_stats *stats, mempool_stats_cb cb, void *cb_ctx); + static void memory_check(void); + static inline void *alloc(size_t size) { + return smalloc(&small_alloc, size); + }; + static inline void free(void *ptr, size_t size) { + smfree(&small_alloc, ptr, size); + } + static inline void free_delayed(void *ptr, size_t size) { + smfree_delayed(&small_alloc, ptr, size); + } + + /** Tuple allocator. */ + static struct small_alloc small_alloc; + /** Slab cache for allocating tuples. */ + static struct slab_cache slab_cache; +}; diff --git a/test/app-tap/init_script.result b/test/app-tap/init_script.result index 16c5b01d2..cd5218e61 100644 --- a/test/app-tap/init_script.result +++ b/test/app-tap/init_script.result @@ -3,6 +3,7 @@ -- box.cfg +allocator:small background:false checkpoint_count:2 checkpoint_interval:3600 diff --git a/test/box/admin.result b/test/box/admin.result index 05debe673..ecea53957 100644 --- a/test/box/admin.result +++ b/test/box/admin.result @@ -27,7 +27,9 @@ help() ... cfg_filter(box.cfg) --- -- - - background +- - - allocator + - small + - - background - false - - checkpoint_count - 2 diff --git a/test/box/cfg.result b/test/box/cfg.result index 22a720c2c..16b321008 100644 --- a/test/box/cfg.result +++ b/test/box/cfg.result @@ -15,7 +15,9 @@ box.cfg.nosuchoption = 1 | ... cfg_filter(box.cfg) | --- - | - - - background + | - - - allocator + | - small + | - - background | - false | - - checkpoint_count | - 2 @@ -130,7 +132,9 @@ box.cfg() | ... cfg_filter(box.cfg) | --- - | - - - background + | - - - allocator + | - small + | - - background | - false | - - checkpoint_count | - 2 -- 2.20.1
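For reference, a minimal usage sketch of the option this patch adds. The option name, its default ("small"), and the box.slab entry points come from the diff above; the memtx_memory value is an illustrative assumption, not part of the patch:

    -- Select the memtx tuple allocator at instance startup.
    -- "small" is the default and the only value this patch wires up;
    -- any other string makes memtx_engine_new() fail with
    -- "Invalid memory allocator name" (IllegalParams).
    box.cfg{
        allocator = 'small',
        memtx_memory = 128 * 1024 * 1024, -- illustrative size
        slab_alloc_factor = 1.05,         -- still honored by "small"
    }

    -- box.slab.info()/stats()/check() keep working: box_lua_slab_init()
    -- now registers the allocator-specific implementations from
    -- box_cfg_xc() right after engine_init().
    box.slab.info().items_used_ratio -- e.g. "0.21%"
    box.slab.check()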