From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Return-Path: Date: Thu, 16 May 2019 00:49:11 +0300 From: Cyrill Gorcunov Subject: [PATCH v4] slab_arena: Enhance slab_arena_create to support madvise hints Message-ID: <20190515214911.GA11494@uranus> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline To: Vladimir Davydov Cc: tml , Alexander Turenko List-ID: The purpose of this extension is to be able to eliminate some of slab arenas from dumping, especially when huge amount of memory involves. To keep backward compatibility we map flags passed into new SLAB_ARENA_x flags. To be able to distinguish old and new interfaces we reserve high bit of the integer value and remap flags internally to syscall compatible form. Same time I have to modify build script to define _GNU_SOURCE symbol otherwise madvise bits won't be found. Another aspect of the patch is simple feature engine: we will need to test if dontdump is supported runtime, for this sake one can call small_test_feature(SMALL_FEATURE_DONTDUMP); to test if we can exclude particular slab arena from dumping. Part-of https://github.com/tarantool/tarantool/issues/3509 --- Take a look please, if such feature testing engine is fine for you? v2: - Encode madvise hint inside slab_arena_create flags, for backward compatibility - Enhance test to address dynamic slabs - Enhance build procedure to test all falgs and functions we're interested in v3: - Keep SLAB_ARENA_ flags inside struct slab_arena - Use HAVE keyword in cmake - Move madvise into madvise_checked helper - Extend comment for SLAB_ARENA_ flags v4: - Implement small_test_feature helper to test supported features runtime .gitignore | 1 + CMakeLists.txt | 20 +++++++ small/config.h.cmake | 31 ++++++++++ small/features.c | 134 +++++++++++++++++++++++++++++++++++++++++++ small/features.h | 60 +++++++++++++++++++ small/slab_arena.c | 66 ++++++++++++++++++--- small/slab_arena.h | 23 +++++++- test/slab_arena.c | 128 +++++++++++++++++++++++++++++++++++++++++ 8 files changed, 454 insertions(+), 9 deletions(-) create mode 100644 small/config.h.cmake create mode 100644 small/features.c create mode 100644 small/features.h diff --git a/.gitignore b/.gitignore index e2339c3..c4e4ade 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ build/ CTestTestfile.cmake Testing CTestTestfile.cmake +small/config.h diff --git a/CMakeLists.txt b/CMakeLists.txt index ad27423..5c35b98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,9 @@ project(small C CXX) cmake_minimum_required(VERSION 2.8 FATAL_ERROR) +include(CheckFunctionExists) +include(CheckSymbolExists) + if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Debug) endif() @@ -15,10 +18,26 @@ endif() # Enable GNU glibc extentions. add_definitions("-D_GNU_SOURCE") +set(CMAKE_REQUIRED_DEFINITIONS "-D_GNU_SOURCE") + +check_symbol_exists(MAP_ANON sys/mman.h TARANTOOL_SMALL_HAVE_MAP_ANON) +check_symbol_exists(MAP_ANONYMOUS sys/mman.h TARANTOOL_SMALL_HAVE_MAP_ANONYMOUS) + +check_function_exists(madvise TARANTOOL_SMALL_HAVE_MADVISE) +check_symbol_exists(MADV_DONTDUMP sys/mman.h TARANTOOL_SMALL_HAVE_MADV_DONTDUMP) + +configure_file( + "small/config.h.cmake" + "small/config.h" + ) +message (STATUS "") + # Valgrind include_directories(third_party) set(lib_headers + small/config.h + small/features.h small/ibuf.h small/lf_lifo.h small/lifo.h @@ -35,6 +54,7 @@ set(lib_headers small/lsregion.h) set(lib_sources + small/features.c small/slab_cache.c small/region.c small/mempool.c diff --git a/small/config.h.cmake b/small/config.h.cmake new file mode 100644 index 0000000..989ffd1 --- /dev/null +++ b/small/config.h.cmake @@ -0,0 +1,31 @@ +#ifndef TARANTOOL_SMALL_CONFIG_H_INCLUDED +#define TARANTOOL_SMALL_CONFIG_H_INCLUDED + +/* + * Check for deprecated MAP_ANON. + */ +#cmakedefine TARANTOOL_SMALL_HAVE_MAP_ANON 1 +#cmakedefine TARANTOOL_SMALL_HAVE_MAP_ANONYMOUS 1 + +#if !defined(TARANTOOL_SMALL_HAVE_MAP_ANONYMOUS) && defined(TARANTOOL_SMALL_HAVE_MAP_ANON) +/* + * MAP_ANON is deprecated, MAP_ANONYMOUS should be used instead. + * Unfortunately, it's not universally present (e.g. not present + * on FreeBSD. + */ +# define MAP_ANONYMOUS MAP_ANON +#endif + +/* + * Defined if this platform has madvise(..) + * and flags we're interested in. + */ +#cmakedefine TARANTOOL_SMALL_HAVE_MADVISE 1 +#cmakedefine TARANTOOL_SMALL_HAVE_MADV_DONTDUMP 1 + +#if defined(TARANTOOL_SMALL_HAVE_MADVISE) && \ + defined(TARANTOOL_SMALL_HAVE_MADV_DONTDUMP) +# define TARANTOOL_SMALL_USE_MADVISE 1 +#endif + +#endif /* TARANTOOL_SMALL_CONFIG_H_INCLUDED */ diff --git a/small/features.c b/small/features.c new file mode 100644 index 0000000..ad17d5c --- /dev/null +++ b/small/features.c @@ -0,0 +1,134 @@ +/* + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "features.h" +#include "config.h" + +typedef bool (*rt_helper_t)(void); + +#define SMALL_FEATURE_MASK(v) ((uint64_t)1 << (v)) + +/* The mask carries bits for compiled in features */ +static uint64_t builtin_mask = +#ifdef TARANTOOL_SMALL_USE_MADVISE + SMALL_FEATURE_MASK(SMALL_FEATURE_DONTDUMP) | +#endif + 0; + +#ifdef TARANTOOL_SMALL_USE_MADVISE +static bool +test_dontdump(void) +{ + size_t size = sysconf(_SC_PAGESIZE); + intptr_t ignore_it; + bool ret = false; + char buf[64]; + void *ptr; + + (void)ignore_it; + + /* + * We need to try madvise a real data, + * plain madvise() with -ENOSYS is not + * enough: we need a specific flag to + * be implemented. Thus allocate page + * and work on it. + */ + + ptr = mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (ptr == MAP_FAILED) { + /* + * We're out of memory, and cant guarantee anything. + */ + ignore_it = (intptr_t)strerror_r(errno, buf, sizeof(buf)); + fprintf(stderr, "Error in mmap(NULL, %zu, ...): %s\n", size, buf); + goto out; + } + + if (madvise(ptr, size, MADV_DONTDUMP) == 0) + ret = true; + + if (munmap(ptr, size)) { + ignore_it = (intptr_t)strerror_r(errno, buf, sizeof(buf)); + fprintf(stderr, "Error in munmap(%p, %zu): %s\n", + ptr, size, buf); + } +out: + return ret; +} +#else +static bool test_dontdump(void) { return false; } +#endif + +/* + * Runtime testers, put there features if they are dynamic. + */ +static rt_helper_t rt_helpers[FEATURE_MAX] = { + [SMALL_FEATURE_DONTDUMP] = test_dontdump, +}; + +/** + * small_test_feature -- test if particular feature is supported + * @feature: A feature to test. + * + * Returns true if feature is available, false othrewise. + */ +bool +small_test_feature(unsigned int feature) +{ + uint64_t mask = SMALL_FEATURE_MASK(feature); + + if (feature >= FEATURE_MAX) + return false; + + /* Make sure it is compiled in. */ + if (!(builtin_mask & mask)) + return false; + + /* + * Some feature may require runtime + * testing (say we're compiled with + * particular feature support but + * it is unavailable on the system + * we're running on, or get filtered + * out). + */ + return rt_helpers[feature] ? rt_helpers[feature]() : true; +} diff --git a/small/features.h b/small/features.h new file mode 100644 index 0000000..0a994cc --- /dev/null +++ b/small/features.h @@ -0,0 +1,60 @@ +#ifndef TARANTOOL_SMALL_FEATURES_H_INCLUDED +#define TARANTOOL_SMALL_FEATURES_H_INCLUDED +/* + * Copyright 2010-2019, Tarantool AUTHORS, please see AUTHORS file. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the + * following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +#if defined(__cplusplus) +extern "C" { +#endif /* defined(__cplusplus) */ + +/** + * A list of features to test. + */ +enum { + /* To check if SLAB_ARENA_DONTDUMP is supported */ + SMALL_FEATURE_DONTDUMP = 0, + + FEATURE_MAX +}; + +/** + * Test if a feature is supported. + */ +bool +small_test_feature(unsigned int feature); + +#if defined(__cplusplus) +} /* extern "C" */ +#endif + +#endif /* TARANTOOL_SMALL_FEATURES_H_INCLUDED */ diff --git a/small/slab_arena.c b/small/slab_arena.c index 26e2620..3a53ce4 100644 --- a/small/slab_arena.c +++ b/small/slab_arena.c @@ -41,9 +41,29 @@ #include #include -#if !defined(MAP_ANONYMOUS) -#define MAP_ANONYMOUS MAP_ANON +static void +madvise_checked(void *ptr, size_t size, int flags) +{ +#ifdef TARANTOOL_SMALL_USE_MADVISE + if (!ptr || !(flags & SLAB_ARENA_DONTDUMP)) + return; + + if (madvise(ptr, size, MADV_DONTDUMP)) { + intptr_t ignore_it; + char buf[64]; + + ignore_it = (intptr_t)strerror_r(errno, buf, sizeof(buf)); + (void)ignore_it; + + fprintf(stderr, "Error in madvise(%p, %zu, 0x%x): %s\n", + ptr, size, MADV_DONTDUMP, buf); + } +#else + (void)ptr; + (void)size; + (void)flags; #endif +} static void munmap_checked(void *addr, size_t size) @@ -66,13 +86,18 @@ mmap_checked(size_t size, size_t align, int flags) assert((align & (align - 1)) == 0); /* The size must be a multiple of alignment */ assert((size & (align - 1)) == 0); + + if (flags & SLAB_ARENA_PRIVATE) + flags = MAP_PRIVATE | MAP_ANONYMOUS; + else + flags = MAP_SHARED | MAP_ANONYMOUS; + /* * All mappings except the first are likely to * be aligned already. Be optimistic by trying * to map exactly the requested amount. */ - void *map = mmap(NULL, size, PROT_READ | PROT_WRITE, - flags | MAP_ANONYMOUS, -1, 0); + void *map = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); if (map == MAP_FAILED) return NULL; if (((intptr_t) map & (align - 1)) == 0) @@ -85,8 +110,7 @@ mmap_checked(size_t size, size_t align, int flags) * fragmentation depending on the kernels allocation * strategy. */ - map = mmap(NULL, size + align, PROT_READ | PROT_WRITE, - flags | MAP_ANONYMOUS, -1, 0); + map = mmap(NULL, size + align, PROT_READ | PROT_WRITE, flags, -1, 0); if (map == MAP_FAILED) return NULL; @@ -123,11 +147,31 @@ pow2round(size_t size) #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define MIN(a, b) ((a) < (b) ? (a) : (b)) +static void +slab_arena_init_flags(struct slab_arena *arena, int flags) +{ + /* + * Old interface for backward compatibility, MAP_ + * flags are passed directly without SLAB_ARENA_FLAG_MARK, + * map them to internal ones. + */ + if (!(flags & SLAB_ARENA_FLAG_MARK)) { + assert(flags & (MAP_PRIVATE | MAP_SHARED)); + if (flags == MAP_PRIVATE) + arena->flags = SLAB_ARENA_PRIVATE; + else + arena->flags = SLAB_ARENA_SHARED; + return; + } + + assert(flags & (SLAB_ARENA_PRIVATE | SLAB_ARENA_SHARED)); + arena->flags = flags; +} + int slab_arena_create(struct slab_arena *arena, struct quota *quota, size_t prealloc, uint32_t slab_size, int flags) { - assert(flags & (MAP_PRIVATE | MAP_SHARED)); lf_lifo_init(&arena->cache); VALGRIND_MAKE_MEM_DEFINED(&arena->cache, sizeof(struct lf_lifo)); @@ -148,7 +192,7 @@ slab_arena_create(struct slab_arena *arena, struct quota *quota, arena->used = 0; - arena->flags = flags; + slab_arena_init_flags(arena, flags); if (arena->prealloc) { arena->arena = mmap_checked(arena->prealloc, @@ -157,6 +201,9 @@ slab_arena_create(struct slab_arena *arena, struct quota *quota, } else { arena->arena = NULL; } + + madvise_checked(arena->arena, arena->slab_size, arena->flags); + return arena->prealloc && !arena->arena ? -1 : 0; } @@ -205,6 +252,9 @@ slab_map(struct slab_arena *arena) __sync_sub_and_fetch(&arena->used, arena->slab_size); quota_release(arena->quota, arena->slab_size); } + + madvise_checked(ptr, arena->slab_size, arena->flags); + VALGRIND_MAKE_MEM_UNDEFINED(ptr, arena->slab_size); return ptr; } diff --git a/small/slab_arena.h b/small/slab_arena.h index 364252f..2545b00 100644 --- a/small/slab_arena.h +++ b/small/slab_arena.h @@ -33,6 +33,7 @@ #include "lf_lifo.h" #include #include +#include "config.h" #if defined(__cplusplus) extern "C" { @@ -45,6 +46,26 @@ enum { SMALL_UNLIMITED = SIZE_MAX/2 + 1 }; +/** + * Backward compatible flags to be used with slab_arena_create(). + * Initially we have been passing MAP_SHARED or MAP_PRIVATE flags + * only, thus to continue supporting them we need to sign new API + * with some predefined value. For this sake we reserve high bit + * as a mark which allows us to distinguish system independent + * SLAB_ARENA_ flags from anything else. + */ +#define SLAB_ARENA_FLAG_MARK (0x80000000) +#define SLAB_ARENA_FLAG(x) ((x) | SLAB_ARENA_FLAG_MARK) + +enum { + /* mmap() flags */ + SLAB_ARENA_PRIVATE = SLAB_ARENA_FLAG(1 << 0), + SLAB_ARENA_SHARED = SLAB_ARENA_FLAG(1 << 1), + + /* madvise() flags */ + SLAB_ARENA_DONTDUMP = SLAB_ARENA_FLAG(1 << 2) +}; + /** * slab_arena -- a source of large aligned blocks of memory. * MT-safe. @@ -94,7 +115,7 @@ struct slab_arena { */ uint32_t slab_size; /** - * mmap() flags: MAP_SHARED or MAP_PRIVATE + * SLAB_ARENA_ flags for mmap() and madvise() calls. */ int flags; }; diff --git a/test/slab_arena.c b/test/slab_arena.c index d0a78d1..a9ea838 100644 --- a/test/slab_arena.c +++ b/test/slab_arena.c @@ -2,7 +2,9 @@ #include #include #include +#include #include +#include #include #include "unit.h" @@ -15,6 +17,130 @@ slab_arena_print(struct slab_arena *arena) arena->used, arena->slab_size); } +#define is_hex_digit(c) \ + (((c) >= '0' && (c) <= '9') || \ + ((c) >= 'a' && (c) <= 'f') || \ + ((c) >= 'A' && (c) <= 'F')) + +static bool +is_vma_range_fmt(char *line, unsigned long *start, unsigned long *end) +{ + char *p = line; + while (*line && is_hex_digit(*line)) + line++; + + if (*line++ != '-') + return false; + + while (*line && is_hex_digit(*line)) + line++; + + if (*line++ != ' ') + return false; + + sscanf(p, "%lx-%lx", start, end); + return true; +} + +static bool +vma_has_flag(unsigned long vm_start, const char *flag) +{ + unsigned long start = 0, end = 0; + char buf[1024], *tok = NULL; + bool found = false; + FILE *f = NULL; + + f = fopen("/proc/self/smaps", "r"); + if (!f) { + printf("ERROR: Can't open smaps for %lx\n", vm_start); + return false; + } + + while (fgets(buf, sizeof(buf), f)) { + if (is_vma_range_fmt(buf, &start, &end)) + continue; + if (strncmp(buf, "VmFlags: ", 9) || start != vm_start) + continue; + tok = buf; + break; + } + + if (tok) { + for (tok = strtok(tok, " \n"); tok; + tok = strtok(NULL, " \n")) { + if (strcmp(tok, flag)) + continue; + found = true; + break; + } + } + + fclose(f); + return found; +} + +static void +slab_test_madvise(void) +{ + struct slab_arena arena; + struct quota quota; + void *ptr; + + /* + * The system doesn't support flags fetching. + */ + if (access("/proc/self/smaps", F_OK)) + return; + + /* + * If system supports madvise call, test that + * preallocated area has been madvised. + */ + quota_init("a, 2000000); + slab_arena_create(&arena, "a, 3000000, 1, SLAB_ARENA_DONTDUMP); + + /* + * Will fetch from preallocated area. + */ + ptr = slab_map(&arena); + if (!ptr) { + printf("ERROR: can't obtain preallocated slab\n"); + goto out; + } + + if (!vma_has_flag((unsigned long)ptr, "dd")) + goto no_dd; + + slab_unmap(&arena, ptr); + slab_arena_destroy(&arena); + + /* + * A new slab for dynamic allocation. + */ + quota_init("a, 2000000); + slab_arena_create(&arena, "a, 0, 0x10000, SLAB_ARENA_DONTDUMP); + + /* + * Will fetch newly allocated area. + */ + ptr = slab_map(&arena); + if (!ptr) { + printf("ERROR: can't obtain dynamic slab\n"); + goto out; + } + + if (!vma_has_flag((unsigned long)ptr, "dd")) + goto no_dd; + +out: + slab_unmap(&arena, ptr); + slab_arena_destroy(&arena); + return; +no_dd: + printf("ERROR: Expected dd flag on VMA address %p\n", ptr); + goto out; +} + int main() { struct quota quota; @@ -42,4 +168,6 @@ int main() slab_arena_create(&arena, "a, 3000000, 1, MAP_PRIVATE); slab_arena_print(&arena); slab_arena_destroy(&arena); + + slab_test_madvise(); } -- 2.20.1