From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Mon, 13 May 2019 13:06:42 +0300 From: Vladimir Davydov Subject: Re: [RFC v2] slab_arena: Enhance slab_arena_create to support madvise hints Message-ID: <20190513100642.h3ye4sphinkcemv4@esperanza> References: <20190506174321.11614-1-gorcunov@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20190506174321.11614-1-gorcunov@gmail.com> To: Cyrill Gorcunov Cc: tml , Alexander Turenko List-ID: [Cc += Alexander] Alexander, please take a look at how Cyrill handled the backward compatibility problem. Cyrill, please see a few comments inline. On Mon, May 06, 2019 at 08:43:21PM +0300, Cyrill Gorcunov wrote: > The purpose of this extension is to be able to eliminate > some of slab arenas from dumping, especially when huge > amount of memory involves. > > To keep backward compatibility we map flags passed into > new SLAB_ARENA_x flags. To be able to distinguish old > and new interfaces we reserve high bit of the integer > value and remap flags internally to syscall compatible > form. > > Same time I have to modify build script to define > _GNU_SOURCE symbol otherwise madvise bits won't be > found. > > Part-of https://github.com/tarantool/tarantool/issues/3509 > --- > v2: > - Encode madvise hint inside slab_arena_create flags, for backward compatibility > - Enhance test to address dynamic slabs > - Enhance build procedure to test all falgs and functions we're interested in > > Please review very carefully. I still don't modify tarantool code, I'm pretty > sure it should be a separate patch since this one is big enough already and > more importantly it *must not* break existing code thus better testing on > its own. > > .gitignore | 1 + > CMakeLists.txt | 19 +++++++ > small/config.h.cmake | 33 +++++++++++ > small/slab_arena.c | 52 +++++++++++++++--- > small/slab_arena.h | 25 ++++++++- > test/slab_arena.c | 128 +++++++++++++++++++++++++++++++++++++++++++ > 6 files changed, 248 insertions(+), 10 deletions(-) > create mode 100644 small/config.h.cmake > > diff --git a/.gitignore b/.gitignore > index e2339c3..c4e4ade 100644 > --- a/.gitignore > +++ b/.gitignore > @@ -17,3 +17,4 @@ build/ > CTestTestfile.cmake > Testing > CTestTestfile.cmake > +small/config.h > diff --git a/CMakeLists.txt b/CMakeLists.txt > index ad27423..208528e 100644 > --- a/CMakeLists.txt > +++ b/CMakeLists.txt > @@ -1,6 +1,9 @@ > project(small C CXX) > cmake_minimum_required(VERSION 2.8 FATAL_ERROR) > > +include(CheckFunctionExists) > +include(CheckSymbolExists) > + > if(NOT CMAKE_BUILD_TYPE) > set(CMAKE_BUILD_TYPE Debug) > endif() > @@ -15,10 +18,26 @@ endif() > # Enable GNU glibc extentions. > add_definitions("-D_GNU_SOURCE") > > +set(CMAKE_REQUIRED_DEFINITIONS "-D_GNU_SOURCE") > + > +check_symbol_exists(MAP_ANON sys/mman.h TARANTOOL_SMALL_HAVE_MAP_ANON) > +check_symbol_exists(MAP_ANONYMOUS sys/mman.h TARANTOOL_SMALL_HAVE_MAP_ANONYMOUS) > + > +check_function_exists(madvise TARANTOOL_SMALL_HAS_MADVISE) HAS/HAVE - please be consistent in naming. > +check_symbol_exists(MADV_NORMAL sys/mman.h TARANTOOL_SMALL_HAS_MADV_NORMAL) > +check_symbol_exists(MADV_DONTDUMP sys/mman.h TARANTOOL_SMALL_HAS_MADV_DONTDUMP) Do we really need to check all of that? Is a check for MADV_DONTDUMP not enough? > + > +configure_file( > + "small/config.h.cmake" > + "small/config.h" > + ) > +message (STATUS "") > + > # Valgrind > include_directories(third_party) > > set(lib_headers > + small/config.h > small/ibuf.h > small/lf_lifo.h > small/lifo.h > diff --git a/small/config.h.cmake b/small/config.h.cmake > new file mode 100644 > index 0000000..cbb0429 > --- /dev/null > +++ b/small/config.h.cmake > @@ -0,0 +1,33 @@ > +#ifndef TARANTOOL_SMALL_CONFIG_H_INCLUDED > +#define TARANTOOL_SMALL_CONFIG_H_INCLUDED > + > +/* > + * Check for deprecated MAP_ANON. > + */ > +#cmakedefine TARANTOOL_SMALL_HAS_MAP_ANON 1 > +#cmakedefine TARANTOOL_SMALL_HAS_MAP_ANONYMOUS 1 > + > +#if !defined(TARANTOOL_SMALL_HAS_MAP_ANONYMOUS) && defined(TARANTOOL_SMALL_HAS_MAP_ANON) > +/* > + * MAP_ANON is deprecated, MAP_ANONYMOUS should be used instead. > + * Unfortunately, it's not universally present (e.g. not present > + * on FreeBSD. > + */ > +# define MAP_ANONYMOUS MAP_ANON > +#endif > + > +/* > + * Defined if this platform has madvise(..) > + * and flags we're interested in. > + */ > +#cmakedefine TARANTOOL_SMALL_HAS_MADVISE 1 > +#cmakedefine TARANTOOL_SMALL_HAS_MADV_NORMAL 1 > +#cmakedefine TARANTOOL_SMALL_HAS_MADV_DONTDUMP 1 > + > +#if defined(TARANTOOL_SMALL_HAS_MADVISE) && \ > + defined(TARANTOOL_SMALL_HAS_MADV_NORMAL) && \ > + defined(TARANTOOL_SMALL_HAS_MADV_DONTDUMP) > +# define TARANTOOL_SMALL_USE_MADVISE 1 > +#endif > + > +#endif /* TARANTOOL_SMALL_CONFIG_H_INCLUDED */ > diff --git a/small/slab_arena.c b/small/slab_arena.c > index 26e2620..0b6f2f5 100644 > --- a/small/slab_arena.c > +++ b/small/slab_arena.c > @@ -41,17 +41,12 @@ > #include > #include > > -#if !defined(MAP_ANONYMOUS) > -#define MAP_ANONYMOUS MAP_ANON > -#endif > - > static void > munmap_checked(void *addr, size_t size) > { > if (munmap(addr, size)) { > char buf[64]; > - intptr_t ignore_it = (intptr_t)strerror_r(errno, buf, > - sizeof(buf)); > + char *ignore_it = strerror_r(errno, buf, sizeof(buf)); > (void)ignore_it; > fprintf(stderr, "Error in munmap(%p, %zu): %s\n", > addr, size, buf); > @@ -123,11 +118,38 @@ pow2round(size_t size) > #define MAX(a, b) ((a) > (b) ? (a) : (b)) > #define MIN(a, b) ((a) < (b) ? (a) : (b)) > > +static void > +slab_arena_init_flags(struct slab_arena *arena, int slab_flags) > +{ > +#ifdef TARANTOOL_SMALL_USE_MADVISE > + arena->madv_flag = MADV_NORMAL; > +#endif > + > + /* > + * Old interface for backward compatibility, MAP_ > + * flags are passed directly without SLAB_ARENA_FLAG_MARK. > + */ > + if (!(slab_flags & SLAB_ARENA_FLAG_MARK)) { > + assert(slab_flags & (MAP_PRIVATE | MAP_SHARED)); > + arena->flags = slab_flags; > + return; > + } > + > + if (!(slab_flags & SLAB_ARENA_SHARED)) > + arena->flags = MAP_PRIVATE; > + else > + arena->flags = MAP_SHARED; > + > +#ifdef TARANTOOL_SMALL_USE_MADVISE > + if (slab_flags & SLAB_ARENA_DONTDUMP) > + arena->madv_flag = MADV_DONTDUMP; > +#endif May be, better store slab_flags in arena and convert them to mmap/madvise flags when necessary? Would be one variable instead of two. > +} > + > int > slab_arena_create(struct slab_arena *arena, struct quota *quota, > - size_t prealloc, uint32_t slab_size, int flags) > + size_t prealloc, uint32_t slab_size, int slab_flags) > { > - assert(flags & (MAP_PRIVATE | MAP_SHARED)); > lf_lifo_init(&arena->cache); > VALGRIND_MAKE_MEM_DEFINED(&arena->cache, sizeof(struct lf_lifo)); > > @@ -148,7 +170,7 @@ slab_arena_create(struct slab_arena *arena, struct quota *quota, > > arena->used = 0; > > - arena->flags = flags; > + slab_arena_init_flags(arena, slab_flags); > > if (arena->prealloc) { > arena->arena = mmap_checked(arena->prealloc, > @@ -157,6 +179,12 @@ slab_arena_create(struct slab_arena *arena, struct quota *quota, > } else { > arena->arena = NULL; > } > + > +#ifdef TARANTOOL_SMALL_USE_MADVISE > + if (arena->arena && arena->madv_flag != MADV_NORMAL) > + madvise(arena->arena, arena->slab_size, arena->madv_flag); > +#endif > + > return arena->prealloc && !arena->arena ? -1 : 0; > } > > @@ -205,6 +233,12 @@ slab_map(struct slab_arena *arena) > __sync_sub_and_fetch(&arena->used, arena->slab_size); > quota_release(arena->quota, arena->slab_size); > } > + > +#ifdef TARANTOOL_SMALL_USE_MADVISE > + if (ptr && arena->madv_flag != MADV_NORMAL) > + madvise(ptr, arena->slab_size, arena->madv_flag); > +#endif > + I'd move this code to a helper function to avoid code duplication in slab_arena_create/slab_map. > VALGRIND_MAKE_MEM_UNDEFINED(ptr, arena->slab_size); > return ptr; > } > diff --git a/small/slab_arena.h b/small/slab_arena.h > index 364252f..133878b 100644 > --- a/small/slab_arena.h > +++ b/small/slab_arena.h > @@ -33,6 +33,7 @@ > #include "lf_lifo.h" > #include > #include > +#include "config.h" > > #if defined(__cplusplus) > extern "C" { > @@ -45,6 +46,22 @@ enum { > SMALL_UNLIMITED = SIZE_MAX/2 + 1 > }; > > +/** > + * Backward compatible flags to be used with slab_arena_create(). Please elaborate the comment: explain how this backward compatibility hack works, what it's needed for. > + */ > +#define SLAB_ARENA_FLAG_MARK (0x80000000) > +#define SLAB_ARENA_FLAG(x) ((x) | SLAB_ARENA_FLAG_MARK) > + > +enum { > + /* mmap() flags */ > + SLAB_ARENA_PRIVATE = SLAB_ARENA_FLAG(1 << 0), > + SLAB_ARENA_SHARED = SLAB_ARENA_FLAG(1 << 1), > + > + /* madvise() flags */ > + SLAB_ARENA_NORMAL = SLAB_ARENA_FLAG(1 << 2), > + SLAB_ARENA_DONTDUMP = SLAB_ARENA_FLAG(1 << 3) > +}; > + > /** > * slab_arena -- a source of large aligned blocks of memory. > * MT-safe. > @@ -97,12 +114,18 @@ struct slab_arena { > * mmap() flags: MAP_SHARED or MAP_PRIVATE > */ > int flags; > +#ifdef TARANTOOL_SMALL_USE_MADVISE > + /** > + * madvise() flag: MADV_DONTDUMP or MADV_NORMAL. > + */ > + int madv_flag; > +#endif > }; > > /** Initialize an arena. */ > int > slab_arena_create(struct slab_arena *arena, struct quota *quota, > - size_t prealloc, uint32_t slab_size, int flags); > + size_t prealloc, uint32_t slab_size, int slab_flags); > > /** Destroy an arena. */ > void > diff --git a/test/slab_arena.c b/test/slab_arena.c > index d0a78d1..a9ea838 100644 > --- a/test/slab_arena.c > +++ b/test/slab_arena.c > @@ -2,7 +2,9 @@ > #include > #include > #include > +#include > #include > +#include > #include > #include "unit.h" > > @@ -15,6 +17,130 @@ slab_arena_print(struct slab_arena *arena) > arena->used, arena->slab_size); > } > > +#define is_hex_digit(c) \ > + (((c) >= '0' && (c) <= '9') || \ > + ((c) >= 'a' && (c) <= 'f') || \ > + ((c) >= 'A' && (c) <= 'F')) > + > +static bool > +is_vma_range_fmt(char *line, unsigned long *start, unsigned long *end) > +{ > + char *p = line; > + while (*line && is_hex_digit(*line)) > + line++; > + > + if (*line++ != '-') > + return false; > + > + while (*line && is_hex_digit(*line)) > + line++; > + > + if (*line++ != ' ') > + return false; > + > + sscanf(p, "%lx-%lx", start, end); > + return true; > +} > + > +static bool > +vma_has_flag(unsigned long vm_start, const char *flag) > +{ > + unsigned long start = 0, end = 0; > + char buf[1024], *tok = NULL; > + bool found = false; > + FILE *f = NULL; > + > + f = fopen("/proc/self/smaps", "r"); > + if (!f) { > + printf("ERROR: Can't open smaps for %lx\n", vm_start); > + return false; > + } > + > + while (fgets(buf, sizeof(buf), f)) { > + if (is_vma_range_fmt(buf, &start, &end)) > + continue; > + if (strncmp(buf, "VmFlags: ", 9) || start != vm_start) > + continue; > + tok = buf; > + break; > + } > + > + if (tok) { > + for (tok = strtok(tok, " \n"); tok; > + tok = strtok(NULL, " \n")) { > + if (strcmp(tok, flag)) > + continue; > + found = true; > + break; > + } > + } > + > + fclose(f); > + return found; > +} > + > +static void > +slab_test_madvise(void) > +{ > + struct slab_arena arena; > + struct quota quota; > + void *ptr; > + > + /* > + * The system doesn't support flags fetching. > + */ > + if (access("/proc/self/smaps", F_OK)) > + return; > + > + /* > + * If system supports madvise call, test that > + * preallocated area has been madvised. > + */ > + quota_init("a, 2000000); > + slab_arena_create(&arena, "a, 3000000, 1, SLAB_ARENA_DONTDUMP); > + > + /* > + * Will fetch from preallocated area. > + */ > + ptr = slab_map(&arena); > + if (!ptr) { > + printf("ERROR: can't obtain preallocated slab\n"); > + goto out; > + } > + > + if (!vma_has_flag((unsigned long)ptr, "dd")) > + goto no_dd; > + > + slab_unmap(&arena, ptr); > + slab_arena_destroy(&arena); > + > + /* > + * A new slab for dynamic allocation. > + */ > + quota_init("a, 2000000); > + slab_arena_create(&arena, "a, 0, 0x10000, SLAB_ARENA_DONTDUMP); > + > + /* > + * Will fetch newly allocated area. > + */ > + ptr = slab_map(&arena); > + if (!ptr) { > + printf("ERROR: can't obtain dynamic slab\n"); > + goto out; > + } > + > + if (!vma_has_flag((unsigned long)ptr, "dd")) > + goto no_dd; > + > +out: > + slab_unmap(&arena, ptr); > + slab_arena_destroy(&arena); > + return; > +no_dd: > + printf("ERROR: Expected dd flag on VMA address %p\n", ptr); > + goto out; > +} > + > int main() > { > struct quota quota; > @@ -42,4 +168,6 @@ int main() > slab_arena_create(&arena, "a, 3000000, 1, MAP_PRIVATE); > slab_arena_print(&arena); > slab_arena_destroy(&arena); > + > + slab_test_madvise(); > }