[RFC v2] slab_arena: Enhance slab_arena_create to support madvise hints
Vladimir Davydov
vdavydov.dev at gmail.com
Mon May 13 13:06:42 MSK 2019
[Cc += Alexander]
Alexander, please take a look at how Cyrill handled the backward
compatibility problem.
Cyrill, please see a few comments inline.
On Mon, May 06, 2019 at 08:43:21PM +0300, Cyrill Gorcunov wrote:
> The purpose of this extension is to be able to eliminate
> some of slab arenas from dumping, especially when huge
> amount of memory involves.
>
> To keep backward compatibility we map flags passed into
> new SLAB_ARENA_x flags. To be able to distinguish old
> and new interfaces we reserve high bit of the integer
> value and remap flags internally to syscall compatible
> form.
>
> Same time I have to modify build script to define
> _GNU_SOURCE symbol otherwise madvise bits won't be
> found.
>
> Part-of https://github.com/tarantool/tarantool/issues/3509
> ---
> v2:
> - Encode madvise hint inside slab_arena_create flags, for backward compatibility
> - Enhance test to address dynamic slabs
> - Enhance build procedure to test all falgs and functions we're interested in
>
> Please review very carefully. I still don't modify tarantool code, I'm pretty
> sure it should be a separate patch since this one is big enough already and
> more importantly it *must not* break existing code thus better testing on
> its own.
>
> .gitignore | 1 +
> CMakeLists.txt | 19 +++++++
> small/config.h.cmake | 33 +++++++++++
> small/slab_arena.c | 52 +++++++++++++++---
> small/slab_arena.h | 25 ++++++++-
> test/slab_arena.c | 128 +++++++++++++++++++++++++++++++++++++++++++
> 6 files changed, 248 insertions(+), 10 deletions(-)
> create mode 100644 small/config.h.cmake
>
> diff --git a/.gitignore b/.gitignore
> index e2339c3..c4e4ade 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -17,3 +17,4 @@ build/
> CTestTestfile.cmake
> Testing
> CTestTestfile.cmake
> +small/config.h
> diff --git a/CMakeLists.txt b/CMakeLists.txt
> index ad27423..208528e 100644
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -1,6 +1,9 @@
> project(small C CXX)
> cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
>
> +include(CheckFunctionExists)
> +include(CheckSymbolExists)
> +
> if(NOT CMAKE_BUILD_TYPE)
> set(CMAKE_BUILD_TYPE Debug)
> endif()
> @@ -15,10 +18,26 @@ endif()
> # Enable GNU glibc extentions.
> add_definitions("-D_GNU_SOURCE")
>
> +set(CMAKE_REQUIRED_DEFINITIONS "-D_GNU_SOURCE")
> +
> +check_symbol_exists(MAP_ANON sys/mman.h TARANTOOL_SMALL_HAVE_MAP_ANON)
> +check_symbol_exists(MAP_ANONYMOUS sys/mman.h TARANTOOL_SMALL_HAVE_MAP_ANONYMOUS)
> +
> +check_function_exists(madvise TARANTOOL_SMALL_HAS_MADVISE)
HAS/HAVE - please be consistent in naming.
> +check_symbol_exists(MADV_NORMAL sys/mman.h TARANTOOL_SMALL_HAS_MADV_NORMAL)
> +check_symbol_exists(MADV_DONTDUMP sys/mman.h TARANTOOL_SMALL_HAS_MADV_DONTDUMP)
Do we really need to check all of that? Is a check for MADV_DONTDUMP not
enough?
> +
> +configure_file(
> + "small/config.h.cmake"
> + "small/config.h"
> + )
> +message (STATUS "")
> +
> # Valgrind
> include_directories(third_party)
>
> set(lib_headers
> + small/config.h
> small/ibuf.h
> small/lf_lifo.h
> small/lifo.h
> diff --git a/small/config.h.cmake b/small/config.h.cmake
> new file mode 100644
> index 0000000..cbb0429
> --- /dev/null
> +++ b/small/config.h.cmake
> @@ -0,0 +1,33 @@
> +#ifndef TARANTOOL_SMALL_CONFIG_H_INCLUDED
> +#define TARANTOOL_SMALL_CONFIG_H_INCLUDED
> +
> +/*
> + * Check for deprecated MAP_ANON.
> + */
> +#cmakedefine TARANTOOL_SMALL_HAS_MAP_ANON 1
> +#cmakedefine TARANTOOL_SMALL_HAS_MAP_ANONYMOUS 1
> +
> +#if !defined(TARANTOOL_SMALL_HAS_MAP_ANONYMOUS) && defined(TARANTOOL_SMALL_HAS_MAP_ANON)
> +/*
> + * MAP_ANON is deprecated, MAP_ANONYMOUS should be used instead.
> + * Unfortunately, it's not universally present (e.g. not present
> + * on FreeBSD.
> + */
> +# define MAP_ANONYMOUS MAP_ANON
> +#endif
> +
> +/*
> + * Defined if this platform has madvise(..)
> + * and flags we're interested in.
> + */
> +#cmakedefine TARANTOOL_SMALL_HAS_MADVISE 1
> +#cmakedefine TARANTOOL_SMALL_HAS_MADV_NORMAL 1
> +#cmakedefine TARANTOOL_SMALL_HAS_MADV_DONTDUMP 1
> +
> +#if defined(TARANTOOL_SMALL_HAS_MADVISE) && \
> + defined(TARANTOOL_SMALL_HAS_MADV_NORMAL) && \
> + defined(TARANTOOL_SMALL_HAS_MADV_DONTDUMP)
> +# define TARANTOOL_SMALL_USE_MADVISE 1
> +#endif
> +
> +#endif /* TARANTOOL_SMALL_CONFIG_H_INCLUDED */
> diff --git a/small/slab_arena.c b/small/slab_arena.c
> index 26e2620..0b6f2f5 100644
> --- a/small/slab_arena.c
> +++ b/small/slab_arena.c
> @@ -41,17 +41,12 @@
> #include <valgrind/valgrind.h>
> #include <valgrind/memcheck.h>
>
> -#if !defined(MAP_ANONYMOUS)
> -#define MAP_ANONYMOUS MAP_ANON
> -#endif
> -
> static void
> munmap_checked(void *addr, size_t size)
> {
> if (munmap(addr, size)) {
> char buf[64];
> - intptr_t ignore_it = (intptr_t)strerror_r(errno, buf,
> - sizeof(buf));
> + char *ignore_it = strerror_r(errno, buf, sizeof(buf));
> (void)ignore_it;
> fprintf(stderr, "Error in munmap(%p, %zu): %s\n",
> addr, size, buf);
> @@ -123,11 +118,38 @@ pow2round(size_t size)
> #define MAX(a, b) ((a) > (b) ? (a) : (b))
> #define MIN(a, b) ((a) < (b) ? (a) : (b))
>
> +static void
> +slab_arena_init_flags(struct slab_arena *arena, int slab_flags)
> +{
> +#ifdef TARANTOOL_SMALL_USE_MADVISE
> + arena->madv_flag = MADV_NORMAL;
> +#endif
> +
> + /*
> + * Old interface for backward compatibility, MAP_
> + * flags are passed directly without SLAB_ARENA_FLAG_MARK.
> + */
> + if (!(slab_flags & SLAB_ARENA_FLAG_MARK)) {
> + assert(slab_flags & (MAP_PRIVATE | MAP_SHARED));
> + arena->flags = slab_flags;
> + return;
> + }
> +
> + if (!(slab_flags & SLAB_ARENA_SHARED))
> + arena->flags = MAP_PRIVATE;
> + else
> + arena->flags = MAP_SHARED;
> +
> +#ifdef TARANTOOL_SMALL_USE_MADVISE
> + if (slab_flags & SLAB_ARENA_DONTDUMP)
> + arena->madv_flag = MADV_DONTDUMP;
> +#endif
May be, better store slab_flags in arena and convert them to
mmap/madvise flags when necessary? Would be one variable instead
of two.
> +}
> +
> int
> slab_arena_create(struct slab_arena *arena, struct quota *quota,
> - size_t prealloc, uint32_t slab_size, int flags)
> + size_t prealloc, uint32_t slab_size, int slab_flags)
> {
> - assert(flags & (MAP_PRIVATE | MAP_SHARED));
> lf_lifo_init(&arena->cache);
> VALGRIND_MAKE_MEM_DEFINED(&arena->cache, sizeof(struct lf_lifo));
>
> @@ -148,7 +170,7 @@ slab_arena_create(struct slab_arena *arena, struct quota *quota,
>
> arena->used = 0;
>
> - arena->flags = flags;
> + slab_arena_init_flags(arena, slab_flags);
>
> if (arena->prealloc) {
> arena->arena = mmap_checked(arena->prealloc,
> @@ -157,6 +179,12 @@ slab_arena_create(struct slab_arena *arena, struct quota *quota,
> } else {
> arena->arena = NULL;
> }
> +
> +#ifdef TARANTOOL_SMALL_USE_MADVISE
> + if (arena->arena && arena->madv_flag != MADV_NORMAL)
> + madvise(arena->arena, arena->slab_size, arena->madv_flag);
> +#endif
> +
> return arena->prealloc && !arena->arena ? -1 : 0;
> }
>
> @@ -205,6 +233,12 @@ slab_map(struct slab_arena *arena)
> __sync_sub_and_fetch(&arena->used, arena->slab_size);
> quota_release(arena->quota, arena->slab_size);
> }
> +
> +#ifdef TARANTOOL_SMALL_USE_MADVISE
> + if (ptr && arena->madv_flag != MADV_NORMAL)
> + madvise(ptr, arena->slab_size, arena->madv_flag);
> +#endif
> +
I'd move this code to a helper function to avoid code duplication in
slab_arena_create/slab_map.
> VALGRIND_MAKE_MEM_UNDEFINED(ptr, arena->slab_size);
> return ptr;
> }
> diff --git a/small/slab_arena.h b/small/slab_arena.h
> index 364252f..133878b 100644
> --- a/small/slab_arena.h
> +++ b/small/slab_arena.h
> @@ -33,6 +33,7 @@
> #include "lf_lifo.h"
> #include <sys/mman.h>
> #include <limits.h>
> +#include "config.h"
>
> #if defined(__cplusplus)
> extern "C" {
> @@ -45,6 +46,22 @@ enum {
> SMALL_UNLIMITED = SIZE_MAX/2 + 1
> };
>
> +/**
> + * Backward compatible flags to be used with slab_arena_create().
Please elaborate the comment: explain how this backward compatibility
hack works, what it's needed for.
> + */
> +#define SLAB_ARENA_FLAG_MARK (0x80000000)
> +#define SLAB_ARENA_FLAG(x) ((x) | SLAB_ARENA_FLAG_MARK)
> +
> +enum {
> + /* mmap() flags */
> + SLAB_ARENA_PRIVATE = SLAB_ARENA_FLAG(1 << 0),
> + SLAB_ARENA_SHARED = SLAB_ARENA_FLAG(1 << 1),
> +
> + /* madvise() flags */
> + SLAB_ARENA_NORMAL = SLAB_ARENA_FLAG(1 << 2),
> + SLAB_ARENA_DONTDUMP = SLAB_ARENA_FLAG(1 << 3)
> +};
> +
> /**
> * slab_arena -- a source of large aligned blocks of memory.
> * MT-safe.
> @@ -97,12 +114,18 @@ struct slab_arena {
> * mmap() flags: MAP_SHARED or MAP_PRIVATE
> */
> int flags;
> +#ifdef TARANTOOL_SMALL_USE_MADVISE
> + /**
> + * madvise() flag: MADV_DONTDUMP or MADV_NORMAL.
> + */
> + int madv_flag;
> +#endif
> };
>
> /** Initialize an arena. */
> int
> slab_arena_create(struct slab_arena *arena, struct quota *quota,
> - size_t prealloc, uint32_t slab_size, int flags);
> + size_t prealloc, uint32_t slab_size, int slab_flags);
>
> /** Destroy an arena. */
> void
> diff --git a/test/slab_arena.c b/test/slab_arena.c
> index d0a78d1..a9ea838 100644
> --- a/test/slab_arena.c
> +++ b/test/slab_arena.c
> @@ -2,7 +2,9 @@
> #include <small/quota.h>
> #include <stdio.h>
> #include <limits.h>
> +#include <unistd.h>
> #include <stdlib.h>
> +#include <string.h>
> #include <time.h>
> #include "unit.h"
>
> @@ -15,6 +17,130 @@ slab_arena_print(struct slab_arena *arena)
> arena->used, arena->slab_size);
> }
>
> +#define is_hex_digit(c) \
> + (((c) >= '0' && (c) <= '9') || \
> + ((c) >= 'a' && (c) <= 'f') || \
> + ((c) >= 'A' && (c) <= 'F'))
> +
> +static bool
> +is_vma_range_fmt(char *line, unsigned long *start, unsigned long *end)
> +{
> + char *p = line;
> + while (*line && is_hex_digit(*line))
> + line++;
> +
> + if (*line++ != '-')
> + return false;
> +
> + while (*line && is_hex_digit(*line))
> + line++;
> +
> + if (*line++ != ' ')
> + return false;
> +
> + sscanf(p, "%lx-%lx", start, end);
> + return true;
> +}
> +
> +static bool
> +vma_has_flag(unsigned long vm_start, const char *flag)
> +{
> + unsigned long start = 0, end = 0;
> + char buf[1024], *tok = NULL;
> + bool found = false;
> + FILE *f = NULL;
> +
> + f = fopen("/proc/self/smaps", "r");
> + if (!f) {
> + printf("ERROR: Can't open smaps for %lx\n", vm_start);
> + return false;
> + }
> +
> + while (fgets(buf, sizeof(buf), f)) {
> + if (is_vma_range_fmt(buf, &start, &end))
> + continue;
> + if (strncmp(buf, "VmFlags: ", 9) || start != vm_start)
> + continue;
> + tok = buf;
> + break;
> + }
> +
> + if (tok) {
> + for (tok = strtok(tok, " \n"); tok;
> + tok = strtok(NULL, " \n")) {
> + if (strcmp(tok, flag))
> + continue;
> + found = true;
> + break;
> + }
> + }
> +
> + fclose(f);
> + return found;
> +}
> +
> +static void
> +slab_test_madvise(void)
> +{
> + struct slab_arena arena;
> + struct quota quota;
> + void *ptr;
> +
> + /*
> + * The system doesn't support flags fetching.
> + */
> + if (access("/proc/self/smaps", F_OK))
> + return;
> +
> + /*
> + * If system supports madvise call, test that
> + * preallocated area has been madvised.
> + */
> + quota_init("a, 2000000);
> + slab_arena_create(&arena, "a, 3000000, 1, SLAB_ARENA_DONTDUMP);
> +
> + /*
> + * Will fetch from preallocated area.
> + */
> + ptr = slab_map(&arena);
> + if (!ptr) {
> + printf("ERROR: can't obtain preallocated slab\n");
> + goto out;
> + }
> +
> + if (!vma_has_flag((unsigned long)ptr, "dd"))
> + goto no_dd;
> +
> + slab_unmap(&arena, ptr);
> + slab_arena_destroy(&arena);
> +
> + /*
> + * A new slab for dynamic allocation.
> + */
> + quota_init("a, 2000000);
> + slab_arena_create(&arena, "a, 0, 0x10000, SLAB_ARENA_DONTDUMP);
> +
> + /*
> + * Will fetch newly allocated area.
> + */
> + ptr = slab_map(&arena);
> + if (!ptr) {
> + printf("ERROR: can't obtain dynamic slab\n");
> + goto out;
> + }
> +
> + if (!vma_has_flag((unsigned long)ptr, "dd"))
> + goto no_dd;
> +
> +out:
> + slab_unmap(&arena, ptr);
> + slab_arena_destroy(&arena);
> + return;
> +no_dd:
> + printf("ERROR: Expected dd flag on VMA address %p\n", ptr);
> + goto out;
> +}
> +
> int main()
> {
> struct quota quota;
> @@ -42,4 +168,6 @@ int main()
> slab_arena_create(&arena, "a, 3000000, 1, MAP_PRIVATE);
> slab_arena_print(&arena);
> slab_arena_destroy(&arena);
> +
> + slab_test_madvise();
> }
More information about the Tarantool-patches
mailing list