Tarantool development patches archive
 help / color / mirror / Atom feed
* [PATCH v2] box/memtx: Allow to skip tuple memory from coredump
@ 2019-05-13 22:20 Cyrill Gorcunov
  2019-05-15 12:45 ` Vladimir Davydov
  0 siblings, 1 reply; 3+ messages in thread
From: Cyrill Gorcunov @ 2019-05-13 22:20 UTC (permalink / raw)
  To: Alexander Turenko, Vladimir Davydov; +Cc: tml

If there is a huge number of tuples, all of their memory
goes to the coredump file even if we don't need it for
problem investigation. As a result, the coredump may
blow up to gigabytes in size.

Let's allow excluding this memory from the dump via the
box.cfg::strip_core boolean parameter.

Note that the tuple's arena is used not only for tuples
themselves but for memtx->index_extent_pool and
memtx->iterator_pool as well, so they are affected
too.

Fixes #3509

@TarantoolBot document
Title: Document box.cfg.strip_core

When Tarantool runs under heavy load, the memory allocated
for tuples may be very large. To exclude this memory from
the `coredump` file, set the `box.cfg.strip_core` parameter
to `true`.

The default value is `false`.
---
v2:
 - Use strip_core name for box parameter
 - Pass cfg_geti directly to functions
 - vy_mem_env_create for now left as it was,
   simply because we can't use cfg_geti there
   (linking with library fails), I think we can
   address it on top later

Guys, please take a look once time permits.

 src/box/box.cc           |  1 +
 src/box/lua/load_cfg.lua |  2 ++
 src/box/memtx_engine.c   |  4 ++--
 src/box/memtx_engine.h   |  9 ++++++---
 src/box/tuple.c          | 12 +++++++++---
 src/box/tuple.h          |  2 +-
 src/box/vy_mem.c         |  2 +-
 test/box/admin.result    |  2 ++
 test/box/cfg.result      |  4 ++++
 9 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/src/box/box.cc b/src/box/box.cc
index 7828f575b..57419ee01 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1681,6 +1681,7 @@ engine_init()
 				    cfg_geti("force_recovery"),
 				    cfg_getd("memtx_memory"),
 				    cfg_geti("memtx_min_tuple_size"),
+				    cfg_geti("strip_core"),
 				    cfg_getd("slab_alloc_factor"));
 	engine_register((struct engine *)memtx);
 	box_set_memtx_max_tuple_size();
diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua
index 5530b2caa..9f3344da3 100644
--- a/src/box/lua/load_cfg.lua
+++ b/src/box/lua/load_cfg.lua
@@ -25,6 +25,7 @@ end
 local default_cfg = {
     listen              = nil,
     memtx_memory        = 256 * 1024 *1024,
+    strip_core          = false,
     memtx_min_tuple_size = 16,
     memtx_max_tuple_size = 1024 * 1024,
     slab_alloc_factor   = 1.05,
@@ -88,6 +89,7 @@ local default_cfg = {
 local template_cfg = {
     listen              = 'string, number',
     memtx_memory        = 'number',
+    strip_core          = 'boolean',
     memtx_min_tuple_size  = 'number',
     memtx_max_tuple_size  = 'number',
     slab_alloc_factor   = 'number',
diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index 58cfd6195..b0c567046 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -1011,7 +1011,7 @@ memtx_engine_gc_f(va_list va)
 struct memtx_engine *
 memtx_engine_new(const char *snap_dirname, bool force_recovery,
 		 uint64_t tuple_arena_max_size, uint32_t objsize_min,
-		 float alloc_factor)
+		 bool dontdump, float alloc_factor)
 {
 	struct memtx_engine *memtx = calloc(1, sizeof(*memtx));
 	if (memtx == NULL) {
@@ -1066,7 +1066,7 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery,
 	/* Initialize tuple allocator. */
 	quota_init(&memtx->quota, tuple_arena_max_size);
 	tuple_arena_create(&memtx->arena, &memtx->quota, tuple_arena_max_size,
-			   SLAB_SIZE, "memtx");
+			   SLAB_SIZE, dontdump, "memtx");
 	slab_cache_create(&memtx->slab_cache, &memtx->arena);
 	small_alloc_create(&memtx->alloc, &memtx->slab_cache,
 			   objsize_min, alloc_factor);
diff --git a/src/box/memtx_engine.h b/src/box/memtx_engine.h
index 8f4ce7cdd..ccb51678d 100644
--- a/src/box/memtx_engine.h
+++ b/src/box/memtx_engine.h
@@ -189,7 +189,8 @@ memtx_engine_schedule_gc(struct memtx_engine *memtx,
 struct memtx_engine *
 memtx_engine_new(const char *snap_dirname, bool force_recovery,
 		 uint64_t tuple_arena_max_size,
-		 uint32_t objsize_min, float alloc_factor);
+		 uint32_t objsize_min, bool dontdump,
+		 float alloc_factor);
 
 int
 memtx_engine_recover_snapshot(struct memtx_engine *memtx,
@@ -257,12 +258,14 @@ memtx_index_def_change_requires_rebuild(struct index *index,
 static inline struct memtx_engine *
 memtx_engine_new_xc(const char *snap_dirname, bool force_recovery,
 		    uint64_t tuple_arena_max_size,
-		    uint32_t objsize_min, float alloc_factor)
+		    uint32_t objsize_min, bool dontdump,
+		    float alloc_factor)
 {
 	struct memtx_engine *memtx;
 	memtx = memtx_engine_new(snap_dirname, force_recovery,
 				 tuple_arena_max_size,
-				 objsize_min, alloc_factor);
+				 objsize_min, dontdump,
+				 alloc_factor);
 	if (memtx == NULL)
 		diag_raise();
 	return memtx;
diff --git a/src/box/tuple.c b/src/box/tuple.c
index 45c6727a4..962e05691 100644
--- a/src/box/tuple.c
+++ b/src/box/tuple.c
@@ -323,7 +323,7 @@ tuple_init(field_name_hash_f hash)
 void
 tuple_arena_create(struct slab_arena *arena, struct quota *quota,
 		   uint64_t arena_max_size, uint32_t slab_size,
-		   const char *arena_name)
+		   bool dontdump, const char *arena_name)
 {
 	/*
 	 * Ensure that quota is a multiple of slab_size, to
@@ -331,11 +331,17 @@ tuple_arena_create(struct slab_arena *arena, struct quota *quota,
 	 */
 	size_t prealloc = small_align(arena_max_size, slab_size);
 
+        /*
+         * Skip from coredump if requested.
+         */
+        int flags = SLAB_ARENA_PRIVATE;
+        if (dontdump)
+                flags |= SLAB_ARENA_DONTDUMP;
+
 	say_info("mapping %zu bytes for %s tuple arena...", prealloc,
 		 arena_name);
 
-	if (slab_arena_create(arena, quota, prealloc, slab_size,
-			      MAP_PRIVATE) != 0) {
+	if (slab_arena_create(arena, quota, prealloc, slab_size, flags) != 0) {
 		if (errno == ENOMEM) {
 			panic("failed to preallocate %zu bytes: Cannot "\
 			      "allocate memory, check option '%s_memory' in box.cfg(..)", prealloc,
diff --git a/src/box/tuple.h b/src/box/tuple.h
index 4acda7891..5857586dc 100644
--- a/src/box/tuple.h
+++ b/src/box/tuple.h
@@ -69,7 +69,7 @@ tuple_free(void);
 void
 tuple_arena_create(struct slab_arena *arena, struct quota *quota,
 		   uint64_t arena_max_size, uint32_t slab_size,
-		   const char *arena_name);
+		   bool dontdump, const char *arena_name);
 
 void
 tuple_arena_destroy(struct slab_arena *arena);
diff --git a/src/box/vy_mem.c b/src/box/vy_mem.c
index a4fae26e2..b4d016a68 100644
--- a/src/box/vy_mem.c
+++ b/src/box/vy_mem.c
@@ -54,7 +54,7 @@ vy_mem_env_create(struct vy_mem_env *env, size_t memory)
 	/* Vinyl memory is limited by vy_quota. */
 	quota_init(&env->quota, QUOTA_MAX);
 	tuple_arena_create(&env->arena, &env->quota, memory,
-			   SLAB_SIZE, "vinyl");
+			   SLAB_SIZE, false, "vinyl");
 	lsregion_create(&env->allocator, &env->arena);
 	env->tree_extent_size = 0;
 }
diff --git a/test/box/admin.result b/test/box/admin.result
index 53ced2fcc..bbebbd224 100644
--- a/test/box/admin.result
+++ b/test/box/admin.result
@@ -84,6 +84,8 @@ cfg_filter(box.cfg)
     - 500000
   - - slab_alloc_factor
     - 1.05
+  - - strip_core
+    - false
   - - too_long_threshold
     - 0.5
   - - vinyl_bloom_fpr
diff --git a/test/box/cfg.result b/test/box/cfg.result
index 66b02f591..81f4afac8 100644
--- a/test/box/cfg.result
+++ b/test/box/cfg.result
@@ -72,6 +72,8 @@ cfg_filter(box.cfg)
     - 500000
   - - slab_alloc_factor
     - 1.05
+  - - strip_core
+    - false
   - - too_long_threshold
     - 0.5
   - - vinyl_bloom_fpr
@@ -171,6 +173,8 @@ cfg_filter(box.cfg)
     - 500000
   - - slab_alloc_factor
     - 1.05
+  - - strip_core
+    - false
   - - too_long_threshold
     - 0.5
   - - vinyl_bloom_fpr
-- 
2.20.1

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] box/memtx: Allow to skip tuple memory from coredump
  2019-05-13 22:20 [PATCH v2] box/memtx: Allow to skip tuple memory from coredump Cyrill Gorcunov
@ 2019-05-15 12:45 ` Vladimir Davydov
  2019-05-15 12:54   ` Cyrill Gorcunov
  0 siblings, 1 reply; 3+ messages in thread
From: Vladimir Davydov @ 2019-05-15 12:45 UTC (permalink / raw)
  To: Cyrill Gorcunov; +Cc: Alexander Turenko, tml

On Tue, May 14, 2019 at 01:20:53AM +0300, Cyrill Gorcunov wrote:
> In case if there are huge amount of tuples the whole
> memory goes to coredump file even if we don't need it
> for problem investigation. In result coredump may
> blow up to gigabytes in size.
> 
> Lets allow to exclude this memory from dumping via
> box.cfg::strip_core boolean parameter.
> 
> Note that the tuple's arena is used not only for tuples
> themselves but for memtx->index_extent_pool and
> memtx->iterator_pool as well, so they are affected
> too.
> 
> Fixes #3509
> 
> @TarantoolBot document
> Title: Document box.cfg.strip_core
> 
> When Tarantool runs under a heavy load the memory allocated
> for tuples may be very huge in size and to eliminate this
> memory from being present in `coredump` file the `box.cfg.strip_core`
> parameter should be set to `true`.
> 
> The default value is `false`.
> ---
> v2:
>  - Use strip_core name for box parameter
>  - Pass cfg_geti directly to functions
>  - vy_mem_env_create for now left as it was,
>    simply because we can't use cfg_geti there
>    (linking with library fails), I think we can
>    address it on top later

Please patch all places to use the new API. This better be done in a
separate patch. Call it "Update small submodule". Then I will squash
submodule sha update in it once we are done.

I checked - it turns out that SLAB_DONTDUMP path in madvise_checked is
executed even if box.cfg.strip_core is false. Looks like flags aren't
initialized properly somewhere. Please fix.

Please double-check (manually) that this new option does reduce core
size. Try setting box.cfg.memtx_memory, writing some data to a space,
then increasing box.cfg.memtx_memory, writing more, crashing the app,
and checking that the core size is significantly smaller with strip_core
set.

Also, as I mentioned in the comment to the previous patch, we should
emit a warning in case MADV_DONTDUMP is unavailable while strip_core is
set. This probably better be done here, in Tarantool, using say_warn.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] box/memtx: Allow to skip tuple memory from coredump
  2019-05-15 12:45 ` Vladimir Davydov
@ 2019-05-15 12:54   ` Cyrill Gorcunov
  0 siblings, 0 replies; 3+ messages in thread
From: Cyrill Gorcunov @ 2019-05-15 12:54 UTC (permalink / raw)
  To: Vladimir Davydov; +Cc: Alexander Turenko, tml

On Wed, May 15, 2019 at 03:45:58PM +0300, Vladimir Davydov wrote:
> 
> Please patch all places to use the new API. This better be done in a
> separate patch. Call it "Update small submodule". Then I will squash
> submodule sha update in it once we are done.
> 
> I checked - it turns out that SLAB_DONTDUMP path in madvise_checked is
> executed even if box.cfg.strip_core is false. Looks like flags aren't
> initialized properly somewhere. Please fix.
> 
> Please double-check (manually) that this new option does reduce core
> size. Try setting box.cfg.memtx_memory, writing some data to a space,
> then increasing box.cfg.memtx_memory, writing more, crashing the app,
> and checking that the core size is significantly smaller with strip_core
> set.
> 
> Also, as I mentioned in the comment to the previous patch, we should
> emit a warning in case MADV_DONTDUMP is unavailable while strip_core is
> set. This probably better be done here, in Tarantool, using say_warn.

Thanks for review! Will address.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-05-15 12:54 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-13 22:20 [PATCH v2] box/memtx: Allow to skip tuple memory from coredump Cyrill Gorcunov
2019-05-15 12:45 ` Vladimir Davydov
2019-05-15 12:54   ` Cyrill Gorcunov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox