Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH v8 0/2] fiber: Handle stack madvise/mprotect errors
@ 2020-02-14 13:22 Cyrill Gorcunov
  2020-02-14 13:22 ` [Tarantool-patches] [PATCH v8 1/2] fiber: set diagnostics at madvise/mprotect failure Cyrill Gorcunov
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Cyrill Gorcunov @ 2020-02-14 13:22 UTC (permalink / raw)
  To: tml; +Cc: Vladislav Shpilevoy

in v8:
 - address Vlad's comments
 - include slab stat condition into the test

issue https://github.com/tarantool/tarantool/issues/4722
branch https://github.com/tarantool/tarantool/tree/gorcunov/gh-4722-mprotect-diag-error-8

Cyrill Gorcunov (2):
  fiber: set diagnostics at madvise/mprotect failure
  fiber: leak slab if unable to bring prots back

 src/lib/core/errinj.h        |   2 +
 src/lib/core/fiber.c         |  93 ++++++++++++++++++++++++------
 test/box/errinj.result       |   2 +
 test/unit/CMakeLists.txt     |   4 ++
 test/unit/fiber_stack.c      | 109 +++++++++++++++++++++++++++++++++++
 test/unit/fiber_stack.result |  11 ++++
 test/unit/suite.ini          |   2 +-
 7 files changed, 204 insertions(+), 19 deletions(-)
 create mode 100644 test/unit/fiber_stack.c
 create mode 100644 test/unit/fiber_stack.result


base-commit: 95b9a48daf00b6686b8e1993a023daa0755b460e
-- 
2.20.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Tarantool-patches] [PATCH v8 1/2] fiber: set diagnostics at madvise/mprotect failure
  2020-02-14 13:22 [Tarantool-patches] [PATCH v8 0/2] fiber: Handle stack madvise/mprotect errors Cyrill Gorcunov
@ 2020-02-14 13:22 ` Cyrill Gorcunov
  2020-02-20 21:30   ` Vladislav Shpilevoy
  2020-02-14 13:22 ` [Tarantool-patches] [PATCH v8 2/2] fiber: leak slab if unable to bring prots back Cyrill Gorcunov
  2020-02-24 19:25 ` [Tarantool-patches] [PATCH v8 0/2] fiber: Handle stack madvise/mprotect errors Kirill Yukhin
  2 siblings, 1 reply; 12+ messages in thread
From: Cyrill Gorcunov @ 2020-02-14 13:22 UTC (permalink / raw)
  To: tml; +Cc: Vladislav Shpilevoy

Both madvise and mprotect calls can fail due to various
reasons, mostly because of lack of free memory in the
system.

We log such cases via say_x helpers but this is not enough.
In particular tarantool/memcached relies on diag error to be
set to detect an error condition:

 | expire_fiber = fiber_new(name, memcached_expire_loop);
 | const box_error_t *err = box_error_last();
 | if (err) {
 |	say_error("Can't start the expire fiber");
 |	say_error("%s", box_error_message(err));
 |	return -1;
 | }

Thus lets use diag_set() helper here and instead of macros
use inline functions for better readability.

Fixes #4722

Reported-by: Alexander Turenko <alexander.turenko@tarantool.org>
Reviewed-by: Alexander Turenko <alexander.turenko@tarantool.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 src/lib/core/errinj.h        |  2 +
 src/lib/core/fiber.c         | 84 ++++++++++++++++++++++++++++--------
 test/box/errinj.result       |  2 +
 test/unit/CMakeLists.txt     |  4 ++
 test/unit/fiber_stack.c      | 79 +++++++++++++++++++++++++++++++++
 test/unit/fiber_stack.result |  8 ++++
 test/unit/suite.ini          |  2 +-
 7 files changed, 163 insertions(+), 18 deletions(-)
 create mode 100644 test/unit/fiber_stack.c
 create mode 100644 test/unit/fiber_stack.result

diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h
index 672da2119..ed0cba903 100644
--- a/src/lib/core/errinj.h
+++ b/src/lib/core/errinj.h
@@ -135,6 +135,8 @@ struct errinj {
 	_(ERRINJ_COIO_SENDFILE_CHUNK, ERRINJ_INT, {.iparam = -1}) \
 	_(ERRINJ_SWIM_FD_ONLY, ERRINJ_BOOL, {.bparam = false}) \
 	_(ERRINJ_DYN_MODULE_COUNT, ERRINJ_INT, {.iparam = 0}) \
+	_(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL, {.bparam = false}) \
+	_(ERRINJ_FIBER_MPROTECT, ERRINJ_INT, {.iparam = -1})
 
 ENUM0(errinj_id, ERRINJ_LIST);
 extern struct errinj errinjs[];
diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
index 00ae8cded..f10687b29 100644
--- a/src/lib/core/fiber.c
+++ b/src/lib/core/fiber.c
@@ -41,6 +41,7 @@
 #include "assoc.h"
 #include "memory.h"
 #include "trigger.h"
+#include "errinj.h"
 
 #if ENABLE_FIBER_TOP
 #include <x86intrin.h> /* __rdtscp() */
@@ -173,21 +174,40 @@ static int (*fiber_invoke)(fiber_func f, va_list ap);
 #define ASAN_FINISH_SWITCH_FIBER(var_name)
 #endif
 
-#define fiber_madvise(addr, len, advice)				\
-({									\
-	int err = madvise((addr), (len), (advice));			\
-	if (err)							\
-		say_syserror("madvise");				\
-	err;								\
-})
-
-#define fiber_mprotect(addr, len, prot)					\
-({									\
-	int err = mprotect((addr), (len), (prot));			\
-	if (err)							\
-		say_syserror("mprotect");				\
-	err;								\
-})
+static inline int
+fiber_madvise(void *addr, size_t len, int advice)
+{
+	int rc = 0;
+
+	ERROR_INJECT(ERRINJ_FIBER_MADVISE, {
+		errno = ENOMEM;
+		rc = -1;
+	});
+
+	if (rc != 0 || madvise(addr, len, advice) != 0) {
+		diag_set(SystemError, "fiber madvise failed");
+		return -1;
+	}
+	return 0;
+}
+
+static inline int
+fiber_mprotect(void *addr, size_t len, int prot)
+{
+	int rc = 0;
+
+	struct errinj *inj = errinj(ERRINJ_FIBER_MPROTECT, ERRINJ_INT);
+	if (inj != NULL && inj->iparam == prot) {
+		errno = ENOMEM;
+		rc = -1;
+	}
+
+	if (rc != 0 || mprotect(addr, len, prot) != 0) {
+		diag_set(SystemError, "fiber mprotect failed");
+		return -1;
+	}
+	return 0;
+}
 
 #if ENABLE_FIBER_TOP
 static __thread bool fiber_top_enabled = false;
@@ -951,6 +971,12 @@ fiber_stack_recycle(struct fiber *fiber)
 		start = page_align_up(fiber->stack_watermark);
 		end = fiber->stack + fiber->stack_size;
 	}
+
+	/*
+	 * Ignore errors on MADV_DONTNEED because this is
+	 * just a hint for OS and not critical for
+	 * functionality.
+	 */
 	fiber_madvise(start, end - start, MADV_DONTNEED);
 	stack_put_watermark(fiber->stack_watermark);
 }
@@ -969,7 +995,9 @@ fiber_stack_watermark_create(struct fiber *fiber)
 
 	/*
 	 * We don't expect the whole stack usage in regular
-	 * loads, let's try to minimize rss pressure.
+	 * loads, let's try to minimize rss pressure. But do
+	 * not exit if MADV_DONTNEED failed, it is just a hint
+	 * for OS, not critical one.
 	 */
 	fiber_madvise(fiber->stack, fiber->stack_size, MADV_DONTNEED);
 
@@ -1007,6 +1035,8 @@ fiber_stack_watermark_create(struct fiber *fiber)
 static void
 fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc)
 {
+	static const int mprotect_flags = PROT_READ | PROT_WRITE;
+
 	if (fiber->stack != NULL) {
 		VALGRIND_STACK_DEREGISTER(fiber->stack_id);
 #if ENABLE_ASAN
@@ -1017,7 +1047,22 @@ fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc)
 			guard = page_align_down(fiber->stack - page_size);
 		else
 			guard = page_align_up(fiber->stack + fiber->stack_size);
-		fiber_mprotect(guard, page_size, PROT_READ | PROT_WRITE);
+
+		if (fiber_mprotect(guard, page_size, mprotect_flags) != 0) {
+			/*
+			 * FIXME: We need some intelligent handling:
+			 * say put this slab into a queue and retry
+			 * to setup the original protection back in
+			 * background.
+			 *
+			 * Note that in case if we're called from
+			 * fiber_stack_create() the @a mprotect_flags is
+			 * the same as the slab been created with, so
+			 * calling mprotect for VMA with same flags
+			 * won't fail.
+			 */
+			diag_log();
+		}
 		slab_put(slabc, fiber->stack_slab);
 	}
 }
@@ -1064,6 +1109,11 @@ fiber_stack_create(struct fiber *fiber, struct slab_cache *slabc,
 						  fiber->stack_size);
 
 	if (fiber_mprotect(guard, page_size, PROT_NONE)) {
+		/*
+		 * Write an error into the log since a guard
+		 * page is critical for functionality.
+		 */
+		diag_log();
 		fiber_stack_destroy(fiber, slabc);
 		return -1;
 	}
diff --git a/test/box/errinj.result b/test/box/errinj.result
index f043c6689..daa27ed24 100644
--- a/test/box/errinj.result
+++ b/test/box/errinj.result
@@ -48,6 +48,8 @@ evals
   - ERRINJ_CHECK_FORMAT_DELAY: false
   - ERRINJ_COIO_SENDFILE_CHUNK: -1
   - ERRINJ_DYN_MODULE_COUNT: 0
+  - ERRINJ_FIBER_MADVISE: false
+  - ERRINJ_FIBER_MPROTECT: -1
   - ERRINJ_HTTPC_EXECUTE: false
   - ERRINJ_HTTP_RESPONSE_ADD_WAIT: false
   - ERRINJ_INDEX_ALLOC: false
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 4a57597e9..c037ac539 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -74,6 +74,10 @@ add_executable(fiber.test fiber.cc)
 set_source_files_properties(fiber.cc PROPERTIES COMPILE_FLAGS -O0)
 target_link_libraries(fiber.test core unit)
 
+add_executable(fiber_stack.test fiber_stack.c)
+set_source_files_properties(fiber_stack.c PROPERTIES COMPILE_FLAGS -O0)
+target_link_libraries(fiber_stack.test core unit)
+
 if (NOT ENABLE_GCOV)
     # This test is known to be broken with GCOV
     add_executable(guard.test guard.cc)
diff --git a/test/unit/fiber_stack.c b/test/unit/fiber_stack.c
new file mode 100644
index 000000000..103dfaf0d
--- /dev/null
+++ b/test/unit/fiber_stack.c
@@ -0,0 +1,79 @@
+#include "memory.h"
+#include "fiber.h"
+#include "unit.h"
+#include "trivia/util.h"
+#include "errinj.h"
+
+static struct fiber_attr default_attr;
+
+static int
+noop_f(va_list ap)
+{
+	return 0;
+}
+
+static int
+main_f(va_list ap)
+{
+	struct errinj *inj;
+	struct fiber *fiber;
+
+	header();
+	plan(4);
+
+	/*
+	 * Set non-default stack size to prevent reusing of an
+	 * existing fiber.
+	 */
+	struct fiber_attr *fiber_attr = fiber_attr_new();
+	fiber_attr_setstacksize(fiber_attr, default_attr.stack_size * 2);
+
+	/*
+	 * Clear the fiber's diagnostics area to check that failed
+	 * fiber_new() sets an error.
+	 */
+	diag_clear(diag_get());
+
+	/*
+	 * Check guard page setup via mprotect. We can't test the fiber
+	 * destroy path since it clears fiber's diag.
+	 */
+	inj = errinj(ERRINJ_FIBER_MPROTECT, ERRINJ_INT);
+	inj->iparam = PROT_NONE;
+	fiber = fiber_new_ex("test_mprotect", fiber_attr, noop_f);
+	inj->iparam = -1;
+
+	ok(fiber == NULL, "mprotect: failed to setup fiber guard page");
+	ok(diag_get() != NULL, "mprotect: diag is armed after error");
+
+	/*
+	 * Check madvise. We can't test the fiber destroy
+	 * path since it is cleaning error.
+	 */
+	diag_clear(diag_get());
+	inj = errinj(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL);
+	inj->bparam = true;
+	fiber = fiber_new_ex("test_madvise", fiber_attr, noop_f);
+	inj->bparam = false;
+
+	ok(fiber != NULL, "madvise: non critical error on madvise hint");
+	ok(diag_get() != NULL, "madvise: diag is armed after error");
+
+	footer();
+
+	ev_break(loop(), EVBREAK_ALL);
+	return check_plan();
+}
+
+int main()
+{
+	memory_init();
+	fiber_init(fiber_c_invoke);
+	fiber_attr_create(&default_attr);
+	struct fiber *f = fiber_new("main", main_f);
+	fiber_wakeup(f);
+	ev_run(loop(), 0);
+	fiber_free();
+	memory_free();
+	return 0;
+}
diff --git a/test/unit/fiber_stack.result b/test/unit/fiber_stack.result
new file mode 100644
index 000000000..43ff74b2f
--- /dev/null
+++ b/test/unit/fiber_stack.result
@@ -0,0 +1,8 @@
+SystemError fiber mprotect failed: Cannot allocate memory
+	*** main_f ***
+1..4
+ok 1 - mprotect: failed to setup fiber guard page
+ok 2 - mprotect: diag is armed after error
+ok 3 - madvise: non critical error on madvise hint
+ok 4 - madvise: diag is armed after error
+	*** main_f: done ***
diff --git a/test/unit/suite.ini b/test/unit/suite.ini
index 89c8499fc..d429c95e9 100644
--- a/test/unit/suite.ini
+++ b/test/unit/suite.ini
@@ -1,5 +1,5 @@
 [default]
 core = unittest
 description = unit tests
-release_disabled = swim_errinj.test
+release_disabled = fiber_stack.test swim_errinj.test
 is_parallel = True
-- 
2.20.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Tarantool-patches] [PATCH v8 2/2] fiber: leak slab if unable to bring prots back
  2020-02-14 13:22 [Tarantool-patches] [PATCH v8 0/2] fiber: Handle stack madvise/mprotect errors Cyrill Gorcunov
  2020-02-14 13:22 ` [Tarantool-patches] [PATCH v8 1/2] fiber: set diagnostics at madvise/mprotect failure Cyrill Gorcunov
@ 2020-02-14 13:22 ` Cyrill Gorcunov
  2020-02-14 14:17   ` Konstantin Osipov
  2020-02-24 19:25 ` [Tarantool-patches] [PATCH v8 0/2] fiber: Handle stack madvise/mprotect errors Kirill Yukhin
  2 siblings, 1 reply; 12+ messages in thread
From: Cyrill Gorcunov @ 2020-02-14 13:22 UTC (permalink / raw)
  To: tml; +Cc: Vladislav Shpilevoy

In case if we unable to revert guard page back to
read|write we should never use such slab again.

Initially I thought of just put panic here and
exit but it is too destructive. I think better
print an error and continue. If node admin ignore
this message then one moment at future there won't
be slab left for use and creating new fibers get
prohibited.

In future (hopefully near one) we plan to drop
guard pages to prevent VMA fracturing and use
stack marks instead.

Reviewed-by: Alexander Turenko <alexander.turenko@tarantool.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 src/lib/core/fiber.c         | 11 +++++++++--
 test/unit/fiber_stack.c      | 36 +++++++++++++++++++++++++++++++++---
 test/unit/fiber_stack.result |  5 ++++-
 3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
index f10687b29..73de7765b 100644
--- a/src/lib/core/fiber.c
+++ b/src/lib/core/fiber.c
@@ -1055,15 +1055,22 @@ fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc)
 			 * to setup the original protection back in
 			 * background.
 			 *
+			 * For now lets keep such slab referenced and
+			 * leaked: if mprotect failed we must not allow
+			 * to reuse such slab with PROT_NONE'ed page
+			 * inside.
+			 *
 			 * Note that in case if we're called from
 			 * fiber_stack_create() the @a mprotect_flags is
 			 * the same as the slab been created with, so
 			 * calling mprotect for VMA with same flags
 			 * won't fail.
 			 */
-			diag_log();
+			say_syserror("fiber: Can't put guard page to slab. "
+				     "Leak %zu bytes", (size_t)fiber->stack_size);
+		} else {
+			slab_put(slabc, fiber->stack_slab);
 		}
-		slab_put(slabc, fiber->stack_slab);
 	}
 }
 
diff --git a/test/unit/fiber_stack.c b/test/unit/fiber_stack.c
index 103dfaf0d..41abe3764 100644
--- a/test/unit/fiber_stack.c
+++ b/test/unit/fiber_stack.c
@@ -15,11 +15,13 @@ noop_f(va_list ap)
 static int
 main_f(va_list ap)
 {
+	struct slab_cache *slabc = &cord()->slabc;
+	size_t used_before, used_after;
 	struct errinj *inj;
 	struct fiber *fiber;
 
 	header();
-	plan(4);
+	plan(6);
 
 	/*
 	 * Set non-default stack size to prevent reusing of an
@@ -47,8 +49,7 @@ main_f(va_list ap)
 	ok(diag_get() != NULL, "mprotect: diag is armed after error");
 
 	/*
-	 * Check madvise. We can't test the fiber destroy
-	 * path since it is cleaning error.
+	 * Check madvise error on fiber creation.
 	 */
 	diag_clear(diag_get());
 	inj = errinj(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL);
@@ -59,6 +60,35 @@ main_f(va_list ap)
 	ok(fiber != NULL, "madvise: non critical error on madvise hint");
 	ok(diag_get() != NULL, "madvise: diag is armed after error");
 
+	/*
+	 * Check if we leak on fiber destruction.
+	 * We will print an error and result get
+	 * compared by testing engine.
+	 */
+	fiber_attr_delete(fiber_attr);
+	fiber_attr = fiber_attr_new();
+	fiber_attr->flags |= FIBER_CUSTOM_STACK;
+	fiber_attr->stack_size = 64 << 10;
+
+	diag_clear(diag_get());
+
+	used_before = slabc->allocated.stats.used;
+
+	fiber = fiber_new_ex("test_madvise", fiber_attr, noop_f);
+	ok(fiber != NULL, "fiber with custom stack");
+	fiber_set_joinable(fiber, true);
+
+	inj = errinj(ERRINJ_FIBER_MPROTECT, ERRINJ_INT);
+	inj->iparam = PROT_READ | PROT_WRITE;
+
+	fiber_start(fiber);
+	fiber_join(fiber);
+	inj->iparam = -1;
+
+	used_after = slabc->allocated.stats.used;
+	ok(used_after > used_before, "expected leak detected");
+
+	fiber_attr_delete(fiber_attr);
 	footer();
 
 	ev_break(loop(), EVBREAK_ALL);
diff --git a/test/unit/fiber_stack.result b/test/unit/fiber_stack.result
index 43ff74b2f..7cae4e96c 100644
--- a/test/unit/fiber_stack.result
+++ b/test/unit/fiber_stack.result
@@ -1,8 +1,11 @@
 SystemError fiber mprotect failed: Cannot allocate memory
+fiber: Can't put guard page to slab. Leak 57344 bytes: Cannot allocate memory
 	*** main_f ***
-1..4
+1..6
 ok 1 - mprotect: failed to setup fiber guard page
 ok 2 - mprotect: diag is armed after error
 ok 3 - madvise: non critical error on madvise hint
 ok 4 - madvise: diag is armed after error
+ok 5 - fiber with custom stack
+ok 6 - expected leak detected
 	*** main_f: done ***
-- 
2.20.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH v8 2/2] fiber: leak slab if unable to bring prots back
  2020-02-14 13:22 ` [Tarantool-patches] [PATCH v8 2/2] fiber: leak slab if unable to bring prots back Cyrill Gorcunov
@ 2020-02-14 14:17   ` Konstantin Osipov
  2020-02-14 14:22     ` Cyrill Gorcunov
  0 siblings, 1 reply; 12+ messages in thread
From: Konstantin Osipov @ 2020-02-14 14:17 UTC (permalink / raw)
  To: Cyrill Gorcunov; +Cc: tml, Vladislav Shpilevoy

* Cyrill Gorcunov <gorcunov@gmail.com> [20/02/14 16:25]:
> In case if we unable to revert guard page back to
> read|write we should never use such slab again.
> 
> Initially I thought of just put panic here and
> exit but it is too destructive. I think better
> print an error and continue. If node admin ignore
> this message then one moment at future there won't
> be slab left for use and creating new fibers get
> prohibited.
> 
> In future (hopefully near one) we plan to drop
> guard pages to prevent VMA fracturing and use
> stack marks instead.

Seriously, let's have some decency, first we pushed an optional
feature and then it broke and you're suggestinga hot fix that leaks 
memory.

What's the issue with disabling the broken feature?


-- 
Konstantin Osipov, Moscow, Russia

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH v8 2/2] fiber: leak slab if unable to bring prots back
  2020-02-14 14:17   ` Konstantin Osipov
@ 2020-02-14 14:22     ` Cyrill Gorcunov
  2020-02-14 14:29       ` Konstantin Osipov
  0 siblings, 1 reply; 12+ messages in thread
From: Cyrill Gorcunov @ 2020-02-14 14:22 UTC (permalink / raw)
  To: Konstantin Osipov; +Cc: tml, Vladislav Shpilevoy

On Fri, Feb 14, 2020 at 05:17:04PM +0300, Konstantin Osipov wrote:
> > 
> > In future (hopefully near one) we plan to drop
> > guard pages to prevent VMA fracturing and use
> > stack marks instead.
> 
> Seriously, let's have some decency, first we pushed an optional
> feature and then it broke and you're suggestinga hot fix that leaks 
> memory.
> 
> What's the issue with disabling the broken feature?

As far as I understand we gonna drop guard pages for release
builds only, for debug builds it will be with us, no?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH v8 2/2] fiber: leak slab if unable to bring prots back
  2020-02-14 14:22     ` Cyrill Gorcunov
@ 2020-02-14 14:29       ` Konstantin Osipov
  2020-02-14 14:31         ` Cyrill Gorcunov
  0 siblings, 1 reply; 12+ messages in thread
From: Konstantin Osipov @ 2020-02-14 14:29 UTC (permalink / raw)
  To: Cyrill Gorcunov; +Cc: tml, Vladislav Shpilevoy

* Cyrill Gorcunov <gorcunov@gmail.com> [20/02/14 17:22]:
> On Fri, Feb 14, 2020 at 05:17:04PM +0300, Konstantin Osipov wrote:
> > > 
> > > In future (hopefully near one) we plan to drop
> > > guard pages to prevent VMA fracturing and use
> > > stack marks instead.
> > 
> > Seriously, let's have some decency, first we pushed an optional
> > feature and then it broke and you're suggestinga hot fix that leaks 
> > memory.
> > 
> > What's the issue with disabling the broken feature?
> 
> As far as I understand we gonna drop guard pages for release
> builds only, for debug builds it will be with us, no?

Panic in debug is ok.

-- 
Konstantin Osipov, Moscow, Russia

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH v8 2/2] fiber: leak slab if unable to bring prots back
  2020-02-14 14:29       ` Konstantin Osipov
@ 2020-02-14 14:31         ` Cyrill Gorcunov
  2020-02-20 21:00           ` Alexander Turenko
  0 siblings, 1 reply; 12+ messages in thread
From: Cyrill Gorcunov @ 2020-02-14 14:31 UTC (permalink / raw)
  To: Konstantin Osipov; +Cc: tml, Vladislav Shpilevoy

On Fri, Feb 14, 2020 at 05:29:19PM +0300, Konstantin Osipov wrote:
> > 
> > As far as I understand we gonna drop guard pages for release
> > builds only, for debug builds it will be with us, no?
> 
> Panic in debug is ok.

Thanks for clarification, Kostya! I'll rework the series once time permit.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH v8 2/2] fiber: leak slab if unable to bring prots back
  2020-02-14 14:31         ` Cyrill Gorcunov
@ 2020-02-20 21:00           ` Alexander Turenko
  2020-02-21  7:15             ` Konstantin Osipov
  0 siblings, 1 reply; 12+ messages in thread
From: Alexander Turenko @ 2020-02-20 21:00 UTC (permalink / raw)
  To: Cyrill Gorcunov; +Cc: tml, Vladislav Shpilevoy

Guys, please don't remove me from CC in discussions, where I
participated.

On Fri, Feb 14, 2020 at 05:31:35PM +0300, Cyrill Gorcunov wrote:
> On Fri, Feb 14, 2020 at 05:29:19PM +0300, Konstantin Osipov wrote:
> > > 
> > > As far as I understand we gonna drop guard pages for release
> > > builds only, for debug builds it will be with us, no?
> > 
> > Panic in debug is ok.
> 
> Thanks for clarification, Kostya! I'll rework the series once time permit.

We using guard page for a long time (f3155daa 'Add a guard page to the
end fiber stack', 2015) and reconsidering the approach as well as
reimplementation may took significant time, may be stuck due to some
reason or be rejected (for debug builds or at all). It also requires
laborious performance testing. I propose to don't block this bugfix.

For panic on debug: it is okay, but not good, when we able to operate
further. In some rare cases it may even affect customers, when we're
working with them to catch a problem on a debug build and it spins on
some part of production servers under load for a while.

When we detect that we made something wrong, assertion fail looks good.
When we detect that a system resource is exhausted and we able to
proceed with this situation w/o data corruption, it is better to do the
same thing in both release and debug builds.

WBR, Alexander Turenko.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH v8 1/2] fiber: set diagnostics at madvise/mprotect failure
  2020-02-14 13:22 ` [Tarantool-patches] [PATCH v8 1/2] fiber: set diagnostics at madvise/mprotect failure Cyrill Gorcunov
@ 2020-02-20 21:30   ` Vladislav Shpilevoy
  0 siblings, 0 replies; 12+ messages in thread
From: Vladislav Shpilevoy @ 2020-02-20 21:30 UTC (permalink / raw)
  To: Cyrill Gorcunov, tml

Hi! Thanks for the patch!

> diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
> index 00ae8cded..f10687b29 100644
> --- a/src/lib/core/fiber.c
> +++ b/src/lib/core/fiber.c
> @@ -1007,6 +1035,8 @@ fiber_stack_watermark_create(struct fiber *fiber)
>  static void
>  fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc)
>  {
> +	static const int mprotect_flags = PROT_READ | PROT_WRITE;
> +

Well, seems like you won't change this. Ok, since I can't do
anything about that, I have nothing to do but to say the
patchset is ok.

>  	if (fiber->stack != NULL) {
>  		VALGRIND_STACK_DEREGISTER(fiber->stack_id);
>  #if ENABLE_ASAN
> @@ -1017,7 +1047,22 @@ fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc)
>  			guard = page_align_down(fiber->stack - page_size);
>  		else
>  			guard = page_align_up(fiber->stack + fiber->stack_size);
> -		fiber_mprotect(guard, page_size, PROT_READ | PROT_WRITE);
> +
> +		if (fiber_mprotect(guard, page_size, mprotect_flags) != 0) {
> +			/*
> +			 * FIXME: We need some intelligent handling:
> +			 * say put this slab into a queue and retry
> +			 * to setup the original protection back in
> +			 * background.
> +			 *
> +			 * Note that in case if we're called from
> +			 * fiber_stack_create() the @a mprotect_flags is

You could just say "protection flags are the same as the slab..." instead
of mentioning a variable here. If you didn't think that enum would be good
here.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH v8 2/2] fiber: leak slab if unable to bring prots back
  2020-02-20 21:00           ` Alexander Turenko
@ 2020-02-21  7:15             ` Konstantin Osipov
  0 siblings, 0 replies; 12+ messages in thread
From: Konstantin Osipov @ 2020-02-21  7:15 UTC (permalink / raw)
  To: Alexander Turenko; +Cc: tml, Vladislav Shpilevoy

* Alexander Turenko <alexander.turenko@tarantool.org> [20/02/21 00:02]:

It's a complicated matter which has a lot of options.

One option is to do nothing, btw, there is a system-wide
workaround - increase /proc/sys/vm/max_map_count - just document
it.


-- 
Konstantin Osipov, Moscow, Russia

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH v8 0/2] fiber: Handle stack madvise/mprotect errors
  2020-02-14 13:22 [Tarantool-patches] [PATCH v8 0/2] fiber: Handle stack madvise/mprotect errors Cyrill Gorcunov
  2020-02-14 13:22 ` [Tarantool-patches] [PATCH v8 1/2] fiber: set diagnostics at madvise/mprotect failure Cyrill Gorcunov
  2020-02-14 13:22 ` [Tarantool-patches] [PATCH v8 2/2] fiber: leak slab if unable to bring prots back Cyrill Gorcunov
@ 2020-02-24 19:25 ` Kirill Yukhin
  2020-02-24 19:31   ` Cyrill Gorcunov
  2 siblings, 1 reply; 12+ messages in thread
From: Kirill Yukhin @ 2020-02-24 19:25 UTC (permalink / raw)
  To: Cyrill Gorcunov; +Cc: tml, Vladislav Shpilevoy

Hello,

On 14 фев 16:22, Cyrill Gorcunov wrote:
> in v8:
>  - address Vlad's comments
>  - include slab stat condition into the test
> 
> issue https://github.com/tarantool/tarantool/issues/4722
> branch https://github.com/tarantool/tarantool/tree/gorcunov/gh-4722-mprotect-diag-error-8

I've checked your patchset into 2.2, 2.3 and master.

I hope we'll improve it further.

Still, this is not something critical, so I didn't checked
it into 1.10.

--
Regards, Kirill Yukhin

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Tarantool-patches] [PATCH v8 0/2] fiber: Handle stack madvise/mprotect errors
  2020-02-24 19:25 ` [Tarantool-patches] [PATCH v8 0/2] fiber: Handle stack madvise/mprotect errors Kirill Yukhin
@ 2020-02-24 19:31   ` Cyrill Gorcunov
  0 siblings, 0 replies; 12+ messages in thread
From: Cyrill Gorcunov @ 2020-02-24 19:31 UTC (permalink / raw)
  To: Kirill Yukhin; +Cc: tml, Vladislav Shpilevoy

On Mon, Feb 24, 2020 at 10:25:09PM +0300, Kirill Yukhin wrote:
> I've checked your patchset into 2.2, 2.3 and master.
> 
> I hope we'll improve it further.

Thanks, I'll file a bug for myself to rework guard pages.

> 
> Still, this is not something critical, so I didn't checked
> it into 1.10.

yup

	Cyrill

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-02-24 19:31 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-14 13:22 [Tarantool-patches] [PATCH v8 0/2] fiber: Handle stack madvise/mprotect errors Cyrill Gorcunov
2020-02-14 13:22 ` [Tarantool-patches] [PATCH v8 1/2] fiber: set diagnostics at madvise/mprotect failure Cyrill Gorcunov
2020-02-20 21:30   ` Vladislav Shpilevoy
2020-02-14 13:22 ` [Tarantool-patches] [PATCH v8 2/2] fiber: leak slab if unable to bring prots back Cyrill Gorcunov
2020-02-14 14:17   ` Konstantin Osipov
2020-02-14 14:22     ` Cyrill Gorcunov
2020-02-14 14:29       ` Konstantin Osipov
2020-02-14 14:31         ` Cyrill Gorcunov
2020-02-20 21:00           ` Alexander Turenko
2020-02-21  7:15             ` Konstantin Osipov
2020-02-24 19:25 ` [Tarantool-patches] [PATCH v8 0/2] fiber: Handle stack madvise/mprotect errors Kirill Yukhin
2020-02-24 19:31   ` Cyrill Gorcunov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox