Tarantool development patches archive
 help / color / mirror / Atom feed
* [RFC] fiber: Increase default stack size
@ 2019-02-21 21:26 Cyrill Gorcunov
  2019-02-22  7:38 ` [tarantool-patches] " Георгий Кириченко
  2019-02-22 10:48 ` [tarantool-patches] " Alexander Turenko
  0 siblings, 2 replies; 7+ messages in thread
From: Cyrill Gorcunov @ 2019-02-21 21:26 UTC (permalink / raw)
  To: tarantool; +Cc: Kirill Yukhin, Vladimir Davydov

The default 64K stack size used for years become too
small for modern distors (Fedora 29 and etc) where third
party libraries (such as ncurses) started to use 64K for
own buffers and we get SIGSGV early without reaching
interactive console phase.

To address this problem and hopefully eliminate such
problems in future we increase default size up to 1M.
Because this value may be too big for old distros or
other libraries, which would never use such deep stack,
we do a trick: put watermark at 64K offset of the stack
and once fiber get scheduled out we test if the mark is
still present. If we're lucky and noone touched the memory
we use madvise() syscall to reduce RSS usage.

https://github.com/tarantool/tarantool/issues/3418

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
Please review the patch very very carefully since I'm
not that familiar with the codebase.

 src/fiber.c |  110 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/fiber.h |    6 +++
 2 files changed, 115 insertions(+), 1 deletion(-)

Index: tarantool.git/src/fiber.c
===================================================================
--- tarantool.git.orig/src/fiber.c
+++ tarantool.git/src/fiber.c
@@ -91,11 +91,26 @@ pthread_t main_thread_id;
 static size_t page_size;
 static int stack_direction;
 
+/*
+ * A random unique value with help of uuidgen.
+ *
+ * 39ee5420-13f7-417b-9610-ea100c591ab6
+ */
+static const char stack_wmark[] = {
+	0x39, 0xee, 0x54, 0x20, 0x13, 0xf7, 0x41, 0x7b,
+	0x96, 0x10, 0xea, 0x10, 0x0c, 0x59, 0x1a, 0xb6
+};
+
+static void
+stack_sched_out(struct fiber *fiber);
+
 enum {
 	/* The minimum allowable fiber stack size in bytes */
 	FIBER_STACK_SIZE_MINIMAL = 16384,
+	/* Stack size for stack relaxed tasks */
+	FIBER_STACK_MADVISE_LIMIT = 64536,
 	/* Default fiber stack size in bytes */
-	FIBER_STACK_SIZE_DEFAULT = 65536
+	FIBER_STACK_SIZE_DEFAULT = 1048576
 };
 
 /** Default fiber attributes */
@@ -188,6 +203,7 @@ fiber_call_impl(struct fiber *callee)
 	ASAN_START_SWITCH_FIBER(asan_state, 1,
 				callee->stack,
 				callee->stack_size);
+	stack_sched_out(callee);
 	coro_transfer(&caller->ctx, &callee->ctx);
 	ASAN_FINISH_SWITCH_FIBER(asan_state);
 }
@@ -441,6 +457,7 @@ fiber_yield(void)
 				(caller->flags & FIBER_IS_DEAD) == 0,
 				callee->stack,
 				callee->stack_size);
+	stack_sched_out(callee);
 	coro_transfer(&caller->ctx, &callee->ctx);
 	ASAN_FINISH_SWITCH_FIBER(asan_state);
 }
@@ -710,6 +727,96 @@ page_align_up(void *ptr)
 	return page_align_down(ptr + page_size - 1);
 }
 
+static inline void *
+stack_wmark_pos(struct fiber *fiber)
+{
+	void *pos;
+
+	assert(fiber->stack);
+	assert(fiber->stack_size);
+
+	if (stack_direction < 0) {
+		pos  = fiber->stack + fiber->stack_size;
+		pos -= FIBER_STACK_MADVISE_LIMIT;
+		return page_align_up(pos);
+	} else {
+		pos  = fiber->stack - fiber->stack_size;
+		pos += FIBER_STACK_MADVISE_LIMIT;
+		return page_align_down(pos);
+	}
+}
+
+/*
+ * Set watermark to the predefined place thus on
+ * fiber sched-out procedure we may detect if
+ * a task was too eager for stack usage.
+ */
+static inline void
+stack_set_wmark(struct fiber *fiber)
+{
+	void *pos = stack_wmark_pos(fiber);
+	memcpy(pos, stack_wmark, sizeof(stack_wmark));
+}
+
+static inline bool
+stack_has_wmark(struct fiber *fiber)
+{
+	void *pos = stack_wmark_pos(fiber);
+	return !!!memcmp(pos, stack_wmark, sizeof(stack_wmark));
+}
+
+/*
+ * Process stack switching: we try to eliminate unneeded
+ * physical page usage for stack. If fiber never touched
+ * area after/before the watermark, the OS get ralaxed in
+ * RSS usage.
+ */
+static void
+stack_sched_out(struct fiber *fiber)
+{
+	if (!fiber->stack || (fiber->flags & FIBER_CUSTOM_STACK))
+		return;
+
+	/*
+	 * If fiber ecxeed a watermark, just clear the
+	 * flag and don't waste time on it in future.
+	 */
+	if (!stack_has_wmark(fiber)) {
+		fiber->flags &= ~FIBER_MADVISED_STACK;
+		return;
+	}
+
+	/*
+	 * This is a good one, we simply notify
+	 * OS about unused stack tail, so associated
+	 * pages would be put back into a page pool.
+	 *
+	 * Note though the fiber still can use
+	 * remaining space, simply won't be handled
+	 * that fast on _first_ #pf.
+	 */
+	if (!(fiber->flags & FIBER_MADVISED_STACK)) {
+		size_t size;
+		void *tail;
+
+		if (stack_direction < 0) {
+			tail = stack_wmark_pos(fiber) - page_size;
+			size = tail - fiber->stack;
+		} else {
+			tail = stack_wmark_pos(fiber) + page_size;
+			size = fiber->stack - tail;
+		}
+
+		/*
+		 * Set the flag iif've successed,
+		 * otherwise will try on the next
+		 * round.
+		 */
+		if (!madvise(fiber->stack, size, MADV_DONTNEED))
+			fiber->flags |= FIBER_MADVISED_STACK;
+	}
+}
+
 static int
 fiber_stack_create(struct fiber *fiber, size_t stack_size)
 {
@@ -751,6 +858,7 @@ fiber_stack_create(struct fiber *fiber,
 						  fiber->stack_size);
 
 	mprotect(guard, page_size, PROT_NONE);
+	stack_set_wmark(fiber);
 	return 0;
 }
 
Index: tarantool.git/src/fiber.h
===================================================================
--- tarantool.git.orig/src/fiber.h
+++ tarantool.git/src/fiber.h
@@ -89,6 +89,12 @@ enum {
 	 * This flag is set when fiber uses custom stack size.
 	 */
 	FIBER_CUSTOM_STACK	= 1 << 5,
+	/**
+	 * This flag is set when fiber didn't exceed stack's
+	 * size watermark and considered being small enough.
+	 */
+	FIBER_MADVISED_STACK	= 1 << 6,
+
 	FIBER_DEFAULT_FLAGS = FIBER_IS_CANCELLABLE
 };
 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tarantool-patches] Re: [RFC] fiber: Increase default stack size
  2019-02-21 21:26 [RFC] fiber: Increase default stack size Cyrill Gorcunov
@ 2019-02-22  7:38 ` Георгий Кириченко
  2019-02-22  7:46   ` Cyrill Gorcunov
  2019-02-22 10:48 ` [tarantool-patches] " Alexander Turenko
  1 sibling, 1 reply; 7+ messages in thread
From: Георгий Кириченко @ 2019-02-22  7:38 UTC (permalink / raw)
  To: tarantool-patches

[-- Attachment #1: Type: text/plain, Size: 6239 bytes --]

Hi Cyrill!
Thanks for the patch, please see some comments bellow.
I like your approach but I think we should do it in a different manner: we 
could poison 64k page of a fiber stack on start and then check the poison mark 
when fiber finished. If watermark was overwritten by fiber activity we could use 
madvise in order to decrease RSS usage.


On Friday, February 22, 2019 12:26:42 AM MSK Cyrill Gorcunov wrote:
> The default 64K stack size used for years become too
> small for modern distors (Fedora 29 and etc) where third
> party libraries (such as ncurses) started to use 64K for
> own buffers and we get SIGSGV early without reaching
> interactive console phase.
> 
> To address this problem and hopefully eliminate such
> problems in future we increase default size up to 1M.
> Because this value may be too big for old distros or
> other libraries, which would never use such deep stack,
> we do a trick: put watermark at 64K offset of the stack
> and once fiber get scheduled out we test if the mark is
> still present. If we're lucky and noone touched the memory
> we use madvise() syscall to reduce RSS usage.
> 
> https://github.com/tarantool/tarantool/issues/3418
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
> ---
> Please review the patch very very carefully since I'm
> not that familiar with the codebase.
> 
>  src/fiber.c |  110
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- src/fiber.h | 
>   6 +++
>  2 files changed, 115 insertions(+), 1 deletion(-)
> 
> Index: tarantool.git/src/fiber.c
> ===================================================================
> --- tarantool.git.orig/src/fiber.c
> +++ tarantool.git/src/fiber.c
> @@ -91,11 +91,26 @@ pthread_t main_thread_id;
>  static size_t page_size;
>  static int stack_direction;
> 
> +/*
> + * A random unique value with help of uuidgen.
> + *
> + * 39ee5420-13f7-417b-9610-ea100c591ab6
> + */
> +static const char stack_wmark[] = {
> +	0x39, 0xee, 0x54, 0x20, 0x13, 0xf7, 0x41, 0x7b,
> +	0x96, 0x10, 0xea, 0x10, 0x0c, 0x59, 0x1a, 0xb6
> +};
> +
> +static void
> +stack_sched_out(struct fiber *fiber);
> +
>  enum {
>  	/* The minimum allowable fiber stack size in bytes */
>  	FIBER_STACK_SIZE_MINIMAL = 16384,
> +	/* Stack size for stack relaxed tasks */
> +	FIBER_STACK_MADVISE_LIMIT = 64536,
>  	/* Default fiber stack size in bytes */
> -	FIBER_STACK_SIZE_DEFAULT = 65536
> +	FIBER_STACK_SIZE_DEFAULT = 1048576
>  };
> 
>  /** Default fiber attributes */
> @@ -188,6 +203,7 @@ fiber_call_impl(struct fiber *callee)
>  	ASAN_START_SWITCH_FIBER(asan_state, 1,
>  				callee->stack,
>  				callee->stack_size);
> +	stack_sched_out(callee);
>  	coro_transfer(&caller->ctx, &callee->ctx);
>  	ASAN_FINISH_SWITCH_FIBER(asan_state);
>  }
> @@ -441,6 +457,7 @@ fiber_yield(void)
>  				(caller->flags & FIBER_IS_DEAD) == 0,
>  				callee->stack,
>  				callee->stack_size);
> +	stack_sched_out(callee);
>  	coro_transfer(&caller->ctx, &callee->ctx);
>  	ASAN_FINISH_SWITCH_FIBER(asan_state);
>  }
> @@ -710,6 +727,96 @@ page_align_up(void *ptr)
>  	return page_align_down(ptr + page_size - 1);
>  }
> 
> +static inline void *
> +stack_wmark_pos(struct fiber *fiber)
> +{
> +	void *pos;
> +
> +	assert(fiber->stack);
> +	assert(fiber->stack_size);
> +
> +	if (stack_direction < 0) {
> +		pos  = fiber->stack + fiber->stack_size;
> +		pos -= FIBER_STACK_MADVISE_LIMIT;
> +		return page_align_up(pos);
> +	} else {
> +		pos  = fiber->stack - fiber->stack_size;
> +		pos += FIBER_STACK_MADVISE_LIMIT;
> +		return page_align_down(pos);
> +	}
> +}
> +
> +/*
> + * Set watermark to the predefined place thus on
> + * fiber sched-out procedure we may detect if
> + * a task was too eager for stack usage.
> + */
> +static inline void
> +stack_set_wmark(struct fiber *fiber)
> +{
> +	void *pos = stack_wmark_pos(fiber);
> +	memcpy(pos, stack_wmark, sizeof(stack_wmark));
> +}
> +
> +static inline bool
> +stack_has_wmark(struct fiber *fiber)
> +{
> +	void *pos = stack_wmark_pos(fiber);
> +	return !!!memcmp(pos, stack_wmark, sizeof(stack_wmark));
> +}
> +
> +/*
> + * Process stack switching: we try to eliminate unneeded
> + * physical page usage for stack. If fiber never touched
> + * area after/before the watermark, the OS get ralaxed in
> + * RSS usage.
> + */
> +static void
> +stack_sched_out(struct fiber *fiber)
> +{
> +	if (!fiber->stack || (fiber->flags & FIBER_CUSTOM_STACK))
> +		return;
> +
> +	/*
> +	 * If fiber ecxeed a watermark, just clear the
> +	 * flag and don't waste time on it in future.
> +	 */
> +	if (!stack_has_wmark(fiber)) {
> +		fiber->flags &= ~FIBER_MADVISED_STACK;
> +		return;
> +	}
> +
> +	/*
> +	 * This is a good one, we simply notify
> +	 * OS about unused stack tail, so associated
> +	 * pages would be put back into a page pool.
> +	 *
> +	 * Note though the fiber still can use
> +	 * remaining space, simply won't be handled
> +	 * that fast on _first_ #pf.
> +	 */
> +	if (!(fiber->flags & FIBER_MADVISED_STACK)) {
> +		size_t size;
> +		void *tail;
> +
> +		if (stack_direction < 0) {
> +			tail = stack_wmark_pos(fiber) - page_size;
> +			size = tail - fiber->stack;
> +		} else {
> +			tail = stack_wmark_pos(fiber) + page_size;
> +			size = fiber->stack - tail;
> +		}
> +
> +		/*
> +		 * Set the flag iif've successed,
> +		 * otherwise will try on the next
> +		 * round.
> +		 */
> +		if (!madvise(fiber->stack, size, MADV_DONTNEED))
> +			fiber->flags |= FIBER_MADVISED_STACK;
> +	}
> +}
> +
>  static int
>  fiber_stack_create(struct fiber *fiber, size_t stack_size)
>  {
> @@ -751,6 +858,7 @@ fiber_stack_create(struct fiber *fiber,
>  						  fiber->stack_size);
> 
>  	mprotect(guard, page_size, PROT_NONE);
> +	stack_set_wmark(fiber);
>  	return 0;
>  }
> 
> Index: tarantool.git/src/fiber.h
> ===================================================================
> --- tarantool.git.orig/src/fiber.h
> +++ tarantool.git/src/fiber.h
> @@ -89,6 +89,12 @@ enum {
>  	 * This flag is set when fiber uses custom stack size.
>  	 */
>  	FIBER_CUSTOM_STACK	= 1 << 5,
> +	/**
> +	 * This flag is set when fiber didn't exceed stack's
> +	 * size watermark and considered being small enough.
> +	 */
> +	FIBER_MADVISED_STACK	= 1 << 6,
> +
>  	FIBER_DEFAULT_FLAGS = FIBER_IS_CANCELLABLE
>  };


[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tarantool-patches] Re: [RFC] fiber: Increase default stack size
  2019-02-22  7:38 ` [tarantool-patches] " Георгий Кириченко
@ 2019-02-22  7:46   ` Cyrill Gorcunov
  2019-02-22  9:24     ` Георгий Кириченко
  0 siblings, 1 reply; 7+ messages in thread
From: Cyrill Gorcunov @ 2019-02-22  7:46 UTC (permalink / raw)
  To: Георгий
	Кириченко
  Cc: tarantool-patches

On Fri, Feb 22, 2019 at 10:38:42AM +0300, Георгий Кириченко wrote:
> Hi Cyrill!

Hi!

> Thanks for the patch, please see some comments bellow.
> I like your approach but I think we should do it in a different manner: we 
> could poison 64k page of a fiber stack on start and then check the poison mark 
> when fiber finished. If watermark was overwritten by fiber activity we could use 
> madvise in order to decrease RSS usage.

I somehow fail to see how it is different from the current scheme.
In the patch we put single 8 16 bytes poison at 64K (well, page
aligned offset to be precise) and once the fiber scheduled out we
test it. I suspect the difference you mean is to _when_ test the poison?
Or you mean to poison the whole 64K?

p.s.
You know, I think this stack limits *must not* be fixed in size but
rather configurable from some parameters (getenv or something). Otherwise
it will be an endless run and catch game.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tarantool-patches] Re: [RFC] fiber: Increase default stack size
  2019-02-22  7:46   ` Cyrill Gorcunov
@ 2019-02-22  9:24     ` Георгий Кириченко
  2019-02-22  9:45       ` Cyrill Gorcunov
  0 siblings, 1 reply; 7+ messages in thread
From: Георгий Кириченко @ 2019-02-22  9:24 UTC (permalink / raw)
  To: Cyrill Gorcunov; +Cc: tarantool-patches

[-- Attachment #1: Type: text/plain, Size: 1332 bytes --]

On Friday, February 22, 2019 10:46:07 AM MSK Cyrill Gorcunov wrote:
> On Fri, Feb 22, 2019 at 10:38:42AM +0300, Георгий Кириченко wrote:
> > Hi Cyrill!
> 
> Hi!
> 
> > Thanks for the patch, please see some comments bellow.
> > I like your approach but I think we should do it in a different manner: we
> > could poison 64k page of a fiber stack on start and then check the poison
> > mark when fiber finished. If watermark was overwritten by fiber activity
> > we could use madvise in order to decrease RSS usage.
> 
> I somehow fail to see how it is different from the current scheme.
> In the patch we put single 8 16 bytes poison at 64K (well, page
> aligned offset to be precise) and once the fiber scheduled out we
> test it. I suspect the difference you mean is to _when_ test the poison?
> Or you mean to poison the whole 64K?
I complain only that you check poison on each fiber schedule what could be too 
expensive - a fiber could be scheduled about million times per second.
I think If we would check a fiber stack poison when it goes to recycle so is 
will be a better option.
> 
> p.s.
> You know, I think this stack limits *must not* be fixed in size but
> rather configurable from some parameters (getenv or something). Otherwise
> it will be an endless run and catch game.


[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tarantool-patches] Re: [RFC] fiber: Increase default stack size
  2019-02-22  9:24     ` Георгий Кириченко
@ 2019-02-22  9:45       ` Cyrill Gorcunov
  0 siblings, 0 replies; 7+ messages in thread
From: Cyrill Gorcunov @ 2019-02-22  9:45 UTC (permalink / raw)
  To: Георгий
	Кириченко
  Cc: tarantool-patches

On Fri, Feb 22, 2019 at 12:24:12PM +0300, Георгий Кириченко wrote:
> > 
> > I somehow fail to see how it is different from the current scheme.
> > In the patch we put single 8 16 bytes poison at 64K (well, page
> > aligned offset to be precise) and once the fiber scheduled out we
> > test it. I suspect the difference you mean is to _when_ test the poison?
> > Or you mean to poison the whole 64K?
>
> I complain only that you check poison on each fiber schedule what could be too 
> expensive - a fiber could be scheduled about million times per second.
> I think If we would check a fiber stack poison when it goes to recycle so is 
> will be a better option.

Sounds reasonable. Will take a look, thanks! Still we should take into
account that the stack of a fiber (for our regular fibers) won't exceed
1M thus will be covered by 2M pmd entry and expensive part is a cache
line refill only. Initially I thought if I could simply test for
poison just once when fiber is sched'ing out but then realised that
a task can easily wipe poison out on later sched cycles.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [tarantool-patches] [RFC] fiber: Increase default stack size
  2019-02-21 21:26 [RFC] fiber: Increase default stack size Cyrill Gorcunov
  2019-02-22  7:38 ` [tarantool-patches] " Георгий Кириченко
@ 2019-02-22 10:48 ` Alexander Turenko
  2019-02-22 10:57   ` Cyrill Gorcunov
  1 sibling, 1 reply; 7+ messages in thread
From: Alexander Turenko @ 2019-02-22 10:48 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Kirill Yukhin, Vladimir Davydov, Georgy Kirichenko, tarantool-patches

Hi!

I look at the patch and found several typos, so wrote this email. See
below for them.

Also I wonder whether we can add a metric for a fiber stack size via,
say, fiber.info()? It'll give us understanding how close we to the
limits in certain workloads. We already have backtraces there.

WBR, Alexander Turenko.

On Fri, Feb 22, 2019 at 12:26:42AM +0300, Cyrill Gorcunov wrote:
> The default 64K stack size used for years become too
> small for modern distors (Fedora 29 and etc) where third
> party libraries (such as ncurses) started to use 64K for
> own buffers and we get SIGSGV early without reaching
> interactive console phase.
> 
> To address this problem and hopefully eliminate such
> problems in future we increase default size up to 1M.
> Because this value may be too big for old distros or
> other libraries, which would never use such deep stack,
> we do a trick: put watermark at 64K offset of the stack
> and once fiber get scheduled out we test if the mark is
> still present. If we're lucky and noone touched the memory
> we use madvise() syscall to reduce RSS usage.
> 
> https://github.com/tarantool/tarantool/issues/3418
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
> ---
> Please review the patch very very carefully since I'm
> not that familiar with the codebase.
> 
>  src/fiber.c |  110 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  src/fiber.h |    6 +++
>  2 files changed, 115 insertions(+), 1 deletion(-)
> 
> Index: tarantool.git/src/fiber.c
> ===================================================================
> --- tarantool.git.orig/src/fiber.c
> +++ tarantool.git/src/fiber.c
> @@ -91,11 +91,26 @@ pthread_t main_thread_id;
>  static size_t page_size;
>  static int stack_direction;
>  
> +/*
> + * A random unique value with help of uuidgen.
> + *
> + * 39ee5420-13f7-417b-9610-ea100c591ab6
> + */
> +static const char stack_wmark[] = {
> +	0x39, 0xee, 0x54, 0x20, 0x13, 0xf7, 0x41, 0x7b,
> +	0x96, 0x10, 0xea, 0x10, 0x0c, 0x59, 0x1a, 0xb6
> +};
> +
> +static void
> +stack_sched_out(struct fiber *fiber);
> +
>  enum {
>  	/* The minimum allowable fiber stack size in bytes */
>  	FIBER_STACK_SIZE_MINIMAL = 16384,
> +	/* Stack size for stack relaxed tasks */
> +	FIBER_STACK_MADVISE_LIMIT = 64536,
>  	/* Default fiber stack size in bytes */
> -	FIBER_STACK_SIZE_DEFAULT = 65536
> +	FIBER_STACK_SIZE_DEFAULT = 1048576
>  };
>  
>  /** Default fiber attributes */
> @@ -188,6 +203,7 @@ fiber_call_impl(struct fiber *callee)
>  	ASAN_START_SWITCH_FIBER(asan_state, 1,
>  				callee->stack,
>  				callee->stack_size);
> +	stack_sched_out(callee);
>  	coro_transfer(&caller->ctx, &callee->ctx);
>  	ASAN_FINISH_SWITCH_FIBER(asan_state);
>  }
> @@ -441,6 +457,7 @@ fiber_yield(void)
>  				(caller->flags & FIBER_IS_DEAD) == 0,
>  				callee->stack,
>  				callee->stack_size);
> +	stack_sched_out(callee);
>  	coro_transfer(&caller->ctx, &callee->ctx);
>  	ASAN_FINISH_SWITCH_FIBER(asan_state);
>  }
> @@ -710,6 +727,96 @@ page_align_up(void *ptr)
>  	return page_align_down(ptr + page_size - 1);
>  }
>  
> +static inline void *
> +stack_wmark_pos(struct fiber *fiber)
> +{
> +	void *pos;
> +
> +	assert(fiber->stack);
> +	assert(fiber->stack_size);
> +
> +	if (stack_direction < 0) {
> +		pos  = fiber->stack + fiber->stack_size;
> +		pos -= FIBER_STACK_MADVISE_LIMIT;
> +		return page_align_up(pos);
> +	} else {
> +		pos  = fiber->stack - fiber->stack_size;
> +		pos += FIBER_STACK_MADVISE_LIMIT;
> +		return page_align_down(pos);
> +	}
> +}
> +
> +/*
> + * Set watermark to the predefined place thus on
> + * fiber sched-out procedure we may detect if
> + * a task was too eager for stack usage.
> + */
> +static inline void
> +stack_set_wmark(struct fiber *fiber)
> +{
> +	void *pos = stack_wmark_pos(fiber);
> +	memcpy(pos, stack_wmark, sizeof(stack_wmark));
> +}
> +
> +static inline bool
> +stack_has_wmark(struct fiber *fiber)
> +{
> +	void *pos = stack_wmark_pos(fiber);
> +	return !!!memcmp(pos, stack_wmark, sizeof(stack_wmark));

Triple negation? Why?

> +}
> +
> +/*
> + * Process stack switching: we try to eliminate unneeded
> + * physical page usage for stack. If fiber never touched
> + * area after/before the watermark, the OS get ralaxed in

ralaxed

> + * RSS usage.
> + */
> +static void
> +stack_sched_out(struct fiber *fiber)
> +{
> +	if (!fiber->stack || (fiber->flags & FIBER_CUSTOM_STACK))
> +		return;
> +
> +	/*
> +	 * If fiber ecxeed a watermark, just clear the

ecxeed

> +	 * flag and don't waste time on it in future.
> +	 */
> +	if (!stack_has_wmark(fiber)) {
> +		fiber->flags &= ~FIBER_MADVISED_STACK;
> +		return;
> +	}
> +
> +	/*
> +	 * This is a good one, we simply notify
> +	 * OS about unused stack tail, so associated
> +	 * pages would be put back into a page pool.
> +	 *
> +	 * Note though the fiber still can use
> +	 * remaining space, simply won't be handled
> +	 * that fast on _first_ #pf.
> +	 */
> +	if (!(fiber->flags & FIBER_MADVISED_STACK)) {
> +		size_t size;
> +		void *tail;
> +
> +		if (stack_direction < 0) {
> +			tail = stack_wmark_pos(fiber) - page_size;
> +			size = tail - fiber->stack;
> +		} else {
> +			tail = stack_wmark_pos(fiber) + page_size;
> +			size = fiber->stack - tail;
> +		}
> +
> +		/*
> +		 * Set the flag iif've successed,

iif've

> +		 * otherwise will try on the next
> +		 * round.
> +		 */
> +		if (!madvise(fiber->stack, size, MADV_DONTNEED))
> +			fiber->flags |= FIBER_MADVISED_STACK;
> +	}
> +}
> +
>  static int
>  fiber_stack_create(struct fiber *fiber, size_t stack_size)
>  {
> @@ -751,6 +858,7 @@ fiber_stack_create(struct fiber *fiber,
>  						  fiber->stack_size);
>  
>  	mprotect(guard, page_size, PROT_NONE);
> +	stack_set_wmark(fiber);
>  	return 0;
>  }
>  
> Index: tarantool.git/src/fiber.h
> ===================================================================
> --- tarantool.git.orig/src/fiber.h
> +++ tarantool.git/src/fiber.h
> @@ -89,6 +89,12 @@ enum {
>  	 * This flag is set when fiber uses custom stack size.
>  	 */
>  	FIBER_CUSTOM_STACK	= 1 << 5,
> +	/**
> +	 * This flag is set when fiber didn't exceed stack's
> +	 * size watermark and considered being small enough.
> +	 */
> +	FIBER_MADVISED_STACK	= 1 << 6,
> +
>  	FIBER_DEFAULT_FLAGS = FIBER_IS_CANCELLABLE
>  };
>  
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [tarantool-patches] [RFC] fiber: Increase default stack size
  2019-02-22 10:48 ` [tarantool-patches] " Alexander Turenko
@ 2019-02-22 10:57   ` Cyrill Gorcunov
  0 siblings, 0 replies; 7+ messages in thread
From: Cyrill Gorcunov @ 2019-02-22 10:57 UTC (permalink / raw)
  To: Alexander Turenko
  Cc: Kirill Yukhin, Vladimir Davydov, Georgy Kirichenko, tarantool-patches

On Fri, Feb 22, 2019 at 01:48:11PM +0300, Alexander Turenko wrote:
> Hi!
> 
> I look at the patch and found several typos, so wrote this email. See
> below for them.

Thanks!

> Also I wonder whether we can add a metric for a fiber stack size via,
> say, fiber.info()? It'll give us understanding how close we to the
> limits in certain workloads. We already have backtraces there.

Didn't know about fiber.info() feature. I think we can add some stat.

...
> > +
> > +static inline bool
> > +stack_has_wmark(struct fiber *fiber)
> > +{
> > +	void *pos = stack_wmark_pos(fiber);
> > +	return !!!memcmp(pos, stack_wmark, sizeof(stack_wmark));
> 
> Triple negation? Why?

To convert into bool's inverse. IOW, it is a common trick using
double negation to map [0;N] into bools and third negation
for the inverse. Actually on low level it is the same as
return memcmp(...) == 0 but shorter. If it is confusing
I can put traditional comparision.

> > +}
> > +
> > +/*
> > + * Process stack switching: we try to eliminate unneeded
> > + * physical page usage for stack. If fiber never touched
> > + * area after/before the watermark, the OS get ralaxed in
> 
> ralaxed
+1

> > +	 * If fiber ecxeed a watermark, just clear the
> 
> ecxeed

thanks!

> > +
> > +		/*
> > +		 * Set the flag iif've successed,
> 
> iif've

iif is correct, i managed to miss "we" :) thanks

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-02-22 10:57 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-21 21:26 [RFC] fiber: Increase default stack size Cyrill Gorcunov
2019-02-22  7:38 ` [tarantool-patches] " Георгий Кириченко
2019-02-22  7:46   ` Cyrill Gorcunov
2019-02-22  9:24     ` Георгий Кириченко
2019-02-22  9:45       ` Cyrill Gorcunov
2019-02-22 10:48 ` [tarantool-patches] " Alexander Turenko
2019-02-22 10:57   ` Cyrill Gorcunov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox