From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Date: Wed, 13 Mar 2019 16:52:04 +0300 From: Vladimir Davydov Subject: Re: [PATCH 3/3] lib/core/fiber: Put watermarks into stack to track its usage Message-ID: <20190313135204.x7gn54iqiy5ulavn@esperanza> References: <20190312224721.8053-1-gorcunov@gmail.com> <20190312224721.8053-4-gorcunov@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20190312224721.8053-4-gorcunov@gmail.com> To: Cyrill Gorcunov Cc: tml List-ID: On Wed, Mar 13, 2019 at 01:47:21AM +0300, Cyrill Gorcunov wrote: > We want to detect a situation where task in fiber is too eager for > stack and close to its exhausting. For this sake upon stack creation > we put 8 marks on last stack page with step of 128 bytes. Such params > allows us to fill ~1/4 of a page, which does seem reasonable but > we might change this params with time. > > Since the watermark position is permanent and some task is close > to stack limit we print about the situation once to not spam > a user much and stop putting the mark on recycling. > > Still this techique doesn't guarantee to handle all possible > situations so to increas probability of catching eager fibers > we put marks *not* at page start address but withing some offset, > randomly generated. > > To minimize a pressue on memory system we try to relax stack > usage with that named "shink" watermark. Basically it is the > same mark as for overflow detection but placed at 64K bound > and on every recycle we try to shrink stack usage dropping > pages if the watermark has been erased by a task. > > Closes #3418 > > Signed-off-by: Cyrill Gorcunov > --- > src/lib/core/fiber.c | 167 +++++++++++++++++++++++++++++++++++++++++++ > src/lib/core/fiber.h | 15 ++++ > 2 files changed, 182 insertions(+) > > diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c > index a1b261ad4..cae291d1a 100644 > --- a/src/lib/core/fiber.c > +++ b/src/lib/core/fiber.c > @@ -104,6 +104,27 @@ static const struct fiber_attr fiber_attr_default = { > .flags = FIBER_DEFAULT_FLAGS > }; > > +/* > + * Random values generated with uuid. > + */ > +static const uint64_t poison_pool[] = { > + 0x74f31d37285c4c37, 0xb10269a05bf10c29, > + 0x0994d845bd284e0f, 0x9ffd4f7129c184df, > + 0x357151e6711c4415, 0x8c5e5f41aafe6f28, > + 0x6917dd79e78049d5, 0xba61957c65ca2465, > +}; > + > +/* > + * We poison by 8 bytes as it natural for stack > + * step on x86-64. Also 128 byte gap between > + * poison values should cover a common cases. > + */ > +#define POISON_SIZE (sizeof(poison_pool) / sizeof(poison_pool[0])) > +#define POISON_GAP (128 + sizeof(poison_pool[0])) > +#define POISON_OFF (POISON_GAP / sizeof(poison_pool[0])) > + > +static void fiber_wmark_recycle(struct fiber *fiber); > + > void > fiber_attr_create(struct fiber_attr *fiber_attr) > { > @@ -624,6 +645,7 @@ fiber_recycle(struct fiber *fiber) > /* no pending wakeup */ > assert(rlist_empty(&fiber->state)); > bool has_custom_stack = fiber->flags & FIBER_CUSTOM_STACK; > + fiber_wmark_recycle(fiber); > fiber_reset(fiber); > fiber->name[0] = '\0'; > fiber->f = NULL; > @@ -710,6 +732,146 @@ page_align_up(void *ptr) > return page_align_down(ptr + page_size - 1); > } > > +static bool > +stack_has_wmark(void *addr) Please add brief comments to all the functions you add. > +{ > + const uint64_t *src = poison_pool; > + const uint64_t *dst = addr; > + size_t i; > + > + for (i = 0; i < POISON_SIZE; i++) { > + if (*dst != src[i]) > + return false; > + dst += POISON_OFF; > + } > + > + return true; > +} > + > +static void > +stack_put_wmark(void *addr) > +{ > + const uint64_t *src = poison_pool; > + uint64_t *dst = addr; > + size_t i; > + > + for (i = 0; i < POISON_SIZE; i++) { > + *dst = src[i]; > + dst += POISON_OFF; > + } > +} > + > +#ifndef TARGET_OS_DARWIN > +static void > +stack_shrink(struct fiber *fiber) > +{ > + void *start, *end; > + > + /* > + * When dropping pages make sure the page > + * containing overflow mark is untouched. > + * Same time no need to unmap the page which > + * carries "shrink" wmark, since we're updating > + * this page anyway. > + */ > + if (stack_direction < 0) { > + end = page_align_down(fiber->stack_shrink_wmark); > + start = page_align_up(fiber->stack_overflow_wmark); > + madvise(start, end - start, MADV_DONTNEED); > + } else { > + start = page_align_up(fiber->stack_shrink_wmark); > + end = page_align_down(fiber->stack_overflow_wmark); > + madvise(start, end - start, MADV_DONTNEED); > + } Nit: you could move madvise() out of the if-else block. > + stack_put_wmark(fiber->stack_shrink_wmark); > +} > +#endif > + > +static void > +fiber_wmark_recycle(struct fiber *fiber) > +{ > + static bool overflow_warned = false; > + > + if (fiber->stack == NULL || fiber->flags & FIBER_CUSTOM_STACK) > + return; > + > +#ifndef TARGET_OS_DARWIN > + /* > + * On recycle we're trying to shrink stack > + * to release memory pressure but if only > + * a fiber has been using too much memory. > + */ > + if (!stack_has_wmark(fiber->stack_shrink_wmark)) > + stack_shrink(fiber); > +#endif > + > + /* > + * We are watching for stack overflow in one shot way: > + * simply to not spam a user with messages, if someone > + * triggered the problem it is highly likely that > + * an another fiber hit the same soon. > + */ > + if (overflow_warned) > + return; > + > + if (!stack_has_wmark(fiber->stack_overflow_wmark)) { > + say_warn("stack usage is close to the limit of %zu bytes", > + (size_t)FIBER_STACK_SIZE_DEFAULT); IMO warning only when we are close to the limit isn't very useful. Let's keep track of the max stack size instead and print it whenever it's increased. > + overflow_warned = true; > + } > +} > + > +static void > +fiber_wmark_init(struct fiber *fiber) > +{ > + /* > + * No tracking on custom stacks > + * in a sake of simplicity. > + */ > + if (fiber->flags & FIBER_CUSTOM_STACK) { > + fiber->stack_overflow_wmark = NULL; > + fiber->stack_shrink_wmark = NULL; > + fiber->wmark_inpage_offset = 0; > + return; > + } > + > + /* > + * To increase probability of the stack overflow > + * detection we put first mark at random position > + * of the first 128 bytes range. The rest of the marks > + * are put with constant step simply to not carry > + * offsets in memory. > + */ > + fiber->wmark_inpage_offset = ((rand() % 128) + 8) & ~7; Please define a constant for this. > + > + /* > + * Initially we arm the last page of the stack > + * to catch if we're getting close to its exhausting. > + * > + * The shrink watermark is put at 64K limit which is > + * known value to not cause much memory pressue even > + * with large number of fibers. > + */ > + if (stack_direction < 0) { > + fiber->stack_overflow_wmark = fiber->stack; > + fiber->stack_overflow_wmark += fiber->wmark_inpage_offset; > + > + fiber->stack_shrink_wmark = fiber->stack + fiber->stack_size; > + fiber->stack_shrink_wmark -= 16 << 12; Ditto. > + fiber->stack_shrink_wmark += fiber->wmark_inpage_offset; > + } else { > + fiber->stack_overflow_wmark = fiber->stack + fiber->stack_size; > + fiber->stack_overflow_wmark -= page_size; > + fiber->stack_overflow_wmark += fiber->wmark_inpage_offset; > + > + fiber->stack_shrink_wmark = fiber->stack; > + fiber->stack_shrink_wmark += (16 << 12) - page_size; > + fiber->stack_shrink_wmark += fiber->wmark_inpage_offset; > + } > + stack_put_wmark(fiber->stack_overflow_wmark); > + stack_put_wmark(fiber->stack_shrink_wmark); > +} > + > static int > fiber_stack_create(struct fiber *fiber, size_t stack_size) > { > @@ -758,6 +920,7 @@ fiber_stack_create(struct fiber *fiber, size_t stack_size) > madvise(fiber->stack, fiber->stack_size, MADV_DONTNEED); > #endif > > + fiber_wmark_init(fiber); > mprotect(guard, page_size, PROT_NONE); > return 0; > } > @@ -927,8 +1090,12 @@ cord_create(struct cord *cord, const char *name) > /* Record stack extents */ > tt_pthread_attr_getstack(cord->id, &cord->sched.stack, > &cord->sched.stack_size); > + cord->sched.stack_overflow_wmark = cord->sched.stack; > + cord->sched.stack_shrink_wmark = cord->sched.stack; I don't think we need to set the watermarks for the sched fiber, even if ASAN is on. > #else > cord->sched.stack = NULL; > + cord->sched.stack_overflow_wmark = NULL; > + cord->sched.stack_shrink_wmark = NULL; > cord->sched.stack_size = 0; > #endif > } > diff --git a/src/lib/core/fiber.h b/src/lib/core/fiber.h > index f1f5a0555..431e3da09 100644 > --- a/src/lib/core/fiber.h > +++ b/src/lib/core/fiber.h > @@ -348,6 +348,21 @@ struct fiber { > struct slab *stack_slab; > /** Coro stack addr. */ > void *stack; > + /** > + * Stack watermark addr to detect > + * if we need shrink stack on reuse. > + */ > + void *stack_shrink_wmark; > + /** > + * Stack watermark addr for overflow detection. To warn > + * a user about stack eager fibers. > + */ > + void *stack_overflow_wmark; > + /** > + * An offset to watermark position in stack > + * since page bound address. > + */ > + unsigned int wmark_inpage_offset; > /** Coro stack size. */ > size_t stack_size; > /** Valgrind stack id. */