From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-lj1-f196.google.com (mail-lj1-f196.google.com [209.85.208.196]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 63C70469719 for ; Thu, 13 Feb 2020 23:56:33 +0300 (MSK) Received: by mail-lj1-f196.google.com with SMTP id y6so8230353lji.0 for ; Thu, 13 Feb 2020 12:56:33 -0800 (PST) From: Cyrill Gorcunov Date: Thu, 13 Feb 2020 23:56:17 +0300 Message-Id: <20200213205618.7982-2-gorcunov@gmail.com> In-Reply-To: <20200213205618.7982-1-gorcunov@gmail.com> References: <20200213205618.7982-1-gorcunov@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH v7 1/2] fiber: set diagnostics at madvise/mprotect failure List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tml Cc: Vladislav Shpilevoy Both madvise and mprotect calls can fail due to various reasons, mostly because of lack of free memory in the system. We log such cases via say_x helpers but this is not enough. In particular tarantool/memcached relies on diag error to be set to detect an error condition: | expire_fiber = fiber_new(name, memcached_expire_loop); | const box_error_t *err = box_error_last(); | if (err) { | say_error("Can't start the expire fiber"); | say_error("%s", box_error_message(err)); | return -1; | } Thus lets use diag_set() helper here and instead of macros use inline functions for better readability. Fixes #4722 Reported-by: Alexander Turenko Reviewed-by: Alexander Turenko Signed-off-by: Cyrill Gorcunov --- src/lib/core/errinj.h | 2 + src/lib/core/fiber.c | 84 +++++++++++++++++----- test/box/errinj.result | 136 ++++++++++++++++++----------------- test/unit/CMakeLists.txt | 4 ++ test/unit/fiber_stack.c | 79 ++++++++++++++++++++ test/unit/fiber_stack.result | 8 +++ test/unit/suite.ini | 2 +- 7 files changed, 231 insertions(+), 84 deletions(-) create mode 100644 test/unit/fiber_stack.c create mode 100644 test/unit/fiber_stack.result diff --git a/src/lib/core/errinj.h b/src/lib/core/errinj.h index 672da2119..ed0cba903 100644 --- a/src/lib/core/errinj.h +++ b/src/lib/core/errinj.h @@ -135,6 +135,8 @@ struct errinj { _(ERRINJ_COIO_SENDFILE_CHUNK, ERRINJ_INT, {.iparam = -1}) \ _(ERRINJ_SWIM_FD_ONLY, ERRINJ_BOOL, {.bparam = false}) \ _(ERRINJ_DYN_MODULE_COUNT, ERRINJ_INT, {.iparam = 0}) \ + _(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL, {.bparam = false}) \ + _(ERRINJ_FIBER_MPROTECT, ERRINJ_INT, {.iparam = -1}) ENUM0(errinj_id, ERRINJ_LIST); extern struct errinj errinjs[]; diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c index 00ae8cded..f10687b29 100644 --- a/src/lib/core/fiber.c +++ b/src/lib/core/fiber.c @@ -41,6 +41,7 @@ #include "assoc.h" #include "memory.h" #include "trigger.h" +#include "errinj.h" #if ENABLE_FIBER_TOP #include /* __rdtscp() */ @@ -173,21 +174,40 @@ static int (*fiber_invoke)(fiber_func f, va_list ap); #define ASAN_FINISH_SWITCH_FIBER(var_name) #endif -#define fiber_madvise(addr, len, advice) \ -({ \ - int err = madvise((addr), (len), (advice)); \ - if (err) \ - say_syserror("madvise"); \ - err; \ -}) - -#define fiber_mprotect(addr, len, prot) \ -({ \ - int err = mprotect((addr), (len), (prot)); \ - if (err) \ - say_syserror("mprotect"); \ - err; \ -}) +static inline int +fiber_madvise(void *addr, size_t len, int advice) +{ + int rc = 0; + + ERROR_INJECT(ERRINJ_FIBER_MADVISE, { + errno = ENOMEM; + rc = -1; + }); + + if (rc != 0 || madvise(addr, len, advice) != 0) { + diag_set(SystemError, "fiber madvise failed"); + return -1; + } + return 0; +} + +static inline int +fiber_mprotect(void *addr, size_t len, int prot) +{ + int rc = 0; + + struct errinj *inj = errinj(ERRINJ_FIBER_MPROTECT, ERRINJ_INT); + if (inj != NULL && inj->iparam == prot) { + errno = ENOMEM; + rc = -1; + } + + if (rc != 0 || mprotect(addr, len, prot) != 0) { + diag_set(SystemError, "fiber mprotect failed"); + return -1; + } + return 0; +} #if ENABLE_FIBER_TOP static __thread bool fiber_top_enabled = false; @@ -951,6 +971,12 @@ fiber_stack_recycle(struct fiber *fiber) start = page_align_up(fiber->stack_watermark); end = fiber->stack + fiber->stack_size; } + + /* + * Ignore errors on MADV_DONTNEED because this is + * just a hint for OS and not critical for + * functionality. + */ fiber_madvise(start, end - start, MADV_DONTNEED); stack_put_watermark(fiber->stack_watermark); } @@ -969,7 +995,9 @@ fiber_stack_watermark_create(struct fiber *fiber) /* * We don't expect the whole stack usage in regular - * loads, let's try to minimize rss pressure. + * loads, let's try to minimize rss pressure. But do + * not exit if MADV_DONTNEED failed, it is just a hint + * for OS, not critical one. */ fiber_madvise(fiber->stack, fiber->stack_size, MADV_DONTNEED); @@ -1007,6 +1035,8 @@ fiber_stack_watermark_create(struct fiber *fiber) static void fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc) { + static const int mprotect_flags = PROT_READ | PROT_WRITE; + if (fiber->stack != NULL) { VALGRIND_STACK_DEREGISTER(fiber->stack_id); #if ENABLE_ASAN @@ -1017,7 +1047,22 @@ fiber_stack_destroy(struct fiber *fiber, struct slab_cache *slabc) guard = page_align_down(fiber->stack - page_size); else guard = page_align_up(fiber->stack + fiber->stack_size); - fiber_mprotect(guard, page_size, PROT_READ | PROT_WRITE); + + if (fiber_mprotect(guard, page_size, mprotect_flags) != 0) { + /* + * FIXME: We need some intelligent handling: + * say put this slab into a queue and retry + * to setup the original protection back in + * background. + * + * Note that in case if we're called from + * fiber_stack_create() the @a mprotect_flags is + * the same as the slab been created with, so + * calling mprotect for VMA with same flags + * won't fail. + */ + diag_log(); + } slab_put(slabc, fiber->stack_slab); } } @@ -1064,6 +1109,11 @@ fiber_stack_create(struct fiber *fiber, struct slab_cache *slabc, fiber->stack_size); if (fiber_mprotect(guard, page_size, PROT_NONE)) { + /* + * Write an error into the log since a guard + * page is critical for functionality. + */ + diag_log(); fiber_stack_destroy(fiber, slabc); return -1; } diff --git a/test/box/errinj.result b/test/box/errinj.result index babe36b1b..16f6b40c2 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -23,132 +23,136 @@ errinj.info() --- - ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT: state: 0 - ERRINJ_WAL_WRITE: - state: false - ERRINJ_RELAY_BREAK_LSN: + ERRINJ_WAL_BREAK_LSN: state: -1 - ERRINJ_HTTPC_EXECUTE: - state: false ERRINJ_VYRUN_DATA_READ: state: false - ERRINJ_SWIM_FD_ONLY: - state: false - ERRINJ_SQL_NAME_NORMALIZATION: - state: false ERRINJ_VY_SCHED_TIMEOUT: state: 0 - ERRINJ_COIO_SENDFILE_CHUNK: - state: -1 ERRINJ_HTTP_RESPONSE_ADD_WAIT: state: false - ERRINJ_WAL_WRITE_PARTIAL: - state: -1 - ERRINJ_VY_GC: - state: false - ERRINJ_WAL_DELAY: - state: false - ERRINJ_INDEX_ALLOC: - state: false ERRINJ_WAL_WRITE_EOF: state: false - ERRINJ_WAL_SYNC: - state: false - ERRINJ_BUILD_INDEX: - state: -1 ERRINJ_BUILD_INDEX_DELAY: state: false - ERRINJ_VY_RUN_FILE_RENAME: - state: false - ERRINJ_VY_COMPACTION_DELAY: - state: false - ERRINJ_VY_DUMP_DELAY: - state: false ERRINJ_VY_DELAY_PK_LOOKUP: state: false - ERRINJ_VY_TASK_COMPLETE: - state: false - ERRINJ_PORT_DUMP: + ERRINJ_VY_POINT_ITER_WAIT: state: false - ERRINJ_WAL_BREAK_LSN: - state: -1 ERRINJ_WAL_IO: state: false - ERRINJ_WAL_FALLOCATE: - state: 0 - ERRINJ_DYN_MODULE_COUNT: - state: 0 ERRINJ_VY_INDEX_FILE_RENAME: state: false ERRINJ_TUPLE_FORMAT_COUNT: state: -1 ERRINJ_TUPLE_ALLOC: state: false - ERRINJ_VY_RUN_WRITE_DELAY: + ERRINJ_VY_RUN_FILE_RENAME: state: false ERRINJ_VY_READ_PAGE: state: false ERRINJ_RELAY_REPORT_INTERVAL: state: 0 - ERRINJ_VY_LOG_FILE_RENAME: - state: false - ERRINJ_VY_READ_PAGE_TIMEOUT: - state: 0 + ERRINJ_RELAY_BREAK_LSN: + state: -1 ERRINJ_XLOG_META: state: false - ERRINJ_SIO_READ_MAX: - state: -1 ERRINJ_SNAP_COMMIT_DELAY: state: false - ERRINJ_WAL_WRITE_DISK: + ERRINJ_VY_RUN_WRITE: state: false - ERRINJ_SNAP_WRITE_DELAY: + ERRINJ_BUILD_INDEX: + state: -1 + ERRINJ_RELAY_FINAL_JOIN: + state: false + ERRINJ_REPLICA_JOIN_DELAY: state: false ERRINJ_LOG_ROTATE: state: false - ERRINJ_VY_RUN_WRITE: + ERRINJ_MEMTX_DELAY_GC: state: false - ERRINJ_CHECK_FORMAT_DELAY: + ERRINJ_XLOG_GARBAGE: + state: false + ERRINJ_VY_READ_PAGE_DELAY: + state: false + ERRINJ_SWIM_FD_ONLY: + state: false + ERRINJ_WAL_WRITE: + state: false + ERRINJ_HTTPC_EXECUTE: + state: false + ERRINJ_SQL_NAME_NORMALIZATION: + state: false + ERRINJ_WAL_WRITE_PARTIAL: + state: -1 + ERRINJ_VY_GC: + state: false + ERRINJ_WAL_DELAY: + state: false + ERRINJ_XLOG_READ: + state: -1 + ERRINJ_WAL_SYNC: + state: false + ERRINJ_VY_TASK_COMPLETE: state: false + ERRINJ_PORT_DUMP: + state: false + ERRINJ_COIO_SENDFILE_CHUNK: + state: -1 + ERRINJ_DYN_MODULE_COUNT: + state: 0 + ERRINJ_SIO_READ_MAX: + state: -1 + ERRINJ_FIBER_MPROTECT: + state: -1 + ERRINJ_FIBER_MADVISE: + state: false + ERRINJ_RELAY_TIMEOUT: + state: 0 + ERRINJ_VY_DUMP_DELAY: + state: false + ERRINJ_VY_SQUASH_TIMEOUT: + state: 0 ERRINJ_VY_LOG_FLUSH_DELAY: state: false - ERRINJ_RELAY_FINAL_JOIN: + ERRINJ_RELAY_SEND_DELAY: state: false - ERRINJ_REPLICA_JOIN_DELAY: + ERRINJ_VY_COMPACTION_DELAY: state: false - ERRINJ_RELAY_FINAL_SLEEP: + ERRINJ_VY_LOG_FILE_RENAME: state: false ERRINJ_VY_RUN_DISCARD: state: false ERRINJ_WAL_ROTATE: state: false - ERRINJ_RELAY_EXIT_DELAY: + ERRINJ_VY_READ_PAGE_TIMEOUT: state: 0 - ERRINJ_VY_POINT_ITER_WAIT: + ERRINJ_VY_INDEX_DUMP: + state: -1 + ERRINJ_TUPLE_FIELD: state: false - ERRINJ_MEMTX_DELAY_GC: + ERRINJ_SNAP_WRITE_DELAY: state: false ERRINJ_IPROTO_TX_DELAY: state: false - ERRINJ_XLOG_READ: - state: -1 - ERRINJ_TUPLE_FIELD: + ERRINJ_RELAY_EXIT_DELAY: + state: 0 + ERRINJ_RELAY_FINAL_SLEEP: state: false - ERRINJ_XLOG_GARBAGE: + ERRINJ_WAL_WRITE_DISK: state: false - ERRINJ_VY_INDEX_DUMP: - state: -1 - ERRINJ_VY_READ_PAGE_DELAY: + ERRINJ_CHECK_FORMAT_DELAY: state: false ERRINJ_TESTING: state: false - ERRINJ_RELAY_SEND_DELAY: + ERRINJ_VY_RUN_WRITE_DELAY: state: false - ERRINJ_VY_SQUASH_TIMEOUT: + ERRINJ_WAL_FALLOCATE: state: 0 ERRINJ_VY_LOG_FLUSH: state: false - ERRINJ_RELAY_TIMEOUT: - state: 0 + ERRINJ_INDEX_ALLOC: + state: false ... errinj.set("some-injection", true) --- diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 4a57597e9..c037ac539 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -74,6 +74,10 @@ add_executable(fiber.test fiber.cc) set_source_files_properties(fiber.cc PROPERTIES COMPILE_FLAGS -O0) target_link_libraries(fiber.test core unit) +add_executable(fiber_stack.test fiber_stack.c) +set_source_files_properties(fiber_stack.c PROPERTIES COMPILE_FLAGS -O0) +target_link_libraries(fiber_stack.test core unit) + if (NOT ENABLE_GCOV) # This test is known to be broken with GCOV add_executable(guard.test guard.cc) diff --git a/test/unit/fiber_stack.c b/test/unit/fiber_stack.c new file mode 100644 index 000000000..103dfaf0d --- /dev/null +++ b/test/unit/fiber_stack.c @@ -0,0 +1,79 @@ +#include "memory.h" +#include "fiber.h" +#include "unit.h" +#include "trivia/util.h" +#include "errinj.h" + +static struct fiber_attr default_attr; + +static int +noop_f(va_list ap) +{ + return 0; +} + +static int +main_f(va_list ap) +{ + struct errinj *inj; + struct fiber *fiber; + + header(); + plan(4); + + /* + * Set non-default stack size to prevent reusing of an + * existing fiber. + */ + struct fiber_attr *fiber_attr = fiber_attr_new(); + fiber_attr_setstacksize(fiber_attr, default_attr.stack_size * 2); + + /* + * Clear the fiber's diagnostics area to check that failed + * fiber_new() sets an error. + */ + diag_clear(diag_get()); + + /* + * Check guard page setup via mprotect. We can't test the fiber + * destroy path since it clears fiber's diag. + */ + inj = errinj(ERRINJ_FIBER_MPROTECT, ERRINJ_INT); + inj->iparam = PROT_NONE; + fiber = fiber_new_ex("test_mprotect", fiber_attr, noop_f); + inj->iparam = -1; + + ok(fiber == NULL, "mprotect: failed to setup fiber guard page"); + ok(diag_get() != NULL, "mprotect: diag is armed after error"); + + /* + * Check madvise. We can't test the fiber destroy + * path since it is cleaning error. + */ + diag_clear(diag_get()); + inj = errinj(ERRINJ_FIBER_MADVISE, ERRINJ_BOOL); + inj->bparam = true; + fiber = fiber_new_ex("test_madvise", fiber_attr, noop_f); + inj->bparam = false; + + ok(fiber != NULL, "madvise: non critical error on madvise hint"); + ok(diag_get() != NULL, "madvise: diag is armed after error"); + + footer(); + + ev_break(loop(), EVBREAK_ALL); + return check_plan(); +} + +int main() +{ + memory_init(); + fiber_init(fiber_c_invoke); + fiber_attr_create(&default_attr); + struct fiber *f = fiber_new("main", main_f); + fiber_wakeup(f); + ev_run(loop(), 0); + fiber_free(); + memory_free(); + return 0; +} diff --git a/test/unit/fiber_stack.result b/test/unit/fiber_stack.result new file mode 100644 index 000000000..43ff74b2f --- /dev/null +++ b/test/unit/fiber_stack.result @@ -0,0 +1,8 @@ +SystemError fiber mprotect failed: Cannot allocate memory + *** main_f *** +1..4 +ok 1 - mprotect: failed to setup fiber guard page +ok 2 - mprotect: diag is armed after error +ok 3 - madvise: non critical error on madvise hint +ok 4 - madvise: diag is armed after error + *** main_f: done *** diff --git a/test/unit/suite.ini b/test/unit/suite.ini index 89c8499fc..d429c95e9 100644 --- a/test/unit/suite.ini +++ b/test/unit/suite.ini @@ -1,5 +1,5 @@ [default] core = unittest description = unit tests -release_disabled = swim_errinj.test +release_disabled = fiber_stack.test swim_errinj.test is_parallel = True -- 2.20.1