From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH] vinyl: cancel reader and writer threads on shutdown Date: Mon, 11 Feb 2019 20:37:39 +0300 Message-Id: <5448d74ca0079657d09b342c639013d5b00f7899.1549906085.git.vdavydov.dev@gmail.com> To: kostja@tarantool.org Cc: tarantool-patches@freelists.org List-ID: Currently, vinyl won't shut down until all reader and writer threads gracefully complete all their pending requests, which may take a while, especially for writer threads that may happen to be doing compaction at the time. This is annoying - there's absolutely no reason to delay termination in such a case. Let's forcefully cancel all threads, like we do in case of relay threads. This should fix sporadic vinyl/replica_quota test hang. Closes #3949 --- https://github.com/tarantool/tarantool/issues/3949 https://github.com/tarantool/tarantool/commits/dv/gh-3949-vy-cancel-threads-on-shutdown src/box/vy_run.c | 7 ++----- src/box/vy_scheduler.c | 5 ++--- test/vinyl/errinj.result | 40 ++++++++++++++++++++++++++++++++++++++++ test/vinyl/errinj.test.lua | 20 ++++++++++++++++++++ 4 files changed, 64 insertions(+), 8 deletions(-) diff --git a/src/box/vy_run.c b/src/box/vy_run.c index cee90458..9aa3beec 100644 --- a/src/box/vy_run.c +++ b/src/box/vy_run.c @@ -152,11 +152,8 @@ vy_run_env_stop_readers(struct vy_run_env *env) { for (int i = 0; i < env->reader_pool_size; i++) { struct vy_run_reader *reader = &env->reader_pool[i]; - - cbus_stop_loop(&reader->reader_pipe); - cpipe_destroy(&reader->reader_pipe); - if (cord_join(&reader->cord) != 0) - panic("failed to join vinyl reader thread"); + tt_pthread_cancel(reader->cord.id); + tt_pthread_join(reader->cord.id, NULL); } free(env->reader_pool); } diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c index 5ec6d171..d4047197 100644 --- a/src/box/vy_scheduler.c +++ b/src/box/vy_scheduler.c @@ -371,9 +371,8 @@ vy_worker_pool_stop(struct vy_worker_pool *pool) assert(pool->workers != NULL); 
for (int i = 0; i < pool->size; i++) { struct vy_worker *worker = &pool->workers[i]; - cbus_stop_loop(&worker->worker_pipe); - cpipe_destroy(&worker->worker_pipe); - cord_join(&worker->cord); + tt_pthread_cancel(worker->cord.id); + tt_pthread_join(worker->cord.id, NULL); } free(pool->workers); pool->workers = NULL; diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result index 990c7e85..248b32c8 100644 --- a/test/vinyl/errinj.result +++ b/test/vinyl/errinj.result @@ -1126,3 +1126,43 @@ box.schema.user.revoke('guest', 'replication') s:drop() --- ... +-- +-- Check that tarantool stops immediately even if a vinyl worker +-- thread is blocked (see gh-3225). +-- +s = box.schema.space.create('test', {engine = 'vinyl'}) +--- +... +_ = s:create_index('pk') +--- +... +s:replace{1, 1} +--- +- [1, 1] +... +box.snapshot() +--- +- ok +... +errinj.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 9000) +--- +- ok +... +_ = fiber.create(function() s:get(1) end) +--- +... +s:replace{1, 2} +--- +- [1, 2] +... +errinj.set('ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT', 9000) +--- +- ok +... +_ = fiber.create(function() box.snapshot() end) +--- +... +test_run:cmd("restart server default") +box.space.test:drop() +--- +... diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua index d374a910..eaec52a5 100644 --- a/test/vinyl/errinj.test.lua +++ b/test/vinyl/errinj.test.lua @@ -408,3 +408,23 @@ test_run:cmd("delete server replica") errinj.set('ERRINJ_VYRUN_INDEX_GARBAGE', false) box.schema.user.revoke('guest', 'replication') s:drop() + +-- +-- Check that tarantool stops immediately even if a vinyl worker +-- thread is blocked (see gh-3225). 
+-- +s = box.schema.space.create('test', {engine = 'vinyl'}) +_ = s:create_index('pk') +s:replace{1, 1} +box.snapshot() + +errinj.set('ERRINJ_VY_READ_PAGE_TIMEOUT', 9000) +_ = fiber.create(function() s:get(1) end) + +s:replace{1, 2} + +errinj.set('ERRINJ_VY_RUN_WRITE_STMT_TIMEOUT', 9000) +_ = fiber.create(function() box.snapshot() end) + +test_run:cmd("restart server default") +box.space.test:drop() -- 2.11.0