Hi Ilya, The patch LGTM, thanks.           Alexander >Вторник, 15 октября 2019, 18:50 +03:00 от Ilya Kosarev : > >If a tarantool instance exits while joining replica is in progress, >the replica joining thread can access already freed data resulting >in a crash. Let's fix this the same way we did for checkpoint thread >- simply cancel the thread forcefully and wait for it to terminate. > >Closes #4528 >--- >https://github.com/tarantool/tarantool/tree/i.kosarev/gh-4528-fix-shutdown-on-replica-join >https://github.com/tarantool/tarantool/issues/4528 > > src/box/memtx_engine.c | 25 ++++++++++++++++++++++++- > src/box/memtx_engine.h | 2 ++ > 2 files changed, 26 insertions(+), 1 deletion(-) > >diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c >index eb11346c1..325129a6e 100644 >--- a/src/box/memtx_engine.c >+++ b/src/box/memtx_engine.c >@@ -55,6 +55,9 @@ > static void > checkpoint_cancel(struct checkpoint *ckpt); >  >+static void >+replica_join_cancel(struct cord *replica_join_cord); >+ > struct PACKED memtx_tuple { >  /* >  * sic: the header of the tuple is used >@@ -129,6 +132,8 @@ memtx_engine_shutdown(struct engine *engine) >  struct memtx_engine *memtx = (struct memtx_engine *)engine; >  if (memtx->checkpoint != NULL) >  checkpoint_cancel(memtx->checkpoint); >+ if (memtx->replica_join_cord != NULL) >+ replica_join_cancel(memtx->replica_join_cord); >  mempool_destroy(&memtx->iterator_pool); >  if (mempool_is_initialized(&memtx->rtree_iterator_pool)) >  mempool_destroy(&memtx->rtree_iterator_pool); >@@ -528,6 +533,18 @@ checkpoint_cancel(struct checkpoint *ckpt) >  checkpoint_delete(ckpt); > } >  >+static void >+replica_join_cancel(struct cord *replica_join_cord) >+{ >+ /* >+ * Cancel the thread being used to join replica if it's >+ * running and wait for it to terminate so as to >+ * eliminate the possibility of use-after-free. >+ */ >+ tt_pthread_cancel(replica_join_cord->id); >+ tt_pthread_join(replica_join_cord->id, NULL); >+} >+ > static int > checkpoint_add_space(struct space *sp, void *data) > { >@@ -848,7 +865,11 @@ memtx_engine_join(struct engine *engine, void *arg, struct xstream *stream) >  struct cord cord; >  if (cord_costart(&cord, "initial_join", memtx_join_f, ctx) != 0) >  return -1; >- return cord_cojoin(&cord); >+ struct memtx_engine *memtx = (struct memtx_engine *)engine; >+ memtx->replica_join_cord = &cord; >+ int res = cord_cojoin(&cord); >+ memtx->replica_join_cord = NULL; >+ return res; > } >  > static void >@@ -1030,6 +1051,8 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery, >  memtx->max_tuple_size = MAX_TUPLE_SIZE; >  memtx->force_recovery = force_recovery; >  >+ memtx->replica_join_cord = NULL; >+ >  memtx->base.vtab = &memtx_engine_vtab; >  memtx->base.name = "memtx"; >  >diff --git a/src/box/memtx_engine.h b/src/box/memtx_engine.h >index c092f5d8e..43e16879d 100644 >--- a/src/box/memtx_engine.h >+++ b/src/box/memtx_engine.h >@@ -107,6 +107,8 @@ struct memtx_engine { >  uint64_t snap_io_rate_limit; >  /** Skip invalid snapshot records if this flag is set. */ >  bool force_recovery; >+ /** cord being currently used to join replica **/ >+ struct cord *replica_join_cord; >  /** Common quota for tuples and indexes. */ >  struct quota quota; >  /** >-- >2.17.1 > > -- Alexander Tikhonov