[Tarantool-patches] [PATCH] replication: cancel replica joining thread at exit
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Wed Oct 23 23:55:05 MSK 2019
LGTM.
On 23/10/2019 14:59, Ilya Kosarev wrote:
> If a tarantool instance exits while a replica join is in progress,
> the replica joining thread can access already freed data, resulting
> in a crash. Let's fix this the same way we did for the checkpoint
> thread: simply cancel the thread forcefully and wait for it to
> terminate.
>
> Closes #4528
> ---
> https://github.com/tarantool/tarantool/tree/i.kosarev/gh-4528-fix-shutdown-on-replica-join
> https://github.com/tarantool/tarantool/issues/4528
>
> src/box/memtx_engine.c | 25 ++++++++++++++++++++++++-
> src/box/memtx_engine.h | 5 +++++
> 2 files changed, 29 insertions(+), 1 deletion(-)
>
> diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
> index ecce3b1b6..23ccc4703 100644
> --- a/src/box/memtx_engine.c
> +++ b/src/box/memtx_engine.c
> @@ -55,6 +55,9 @@
> static void
> checkpoint_cancel(struct checkpoint *ckpt);
>
> +static void
> +replica_join_cancel(struct cord *replica_join_cord);
> +
> struct PACKED memtx_tuple {
> /*
> * sic: the header of the tuple is used
> @@ -129,6 +132,8 @@ memtx_engine_shutdown(struct engine *engine)
> struct memtx_engine *memtx = (struct memtx_engine *)engine;
> if (memtx->checkpoint != NULL)
> checkpoint_cancel(memtx->checkpoint);
> + if (memtx->replica_join_cord != NULL)
> + replica_join_cancel(memtx->replica_join_cord);
> mempool_destroy(&memtx->iterator_pool);
> if (mempool_is_initialized(&memtx->rtree_iterator_pool))
> mempool_destroy(&memtx->rtree_iterator_pool);
> @@ -527,6 +532,18 @@ checkpoint_cancel(struct checkpoint *ckpt)
> checkpoint_delete(ckpt);
> }
>
> +static void
> +replica_join_cancel(struct cord *replica_join_cord)
> +{
> + /*
> + * Cancel the thread being used to join replica if it's
> + * running and wait for it to terminate so as to
> + * eliminate the possibility of use-after-free.
> + */
> + tt_pthread_cancel(replica_join_cord->id);
> + tt_pthread_join(replica_join_cord->id, NULL);
> +}
> +
> static int
> checkpoint_add_space(struct space *sp, void *data)
> {
> @@ -848,7 +865,11 @@ memtx_engine_join(struct engine *engine, void *arg, struct xstream *stream)
> struct cord cord;
> if (cord_costart(&cord, "initial_join", memtx_join_f, ctx) != 0)
> return -1;
> - return cord_cojoin(&cord);
> + struct memtx_engine *memtx = (struct memtx_engine *)engine;
> + memtx->replica_join_cord = &cord;
> + int res = cord_cojoin(&cord);
> + memtx->replica_join_cord = NULL;
> + return res;
> }
>
> static void
> @@ -1030,6 +1051,8 @@ memtx_engine_new(const char *snap_dirname, bool force_recovery,
> memtx->max_tuple_size = MAX_TUPLE_SIZE;
> memtx->force_recovery = force_recovery;
>
> + memtx->replica_join_cord = NULL;
> +
> memtx->base.vtab = &memtx_engine_vtab;
> memtx->base.name = "memtx";
>
> diff --git a/src/box/memtx_engine.h b/src/box/memtx_engine.h
> index c092f5d8e..f562c66df 100644
> --- a/src/box/memtx_engine.h
> +++ b/src/box/memtx_engine.h
> @@ -107,6 +107,11 @@ struct memtx_engine {
> uint64_t snap_io_rate_limit;
> /** Skip invalid snapshot records if this flag is set. */
> bool force_recovery;
> + /**
> + * Cord currently being used to join a replica. It is needed
> + * only so that it can be cancelled on shutdown.
> + */
> + struct cord *replica_join_cord;
> /** Common quota for tuples and indexes. */
> struct quota quota;
> /**
>
More information about the Tarantool-patches
mailing list