From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp36.i.mail.ru (smtp36.i.mail.ru [94.100.177.96]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 3C8DF42EF62 for ; Tue, 30 Jun 2020 02:15:43 +0300 (MSK) From: Vladislav Shpilevoy Date: Tue, 30 Jun 2020 01:15:19 +0200 Message-Id: <8a49694d7373cf8976924aef2ca498bc7c6451d5.1593472477.git.v.shpilevoy@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH v2 18/19] replication: only send confirmed data during final join List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tarantool-patches@dev.tarantool.org, sergepetrenko@tarantool.org From: Serge Petrenko The final join (or register) stage is needed to deliver the replica its _cluster registration. Since this stage is followed by a snapshot on the replica, the data received during this stage must be confirmed. Make the master check that there were no rollbacks of the data to be sent during final join and that all of that data is confirmed before final join starts. 
Closes #5097 --- src/box/box.cc | 33 +++++++++++++++++++++++++++++++++ src/box/txn_limbo.c | 5 +++++ src/box/txn_limbo.h | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) diff --git a/src/box/box.cc b/src/box/box.cc index ad76f4f00..37bf4ab3b 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -1711,6 +1711,11 @@ box_process_register(struct ev_io *io, struct xrow_header *header) say_info("registering replica %s at %s", tt_uuid_str(&instance_uuid), sio_socketname(io->fd)); + /* See box_process_join() */ + txn_limbo_start_recording(&txn_limbo); + auto limbo_guard = make_scoped_guard([&] { + txn_limbo_stop_recording(&txn_limbo); + }); struct vclock start_vclock; vclock_copy(&start_vclock, &replicaset.vclock); @@ -1726,6 +1731,14 @@ box_process_register(struct ev_io *io, struct xrow_header *header) struct vclock stop_vclock; vclock_copy(&stop_vclock, &replicaset.vclock); + if (txn_limbo_got_rollback(&txn_limbo)) + tnt_raise(ClientError, ER_SYNC_ROLLBACK); + txn_limbo_stop_recording(&txn_limbo); + limbo_guard.is_active = false; + + if (txn_limbo_wait_confirm(&txn_limbo) != 0) + diag_raise(); + /* * Feed replica with WALs in range * (start_vclock, stop_vclock) so that it gets its @@ -1847,6 +1860,18 @@ box_process_join(struct ev_io *io, struct xrow_header *header) say_info("joining replica %s at %s", tt_uuid_str(&instance_uuid), sio_socketname(io->fd)); + /* + * In order to join a replica, master has to make sure it + * doesn't send unconfirmed data. We have to check that + * there are no rolled back transactions between + * start_vclock and stop_vclock, and that the data right + * before stop_vclock is confirmed, before we can proceed + * to final join. + */ + txn_limbo_start_recording(&txn_limbo); + auto limbo_guard = make_scoped_guard([&] { + txn_limbo_stop_recording(&txn_limbo); + }); /* * Initial stream: feed replica with dirty data from engines. 
*/ @@ -1868,6 +1893,14 @@ box_process_join(struct ev_io *io, struct xrow_header *header) struct vclock stop_vclock; vclock_copy(&stop_vclock, &replicaset.vclock); + if (txn_limbo_got_rollback(&txn_limbo)) + tnt_raise(ClientError, ER_SYNC_ROLLBACK); + txn_limbo_stop_recording(&txn_limbo); + limbo_guard.is_active = false; + + if (txn_limbo_wait_confirm(&txn_limbo) != 0) + diag_raise(); + /* Send end of initial stage data marker */ struct xrow_header row; xrow_encode_vclock_xc(&row, &stop_vclock); diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c index abea26731..fbe4dcecf 100644 --- a/src/box/txn_limbo.c +++ b/src/box/txn_limbo.c @@ -40,6 +40,8 @@ txn_limbo_create(struct txn_limbo *limbo) rlist_create(&limbo->queue); limbo->instance_id = REPLICA_ID_NIL; vclock_create(&limbo->vclock); + limbo->is_recording = false; + limbo->got_rollback = false; } struct txn_limbo_entry * @@ -90,8 +92,11 @@ txn_limbo_pop(struct txn_limbo *limbo, struct txn_limbo_entry *entry) assert(!rlist_empty(&entry->in_queue)); assert(rlist_last_entry(&limbo->queue, struct txn_limbo_entry, in_queue) == entry); + assert(entry->is_rollback); (void) limbo; rlist_del_entry(entry, in_queue); + if (limbo->is_recording) + limbo->got_rollback = true; } void diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h index 138093c7c..a9fc83b0c 100644 --- a/src/box/txn_limbo.h +++ b/src/box/txn_limbo.h @@ -117,6 +117,13 @@ struct txn_limbo { * transactions, created on the limbo's owner node. */ struct vclock vclock; + /** Set to true when limbo records rollback occurrence. */ + bool is_recording; + /** + * Whether any rollbacks happened during the recording + * period. + */ + bool got_rollback; }; /** @@ -126,6 +133,34 @@ struct txn_limbo { */ extern struct txn_limbo txn_limbo; +/** + * Make limbo remember the occurrence of rollbacks due to failed + * quorum collection. 
+ */ +static inline void +txn_limbo_start_recording(struct txn_limbo *limbo) +{ + limbo->is_recording = true; +} + +/** Stop the recording of failed quorum collection events. */ +static inline void +txn_limbo_stop_recording(struct txn_limbo *limbo) +{ + limbo->is_recording = false; + limbo->got_rollback = false; +} + +/** + * Returns true in case the limbo rolled back any tx since the + * moment txn_limbo_start_recording() was called. + */ +static inline bool +txn_limbo_got_rollback(struct txn_limbo *limbo) +{ + return limbo->got_rollback; +} + /** * Allocate, create, and append a new transaction to the limbo. * The limbo entry is allocated on the transaction's region. -- 2.21.1 (Apple Git-122.3)