From: Leonid Vasiliev <lvasiliev@tarantool.org> To: v.shpilevoy@tarantool.org, gorcunov@gmail.com, sergepetrenko@tarantool.org Cc: tarantool-patches@dev.tarantool.org Subject: [Tarantool-patches] [PATCH] replication: add support of qsync to the snapshot machinery Date: Wed, 10 Jun 2020 17:34:16 +0300 [thread overview] Message-ID: <c12d6cdef83360ab3dfea08d9928a39941b54696.1591799464.git.lvasiliev@tarantool.org> (raw) To support qsync replication, the snapshot machinery now waits, up to a timeout, for confirmation of the current "sync" transactions. If a rollback occurs or the timeout expires, the snapshot is cancelled. Closes #4850 --- Dirty version without tests. https://github.com/tarantool/tarantool/issues/4850 a part of https://github.com/tarantool/tarantool/tree/gh-4842-sync-replication src/box/gc.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/box/txn_limbo.h | 19 +++++++++++ 2 files changed, 111 insertions(+) diff --git a/src/box/gc.c b/src/box/gc.c index 8e8ffea..fb7b37a 100644 --- a/src/box/gc.c +++ b/src/box/gc.c @@ -57,6 +57,9 @@ #include "engine.h" /* engine_collect_garbage() */ #include "wal.h" /* wal_collect_garbage() */ #include "checkpoint_schedule.h" +#include "trigger.h" +#include "txn.h" +#include "txn_limbo.h" struct gc_state gc; @@ -65,6 +68,26 @@ gc_cleanup_fiber_f(va_list); static int gc_checkpoint_fiber_f(va_list); +//TODO: quorum timeout should be used instead +double snap_confirm_timeout = 5.0; /* seconds */ + +/** + * Waitpoint stores information about the progress of confirmation. + * In the case of multimaster support, it will store a bitset + * or array instead of the boolean. + */ +struct confirm_waitpoint { + /** + * Variable used to wake up the fiber that is waiting for + * the end of confirmation. + */ + struct fiber_cond confirm_cond; + /** + * Result flag. + */ + bool is_confirm; +}; + /** * Comparator used for ordering gc_consumer objects * lexicographically by their vclock in a binary tree. 
@@ -377,6 +400,64 @@ gc_add_checkpoint(const struct vclock *vclock) } static int +gc_txn_commit_cb(struct trigger *trigger, void *event) +{ + (void)event; + struct confirm_waitpoint *cwp = + (struct confirm_waitpoint *)trigger->data; + cwp->is_confirm = true; + fiber_cond_signal(&cwp->confirm_cond); + return 0; +} + +static int +gc_txn_rollback_cb(struct trigger *trigger, void *event) +{ + (void)event; + struct confirm_waitpoint *cwp = + (struct confirm_waitpoint *)trigger->data; + fiber_cond_signal(&cwp->confirm_cond); + return 0; +} + +/** + * Wait for confirmation of all "sync" transactions + * for up to snap_confirm_timeout seconds, or fail. + */ +static int +gc_wait_confirm(void) +{ + /* Initialize the waitpoint. */ + struct confirm_waitpoint cwp; + fiber_cond_create(&cwp.confirm_cond); + cwp.is_confirm = false; + + /* Set triggers for the last limbo transaction. */ + struct trigger on_complete; + trigger_create(&on_complete, gc_txn_commit_cb, &cwp, NULL); + struct trigger on_rollback; + trigger_create(&on_rollback, gc_txn_rollback_cb, &cwp, NULL); + struct txn_limbo_entry *tle = txn_limbo_last_entry(&txn_limbo); + txn_on_commit(tle->txn, &on_complete); + txn_on_rollback(tle->txn, &on_rollback); + + int rc = fiber_cond_wait_timeout(&cwp.confirm_cond, + snap_confirm_timeout); + fiber_cond_destroy(&cwp.confirm_cond); + if (rc != 0) { + /* Clear the triggers if the timeout has been reached. */ + trigger_clear(&on_complete); + trigger_clear(&on_rollback); + return -1; + } + if (!cwp.is_confirm) { + /* The transaction has been rolled back. */ + return -1; + } + return 0; +} + +static int gc_do_checkpoint(bool is_scheduled) { int rc; @@ -395,6 +476,17 @@ gc_do_checkpoint(bool is_scheduled) rc = wal_begin_checkpoint(&checkpoint); if (rc != 0) goto out; + + /* + * Wait for all "sync" transactions to be confirmed before + * creating a snapshot. 
+ */ + if (!txn_limbo_is_empty(&txn_limbo)) { + rc = gc_wait_confirm(); + if (rc != 0) + goto out; + } + rc = engine_commit_checkpoint(&checkpoint.vclock); if (rc != 0) goto out; diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h index de415cd..c0b821a 100644 --- a/src/box/txn_limbo.h +++ b/src/box/txn_limbo.h @@ -166,6 +166,25 @@ txn_limbo_wait_complete(struct txn_limbo *limbo, struct txn_limbo_entry *entry); void txn_limbo_read_confirm(struct txn_limbo *limbo, int64_t lsn); +/** + * Return TRUE if limbo is empty. + */ +static inline bool +txn_limbo_is_empty(struct txn_limbo *limbo) +{ + return rlist_empty(&limbo->queue); +} + +/** + * Return a pointer to the last txn_limbo_entry of limbo. + */ +static inline struct txn_limbo_entry * +txn_limbo_last_entry(struct txn_limbo *limbo) +{ + return rlist_last_entry(&limbo->queue, struct txn_limbo_entry, + in_queue); +} + void txn_limbo_init(); -- 2.7.4
next reply other threads:[~2020-06-10 14:34 UTC|newest] Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top 2020-06-10 14:34 Leonid Vasiliev [this message] 2020-06-11 9:02 ` Serge Petrenko 2020-06-15 22:28 ` Vladislav Shpilevoy 2020-06-15 23:05 ` Vladislav Shpilevoy 2020-06-18 11:43 ` Leonid Vasiliev
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=c12d6cdef83360ab3dfea08d9928a39941b54696.1591799464.git.lvasiliev@tarantool.org \ --to=lvasiliev@tarantool.org \ --cc=gorcunov@gmail.com \ --cc=sergepetrenko@tarantool.org \ --cc=tarantool-patches@dev.tarantool.org \ --cc=v.shpilevoy@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH] replication: add support of qsync to the snapshot machinery' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox