[Tarantool-patches] [PATCH 1/1] txn: remove TXN_IS_DONE check from txn_commit()
Serge Petrenko
sergepetrenko at tarantool.org
Sun Jul 5 13:26:40 MSK 2020
04.07.2020 02:23, Vladislav Shpilevoy пишет:
> TXN_IS_DONE was used in txn_commit() to finalize the transaction
> in a case it is not finished yet. But this happens only not in
> so common cases - during bootstrap and recovery. During normal
> operation the transaction is always finished when WAL thread
> returns it to TX thread after a disk write.
>
> So this is a matter of journal, and should be solved here, not in
> txn code with some crutch, especially in such a hot path place.
>
> This commit makes so that after journal_write() the transaction is
> always already done, i.e. txn_complete_async() was called. Nothing
> changes for the normal operation mode except this is -1 'if'.
> ---
> Branch: http://github.com/tarantool/tarantool/tree/gerold103/txn_commit-opt
>
> I couldn't get any stable bench results on my machine. But seems
> like this solution probably gives something about 1-2% perf
> increase for pure 'insert' workload without anything else.
>
> Need someone else to try too. Sergey Os.?
>
> src/box/box.cc | 23 +++++++++++++++++++++--
> src/box/journal.c | 45 +--------------------------------------------
> src/box/journal.h | 11 -----------
> src/box/txn.c | 6 +-----
> src/box/wal.c | 2 +-
> 5 files changed, 24 insertions(+), 63 deletions(-)
>
> diff --git a/src/box/box.cc b/src/box/box.cc
> index 871b0d976..ca061dc4f 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -323,6 +323,7 @@ recovery_journal_write(struct journal *base,
> {
> struct recovery_journal *journal = (struct recovery_journal *) base;
> entry->res = vclock_sum(journal->vclock);
> + journal_async_complete(base, entry);
> return 0;
> }
>
> @@ -330,8 +331,7 @@ static void
> recovery_journal_create(struct vclock *v)
> {
> static struct recovery_journal journal;
> - journal_create(&journal.base, journal_no_write_async,
> - journal_no_write_async_cb,
> + journal_create(&journal.base, NULL, txn_complete_async,
> recovery_journal_write);
> journal.vclock = v;
> journal_set(&journal.base);
> @@ -2017,6 +2017,14 @@ engine_init()
> box_set_vinyl_timeout();
> }
>
> +static int
> +bootstrap_journal_write(struct journal *base, struct journal_entry *entry)
> +{
> + entry->res = 0;
> + journal_async_complete(base, entry);
> + return 0;
> +}
> +
> /**
> * Initialize the first replica of a new replica set.
> */
> @@ -2438,6 +2446,14 @@ box_cfg_xc(void)
> if (!cfg_geti("hot_standby") || checkpoint == NULL)
> tnt_raise(ClientError, ER_ALREADY_RUNNING, cfg_gets("wal_dir"));
> }
> + struct journal bootstrap_journal;
> + journal_create(&bootstrap_journal, NULL, txn_complete_async,
> + bootstrap_journal_write);
> + journal_set(&bootstrap_journal);
> + auto bootstrap_journal_guard = make_scoped_guard([] {
> + journal_set(NULL);
> + });
> +
> bool is_bootstrap_leader = false;
> if (checkpoint != NULL) {
> /* Recover the instance from the local directory */
> @@ -2450,6 +2466,9 @@ box_cfg_xc(void)
> }
> fiber_gc();
>
> + bootstrap_journal_guard.is_active = false;
> + assert(current_journal != &bootstrap_journal);
> +
> /*
> * Check for correct registration of the instance in _cluster
> * The instance won't exist in _cluster space if it is an
> diff --git a/src/box/journal.c b/src/box/journal.c
> index d84f960d5..f1e89aaa2 100644
> --- a/src/box/journal.c
> +++ b/src/box/journal.c
> @@ -32,50 +32,7 @@
> #include <small/region.h>
> #include <diag.h>
>
> -int
> -journal_no_write_async(struct journal *journal,
> - struct journal_entry *entry)
> -{
> - (void)journal;
> - assert(false);
> -
> - errno = EINVAL;
> - diag_set(SystemError, "write_async wrong context");
> -
> - entry->res = -1;
> - return -1;
> -}
> -
> -void
> -journal_no_write_async_cb(struct journal_entry *entry)
> -{
> - assert(false);
> -
> - errno = EINVAL;
> - diag_set(SystemError, "write_async_cb wrong context");
> -
> - entry->res = -1;
> -}
> -
> -/**
> - * Used to load from a memtx snapshot. LSN is not used,
> - * but txn_commit() must work.
> - */
> -static int
> -dummy_journal_write(struct journal *journal, struct journal_entry *entry)
> -{
> - (void) journal;
> - entry->res = 0;
> - return 0;
> -}
> -
> -static struct journal dummy_journal = {
> - .write_async = journal_no_write_async,
> - .write_async_cb = journal_no_write_async_cb,
> - .write = dummy_journal_write,
> -};
> -
> -struct journal *current_journal = &dummy_journal;
> +struct journal *current_journal = NULL;
>
> struct journal_entry *
> journal_entry_new(size_t n_rows, struct region *region,
> diff --git a/src/box/journal.h b/src/box/journal.h
> index ebc5cb708..1a10e66c3 100644
> --- a/src/box/journal.h
> +++ b/src/box/journal.h
> @@ -182,17 +182,6 @@ journal_create(struct journal *journal,
> journal->write = write;
> }
>
> -/**
> - * A stub to issue an error in case if asynchronous
> - * write is diabled in the backend.
> - */
> -extern int
> -journal_no_write_async(struct journal *journal,
> - struct journal_entry *entry);
> -
> -extern void
> -journal_no_write_async_cb(struct journal_entry *entry);
> -
> static inline bool
> journal_is_initialized(struct journal *journal)
> {
> diff --git a/src/box/txn.c b/src/box/txn.c
> index 123520166..e86a03447 100644
> --- a/src/box/txn.c
> +++ b/src/box/txn.c
> @@ -652,11 +652,7 @@ txn_commit(struct txn *txn)
> diag_log();
> return -1;
> }
> -
> - if (!txn_has_flag(txn, TXN_IS_DONE)) {
> - txn->signature = req->res;
> - txn_complete(txn);
> - }
> + assert(txn_has_flag(txn, TXN_IS_DONE));
>
> int res = txn->signature >= 0 ? 0 : -1;
> if (res != 0) {
> diff --git a/src/box/wal.c b/src/box/wal.c
> index 74cc74684..9ee1a4e7f 100644
> --- a/src/box/wal.c
> +++ b/src/box/wal.c
> @@ -1285,7 +1285,7 @@ wal_write_none_async(struct journal *journal,
> vclock_merge(&writer->vclock, &vclock_diff);
> vclock_copy(&replicaset.vclock, &writer->vclock);
> entry->res = vclock_sum(&writer->vclock);
> -
> + journal_async_complete(journal, entry);
> return 0;
> }
>
Thanks for the patch! Looks good.
Speaking of benchmarks, making replaces in hash index with wal_mode = 'none'
on release, I somehow got a 5% decrease in performance. However, it's really
hard to measure the difference, because results aren't stable.
With debug build and tree index, wal_mode='none', there is no noticeable
difference.
--
Serge Petrenko
More information about the Tarantool-patches
mailing list