[Tarantool-patches] [PATCH 1/1] txn: remove TXN_IS_DONE check from txn_commit()

Serge Petrenko sergepetrenko at tarantool.org
Sun Jul 5 13:26:40 MSK 2020


04.07.2020 02:23, Vladislav Shpilevoy пишет:
> TXN_IS_DONE was used in txn_commit() to finalize the transaction
> in a case it is not finished yet. But this happens only not in
> so common cases - during bootstrap and recovery. During normal
> operation the transaction is always finished when WAL thread
> returns it to TX thread after a disk write.
>
> So this is a matter of journal, and should be solved here, not in
> txn code with some crutch, especially in such a hot path place.
>
> This commit makes so that after journal_write() the transaction is
> always already done, i.e. txn_complete_async() was called. Nothing
> changes for the normal operation mode except this is -1 'if'.
> ---
> Branch: http://github.com/tarantool/tarantool/tree/gerold103/txn_commit-opt
>
> I couldn't get any stable bench results on my machine. But seems
> like this solution probably gives something about 1-2% perf
> increase for pure 'insert' workload without anything else.
>
> Need someone else to try too. Sergey Os.?
>
>   src/box/box.cc    | 23 +++++++++++++++++++++--
>   src/box/journal.c | 45 +--------------------------------------------
>   src/box/journal.h | 11 -----------
>   src/box/txn.c     |  6 +-----
>   src/box/wal.c     |  2 +-
>   5 files changed, 24 insertions(+), 63 deletions(-)
>
> diff --git a/src/box/box.cc b/src/box/box.cc
> index 871b0d976..ca061dc4f 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -323,6 +323,7 @@ recovery_journal_write(struct journal *base,
>   {
>   	struct recovery_journal *journal = (struct recovery_journal *) base;
>   	entry->res = vclock_sum(journal->vclock);
> +	journal_async_complete(base, entry);
>   	return 0;
>   }
>   
> @@ -330,8 +331,7 @@ static void
>   recovery_journal_create(struct vclock *v)
>   {
>   	static struct recovery_journal journal;
> -	journal_create(&journal.base, journal_no_write_async,
> -		       journal_no_write_async_cb,
> +	journal_create(&journal.base, NULL, txn_complete_async,
>   		       recovery_journal_write);
>   	journal.vclock = v;
>   	journal_set(&journal.base);
> @@ -2017,6 +2017,14 @@ engine_init()
>   	box_set_vinyl_timeout();
>   }
>   
> +static int
> +bootstrap_journal_write(struct journal *base, struct journal_entry *entry)
> +{
> +	entry->res = 0;
> +	journal_async_complete(base, entry);
> +	return 0;
> +}
> +
>   /**
>    * Initialize the first replica of a new replica set.
>    */
> @@ -2438,6 +2446,14 @@ box_cfg_xc(void)
>   		if (!cfg_geti("hot_standby") || checkpoint == NULL)
>   			tnt_raise(ClientError, ER_ALREADY_RUNNING, cfg_gets("wal_dir"));
>   	}
> +	struct journal bootstrap_journal;
> +	journal_create(&bootstrap_journal, NULL, txn_complete_async,
> +		       bootstrap_journal_write);
> +	journal_set(&bootstrap_journal);
> +	auto bootstrap_journal_guard = make_scoped_guard([] {
> +		journal_set(NULL);
> +	});
> +
>   	bool is_bootstrap_leader = false;
>   	if (checkpoint != NULL) {
>   		/* Recover the instance from the local directory */
> @@ -2450,6 +2466,9 @@ box_cfg_xc(void)
>   	}
>   	fiber_gc();
>   
> +	bootstrap_journal_guard.is_active = false;
> +	assert(current_journal != &bootstrap_journal);
> +
>   	/*
>   	 * Check for correct registration of the instance in _cluster
>   	 * The instance won't exist in _cluster space if it is an
> diff --git a/src/box/journal.c b/src/box/journal.c
> index d84f960d5..f1e89aaa2 100644
> --- a/src/box/journal.c
> +++ b/src/box/journal.c
> @@ -32,50 +32,7 @@
>   #include <small/region.h>
>   #include <diag.h>
>   
> -int
> -journal_no_write_async(struct journal *journal,
> -		       struct journal_entry *entry)
> -{
> -	(void)journal;
> -	assert(false);
> -
> -	errno = EINVAL;
> -	diag_set(SystemError, "write_async wrong context");
> -
> -	entry->res = -1;
> -	return -1;
> -}
> -
> -void
> -journal_no_write_async_cb(struct journal_entry *entry)
> -{
> -	assert(false);
> -
> -	errno = EINVAL;
> -	diag_set(SystemError, "write_async_cb wrong context");
> -
> -	entry->res = -1;
> -}
> -
> -/**
> - * Used to load from a memtx snapshot. LSN is not used,
> - * but txn_commit() must work.
> - */
> -static int
> -dummy_journal_write(struct journal *journal, struct journal_entry *entry)
> -{
> -	(void) journal;
> -	entry->res = 0;
> -	return 0;
> -}
> -
> -static struct journal dummy_journal = {
> -	.write_async	= journal_no_write_async,
> -	.write_async_cb	= journal_no_write_async_cb,
> -	.write		= dummy_journal_write,
> -};
> -
> -struct journal *current_journal = &dummy_journal;
> +struct journal *current_journal = NULL;
>   
>   struct journal_entry *
>   journal_entry_new(size_t n_rows, struct region *region,
> diff --git a/src/box/journal.h b/src/box/journal.h
> index ebc5cb708..1a10e66c3 100644
> --- a/src/box/journal.h
> +++ b/src/box/journal.h
> @@ -182,17 +182,6 @@ journal_create(struct journal *journal,
>   	journal->write		= write;
>   }
>   
> -/**
> - * A stub to issue an error in case if asynchronous
> - * write is diabled in the backend.
> - */
> -extern int
> -journal_no_write_async(struct journal *journal,
> -		       struct journal_entry *entry);
> -
> -extern void
> -journal_no_write_async_cb(struct journal_entry *entry);
> -
>   static inline bool
>   journal_is_initialized(struct journal *journal)
>   {
> diff --git a/src/box/txn.c b/src/box/txn.c
> index 123520166..e86a03447 100644
> --- a/src/box/txn.c
> +++ b/src/box/txn.c
> @@ -652,11 +652,7 @@ txn_commit(struct txn *txn)
>   		diag_log();
>   		return -1;
>   	}
> -
> -	if (!txn_has_flag(txn, TXN_IS_DONE)) {
> -		txn->signature = req->res;
> -		txn_complete(txn);
> -	}
> +	assert(txn_has_flag(txn, TXN_IS_DONE));
>   
>   	int res = txn->signature >= 0 ? 0 : -1;
>   	if (res != 0) {
> diff --git a/src/box/wal.c b/src/box/wal.c
> index 74cc74684..9ee1a4e7f 100644
> --- a/src/box/wal.c
> +++ b/src/box/wal.c
> @@ -1285,7 +1285,7 @@ wal_write_none_async(struct journal *journal,
>   	vclock_merge(&writer->vclock, &vclock_diff);
>   	vclock_copy(&replicaset.vclock, &writer->vclock);
>   	entry->res = vclock_sum(&writer->vclock);
> -
> +	journal_async_complete(journal, entry);
>   	return 0;
>   }
>   

Thanks for the patch! Looks good.

Speaking of benchmarks, making replaces in hash index with wal_mode = 'none'
on release, I somehow got a 5% decrease in performance. However, it's really
hard to measure the difference, because results aren't stable.

With debug build and tree index, wal_mode='none', there is no noticeable 
difference.

-- 
Serge Petrenko



More information about the Tarantool-patches mailing list