Tarantool development patches archive
 help / color / mirror / Atom feed
From: Georgy Kirichenko <georgy@tarantool.org>
To: tarantool-patches@freelists.org
Cc: Georgy Kirichenko <georgy@tarantool.org>
Subject: [tarantool-patches] [PATCH v2 2/2] Track wal vclock changes instead of copying
Date: Wed, 13 Feb 2019 11:35:17 +0300	[thread overview]
Message-ID: <9b0d551519dd24f3c456fc28e512e26e953275fc.1550046797.git.georgy@tarantool.org> (raw)
In-Reply-To: <cover.1550046797.git.georgy@tarantool.org>

Spare some vclock_copy invocations because they could be expensive.

Follow-up #2283
---
 src/box/vclock.h | 14 ++++++++++++++
 src/box/wal.c    | 44 ++++++++++++++++++++++++++++++--------------
 2 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/src/box/vclock.h b/src/box/vclock.h
index a59b2bddb..0c9996902 100644
--- a/src/box/vclock.h
+++ b/src/box/vclock.h
@@ -227,6 +227,20 @@ vclock_sum(const struct vclock *vclock)
 int64_t
 vclock_follow(struct vclock *vclock, uint32_t replica_id, int64_t lsn);
 
+/**
+ * Merge all diff changes into the destination
+ * vclock, then reset the diff.
+ */
+static inline void
+vclock_merge(struct vclock *dst, struct vclock *diff)
+{
+	struct vclock_iterator it;
+	vclock_iterator_init(&it, diff);
+	vclock_foreach(&it, item)
+		vclock_follow(dst, item.id, vclock_get(dst, item.id) + item.lsn);
+	vclock_create(diff);
+}
+
 /**
  * \brief Format vclock to YAML-compatible string representation:
  * { replica_id: lsn, replica_id:lsn })
diff --git a/src/box/wal.c b/src/box/wal.c
index 0b49548c0..b2652bb17 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -886,17 +886,25 @@ wal_writer_begin_rollback(struct wal_writer *writer)
 	cpipe_push(&writer->tx_prio_pipe, &writer->in_rollback);
 }
 
+/*
+ * Assign lsn and replica identifier for local writes and track
+ * row into vclock_diff.
+ */
 static void
-wal_assign_lsn(struct vclock *vclock, struct xrow_header **row,
+wal_assign_lsn(struct vclock *vclock_diff, struct vclock *base,
+	       struct xrow_header **row,
 	       struct xrow_header **end)
 {
 	/** Assign LSN to all local rows. */
 	for ( ; row < end; row++) {
 		if ((*row)->replica_id == 0) {
-			(*row)->lsn = vclock_inc(vclock, instance_id);
+			(*row)->lsn = vclock_inc(vclock_diff, instance_id) +
+				      vclock_get(base, instance_id);
 			(*row)->replica_id = instance_id;
 		} else {
-			vclock_follow_xrow(vclock, *row);
+			vclock_follow(vclock_diff, (*row)->replica_id,
+				      (*row)->lsn - vclock_get(base,
+							       (*row)->replica_id));
 		}
 	}
 }
@@ -909,13 +917,12 @@ wal_write_to_disk(struct cmsg *msg)
 	struct error *error;
 
 	/*
-	 * In order not to promote writer's vclock in case of an error,
-	 * we create a copy to assign LSNs before rows are actually
-	 * written. After successful xlog flush we update writer's vclock
-	 * to the last written vclock value.
+	 * Track all vclock changes made by this batch into
+	 * vclock_diff variable and then apply it to the writer's
+	 * vclock after each xlog flush.
 	 */
-	struct vclock vclock;
-	vclock_copy(&vclock, &writer->vclock);
+	struct vclock vclock_diff;
+	vclock_create(&vclock_diff);
 
 	struct errinj *inj = errinj(ERRINJ_WAL_DELAY, ERRINJ_BOOL);
 	while (inj != NULL && inj->bparam)
@@ -924,18 +931,21 @@ wal_write_to_disk(struct cmsg *msg)
 	if (writer->in_rollback.route != NULL) {
 		/* We're rolling back a failed write. */
 		stailq_concat(&wal_msg->rollback, &wal_msg->commit);
+		vclock_copy(&wal_msg->vclock, &writer->vclock);
 		return;
 	}
 
 	/* Xlog is only rotated between queue processing  */
 	if (wal_opt_rotate(writer) != 0) {
 		stailq_concat(&wal_msg->rollback, &wal_msg->commit);
+		vclock_copy(&wal_msg->vclock, &writer->vclock);
 		return wal_writer_begin_rollback(writer);
 	}
 
 	/* Ensure there's enough disk space before writing anything. */
 	if (wal_fallocate(writer, wal_msg->approx_len) != 0) {
 		stailq_concat(&wal_msg->rollback, &wal_msg->commit);
+		vclock_copy(&wal_msg->vclock, &writer->vclock);
 		return wal_writer_begin_rollback(writer);
 	}
 
@@ -969,15 +979,17 @@ wal_write_to_disk(struct cmsg *msg)
 	struct journal_entry *entry;
 	struct stailq_entry *last_committed = NULL;
 	stailq_foreach_entry(entry, &wal_msg->commit, fifo) {
-		wal_assign_lsn(&vclock, entry->rows, entry->rows + entry->n_rows);
-		entry->res = vclock_sum(&vclock);
+		wal_assign_lsn(&vclock_diff, &writer->vclock,
+			       entry->rows, entry->rows + entry->n_rows);
+		entry->res = vclock_sum(&vclock_diff) +
+			     vclock_sum(&writer->vclock);
 		rc = xlog_write_entry(l, entry);
 		if (rc < 0)
 			goto done;
 		if (rc > 0) {
 			writer->checkpoint_wal_size += rc;
 			last_committed = &entry->fifo;
-			vclock_copy(&writer->vclock, &vclock);
+			vclock_merge(&writer->vclock, &vclock_diff);
 		}
 		/* rc == 0: the write is buffered in xlog_tx */
 	}
@@ -987,7 +999,7 @@ wal_write_to_disk(struct cmsg *msg)
 
 	writer->checkpoint_wal_size += rc;
 	last_committed = stailq_last(&wal_msg->commit);
-	vclock_copy(&writer->vclock, &vclock);
+	vclock_merge(&writer->vclock, &vclock_diff);
 
 	/*
 	 * Notify TX if the checkpoint threshold has been exceeded.
@@ -1162,7 +1174,11 @@ wal_write_in_wal_mode_none(struct journal *journal,
 			   struct journal_entry *entry)
 {
 	struct wal_writer *writer = (struct wal_writer *) journal;
-	wal_assign_lsn(&writer->vclock, entry->rows, entry->rows + entry->n_rows);
+	struct vclock vclock_diff;
+	vclock_create(&vclock_diff);
+	wal_assign_lsn(&vclock_diff, &writer->vclock, entry->rows,
+		       entry->rows + entry->n_rows);
+	vclock_merge(&writer->vclock, &vclock_diff);
 	vclock_copy(&replicaset.vclock, &writer->vclock);
 	return vclock_sum(&writer->vclock);
 }
-- 
2.20.1

  parent reply	other threads:[~2019-02-13  8:33 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-13  8:35 [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complecity Georgy Kirichenko
2019-02-13  8:35 ` [tarantool-patches] [PATCH v2 1/2] Lightweight vclock_create and vclock_copy Georgy Kirichenko
2019-02-14 10:26   ` [tarantool-patches] " Konstantin Osipov
2019-02-13  8:35 ` Georgy Kirichenko [this message]
2019-02-14 10:31   ` [tarantool-patches] Re: [PATCH v2 2/2] Track wal vclock changes instead of copying Konstantin Osipov
2019-02-14 14:04   ` [tarantool-patches] " Vladimir Davydov
2019-02-14 14:42 ` [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complecity Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9b0d551519dd24f3c456fc28e512e26e953275fc.1550046797.git.georgy@tarantool.org \
    --to=georgy@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [tarantool-patches] [PATCH v2 2/2] Track wal vclock changes instead of copying' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox