Tarantool development patches archive
 help / color / mirror / Atom feed
* [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complecity
@ 2019-02-13  8:35 Georgy Kirichenko
  2019-02-13  8:35 ` [tarantool-patches] [PATCH v2 1/2] Lightweight vclock_create and vclock_copy Georgy Kirichenko
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Georgy Kirichenko @ 2019-02-13  8:35 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Georgy Kirichenko

Track vclock changes during wal write instead of simple vclock_copy
and make vclock_copy and vclock_create cheaper.

Changes in v2:
 - Eliminate branching from vclock_get
 - Minor fixes

Branch: https://github.com/tarantool/tarantool/tree/g.kirichenko/gh-2283-follow-up

Georgy Kirichenko (2):
  Lightweight vclock_create and vclock_copy
  Track wal vclock changes instead of copying

 src/box/vclock.c    |  5 +++--
 src/box/vclock.h    | 51 ++++++++++++++++++++++++++++++++++++---------
 src/box/wal.c       | 44 +++++++++++++++++++++++++-------------
 src/box/xrow.h      |  4 ++--
 test/unit/vclock.cc |  2 +-
 5 files changed, 77 insertions(+), 29 deletions(-)

-- 
2.20.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tarantool-patches] [PATCH v2 1/2] Lightweight vclock_create and vclock_copy
  2019-02-13  8:35 [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complecity Georgy Kirichenko
@ 2019-02-13  8:35 ` Georgy Kirichenko
  2019-02-14 10:26   ` [tarantool-patches] " Konstantin Osipov
  2019-02-13  8:35 ` [tarantool-patches] [PATCH v2 2/2] Track wal vclock changes instead of copying Georgy Kirichenko
  2019-02-14 14:42 ` [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complecity Vladimir Davydov
  2 siblings, 1 reply; 7+ messages in thread
From: Georgy Kirichenko @ 2019-02-13  8:35 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Georgy Kirichenko

Modify only needed part of a vclock structure instead of full structure
writing.

Follow-up: #2283
---
 src/box/vclock.c    |  5 +++--
 src/box/vclock.h    | 37 +++++++++++++++++++++++++++----------
 src/box/xrow.h      |  4 ++--
 test/unit/vclock.cc |  2 +-
 4 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/src/box/vclock.c b/src/box/vclock.c
index b5eb2800b..d4b2ba759 100644
--- a/src/box/vclock.c
+++ b/src/box/vclock.c
@@ -41,7 +41,7 @@ vclock_follow(struct vclock *vclock, uint32_t replica_id, int64_t lsn)
 {
 	assert(lsn >= 0);
 	assert(replica_id < VCLOCK_MAX);
-	int64_t prev_lsn = vclock->lsn[replica_id];
+	int64_t prev_lsn = vclock_get(vclock, replica_id);
 	assert(lsn > prev_lsn);
 	/* Easier add each time than check. */
 	vclock->map |= 1 << replica_id;
@@ -159,7 +159,8 @@ vclock_from_string(struct vclock *vclock, const char *str)
 			errno = 0;
 			lsn = strtoll(p, (char **)  &p, 10);
 			if (errno != 0 || lsn < 0 || lsn > INT64_MAX ||
-			    replica_id >= VCLOCK_MAX || vclock->lsn[replica_id] > 0)
+			    replica_id >= VCLOCK_MAX ||
+			    vclock_get(vclock, replica_id) > 0)
 				goto error;
 			vclock->map |= 1 << replica_id;
 			vclock->lsn[replica_id] = lsn;
diff --git a/src/box/vclock.h b/src/box/vclock.h
index 111e29160..a59b2bddb 100644
--- a/src/box/vclock.h
+++ b/src/box/vclock.h
@@ -48,7 +48,7 @@ extern "C" {
 
 enum {
 	/**
-	 * The maximum number of components in vclock
+	 * The maximum number of components in vclock, should be power of two.
 	 */
 	VCLOCK_MAX = 32,
 
@@ -129,7 +129,9 @@ vclock_iterator_next(struct vclock_iterator *it)
 static inline void
 vclock_create(struct vclock *vclock)
 {
-	memset(vclock, 0, sizeof(*vclock));
+	vclock->signature = 0;
+	vclock->map = 0;
+	vclock->lsn[0] = 0;
 }
 
 /**
@@ -139,8 +141,9 @@ vclock_create(struct vclock *vclock)
 static inline void
 vclock_clear(struct vclock *vclock)
 {
-	memset(vclock, 0, sizeof(*vclock));
 	vclock->signature = -1;
+	vclock->map = 0;
+	vclock->lsn[0] = 0;
 }
 
 /**
@@ -156,16 +159,24 @@ vclock_is_set(const struct vclock *vclock)
 static inline int64_t
 vclock_get(const struct vclock *vclock, uint32_t replica_id)
 {
-	if (replica_id >= VCLOCK_MAX)
-		return 0;
-	return vclock->lsn[replica_id];
+	/*
+	 * If replica_id does not fit in VCLOCK_MAX - 1 then
+	 * let result be undefined.
+	 */
+	replica_id &= VCLOCK_MAX - 1;
+	/* Evaluate a bitmask to avoid branching. */
+	int64_t mask = 0 - ((vclock->map >> replica_id) & 0x1);
+	return mask & vclock->lsn[replica_id];
 }
 
 static inline int64_t
 vclock_inc(struct vclock *vclock, uint32_t replica_id)
 {
 	/* Easier add each time than check. */
-	vclock->map |= 1 << replica_id;
+	if (((vclock->map >> replica_id) & 0x01) == 0) {
+		vclock->lsn[replica_id] = 0;
+		vclock->map |= 1 << replica_id;
+	}
 	vclock->signature++;
 	return ++vclock->lsn[replica_id];
 }
@@ -173,7 +184,13 @@ vclock_inc(struct vclock *vclock, uint32_t replica_id)
 static inline void
 vclock_copy(struct vclock *dst, const struct vclock *src)
 {
-	*dst = *src;
+	/*
+	 * Set the last bit of map because bit_clz_u32 returns
+	 * undefined result if zero passed.
+	 */
+	unsigned int max_pos = VCLOCK_MAX - bit_clz_u32(src->map | 0x01);
+	memcpy(dst, src, offsetof(struct vclock, lsn) +
+			 sizeof(*dst->lsn) * max_pos);
 }
 
 static inline uint32_t
@@ -253,8 +270,8 @@ vclock_compare(const struct vclock *a, const struct vclock *b)
 	for (size_t replica_id = bit_iterator_next(&it); replica_id < VCLOCK_MAX;
 	     replica_id = bit_iterator_next(&it)) {
 
-		int64_t lsn_a = a->lsn[replica_id];
-		int64_t lsn_b = b->lsn[replica_id];
+		int64_t lsn_a = vclock_get(a, replica_id);
+		int64_t lsn_b = vclock_get(b, replica_id);
 		le = le && lsn_a <= lsn_b;
 		ge = ge && lsn_a >= lsn_b;
 		if (!ge && !le)
diff --git a/src/box/xrow.h b/src/box/xrow.h
index 719add4f0..2fce83bbc 100644
--- a/src/box/xrow.h
+++ b/src/box/xrow.h
@@ -631,7 +631,7 @@ vclock_follow_xrow(struct vclock* vclock, const struct xrow_header *row)
 {
 	assert(row);
 	assert(row->replica_id < VCLOCK_MAX);
-	if (row->lsn <= vclock->lsn[row->replica_id]) {
+	if (row->lsn <= vclock_get(vclock, row->replica_id)) {
 		struct request req;
 		const char *req_str = "n/a";
 		if (xrow_decode_dml((struct xrow_header *)row, &req, 0) == 0)
@@ -640,7 +640,7 @@ vclock_follow_xrow(struct vclock* vclock, const struct xrow_header *row)
 		panic("LSN for %u is used twice or COMMIT order is broken: "
 		      "confirmed: %lld, new: %lld, req: %s",
 		      (unsigned) row->replica_id,
-		      (long long) vclock->lsn[row->replica_id],
+		      (long long) vclock_get(vclock, row->replica_id),
 		      (long long) row->lsn,
 		      req_str);
 	}
diff --git a/test/unit/vclock.cc b/test/unit/vclock.cc
index 8498eba3b..6a1d3bc27 100644
--- a/test/unit/vclock.cc
+++ b/test/unit/vclock.cc
@@ -308,7 +308,7 @@ test_fromstring_one(const char *str, uint32_t count, const int64_t *lsns)
 	vclock_create(&check);
 	for (uint32_t node_id = 0; node_id < count; node_id++) {
 		if (lsns[node_id] >= 0)
-			check.lsn[node_id] = lsns[node_id];
+			vclock_follow(&check, node_id, lsns[node_id]);
 	}
 
 	return (rc != 0 || vclock_compare(&vclock, &check) != 0);
-- 
2.20.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tarantool-patches] [PATCH v2 2/2] Track wal vclock changes instead of copying
  2019-02-13  8:35 [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complecity Georgy Kirichenko
  2019-02-13  8:35 ` [tarantool-patches] [PATCH v2 1/2] Lightweight vclock_create and vclock_copy Georgy Kirichenko
@ 2019-02-13  8:35 ` Georgy Kirichenko
  2019-02-14 10:31   ` [tarantool-patches] " Konstantin Osipov
  2019-02-14 14:04   ` [tarantool-patches] " Vladimir Davydov
  2019-02-14 14:42 ` [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complecity Vladimir Davydov
  2 siblings, 2 replies; 7+ messages in thread
From: Georgy Kirichenko @ 2019-02-13  8:35 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Georgy Kirichenko

Spare some vclock_copy invocations because they could be expensive.

Follow-up #2283
---
 src/box/vclock.h | 14 ++++++++++++++
 src/box/wal.c    | 44 ++++++++++++++++++++++++++++++--------------
 2 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/src/box/vclock.h b/src/box/vclock.h
index a59b2bddb..0c9996902 100644
--- a/src/box/vclock.h
+++ b/src/box/vclock.h
@@ -227,6 +227,20 @@ vclock_sum(const struct vclock *vclock)
 int64_t
 vclock_follow(struct vclock *vclock, uint32_t replica_id, int64_t lsn);
 
+/**
+ * Merge all diff changes into the destination
+ * vclock and after reset the diff.
+ */
+static inline void
+vclock_merge(struct vclock *dst, struct vclock *diff)
+{
+	struct vclock_iterator it;
+	vclock_iterator_init(&it, diff);
+	vclock_foreach(&it, item)
+		vclock_follow(dst, item.id, vclock_get(dst, item.id) + item.lsn);
+	vclock_create(diff);
+}
+
 /**
  * \brief Format vclock to YAML-compatible string representation:
  * { replica_id: lsn, replica_id:lsn })
diff --git a/src/box/wal.c b/src/box/wal.c
index 0b49548c0..b2652bb17 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -886,17 +886,25 @@ wal_writer_begin_rollback(struct wal_writer *writer)
 	cpipe_push(&writer->tx_prio_pipe, &writer->in_rollback);
 }
 
+/*
+ * Assign lsn and replica identifier for local writes and track
+ * row into vclock_diff.
+ */
 static void
-wal_assign_lsn(struct vclock *vclock, struct xrow_header **row,
+wal_assign_lsn(struct vclock *vclock_diff, struct vclock *base,
+	       struct xrow_header **row,
 	       struct xrow_header **end)
 {
 	/** Assign LSN to all local rows. */
 	for ( ; row < end; row++) {
 		if ((*row)->replica_id == 0) {
-			(*row)->lsn = vclock_inc(vclock, instance_id);
+			(*row)->lsn = vclock_inc(vclock_diff, instance_id) +
+				      vclock_get(base, instance_id);
 			(*row)->replica_id = instance_id;
 		} else {
-			vclock_follow_xrow(vclock, *row);
+			vclock_follow(vclock_diff, (*row)->replica_id,
+				      (*row)->lsn - vclock_get(base,
+							       (*row)->replica_id));
 		}
 	}
 }
@@ -909,13 +917,12 @@ wal_write_to_disk(struct cmsg *msg)
 	struct error *error;
 
 	/*
-	 * In order not to promote writer's vclock in case of an error,
-	 * we create a copy to assign LSNs before rows are actually
-	 * written. After successful xlog flush we update writer's vclock
-	 * to the last written vclock value.
+	 * Track all vclock changes made by this batch into
+	 * vclock_diff variable and then apply it into writers'
+	 * vclock after each xlog flush.
 	 */
-	struct vclock vclock;
-	vclock_copy(&vclock, &writer->vclock);
+	struct vclock vclock_diff;
+	vclock_create(&vclock_diff);
 
 	struct errinj *inj = errinj(ERRINJ_WAL_DELAY, ERRINJ_BOOL);
 	while (inj != NULL && inj->bparam)
@@ -924,18 +931,21 @@ wal_write_to_disk(struct cmsg *msg)
 	if (writer->in_rollback.route != NULL) {
 		/* We're rolling back a failed write. */
 		stailq_concat(&wal_msg->rollback, &wal_msg->commit);
+		vclock_copy(&wal_msg->vclock, &writer->vclock);
 		return;
 	}
 
 	/* Xlog is only rotated between queue processing  */
 	if (wal_opt_rotate(writer) != 0) {
 		stailq_concat(&wal_msg->rollback, &wal_msg->commit);
+		vclock_copy(&wal_msg->vclock, &writer->vclock);
 		return wal_writer_begin_rollback(writer);
 	}
 
 	/* Ensure there's enough disk space before writing anything. */
 	if (wal_fallocate(writer, wal_msg->approx_len) != 0) {
 		stailq_concat(&wal_msg->rollback, &wal_msg->commit);
+		vclock_copy(&wal_msg->vclock, &writer->vclock);
 		return wal_writer_begin_rollback(writer);
 	}
 
@@ -969,15 +979,17 @@ wal_write_to_disk(struct cmsg *msg)
 	struct journal_entry *entry;
 	struct stailq_entry *last_committed = NULL;
 	stailq_foreach_entry(entry, &wal_msg->commit, fifo) {
-		wal_assign_lsn(&vclock, entry->rows, entry->rows + entry->n_rows);
-		entry->res = vclock_sum(&vclock);
+		wal_assign_lsn(&vclock_diff, &writer->vclock,
+			       entry->rows, entry->rows + entry->n_rows);
+		entry->res = vclock_sum(&vclock_diff) +
+			     vclock_sum(&writer->vclock);
 		rc = xlog_write_entry(l, entry);
 		if (rc < 0)
 			goto done;
 		if (rc > 0) {
 			writer->checkpoint_wal_size += rc;
 			last_committed = &entry->fifo;
-			vclock_copy(&writer->vclock, &vclock);
+			vclock_merge(&writer->vclock, &vclock_diff);
 		}
 		/* rc == 0: the write is buffered in xlog_tx */
 	}
@@ -987,7 +999,7 @@ wal_write_to_disk(struct cmsg *msg)
 
 	writer->checkpoint_wal_size += rc;
 	last_committed = stailq_last(&wal_msg->commit);
-	vclock_copy(&writer->vclock, &vclock);
+	vclock_merge(&writer->vclock, &vclock_diff);
 
 	/*
 	 * Notify TX if the checkpoint threshold has been exceeded.
@@ -1162,7 +1174,11 @@ wal_write_in_wal_mode_none(struct journal *journal,
 			   struct journal_entry *entry)
 {
 	struct wal_writer *writer = (struct wal_writer *) journal;
-	wal_assign_lsn(&writer->vclock, entry->rows, entry->rows + entry->n_rows);
+	struct vclock vclock_diff;
+	vclock_create(&vclock_diff);
+	wal_assign_lsn(&vclock_diff, &writer->vclock, entry->rows,
+		       entry->rows + entry->n_rows);
+	vclock_merge(&writer->vclock, &vclock_diff);
 	vclock_copy(&replicaset.vclock, &writer->vclock);
 	return vclock_sum(&writer->vclock);
 }
-- 
2.20.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tarantool-patches] Re: [PATCH v2 1/2] Lightweight vclock_create and vclock_copy
  2019-02-13  8:35 ` [tarantool-patches] [PATCH v2 1/2] Lightweight vclock_create and vclock_copy Georgy Kirichenko
@ 2019-02-14 10:26   ` Konstantin Osipov
  0 siblings, 0 replies; 7+ messages in thread
From: Konstantin Osipov @ 2019-02-14 10:26 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Georgy Kirichenko

* Georgy Kirichenko <georgy@tarantool.org> [19/02/13 11:54]:
> Modify only needed part of a vclock structure instead of full structure
> writing.

Awesome, thank you, ok to push.


-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tarantool-patches] Re: [PATCH v2 2/2] Track wal vclock changes instead of copying
  2019-02-13  8:35 ` [tarantool-patches] [PATCH v2 2/2] Track wal vclock changes instead of copying Georgy Kirichenko
@ 2019-02-14 10:31   ` Konstantin Osipov
  2019-02-14 14:04   ` [tarantool-patches] " Vladimir Davydov
  1 sibling, 0 replies; 7+ messages in thread
From: Konstantin Osipov @ 2019-02-14 10:31 UTC (permalink / raw)
  To: tarantool-patches; +Cc: Georgy Kirichenko

* Georgy Kirichenko <georgy@tarantool.org> [19/02/13 11:54]:
> Spare some vclock_copy invocations because they could be expensive.

OK to push.
> +static inline void
> +vclock_merge(struct vclock *dst, struct vclock *diff)
> +{
> +	struct vclock_iterator it;
> +	vclock_iterator_init(&it, diff);
> +	vclock_foreach(&it, item)
> +		vclock_follow(dst, item.id, vclock_get(dst, item.id) + item.lsn);
> +	vclock_create(diff);
> +}

I agree with the approach since in most cases you're going to have
just one component in vclock_diff, so this approach is not going
to depend on the size of vclock (VCLOCK_MAX).

Thank you for following up on this.


-- 
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [tarantool-patches] [PATCH v2 2/2] Track wal vclock changes instead of copying
  2019-02-13  8:35 ` [tarantool-patches] [PATCH v2 2/2] Track wal vclock changes instead of copying Georgy Kirichenko
  2019-02-14 10:31   ` [tarantool-patches] " Konstantin Osipov
@ 2019-02-14 14:04   ` Vladimir Davydov
  1 sibling, 0 replies; 7+ messages in thread
From: Vladimir Davydov @ 2019-02-14 14:04 UTC (permalink / raw)
  To: Georgy Kirichenko; +Cc: tarantool-patches

On Wed, Feb 13, 2019 at 11:35:17AM +0300, Georgy Kirichenko wrote:
> +/*
> + * Assign lsn and replica identifier for local writes and track
> + * row into vclock_diff.
> + */
>  static void
> -wal_assign_lsn(struct vclock *vclock, struct xrow_header **row,
> +wal_assign_lsn(struct vclock *vclock_diff, struct vclock *base,
> +	       struct xrow_header **row,
>  	       struct xrow_header **end)
>  {
>  	/** Assign LSN to all local rows. */
>  	for ( ; row < end; row++) {
>  		if ((*row)->replica_id == 0) {
> -			(*row)->lsn = vclock_inc(vclock, instance_id);
> +			(*row)->lsn = vclock_inc(vclock_diff, instance_id) +
> +				      vclock_get(base, instance_id);
>  			(*row)->replica_id = instance_id;
>  		} else {
> -			vclock_follow_xrow(vclock, *row);
> +			vclock_follow(vclock_diff, (*row)->replica_id,
> +				      (*row)->lsn - vclock_get(base,
> +							       (*row)->replica_id));

vclock_follow_xrow() checks that LSN doesn't go backwards and panics if
it does.

vclock_follow() doesn't have such check.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complecity
  2019-02-13  8:35 [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complecity Georgy Kirichenko
  2019-02-13  8:35 ` [tarantool-patches] [PATCH v2 1/2] Lightweight vclock_create and vclock_copy Georgy Kirichenko
  2019-02-13  8:35 ` [tarantool-patches] [PATCH v2 2/2] Track wal vclock changes instead of copying Georgy Kirichenko
@ 2019-02-14 14:42 ` Vladimir Davydov
  2 siblings, 0 replies; 7+ messages in thread
From: Vladimir Davydov @ 2019-02-14 14:42 UTC (permalink / raw)
  To: Georgy Kirichenko; +Cc: tarantool-patches

On Wed, Feb 13, 2019 at 11:35:15AM +0300, Georgy Kirichenko wrote:
> Georgy Kirichenko (2):
>   Lightweight vclock_create and vclock_copy
>   Track wal vclock changes instead of copying

Pushed to 2.1 and 1.10.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-02-14 14:42 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-13  8:35 [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complecity Georgy Kirichenko
2019-02-13  8:35 ` [tarantool-patches] [PATCH v2 1/2] Lightweight vclock_create and vclock_copy Georgy Kirichenko
2019-02-14 10:26   ` [tarantool-patches] " Konstantin Osipov
2019-02-13  8:35 ` [tarantool-patches] [PATCH v2 2/2] Track wal vclock changes instead of copying Georgy Kirichenko
2019-02-14 10:31   ` [tarantool-patches] " Konstantin Osipov
2019-02-14 14:04   ` [tarantool-patches] " Vladimir Davydov
2019-02-14 14:42 ` [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complecity Vladimir Davydov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox