* [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complexity
@ 2019-02-13 8:35 Georgy Kirichenko
2019-02-13 8:35 ` [tarantool-patches] [PATCH v2 1/2] Lightweight vclock_create and vclock_copy Georgy Kirichenko
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Georgy Kirichenko @ 2019-02-13 8:35 UTC
To: tarantool-patches; +Cc: Georgy Kirichenko
Track vclock changes during each WAL write instead of doing a plain
vclock_copy, and make vclock_copy and vclock_create cheaper.
Changes in v2:
- Eliminate branching from vclock_get
- Minor fixes
Branch: https://github.com/tarantool/tarantool/tree/g.kirichenko/gh-2283-follow-up
Georgy Kirichenko (2):
Lightweight vclock_create and vclock_copy
Track wal vclock changes instead of copying
src/box/vclock.c | 5 +++--
src/box/vclock.h | 51 ++++++++++++++++++++++++++++++++++++---------
src/box/wal.c | 44 +++++++++++++++++++++++++-------------
src/box/xrow.h | 4 ++--
test/unit/vclock.cc | 2 +-
5 files changed, 77 insertions(+), 29 deletions(-)
--
2.20.1
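For reference, a minimal sketch of the structure this series optimizes, with
field types and order inferred from the diffs below rather than copied from
the source: a full vclock_copy moves the whole 32-slot lsn array even when
only one component has changed.

/*
 * Sketch of struct vclock as it can be inferred from the patches
 * below; the exact field types and order are assumptions.
 */
#include <stdint.h>

enum { VCLOCK_MAX = 32 };	/* must be a power of two */

struct vclock {
	int64_t signature;		/* sum of all components */
	uint32_t map;			/* bitmap of set components */
	int64_t lsn[VCLOCK_MAX];	/* per-replica LSNs */
};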
* [tarantool-patches] [PATCH v2 1/2] Lightweight vclock_create and vclock_copy
2019-02-13 8:35 [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complexity Georgy Kirichenko
@ 2019-02-13 8:35 ` Georgy Kirichenko
2019-02-14 10:26 ` [tarantool-patches] " Konstantin Osipov
2019-02-13 8:35 ` [tarantool-patches] [PATCH v2 2/2] Track wal vclock changes instead of copying Georgy Kirichenko
2019-02-14 14:42 ` [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complexity Vladimir Davydov
2 siblings, 1 reply; 7+ messages in thread
From: Georgy Kirichenko @ 2019-02-13 8:35 UTC
To: tarantool-patches; +Cc: Georgy Kirichenko
Modify only the needed part of a vclock structure instead of writing
the whole structure.
Follow-up: #2283
---
src/box/vclock.c | 5 +++--
src/box/vclock.h | 37 +++++++++++++++++++++++++++----------
src/box/xrow.h | 4 ++--
test/unit/vclock.cc | 2 +-
4 files changed, 33 insertions(+), 15 deletions(-)
diff --git a/src/box/vclock.c b/src/box/vclock.c
index b5eb2800b..d4b2ba759 100644
--- a/src/box/vclock.c
+++ b/src/box/vclock.c
@@ -41,7 +41,7 @@ vclock_follow(struct vclock *vclock, uint32_t replica_id, int64_t lsn)
{
assert(lsn >= 0);
assert(replica_id < VCLOCK_MAX);
- int64_t prev_lsn = vclock->lsn[replica_id];
+ int64_t prev_lsn = vclock_get(vclock, replica_id);
assert(lsn > prev_lsn);
/* Easier add each time than check. */
vclock->map |= 1 << replica_id;
@@ -159,7 +159,8 @@ vclock_from_string(struct vclock *vclock, const char *str)
errno = 0;
lsn = strtoll(p, (char **) &p, 10);
if (errno != 0 || lsn < 0 || lsn > INT64_MAX ||
- replica_id >= VCLOCK_MAX || vclock->lsn[replica_id] > 0)
+ replica_id >= VCLOCK_MAX ||
+ vclock_get(vclock, replica_id) > 0)
goto error;
vclock->map |= 1 << replica_id;
vclock->lsn[replica_id] = lsn;
diff --git a/src/box/vclock.h b/src/box/vclock.h
index 111e29160..a59b2bddb 100644
--- a/src/box/vclock.h
+++ b/src/box/vclock.h
@@ -48,7 +48,7 @@ extern "C" {
enum {
/**
- * The maximum number of components in vclock
+ * The maximum number of components in vclock; must be a power of two.
*/
VCLOCK_MAX = 32,
@@ -129,7 +129,9 @@ vclock_iterator_next(struct vclock_iterator *it)
static inline void
vclock_create(struct vclock *vclock)
{
- memset(vclock, 0, sizeof(*vclock));
+ vclock->signature = 0;
+ vclock->map = 0;
+ vclock->lsn[0] = 0;
}
/**
@@ -139,8 +141,9 @@ vclock_create(struct vclock *vclock)
static inline void
vclock_clear(struct vclock *vclock)
{
- memset(vclock, 0, sizeof(*vclock));
vclock->signature = -1;
+ vclock->map = 0;
+ vclock->lsn[0] = 0;
}
/**
@@ -156,16 +159,24 @@ vclock_is_set(const struct vclock *vclock)
static inline int64_t
vclock_get(const struct vclock *vclock, uint32_t replica_id)
{
- if (replica_id >= VCLOCK_MAX)
- return 0;
- return vclock->lsn[replica_id];
+ /*
+ * If replica_id is greater than or equal to VCLOCK_MAX,
+ * the result is undefined.
+ */
+ replica_id &= VCLOCK_MAX - 1;
+ /* Evaluate a bitmask to avoid branching. */
+ int64_t mask = 0 - ((vclock->map >> replica_id) & 0x1);
+ return mask & vclock->lsn[replica_id];
}
static inline int64_t
vclock_inc(struct vclock *vclock, uint32_t replica_id)
{
/* Easier add each time than check. */
- vclock->map |= 1 << replica_id;
+ if (((vclock->map >> replica_id) & 0x01) == 0) {
+ vclock->lsn[replica_id] = 0;
+ vclock->map |= 1 << replica_id;
+ }
vclock->signature++;
return ++vclock->lsn[replica_id];
}
@@ -173,7 +184,13 @@ vclock_inc(struct vclock *vclock, uint32_t replica_id)
static inline void
vclock_copy(struct vclock *dst, const struct vclock *src)
{
- *dst = *src;
+ /*
+ * Set the lowest bit of the map because bit_clz_u32 returns
+ * an undefined result when passed zero.
+ */
+ unsigned int max_pos = VCLOCK_MAX - bit_clz_u32(src->map | 0x01);
+ memcpy(dst, src, offsetof(struct vclock, lsn) +
+ sizeof(*dst->lsn) * max_pos);
}
static inline uint32_t
@@ -253,8 +270,8 @@ vclock_compare(const struct vclock *a, const struct vclock *b)
for (size_t replica_id = bit_iterator_next(&it); replica_id < VCLOCK_MAX;
replica_id = bit_iterator_next(&it)) {
- int64_t lsn_a = a->lsn[replica_id];
- int64_t lsn_b = b->lsn[replica_id];
+ int64_t lsn_a = vclock_get(a, replica_id);
+ int64_t lsn_b = vclock_get(b, replica_id);
le = le && lsn_a <= lsn_b;
ge = ge && lsn_a >= lsn_b;
if (!ge && !le)
diff --git a/src/box/xrow.h b/src/box/xrow.h
index 719add4f0..2fce83bbc 100644
--- a/src/box/xrow.h
+++ b/src/box/xrow.h
@@ -631,7 +631,7 @@ vclock_follow_xrow(struct vclock* vclock, const struct xrow_header *row)
{
assert(row);
assert(row->replica_id < VCLOCK_MAX);
- if (row->lsn <= vclock->lsn[row->replica_id]) {
+ if (row->lsn <= vclock_get(vclock, row->replica_id)) {
struct request req;
const char *req_str = "n/a";
if (xrow_decode_dml((struct xrow_header *)row, &req, 0) == 0)
@@ -640,7 +640,7 @@ vclock_follow_xrow(struct vclock* vclock, const struct xrow_header *row)
panic("LSN for %u is used twice or COMMIT order is broken: "
"confirmed: %lld, new: %lld, req: %s",
(unsigned) row->replica_id,
- (long long) vclock->lsn[row->replica_id],
+ (long long) vclock_get(vclock, row->replica_id),
(long long) row->lsn,
req_str);
}
diff --git a/test/unit/vclock.cc b/test/unit/vclock.cc
index 8498eba3b..6a1d3bc27 100644
--- a/test/unit/vclock.cc
+++ b/test/unit/vclock.cc
@@ -308,7 +308,7 @@ test_fromstring_one(const char *str, uint32_t count, const int64_t *lsns)
vclock_create(&check);
for (uint32_t node_id = 0; node_id < count; node_id++) {
if (lsns[node_id] >= 0)
- check.lsn[node_id] = lsns[node_id];
+ vclock_follow(&check, node_id, lsns[node_id]);
}
return (rc != 0 || vclock_compare(&vclock, &check) != 0);
--
2.20.1
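To see the two tricks from this patch in isolation, here is a standalone
sketch, not the actual tarantool source: it reuses the assumed struct layout
from the cover letter, and __builtin_clz stands in for bit_clz_u32. Note the
explicit cast when building the mask, so the negation happens in 64-bit
arithmetic.

/*
 * Standalone sketch of the branchless vclock_get and the truncated
 * vclock_copy from this patch; the struct layout and bit_clz_u32 are
 * approximated, so this is illustrative, not the real source.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

enum { VCLOCK_MAX = 32 };	/* must be a power of two */

struct vclock {
	int64_t signature;
	uint32_t map;
	int64_t lsn[VCLOCK_MAX];
};

static inline int64_t
sketch_vclock_get(const struct vclock *vclock, uint32_t replica_id)
{
	/* Out-of-range ids wrap instead of taking a branch. */
	replica_id &= VCLOCK_MAX - 1;
	/*
	 * mask is all ones if the component is set and all zeros
	 * otherwise; the cast makes the negation 64-bit wide.
	 */
	int64_t mask = -(int64_t)((vclock->map >> replica_id) & 0x1);
	return mask & vclock->lsn[replica_id];
}

static inline void
sketch_vclock_copy(struct vclock *dst, const struct vclock *src)
{
	/*
	 * Copy only up to the highest set component; | 0x01 keeps
	 * __builtin_clz away from its undefined zero input.
	 */
	unsigned max_pos = VCLOCK_MAX - __builtin_clz(src->map | 0x01);
	memcpy(dst, src, offsetof(struct vclock, lsn) +
	       sizeof(*dst->lsn) * max_pos);
}

int
main(void)
{
	struct vclock v = { .signature = 7, .map = 1u << 3 };
	v.lsn[3] = 7;
	assert(sketch_vclock_get(&v, 3) == 7);
	assert(sketch_vclock_get(&v, 5) == 0);	/* unset reads as 0 */
	struct vclock w;
	sketch_vclock_copy(&w, &v);
	assert(sketch_vclock_get(&w, 3) == 7);
	return 0;
}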
* [tarantool-patches] [PATCH v2 2/2] Track wal vclock changes instead of copying
2019-02-13 8:35 [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complexity Georgy Kirichenko
2019-02-13 8:35 ` [tarantool-patches] [PATCH v2 1/2] Lightweight vclock_create and vclock_copy Georgy Kirichenko
@ 2019-02-13 8:35 ` Georgy Kirichenko
2019-02-14 10:31 ` [tarantool-patches] " Konstantin Osipov
2019-02-14 14:04 ` [tarantool-patches] " Vladimir Davydov
2019-02-14 14:42 ` [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complexity Vladimir Davydov
2 siblings, 2 replies; 7+ messages in thread
From: Georgy Kirichenko @ 2019-02-13 8:35 UTC
To: tarantool-patches; +Cc: Georgy Kirichenko
Avoid some vclock_copy invocations because they can be expensive.
Follow-up: #2283
---
src/box/vclock.h | 14 ++++++++++++++
src/box/wal.c | 44 ++++++++++++++++++++++++++++++--------------
2 files changed, 44 insertions(+), 14 deletions(-)
diff --git a/src/box/vclock.h b/src/box/vclock.h
index a59b2bddb..0c9996902 100644
--- a/src/box/vclock.h
+++ b/src/box/vclock.h
@@ -227,6 +227,20 @@ vclock_sum(const struct vclock *vclock)
int64_t
vclock_follow(struct vclock *vclock, uint32_t replica_id, int64_t lsn);
+/**
+ * Merge all changes from the diff into the destination
+ * vclock, then reset the diff.
+ */
+static inline void
+vclock_merge(struct vclock *dst, struct vclock *diff)
+{
+ struct vclock_iterator it;
+ vclock_iterator_init(&it, diff);
+ vclock_foreach(&it, item)
+ vclock_follow(dst, item.id, vclock_get(dst, item.id) + item.lsn);
+ vclock_create(diff);
+}
+
/**
* \brief Format vclock to YAML-compatible string representation:
* { replica_id: lsn, replica_id:lsn })
diff --git a/src/box/wal.c b/src/box/wal.c
index 0b49548c0..b2652bb17 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -886,17 +886,25 @@ wal_writer_begin_rollback(struct wal_writer *writer)
cpipe_push(&writer->tx_prio_pipe, &writer->in_rollback);
}
+/*
+ * Assign an LSN and a replica identifier to local writes and
+ * record the changes in vclock_diff.
+ */
static void
-wal_assign_lsn(struct vclock *vclock, struct xrow_header **row,
+wal_assign_lsn(struct vclock *vclock_diff, struct vclock *base,
+ struct xrow_header **row,
struct xrow_header **end)
{
/** Assign LSN to all local rows. */
for ( ; row < end; row++) {
if ((*row)->replica_id == 0) {
- (*row)->lsn = vclock_inc(vclock, instance_id);
+ (*row)->lsn = vclock_inc(vclock_diff, instance_id) +
+ vclock_get(base, instance_id);
(*row)->replica_id = instance_id;
} else {
- vclock_follow_xrow(vclock, *row);
+ vclock_follow(vclock_diff, (*row)->replica_id,
+ (*row)->lsn - vclock_get(base,
+ (*row)->replica_id));
}
}
}
@@ -909,13 +917,12 @@ wal_write_to_disk(struct cmsg *msg)
struct error *error;
/*
- * In order not to promote writer's vclock in case of an error,
- * we create a copy to assign LSNs before rows are actually
- * written. After successful xlog flush we update writer's vclock
- * to the last written vclock value.
+ * Track all vclock changes made by this batch in the
+ * vclock_diff variable and apply them to the writer's
+ * vclock after each xlog flush.
*/
- struct vclock vclock;
- vclock_copy(&vclock, &writer->vclock);
+ struct vclock vclock_diff;
+ vclock_create(&vclock_diff);
struct errinj *inj = errinj(ERRINJ_WAL_DELAY, ERRINJ_BOOL);
while (inj != NULL && inj->bparam)
@@ -924,18 +931,21 @@ wal_write_to_disk(struct cmsg *msg)
if (writer->in_rollback.route != NULL) {
/* We're rolling back a failed write. */
stailq_concat(&wal_msg->rollback, &wal_msg->commit);
+ vclock_copy(&wal_msg->vclock, &writer->vclock);
return;
}
/* Xlog is only rotated between queue processing */
if (wal_opt_rotate(writer) != 0) {
stailq_concat(&wal_msg->rollback, &wal_msg->commit);
+ vclock_copy(&wal_msg->vclock, &writer->vclock);
return wal_writer_begin_rollback(writer);
}
/* Ensure there's enough disk space before writing anything. */
if (wal_fallocate(writer, wal_msg->approx_len) != 0) {
stailq_concat(&wal_msg->rollback, &wal_msg->commit);
+ vclock_copy(&wal_msg->vclock, &writer->vclock);
return wal_writer_begin_rollback(writer);
}
@@ -969,15 +979,17 @@ wal_write_to_disk(struct cmsg *msg)
struct journal_entry *entry;
struct stailq_entry *last_committed = NULL;
stailq_foreach_entry(entry, &wal_msg->commit, fifo) {
- wal_assign_lsn(&vclock, entry->rows, entry->rows + entry->n_rows);
- entry->res = vclock_sum(&vclock);
+ wal_assign_lsn(&vclock_diff, &writer->vclock,
+ entry->rows, entry->rows + entry->n_rows);
+ entry->res = vclock_sum(&vclock_diff) +
+ vclock_sum(&writer->vclock);
rc = xlog_write_entry(l, entry);
if (rc < 0)
goto done;
if (rc > 0) {
writer->checkpoint_wal_size += rc;
last_committed = &entry->fifo;
- vclock_copy(&writer->vclock, &vclock);
+ vclock_merge(&writer->vclock, &vclock_diff);
}
/* rc == 0: the write is buffered in xlog_tx */
}
@@ -987,7 +999,7 @@ wal_write_to_disk(struct cmsg *msg)
writer->checkpoint_wal_size += rc;
last_committed = stailq_last(&wal_msg->commit);
- vclock_copy(&writer->vclock, &vclock);
+ vclock_merge(&writer->vclock, &vclock_diff);
/*
* Notify TX if the checkpoint threshold has been exceeded.
@@ -1162,7 +1174,11 @@ wal_write_in_wal_mode_none(struct journal *journal,
struct journal_entry *entry)
{
struct wal_writer *writer = (struct wal_writer *) journal;
- wal_assign_lsn(&writer->vclock, entry->rows, entry->rows + entry->n_rows);
+ struct vclock vclock_diff;
+ vclock_create(&vclock_diff);
+ wal_assign_lsn(&vclock_diff, &writer->vclock, entry->rows,
+ entry->rows + entry->n_rows);
+ vclock_merge(&writer->vclock, &vclock_diff);
vclock_copy(&replicaset.vclock, &writer->vclock);
return vclock_sum(&writer->vclock);
}
--
2.20.1
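To illustrate the scheme, here is a sketch, again with an approximated
struct, of how a batch accumulates deltas in a diff vclock while the
writer's base vclock stays untouched until the flush succeeds; on failure
the diff is simply discarded, which is what the old whole-struct
vclock_copy used to buy.

/*
 * Sketch of the diff-tracking scheme from this patch; the struct
 * layout and names are approximations, not the real tarantool source.
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

enum { VCLOCK_MAX = 32 };

struct vclock {
	uint32_t map;
	int64_t lsn[VCLOCK_MAX];
};

/*
 * Fold each component recorded in diff into dst, then reset diff.
 * The diff stores deltas relative to dst, so merging is an add; the
 * real vclock_merge walks only the set bits of diff->map.
 */
static void
sketch_vclock_merge(struct vclock *dst, struct vclock *diff)
{
	for (uint32_t id = 0; id < VCLOCK_MAX; id++) {
		if ((diff->map >> id) & 1) {
			dst->lsn[id] += diff->lsn[id];
			dst->map |= 1u << id;
		}
	}
	memset(diff, 0, sizeof(*diff));
}

int
main(void)
{
	struct vclock base = { .map = 1u << 1 };
	base.lsn[1] = 100;	/* the writer's confirmed vclock */
	struct vclock diff = { 0 };

	/* As in wal_assign_lsn: a local row gets base lsn + delta. */
	diff.map |= 1u << 1;
	int64_t row_lsn = base.lsn[1] + ++diff.lsn[1];
	assert(row_lsn == 101);
	row_lsn = base.lsn[1] + ++diff.lsn[1];
	assert(row_lsn == 102);

	/* A successful flush folds the deltas into the base vclock;
	 * a failed one just drops the diff and base stays intact. */
	sketch_vclock_merge(&base, &diff);
	assert(base.lsn[1] == 102);
	assert(diff.map == 0);
	return 0;
}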
* [tarantool-patches] Re: [PATCH v2 1/2] Lightweight vclock_create and vclock_copy
2019-02-13 8:35 ` [tarantool-patches] [PATCH v2 1/2] Lightweight vclock_create and vclock_copy Georgy Kirichenko
@ 2019-02-14 10:26 ` Konstantin Osipov
0 siblings, 0 replies; 7+ messages in thread
From: Konstantin Osipov @ 2019-02-14 10:26 UTC
To: tarantool-patches; +Cc: Georgy Kirichenko
* Georgy Kirichenko <georgy@tarantool.org> [19/02/13 11:54]:
> Modify only needed part of a vclock structure instead of full structure
> writing.
Awesome, thank you, ok to push.
--
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov
* [tarantool-patches] Re: [PATCH v2 2/2] Track wal vclock changes instead of copying
2019-02-13 8:35 ` [tarantool-patches] [PATCH v2 2/2] Track wal vclock changes instead of copying Georgy Kirichenko
@ 2019-02-14 10:31 ` Konstantin Osipov
2019-02-14 14:04 ` [tarantool-patches] " Vladimir Davydov
1 sibling, 0 replies; 7+ messages in thread
From: Konstantin Osipov @ 2019-02-14 10:31 UTC
To: tarantool-patches; +Cc: Georgy Kirichenko
* Georgy Kirichenko <georgy@tarantool.org> [19/02/13 11:54]:
> Spare some vclock_copy invocations because they could be expensive.
OK to push.
> +static inline void
> +vclock_merge(struct vclock *dst, struct vclock *diff)
> +{
> + struct vclock_iterator it;
> + vclock_iterator_init(&it, diff);
> + vclock_foreach(&it, item)
> + vclock_follow(dst, item.id, vclock_get(dst, item.id) + item.lsn);
> + vclock_create(diff);
> +}
I agree with the approach: in most cases there will be just one
component in vclock_diff, so the merge cost does not depend on the
size of the vclock (VCLOCK_MAX).
Thank you for following up on this.
--
Konstantin Osipov, Moscow, Russia, +7 903 626 22 32
http://tarantool.io - www.twitter.com/kostja_osipov
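A sketch of what that looks like in practice: walking only the set bits of
a 32-bit component map, in the spirit of the vclock_iterator used above,
makes the loop cost proportional to the number of components in the diff
rather than to VCLOCK_MAX. The ids and map value here are illustrative.

/*
 * Sketch of a set-bits-only walk over a 32-bit component map; not
 * the real vclock_iterator, just the same idea.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t map = (1u << 1) | (1u << 7);	/* two components set */
	while (map != 0) {
		/* __builtin_ctz is well defined here since map != 0. */
		uint32_t id = (uint32_t)__builtin_ctz(map);
		printf("component %u\n", id);
		map &= map - 1;		/* clear the lowest set bit */
	}
	return 0;
}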
* Re: [tarantool-patches] [PATCH v2 2/2] Track wal vclock changes instead of copying
2019-02-13 8:35 ` [tarantool-patches] [PATCH v2 2/2] Track wal vclock changes instead of copying Georgy Kirichenko
2019-02-14 10:31 ` [tarantool-patches] " Konstantin Osipov
@ 2019-02-14 14:04 ` Vladimir Davydov
1 sibling, 0 replies; 7+ messages in thread
From: Vladimir Davydov @ 2019-02-14 14:04 UTC
To: Georgy Kirichenko; +Cc: tarantool-patches
On Wed, Feb 13, 2019 at 11:35:17AM +0300, Georgy Kirichenko wrote:
> +/*
> + * Assign lsn and replica identifier for local writes and track
> + * row into vclock_diff.
> + */
> static void
> -wal_assign_lsn(struct vclock *vclock, struct xrow_header **row,
> +wal_assign_lsn(struct vclock *vclock_diff, struct vclock *base,
> + struct xrow_header **row,
> struct xrow_header **end)
> {
> /** Assign LSN to all local rows. */
> for ( ; row < end; row++) {
> if ((*row)->replica_id == 0) {
> - (*row)->lsn = vclock_inc(vclock, instance_id);
> + (*row)->lsn = vclock_inc(vclock_diff, instance_id) +
> + vclock_get(base, instance_id);
> (*row)->replica_id = instance_id;
> } else {
> - vclock_follow_xrow(vclock, *row);
> + vclock_follow(vclock_diff, (*row)->replica_id,
> + (*row)->lsn - vclock_get(base,
> + (*row)->replica_id));
vclock_follow_xrow() checks that the LSN doesn't go backwards and
panics if it does. vclock_follow() doesn't have such a check.
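For clarity, the guard in question, modeled on the xrow.h hunk of patch 1;
the names and the panic helper here are placeholders, not the real API.

/*
 * Sketch of the monotonicity guard vclock_follow_xrow() provides;
 * panic_lsn() is a stand-in for the real panic() with request info.
 */
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

static void
panic_lsn(uint32_t replica_id, int64_t confirmed, int64_t lsn)
{
	fprintf(stderr, "LSN for %u is used twice or COMMIT order is "
		"broken: confirmed: %" PRId64 ", new: %" PRId64 "\n",
		replica_id, confirmed, lsn);
	abort();
}

/* Advance a component only if the new LSN moves strictly forward. */
static void
checked_follow(int64_t *confirmed, uint32_t replica_id, int64_t lsn)
{
	if (lsn <= *confirmed)
		panic_lsn(replica_id, *confirmed, lsn);
	*confirmed = lsn;
}

int
main(void)
{
	int64_t confirmed = 10;
	checked_follow(&confirmed, 1, 11);	/* ok: strictly forward */
	/* checked_follow(&confirmed, 1, 11) would panic here. */
	return 0;
}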
* Re: [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complexity
2019-02-13 8:35 [tarantool-patches] [PATCH v2 0/2] Reduce wal vclock handling complexity Georgy Kirichenko
2019-02-13 8:35 ` [tarantool-patches] [PATCH v2 1/2] Lightweight vclock_create and vclock_copy Georgy Kirichenko
2019-02-13 8:35 ` [tarantool-patches] [PATCH v2 2/2] Track wal vclock changes instead of copying Georgy Kirichenko
@ 2019-02-14 14:42 ` Vladimir Davydov
2 siblings, 0 replies; 7+ messages in thread
From: Vladimir Davydov @ 2019-02-14 14:42 UTC
To: Georgy Kirichenko; +Cc: tarantool-patches
On Wed, Feb 13, 2019 at 11:35:15AM +0300, Georgy Kirichenko wrote:
> Georgy Kirichenko (2):
> Lightweight vclock_create and vclock_copy
> Track wal vclock changes instead of copying
Pushed to 2.1 and 1.10.