* [tarantool-patches] [PATCH 1/2] Lightweight vclock_create and vclock_copy
2019-02-12 14:09 [tarantool-patches] [PATCH 0/2] Reduce wal vclock handling complexity Georgy Kirichenko
@ 2019-02-12 14:09 ` Georgy Kirichenko
2019-02-12 19:25 ` [tarantool-patches] " Konstantin Osipov
2019-02-12 14:09 ` [tarantool-patches] [PATCH 2/2] Track wal vclock changes instead of copying Georgy Kirichenko
1 sibling, 1 reply; 7+ messages in thread
From: Georgy Kirichenko @ 2019-02-12 14:09 UTC (permalink / raw)
To: tarantool-patches; +Cc: Georgy Kirichenko
Modify only the needed part of a vclock structure instead of writing
the full structure.
Follow-up #2283
---
src/box/vclock.c | 5 +++--
src/box/vclock.h | 21 ++++++++++++++++-----
src/box/xrow.h | 4 ++--
test/unit/vclock.cc | 2 +-
4 files changed, 22 insertions(+), 10 deletions(-)
diff --git a/src/box/vclock.c b/src/box/vclock.c
index b5eb2800b..d4b2ba759 100644
--- a/src/box/vclock.c
+++ b/src/box/vclock.c
@@ -41,7 +41,7 @@ vclock_follow(struct vclock *vclock, uint32_t replica_id, int64_t lsn)
{
assert(lsn >= 0);
assert(replica_id < VCLOCK_MAX);
- int64_t prev_lsn = vclock->lsn[replica_id];
+ int64_t prev_lsn = vclock_get(vclock, replica_id);
assert(lsn > prev_lsn);
/* Easier add each time than check. */
vclock->map |= 1 << replica_id;
@@ -159,7 +159,8 @@ vclock_from_string(struct vclock *vclock, const char *str)
errno = 0;
lsn = strtoll(p, (char **) &p, 10);
if (errno != 0 || lsn < 0 || lsn > INT64_MAX ||
- replica_id >= VCLOCK_MAX || vclock->lsn[replica_id] > 0)
+ replica_id >= VCLOCK_MAX ||
+ vclock_get(vclock, replica_id) > 0)
goto error;
vclock->map |= 1 << replica_id;
vclock->lsn[replica_id] = lsn;
diff --git a/src/box/vclock.h b/src/box/vclock.h
index 111e29160..9e97aff27 100644
--- a/src/box/vclock.h
+++ b/src/box/vclock.h
@@ -129,7 +129,8 @@ vclock_iterator_next(struct vclock_iterator *it)
static inline void
vclock_create(struct vclock *vclock)
{
- memset(vclock, 0, sizeof(*vclock));
+ vclock->signature = 0;
+ vclock->map = 0;
}
/**
@@ -139,8 +140,8 @@ vclock_create(struct vclock *vclock)
static inline void
vclock_clear(struct vclock *vclock)
{
- memset(vclock, 0, sizeof(*vclock));
vclock->signature = -1;
+ vclock->map = 0;
}
/**
@@ -158,6 +159,8 @@ vclock_get(const struct vclock *vclock, uint32_t replica_id)
{
if (replica_id >= VCLOCK_MAX)
return 0;
+ if ((vclock->map & (1 << replica_id)) == 0)
+ return 0;
return vclock->lsn[replica_id];
}
@@ -165,6 +168,8 @@ static inline int64_t
vclock_inc(struct vclock *vclock, uint32_t replica_id)
{
/* Easier add each time than check. */
+ if ((vclock->map & (1 << replica_id)) == 0)
+ vclock->lsn[replica_id] = 0;
vclock->map |= 1 << replica_id;
vclock->signature++;
return ++vclock->lsn[replica_id];
@@ -173,7 +178,13 @@ vclock_inc(struct vclock *vclock, uint32_t replica_id)
static inline void
vclock_copy(struct vclock *dst, const struct vclock *src)
{
- *dst = *src;
+ if (src->map == 0) {
+ dst->map = src->map;
+ dst->signature = src->signature;
+ return;
+ }
+ unsigned int max_pos = VCLOCK_MAX - bit_clz_u32(src->map);
+ memcpy(dst, src, offsetof(struct vclock, lsn) + sizeof(*dst->lsn) * max_pos);
}
static inline uint32_t
@@ -253,8 +264,8 @@ vclock_compare(const struct vclock *a, const struct vclock *b)
for (size_t replica_id = bit_iterator_next(&it); replica_id < VCLOCK_MAX;
replica_id = bit_iterator_next(&it)) {
- int64_t lsn_a = a->lsn[replica_id];
- int64_t lsn_b = b->lsn[replica_id];
+ int64_t lsn_a = vclock_get(a, replica_id);
+ int64_t lsn_b = vclock_get(b, replica_id);
le = le && lsn_a <= lsn_b;
ge = ge && lsn_a >= lsn_b;
if (!ge && !le)
diff --git a/src/box/xrow.h b/src/box/xrow.h
index 719add4f0..2fce83bbc 100644
--- a/src/box/xrow.h
+++ b/src/box/xrow.h
@@ -631,7 +631,7 @@ vclock_follow_xrow(struct vclock* vclock, const struct xrow_header *row)
{
assert(row);
assert(row->replica_id < VCLOCK_MAX);
- if (row->lsn <= vclock->lsn[row->replica_id]) {
+ if (row->lsn <= vclock_get(vclock, row->replica_id)) {
struct request req;
const char *req_str = "n/a";
if (xrow_decode_dml((struct xrow_header *)row, &req, 0) == 0)
@@ -640,7 +640,7 @@ vclock_follow_xrow(struct vclock* vclock, const struct xrow_header *row)
panic("LSN for %u is used twice or COMMIT order is broken: "
"confirmed: %lld, new: %lld, req: %s",
(unsigned) row->replica_id,
- (long long) vclock->lsn[row->replica_id],
+ (long long) vclock_get(vclock, row->replica_id),
(long long) row->lsn,
req_str);
}
diff --git a/test/unit/vclock.cc b/test/unit/vclock.cc
index 8498eba3b..6a1d3bc27 100644
--- a/test/unit/vclock.cc
+++ b/test/unit/vclock.cc
@@ -308,7 +308,7 @@ test_fromstring_one(const char *str, uint32_t count, const int64_t *lsns)
vclock_create(&check);
for (uint32_t node_id = 0; node_id < count; node_id++) {
if (lsns[node_id] >= 0)
- check.lsn[node_id] = lsns[node_id];
+ vclock_follow(&check, node_id, lsns[node_id]);
}
return (rc != 0 || vclock_compare(&vclock, &check) != 0);
--
2.20.1
^ permalink raw reply [flat|nested] 7+ messages in thread
* [tarantool-patches] [PATCH 2/2] Track wal vclock changes instead of copying
2019-02-12 14:09 [tarantool-patches] [PATCH 0/2] Reduce wal vclock handling complexity Georgy Kirichenko
2019-02-12 14:09 ` [tarantool-patches] [PATCH 1/2] Lightweight vclock_create and vclock_copy Georgy Kirichenko
@ 2019-02-12 14:09 ` Georgy Kirichenko
2019-02-12 19:15 ` [tarantool-patches] " Konstantin Osipov
1 sibling, 1 reply; 7+ messages in thread
From: Georgy Kirichenko @ 2019-02-12 14:09 UTC (permalink / raw)
To: tarantool-patches; +Cc: Georgy Kirichenko
Spare some vclock_copy invocations because they can be expensive.
Follow-up #2283
---
src/box/vclock.h | 14 ++++++++++++++
src/box/wal.c | 44 ++++++++++++++++++++++++++++++--------------
2 files changed, 44 insertions(+), 14 deletions(-)
diff --git a/src/box/vclock.h b/src/box/vclock.h
index 9e97aff27..7e9a2a902 100644
--- a/src/box/vclock.h
+++ b/src/box/vclock.h
@@ -221,6 +221,20 @@ vclock_sum(const struct vclock *vclock)
int64_t
vclock_follow(struct vclock *vclock, uint32_t replica_id, int64_t lsn);
+/**
+ * Merge all diff changes into the destination
+ * vclock and then reset the diff.
+ */
+static inline void
+vclock_merge(struct vclock *dst, struct vclock *diff)
+{
+ struct vclock_iterator it;
+ vclock_iterator_init(&it, diff);
+ vclock_foreach(&it, item)
+ vclock_follow(dst, item.id, vclock_get(dst, item.id) + item.lsn);
+ vclock_create(diff);
+}
+
/**
* \brief Format vclock to YAML-compatible string representation:
* { replica_id: lsn, replica_id:lsn })
diff --git a/src/box/wal.c b/src/box/wal.c
index 0b49548c0..b2652bb17 100644
--- a/src/box/wal.c
+++ b/src/box/wal.c
@@ -886,17 +886,25 @@ wal_writer_begin_rollback(struct wal_writer *writer)
cpipe_push(&writer->tx_prio_pipe, &writer->in_rollback);
}
+/*
+ * Assign an lsn and a replica identifier for local writes and
+ * track each row in vclock_diff.
+ */
static void
-wal_assign_lsn(struct vclock *vclock, struct xrow_header **row,
+wal_assign_lsn(struct vclock *vclock_diff, struct vclock *base,
+ struct xrow_header **row,
struct xrow_header **end)
{
/** Assign LSN to all local rows. */
for ( ; row < end; row++) {
if ((*row)->replica_id == 0) {
- (*row)->lsn = vclock_inc(vclock, instance_id);
+ (*row)->lsn = vclock_inc(vclock_diff, instance_id) +
+ vclock_get(base, instance_id);
(*row)->replica_id = instance_id;
} else {
- vclock_follow_xrow(vclock, *row);
+ vclock_follow(vclock_diff, (*row)->replica_id,
+ (*row)->lsn - vclock_get(base,
+ (*row)->replica_id));
}
}
}
@@ -909,13 +917,12 @@ wal_write_to_disk(struct cmsg *msg)
struct error *error;
/*
- * In order not to promote writer's vclock in case of an error,
- * we create a copy to assign LSNs before rows are actually
- * written. After successful xlog flush we update writer's vclock
- * to the last written vclock value.
+ * Track all vclock changes made by this batch in the
+ * vclock_diff variable and then apply it to the writer's
+ * vclock after each xlog flush.
*/
- struct vclock vclock;
- vclock_copy(&vclock, &writer->vclock);
+ struct vclock vclock_diff;
+ vclock_create(&vclock_diff);
struct errinj *inj = errinj(ERRINJ_WAL_DELAY, ERRINJ_BOOL);
while (inj != NULL && inj->bparam)
@@ -924,18 +931,21 @@ wal_write_to_disk(struct cmsg *msg)
if (writer->in_rollback.route != NULL) {
/* We're rolling back a failed write. */
stailq_concat(&wal_msg->rollback, &wal_msg->commit);
+ vclock_copy(&wal_msg->vclock, &writer->vclock);
return;
}
/* Xlog is only rotated between queue processing */
if (wal_opt_rotate(writer) != 0) {
stailq_concat(&wal_msg->rollback, &wal_msg->commit);
+ vclock_copy(&wal_msg->vclock, &writer->vclock);
return wal_writer_begin_rollback(writer);
}
/* Ensure there's enough disk space before writing anything. */
if (wal_fallocate(writer, wal_msg->approx_len) != 0) {
stailq_concat(&wal_msg->rollback, &wal_msg->commit);
+ vclock_copy(&wal_msg->vclock, &writer->vclock);
return wal_writer_begin_rollback(writer);
}
@@ -969,15 +979,17 @@ wal_write_to_disk(struct cmsg *msg)
struct journal_entry *entry;
struct stailq_entry *last_committed = NULL;
stailq_foreach_entry(entry, &wal_msg->commit, fifo) {
- wal_assign_lsn(&vclock, entry->rows, entry->rows + entry->n_rows);
- entry->res = vclock_sum(&vclock);
+ wal_assign_lsn(&vclock_diff, &writer->vclock,
+ entry->rows, entry->rows + entry->n_rows);
+ entry->res = vclock_sum(&vclock_diff) +
+ vclock_sum(&writer->vclock);
rc = xlog_write_entry(l, entry);
if (rc < 0)
goto done;
if (rc > 0) {
writer->checkpoint_wal_size += rc;
last_committed = &entry->fifo;
- vclock_copy(&writer->vclock, &vclock);
+ vclock_merge(&writer->vclock, &vclock_diff);
}
/* rc == 0: the write is buffered in xlog_tx */
}
@@ -987,7 +999,7 @@ wal_write_to_disk(struct cmsg *msg)
writer->checkpoint_wal_size += rc;
last_committed = stailq_last(&wal_msg->commit);
- vclock_copy(&writer->vclock, &vclock);
+ vclock_merge(&writer->vclock, &vclock_diff);
/*
* Notify TX if the checkpoint threshold has been exceeded.
@@ -1162,7 +1174,11 @@ wal_write_in_wal_mode_none(struct journal *journal,
struct journal_entry *entry)
{
struct wal_writer *writer = (struct wal_writer *) journal;
- wal_assign_lsn(&writer->vclock, entry->rows, entry->rows + entry->n_rows);
+ struct vclock vclock_diff;
+ vclock_create(&vclock_diff);
+ wal_assign_lsn(&vclock_diff, &writer->vclock, entry->rows,
+ entry->rows + entry->n_rows);
+ vclock_merge(&writer->vclock, &vclock_diff);
vclock_copy(&replicaset.vclock, &writer->vclock);
return vclock_sum(&writer->vclock);
}
--
2.20.1
^ permalink raw reply [flat|nested] 7+ messages in thread