Tarantool development patches archive
 help / color / mirror / Atom feed
* [Tarantool-patches] [PATCH v2 0/3] box/info: report replication.X.downstream.lag
@ 2021-02-01 10:00 Cyrill Gorcunov via Tarantool-patches
  2021-02-01 10:00 ` [Tarantool-patches] [PATCH v2 1/3] applier: encode timestamp into vclock message Cyrill Gorcunov via Tarantool-patches
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Cyrill Gorcunov via Tarantool-patches @ 2021-02-01 10:00 UTC (permalink / raw)
  To: tml; +Cc: Vladislav Shpilevoy

Vlad, Serge, take a look once time permit.

v2:
 - provide special helper xrow_encode_vclock_timed to send
   timestamp with applier ack message
 - document new entry
 - style fix in test

issue https://github.com/tarantool/tarantool/issues/5447
branch gorcunov/gh-5447-relay-lag-2

Cyrill Gorcunov (3):
  applier: encode timestamp into vclock message
  box/info: report replication.X.downstream.lag value
  test: replication/status -- fetch downstream lag field

 src/box/applier.cc               |  3 ++-
 src/box/lua/info.c               |  8 ++++++++
 src/box/relay.cc                 | 16 ++++++++++++++++
 src/box/relay.h                  |  8 ++++++++
 src/box/xrow.c                   |  5 ++++-
 src/box/xrow.h                   | 21 +++++++++++++++++++--
 test/replication/status.result   |  8 ++++++++
 test/replication/status.test.lua |  6 ++++++
 8 files changed, 71 insertions(+), 4 deletions(-)


base-commit: fbc04dfbda911195ec1a6a1e0e912e48eb6bb73c
-- 
2.29.2


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Tarantool-patches] [PATCH v2 1/3] applier: encode timestamp into vclock message
  2021-02-01 10:00 [Tarantool-patches] [PATCH v2 0/3] box/info: report replication.X.downstream.lag Cyrill Gorcunov via Tarantool-patches
@ 2021-02-01 10:00 ` Cyrill Gorcunov via Tarantool-patches
  2021-02-01 10:00 ` [Tarantool-patches] [PATCH v2 2/3] box/info: report replication.X.downstream.lag value Cyrill Gorcunov via Tarantool-patches
  2021-02-01 10:00 ` [Tarantool-patches] [PATCH v2 3/3] test: replication/status -- fetch downstream lag field Cyrill Gorcunov via Tarantool-patches
  2 siblings, 0 replies; 5+ messages in thread
From: Cyrill Gorcunov via Tarantool-patches @ 2021-02-01 10:00 UTC (permalink / raw)
  To: tml; +Cc: Vladislav Shpilevoy

The vclock message sent by "applierw" fiber to the replication
master node operates not only for tracking status of applied data
but also as a heartbeat packets (after 1.7.7).

To track downstream node lag we can put realtime value here.
This won't break any existing instances without the patch
because the field is unused anywhere yet.

Part-of #5447

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 src/box/applier.cc |  3 ++-
 src/box/xrow.c     |  5 ++++-
 src/box/xrow.h     | 21 +++++++++++++++++++--
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/src/box/applier.cc b/src/box/applier.cc
index 553db76fc..a418edef0 100644
--- a/src/box/applier.cc
+++ b/src/box/applier.cc
@@ -183,7 +183,8 @@ applier_writer_f(va_list ap)
 		try {
 			applier->has_acks_to_send = false;
 			struct xrow_header xrow;
-			xrow_encode_vclock(&xrow, &replicaset.vclock);
+			xrow_encode_vclock_timed(&xrow, &replicaset.vclock,
+						 ev_now(loop()));
 			coio_write_xrow(&io, &xrow);
 			ERROR_INJECT(ERRINJ_APPLIER_SLOW_ACK, {
 				fiber_sleep(0.01);
diff --git a/src/box/xrow.c b/src/box/xrow.c
index bc06738ad..b7b522820 100644
--- a/src/box/xrow.c
+++ b/src/box/xrow.c
@@ -1629,7 +1629,9 @@ xrow_encode_join(struct xrow_header *row, const struct tt_uuid *instance_uuid)
 }
 
 int
-xrow_encode_vclock(struct xrow_header *row, const struct vclock *vclock)
+xrow_encode_vclock_timed(struct xrow_header *row,
+			 const struct vclock *vclock,
+			 double tm)
 {
 	memset(row, 0, sizeof(*row));
 
@@ -1648,6 +1650,7 @@ xrow_encode_vclock(struct xrow_header *row, const struct vclock *vclock)
 	row->body[0].iov_base = buf;
 	row->body[0].iov_len = (data - buf);
 	row->bodycnt = 1;
+	row->tm = tm;
 	row->type = IPROTO_OK;
 	return 0;
 }
diff --git a/src/box/xrow.h b/src/box/xrow.h
index fde8f9474..8b91a3241 100644
--- a/src/box/xrow.h
+++ b/src/box/xrow.h
@@ -478,6 +478,20 @@ xrow_decode_register(struct xrow_header *row, struct tt_uuid *instance_uuid,
 				     NULL, NULL);
 }
 
+/**
+ * Encode vector clock with timestamp.
+ * @param row[out] Row to encode into.
+ * @param vclock vector clock to encode.
+ * @param tm timestamp.
+ *
+ * @retval  0 Success.
+ * @retval -1 Memory error.
+ */
+int
+xrow_encode_vclock_timed(struct xrow_header *row,
+			 const struct vclock *vclock,
+			 double tm);
+
 /**
  * Encode end of stream command (a response to JOIN command).
  * @param row[out] Row to encode into.
@@ -486,8 +500,11 @@ xrow_decode_register(struct xrow_header *row, struct tt_uuid *instance_uuid,
  * @retval  0 Success.
  * @retval -1 Memory error.
  */
-int
-xrow_encode_vclock(struct xrow_header *row, const struct vclock *vclock);
+static inline int
+xrow_encode_vclock(struct xrow_header *row, const struct vclock *vclock)
+{
+	return xrow_encode_vclock_timed(row, vclock, 0);
+}
 
 /**
  * Decode end of stream command (a response to JOIN command).
-- 
2.29.2


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Tarantool-patches] [PATCH v2 2/3] box/info: report replication.X.downstream.lag value
  2021-02-01 10:00 [Tarantool-patches] [PATCH v2 0/3] box/info: report replication.X.downstream.lag Cyrill Gorcunov via Tarantool-patches
  2021-02-01 10:00 ` [Tarantool-patches] [PATCH v2 1/3] applier: encode timestamp into vclock message Cyrill Gorcunov via Tarantool-patches
@ 2021-02-01 10:00 ` Cyrill Gorcunov via Tarantool-patches
  2021-02-04 13:28   ` Serge Petrenko via Tarantool-patches
  2021-02-01 10:00 ` [Tarantool-patches] [PATCH v2 3/3] test: replication/status -- fetch downstream lag field Cyrill Gorcunov via Tarantool-patches
  2 siblings, 1 reply; 5+ messages in thread
From: Cyrill Gorcunov via Tarantool-patches @ 2021-02-01 10:00 UTC (permalink / raw)
  To: tml; +Cc: Vladislav Shpilevoy

This is basically a reflection of replication.X.upstream.lag value.
The upstream lag can be considered as transaction RTT in direction
from master to replica, in turn downstream lag is the reverse and
represents RTT from replica to master.

An example of output is (on replica node)

 | 2:
 |   id: 2
 |   uuid: 8bb22366-cd21-492e-98df-693884be11bd
 |   lsn: 0
 |   downstream:
 |     status: follow
 |     idle: 0.55381065199617
 |     vclock: {1: 119}
 |     lag: 0.00019168853759766

In case if there some old replicas which are not sending
timestamp in vclock encoding we simply don't show lag
field for backward compatibility sake.

Closes #5447

@TarantoolBot document
Title: Document box.info.replication[n].downstream.lag

replication[n].downstream.lag is time difference between
local time of the replica `n` sending current vector clock
state and the time this message was received by a master
node.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 src/box/lua/info.c |  8 ++++++++
 src/box/relay.cc   | 16 ++++++++++++++++
 src/box/relay.h    |  8 ++++++++
 3 files changed, 32 insertions(+)

diff --git a/src/box/lua/info.c b/src/box/lua/info.c
index c4c9fa0a0..1e533fe8d 100644
--- a/src/box/lua/info.c
+++ b/src/box/lua/info.c
@@ -133,16 +133,24 @@ lbox_pushrelay(lua_State *L, struct relay *relay)
 
 	switch(relay_get_state(relay)) {
 	case RELAY_FOLLOW:
+	{
+		double lag = relay_lag(relay);
 		lua_pushstring(L, "follow");
 		lua_settable(L, -3);
 		lua_pushstring(L, "vclock");
 		lbox_pushvclock(L, relay_vclock(relay));
 		lua_settable(L, -3);
+		if (lag != 0) {
+			lua_pushstring(L, "lag");
+			lua_pushnumber(L, relay_lag(relay));
+			lua_settable(L, -3);
+		}
 		lua_pushstring(L, "idle");
 		lua_pushnumber(L, ev_monotonic_now(loop()) -
 			       relay_last_row_time(relay));
 		lua_settable(L, -3);
 		break;
+	}
 	case RELAY_STOPPED:
 	{
 		lua_pushstring(L, "stopped");
diff --git a/src/box/relay.cc b/src/box/relay.cc
index df04f8198..f23e43b30 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -138,6 +138,8 @@ struct relay {
 	struct stailq pending_gc;
 	/** Time when last row was sent to peer. */
 	double last_row_time;
+	/** Number of seconds this master is prior the remote replica. */
+	double lag;
 	/** Relay sync state. */
 	enum relay_state state;
 
@@ -179,6 +181,12 @@ relay_last_row_time(const struct relay *relay)
 	return relay->last_row_time;
 }
 
+double
+relay_lag(const struct relay *relay)
+{
+	return relay->lag;
+}
+
 static void
 relay_send(struct relay *relay, struct xrow_header *packet);
 static void
@@ -219,6 +227,7 @@ relay_start(struct relay *relay, int fd, uint64_t sync,
 	relay->sync = sync;
 	relay->state = RELAY_FOLLOW;
 	relay->last_row_time = ev_monotonic_now(loop());
+	relay->lag = 0;
 }
 
 void
@@ -558,6 +567,13 @@ relay_reader_f(va_list ap)
 			/* vclock is followed while decoding, zeroing it. */
 			vclock_create(&relay->recv_vclock);
 			xrow_decode_vclock_xc(&xrow, &relay->recv_vclock);
+			/*
+			 * Old versions may send not a timestamp but
+			 * zeroified memory field. We can use +0 as
+			 * a sign that there is nothing encoded.
+			 */
+			if (xrow.tm != 0)
+				relay->lag = ev_now(loop()) - xrow.tm;
 			fiber_cond_signal(&relay->reader_cond);
 		}
 	} catch (Exception *e) {
diff --git a/src/box/relay.h b/src/box/relay.h
index b32e2ea2a..ec9d16925 100644
--- a/src/box/relay.h
+++ b/src/box/relay.h
@@ -93,6 +93,14 @@ relay_vclock(const struct relay *relay);
 double
 relay_last_row_time(const struct relay *relay);
 
+/**
+ * Returns relay's lag
+ * @param relay relay
+ * @returns relay's lag
+ */
+double
+relay_lag(const struct relay *relay);
+
 /**
  * Send a Raft update request to the relay channel. It is not
  * guaranteed that it will be delivered. The connection may break.
-- 
2.29.2


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Tarantool-patches] [PATCH v2 3/3] test: replication/status -- fetch downstream lag field
  2021-02-01 10:00 [Tarantool-patches] [PATCH v2 0/3] box/info: report replication.X.downstream.lag Cyrill Gorcunov via Tarantool-patches
  2021-02-01 10:00 ` [Tarantool-patches] [PATCH v2 1/3] applier: encode timestamp into vclock message Cyrill Gorcunov via Tarantool-patches
  2021-02-01 10:00 ` [Tarantool-patches] [PATCH v2 2/3] box/info: report replication.X.downstream.lag value Cyrill Gorcunov via Tarantool-patches
@ 2021-02-01 10:00 ` Cyrill Gorcunov via Tarantool-patches
  2 siblings, 0 replies; 5+ messages in thread
From: Cyrill Gorcunov via Tarantool-patches @ 2021-02-01 10:00 UTC (permalink / raw)
  To: tml; +Cc: Vladislav Shpilevoy

Extend the test to observe downstream.lag value.

Part-of #5447

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 test/replication/status.result   | 8 ++++++++
 test/replication/status.test.lua | 6 ++++++
 2 files changed, 14 insertions(+)

diff --git a/test/replication/status.result b/test/replication/status.result
index a8c515dbb..9b5418da4 100644
--- a/test/replication/status.result
+++ b/test/replication/status.result
@@ -186,6 +186,14 @@ test_run:wait_cond(function()                    \
 ---
 - true
 ...
+-- gh-5447: Once replication vclock is reached there
+-- should be non infinity downstream lag, set it to some
+-- sane value but not short since there might be network
+-- lags on a low level.
+replica.downstream.lag ~= nil and replica.downstream.lag < 60
+---
+- true
+...
 --
 -- Replica
 --
diff --git a/test/replication/status.test.lua b/test/replication/status.test.lua
index 431463d8a..1ecf85382 100644
--- a/test/replication/status.test.lua
+++ b/test/replication/status.test.lua
@@ -73,6 +73,12 @@ test_run:wait_cond(function()                    \
             r[replica_id] == box.info.vclock[replica_id]) \
     end) or require('log').error(box.info)
 
+-- gh-5447: Once replication vclock is reached there
+-- should be non infinity downstream lag, set it to some
+-- sane value but not short since there might be network
+-- lags on a low level.
+replica.downstream.lag ~= nil and replica.downstream.lag < 60
+
 --
 -- Replica
 --
-- 
2.29.2


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Tarantool-patches] [PATCH v2 2/3] box/info: report replication.X.downstream.lag value
  2021-02-01 10:00 ` [Tarantool-patches] [PATCH v2 2/3] box/info: report replication.X.downstream.lag value Cyrill Gorcunov via Tarantool-patches
@ 2021-02-04 13:28   ` Serge Petrenko via Tarantool-patches
  0 siblings, 0 replies; 5+ messages in thread
From: Serge Petrenko via Tarantool-patches @ 2021-02-04 13:28 UTC (permalink / raw)
  To: Cyrill Gorcunov, tml; +Cc: Vladislav Shpilevoy



01.02.2021 13:00, Cyrill Gorcunov пишет:
> This is basically a reflection of replication.X.upstream.lag value.
> The upstream lag can be considered as transaction RTT in direction
> from master to replica, in turn downstream lag is the reverse and
> represents RTT from replica to master.
>
> An example of output is (on replica node)
>
>   | 2:
>   |   id: 2
>   |   uuid: 8bb22366-cd21-492e-98df-693884be11bd
>   |   lsn: 0
>   |   downstream:
>   |     status: follow
>   |     idle: 0.55381065199617
>   |     vclock: {1: 119}
>   |     lag: 0.00019168853759766
>
> In case if there some old replicas which are not sending
> timestamp in vclock encoding we simply don't show lag
> field for backward compatibility sake.
>
> Closes #5447
>
> @TarantoolBot document
> Title: Document box.info.replication[n].downstream.lag
>
> replication[n].downstream.lag is time difference between
> local time of the replica `n` sending current vector clock
> state and the time this message was received by a master
> node.
>
> Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
> ---

Hi! Thanks for the patch! Generally looks good, however, I have a comment.

Actually, what you're counting here is not related to upstream lag.
Upstream lag is the difference between master's WAL write time and
replica's time at the moment of receiving the entry. So it may indicate
how much the replica has fallen behind master. This is a meaningful value,
which may be quite high when the replica is syncing with master.

Downstream lag, as you count it, is simply the time it took a network 
packet to
travel from replica to master. It doesn't show anything about replica's 
status
as compared to master. So there's not much value in it.

Sorry for noticing this so late. I had some magic representation of how 
it should
work in my head and didn't look into the patch thoroughly enough.

This is not what the ticket was about. I see two options here:
1) you may simply send applier lag in row->tm, and save it in relay->lag.
    This way downstream.lag on master will be the same as upstream.lag 
on replica,
    so it'll be just a helper, but still better than what we have now.
2) We need to measure difference between transaction WAL write time and the
    time we received an ACK for this transaction. This may be too 
complicated for
    async transactions, since you need to remember times for a bunch of 
already
    committed transactions. So this will make sense only for synchronous 
transactions
    and qsync statistics.

I personally prefer option 1 now, as a counterpart of upstream.lag on 
master, and
option 2 as a part of qsync statistics ticket. But maybe we don't need 
(2) at all.

>   src/box/lua/info.c |  8 ++++++++
>   src/box/relay.cc   | 16 ++++++++++++++++
>   src/box/relay.h    |  8 ++++++++
>   3 files changed, 32 insertions(+)
>
> diff --git a/src/box/lua/info.c b/src/box/lua/info.c
> index c4c9fa0a0..1e533fe8d 100644
> --- a/src/box/lua/info.c
> +++ b/src/box/lua/info.c
> @@ -133,16 +133,24 @@ lbox_pushrelay(lua_State *L, struct relay *relay)
>   
>   	switch(relay_get_state(relay)) {
>   	case RELAY_FOLLOW:
> +	{
> +		double lag = relay_lag(relay);
>   		lua_pushstring(L, "follow");
>   		lua_settable(L, -3);
>   		lua_pushstring(L, "vclock");
>   		lbox_pushvclock(L, relay_vclock(relay));
>   		lua_settable(L, -3);
> +		if (lag != 0) {
> +			lua_pushstring(L, "lag");
> +			lua_pushnumber(L, relay_lag(relay));
> +			lua_settable(L, -3);
> +		}
>   		lua_pushstring(L, "idle");
>   		lua_pushnumber(L, ev_monotonic_now(loop()) -
>   			       relay_last_row_time(relay));
>   		lua_settable(L, -3);
>   		break;
> +	}
>   	case RELAY_STOPPED:
>   	{
>   		lua_pushstring(L, "stopped");
> diff --git a/src/box/relay.cc b/src/box/relay.cc
> index df04f8198..f23e43b30 100644
> --- a/src/box/relay.cc
> +++ b/src/box/relay.cc
> @@ -138,6 +138,8 @@ struct relay {
>   	struct stailq pending_gc;
>   	/** Time when last row was sent to peer. */
>   	double last_row_time;
> +	/** Number of seconds this master is prior the remote replica. */
> +	double lag;
>   	/** Relay sync state. */
>   	enum relay_state state;
>   
> @@ -179,6 +181,12 @@ relay_last_row_time(const struct relay *relay)
>   	return relay->last_row_time;
>   }
>   
> +double
> +relay_lag(const struct relay *relay)
> +{
> +	return relay->lag;
> +}
> +
>   static void
>   relay_send(struct relay *relay, struct xrow_header *packet);
>   static void
> @@ -219,6 +227,7 @@ relay_start(struct relay *relay, int fd, uint64_t sync,
>   	relay->sync = sync;
>   	relay->state = RELAY_FOLLOW;
>   	relay->last_row_time = ev_monotonic_now(loop());
> +	relay->lag = 0;
>   }
>   
>   void
> @@ -558,6 +567,13 @@ relay_reader_f(va_list ap)
>   			/* vclock is followed while decoding, zeroing it. */
>   			vclock_create(&relay->recv_vclock);
>   			xrow_decode_vclock_xc(&xrow, &relay->recv_vclock);
> +			/*
> +			 * Old versions may send not a timestamp but
> +			 * zeroified memory field. We can use +0 as
> +			 * a sign that there is nothing encoded.
> +			 */
> +			if (xrow.tm != 0)
> +				relay->lag = ev_now(loop()) - xrow.tm;
>   			fiber_cond_signal(&relay->reader_cond);
>   		}
>   	} catch (Exception *e) {
> diff --git a/src/box/relay.h b/src/box/relay.h
> index b32e2ea2a..ec9d16925 100644
> --- a/src/box/relay.h
> +++ b/src/box/relay.h
> @@ -93,6 +93,14 @@ relay_vclock(const struct relay *relay);
>   double
>   relay_last_row_time(const struct relay *relay);
>   
> +/**
> + * Returns relay's lag
> + * @param relay relay
> + * @returns relay's lag
> + */
> +double
> +relay_lag(const struct relay *relay);
> +
>   /**
>    * Send a Raft update request to the relay channel. It is not
>    * guaranteed that it will be delivered. The connection may break.

-- 
Serge Petrenko


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-02-04 13:28 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-01 10:00 [Tarantool-patches] [PATCH v2 0/3] box/info: report replication.X.downstream.lag Cyrill Gorcunov via Tarantool-patches
2021-02-01 10:00 ` [Tarantool-patches] [PATCH v2 1/3] applier: encode timestamp into vclock message Cyrill Gorcunov via Tarantool-patches
2021-02-01 10:00 ` [Tarantool-patches] [PATCH v2 2/3] box/info: report replication.X.downstream.lag value Cyrill Gorcunov via Tarantool-patches
2021-02-04 13:28   ` Serge Petrenko via Tarantool-patches
2021-02-01 10:00 ` [Tarantool-patches] [PATCH v2 3/3] test: replication/status -- fetch downstream lag field Cyrill Gorcunov via Tarantool-patches

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox