Tarantool development patches archive
 help / color / mirror / Atom feed
From: Cyrill Gorcunov via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Vladislav Shpilevoy <v.shpilevoy@tarantool.org>
Cc: tml <tarantool-patches@dev.tarantool.org>
Subject: Re: [Tarantool-patches] [PATCH v9 2/2] relay: provide information about downstream lag
Date: Mon, 21 Jun 2021 19:17:20 +0300	[thread overview]
Message-ID: <YNC7kGxi4lU90p7g@grain> (raw)
In-Reply-To: <909ea97e-9b56-d327-860b-65aba685fce3@tarantool.org>

On Sun, Jun 20, 2021 at 04:37:21PM +0200, Vladislav Shpilevoy wrote:
> Hi! Thanks for the patch!
> 
> The test fails when I run it multiple times:
> 
> [014] Test failed! Result content mismatch:
> [014] --- replication/gh-5447-downstream-lag.result	Sun Jun 20 16:10:26 2021
> [014] +++ var/rejects/replication/gh-5447-downstream-lag.reject	Sun Jun 20 16:33:01 2021
> [014] @@ -37,7 +37,7 @@
> [014]  -- Upon replica startup there is no ACKs to process.
> [014]  assert(box.info.replication[replica_id].downstream.lag == 0)
> [014]   | ---
> [014] - | - true
> [014] + | - error: assertion failed!
> 
> See 4 comments below.

Vlad, here is an update; I force-pushed it into the same branch.
I'll fix the error injection nit. Could you please retry running
the test in multiple simultaneous instances? (I did it locally with
200 tests, but it didn't trigger anything.) I rebased the series on
top of master.
---
From da969da89beab720c91c7e895613ab9cf6ab2ea7 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Mon, 21 Jun 2021 14:30:52 +0300
Subject: [PATCH] Update

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 src/box/relay.cc                              | 19 +------------------
 .../replication/gh-5447-downstream-lag.result | 10 +++-------
 .../gh-5447-downstream-lag.test.lua           |  3 +--
 3 files changed, 5 insertions(+), 27 deletions(-)

diff --git a/src/box/relay.cc b/src/box/relay.cc
index 14c9b0f03..115037fc3 100644
--- a/src/box/relay.cc
+++ b/src/box/relay.cc
@@ -160,11 +160,6 @@ struct relay {
 	struct stailq pending_gc;
 	/** Time when last row was sent to peer. */
 	double last_row_time;
-	/**
-	 * Last timestamp observed from remote node to
-	 * compute @a txn_lag value.
-	 */
-	double txn_acked_tm;
 	/**
 	 * A time difference between the moment when we
 	 * wrote a transaction to the local WAL and when
@@ -310,15 +305,6 @@ relay_start(struct relay *relay, int fd, uint64_t sync,
 	relay->state = RELAY_FOLLOW;
 	relay->row_count = 0;
 	relay->last_row_time = ev_monotonic_now(loop());
-	/*
-	 * We assume that previously written rows in WAL
-	 * are older than current node real time which allows
-	 * to simplify @a tx.txn_lag calculation. In worst
-	 * scenario when runtime has been adjusted backwards
-	 * between restart we simply get some big value in
-	 * @a tx.txn_lag until next transaction get replicated.
-	 */
-	relay->txn_acked_tm = ev_now(loop());
 }
 
 void
@@ -375,7 +361,6 @@ relay_stop(struct relay *relay)
 	 * If relay is stopped then lag statistics should
 	 * be updated on next new ACK packets obtained.
 	 */
-	relay->txn_acked_tm = 0;
 	relay->txn_lag = 0;
 	relay->tx.txn_lag = 0;
 }
@@ -682,10 +667,8 @@ relay_reader_f(va_list ap)
 			 * can compute time spent regardless of the clock
 			 * value on remote replica.
 			 */
-			if (relay->txn_acked_tm < xrow.tm) {
-				relay->txn_acked_tm = xrow.tm;
+			if (xrow.tm != 0)
 				relay->txn_lag = ev_now(loop()) - xrow.tm;
-			}
 			fiber_cond_signal(&relay->reader_cond);
 		}
 	} catch (Exception *e) {
diff --git a/test/replication/gh-5447-downstream-lag.result b/test/replication/gh-5447-downstream-lag.result
index 2cc020451..0d5de2564 100644
--- a/test/replication/gh-5447-downstream-lag.result
+++ b/test/replication/gh-5447-downstream-lag.result
@@ -70,17 +70,10 @@ test_run:switch('default')
  | ---
  | - true
  | ...
-lsn = box.info.lsn
- | ---
- | ...
 box.space.test:insert({1})
  | ---
  | - [1]
  | ...
-test_run:wait_cond(function() return box.info.lsn > lsn end)
- | ---
- | - true
- | ...
 -- The record is written on the master node.
 test_run:switch('replica')
  | ---
@@ -111,6 +104,9 @@ test_run:switch('default')
  | ---
  | - true
  | ...
+box.space.test:drop()
+ | ---
+ | ...
 box.schema.user.revoke('guest', 'replication')
  | ---
  | ...
diff --git a/test/replication/gh-5447-downstream-lag.test.lua b/test/replication/gh-5447-downstream-lag.test.lua
index 3096e2ac3..dd1d2e2c9 100644
--- a/test/replication/gh-5447-downstream-lag.test.lua
+++ b/test/replication/gh-5447-downstream-lag.test.lua
@@ -35,9 +35,7 @@ box.error.injection.set("ERRINJ_WAL_DELAY", true)
 --
 -- Insert a record and wakeup replica's WAL to process data.
 test_run:switch('default')
-lsn = box.info.lsn
 box.space.test:insert({1})
-test_run:wait_cond(function() return box.info.lsn > lsn end)
 -- The record is written on the master node.
 test_run:switch('replica')
 box.error.injection.set("ERRINJ_WAL_DELAY", false)
@@ -51,6 +49,7 @@ assert(box.info.replication[replica_id].downstream.lag > 0)
 --
 -- Cleanup everything.
 test_run:switch('default')
+box.space.test:drop()
 box.schema.user.revoke('guest', 'replication')
 test_run:cmd('stop server replica')
 test_run:cmd('cleanup server replica')
-- 
2.31.1


  parent reply	other threads:[~2021-06-21 16:17 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-17 15:48 [Tarantool-patches] [PATCH v9 0/2] relay: provide downstream lag information Cyrill Gorcunov via Tarantool-patches
2021-06-17 15:48 ` [Tarantool-patches] [PATCH v9 1/2] applier: send transaction's first row WAL time in the applier_writer_f Cyrill Gorcunov via Tarantool-patches
2021-06-18  9:51   ` Serge Petrenko via Tarantool-patches
2021-06-18 18:06     ` Cyrill Gorcunov via Tarantool-patches
2021-06-21  8:35       ` Serge Petrenko via Tarantool-patches
2021-06-17 15:48 ` [Tarantool-patches] [PATCH v9 2/2] relay: provide information about downstream lag Cyrill Gorcunov via Tarantool-patches
2021-06-18  9:50   ` Serge Petrenko via Tarantool-patches
2021-06-20 14:37   ` Vladislav Shpilevoy via Tarantool-patches
2021-06-21  8:44     ` Cyrill Gorcunov via Tarantool-patches
2021-06-21 16:17     ` Cyrill Gorcunov via Tarantool-patches [this message]
2021-06-21 21:16       ` Vladislav Shpilevoy via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YNC7kGxi4lU90p7g@grain \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=gorcunov@gmail.com \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v9 2/2] relay: provide information about downstream lag' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox