[tarantool-patches] [PATCH v3] box: adds replication sync after cfg. update
Olga Arkhangelskaia
krishtal.olja at gmail.com
Sun Aug 26 11:25:37 MSK 2018
When a replica reconnects to a replica set (i.e. connects not for the
first time), no synchronization is performed. As a result, the replica
may give away outdated data.
Closes #3427
---
https://github.com/tarantool/tarantool/issues/3427
https://github.com/tarantool/tarantool/tree/OKriw/replication_no_sync-1.9
v1:
https://www.freelists.org/post/tarantool-patches/PATCH-replication-adds-replication-sync-after-cfg-update
v2:
https://www.freelists.org/post/tarantool-patches/PATCH-v2-replication-adds-replication-sync-after-cfg-update
Changes in v2:
- fixed test
- changed replicaset_sync
Changes in v3:
- an exception is now raised when sync is not successful.
- fixed test
- renamed test
src/box/box.cc | 5 ++
src/box/errcode.h | 1 +
src/box/replication.cc | 7 ++-
src/box/replication.h | 6 +-
test/replication/config_change_sync.result | 90 ++++++++++++++++++++++++++++
test/replication/config_change_sync.test.lua | 43 +++++++++++++
test/replication/replica_orphan.lua | 12 ++++
7 files changed, 158 insertions(+), 6 deletions(-)
create mode 100644 test/replication/config_change_sync.result
create mode 100644 test/replication/config_change_sync.test.lua
create mode 100644 test/replication/replica_orphan.lua
diff --git a/src/box/box.cc b/src/box/box.cc
index 8d7454d1f..617067ab5 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -634,6 +634,11 @@ box_set_replication(void)
box_sync_replication(true);
/* Follow replica */
replicaset_follow();
+ /* Sync replica up to quorum */
+ if (!replicaset_sync()) {
+ tnt_raise(ClientError, ER_REPLICA_SYNC, cfg_gets("instance_uuid"),
+ cfg_gets("replicaset_uuid"));
+ }
}
void
diff --git a/src/box/errcode.h b/src/box/errcode.h
index 7e3ea1ed1..4059930c0 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -207,6 +207,7 @@ struct errcode_record {
/*152 */_(ER_NULLABLE_PRIMARY, "Primary index of the space '%s' can not contain nullable parts") \
/*153 */_(ER_NULLABLE_MISMATCH, "Field %d is %s in space format, but %s in index parts") \
/*154 */_(ER_TRANSACTION_YIELD, "Transaction has been aborted by a fiber yield") \
+ /*155 */_(ER_REPLICA_SYNC, "Failed to synchronize replica %s with replicaset %s") \
/*
* !IMPORTANT! Please follow instructions at start of the file
diff --git a/src/box/replication.cc b/src/box/replication.cc
index 861ce34ea..9ccb13fa2 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -661,13 +661,13 @@ replicaset_follow(void)
}
}
-void
+bool
replicaset_sync(void)
{
int quorum = replicaset_quorum();
if (quorum == 0)
- return;
+ return true;
say_verbose("synchronizing with %d replicas", quorum);
@@ -687,11 +687,12 @@ replicaset_sync(void)
* in 'orphan' state.
*/
say_crit("entering orphan mode");
- return;
+ return false;
}
say_crit("replica set sync complete, quorum of %d "
"replicas formed", quorum);
+ return true;
}
void
diff --git a/src/box/replication.h b/src/box/replication.h
index 06a2867b6..d4e6f7e3e 100644
--- a/src/box/replication.h
+++ b/src/box/replication.h
@@ -373,10 +373,10 @@ replicaset_follow(void);
/**
* Wait until a replication quorum is formed.
- * Return immediately if a quorum cannot be
- * formed because of errors.
+ * @return true in case of success.
+ * @return false if a quorum cannot be formed because of errors.
*/
-void
+bool
replicaset_sync(void);
/**
diff --git a/test/replication/config_change_sync.result b/test/replication/config_change_sync.result
new file mode 100644
index 000000000..18774c4b0
--- /dev/null
+++ b/test/replication/config_change_sync.result
@@ -0,0 +1,90 @@
+--
+-- gh-3427: no sync after configuration update
+--
+env = require('test_run')
+---
+...
+test_run = env.new()
+---
+...
+engine = test_run:get_cfg('engine')
+---
+...
+box.schema.user.grant('guest', 'read,write,execute', 'universe')
+---
+...
+box.schema.user.grant('guest', 'replication')
+---
+...
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica_orphan.lua'")
+---
+- true
+...
+test_run:cmd("start server replica")
+---
+- true
+...
+repl = test_run:eval('replica', 'return box.cfg.listen')[1]
+---
+...
+box.cfg{replication = repl}
+---
+...
+s = box.schema.space.create('test', {engine = engine})
+---
+...
+index = s:create_index('primary')
+---
+...
+-- change replica configuration
+test_run:cmd("switch replica")
+---
+- true
+...
+box.cfg{replication={}}
+---
+...
+test_run:cmd("switch default")
+---
+- true
+...
+-- insert values on the master while replica is unconfigured
+a = 50000 box.begin() while a > 0 do a = a-1 box.space.test:insert{a,a} end box.commit()
+---
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+box.cfg{replication = os.getenv("MASTER")}
+---
+...
+require'fiber'.sleep(0.1)
+---
+...
+box.info.replication[1].upstream.lag > 0.1
+---
+- false
+...
+test_run:cmd("switch default")
+---
+- true
+...
+-- cleanup
+test_run:cmd("stop server replica")
+---
+- true
+...
+test_run:cmd("cleanup server replica")
+---
+- true
+...
+box.space.test:drop()
+---
+...
+box.schema.user.revoke('guest', 'replication')
+---
+...
+box.schema.user.revoke('guest', 'read,write,execute', 'universe')
+---
+...
diff --git a/test/replication/config_change_sync.test.lua b/test/replication/config_change_sync.test.lua
new file mode 100644
index 000000000..b1a71dbaa
--- /dev/null
+++ b/test/replication/config_change_sync.test.lua
@@ -0,0 +1,43 @@
+--
+-- gh-3427: no sync after configuration update
+--
+
+env = require('test_run')
+test_run = env.new()
+engine = test_run:get_cfg('engine')
+
+box.schema.user.grant('guest', 'read,write,execute', 'universe')
+
+box.schema.user.grant('guest', 'replication')
+test_run:cmd("create server replica with rpl_master=default, script='replication/replica_orphan.lua'")
+test_run:cmd("start server replica")
+
+repl = test_run:eval('replica', 'return box.cfg.listen')[1]
+box.cfg{replication = repl}
+
+s = box.schema.space.create('test', {engine = engine})
+index = s:create_index('primary')
+
+-- change replica configuration
+test_run:cmd("switch replica")
+box.cfg{replication={}}
+
+test_run:cmd("switch default")
+-- insert values on the master while replica is unconfigured
+a = 50000 box.begin() while a > 0 do a = a-1 box.space.test:insert{a,a} end box.commit()
+
+test_run:cmd("switch replica")
+box.cfg{replication = os.getenv("MASTER")}
+require'fiber'.sleep(0.1)
+
+
+box.info.replication[1].upstream.lag > 0.1
+
+test_run:cmd("switch default")
+
+-- cleanup
+test_run:cmd("stop server replica")
+test_run:cmd("cleanup server replica")
+box.space.test:drop()
+box.schema.user.revoke('guest', 'replication')
+box.schema.user.revoke('guest', 'read,write,execute', 'universe')
diff --git a/test/replication/replica_orphan.lua b/test/replication/replica_orphan.lua
new file mode 100644
index 000000000..97740d69a
--- /dev/null
+++ b/test/replication/replica_orphan.lua
@@ -0,0 +1,12 @@
+#!/usr/bin/env tarantool
+
+local TIMEOUT = tonumber(arg[1])
+
+box.cfg({
+ listen = os.getenv("LISTEN"),
+ replication = os.getenv("MASTER"),
+ replication_connect_timeout = 0.5,
+ replication_sync_lag = 0.01,
+})
+
+require('console').listen(os.getenv('ADMIN'))
--
2.14.3 (Apple Git-98)
More information about the Tarantool-patches
mailing list