From: Serge Petrenko <sergepetrenko@tarantool.org> To: v.shpilevoy@tarantool.org, kostja.osipov@gmail.com, georgy@tarantool.org Cc: tarantool-patches@dev.tarantool.org Subject: [Tarantool-patches] [PATCH v2 5/5] box: start counting local space requests separately Date: Wed, 18 Mar 2020 22:47:48 +0300 [thread overview] Message-ID: <b2d68aee1c5864965db81c6d7c42d04bcf5d25a6.1584558067.git.sergepetrenko@tarantool.org> (raw) In-Reply-To: <cover.1584558067.git.sergepetrenko@tarantool.org> Sign local space requests with a zero instance id. This allows to split local changes aside from the changes, which should be visible to the whole cluster, and stop sending NOPs to replicas to follow local vclock. Closes #4114 --- src/box/box.cc | 20 +++ src/box/relay.cc | 20 +-- src/box/wal.c | 16 ++- test/replication/autobootstrap.result | 6 + test/replication/autobootstrap.test.lua | 2 + test/replication/before_replace.result | 8 +- test/replication/before_replace.test.lua | 4 +- .../gh-4114-local-space-replication.result | 125 ++++++++++++++++++ .../gh-4114-local-space-replication.test.lua | 48 +++++++ test/replication/local_spaces.result | 4 + test/replication/local_spaces.test.lua | 3 + test/replication/misc.result | 6 + test/replication/misc.test.lua | 2 + test/replication/quorum.result | 6 + test/replication/quorum.test.lua | 2 + test/replication/replica_rejoin.result | 9 ++ test/replication/replica_rejoin.test.lua | 3 + test/replication/skip_conflict_row.result | 3 + test/replication/skip_conflict_row.test.lua | 1 + test/replication/suite.cfg | 1 + test/vinyl/errinj.result | 5 + test/vinyl/errinj.test.lua | 4 + 22 files changed, 282 insertions(+), 16 deletions(-) create mode 100644 test/replication/gh-4114-local-space-replication.result create mode 100644 test/replication/gh-4114-local-space-replication.test.lua diff --git a/src/box/box.cc b/src/box/box.cc index 21c7a3324..f4f7fd276 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -1870,6 +1870,26 @@ 
box_process_subscribe(struct ev_io *io, struct xrow_header *header) say_info("remote vclock %s local vclock %s", vclock_to_string(&replica_clock), vclock_to_string(&vclock)); + /* + * Replica clock is used in gc state and recovery + * initialization, so we need to replace the remote 0-th + * component with our own one. This makes recovery work + * correctly: we're trying to recover from an xlog whose + * vclock is smaller than remote replica clock in each + * component, excluding the 0-th component. This leads to + * finding the correct WAL, if it exists, since we do not + * need to recover local rows (the ones, that contribute + * to the 0-th vclock component). It would be bad to set + * 0-th vclock component to a smaller value, since it + * would unnecessarily require additional WALs, which may + * have already been deleted. + * Same stands for gc. Remote instances do not need this + * instance's local rows, and after gc was reworked to + * track individual vclock components instead of + * signatures it's safe to set the local component to the + * most recent value. 
+ */ + vclock_set(&replica_clock, 0, vclock_get(&replicaset.vclock, 0)); /* * Process SUBSCRIBE request via replication relay * Send current recovery vector clock as a marker diff --git a/src/box/relay.cc b/src/box/relay.cc index 3191ad99f..454cb5811 100644 --- a/src/box/relay.cc +++ b/src/box/relay.cc @@ -375,8 +375,11 @@ static void tx_status_update(struct cmsg *msg) { struct relay_status_msg *status = (struct relay_status_msg *)msg; + struct vclock subst_vclock; + vclock_copy(&subst_vclock, &status->vclock); + vclock_set(&subst_vclock, 0, vclock_get(&replicaset.vclock, 0)); if (!status->relay->replica->anon) - wal_relay_status_update(status->relay->replica->id, &status->vclock); + wal_relay_status_update(status->relay->replica->id, &subst_vclock); vclock_copy(&status->relay->tx.vclock, &status->vclock); static const struct cmsg_hop route[] = { {relay_status_update, NULL} @@ -647,16 +650,15 @@ relay_send_row(struct xstream *stream, struct xrow_header *packet) { struct relay *relay = container_of(stream, struct relay, stream); assert(iproto_type_is_dml(packet->type)); - /* - * Transform replica local requests to IPROTO_NOP so as to - * promote vclock on the replica without actually modifying - * any data. - */ if (packet->group_id == GROUP_LOCAL) { /* - * Replica-local requests generated while replica - * was anonymous have a zero instance id. Just - * skip all these rows. + * We do not relay replica-local rows to other + * instances, since we started signing them with + * a zero instance id. However, if replica-local + * rows, signed with a non-zero id are present in + * our WAL, we still need to relay them as NOPs in + * order to correctly promote the vclock on the + * replica. 
*/ if (packet->replica_id == REPLICA_ID_NIL) return; diff --git a/src/box/wal.c b/src/box/wal.c index 411850a05..197508214 100644 --- a/src/box/wal.c +++ b/src/box/wal.c @@ -1042,13 +1042,19 @@ wal_assign_lsn(struct vclock *vclock_diff, struct vclock *base, /** Assign LSN to all local rows. */ for ( ; row < end; row++) { if ((*row)->replica_id == 0) { - (*row)->lsn = vclock_inc(vclock_diff, instance_id) + - vclock_get(base, instance_id); /* - * Note, an anonymous replica signs local - * rows whith a zero instance id. + * All rows representing local space data + * manipulations are signed with a zero + * instance id. This is also true for + * anonymous replicas, since they are + * only capable of writing to local and + * temporary spaces. */ - (*row)->replica_id = instance_id; + if ((*row)->group_id != GROUP_LOCAL) + (*row)->replica_id = instance_id; + + (*row)->lsn = vclock_inc(vclock_diff, (*row)->replica_id) + + vclock_get(base, (*row)->replica_id); /* Use lsn of the first local row as transaction id. */ tsn = tsn == 0 ? (*row)->lsn : tsn; (*row)->tsn = tsn; diff --git a/test/replication/autobootstrap.result b/test/replication/autobootstrap.result index 743982d47..6918e23ea 100644 --- a/test/replication/autobootstrap.result +++ b/test/replication/autobootstrap.result @@ -162,6 +162,9 @@ box.schema.user.revoke('test_u', 'create', 'space') vclock = test_run:get_vclock('autobootstrap1') --- ... +vclock[0] = nil +--- +... _ = test_run:wait_vclock("autobootstrap2", vclock) --- ... @@ -206,6 +209,9 @@ test_run:wait_fullmesh(SERVERS) vclock = test_run:get_vclock("autobootstrap1") --- ... +vclock[0] = nil +--- +... _ = test_run:wait_vclock("autobootstrap2", vclock) --- ... 
diff --git a/test/replication/autobootstrap.test.lua b/test/replication/autobootstrap.test.lua index 055ea4277..f8bb1c74a 100644 --- a/test/replication/autobootstrap.test.lua +++ b/test/replication/autobootstrap.test.lua @@ -74,6 +74,7 @@ box.schema.user.revoke('test_u', 'create', 'space') -- Synchronize vclock = test_run:get_vclock('autobootstrap1') +vclock[0] = nil _ = test_run:wait_vclock("autobootstrap2", vclock) _ = test_run:wait_vclock("autobootstrap3", vclock) @@ -95,6 +96,7 @@ _ = test_run:cmd("switch default") test_run:wait_fullmesh(SERVERS) vclock = test_run:get_vclock("autobootstrap1") +vclock[0] = nil _ = test_run:wait_vclock("autobootstrap2", vclock) _ = test_run:wait_vclock("autobootstrap3", vclock) diff --git a/test/replication/before_replace.result b/test/replication/before_replace.result index ced40547e..61a552e84 100644 --- a/test/replication/before_replace.result +++ b/test/replication/before_replace.result @@ -266,7 +266,13 @@ box.space.test:replace{1, 1} --- - [1, 1] ... -_ = test_run:wait_vclock('replica', test_run:get_vclock('default')) +vclock = test_run:get_vclock('default') +--- +... +vclock[0] = nil +--- +... +_ = test_run:wait_vclock('replica', vclock) --- ... -- Check that replace{1, 2} coming from the master was suppressed diff --git a/test/replication/before_replace.test.lua b/test/replication/before_replace.test.lua index bcc6dc00d..ecd8ff044 100644 --- a/test/replication/before_replace.test.lua +++ b/test/replication/before_replace.test.lua @@ -101,7 +101,9 @@ _ = box.space.test:before_replace(function(old, new) return new:update{{'+', 2, test_run:cmd("switch default") box.space.test:replace{1, 1} -_ = test_run:wait_vclock('replica', test_run:get_vclock('default')) +vclock = test_run:get_vclock('default') +vclock[0] = nil +_ = test_run:wait_vclock('replica', vclock) -- Check that replace{1, 2} coming from the master was suppressed -- by the before_replace trigger on the replica. 
diff --git a/test/replication/gh-4114-local-space-replication.result b/test/replication/gh-4114-local-space-replication.result new file mode 100644 index 000000000..e524c9a1b --- /dev/null +++ b/test/replication/gh-4114-local-space-replication.result @@ -0,0 +1,125 @@ +-- test-run result file version 2 +env = require('test_run') + | --- + | ... +test_run = env.new() + | --- + | ... + +-- +-- gh-4114. Account local space changes in a separate vclock +-- component. Do not replicate local space changes, even as NOPs. +-- + +box.schema.user.grant('guest', 'replication') + | --- + | ... +_ = box.schema.space.create('test', {is_local=true}) + | --- + | ... +_ = box.space.test:create_index("pk") + | --- + | ... + +test_run:cmd('create server replica with rpl_master=default, script "replication/replica.lua"') + | --- + | - true + | ... +test_run:cmd('start server replica with wait=True, wait_load=True') + | --- + | - true + | ... + +a = box.info.vclock[0] or 0 + | --- + | ... +for i = 1,10 do box.space.test:insert{i} end + | --- + | ... +box.info.vclock[0] == a + 10 or box.info.vclock[0] - a + | --- + | - true + | ... + +test_run:cmd('switch replica') + | --- + | - true + | ... +box.info.vclock[0] + | --- + | - null + | ... +box.cfg{checkpoint_count=1} + | --- + | ... +box.space.test:select{} + | --- + | - [] + | ... +box.space.test:insert{1} + | --- + | - [1] + | ... +box.snapshot() + | --- + | - ok + | ... +box.space.test:insert{2} + | --- + | - [2] + | ... +box.snapshot() + | --- + | - ok + | ... +box.space.test:insert{3} + | --- + | - [3] + | ... +box.snapshot() + | --- + | - ok + | ... + +box.info.vclock[0] + | --- + | - 3 + | ... + +test_run:cmd('switch default') + | --- + | - true + | ... + +test_run:cmd('set variable repl_source to "replica.listen"') + | --- + | - true + | ... + +box.cfg{replication=repl_source} + | --- + | ... 
+test_run:wait_cond(function()\ + return box.info.replication[2].upstream and\ + box.info.replication[2].upstream.status == 'follow'\ + end,\ + 10) + | --- + | - true + | ... + +-- Cleanup. +test_run:cmd('stop server replica') + | --- + | - true + | ... +test_run:cmd('delete server replica') + | --- + | - true + | ... +box.space.test:drop() + | --- + | ... +box.schema.user.revoke('guest', 'replication') + | --- + | ... diff --git a/test/replication/gh-4114-local-space-replication.test.lua b/test/replication/gh-4114-local-space-replication.test.lua new file mode 100644 index 000000000..26dccee68 --- /dev/null +++ b/test/replication/gh-4114-local-space-replication.test.lua @@ -0,0 +1,48 @@ +env = require('test_run') +test_run = env.new() + +-- +-- gh-4114. Account local space changes in a separate vclock +-- component. Do not replicate local space changes, even as NOPs. +-- + +box.schema.user.grant('guest', 'replication') +_ = box.schema.space.create('test', {is_local=true}) +_ = box.space.test:create_index("pk") + +test_run:cmd('create server replica with rpl_master=default, script "replication/replica.lua"') +test_run:cmd('start server replica with wait=True, wait_load=True') + +a = box.info.vclock[0] or 0 +for i = 1,10 do box.space.test:insert{i} end +box.info.vclock[0] == a + 10 or box.info.vclock[0] - a + +test_run:cmd('switch replica') +box.info.vclock[0] +box.cfg{checkpoint_count=1} +box.space.test:select{} +box.space.test:insert{1} +box.snapshot() +box.space.test:insert{2} +box.snapshot() +box.space.test:insert{3} +box.snapshot() + +box.info.vclock[0] + +test_run:cmd('switch default') + +test_run:cmd('set variable repl_source to "replica.listen"') + +box.cfg{replication=repl_source} +test_run:wait_cond(function()\ + return box.info.replication[2].upstream and\ + box.info.replication[2].upstream.status == 'follow'\ + end,\ + 10) + +-- Cleanup. 
+test_run:cmd('stop server replica') +test_run:cmd('delete server replica') +box.space.test:drop() +box.schema.user.revoke('guest', 'replication') diff --git a/test/replication/local_spaces.result b/test/replication/local_spaces.result index cf2c52010..4855d8a88 100644 --- a/test/replication/local_spaces.result +++ b/test/replication/local_spaces.result @@ -288,6 +288,10 @@ _ = s3:insert{3} vclock = test_run:get_vclock('default') --- ... +-- Ignore 0-th component when waiting. They don't match. +vclock[0] = nil +--- +... _ = test_run:wait_vclock('replica', vclock) --- ... diff --git a/test/replication/local_spaces.test.lua b/test/replication/local_spaces.test.lua index 373e2cd20..c5e224030 100644 --- a/test/replication/local_spaces.test.lua +++ b/test/replication/local_spaces.test.lua @@ -112,6 +112,9 @@ _ = s1:insert{3} _ = s2:insert{3} _ = s3:insert{3} vclock = test_run:get_vclock('default') + +-- Ignore 0-th component when waiting. They don't match. +vclock[0] = nil _ = test_run:wait_vclock('replica', vclock) test_run:cmd("switch replica") diff --git a/test/replication/misc.result b/test/replication/misc.result index b63d72846..e5d1f560e 100644 --- a/test/replication/misc.result +++ b/test/replication/misc.result @@ -214,6 +214,9 @@ box.space.space1:select{} vclock = test_run:get_vclock("autobootstrap1") --- ... +vclock[0] = nil +--- +... _ = test_run:wait_vclock("autobootstrap2", vclock) --- ... @@ -414,6 +417,9 @@ while box.info.replication[2] == nil do fiber.sleep(0.01) end vclock = test_run:get_vclock('default') --- ... +vclock[0] = nil +--- +... _ = test_run:wait_vclock('replica_auth', vclock) --- ... 
diff --git a/test/replication/misc.test.lua b/test/replication/misc.test.lua index c454a0992..d285b014a 100644 --- a/test/replication/misc.test.lua +++ b/test/replication/misc.test.lua @@ -88,6 +88,7 @@ c.space.space1:insert{box.NULL, "data"} -- fails, but bumps sequence value c.space.space1:insert{box.NULL, 1, "data"} box.space.space1:select{} vclock = test_run:get_vclock("autobootstrap1") +vclock[0] = nil _ = test_run:wait_vclock("autobootstrap2", vclock) test_run:cmd("switch autobootstrap2") box.space.space1:select{} @@ -172,6 +173,7 @@ box.schema.user.grant('cluster', 'replication') while box.info.replication[2] == nil do fiber.sleep(0.01) end vclock = test_run:get_vclock('default') +vclock[0] = nil _ = test_run:wait_vclock('replica_auth', vclock) test_run:cmd("stop server replica_auth") diff --git a/test/replication/quorum.result b/test/replication/quorum.result index 07abe7f2a..5ef66bf0a 100644 --- a/test/replication/quorum.result +++ b/test/replication/quorum.result @@ -325,6 +325,9 @@ space:insert{2} vclock = test_run:get_vclock("default") --- ... +vclock[0] = nil +--- +... _ = test_run:wait_vclock("replica", vclock) --- ... @@ -390,6 +393,9 @@ box.cfg{replication = repl} vclock = test_run:get_vclock("master_quorum1") --- ... +vclock[0] = nil +--- +... _ = test_run:wait_vclock("master_quorum2", vclock) --- ... 
diff --git a/test/replication/quorum.test.lua b/test/replication/quorum.test.lua index 5f2872675..da24f34a0 100644 --- a/test/replication/quorum.test.lua +++ b/test/replication/quorum.test.lua @@ -125,6 +125,7 @@ test_run:cmd("switch default") box.cfg{listen = listen} space:insert{2} vclock = test_run:get_vclock("default") +vclock[0] = nil _ = test_run:wait_vclock("replica", vclock) test_run:cmd("switch replica") box.info.status -- running @@ -145,6 +146,7 @@ box.cfg{replication = ""} box.space.test:insert{1} box.cfg{replication = repl} vclock = test_run:get_vclock("master_quorum1") +vclock[0] = nil _ = test_run:wait_vclock("master_quorum2", vclock) test_run:cmd("switch master_quorum2") box.space.test:select() diff --git a/test/replication/replica_rejoin.result b/test/replication/replica_rejoin.result index b8ed79f14..dd04ae297 100644 --- a/test/replication/replica_rejoin.result +++ b/test/replication/replica_rejoin.result @@ -144,6 +144,9 @@ for i = 10, 30, 10 do box.space.test:update(i, {{'!', 1, i}}) end vclock = test_run:get_vclock('default') --- ... +vclock[0] = nil +--- +... _ = test_run:wait_vclock('replica', vclock) --- ... @@ -295,6 +298,9 @@ for i = 1, 10 do box.space.test:replace{2} end vclock = test_run:get_vclock('replica') --- ... +vclock[0] = nil +--- +... _ = test_run:wait_vclock('default', vclock) --- ... @@ -345,6 +351,9 @@ for i = 1, 10 do box.space.test:replace{2} end vclock = test_run:get_vclock('replica') --- ... +vclock[0] = nil +--- +... _ = test_run:wait_vclock('default', vclock) --- ... diff --git a/test/replication/replica_rejoin.test.lua b/test/replication/replica_rejoin.test.lua index 25c0849ec..410ef44d7 100644 --- a/test/replication/replica_rejoin.test.lua +++ b/test/replication/replica_rejoin.test.lua @@ -55,6 +55,7 @@ test_run:cmd("switch default") -- Make sure the replica follows new changes. 
for i = 10, 30, 10 do box.space.test:update(i, {{'!', 1, i}}) end vclock = test_run:get_vclock('default') +vclock[0] = nil _ = test_run:wait_vclock('replica', vclock) test_run:cmd("switch replica") box.space.test:select() @@ -109,6 +110,7 @@ box.space.test:replace{1} test_run:cmd("switch replica") for i = 1, 10 do box.space.test:replace{2} end vclock = test_run:get_vclock('replica') +vclock[0] = nil _ = test_run:wait_vclock('default', vclock) -- Restart the master and force garbage collection. test_run:cmd("switch default") @@ -126,6 +128,7 @@ test_run:wait_cond(function() return #fio.glob(fio.pathjoin(box.cfg.wal_dir, '*. test_run:cmd("switch replica") for i = 1, 10 do box.space.test:replace{2} end vclock = test_run:get_vclock('replica') +vclock[0] = nil _ = test_run:wait_vclock('default', vclock) -- Restart the replica. It should successfully rebootstrap. test_run:cmd("restart server replica with args='true'") diff --git a/test/replication/skip_conflict_row.result b/test/replication/skip_conflict_row.result index 9b2777872..d70ac8e2a 100644 --- a/test/replication/skip_conflict_row.result +++ b/test/replication/skip_conflict_row.result @@ -54,6 +54,9 @@ box.info.status vclock = test_run:get_vclock('default') --- ... +vclock[0] = nil +--- +... _ = test_run:wait_vclock("replica", vclock) --- ... 
diff --git a/test/replication/skip_conflict_row.test.lua b/test/replication/skip_conflict_row.test.lua index 2982c730a..04fd08136 100644 --- a/test/replication/skip_conflict_row.test.lua +++ b/test/replication/skip_conflict_row.test.lua @@ -19,6 +19,7 @@ space:insert{2} box.info.status vclock = test_run:get_vclock('default') +vclock[0] = nil _ = test_run:wait_vclock("replica", vclock) test_run:cmd("switch replica") box.info.replication[1].upstream.message diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg index 90fd53ca6..0907ac17f 100644 --- a/test/replication/suite.cfg +++ b/test/replication/suite.cfg @@ -12,6 +12,7 @@ "on_schema_init.test.lua": {}, "long_row_timeout.test.lua": {}, "join_without_snap.test.lua": {}, + "gh-4114-local-space-replication.test.lua": {}, "gh-4402-info-errno.test.lua": {}, "gh-4605-empty-password.test.lua": {}, "gh-4606-admin-creds.test.lua": {}, diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result index 2635da265..2bd701f70 100644 --- a/test/vinyl/errinj.result +++ b/test/vinyl/errinj.result @@ -1241,6 +1241,11 @@ test_run:cmd("switch default") vclock = test_run:get_vclock("default") --- ... +-- Ignore 0-th vclock component. They don't match between +-- replicas. +vclock[0] = nil +--- +... _ = test_run:wait_vclock("replica", vclock) --- ... diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua index 4230cfae3..750d3bfe8 100644 --- a/test/vinyl/errinj.test.lua +++ b/test/vinyl/errinj.test.lua @@ -455,6 +455,10 @@ box.cfg{read_only = true} test_run:cmd("switch default") vclock = test_run:get_vclock("default") + +-- Ignore 0-th vclock component. They don't match between +-- replicas. +vclock[0] = nil _ = test_run:wait_vclock("replica", vclock) test_run:cmd("stop server replica") -- 2.21.1 (Apple Git-122.3)
next prev parent reply other threads:[~2020-03-18 19:48 UTC|newest] Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top 2020-03-18 19:47 [Tarantool-patches] [PATCH v2 0/5] replication: fix local space tracking Serge Petrenko 2020-03-18 19:47 ` [Tarantool-patches] [PATCH v2 1/5] box: introduce matrix clock Serge Petrenko 2020-03-18 20:08 ` Konstantin Osipov 2020-03-19 8:11 ` Timur Safin 2020-03-19 8:41 ` 'Konstantin Osipov' 2020-03-19 9:17 ` Sergey Ostanevich 2020-03-19 11:28 ` Serge Petrenko 2020-03-19 11:56 ` Konstantin Osipov 2020-03-19 11:59 ` Serge Petrenko 2020-03-18 19:47 ` [Tarantool-patches] [PATCH v2 2/5] wal: track consumer vclock and collect logs in wal thread Serge Petrenko 2020-03-18 19:47 ` [Tarantool-patches] [PATCH v2 3/5] vclock: add an ability to set individual clock components Serge Petrenko 2020-03-18 20:10 ` Konstantin Osipov 2020-03-19 11:31 ` Serge Petrenko 2020-03-18 19:47 ` [Tarantool-patches] [PATCH v2 4/5] replication: hide 0-th vclock components in replication responses Serge Petrenko 2020-03-18 19:47 ` Serge Petrenko [this message] 2020-03-18 21:12 ` [Tarantool-patches] [PATCH v2 0/5] replication: fix local space tracking Vladislav Shpilevoy 2020-03-19 8:17 ` Konstantin Osipov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=b2d68aee1c5864965db81c6d7c42d04bcf5d25a6.1584558067.git.sergepetrenko@tarantool.org \ --to=sergepetrenko@tarantool.org \ --cc=georgy@tarantool.org \ --cc=kostja.osipov@gmail.com \ --cc=tarantool-patches@dev.tarantool.org \ --cc=v.shpilevoy@tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH v2 5/5] box: start counting local space requests separately' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox