From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from localhost (localhost [127.0.0.1]) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTP id A2F3C27413 for ; Tue, 12 Feb 2019 10:05:31 -0500 (EST) Received: from turing.freelists.org ([127.0.0.1]) by localhost (turing.freelists.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id SUK51iuGdbUr for ; Tue, 12 Feb 2019 10:05:31 -0500 (EST) Received: from smtp3.mail.ru (smtp3.mail.ru [94.100.179.58]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by turing.freelists.org (Avenir Technologies Mail Multiplex) with ESMTPS id 5F21A217AB for ; Tue, 12 Feb 2019 10:05:31 -0500 (EST) From: Georgy Kirichenko Subject: [tarantool-patches] [PATCH] Emit NOPs in case of a replication conflict Date: Tue, 12 Feb 2019 18:07:36 +0300 Message-Id: <49a2387ecbdb9d0a98090e37b11af09dfea720a5.1549983926.git.georgy@tarantool.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: tarantool-patches-bounce@freelists.org Errors-to: tarantool-patches-bounce@freelists.org Reply-To: tarantool-patches@freelists.org List-help: List-unsubscribe: List-software: Ecartis version 1.0.0 List-Id: tarantool-patches List-subscribe: List-owner: List-post: List-archive: To: tarantool-patches@freelists.org Cc: Georgy Kirichenko Applier writes NOP to wal if it is not able to apply a master's row because of conflict and option replication_skip_conflict is set. This prevents the applier from reapplying already skipped rows after restart. 
Closes: #3977 Issue: https://github.com/tarantool/tarantool/issues/3977 Branch: https://github.com/tarantool/tarantool/tree/g.kirichenko/gh-3977-emit-nop-for-applier-conflict --- src/box/applier.cc | 20 +++-- test/replication/skip_conflict_row.result | 98 +++++++++++++++++++++ test/replication/skip_conflict_row.test.lua | 34 +++++++ 3 files changed, 146 insertions(+), 6 deletions(-) diff --git a/src/box/applier.cc b/src/box/applier.cc index 7f37fe2ee..fb2025f6c 100644 --- a/src/box/applier.cc +++ b/src/box/applier.cc @@ -545,18 +545,26 @@ applier_subscribe(struct applier *applier) if (res != 0) { struct error *e = diag_last_error(diag_get()); /** - * Silently skip ER_TUPLE_FOUND error if such - * option is set in config. + * In case of ER_TUPLE_FOUND error replace row + * with nop if such option is set in config. */ if (e->type == &type_ClientError && box_error_code(e) == ER_TUPLE_FOUND && - replication_skip_conflict) + replication_skip_conflict) { diag_clear(diag_get()); - else { - latch_unlock(latch); - diag_raise(); + res = 0; + struct xrow_header nop; + nop.type = IPROTO_NOP; + nop.bodycnt = 0; + nop.replica_id = row.replica_id; + nop.lsn = row.lsn; + res = xstream_write(applier->subscribe_stream, &nop); } } + if (res != 0) { + latch_unlock(latch); + diag_raise(); + } } latch_unlock(latch); diff --git a/test/replication/skip_conflict_row.result b/test/replication/skip_conflict_row.result index bcbbbcc34..34be807eb 100644 --- a/test/replication/skip_conflict_row.result +++ b/test/replication/skip_conflict_row.result @@ -141,6 +141,104 @@ box.info.replication[1].upstream.message --- - Duplicate key exists in unique index 'primary' in space 'test' ... +replication = box.cfg.replication +--- +... +box.cfg{replication_skip_conflict = true, replication = {}} +--- +... +box.cfg{replication = replication} +--- +... +test_run:cmd("switch default") +--- +- true +... +-- test if nop were really written +box.space.test:truncate() +--- +... 
+test_run:cmd("restart server replica") +--- +- true +... +test_run:cmd("switch replica") +--- +- true +... +box.info.replication[1].upstream.status +--- +- follow +... +-- write some conflicting records on slave +for i = 1, 10 do box.space.test:insert({i, 'r'}) end +--- +... +box.cfg{replication_skip_conflict = true} +--- +... +v1 = box.info.vclock[1] +--- +... +-- write some conflicting records on master +test_run:cmd("switch default") +--- +- true +... +for i = 1, 10 do box.space.test:insert({i, 'm'}) end +--- +... +test_run:cmd("switch replica") +--- +- true +... +-- lsn should be incremented +v1 == box.info.vclock[1] - 10 +--- +- true +... +-- and state is follow +box.info.replication[1].upstream.status +--- +- follow +... +-- restart server and check replication continues from nop-ed vclock +test_run:cmd("switch default") +--- +- true +... +test_run:cmd("stop server replica") +--- +- true +... +for i = 11, 20 do box.space.test:insert({i, 'm'}) end +--- +... +test_run:cmd("start server replica") +--- +- true +... +test_run:cmd("switch replica") +--- +- true +... +box.info.replication[1].upstream.status +--- +- follow +... +box.space.test:select({11}, {iterator = "GE"}) +--- +- - [11, 'm'] + - [12, 'm'] + - [13, 'm'] + - [14, 'm'] + - [15, 'm'] + - [16, 'm'] + - [17, 'm'] + - [18, 'm'] + - [19, 'm'] + - [20, 'm'] +... 
test_run:cmd("switch default") --- - true diff --git a/test/replication/skip_conflict_row.test.lua b/test/replication/skip_conflict_row.test.lua index 3a9076b39..b7fabd012 100644 --- a/test/replication/skip_conflict_row.test.lua +++ b/test/replication/skip_conflict_row.test.lua @@ -46,8 +46,42 @@ test_run:cmd("switch default") test_run:cmd("restart server replica") -- applier is not in follow state box.info.replication[1].upstream.message + +replication = box.cfg.replication +box.cfg{replication_skip_conflict = true, replication = {}} +box.cfg{replication = replication} +test_run:cmd("switch default") + +-- test if nop were really written +box.space.test:truncate() +test_run:cmd("restart server replica") +test_run:cmd("switch replica") +box.info.replication[1].upstream.status +-- write some conflicting records on slave +for i = 1, 10 do box.space.test:insert({i, 'r'}) end +box.cfg{replication_skip_conflict = true} +v1 = box.info.vclock[1] + +-- write some conflicting records on master +test_run:cmd("switch default") +for i = 1, 10 do box.space.test:insert({i, 'm'}) end + +test_run:cmd("switch replica") +-- lsn should be incremented +v1 == box.info.vclock[1] - 10 +-- and state is follow +box.info.replication[1].upstream.status + +-- restart server and check replication continues from nop-ed vclock test_run:cmd("switch default") +test_run:cmd("stop server replica") +for i = 11, 20 do box.space.test:insert({i, 'm'}) end +test_run:cmd("start server replica") +test_run:cmd("switch replica") +box.info.replication[1].upstream.status +box.space.test:select({11}, {iterator = "GE"}) +test_run:cmd("switch default") -- cleanup test_run:cmd("stop server replica") test_run:cmd("cleanup server replica") -- 2.20.1