From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp16.mail.ru (smtp16.mail.ru [94.100.176.153]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 0FF20445320 for ; Wed, 8 Jul 2020 02:32:43 +0300 (MSK) Received: by smtp16.mail.ru with esmtpa (envelope-from ) id 1jsx4s-0000HZ-Dp for tarantool-patches@dev.tarantool.org; Wed, 08 Jul 2020 02:32:42 +0300 From: Vladislav Shpilevoy Date: Wed, 8 Jul 2020 01:32:41 +0200 Message-Id: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH 1/1] [tosquash] txn_limbo: local WAL write rollback should start from end List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tarantool-patches@dev.tarantool.org Transactions are rolled back in reversed order, always. Limbo somewhy removed rolled back transactions from the beginning, not from the end. Closes #5147 --- Branch: http://github.com/tarantool/tarantool/tree/gh-4842-sync-replication Issue 1: https://github.com/tarantool/tarantool/issues/4842 Issue 2: https://github.com/tarantool/tarantool/issues/5147 src/box/txn_limbo.c | 8 ++- test/replication/qsync_errinj.result | 76 ++++++++++++++++++++++++++ test/replication/qsync_errinj.test.lua | 25 +++++++++ 3 files changed, 108 insertions(+), 1 deletion(-) diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c index 884c188b2..709bef9d3 100644 --- a/src/box/txn_limbo.c +++ b/src/box/txn_limbo.c @@ -109,7 +109,13 @@ void txn_limbo_abort(struct txn_limbo *limbo, struct txn_limbo_entry *entry) { entry->is_rollback = true; - txn_limbo_remove(limbo, entry); + /* + * The simple rule about rollback/commit order applies + * here as well: commit always in the order of WAL write, + * rollback in the reversed order. Rolled back transaction + * is always the last. + */ + txn_limbo_pop(limbo, entry); } void diff --git a/test/replication/qsync_errinj.result b/test/replication/qsync_errinj.result index 15dbc9bb2..49429cc80 100644 --- a/test/replication/qsync_errinj.result +++ b/test/replication/qsync_errinj.result @@ -138,6 +138,82 @@ box.space.sync:select{12} | - - [12] | ... +-- +-- gh-5147: at local WAL write fail limbo entries should be +-- deleted from the end of the limbo, not from the beginning. +-- Otherwise it should crash. +-- +test_run:switch('default') + | --- + | - true + | ... +fiber = require('fiber') + | --- + | ... +box.cfg{replication_synchro_quorum = 3, replication_synchro_timeout = 1000} + | --- + | ... +box.error.injection.set("ERRINJ_WAL_DELAY", true) + | --- + | - ok + | ... +ok1, err1 = nil + | --- + | ... +f1 = fiber.create(function() \ + ok1, err1 = pcall(box.space.sync.replace, box.space.sync, {13}) \ +end) + | --- + | ... +box.error.injection.set("ERRINJ_WAL_IO", true) + | --- + | - ok + | ... +box.space.sync:replace({14}) + | --- + | - error: Failed to write to disk + | ... +box.error.injection.set("ERRINJ_WAL_IO", false) + | --- + | - ok + | ... +box.error.injection.set("ERRINJ_WAL_DELAY", false) + | --- + | - ok + | ... +box.cfg{replication_synchro_quorum = 2} + | --- + | ... +box.space.sync:replace({15}) + | --- + | - [15] + | ... +test_run:wait_cond(function() return f1:status() == 'dead' end) + | --- + | - true + | ... +ok1, err1 + | --- + | - true + | - [13] + | ... +box.space.sync:select{13}, box.space.sync:select{14}, box.space.sync:select{15} + | --- + | - - [13] + | - [] + | - - [15] + | ... +test_run:switch('replica') + | --- + | - true + | ... +box.space.sync:select{13}, box.space.sync:select{14}, box.space.sync:select{15} + | --- + | - - [13] + | - [] + | - - [15] + | ... + test_run:cmd('switch default') | --- | - true diff --git a/test/replication/qsync_errinj.test.lua b/test/replication/qsync_errinj.test.lua index 82abf7999..fe8bb4387 100644 --- a/test/replication/qsync_errinj.test.lua +++ b/test/replication/qsync_errinj.test.lua @@ -52,6 +52,31 @@ box.error.injection.set('ERRINJ_WAL_IO', false) test_run:cmd('restart server replica') box.space.sync:select{12} +-- +-- gh-5147: at local WAL write fail limbo entries should be +-- deleted from the end of the limbo, not from the beginning. +-- Otherwise it should crash. +-- +test_run:switch('default') +fiber = require('fiber') +box.cfg{replication_synchro_quorum = 3, replication_synchro_timeout = 1000} +box.error.injection.set("ERRINJ_WAL_DELAY", true) +ok1, err1 = nil +f1 = fiber.create(function() \ + ok1, err1 = pcall(box.space.sync.replace, box.space.sync, {13}) \ +end) +box.error.injection.set("ERRINJ_WAL_IO", true) +box.space.sync:replace({14}) +box.error.injection.set("ERRINJ_WAL_IO", false) +box.error.injection.set("ERRINJ_WAL_DELAY", false) +box.cfg{replication_synchro_quorum = 2} +box.space.sync:replace({15}) +test_run:wait_cond(function() return f1:status() == 'dead' end) +ok1, err1 +box.space.sync:select{13}, box.space.sync:select{14}, box.space.sync:select{15} +test_run:switch('replica') +box.space.sync:select{13}, box.space.sync:select{14}, box.space.sync:select{15} + test_run:cmd('switch default') box.cfg{ \ -- 2.21.1 (Apple Git-122.3)