[Tarantool-patches] [PATCH 1/1] [tosquash] txn_limbo: local WAL write rollback should start from end

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Wed Jul 8 02:32:41 MSK 2020


Transactions are rolled back in reversed order, always. Limbo
somewhy removed rolled back transactions from the beginning, not
from the end.

Closes #5147
---
Branch: http://github.com/tarantool/tarantool/tree/gh-4842-sync-replication
Issue 1: https://github.com/tarantool/tarantool/issues/4842
Issue 2: https://github.com/tarantool/tarantool/issues/5147

 src/box/txn_limbo.c                    |  8 ++-
 test/replication/qsync_errinj.result   | 76 ++++++++++++++++++++++++++
 test/replication/qsync_errinj.test.lua | 25 +++++++++
 3 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index 884c188b2..709bef9d3 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -109,7 +109,13 @@ void
 txn_limbo_abort(struct txn_limbo *limbo, struct txn_limbo_entry *entry)
 {
 	entry->is_rollback = true;
-	txn_limbo_remove(limbo, entry);
+	/*
+	 * The simple rule about rollback/commit order applies
+	 * here as well: commit always in the order of WAL write,
+	 * rollback in the reversed order. Rolled back transaction
+	 * is always the last.
+	 */
+	txn_limbo_pop(limbo, entry);
 }
 
 void
diff --git a/test/replication/qsync_errinj.result b/test/replication/qsync_errinj.result
index 15dbc9bb2..49429cc80 100644
--- a/test/replication/qsync_errinj.result
+++ b/test/replication/qsync_errinj.result
@@ -138,6 +138,82 @@ box.space.sync:select{12}
  | - - [12]
  | ...
 
+--
+-- gh-5147: at local WAL write fail limbo entries should be
+-- deleted from the end of the limbo, not from the beginning.
+-- Otherwise it should crash.
+--
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+fiber = require('fiber')
+ | ---
+ | ...
+box.cfg{replication_synchro_quorum = 3, replication_synchro_timeout = 1000}
+ | ---
+ | ...
+box.error.injection.set("ERRINJ_WAL_DELAY", true)
+ | ---
+ | - ok
+ | ...
+ok1, err1 = nil
+ | ---
+ | ...
+f1 = fiber.create(function()                                                    \
+    ok1, err1 = pcall(box.space.sync.replace, box.space.sync, {13})             \
+end)
+ | ---
+ | ...
+box.error.injection.set("ERRINJ_WAL_IO", true)
+ | ---
+ | - ok
+ | ...
+box.space.sync:replace({14})
+ | ---
+ | - error: Failed to write to disk
+ | ...
+box.error.injection.set("ERRINJ_WAL_IO", false)
+ | ---
+ | - ok
+ | ...
+box.error.injection.set("ERRINJ_WAL_DELAY", false)
+ | ---
+ | - ok
+ | ...
+box.cfg{replication_synchro_quorum = 2}
+ | ---
+ | ...
+box.space.sync:replace({15})
+ | ---
+ | - [15]
+ | ...
+test_run:wait_cond(function() return f1:status() == 'dead' end)
+ | ---
+ | - true
+ | ...
+ok1, err1
+ | ---
+ | - true
+ | - [13]
+ | ...
+box.space.sync:select{13}, box.space.sync:select{14}, box.space.sync:select{15}
+ | ---
+ | - - [13]
+ | - []
+ | - - [15]
+ | ...
+test_run:switch('replica')
+ | ---
+ | - true
+ | ...
+box.space.sync:select{13}, box.space.sync:select{14}, box.space.sync:select{15}
+ | ---
+ | - - [13]
+ | - []
+ | - - [15]
+ | ...
+
 test_run:cmd('switch default')
  | ---
  | - true
diff --git a/test/replication/qsync_errinj.test.lua b/test/replication/qsync_errinj.test.lua
index 82abf7999..fe8bb4387 100644
--- a/test/replication/qsync_errinj.test.lua
+++ b/test/replication/qsync_errinj.test.lua
@@ -52,6 +52,31 @@ box.error.injection.set('ERRINJ_WAL_IO', false)
 test_run:cmd('restart server replica')
 box.space.sync:select{12}
 
+--
+-- gh-5147: at local WAL write fail limbo entries should be
+-- deleted from the end of the limbo, not from the beginning.
+-- Otherwise it should crash.
+--
+test_run:switch('default')
+fiber = require('fiber')
+box.cfg{replication_synchro_quorum = 3, replication_synchro_timeout = 1000}
+box.error.injection.set("ERRINJ_WAL_DELAY", true)
+ok1, err1 = nil
+f1 = fiber.create(function()                                                    \
+    ok1, err1 = pcall(box.space.sync.replace, box.space.sync, {13})             \
+end)
+box.error.injection.set("ERRINJ_WAL_IO", true)
+box.space.sync:replace({14})
+box.error.injection.set("ERRINJ_WAL_IO", false)
+box.error.injection.set("ERRINJ_WAL_DELAY", false)
+box.cfg{replication_synchro_quorum = 2}
+box.space.sync:replace({15})
+test_run:wait_cond(function() return f1:status() == 'dead' end)
+ok1, err1
+box.space.sync:select{13}, box.space.sync:select{14}, box.space.sync:select{15}
+test_run:switch('replica')
+box.space.sync:select{13}, box.space.sync:select{14}, box.space.sync:select{15}
+
 test_run:cmd('switch default')
 
 box.cfg{                                                                        \
-- 
2.21.1 (Apple Git-122.3)



More information about the Tarantool-patches mailing list