[Tarantool-patches] [PATCH 1/1] [tosquash] xrow: encode CONFIRM/ROLLBACK on txn region

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Wed Jul 8 02:32:09 MSK 2020


There were 2 bugs: one memory leak and one memory corruption.

The leak was in txn_limbo_write_confirm_rollback(). This function
used fiber->gc region to encode CONFIRM/ROLLBACK, but never freed
it.

The corruption was in applier.cc in process_confirm_rollback().
CONFIRM/ROLLBACK were stored on the applier's ibuf. As a result,
if applier experiences relatively intensive load, the ibuf will
be quickly recycled, right during a WAL write of CONFIRM/ROLLBACK
stored on it.

DML requests would have the same problem, but they were copied in
txn_add_redo() inside of xrow_encode_dml() call. This patch makes
synchro requests copied as well.

This commit should be squashed into the commits introducing
CONFIRM/ROLLBACK entries and their handling.

Closes #5138
---
Branch: http://github.com/tarantool/tarantool/tree/gh-4842-sync-replication
Issue 1: https://github.com/tarantool/tarantool/issues/4842
Issue 2: https://github.com/tarantool/tarantool/issues/5138

 src/box/txn.c                         |  2 ++
 src/box/txn_limbo.c                   | 15 +++++----
 src/box/xrow.c                        | 31 ++++++++++++++----
 src/box/xrow.h                        | 16 ++++++++--
 test/replication/qsync_basic.result   | 46 +++++++++++++++++++++++++++
 test/replication/qsync_basic.test.lua | 18 +++++++++++
 6 files changed, 113 insertions(+), 15 deletions(-)

diff --git a/src/box/txn.c b/src/box/txn.c
index 59145eef8..205e5e9f9 100644
--- a/src/box/txn.c
+++ b/src/box/txn.c
@@ -88,6 +88,8 @@ txn_add_redo(struct txn *txn, struct txn_stmt *stmt, struct request *request)
 	 */
 	if (likely(!iproto_type_is_synchro_request(row->type)))
 		row->bodycnt = xrow_encode_dml(request, &txn->region, row->body);
+	else
+		row->bodycnt = xrow_header_dup_body(row, &txn->region);
 	if (row->bodycnt < 0)
 		return -1;
 	stmt->row = row;
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index f22ca1123..884c188b2 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -237,22 +237,25 @@ txn_limbo_write_confirm_rollback(struct txn_limbo *limbo, int64_t lsn,
 		.header = &row,
 	};
 
+	struct txn *txn = txn_begin();
+	if (txn == NULL)
+		return -1;
+
 	int res = 0;
 	if (is_confirm) {
-		res = xrow_encode_confirm(&row, limbo->instance_id, lsn);
+		res = xrow_encode_confirm(&row, &txn->region,
+					  limbo->instance_id, lsn);
 	} else {
 		/*
 		 * This LSN is the first to be rolled back, so
 		 * the last "safe" lsn is lsn - 1.
 		 */
-		res = xrow_encode_rollback(&row, limbo->instance_id, lsn - 1);
+		res = xrow_encode_rollback(&row, &txn->region,
+					   limbo->instance_id, lsn - 1);
 	}
 	if (res == -1)
-		return -1;
+		goto rollback;
 
-	struct txn *txn = txn_begin();
-	if (txn == NULL)
-		return -1;
 	/*
 	 * This is not really a transaction. It just uses txn API
 	 * to put the data into WAL. And obviously it should not
diff --git a/src/box/xrow.c b/src/box/xrow.c
index 39d1814c4..0c797a9d5 100644
--- a/src/box/xrow.c
+++ b/src/box/xrow.c
@@ -220,6 +220,21 @@ error:
 	return 0;
 }
 
+int
+xrow_header_dup_body(struct xrow_header *row, struct region *region)
+{
+	assert(row->bodycnt == 1);
+	size_t size = row->body[0].iov_len;
+	char *copy = (char *)region_alloc(region, size);
+	if (copy == NULL) {
+		diag_set(OutOfMemory, size, "region_alloc", "copy");
+		return -1;
+	}
+	memcpy(copy, row->body[0].iov_base, size);
+	row->body[0].iov_base = copy;
+	return 1;
+}
+
 /**
  * @pre pos points at a valid msgpack
  */
@@ -879,13 +894,13 @@ xrow_encode_dml(const struct request *request, struct region *region,
 }
 
 static int
-xrow_encode_confirm_rollback(struct xrow_header *row, uint32_t replica_id,
-			     int64_t lsn, int type)
+xrow_encode_confirm_rollback(struct xrow_header *row, struct region *region,
+			     uint32_t replica_id, int64_t lsn, int type)
 {
 	size_t len = mp_sizeof_map(2) + mp_sizeof_uint(IPROTO_REPLICA_ID) +
 		     mp_sizeof_uint(replica_id) + mp_sizeof_uint(IPROTO_LSN) +
 		     mp_sizeof_uint(lsn);
-	char *buf = (char *)region_alloc(&fiber()->gc, len);
+	char *buf = (char *)region_alloc(region, len);
 	if (buf == NULL) {
 		diag_set(OutOfMemory, len, "region_alloc", "buf");
 		return -1;
@@ -910,16 +925,18 @@ xrow_encode_confirm_rollback(struct xrow_header *row, uint32_t replica_id,
 }
 
 int
-xrow_encode_confirm(struct xrow_header *row, uint32_t replica_id, int64_t lsn)
+xrow_encode_confirm(struct xrow_header *row, struct region *region,
+		    uint32_t replica_id, int64_t lsn)
 {
-	return xrow_encode_confirm_rollback(row, replica_id, lsn,
+	return xrow_encode_confirm_rollback(row, region, replica_id, lsn,
 					    IPROTO_CONFIRM);
 }
 
 int
-xrow_encode_rollback(struct xrow_header *row, uint32_t replica_id, int64_t lsn)
+xrow_encode_rollback(struct xrow_header *row, struct region *region,
+		     uint32_t replica_id, int64_t lsn)
 {
-	return xrow_encode_confirm_rollback(row, replica_id, lsn,
+	return xrow_encode_confirm_rollback(row, region, replica_id, lsn,
 					    IPROTO_ROLLBACK);
 }
 
diff --git a/src/box/xrow.h b/src/box/xrow.h
index 1def394e7..7e6a4aceb 100644
--- a/src/box/xrow.h
+++ b/src/box/xrow.h
@@ -141,6 +141,14 @@ int
 xrow_header_decode(struct xrow_header *header, const char **pos,
 		   const char *end, bool end_is_exact);
 
+/**
+ * Duplicate the xrow's body onto the given region.
+ * @retval -1 Error.
+ * @retval >= 0 Iov count in the body.
+ */
+int
+xrow_header_dup_body(struct xrow_header *header, struct region *region);
+
 /**
  * DML request.
  */
@@ -211,13 +219,15 @@ xrow_encode_dml(const struct request *request, struct region *region,
  * Encode the CONFIRM to row body and set row type to
  * IPROTO_CONFIRM.
  * @param row xrow header.
+ * @param region Region to use to encode the confirmation body.
  * @param replica_id master's instance id.
  * @param lsn last confirmed lsn.
  * @retval -1 on error.
  * @retval 0 success.
  */
 int
-xrow_encode_confirm(struct xrow_header *row, uint32_t replica_id, int64_t lsn);
+xrow_encode_confirm(struct xrow_header *row, struct region *region,
+		    uint32_t replica_id, int64_t lsn);
 
 /**
  * Decode the CONFIRM request body.
@@ -234,13 +244,15 @@ xrow_decode_confirm(struct xrow_header *row, uint32_t *replica_id, int64_t *lsn)
  * Encode the ROLLBACK row body and set row type to
  * IPROTO_ROLLBACK.
  * @param row xrow header.
+ * @param region Region to use to encode the rollback body.
  * @param replica_id master's instance id.
  * @param lsn lsn to rollback to.
  * @retval -1  on error.
  * @retval 0 success.
  */
 int
-xrow_encode_rollback(struct xrow_header *row, uint32_t replica_id, int64_t lsn);
+xrow_encode_rollback(struct xrow_header *row, struct region *region,
+		     uint32_t replica_id, int64_t lsn);
 
 /**
  * Decode the ROLLBACK row body.
diff --git a/test/replication/qsync_basic.result b/test/replication/qsync_basic.result
index 59f5d9123..fa93b0263 100644
--- a/test/replication/qsync_basic.result
+++ b/test/replication/qsync_basic.result
@@ -565,6 +565,52 @@ box.space.sync:select{12}
  | - - [12]
  | ...
 
+--
+-- gh-5138: synchro rows were not saved onto txns region, and
+-- could get corrupted under load.
+--
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+box.cfg{replication_synchro_timeout = 1000}
+ | ---
+ | ...
+for i = 1, 100 do box.space.sync:replace{i} end
+ | ---
+ | ...
+test_run:cmd('switch replica')
+ | ---
+ | - true
+ | ...
+box.space.sync:count()
+ | ---
+ | - 100
+ | ...
+-- Rows could be corrupted during WAL writes. Restart should
+-- reveal the problem during recovery.
+test_run:cmd('restart server replica')
+ | 
+box.space.sync:count()
+ | ---
+ | - 100
+ | ...
+test_run:cmd('switch default')
+ | ---
+ | - true
+ | ...
+for i = 1, 100 do box.space.sync:delete{i} end
+ | ---
+ | ...
+test_run:cmd('switch replica')
+ | ---
+ | - true
+ | ...
+box.space.sync:count()
+ | ---
+ | - 0
+ | ...
+
 -- Cleanup.
 test_run:cmd('switch default')
  | ---
diff --git a/test/replication/qsync_basic.test.lua b/test/replication/qsync_basic.test.lua
index 1bb3ba87d..043e6b4ba 100644
--- a/test/replication/qsync_basic.test.lua
+++ b/test/replication/qsync_basic.test.lua
@@ -230,6 +230,24 @@ box.space.sync:select{12}
 test_run:switch('replica')
 box.space.sync:select{12}
 
+--
+-- gh-5138: synchro rows were not saved onto txns region, and
+-- could get corrupted under load.
+--
+test_run:switch('default')
+box.cfg{replication_synchro_timeout = 1000}
+for i = 1, 100 do box.space.sync:replace{i} end
+test_run:cmd('switch replica')
+box.space.sync:count()
+-- Rows could be corrupted during WAL writes. Restart should
+-- reveal the problem during recovery.
+test_run:cmd('restart server replica')
+box.space.sync:count()
+test_run:cmd('switch default')
+for i = 1, 100 do box.space.sync:delete{i} end
+test_run:cmd('switch replica')
+box.space.sync:count()
+
 -- Cleanup.
 test_run:cmd('switch default')
 
-- 
2.21.1 (Apple Git-122.3)



More information about the Tarantool-patches mailing list