[Tarantool-patches] [PATCH v3 08/12] txn_limbo: persist the latest effective promote in snapshot

Serge Petrenko sergepetrenko at tarantool.org
Tue Jun 29 01:12:54 MSK 2021


Previously PROMOTE entries, just like CONFIRM and ROLLBACK were only
stored in WALs. This is because snapshots consist solely of confirmed
transactions, so there's nothing to CONFIRM or ROLLBACK.

PROMOTE has gained additional meaning recently: it pins limbo ownership
to a specific instance, rendering everyone else read-only. So now
PROMOTE information must be stored in snapshots as well.

Save the latest limbo state (owner id and latest confirmed lsn) to the
snapshot as a PROMOTE request.

Follow-up #6034
---
 src/box/memtx_engine.c | 32 ++++++++++++++++++++++++++++++++
 src/box/txn_limbo.c    | 10 ++++++++++
 src/box/txn_limbo.h    |  7 +++++++
 3 files changed, 49 insertions(+)

diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index c662a3c8c..a2cfb2615 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -50,6 +50,7 @@
 #include "schema.h"
 #include "gc.h"
 #include "raft.h"
+#include "txn_limbo.h"
 
 /* sync snapshot every 16MB */
 #define SNAP_SYNC_INTERVAL	(1 << 24)
@@ -225,6 +226,22 @@ memtx_engine_recover_raft(const struct xrow_header *row)
 	return 0;
 }
 
+static int
+memtx_engine_recover_synchro(const struct xrow_header *row)
+{
+	assert(row->type == IPROTO_PROMOTE);
+	struct synchro_request req;
+	if (xrow_decode_synchro(row, &req) != 0)
+		return -1;
+	/*
+	 * Origin id cannot be deduced from row.replica_id in a checkpoint,
+	 * because all its rows have a zero replica_id.
+	 */
+	req.origin_id = req.replica_id;
+	txn_limbo_process(&txn_limbo, &req);
+	return 0;
+}
+
 static int
 memtx_engine_recover_snapshot_row(struct memtx_engine *memtx,
 				  struct xrow_header *row, int *is_space_system)
@@ -233,6 +250,8 @@ memtx_engine_recover_snapshot_row(struct memtx_engine *memtx,
 	if (row->type != IPROTO_INSERT) {
 		if (row->type == IPROTO_RAFT)
 			return memtx_engine_recover_raft(row);
+		if (row->type == IPROTO_PROMOTE)
+			return memtx_engine_recover_synchro(row);
 		diag_set(ClientError, ER_UNKNOWN_REQUEST_TYPE,
 			 (uint32_t) row->type);
 		return -1;
@@ -542,6 +561,7 @@ struct checkpoint {
 	struct vclock vclock;
 	struct xdir dir;
 	struct raft_request raft;
+	struct synchro_request synchro_state;
 	/**
 	 * Do nothing, just touch the snapshot file - the
 	 * checkpoint already exists.
@@ -567,6 +587,7 @@ checkpoint_new(const char *snap_dirname, uint64_t snap_io_rate_limit)
 	xdir_create(&ckpt->dir, snap_dirname, SNAP, &INSTANCE_UUID, &opts);
 	vclock_create(&ckpt->vclock);
 	box_raft_checkpoint_local(&ckpt->raft);
+	txn_limbo_checkpoint(&txn_limbo, &ckpt->synchro_state);
 	ckpt->touch = false;
 	return ckpt;
 }
@@ -655,6 +676,15 @@ finish:
 	return rc;
 }
 
+static int
+checkpoint_write_synchro(struct xlog *l, const struct synchro_request *req)
+{
+	struct xrow_header row;
+	char body[XROW_SYNCHRO_BODY_LEN_MAX];
+	xrow_encode_synchro(&row, body, req);
+	return checkpoint_write_row(l, &row);
+}
+
 static int
 checkpoint_f(va_list ap)
 {
@@ -692,6 +722,8 @@ checkpoint_f(va_list ap)
 	}
 	if (checkpoint_write_raft(&snap, &ckpt->raft) != 0)
 		goto fail;
+	if (checkpoint_write_synchro(&snap, &ckpt->synchro_state) != 0)
+		goto fail;
 	if (xlog_flush(&snap) < 0)
 		goto fail;
 
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index d21f05557..239b6da76 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -301,6 +301,16 @@ complete:
 	return 0;
 }
 
+void
+txn_limbo_checkpoint(const struct txn_limbo *limbo,
+		     struct synchro_request *req)
+{
+	req->type = IPROTO_PROMOTE;
+	req->replica_id = limbo->owner_id;
+	req->lsn = limbo->confirmed_lsn;
+	req->term = limbo->promote_greatest_term;
+}
+
 static void
 txn_limbo_write_synchro(struct txn_limbo *limbo, uint16_t type, int64_t lsn,
 			uint64_t term)
diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
index 801a1a0ee..442f2483e 100644
--- a/src/box/txn_limbo.h
+++ b/src/box/txn_limbo.h
@@ -311,6 +311,13 @@ txn_limbo_process(struct txn_limbo *limbo, const struct synchro_request *req);
 int
 txn_limbo_wait_confirm(struct txn_limbo *limbo);
 
+/**
+ * Persist limbo state to a given synchro request.
+ */
+void
+txn_limbo_checkpoint(const struct txn_limbo *limbo,
+		     struct synchro_request *req);
+
 /**
  * Write a PROMOTE request, which has the same effect as CONFIRM(@a lsn) and
  * ROLLBACK(@a lsn + 1) combined.
-- 
2.30.1 (Apple Git-130)



More information about the Tarantool-patches mailing list