[Tarantool-patches] [PATCH 4/7] txn_limbo: persist the latest effective promote in snapshot

Serge Petrenko sergepetrenko at tarantool.org
Thu Jun 10 16:32:54 MSK 2021


Previously PROMOTE entries, just like CONFIRM and ROLLBACK, were only
stored in WALs. This is because snapshots consist solely of confirmed
transactions, so there's nothing to CONFIRM or ROLLBACK.

PROMOTE has gained additional meaning recently: it pins limbo ownership
to a specific instance, rendering everyone else read-only. So now
PROMOTE information must be stored in snapshots as well.

Save the latest limbo state (owner id and latest confirmed lsn) to the
snapshot as a PROMOTE request.

Part-of #6034
---
 src/box/memtx_engine.c | 34 ++++++++++++++++++++++++++++++++++
 src/box/txn_limbo.c    |  9 +++++++++
 src/box/txn_limbo.h    |  6 ++++++
 3 files changed, 49 insertions(+)

diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index 6c4982b9f..3713d39d0 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -50,6 +50,7 @@
 #include "schema.h"
 #include "gc.h"
 #include "raft.h"
+#include "txn_limbo.h"
 
 /* sync snapshot every 16MB */
 #define SNAP_SYNC_INTERVAL	(1 << 24)
@@ -225,6 +226,22 @@ memtx_engine_recover_raft(const struct xrow_header *row)
 	return 0;
 }
 
+static int
+memtx_engine_recover_promote(const struct xrow_header *row)
+{
+	assert(row->type == IPROTO_PROMOTE);
+	struct synchro_request req;
+	if (xrow_decode_synchro(row, &req) != 0)
+		return -1;
+	/*
+	 * Origin id cannot be deduced from row.replica_id in a checkpoint,
+	 * because all its rows have a zero replica_id.
+	 */
+	req.origin_id = req.replica_id;
+	txn_limbo_process(&txn_limbo, &req);
+	return 0;
+}
+
 static int
 memtx_engine_recover_snapshot_row(struct memtx_engine *memtx,
 				  struct xrow_header *row, int *is_space_system)
@@ -233,6 +250,8 @@ memtx_engine_recover_snapshot_row(struct memtx_engine *memtx,
 	if (row->type != IPROTO_INSERT) {
 		if (row->type == IPROTO_RAFT)
 			return memtx_engine_recover_raft(row);
+		if (row->type == IPROTO_PROMOTE)
+			return memtx_engine_recover_promote(row);
 		diag_set(ClientError, ER_UNKNOWN_REQUEST_TYPE,
 			 (uint32_t) row->type);
 		return -1;
@@ -542,6 +561,7 @@ struct checkpoint {
 	struct vclock vclock;
 	struct xdir dir;
 	struct raft_request raft;
+	struct synchro_request promote;
 	/**
 	 * Do nothing, just touch the snapshot file - the
 	 * checkpoint already exists.
@@ -567,6 +587,7 @@ checkpoint_new(const char *snap_dirname, uint64_t snap_io_rate_limit)
 	xdir_create(&ckpt->dir, snap_dirname, SNAP, &INSTANCE_UUID, &opts);
 	vclock_create(&ckpt->vclock);
 	box_raft_checkpoint_local(&ckpt->raft);
+	txn_limbo_checkpoint(&txn_limbo, &ckpt->promote);
 	ckpt->touch = false;
 	return ckpt;
 }
@@ -655,6 +676,17 @@ finish:
 	return rc;
 }
 
+static int
+checkpoint_write_promote(struct xlog *l, const struct synchro_request *req)
+{
+	struct xrow_header row;
+	char body[XROW_SYNCHRO_BODY_LEN_MAX];
+	xrow_encode_synchro(&row, body, req);
+	if (checkpoint_write_row(l, &row) != 0)
+		return -1;
+	return 0;
+}
+
 static int
 checkpoint_f(va_list ap)
 {
@@ -692,6 +724,8 @@ checkpoint_f(va_list ap)
 	}
 	if (checkpoint_write_raft(&snap, &ckpt->raft) != 0)
 		goto fail;
+	if (checkpoint_write_promote(&snap, &ckpt->promote) != 0)
+		goto fail;
 	if (xlog_flush(&snap) < 0)
 		goto fail;
 
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index 33a6e5548..40c4a41bb 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -295,6 +295,15 @@ complete:
 	return 0;
 }
 
+void
+txn_limbo_checkpoint(struct txn_limbo *limbo, struct synchro_request *req)
+{
+	req->type = IPROTO_PROMOTE;
+	req->replica_id = limbo->owner_id;
+	req->lsn = limbo->confirmed_lsn;
+	req->term = limbo->promote_greatest_term;
+}
+
 static void
 txn_limbo_write_synchro(struct txn_limbo *limbo, uint16_t type, int64_t lsn,
 			uint64_t term)
diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
index e409ac657..84a19bb40 100644
--- a/src/box/txn_limbo.h
+++ b/src/box/txn_limbo.h
@@ -311,6 +311,12 @@ txn_limbo_process(struct txn_limbo *limbo, const struct synchro_request *req);
 int
 txn_limbo_wait_confirm(struct txn_limbo *limbo);
 
+/**
+ * Fill @a req with a PROMOTE request describing the current limbo state.
+ */
+void
+txn_limbo_checkpoint(struct txn_limbo *limbo, struct synchro_request *req);
+
 /**
  * Write a PROMOTE request, which has the same effect as CONFIRM(@a lsn) and
  * ROLLBACK(@a lsn + 1) combined.
-- 
2.30.1 (Apple Git-130)



More information about the Tarantool-patches mailing list