[Tarantool-patches] [PATCH v4 03/16] txn_limbo: persist the latest effective promote in snapshot

Serge Petrenko sergepetrenko at tarantool.org
Wed Jul 14 21:25:31 MSK 2021


Previously PROMOTE entries, just like CONFIRM and ROLLBACK, were only
stored in WALs. This is because snapshots consist solely of confirmed
transactions, so there's nothing to CONFIRM or ROLLBACK.

PROMOTE has gained additional meaning recently: it pins limbo ownership
to a specific instance, rendering everyone else read-only. So now
PROMOTE information must be stored in snapshots as well.

Save the latest limbo state (owner id, latest confirmed lsn and the
greatest PROMOTE term) to the snapshot as a PROMOTE request.

Prerequisite #6034
---
 src/box/memtx_engine.c | 32 ++++++++++++++++++++++++++++++++
 src/box/txn_limbo.c    | 10 ++++++++++
 src/box/txn_limbo.h    |  7 +++++++
 3 files changed, 49 insertions(+)

diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c
index c85dc6af3..0b06e5e63 100644
--- a/src/box/memtx_engine.c
+++ b/src/box/memtx_engine.c
@@ -50,6 +50,7 @@
 #include "schema.h"
 #include "gc.h"
 #include "raft.h"
+#include "txn_limbo.h"
 
 /* sync snapshot every 16MB */
 #define SNAP_SYNC_INTERVAL	(1 << 24)
@@ -225,6 +226,22 @@ memtx_engine_recover_raft(const struct xrow_header *row)
 	return 0;
 }
 
+static int /* 0 on success, -1 if the row cannot be decoded */
+memtx_engine_recover_synchro(const struct xrow_header *row)
+{
+	assert(row->type == IPROTO_PROMOTE);
+	struct synchro_request req;
+	if (xrow_decode_synchro(row, &req) != 0)
+		return -1;
+	/*
+	 * A checkpoint row can't tell the origin: every row in it has a zero
+	 * replica_id. Use the limbo owner stored in the request body instead.
+	 */
+	req.origin_id = req.replica_id;
+	txn_limbo_process(&txn_limbo, &req);
+	return 0;
+}
+
 static int
 memtx_engine_recover_snapshot_row(struct memtx_engine *memtx,
 				  struct xrow_header *row, int *is_space_system)
@@ -233,6 +250,8 @@ memtx_engine_recover_snapshot_row(struct memtx_engine *memtx,
 	if (row->type != IPROTO_INSERT) {
 		if (row->type == IPROTO_RAFT)
 			return memtx_engine_recover_raft(row);
+		if (row->type == IPROTO_PROMOTE)
+			return memtx_engine_recover_synchro(row);
 		diag_set(ClientError, ER_UNKNOWN_REQUEST_TYPE,
 			 (uint32_t) row->type);
 		return -1;
@@ -546,6 +565,7 @@ struct checkpoint {
 	struct vclock vclock;
 	struct xdir dir;
 	struct raft_request raft;
+	struct synchro_request synchro_state;
 	/**
 	 * Do nothing, just touch the snapshot file - the
 	 * checkpoint already exists.
@@ -571,6 +591,7 @@ checkpoint_new(const char *snap_dirname, uint64_t snap_io_rate_limit)
 	xdir_create(&ckpt->dir, snap_dirname, SNAP, &INSTANCE_UUID, &opts);
 	vclock_create(&ckpt->vclock);
 	box_raft_checkpoint_local(&ckpt->raft);
+	txn_limbo_checkpoint(&txn_limbo, &ckpt->synchro_state);
 	ckpt->touch = false;
 	return ckpt;
 }
@@ -659,6 +680,15 @@ finish:
 	return rc;
 }
 
+static int /* result of checkpoint_write_row() */
+checkpoint_write_synchro(struct xlog *l, const struct synchro_request *req)
+{
+	struct xrow_header row;
+	char body[XROW_SYNCHRO_BODY_LEN_MAX]; /* encoder's scratch buffer */
+	xrow_encode_synchro(&row, body, req);
+	return checkpoint_write_row(l, &row);
+}
+
 static int
 checkpoint_f(va_list ap)
 {
@@ -696,6 +726,8 @@ checkpoint_f(va_list ap)
 	}
 	if (checkpoint_write_raft(&snap, &ckpt->raft) != 0)
 		goto fail;
+	if (checkpoint_write_synchro(&snap, &ckpt->synchro_state) != 0)
+		goto fail;
 	if (xlog_flush(&snap) < 0)
 		goto fail;
 
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index 6e5d6d04e..991c47698 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -306,6 +306,16 @@ complete:
 	return 0;
 }
 
+void /* fills type, replica_id, lsn and term; origin_id is not set */
+txn_limbo_checkpoint(const struct txn_limbo *limbo,
+		     struct synchro_request *req)
+{
+	req->type = IPROTO_PROMOTE;
+	req->replica_id = limbo->owner_id; /* current limbo owner */
+	req->lsn = limbo->confirmed_lsn; /* latest confirmed lsn */
+	req->term = limbo->promote_greatest_term;
+}
+
 static void
 txn_limbo_write_synchro(struct txn_limbo *limbo, uint16_t type, int64_t lsn,
 			uint64_t term)
diff --git a/src/box/txn_limbo.h b/src/box/txn_limbo.h
index 7debbc0b9..7151843f4 100644
--- a/src/box/txn_limbo.h
+++ b/src/box/txn_limbo.h
@@ -315,6 +315,13 @@ txn_limbo_wait_confirm(struct txn_limbo *limbo);
 int
 txn_limbo_wait_empty(struct txn_limbo *limbo, double timeout);
 
+/**
+ * Describe the current limbo state as a PROMOTE request written to @a req.
+ */
+void
+txn_limbo_checkpoint(const struct txn_limbo *limbo,
+		     struct synchro_request *req);
+
 /**
  * Write a PROMOTE request, which has the same effect as CONFIRM(@a lsn) and
  * ROLLBACK(@a lsn + 1) combined.
-- 
2.30.1 (Apple Git-130)



More information about the Tarantool-patches mailing list