[Tarantool-patches] [PATCH v3 02/10] xrow: introduce a PROMOTE entry

Serge Petrenko sergepetrenko at tarantool.org
Wed Apr 14 17:17:12 MSK 2021


A PROMOTE entry combines the effects of CONFIRM, ROLLBACK and RAFT_TERM
entries with some additional semantics on top.

PROMOTE carries the following arguments:

1) former_leader_id - the id of the previous limbo owner whose entries we
   want to confirm.
2) confirm_lsn - the lsn of the last former leader's transaction to be
   confirmed. In this sense PROMOTE(confirm_lsn) replaces
   CONFIRM(confirm_lsn) + ROLLBACK(confirm_lsn + 1).
3) replica_id - the id of the instance issuing
   `box.ctl.clear_synchro_queue()`.
4) term - the new term the instance issuing
   `box.ctl.clear_synchro_queue()` has just entered.

This entry will be written to WAL instead of the usual CONFIRM +
ROLLBACK pair on a successful `box.ctl.clear_synchro_queue()` call.

Note, the usual CONFIRM and ROLLBACK occurrences (after a confirmed or
rolled back synchronous transaction) are here to stay.

Part of #5445
---
 src/box/iproto_constants.h | 26 ++++++++++++++++--
 src/box/xrow.c             | 55 +++++++++++++++++++++++++++++++++-----
 src/box/xrow.h             | 34 ++++++++++++++++++++++-
 3 files changed, 105 insertions(+), 10 deletions(-)

diff --git a/src/box/iproto_constants.h b/src/box/iproto_constants.h
index f7f46088f..7d39b0d61 100644
--- a/src/box/iproto_constants.h
+++ b/src/box/iproto_constants.h
@@ -132,6 +132,18 @@ enum iproto_key {
 	IPROTO_REPLICA_ANON = 0x50,
 	IPROTO_ID_FILTER = 0x51,
 	IPROTO_ERROR = 0x52,
+	/**
+	 * Term. Has the same meaning as IPROTO_RAFT_TERM, but is an iproto
+	 * key, rather than a raft key. Used for PROMOTE request, which needs
+	 * both iproto (e.g. REPLICA_ID) and raft (RAFT_TERM) keys.
+	 */
+	IPROTO_TERM = 0x53,
+	/*
+	 * Be careful to not extend iproto_key values over 0x7f.
+	 * iproto_keys are encoded in msgpack as positive fixnum, which ends at
+	 * 0x7f, and we rely on this in some places by allocating a uint8_t to
+	 * hold a msgpack-encoded key value.
+	 */
 	IPROTO_KEY_MAX
 };
 
@@ -226,6 +238,8 @@ enum iproto_type {
 	IPROTO_TYPE_STAT_MAX,
 
 	IPROTO_RAFT = 30,
+	/** PROMOTE request. */
+	IPROTO_PROMOTE = 31,
 
 	/** A confirmation message for synchronous transactions. */
 	IPROTO_CONFIRM = 40,
@@ -340,11 +354,19 @@ dml_request_key_map(uint32_t type)
 	return iproto_body_key_map[type];
 }
 
-/** CONFIRM/ROLLBACK entries for synchronous replication. */
+/** Synchronous replication entries: CONFIRM/ROLLBACK/PROMOTE. */
 static inline bool
 iproto_type_is_synchro_request(uint32_t type)
 {
-	return type == IPROTO_CONFIRM || type == IPROTO_ROLLBACK;
+	return type == IPROTO_CONFIRM || type == IPROTO_ROLLBACK ||
+	       type == IPROTO_PROMOTE;
+}
+
+/** PROMOTE entry (synchronous replication and leader elections). */
+static inline bool
+iproto_type_is_promote_request(uint32_t type)
+{
+       return type == IPROTO_PROMOTE;
 }
 
 static inline bool
diff --git a/src/box/xrow.c b/src/box/xrow.c
index cc8e43ed4..5d515ce92 100644
--- a/src/box/xrow.c
+++ b/src/box/xrow.c
@@ -884,28 +884,63 @@ xrow_encode_dml(const struct request *request, struct region *region,
 	return iovcnt;
 }
 
-void
-xrow_encode_synchro(struct xrow_header *row,
-		    struct synchro_body_bin *body,
-		    const struct synchro_request *req)
+static void
+xrow_encode_synchro_body(struct synchro_body_bin *body,
+		         const struct synchro_request *req)
 {
 	/*
-	 * A map with two elements. We don't compress
+	 * A map with two or three elements. We don't compress
 	 * numbers to have this structure constant in size,
 	 * which allows us to preallocate it on stack.
 	 */
-	body->m_body = 0x80 | 2;
+	body->m_body = 0x80 | (req->type == IPROTO_PROMOTE ? 3 : 2);
 	body->k_replica_id = IPROTO_REPLICA_ID;
 	body->m_replica_id = 0xce;
 	body->v_replica_id = mp_bswap_u32(req->replica_id);
 	body->k_lsn = IPROTO_LSN;
 	body->m_lsn = 0xcf;
 	body->v_lsn = mp_bswap_u64(req->lsn);
+}
+
+void
+xrow_encode_synchro(struct xrow_header *row,
+		    struct synchro_body_bin *body,
+		    const struct synchro_request *req)
+{
+	assert(req->type == IPROTO_CONFIRM || req->type == IPROTO_ROLLBACK);
+
+	xrow_encode_synchro_body(body, req);
 
 	memset(row, 0, sizeof(*row));
+	row->type = req->type;
+	row->body[0].iov_base = body;
+	row->body[0].iov_len = sizeof(*body);
+	row->bodycnt = 1;
+}
+
+static inline void
+xrow_encode_promote_body(struct promote_body_bin *body,
+			 const struct synchro_request *req)
+{
+	xrow_encode_synchro_body(&body->base, req);
+
+	body->k_term = IPROTO_TERM;
+	body->m_term = 0xcf;
+	body->v_term = mp_bswap_u64(req->term);
+}
+
 
+void
+xrow_encode_promote(struct xrow_header *row, struct promote_body_bin *body,
+		    const struct synchro_request *req)
+{
+	assert(req->type == IPROTO_PROMOTE);
+
+	xrow_encode_promote_body(body, req);
+
+	memset(row, 0, sizeof(*row));
 	row->type = req->type;
-	row->body[0].iov_base = (void *)body;
+	row->body[0].iov_base = body;
 	row->body[0].iov_len = sizeof(*body);
 	row->bodycnt = 1;
 }
@@ -952,11 +987,17 @@ xrow_decode_synchro(const struct xrow_header *row, struct synchro_request *req)
 		case IPROTO_LSN:
 			req->lsn = mp_decode_uint(&d);
 			break;
+		case IPROTO_TERM:
+			req->term = mp_decode_uint(&d);
+			break;
 		default:
 			mp_next(&d);
 		}
 	}
+
 	req->type = row->type;
+	req->origin_id = row->replica_id;
+
 	return 0;
 }
 
diff --git a/src/box/xrow.h b/src/box/xrow.h
index 1bb0964dc..51442f9b6 100644
--- a/src/box/xrow.h
+++ b/src/box/xrow.h
@@ -226,7 +226,10 @@ xrow_encode_dml(const struct request *request, struct region *region,
  * pending synchronous transactions.
  */
 struct synchro_request {
-	/** Operation type - IPROTO_ROLLBACK or IPROTO_CONFIRM. */
+	/**
+	 * Operation type - one of IPROTO_ROLLBACK, IPROTO_CONFIRM or
+	 * IPROTO_PROMOTE.
+	 */
 	uint32_t type;
 	/**
 	 * ID of the instance owning the pending transactions.
@@ -236,14 +239,25 @@ struct synchro_request {
 	 * finish transactions of an old master.
 	 */
 	uint32_t replica_id;
+	/**
+	 * Id of the instance which has issued this request. Only filled on
+	 * decoding, and left blank when encoding a request.
+	 */
+	uint32_t origin_id;
 	/**
 	 * Operation LSN.
 	 * In case of CONFIRM it means 'confirm all
 	 * transactions with lsn <= this value'.
 	 * In case of ROLLBACK it means 'rollback all transactions
 	 * with lsn >= this value'.
+	 * In case of PROMOTE it means CONFIRM(lsn) + ROLLBACK(lsn+1)
 	 */
 	int64_t lsn;
+	/**
+	 * The new term the instance issuing this request is in. Only used for
+	 * PROMOTE request.
+	 */
+	uint64_t term;
 };
 
 /** Synchro request xrow's body in MsgPack format. */
@@ -257,6 +271,14 @@ struct PACKED synchro_body_bin {
 	uint64_t v_lsn;
 };
 
+/** PROMOTE request's xrow body in MsgPack format. */
+struct PACKED promote_body_bin {
+	struct synchro_body_bin base;
+	uint8_t k_term;
+	uint8_t m_term;
+	uint64_t v_term;
+};
+
 /**
  * Encode synchronous replication request.
  * @param row xrow header.
@@ -268,6 +290,16 @@ xrow_encode_synchro(struct xrow_header *row,
 		    struct synchro_body_bin *body,
 		    const struct synchro_request *req);
 
+/**
+ * Encode a promote request.
+ * @param row xrow header.
+ * @param body A place to encode promote body.
+ * @param req Request parameters.
+ */
+void
+xrow_encode_promote(struct xrow_header *row, struct promote_body_bin *body,
+		    const struct synchro_request *req);
+
 /**
  * Decode synchronous replication request.
  * @param row xrow header.
-- 
2.24.3 (Apple Git-128)



More information about the Tarantool-patches mailing list