Tarantool development patches archive
 help / color / mirror / Atom feed
From: Serge Petrenko via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: v.shpilevoy@tarantool.org, gorcunov@gmail.com
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH v4 03/12] xrow: introduce a PROMOTE entry
Date: Fri, 16 Apr 2021 19:25:34 +0300	[thread overview]
Message-ID: <7f16f58b3274d2f4e07332b59e4243240a455d2b.1618590211.git.sergepetrenko@tarantool.org> (raw)
In-Reply-To: <cover.1618590211.git.sergepetrenko@tarantool.org>

A PROMOTE entry combines effect of CONFIRM, ROLLBACK and RAFT_TERM
entries with some additional semantics on top.

PROMOTE carries the following arguments:

1) former_leader_id - the id of previous limbo owner whose entries we
   want to confirm.
2) confirm_lsn - the lsn of the last former leader's transaction to be
   confirmed. In this sense PROMOTE(confirm_lsn) replaces
   CONFIRM(confirm_lsn) + ROLLBACK(confirm_lsn + 1).
3) replica_id - id of the instance issuing
   `box.ctl.clear_synchro_queue()`
4) term - the new term the instance issuing
   `box.ctl.clear_synchro_queue()` has just entered.

This entry will be written to WAL instead of the usual CONFIRM +
ROLLBACK pair on a successful `box.ctl.clear_synchro_queue()` call.

Note, the ususal CONFIRM and ROLLBACK occurrences (after a confirmed or
rolled back synchronous transaction) are here to stay.

Part of #5445
---
 src/box/iproto_constants.h | 26 ++++++++++++++++++++--
 src/box/txn_limbo.c        |  4 ++--
 src/box/xrow.c             | 45 ++++++++++++++++++++++++--------------
 src/box/xrow.h             | 31 ++++++++++++++------------
 4 files changed, 71 insertions(+), 35 deletions(-)

diff --git a/src/box/iproto_constants.h b/src/box/iproto_constants.h
index e9d1ef5d6..99c8ca184 100644
--- a/src/box/iproto_constants.h
+++ b/src/box/iproto_constants.h
@@ -132,6 +132,18 @@ enum iproto_key {
 	IPROTO_REPLICA_ANON = 0x50,
 	IPROTO_ID_FILTER = 0x51,
 	IPROTO_ERROR = 0x52,
+	/**
+	 * Term. Has the same meaning as IPROTO_RAFT_TERM, but is an iproto
+	 * key, rather than a raft key. Used for PROMOTE request, which needs
+	 * both iproto (e.g. REPLICA_ID) and raft (RAFT_TERM) keys.
+	 */
+	IPROTO_TERM = 0x53,
+	/*
+	 * Be careful to not extend iproto_key values over 0x7f.
+	 * iproto_keys are encoded in msgpack as positive fixnum, which ends at
+	 * 0x7f, and we rely on this in some places by allocating a uint8_t to
+	 * hold a msgpack-encoded key value.
+	 */
 	IPROTO_KEY_MAX
 };
 
@@ -226,6 +238,8 @@ enum iproto_type {
 	IPROTO_TYPE_STAT_MAX,
 
 	IPROTO_RAFT = 30,
+	/** PROMOTE request. */
+	IPROTO_PROMOTE = 31,
 
 	/** A confirmation message for synchronous transactions. */
 	IPROTO_CONFIRM = 40,
@@ -340,11 +354,19 @@ dml_request_key_map(uint16_t type)
 	return iproto_body_key_map[type];
 }
 
-/** CONFIRM/ROLLBACK entries for synchronous replication. */
+/** Synchronous replication entries: CONFIRM/ROLLBACK/PROMOTE. */
 static inline bool
 iproto_type_is_synchro_request(uint16_t type)
 {
-	return type == IPROTO_CONFIRM || type == IPROTO_ROLLBACK;
+	return type == IPROTO_CONFIRM || type == IPROTO_ROLLBACK ||
+	       type == IPROTO_PROMOTE;
+}
+
+/** PROMOTE entry (synchronous replication and leader elections). */
+static inline bool
+iproto_type_is_promote_request(uint32_t type)
+{
+       return type == IPROTO_PROMOTE;
 }
 
 static inline bool
diff --git a/src/box/txn_limbo.c b/src/box/txn_limbo.c
index addcb0f97..c96e497c6 100644
--- a/src/box/txn_limbo.c
+++ b/src/box/txn_limbo.c
@@ -331,7 +331,7 @@ txn_limbo_write_synchro(struct txn_limbo *limbo, uint16_t type, int64_t lsn)
 	 * This is a synchronous commit so we can
 	 * allocate everything on a stack.
 	 */
-	struct synchro_body_bin body;
+	char body[XROW_SYNCHRO_BODY_LEN_MAX];
 	struct xrow_header row;
 	char buf[sizeof(struct journal_entry) +
 		 sizeof(struct xrow_header *)];
@@ -339,7 +339,7 @@ txn_limbo_write_synchro(struct txn_limbo *limbo, uint16_t type, int64_t lsn)
 	struct journal_entry *entry = (struct journal_entry *)buf;
 	entry->rows[0] = &row;
 
-	xrow_encode_synchro(&row, &body, &req);
+	xrow_encode_synchro(&row, body, &req);
 
 	journal_entry_create(entry, 1, xrow_approx_len(&row),
 			     txn_limbo_write_cb, fiber());
diff --git a/src/box/xrow.c b/src/box/xrow.c
index 35e1d1c20..2e364cea5 100644
--- a/src/box/xrow.c
+++ b/src/box/xrow.c
@@ -885,28 +885,33 @@ xrow_encode_dml(const struct request *request, struct region *region,
 }
 
 void
-xrow_encode_synchro(struct xrow_header *row,
-		    struct synchro_body_bin *body,
+xrow_encode_synchro(struct xrow_header *row, char *body,
 		    const struct synchro_request *req)
 {
-	/*
-	 * A map with two elements. We don't compress
-	 * numbers to have this structure constant in size,
-	 * which allows us to preallocate it on stack.
-	 */
-	body->m_body = 0x80 | 2;
-	body->k_replica_id = IPROTO_REPLICA_ID;
-	body->m_replica_id = 0xce;
-	body->v_replica_id = mp_bswap_u32(req->replica_id);
-	body->k_lsn = IPROTO_LSN;
-	body->m_lsn = 0xcf;
-	body->v_lsn = mp_bswap_u64(req->lsn);
+	assert(iproto_type_is_synchro_request(req->type));
 
-	memset(row, 0, sizeof(*row));
+	char *pos = body;
+
+	pos = mp_encode_map(pos,
+			    iproto_type_is_promote_request(req->type) ? 3 : 2);
 
+	pos = mp_encode_uint(pos, IPROTO_REPLICA_ID);
+	pos = mp_encode_uint(pos, req->replica_id);
+
+	pos = mp_encode_uint(pos, IPROTO_LSN);
+	pos = mp_encode_uint(pos, req->lsn);
+
+	if (iproto_type_is_promote_request(req->type)) {
+		pos = mp_encode_uint(pos, IPROTO_TERM);
+		pos = mp_encode_uint(pos, req->term);
+	}
+
+	assert(pos - body < XROW_SYNCHRO_BODY_LEN_MAX);
+
+	memset(row, 0, sizeof(*row));
 	row->type = req->type;
-	row->body[0].iov_base = (void *)body;
-	row->body[0].iov_len = sizeof(*body);
+	row->body[0].iov_base = body;
+	row->body[0].iov_len = pos - body;
 	row->bodycnt = 1;
 }
 
@@ -952,11 +957,17 @@ xrow_decode_synchro(const struct xrow_header *row, struct synchro_request *req)
 		case IPROTO_LSN:
 			req->lsn = mp_decode_uint(&d);
 			break;
+		case IPROTO_TERM:
+			req->term = mp_decode_uint(&d);
+			break;
 		default:
 			mp_next(&d);
 		}
 	}
+
 	req->type = row->type;
+	req->origin_id = row->replica_id;
+
 	return 0;
 }
 
diff --git a/src/box/xrow.h b/src/box/xrow.h
index 5ea99e792..b3c664be2 100644
--- a/src/box/xrow.h
+++ b/src/box/xrow.h
@@ -49,6 +49,7 @@ enum {
 	XROW_IOVMAX = XROW_HEADER_IOVMAX + XROW_BODY_IOVMAX,
 	XROW_HEADER_LEN_MAX = 52,
 	XROW_BODY_LEN_MAX = 256,
+	XROW_SYNCHRO_BODY_LEN_MAX = 32,
 	IPROTO_HEADER_LEN = 28,
 	/** 7 = sizeof(iproto_body_bin). */
 	IPROTO_SELECT_HEADER_LEN = IPROTO_HEADER_LEN + 7,
@@ -226,7 +227,10 @@ xrow_encode_dml(const struct request *request, struct region *region,
  * pending synchronous transactions.
  */
 struct synchro_request {
-	/** Operation type - IPROTO_ROLLBACK or IPROTO_CONFIRM. */
+	/**
+	 * Operation type - either IPROTO_ROLLBACK or IPROTO_CONFIRM or
+	 * IPROTO_PROMOTE
+	 */
 	uint16_t type;
 	/**
 	 * ID of the instance owning the pending transactions.
@@ -236,25 +240,25 @@ struct synchro_request {
 	 * finish transactions of an old master.
 	 */
 	uint32_t replica_id;
+	/**
+	 * Id of the instance which has issued this request. Only filled on
+	 * decoding, and left blank when encoding a request.
+	 */
+	uint32_t origin_id;
 	/**
 	 * Operation LSN.
 	 * In case of CONFIRM it means 'confirm all
 	 * transactions with lsn <= this value'.
 	 * In case of ROLLBACK it means 'rollback all transactions
 	 * with lsn >= this value'.
+	 * In case of PROMOTE it means CONFIRM(lsn) + ROLLBACK(lsn+1)
 	 */
 	int64_t lsn;
-};
-
-/** Synchro request xrow's body in MsgPack format. */
-struct PACKED synchro_body_bin {
-	uint8_t m_body;
-	uint8_t k_replica_id;
-	uint8_t m_replica_id;
-	uint32_t v_replica_id;
-	uint8_t k_lsn;
-	uint8_t m_lsn;
-	uint64_t v_lsn;
+	/**
+	 * The new term the instance issuing this request is in. Only used for
+	 * PROMOTE request.
+	 */
+	uint64_t term;
 };
 
 /**
@@ -264,8 +268,7 @@ struct PACKED synchro_body_bin {
  * @param req Request parameters.
  */
 void
-xrow_encode_synchro(struct xrow_header *row,
-		    struct synchro_body_bin *body,
+xrow_encode_synchro(struct xrow_header *row, char *body,
 		    const struct synchro_request *req);
 
 /**
-- 
2.24.3 (Apple Git-128)


  parent reply	other threads:[~2021-04-16 16:27 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-16 16:25 [Tarantool-patches] [PATCH v4 00/12] raft: introduce manual elections and fix a bug with re-applying rolled back transactions Serge Petrenko via Tarantool-patches
2021-04-16 16:25 ` [Tarantool-patches] [PATCH v4 01/12] wal: make wal_assign_lsn accept journal entry Serge Petrenko via Tarantool-patches
2021-04-16 16:25 ` [Tarantool-patches] [PATCH v4 02/12] xrow: enrich row's meta information with sync replication flags Serge Petrenko via Tarantool-patches
2021-04-16 16:25 ` Serge Petrenko via Tarantool-patches [this message]
2021-04-16 16:25 ` [Tarantool-patches] [PATCH v4 04/12] box: actualise iproto_key_type array Serge Petrenko via Tarantool-patches
2021-04-16 16:25 ` [Tarantool-patches] [PATCH v4 05/12] box: make clear_synchro_queue() write a PROMOTE entry instead of CONFIRM + ROLLBACK Serge Petrenko via Tarantool-patches
2021-04-16 22:12   ` Vladislav Shpilevoy via Tarantool-patches
2021-04-18  8:24     ` Serge Petrenko via Tarantool-patches
2021-04-20 22:30   ` Vladislav Shpilevoy via Tarantool-patches
2021-04-21  5:58     ` Serge Petrenko via Tarantool-patches
2021-04-16 16:25 ` [Tarantool-patches] [PATCH v4 06/12] box: write PROMOTE even for empty limbo Serge Petrenko via Tarantool-patches
2021-04-19 13:39   ` Serge Petrenko via Tarantool-patches
2021-04-16 16:25 ` [Tarantool-patches] [PATCH v4 07/12] raft: filter rows based on known peer terms Serge Petrenko via Tarantool-patches
2021-04-16 22:21   ` Vladislav Shpilevoy via Tarantool-patches
2021-04-18  8:49     ` Serge Petrenko via Tarantool-patches
2021-04-18 15:44     ` Vladislav Shpilevoy via Tarantool-patches
2021-04-19  9:31       ` Serge Petrenko via Tarantool-patches
2021-04-18 16:27   ` Vladislav Shpilevoy via Tarantool-patches
2021-04-19  9:30     ` Serge Petrenko via Tarantool-patches
2021-04-20 20:29   ` Serge Petrenko via Tarantool-patches
2021-04-20 20:31     ` Serge Petrenko via Tarantool-patches
2021-04-20 20:55       ` Serge Petrenko via Tarantool-patches
2021-04-20 22:30       ` Vladislav Shpilevoy via Tarantool-patches
2021-04-21  5:58         ` Serge Petrenko via Tarantool-patches
2021-04-16 16:25 ` [Tarantool-patches] [PATCH v4 08/12] election: introduce a new election mode: "manual" Serge Petrenko via Tarantool-patches
2021-04-19 22:34   ` Vladislav Shpilevoy via Tarantool-patches
2021-04-20  9:25     ` Serge Petrenko via Tarantool-patches
2021-04-20 17:37       ` Serge Petrenko via Tarantool-patches
2021-04-16 16:25 ` [Tarantool-patches] [PATCH v4 09/12] raft: introduce raft_start/stop_candidate Serge Petrenko via Tarantool-patches
2021-04-16 22:23   ` Vladislav Shpilevoy via Tarantool-patches
2021-04-18  8:59     ` Serge Petrenko via Tarantool-patches
2021-04-19 22:35       ` Vladislav Shpilevoy via Tarantool-patches
2021-04-20  9:28         ` Serge Petrenko via Tarantool-patches
2021-04-19 12:52   ` Serge Petrenko via Tarantool-patches
2021-04-16 16:25 ` [Tarantool-patches] [PATCH v4 10/12] election: support manual elections in clear_synchro_queue() Serge Petrenko via Tarantool-patches
2021-04-16 22:24   ` Vladislav Shpilevoy via Tarantool-patches
2021-04-18  9:26     ` Serge Petrenko via Tarantool-patches
2021-04-18 16:07       ` Vladislav Shpilevoy via Tarantool-patches
2021-04-19  9:32         ` Serge Petrenko via Tarantool-patches
2021-04-19 12:47   ` Serge Petrenko via Tarantool-patches
2021-04-16 16:25 ` [Tarantool-patches] [PATCH v4 11/12] box: remove parameter from clear_synchro_queue Serge Petrenko via Tarantool-patches
2021-04-16 16:25 ` [Tarantool-patches] [PATCH v4 12/12] box.ctl: rename clear_synchro_queue to promote Serge Petrenko via Tarantool-patches
2021-04-19 22:35   ` Vladislav Shpilevoy via Tarantool-patches
2021-04-20 10:22     ` Serge Petrenko via Tarantool-patches
2021-04-18 12:00 ` [Tarantool-patches] [PATCH v4 13/12] replication: send accumulated Raft messages after relay start Serge Petrenko via Tarantool-patches
2021-04-18 16:03   ` Vladislav Shpilevoy via Tarantool-patches
2021-04-19 12:11     ` Serge Petrenko via Tarantool-patches
2021-04-19 22:36       ` Vladislav Shpilevoy via Tarantool-patches
2021-04-20 10:38         ` Serge Petrenko via Tarantool-patches
2021-04-20 22:31           ` Vladislav Shpilevoy via Tarantool-patches
2021-04-21  5:59             ` Serge Petrenko via Tarantool-patches
2021-04-19 22:37 ` [Tarantool-patches] [PATCH v4 00/12] raft: introduce manual elections and fix a bug with re-applying rolled back transactions Vladislav Shpilevoy via Tarantool-patches
2021-04-20 17:38 ` [Tarantool-patches] [PATCH v4 14/12] txn: make NOPs fully asynchronous Serge Petrenko via Tarantool-patches
2021-04-20 22:31   ` Vladislav Shpilevoy via Tarantool-patches
2021-04-21  5:59     ` Serge Petrenko via Tarantool-patches
2021-04-20 22:30 ` [Tarantool-patches] [PATCH v4 00/12] raft: introduce manual elections and fix a bug with re-applying rolled back transactions Vladislav Shpilevoy via Tarantool-patches
2021-04-21  6:01   ` Serge Petrenko via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7f16f58b3274d2f4e07332b59e4243240a455d2b.1618590211.git.sergepetrenko@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=gorcunov@gmail.com \
    --cc=sergepetrenko@tarantool.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v4 03/12] xrow: introduce a PROMOTE entry' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox