From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from [87.239.111.99] (localhost [127.0.0.1]) by dev.tarantool.org (Postfix) with ESMTP id DB9806BD0D; Sun, 11 Apr 2021 20:57:10 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org DB9806BD0D DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tarantool.org; s=dev; t=1618163830; bh=OR4DWc/bJwXaw8R1WEOv6SP+lAJ0Yo4/l6oVgxO+QPk=; h=To:Date:In-Reply-To:References:Subject:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To:Cc: From; b=cIjT6MFcEHYMDCjml4zdkFuTQzmDh/wpdjC0EY/Fr+WhwFHSSvJc/0Fw0fV4EjsuI 79Pagxk2TH05PEWyvNqKjrgr5v1YGoNeaL871NsPzZIXIm8T4olqYcWVUei9B/148p ls0SrF6KAvYG6bIiLvVRNM3HktnKpFa/gUynGW/g= Received: from smtp42.i.mail.ru (smtp42.i.mail.ru [94.100.177.102]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 992DB6BD0E for ; Sun, 11 Apr 2021 20:56:10 +0300 (MSK) DKIM-Filter: OpenDKIM Filter v2.11.0 dev.tarantool.org 992DB6BD0E Received: by smtp42.i.mail.ru with esmtpa (envelope-from ) id 1lVeJd-0001Ua-Vw; Sun, 11 Apr 2021 20:56:10 +0300 To: v.shpilevoy@tarantool.org, gorcunov@gmail.com Date: Sun, 11 Apr 2021 20:55:57 +0300 Message-Id: <45386860deded154a47c6a07b0537b88e362aa51.1618163409.git.sergepetrenko@tarantool.org> X-Mailer: git-send-email 2.24.3 (Apple Git-128) In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-7564579A: 646B95376F6C166E X-77F55803: 4F1203BC0FB41BD92FFCB8E6708E74806859AC5FE18436AEED970E897805ADA4182A05F538085040C11614A48490DB6FFD4EE69F2CC9B3B54C37A00339B70341084341196AF61951 X-7FA49CB5: 
FF5795518A3D127A4AD6D5ED66289B5278DA827A17800CE7F09446BC3D835A58EA1F7E6F0F101C67BD4B6F7A4D31EC0BCC500DACC3FED6E28638F802B75D45FF8AA50765F7900637F898CA578D17CA188638F802B75D45FF914D58D5BE9E6BC1A93B80C6DEB9DEE97C6FB206A91F05B2991A1F2A3C3D7339400416A609C0709DAD368CDB2FD0D9CCD2E47CDBA5A96583C09775C1D3CA48CF62968DCAA3E4B45B117882F4460429724CE54428C33FAD30A8DF7F3B2552694AC26CFBAC0749D213D2E47CDBA5A9658378DA827A17800CE7BEE62E5629C982429FA2833FD35BB23DF004C90652538430302FCEF25BFAB3454AD6D5ED66289B5278DA827A17800CE7EE54D3D2257C3E2DD32BA5DBAC0009BE395957E7521B51C20BC6067A898B09E4090A508E0FED6299176DF2183F8FC7C0B91B625A08127FC2CD04E86FAF290E2D7E9C4E3C761E06A71DD303D21008E298D5E8D9A59859A8B6B372FE9A2E580EFC725E5C173C3A84C30584FF81F342DA0735872C767BF85DA2F004C90652538430E4A6367B16DE6309 X-C1DE0DAB: C20DE7B7AB408E4181F030C43753B8183A4AFAF3EA6BDC44E1F4276B80994196E44850EFB5864EA2EED75FC3D22297B6444EEAD89E6D583E9C2B6934AE262D3EE7EAB7254005DCED7532B743992DF240BDC6A1CF3F042BAD6DF99611D93F60EF0417BEADF48D1460699F904B3F4130E343918A1A30D5E7FCCB5012B2E24CD356 X-C8649E89: 4E36BF7865823D7055A7F0CF078B5EC49A30900B95165D34F6110710A6CC527F7B7406B5670D0A7CF5BCA353CBC0E6B5201CD9FE32623F7F747D0625F2D52BF01D7E09C32AA3244CF1BA1C6D76B268053664398E61BF397CE8FBBEFAE1C4874C927AC6DF5659F194 X-D57D3AED: 3ZO7eAau8CL7WIMRKs4sN3D3tLDjz0dLbV79QFUyzQ2Ujvy7cMT6pYYqY16iZVKkSc3dCLJ7zSJH7+u4VD18S7Vl4ZUrpaVfd2+vE6kuoey4m4VkSEu530nj6fImhcD4MUrOEAnl0W826KZ9Q+tr5ycPtXkTV4k65bRjmOUUP8cvGozZ33TWg5HZplvhhXbhDGzqmQDTd6OAevLeAnq3Ra9uf7zvY2zzsIhlcp/Y7m53TZgf2aB4JOg4gkr2biojbL9S8ysBdXjz3uqod8pbhWY5so2xLo6t X-Mailru-Sender: 583F1D7ACE8F49BDD2846D59FC20E9F8A3D5B6EFD69438696CC0C66A821C7127A7F5F57E84946CC8424AE0EB1F3D1D21E2978F233C3FAE6EE63DB1732555E4A8EE80603BA4A5B0BC112434F685709FCF0DA7A0AF5A3A8387 X-Mras: Ok Subject: [Tarantool-patches] [PATCH 2/9] xrow: introduce a PROMOTE entry X-BeenThere: tarantool-patches@dev.tarantool.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Tarantool development patches 
List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , From: Serge Petrenko via Tarantool-patches Reply-To: Serge Petrenko Cc: tarantool-patches@dev.tarantool.org Errors-To: tarantool-patches-bounces@dev.tarantool.org Sender: "Tarantool-patches" A PROMOTE entry combines the effect of CONFIRM, ROLLBACK and RAFT_TERM entries with some additional semantics on top. PROMOTE carries the following arguments: 1) former_leader_id - the id of the previous limbo owner whose entries we want to confirm. 2) confirm_lsn - the lsn of the last former leader's transaction to be confirmed. In this sense PROMOTE(confirm_lsn) replaces CONFIRM(confirm_lsn) + ROLLBACK(confirm_lsn + 1). 3) replica_id - id of the instance issuing `box.ctl.clear_synchro_queue()` 4) term - the new term the instance issuing `box.ctl.clear_synchro_queue()` has just entered. This entry will be written to WAL instead of the usual CONFIRM + ROLLBACK pair on a successful `box.ctl.clear_synchro_queue()` call. Note, the usual CONFIRM and ROLLBACK occurrences (after a confirmed or rolled back synchronous transaction) are here to stay. Part of #5445 --- src/box/iproto_constants.h | 17 ++++++++++++++++- src/box/xrow.c | 30 +++++++++++++++++++++++++----- src/box/xrow.h | 24 +++++++++++++++++++++++- 3 files changed, 64 insertions(+), 7 deletions(-) diff --git a/src/box/iproto_constants.h b/src/box/iproto_constants.h index f7f46088f..816a308d8 100644 --- a/src/box/iproto_constants.h +++ b/src/box/iproto_constants.h @@ -132,6 +132,18 @@ enum iproto_key { IPROTO_REPLICA_ANON = 0x50, IPROTO_ID_FILTER = 0x51, IPROTO_ERROR = 0x52, + /** + * Term. Has the same meaning as IPROTO_RAFT_TERM, but is an iproto + * key, rather than a raft key. Used for PROMOTE request, which needs + * both iproto (e.g. REPLICA_ID) and raft (RAFT_TERM) keys. + */ + IPROTO_TERM = 0x53, + /* + * Be careful to not extend iproto_key values over 0x7f.
+ * iproto_keys are encoded in msgpack as positive fixnum, which ends at + * 0x7f, and we rely on this in some places by allocating a uint8_t to + * hold a msgpack-encoded key value. + */ IPROTO_KEY_MAX }; @@ -226,6 +238,8 @@ enum iproto_type { IPROTO_TYPE_STAT_MAX, IPROTO_RAFT = 30, + /** PROMOTE request. */ + IPROTO_PROMOTE = 31, /** A confirmation message for synchronous transactions. */ IPROTO_CONFIRM = 40, @@ -344,7 +358,8 @@ dml_request_key_map(uint32_t type) static inline bool iproto_type_is_synchro_request(uint32_t type) { - return type == IPROTO_CONFIRM || type == IPROTO_ROLLBACK; + return type == IPROTO_CONFIRM || type == IPROTO_ROLLBACK || + type == IPROTO_PROMOTE; } static inline bool diff --git a/src/box/xrow.c b/src/box/xrow.c index cc8e43ed4..70ba075f8 100644 --- a/src/box/xrow.c +++ b/src/box/xrow.c @@ -890,11 +890,11 @@ xrow_encode_synchro(struct xrow_header *row, const struct synchro_request *req) { /* - * A map with two elements. We don't compress + * A map with two or three elements. We don't compress * numbers to have this structure constant in size, * which allows us to preallocate it on stack. */ - body->m_body = 0x80 | 2; + body->m_body = 0x80 | (req->type == IPROTO_PROMOTE ? 3 : 2); body->k_replica_id = IPROTO_REPLICA_ID; body->m_replica_id = 0xce; body->v_replica_id = mp_bswap_u32(req->replica_id); @@ -903,10 +903,24 @@ xrow_encode_synchro(struct xrow_header *row, body->v_lsn = mp_bswap_u64(req->lsn); memset(row, 0, sizeof(*row)); - row->type = req->type; - row->body[0].iov_base = (void *)body; - row->body[0].iov_len = sizeof(*body); + + /* Promote body is longer. It has an additional IPROTO_TERM field. 
*/ + if (req->type == IPROTO_PROMOTE) { + struct promote_body_bin *promote_body = + (struct promote_body_bin *)body; + + promote_body->k_term = IPROTO_TERM; + promote_body->m_term = 0xcf; + promote_body->v_term = mp_bswap_u64(req->term); + + row->body[0].iov_base = (void *)promote_body; + row->body[0].iov_len = sizeof(*promote_body); + } else { + row->body[0].iov_base = (void *)body; + row->body[0].iov_len = sizeof(*body); + } + row->bodycnt = 1; } @@ -952,11 +966,17 @@ xrow_decode_synchro(const struct xrow_header *row, struct synchro_request *req) case IPROTO_LSN: req->lsn = mp_decode_uint(&d); break; + case IPROTO_TERM: + req->term = mp_decode_uint(&d); + break; default: mp_next(&d); } } + req->type = row->type; + req->origin_id = row->replica_id; + return 0; } diff --git a/src/box/xrow.h b/src/box/xrow.h index 2a18733c0..af4ad0d12 100644 --- a/src/box/xrow.h +++ b/src/box/xrow.h @@ -226,7 +226,10 @@ xrow_encode_dml(const struct request *request, struct region *region, * pending synchronous transactions. */ struct synchro_request { - /** Operation type - IPROTO_ROLLBACK or IPROTO_CONFIRM. */ + /** + * Operation type - either IPROTO_ROLLBACK or IPROTO_CONFIRM or + * IPROTO_PROMOTE + */ uint32_t type; /** * ID of the instance owning the pending transactions. @@ -236,14 +239,25 @@ struct synchro_request { * finish transactions of an old master. */ uint32_t replica_id; + /** + * Id of the instance which has issued this request. Only filled on + * decoding, and left blank when encoding a request. + */ + uint32_t origin_id; /** * Operation LSN. * In case of CONFIRM it means 'confirm all * transactions with lsn <= this value'. * In case of ROLLBACK it means 'rollback all transactions * with lsn >= this value'. + * In case of PROMOTE it means CONFIRM(lsn) + ROLLBACK(lsn+1) */ int64_t lsn; + /** + * The new term the instance issuing this request is in. Only used for + * PROMOTE request. + */ + uint64_t term; }; /** Synchro request xrow's body in MsgPack format. 
*/ @@ -257,6 +271,14 @@ struct PACKED synchro_body_bin { uint64_t v_lsn; }; +/** PROMOTE request's xrow body in MsgPack format. */ +struct PACKED promote_body_bin { + struct synchro_body_bin base; + uint8_t k_term; + uint8_t m_term; + uint64_t v_term; +}; + /** * Encode synchronous replication request. * @param row xrow header. -- 2.24.3 (Apple Git-128)