[Tarantool-patches] [PATCH v4 09/16] box: split promote() into reasonable parts

Sergey Petrenko sergepetrenko at tarantool.org
Fri Jul 23 10:45:03 MSK 2021


22.07.2021 01:26, Vladislav Shpilevoy пишет:
> Thanks for working on this!
>
> See 3 comments below.

Thanks for the review!


>> diff --git a/src/box/box.cc b/src/box/box.cc
>> index 86370514a..445875f8f 100644
>> --- a/src/box/box.cc
>> +++ b/src/box/box.cc
>> @@ -1527,6 +1527,147 @@ box_wait_quorum(uint32_t lead_id, int64_t target_lsn, int quorum,
> <...>
>
>> +
>> +/**
>> + * Check whether the greatest promote term has changed since it was last read.
>> + * IOW check that a foreign PROMOTE arrived while we were sleeping.
>> + */
>> +static int
>> +box_check_promote_term_changed(uint64_t promote_term)
> 1. Normally you call check functions using the pattern
> "check_something_correct". Here the correct behaviour is the term
> being intact. So I propose to rename it to box_check_promote_term_intact.


Ok, sure.


>> +{
>> +	if (txn_limbo.promote_greatest_term != promote_term) {
>> +		diag_set(ClientError, ER_INTERFERING_PROMOTE,
>> +			 txn_limbo.owner_id);
>> +		return -1;
>> +	}
>> +	return 0;
>> +}
> <...>
>
>> +
>> +/**
>> + * A helper to wait until all limbo entries are ready to be confirmed, i.e.
>> + * written to WAL and have gathered a quorum of ACKs from replicas.
>> + * Return lsn of the last limbo entry on success, -1 on error.
>> + */
>> +static int64_t
>> +box_wait_limbo_acked(void)
>> +{
>> +	if (txn_limbo_is_empty(&txn_limbo))
>> +		return txn_limbo.confirmed_lsn;
>> +
>> +	uint64_t promote_term = txn_limbo.promote_greatest_term;
>> +	int quorum = replication_synchro_quorum;
>> +	struct txn_limbo_entry *last_entry;
>> +	last_entry = txn_limbo_last_synchro_entry(&txn_limbo);
>> +	/* Wait for the last entries WAL write. */
>> +	if (last_entry->lsn < 0) {
>> +		int64_t tid = last_entry->txn->id;
>> +
>> +		if (wal_sync(NULL) < 0)
>> +			return -1;
>> +
>> +		if (box_check_promote_term_changed(promote_term) < 0)
> 2. Why < 0? It is not in the code guidelines, but don't we usually
> use '!= 0'? '< 0' normally assumes you can get > 0, 0, and < 0 meaning
> different things, like it is done in iproto occasionally.


I've put '< 0' here without a second thought.

I'm just used to if (smth() < 0) { err; }, I guess.

AFAICS there are more places where we use if (rc != 0) { err; },

so I'll change my code accordingly.


>> +			return -1;
>> +		if (txn_limbo_is_empty(&txn_limbo))
>> +			return txn_limbo.confirmed_lsn;
>> +		if (tid != txn_limbo_last_synchro_entry(&txn_limbo)->txn->id) {
>> +			diag_set(ClientError, ER_QUORUM_WAIT, quorum,
>> +				 "new synchronous transactions appeared");
>> +			return -1;
>> +		}
>> +	}
> <...>
>
>> +
>> +/** Write and process a PROMOTE request. */
>> +static void
>> +box_issue_promote(uint32_t prev_leader_id, int64_t promote_lsn)
>> +{
>> +	assert(box_raft()->volatile_term == box_raft()->term);
>> +	assert(promote_lsn >= 0);
>> +	txn_limbo_write_promote(&txn_limbo, promote_lsn,
>> +				box_raft()->term);
> 3. Maybe cache box_raft() value in a variable? Its usage would look shorter
> then. The same in other places where it is used more than once. Up to
> you.

Done.

Incremental diff:

=========================

diff --git a/src/box/box.cc b/src/box/box.cc
index 341857267..d83c30918 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1563,7 +1563,7 @@ box_run_elections(void)
   * IOW check that a foreign PROMOTE arrived while we were sleeping.
   */
  static int
-box_check_promote_term_changed(uint64_t promote_term)
+box_check_promote_term_intact(uint64_t promote_term)
  {
      if (txn_limbo.promote_greatest_term != promote_term) {
          diag_set(ClientError, ER_INTERFERING_PROMOTE,
@@ -1579,7 +1579,7 @@ box_try_wait_confirm(double timeout)
  {
      uint64_t promote_term = txn_limbo.promote_greatest_term;
      txn_limbo_wait_empty(&txn_limbo, timeout);
-    return box_check_promote_term_changed(promote_term);
+    return box_check_promote_term_intact(promote_term);
  }

  /**
@@ -1604,7 +1604,7 @@ box_wait_limbo_acked(void)
          if (wal_sync(NULL) < 0)
              return -1;

-        if (box_check_promote_term_changed(promote_term) < 0)
+        if (box_check_promote_term_intact(promote_term) != 0)
              return -1;
          if (txn_limbo_is_empty(&txn_limbo))
              return txn_limbo.confirmed_lsn;
@@ -1618,10 +1618,10 @@ box_wait_limbo_acked(void)
      int64_t wait_lsn = last_entry->lsn;

      if (box_wait_quorum(txn_limbo.owner_id, wait_lsn, quorum,
-                replication_synchro_timeout) < 0)
+                replication_synchro_timeout) != 0)
          return -1;

-    if (box_check_promote_term_changed(promote_term) < 0)
+    if (box_check_promote_term_intact(promote_term) != 0)
          return -1;

      if (txn_limbo_is_empty(&txn_limbo))
@@ -1722,10 +1722,10 @@ box_promote(void)

      int64_t wait_lsn = -1;

-    if (run_elections && box_run_elections() < 0)
+    if (run_elections && box_run_elections() != 0)
          return -1;
      if (try_wait &&
-        box_try_wait_confirm(2 * replication_synchro_timeout) < 0)
+        box_try_wait_confirm(2 * replication_synchro_timeout) != 0)
          return -1;
      if ((wait_lsn = box_wait_limbo_acked()) < 0)
          return -1;

=========================

>> +	struct synchro_request req = {
>> +		.type = IPROTO_PROMOTE,
>> +		.replica_id = prev_leader_id,
>> +		.origin_id = instance_id,
>> +		.lsn = promote_lsn,
>> +		.term = box_raft()->term,
>> +	};
>> +	txn_limbo_process(&txn_limbo, &req);
>> +	assert(txn_limbo_is_empty(&txn_limbo));
>> +}


More information about the Tarantool-patches mailing list