[tarantool-patches] Re: [PATCH 2/6] sql: separate VDBE memory holding positive and negative ints

Mon Jul 1 17:21:10 MSK 2019

>> diff --git a/src/box/sql/func.c b/src/box/sql/func.c
>> index bb7405e68..f4c1cbcca 100644
>> --- a/src/box/sql/func.c
>> +++ b/src/box/sql/func.c
>> @@ -111,6 +111,7 @@ typeofFunc(sql_context * context, int NotUsed, sql_value ** argv)
>> 	UNUSED_PARAMETER(NotUsed);
>> 	switch (sql_value_type(argv[0])) {
>> 	case MP_INT:
>> +	case MP_UINT:
> 
> 1. Why on MP_UINT do you return 'integer' instead of
> 'unsigned’?

It is fixed in the next patch (and the rest of built-in functions).

>> 		z = "integer";
>> diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
>> index 6ecdb26fc..d141397a0 100644
>> --- a/src/box/sql/vdbe.c
>> +++ b/src/box/sql/vdbe.c
>> @@ -900,8 +904,8 @@ case OP_InitCoroutine: {     /* jump */
>> 	assert(pOp->p3>=0 && pOp->p3<p->nOp);
>> 	pOut = &aMem[pOp->p1];
>> 	assert(!VdbeMemDynamic(pOut));
>> -	pOut->u.i = pOp->p3 - 1;
>> -	pOut->flags = MEM_Int;
>> +	pOut->u.u = pOp->p3 - 1;
> 
> 2. Why are you sure, that p3 >= 1? According to
> the assertion above it is >= 0, but we don't know
> anything about p3 >= 1.

P3 is an address of opcode. OP_Init features 0 address
(always), ergo other instructions have addresses >= 1.

>> @@ -1099,7 +1108,7 @@ case OP_Bool: {         /* out2 */
>> case OP_Int64: {           /* out2 */
>> 	pOut = out2Prerelease(p, pOp);
>> 	assert(pOp->p4.pI64!=0);
>> -	pOut->u.i = *pOp->p4.pI64;
>> +	mem_set_int(pOut, *pOp->p4.pI64, *pOp->p4.pI64 < 0);
> 
> 3. Sorry, but such places are hell. You pass separately a value
> and a flag if it is negative. Please, introduce separate functions
> 
>    /* Set a value, check sign inside. */
>    mem_set_i64(int64_t value);
> 
>    /* Set a non-negative value. */
>    mem_set_u64(uint64_t value);
> 
>    /* Set a big integer with sign passed separately. */
>    mem_set_int(uint64_t value, bool is_negative);
> 
> In most places you will use mem_set_i64 and mem_set_u64. It
> will look shorter and somewhere work faster.

Done. Note that I’ve also rebased onto the fresh master,
so real diff may turn out to be slightly different.

diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index 602b39eb9..6d6300f97 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -313,7 +313,7 @@ mem_apply_type(struct Mem *record, enum field_type type)
                if ((record->flags & MEM_Real) == MEM_Real) {
                        int64_t i = (int64_t) record->u.r;
                        if (i == record->u.r)
-                               mem_set_int(record, i, i < 0);
+                               mem_set_i64(record, i);
                        return 0;
                }
                return sqlVdbeMemIntegerify(record, false);
@@ -898,7 +898,7 @@ case OP_Gosub: {            /* jump */
        pIn1 = &aMem[pOp->p1];
        assert(VdbeMemDynamic(pIn1)==0);
        memAboutToChange(p, pIn1);
-       mem_set_int(pIn1, pOp - aOp, false);
+       mem_set_u64(pIn1, pOp - aOp);
        REGISTER_TRACE(p, pOp->p1, pIn1);
 
        /* Most jump operations do a goto to this spot in order to update
@@ -939,7 +939,7 @@ case OP_InitCoroutine: {     /* jump */
        assert(pOp->p3>=0 && pOp->p3<p->nOp);
        pOut = &aMem[pOp->p1];
        assert(!VdbeMemDynamic(pOut));
-       mem_set_int(pOut, pOp->p3 - 1, false);
+       mem_set_u64(pOut, pOp->p3 - 1);
        if (pOp->p2) goto jump_to_p2;
        break;
 }
@@ -982,7 +982,7 @@ case OP_Yield: {            /* in1, jump */
        pIn1 = &aMem[pOp->p1];
        assert(VdbeMemDynamic(pIn1)==0);
        int pcDest = (int)pIn1->u.u;
-       mem_set_int(pIn1, pOp - aOp, false);
+       mem_set_u64(pIn1, pOp - aOp);
        REGISTER_TRACE(p, pOp->p1, pIn1);
        pOp = &aOp[pcDest];
        break;
@@ -1064,7 +1064,7 @@ case OP_Integer: {         /* out2 */
        if (pOp->p1 < 0)
                pOut->u.i = pOp->p1;
        else
-               mem_set_int(pOut, pOp->p1, false);
+               mem_set_u64(pOut, pOp->p1);
        break;
 }
 
@@ -1089,7 +1089,7 @@ case OP_Bool: {         /* out2 */
 case OP_Int64: {           /* out2 */
        pOut = out2Prerelease(p, pOp);
        assert(pOp->p4.pI64!=0);
-       mem_set_int(pOut, *pOp->p4.pI64, *pOp->p4.pI64 < 0);
+       mem_set_i64(pOut, *pOp->p4.pI64);
        break;
 }
 
@@ -1173,7 +1173,7 @@ case OP_NextAutoincValue: {
                goto abort_due_to_error;
 
        pOut = out2Prerelease(p, pOp);
-       mem_set_int(pOut, value, value < 0);
+       mem_set_i64(pOut, value);
 
        break;
 }
@@ -1605,7 +1605,7 @@ case OP_Remainder: {           /* same as TK_REM, in1, in2, out3 */
                        break;
                }
                }
-               mem_set_int(pOut, iB, iB < 0);
+               mem_set_i64(pOut, iB);
        } else {
                bIntint = 0;
                if (sqlVdbeRealValue(pIn1, &rA) != 0) {
@@ -1870,7 +1870,7 @@ case OP_ShiftRight: {           /* same as TK_RSHIFT, in1, in2, out3 */
                        memcpy(&iA, &uA, sizeof(iA));
                }
        }
-       mem_set_int(pOut, iA, iA < 0);
+       mem_set_i64(pOut, iA);
        break;
 }
 
@@ -2437,7 +2437,7 @@ case OP_BitNot: {             /* same as TK_BITNOT, in1, out2 */
                                 sql_value_text(pIn1), "integer");
                        goto abort_due_to_error;
                }
-               mem_set_int(pOut, ~i, ~i < 0);
+               mem_set_i64(pOut, ~i);
        }
        break;
 }
@@ -2798,7 +2798,7 @@ case OP_Count: {         /* out2 */
                nEntry = tarantoolsqlEphemeralCount(pCrsr);
        }
        pOut = out2Prerelease(p, pOp);
-       mem_set_int(pOut, nEntry, false);
+       mem_set_u64(pOut, nEntry);
        break;
 }
 
@@ -3603,7 +3603,7 @@ case OP_Sequence: {           /* out2 */
        assert(p->apCsr[pOp->p1]!=0);
        pOut = out2Prerelease(p, pOp);
        int64_t seq_val = p->apCsr[pOp->p1]->seqCount++;
-       mem_set_int(pOut, seq_val, false);
+       mem_set_u64(pOut, seq_val);
        break;
 }
 
@@ -3619,8 +3619,7 @@ case OP_NextSequenceId: {
        uint64_t id;
        tarantoolSqlNextSeqId(&id);
        id++;
-       pOut->u.u = id;
-       pOut->flags = MEM_UInt;
+       mem_set_u64(pOut, id);
        break;
 }
 
@@ -3650,7 +3649,7 @@ case OP_NextIdEphemeral: {
                goto abort_due_to_error;
        }
        pOut = &aMem[pOp->p2];
-       mem_set_int(pOut, rowid, false);
+       mem_set_u64(pOut, rowid);
        break;
 }
 
@@ -4904,9 +4903,9 @@ case OP_OffsetLimit: {    /* in1, out2, in3 */
                 * it would take nearly 300 years to actually reach the limit.  So
                 * looping forever is a reasonable approximation.
                 */
-               mem_set_int(pOut, -1, true);
+               mem_set_i64(pOut, -1);
        } else {
-               mem_set_int(pOut, x, false);
+               mem_set_u64(pOut, x);
        }
        break;
 }
diff --git a/src/box/sql/vdbeInt.h b/src/box/sql/vdbeInt.h
index 8df34bb27..b83926a66 100644
--- a/src/box/sql/vdbeInt.h
+++ b/src/box/sql/vdbeInt.h
@@ -470,6 +470,21 @@ mem_set_bool(struct Mem *mem, bool value);
 void
 mem_set_ptr(struct Mem *mem, void *ptr);
 
+/**
+ * Set integer value. Depending on its sign MEM_Int (in case
+ * of negative value) or MEM_UInt flag is set.
+ */
+void
+mem_set_i64(struct Mem *mem, int64_t value);
+
+/** Set unsigned value and MEM_UInt flag. */
+void
+mem_set_u64(struct Mem *mem, uint64_t value);
+
+/**
+ * Set integer value. According to is_neg flag value is considered
+ * to be signed or unsigned.
+ */
 void
 mem_set_int(struct Mem *mem, int64_t value, bool is_neg);
 
diff --git a/src/box/sql/vdbeapi.c b/src/box/sql/vdbeapi.c
index 57bf25ed8..705e869bc 100644
--- a/src/box/sql/vdbeapi.c
+++ b/src/box/sql/vdbeapi.c
@@ -322,7 +322,7 @@ sql_result_double(sql_context * pCtx, double rVal)
 void
 sql_result_int(sql_context * pCtx, int iVal)
 {
-       mem_set_int(pCtx->pOut, iVal, iVal < 0);
+       mem_set_i64(pCtx->pOut, iVal);
 }
 
 void
@@ -334,7 +334,7 @@ sql_result_bool(struct sql_context *ctx, bool value)
 void
 sql_result_int64(sql_context * pCtx, i64 iVal)
 {
-       mem_set_int(pCtx->pOut, iVal, iVal < 0);
+       mem_set_i64(pCtx->pOut, iVal);
 }
 
 void
@@ -998,7 +998,7 @@ sql_bind_int64(sql_stmt * pStmt, int i, sql_int64 iValue)
        if (vdbeUnbind(p, i) != 0)
                return -1;
        int rc = sql_bind_type(p, i, "INTEGER");
-       mem_set_int(&p->aVar[i - 1], iValue, iValue < 0);
+       mem_set_i64(&p->aVar[i - 1], iValue);
        return rc;
 }
 
diff --git a/src/box/sql/vdbeaux.c b/src/box/sql/vdbeaux.c
index 60a4e8669..d5ecd8f43 100644
--- a/src/box/sql/vdbeaux.c
+++ b/src/box/sql/vdbeaux.c
@@ -1430,7 +1430,7 @@ sqlVdbeList(Vdbe * p)
                }
                if (p->explain == 1) {
                        assert(i >= 0);
-                       mem_set_int(pMem, i, false);
+                       mem_set_u64(pMem, i);
 
                        pMem++;
 
@@ -1463,13 +1463,13 @@ sqlVdbeList(Vdbe * p)
                        }
                }
 
-               mem_set_int(pMem, pOp->p1, pOp->p1 < 0);
+               mem_set_i64(pMem, pOp->p1);
                pMem++;
 
-               mem_set_int(pMem, pOp->p2, pOp->p2 < 0);
+               mem_set_i64(pMem, pOp->p2);
                pMem++;
 
-               mem_set_int(pMem, pOp->p3, pOp->p3 < 0);
+               mem_set_i64(pMem, pOp->p3);
                pMem++;
 
                if (sqlVdbeMemClearAndResize(pMem, 256)) {
diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
index 51d4787f7..5387c6f33 100644
--- a/src/box/sql/vdbemem.c
+++ b/src/box/sql/vdbemem.c
@@ -798,6 +798,25 @@ mem_set_bool(struct Mem *mem, bool value)
        mem->flags = MEM_Bool;
 }
 
+void
+mem_set_i64(struct Mem *mem, int64_t value)
+{
+       if (VdbeMemDynamic(mem))
+               sqlVdbeMemSetNull(mem);
+       mem->u.i = value;
+       int flag = value < 0 ? MEM_Int : MEM_UInt;
+       MemSetTypeFlag(mem, flag);
+}
+
+void
+mem_set_u64(struct Mem *mem, uint64_t value)
+{
+       if (VdbeMemDynamic(mem))
+               sqlVdbeMemSetNull(mem);
+       mem->u.u = value;
+       MemSetTypeFlag(mem, MEM_UInt);
+}
+
 void
 mem_set_int(struct Mem *mem, int64_t value, bool is_neg)
 {
@@ -1376,8 +1395,7 @@ valueFromExpr(sql * db,   /* The database connection */
                if (pVal == 0)
                        goto no_mem;
                if (ExprHasProperty(pExpr, EP_IntValue)) {
-                       mem_set_int(pVal, (i64) pExpr->u.iValue * negInt,
-                                   pExpr->u.iValue * negInt < 0);
+                       mem_set_i64(pVal, (i64) pExpr->u.iValue * negInt);
                } else {
                        zVal =
                            sqlMPrintf(db, "%s%s", zNeg, pExpr->u.zToken);

>> @@ -1609,7 +1617,8 @@ case OP_Remainder: {           /* same as TK_REM, in1, in2, out3 */
>> 	pOut = &aMem[pOp->p3];
>> 	flags = pIn1->flags | pIn2->flags;
>> 	if ((flags & MEM_Null)!=0) goto arithmetic_result_is_null;
>> -	if ((type1 & type2 & MEM_Int)!=0) {
>> +	if ((type1 & (MEM_Int | MEM_UInt)) !=0 &&
>> +	    (type2 & (MEM_Int | MEM_UInt)) !=0) {
>> 		iA = pIn1->u.i;
>> 		iB = pIn2->u.i;
> 
> 4. How can you access pIn1/2->u.i if the types can be MEM_UInt?

There’s no opportunity to operate on values in the range
[INT64_MAX + 1, UINT64_MAX] in scope of this patch.
So, basically it is safe to use values as integers members
of union. It is fixed in the next patch as well as other arithmetic
operations.

>> 		bIntint = 1;
>> @@ -2485,14 +2490,14 @@ case OP_BitNot: {             /* same as TK_BITNOT, in1, out2 */
>> 	sqlVdbeMemSetNull(pOut);
>> 	if ((pIn1->flags & MEM_Null)==0) {
>> 		int64_t i;
>> -		if (sqlVdbeIntValue(pIn1, &i) != 0) {
>> +		bool is_neg;
>> +		if (sqlVdbeIntValue(pIn1, &i, &is_neg) != 0) {
>> 			diag_set(ClientError, ER_SQL_TYPE_MISMATCH,
>> 				 sql_value_text(pIn1), "integer");
>> 			rc = SQL_TARANTOOL_ERROR;
>> 			goto abort_due_to_error;
>> 		}
>> -		pOut->flags = MEM_Int;
>> -		pOut->u.i = ~i;
>> +		mem_set_int(pOut, ~i, ~i < 0);
> 
> 5. Why do you compare 'i' with 0? What if it was a big
> unsigned value?

See answer above: in this patch “i” can’t be “big unsigned value”.

> You have 'is_neg' flag for this, don't you?
> 
>> 	}
>> 	break;
>> }
>> @@ -3507,12 +3515,16 @@ case OP_SeekGT: {       /* jump, in3 */
>> 		 * the seek, so convert it.
>> 		 */
>> 		pIn3 = &aMem[reg_ipk];
>> -		if ((pIn3->flags & (MEM_Int|MEM_Real|MEM_Str))==MEM_Str) {
>> +		if ((pIn3->flags & (MEM_Int|MEM_UInt|MEM_Real|MEM_Str))==MEM_Str) {
> 
> 6. Please, remind me how is it possible, that a value is at the
> same time string and number? Why not to check for flags & MEM_Str != 0
> only?

It definitely could be some time ago (when string was converted to a number).
I do not remember reason of that behaviour. Now it seems that this situation
doesn’t appear anymore, so we really can simplify this check. Fixed by
applying your diff.

>> @@ -3814,10 +3828,10 @@ case OP_Sequence: {           /* out2 */
>>  */
>> case OP_NextSequenceId: {
>> 	pOut = &aMem[pOp->p2];
>> -	tarantoolSqlNextSeqId((uint64_t *) &pOut->u.i);
>> -
>> -	pOut->u.i += 1;
>> -	pOut->flags = MEM_Int;
>> +	int64_t id;
>> +	tarantoolSqlNextSeqId((uint64_t *) &id);
>> +	id++;
>> +	mem_set_int(pOut, id, id < 0);
> 
> 7. tarantoolSqlNextSeqId returns _sequence.id field. The
> field has unsigned type. How is it possible, that it is < 0?

No way. Fixed:

diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index 7e7c12ee4..47f558011 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -3828,10 +3828,11 @@ case OP_Sequence: {           /* out2 */
  */
 case OP_NextSequenceId: {
        pOut = &aMem[pOp->p2];
-       int64_t id;
-       tarantoolSqlNextSeqId((uint64_t *) &id);
+       uint64_t id;
+       tarantoolSqlNextSeqId(&id);
        id++;
-       mem_set_int(pOut, id, id < 0);
+      mem_set_u64(pOut, id);
        break;
 }

>> @@ -5079,10 +5092,17 @@ case OP_FkIfZero: {         /* jump */
>>  */
>> case OP_IfPos: {        /* jump, in1 */
>> 	pIn1 = &aMem[pOp->p1];
>> -	assert(pIn1->flags&MEM_Int);
>> -	VdbeBranchTaken( pIn1->u.i>0, 2);
>> -	if (pIn1->u.i>0) {
>> -		pIn1->u.i -= pOp->p3;
>> +	assert((pIn1->flags & (MEM_Int | MEM_UInt)) != 0);
>> +	if ((pIn1->flags & MEM_UInt) != 0 && pIn1->u.u != 0) {
>> +		assert(pOp->p3 >= 0);
>> +		uint64_t res = pIn1->u.u - (uint64_t) pOp->p3;
>> +		/*
>> +		 * To not bother setting integer flag in case
>> +		 * result of subtraction is negative, just
>> +		 * use saturated arithmetic.
>> +		 */
>> +		res &= -(res <= pIn1->u.u);
> 
> 8. I do not understand. Why do you compare res and pIn1->u.u?
> You compare 'u.u' and 'u.u - p3'. Obvioiusly, the latter is
> smaller. Probably, this is because you rely on 'res' being
> overflowed will be bigger than 'u.u’?

Yep.

> 
> In such a case, I think it would be cleaner to compare 'u.u' and
> 'p3'.
> 
>    res &= -(u.u > p3);

Sorry, can’t understand what you mean or how it
should be implemented. This is common way to
implement saturated subtraction.

> Additionally, now this opcode's comment is wrong - P1 is not equal
> to P1 - P3 from this moment.

Ok, fixed a bit comment:

diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index b93e12b9a..fcc3b250c 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -5115,7 +5115,8 @@ case OP_FkIfZero: {         /* jump */
  *
  * Register P1 must contain an integer.
  * If the value of register P1 is 1 or greater, subtract P3 from the
- * value in P1 and jump to P2.
+ * value in P1 and jump to P2. If the result of subtraction is
+ * negative, then register P1 will hold 0.
  *
  * If the initial value of register P1 is less than 1, then the
  * value is unchanged and control passes through to the next instruction.

>> @@ -5111,10 +5131,10 @@ case OP_OffsetLimit: {    /* in1, out2, in3 */
>> 	pIn1 = &aMem[pOp->p1];
>> 	pIn3 = &aMem[pOp->p3];
>> 	pOut = out2Prerelease(p, pOp);
>> -	assert(pIn1->flags & MEM_Int);
>> -	assert(pIn3->flags & MEM_Int);
>> +	assert(pIn1->flags & (MEM_Int | MEM_UInt));
>> +	assert(pIn3->flags & (MEM_Int | MEM_UInt));
>> 	x = pIn1->u.i;
>> -	if (x<=0 || sqlAddInt64(&x, pIn3->u.i>0?pIn3->u.i:0)) {
>> +	if (x<=0 || sqlAddInt64(&x, pIn3->u.i > 0 ? pIn3->u.i : 0)) {
> 
> 9. Why do you touch pIn3.u.i without checking its type? In case it
> is a big unsigned, you will get 'u.i < 0'. Additionally, as I understand,
> now no one should treat 'u.i' as an unsigned. It is supposed to be
> negative always.

Due to the reason I’ve already mentioned: in this patch there’s
no chance that values in range [INT64_MAX + 1, …] get to VDBE.

>> 		/* If the LIMIT is less than or equal to zero, loop forever.  This
>> 		 * is documented.  But also, if the LIMIT+OFFSET exceeds 2^63 then
>> 		 * also loop forever.  This is undocumented.  In fact, one could argue
>> @@ -5124,8 +5144,9 @@ case OP_OffsetLimit: {    /* in1, out2, in3 */
>> 		 * looping forever is a reasonable approximation.
>> 		 */
>> 		pOut->u.i = -1;
>> +		pOut->flags = MEM_Int;
>> 	} else {
>> -		pOut->u.i = x;
>> +		mem_set_int(pOut, x, x < 0);
> 
> 10. Here 'x' is always >= 0.

Fixed:

@@ -5178,7 +5179,7 @@ case OP_OffsetLimit: {    /* in1, out2, in3 */
                 */
                mem_set_int(pOut, -1, true);
        } else {
-               mem_set_int(pOut, x, x < 0);
+               mem_set_u64(pOut, x,);
        }
        break;
 }

>> @@ -5140,7 +5161,7 @@ case OP_OffsetLimit: {    /* in1, out2, in3 */
>>  */
>> case OP_IfNotZero: {        /* jump, in1 */
>> 	pIn1 = &aMem[pOp->p1];
>> -	assert(pIn1->flags&MEM_Int);
>> +	assert(pIn1->flags&(MEM_Int|MEM_UInt));
>> 	VdbeBranchTaken(pIn1->u.i<0, 2);
>> 	if (pIn1->u.i) {
>> 		if (pIn1->u.i>0) pIn1->u.i--;
> 
> 11. You can't treat 'u.i' as a positive value without
> checking the flags.

P1 is LIMIT counter (according to the only usage of this opcode)
and it can be only positive value. It is checked in
computeLimitRegisters(). I’ve fixed an assertion:

diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index a08194178..f4d311927 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -5149,7 +5149,7 @@ case OP_OffsetLimit: {    /* in1, out2, in3 */
  */
 case OP_IfNotZero: {        /* jump, in1 */
        pIn1 = &aMem[pOp->p1];
-       assert((pIn1->flags & (MEM_Int | MEM_UInt)) != 0);
+       assert((pIn1->flags & MEM_UInt) != 0);
-       VdbeBranchTaken(pIn1->u.i<0, 2);
-       if (pIn1->u.i) {
-               if (pIn1->u.i>0) pIn1->u.i--;
+       VdbeBranchTaken(pIn1->u.u<0, 2);
+       if (pIn1->u.u) {
+               if (pIn1->u.u > 0) pIn1->u.u--;

>> @@ -5157,7 +5178,7 @@ case OP_IfNotZero: {        /* jump, in1 */
>>  */
>> case OP_DecrJumpZero: {      /* jump, in1 */
>> 	pIn1 = &aMem[pOp->p1];
>> -	assert(pIn1->flags&MEM_Int);
>> +	assert((pIn1->flags&(MEM_Int|MEM_UInt)) != 0);
>> 	if (pIn1->u.i>SMALLEST_INT64) pIn1->u.i--;
> 
> 12. The same.

The same is here: P1 is the LIMIT counter, so I’ve fixed
an assertion:

@@ -5166,7 +5166,7 @@ case OP_IfNotZero: {        /* jump, in1 */
  */
 case OP_DecrJumpZero: {      /* jump, in1 */
        pIn1 = &aMem[pOp->p1];
-       assert((pIn1->flags & (MEM_Int | MEM_UInt)) != 0);
+       assert((pIn1->flags & MEM_UInt) != 0);
-       if (pIn1->u.i>SMALLEST_INT64) pIn1->u.i--;
+       if (pIn1->u.i>SMALLEST_INT64) pIn1->u.u--;
        VdbeBranchTaken(pIn1->u.i==0, 2);
-       if (pIn1->u.i==0) goto jump_to_p2;
+       if (pIn1->u.u == 0) goto jump_to_p2;
        break;
 }

>> @@ -5396,12 +5417,12 @@ case OP_Init: {          /* jump */
>> case OP_IncMaxid: {
>> 	assert(pOp->p1 > 0);
>> 	pOut = &aMem[pOp->p1];
>> -
>> -	rc = tarantoolsqlIncrementMaxid((uint64_t*) &pOut->u.i);
>> +	int64_t id;
>> +	rc = tarantoolsqlIncrementMaxid((uint64_t*) &id);
> 
> 13. Id is always non-negative.

Fixed:

@@ -5417,12 +5418,13 @@ case OP_Init: {          /* jump */
 case OP_IncMaxid: {
        assert(pOp->p1 > 0);
        pOut = &aMem[pOp->p1];
-       int64_t id;
-       rc = tarantoolsqlIncrementMaxid((uint64_t*) &id);
+       uint64_t id;
+       rc = tarantoolsqlIncrementMaxid(&id);
        if (rc!=SQL_OK) {
                goto abort_due_to_error;
        }
-       mem_set_int(pOut, id, id < 0);
+      mem_set_u64(pOut, id);
        break;
 }

>> diff --git a/src/box/sql/vdbeInt.h b/src/box/sql/vdbeInt.h
>> index a3100e513..abed46486 100644
>> --- a/src/box/sql/vdbeInt.h
>> +++ b/src/box/sql/vdbeInt.h
>> @@ -259,9 +261,14 @@ struct Mem {
>>  * auxiliary flags.
>>  */
>> enum {
>> -	MEM_PURE_TYPE_MASK = 0x3f
>> +	MEM_PURE_TYPE_MASK = 0x7f
>> };
>> 
>> +static_assert((int) MEM_PURE_TYPE_MASK == (MEM_Null | MEM_Str | MEM_Int |
> 
> 14. Why do you need the cast to 'int’?

Well, in fact I don’t need to do that. Fixed by applying your diff.

> 
>> +					   MEM_Real | MEM_Blob | MEM_Bool |
>> +					   MEM_UInt),
>> +	      "value of type mask must consist of corresponding to memory type bits");
> 
> 15. Out of 80.

Same.

>> diff --git a/src/box/sql/vdbeapi.c b/src/box/sql/vdbeapi.c
>> index e480ae720..393782c23 100644
>> --- a/src/box/sql/vdbeapi.c
>> +++ b/src/box/sql/vdbeapi.c
>> @@ -233,7 +234,8 @@ sql_int64
>> sql_value_int64(sql_value * pVal)
>> {
>> 	int64_t i = 0;
>> -	sqlVdbeIntValue((Mem *) pVal, &i);
>> +	bool is_neg;
>> +	sqlVdbeIntValue((Mem *) pVal, &i, &is_neg);
>> 	return i;
> 
> 16. The result integer is invalid in case it was a
> big unsigned value.

In the next patches I’m introducing sql_value_uint(),
so it’s OK. However, I’d like to rename is_neg to unused
to underline that the usage is correct:

diff --git a/src/box/sql/vdbeapi.c b/src/box/sql/vdbeapi.c
index 3c0a9cc59..5ba7981ec 100644
--- a/src/box/sql/vdbeapi.c
+++ b/src/box/sql/vdbeapi.c
@@ -234,8 +234,8 @@ sql_int64
 sql_value_int64(sql_value * pVal)
 {
        int64_t i = 0;
-       bool is_neg;
-       sqlVdbeIntValue((Mem *) pVal, &i, &is_neg);
+       bool unused;
+       sqlVdbeIntValue((Mem *) pVal, &i, &unused);
        return i;
 }

>> @@ -385,7 +388,7 @@ sql_result_error(sql_context * pCtx, const char *z, int n)
>> void
>> sql_result_int(sql_context * pCtx, int iVal)
>> {
>> -	sqlVdbeMemSetInt64(pCtx->pOut, (i64) iVal);
>> +	mem_set_int(pCtx->pOut, (i64) iVal, iVal < 0);
> 
> 17. int to int64 cast works implicitly. You don't need an
> explicit cast.

Ok, fixed in your diff.

>> diff --git a/src/box/sql/vdbeaux.c b/src/box/sql/vdbeaux.c
>> index 25d4cd759..5a71e1801 100644
>> --- a/src/box/sql/vdbeaux.c
>> +++ b/src/box/sql/vdbeaux.c
>> @@ -1639,6 +1641,8 @@ sqlVdbeList(Vdbe * p)
>> 		if (p->explain == 1) {
>> 			pMem->flags = MEM_Int;
>> 			pMem->u.i = i;	/* Program counter */
>> +			mem_set_int(pMem, i, i < 0);
>> +
>> 			pMem++;
> 
> 18. Stray empty line + you do mem_set_int() twice - once you
> call the function, and second you set flag and value manually.
> Besides, program counter is always > 0.

Fixed:

diff --git a/src/box/sql/vdbeaux.c b/src/box/sql/vdbeaux.c
index 5a71e1801..de5fa5047 100644
--- a/src/box/sql/vdbeaux.c
+++ b/src/box/sql/vdbeaux.c
@@ -1639,9 +1639,8 @@ sqlVdbeList(Vdbe * p)
                        pOp = &apSub[j]->aOp[i];
                }
                if (p->explain == 1) {
-                       pMem->flags = MEM_Int;
-                       pMem->u.i = i;  /* Program counter */
-                       mem_set_int(pMem, i, i < 0);
+                       assert(i >= 0);
+                       mem_set_int(pMem, i, false);
 
                        pMem++;

>> @@ -3395,10 +3403,23 @@ sqlMemCompare(const Mem * pMem1, const Mem * pMem2, const struct coll * pColl)
>> 				return -1;
>> 			}
>> 		}
>> +		if ((f1 & MEM_UInt) != 0) {
>> +			if ((f2 & MEM_Real) != 0) {
>> +				return sqlIntFloatCompare(pMem1->u.i,
>> +							  pMem2->u.r);
>> +			} else if ((f2 & MEM_Int) != 0) {
>> +				return +1;
>> +			} else {
>> +				return -1;
>> +			}
>> +		}
>> 		if ((f1 & MEM_Real) != 0) {
>> 			if ((f2 & MEM_Int) != 0) {
>> 				return -sqlIntFloatCompare(pMem2->u.i,
>> 							       pMem1->u.r);
>> +			} else if ((f2 & MEM_UInt) != 0) {
>> +				return -sqlIntFloatCompare(pMem2->u.u,
> 
> 19. You treat 'u.u' here as int64_t. Why?

Same reason and fixed in the next patch.

>> @@ -3563,13 +3584,14 @@ sqlVdbeCompareMsgpack(const char **key1,
>> 				mem1.u.r = (double)v;
>> 				goto do_float;
>> 			}
>> -			mem1.u.i = v;
>> +			mem1.u.u = v;
>> 			goto do_int;
>> 		}
>> 	case MP_INT:{
>> 			mem1.u.i = mp_decode_int(&aKey1);
>>  do_int:
>> -			if (pKey2->flags & MEM_Int) {
>> +			if ((pKey2->flags & MEM_Int) ||
>> +			    (pKey2->flags & MEM_UInt)) {
>> 				if (mem1.u.i < pKey2->u.i) {
> 
> 20. How can you touch pKey2->u.i, if you don't know
> if it is MEM_Int or MEM_UInt? The only thing you know is
> that it is an integer. You don't know which.

It is fixed in the next patch.

>> diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
>> index 3a361d066..25119ff16 100644
>> --- a/src/box/sql/vdbemem.c
>> +++ b/src/box/sql/vdbemem.c
>> @@ -418,7 +422,7 @@ sqlVdbeMemRelease(Mem * p)
>>  * return the closest available 64-bit signed integer.
>>  */
>> static int
>> -doubleToInt64(double r, int64_t *i)
>> +doubleToInt64(double r, int64_t *i, bool *is_neg)
> 
> 21. I understand 'is_neg' in the most of other places,
> but why here? In a caller function you can always check
> 'r < 0' if necessary.

Fair, removed:

diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
index 48cb556e9..a8ac9c806 100644
--- a/src/box/sql/vdbemem.c
+++ b/src/box/sql/vdbemem.c
@@ -422,7 +422,7 @@ sqlVdbeMemRelease(Mem * p)
  * return the closest available 64-bit signed integer.
  */
 static int
-doubleToInt64(double r, int64_t *i, bool *is_neg)
+doubleToInt64(double r, int64_t *i)
 {
        /*
         * Many compilers we encounter do not define constants for the
@@ -433,7 +433,6 @@ doubleToInt64(double r, int64_t *i, bool *is_neg)
         */
        static const int64_t maxInt = LARGEST_INT64;
        static const int64_t minInt = SMALLEST_INT64;
-       *is_neg = r < 0.f;
        if (r <= (double)minInt) {
                *i = minInt;
                return -1;
@@ -472,7 +471,9 @@ sqlVdbeIntValue(Mem * pMem, int64_t *i, bool *is_neg)
                *is_neg = false;
                return 0;
        } else if (flags & MEM_Real) {
-               return doubleToInt64(pMem->u.r, i, is_neg);
+               *is_neg = pMem->u.r < 0;
+               return doubleToInt64(pMem->u.r, i);
        } else if (flags & (MEM_Str)) {
                assert(pMem->z || pMem->n == 0);
                if (sql_atoi64(pMem->z, i, is_neg, pMem->n) == 0)
@@ -500,8 +501,7 @@ sqlVdbeRealValue(Mem * pMem, double *v)
        } else if (pMem->flags & MEM_UInt) {
                *v = (double)pMem->u.u;
                return 0;
-       }
-       else if (pMem->flags & MEM_Str) {
+       } else if (pMem->flags & MEM_Str) {
                if (sqlAtoF(pMem->z, v, pMem->n))
                        return 0;
        }
@@ -530,9 +530,8 @@ mem_apply_integer_type(Mem *pMem)
        assert(pMem->flags & MEM_Real);
        assert(EIGHT_BYTE_ALIGNMENT(pMem));
 
-       bool is_neg;
-       if ((rc = doubleToInt64(pMem->u.r, (int64_t *) &ix, &is_neg)) == 0)
-               mem_set_int(pMem, ix, is_neg);
+       if ((rc = doubleToInt64(pMem->u.r, (int64_t *) &ix)) == 0)
+               mem_set_int(pMem, ix, pMem->u.r <= -1);
        return rc;
 }
 
>> @@ -454,20 +458,24 @@ doubleToInt64(double r, int64_t *i)
>>  * If pMem represents a string value, its encoding might be changed.
>>  */
>> int
>> -sqlVdbeIntValue(Mem * pMem, int64_t *i)
>> +sqlVdbeIntValue(Mem * pMem, int64_t *i, bool *is_neg)
>> {
>> 	int flags;
>> 	assert(EIGHT_BYTE_ALIGNMENT(pMem));
>> 	flags = pMem->flags;
>> 	if (flags & MEM_Int) {
>> 		*i = pMem->u.i;
>> +		*is_neg = true;
>> +		return 0;
>> +	} else if (flags & MEM_UInt) {
>> +		*i = pMem->u.u;
>> +		*is_neg = false;
>> 		return 0;
>> 	} else if (flags & MEM_Real) {
>> -		return doubleToInt64(pMem->u.r, i);
>> +		return doubleToInt64(pMem->u.r, i, is_neg);
>> 	} else if (flags & (MEM_Str)) {
>> 		assert(pMem->z || pMem->n == 0);
>> -		bool is_neg;
>> -		if (sql_atoi64(pMem->z, (int64_t *)i, &is_neg, pMem->n) == 0)
>> +		if (sql_atoi64(pMem->z, (int64_t *)i, is_neg, pMem->n) == 0)
> 
> 22. I do not understand these casts. 'i' is already int64_t *. Why do
> you cast it?

Indeed, there’s no need to cast here. Removed in the previous patch.

>> @@ -489,7 +497,11 @@ sqlVdbeRealValue(Mem * pMem, double *v)
>> 	} else if (pMem->flags & MEM_Int) {
>> 		*v = (double)pMem->u.i;
>> 		return 0;
>> -	} else if (pMem->flags & MEM_Str) {
>> +	} else if (pMem->flags & MEM_UInt) {
>> +		*v = (double)pMem->u.u;
>> +		return 0;
>> +	}
>> +	else if (pMem->flags & MEM_Str) {
> 
> 23. Stray line wrap.

Fixed:

diff --git a/src/box/sql/vdbemem.c b/src/box/sql/vdbemem.c
index 48cb556e9..241a8f78a 100644
--- a/src/box/sql/vdbemem.c
+++ b/src/box/sql/vdbemem.c
@@ -500,8 +500,7 @@ sqlVdbeRealValue(Mem * pMem, double *v)
        } else if (pMem->flags & MEM_UInt) {
                *v = (double)pMem->u.u;
                return 0;
-       }
-       else if (pMem->flags & MEM_Str) {
+       } else if (pMem->flags & MEM_Str) {
                if (sqlAtoF(pMem->z, v, pMem->n))
                        return 0;
        }

>> @@ -777,42 +792,29 @@ sqlVdbeMemSetZeroBlob(Mem * pMem, int n)
>> 	pMem->z = 0;
>> }
>> 
>> -/*
>> - * The pMem is known to contain content that needs to be destroyed prior
>> - * to a value change.  So invoke the destructor, then set the value to
>> - * a 64-bit integer.
>> - */
>> -static SQL_NOINLINE void
>> -vdbeReleaseAndSetInt64(Mem * pMem, i64 val)
>> +void
>> +mem_set_bool(struct Mem *mem, bool value)
> 
> 24. Please, move this function back below mem_set_int to
> avoid diff padding out.

But it is not used anymore, so compilation would
result in error.

> 25. I was confused by several places where you use Mem.u.i not
> as a negative integer, but as a buffer of both integers. To avoid
> further confusion I think we should add another alias to Mem.u -
> 'ib' - integer buffer. It should be uint64_t, but due to its name
> it will be clear that in places of its usage we just want to take
> raw bytes of the stored integer, separately from its sign.
> 
> Besides, in a couple of places 'ib' usage will be faster.

Sounds extremely dubious.

> 1) sqlVdbeMemCast - here the only thing which matters if 'ib' is 0.
> Now you check it twice - for 'u.u' and 'u.i'.
> 
> 2) OP_SeekGT, here:
> 
> 		if ((pIn3->flags & MEM_Int) == MEM_Int) {
> 			i = pIn3->u.i;
> 			is_neg = true;
> 		} else if ((pIn3->flags & MEM_UInt) == MEM_UInt) {
> 			i = pIn3->u.u;
> 			is_neg = false;
> 
> In this place you could write 'i = pIn3->u.ib; is_neg = flags == MEM_Int;’.

I don’t see difference between “i” and “ib”. IMHO it would only
complicate code maintenance.

> Please, consider my review fixes here and on the branch in a separate commit:

Applied.