[Tarantool-patches] [PATCH luajit 2/2] Avoid negation of signed integers in C that may hold INT*_MIN.

Tarantool development patches archive
 help / color / mirror / Atom feed

From: Sergey Kaplun via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: Maxim Kokryashkin <m.kokryashkin@tarantool.org>,
	Sergey Bronnikov <sergeyb@tarantool.org>
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH luajit 2/2] Avoid negation of signed integers in C that may hold INT*_MIN.
Date: Tue, 25 Jun 2024 18:54:25 +0300	[thread overview]
Message-ID: <e586c7e8418c500e190b330529d51fac32fa6df5.1719329795.git.skaplun@tarantool.org> (raw)
In-Reply-To: <cover.1719329795.git.skaplun@tarantool.org>

From: Mike Pall <mike>

Reported by minoki.
Recent C compilers 'take advantage' of the undefined behavior.
This completely changes the meaning of expressions like (k == -k).

(cherry picked from commit 8a5e398c52c7f8ca3e1a0e574cc2ba38224b759b)

This patch changes all possibly dangerous -x operations on integers to
the corresponding two's complement computation (`~x+1u`) on unsigned
values. Also, it removes all related UBSAN suppressions, since the
issues they covered are fixed.

Also, this patch limits the `bit.tohex()` result to 254 characters.

There is no testcase for `strscan_oct()`, `strscan_dec()`, or the
`STRSCAN_U32` format: when parsing C syntax, the unary minus is parsed
first, and only then the number itself. So the error is raised in
`cp_expr_prefix()` instead. There is no testcase for parsing the
exponent either, since its value is limited by `STRSCAN_MAXEXP`.

Sergey Kaplun:
* added the description and the test for the problem

Part of tarantool/tarantool#9924
Relates to tarantool/tarantool#8473
---
 src/lib_base.c                                |   2 +-
 src/lib_bit.c                                 |   3 +-
 src/lj_asm_mips.h                             |   2 +-
 src/lj_carith.c                               |   7 +-
 src/lj_cparse.c                               |   2 +-
 src/lj_crecord.c                              |   3 +-
 src/lj_ctype.c                                |   2 +-
 src/lj_emit_arm.h                             |   2 +-
 src/lj_emit_arm64.h                           |   9 +-
 src/lj_obj.h                                  |   2 +-
 src/lj_opt_fold.c                             |   6 +-
 src/lj_parse.c                                |  17 +--
 src/lj_strfmt.c                               |   9 +-
 src/lj_strscan.c                              |  26 ++--
 src/lj_vmmath.c                               |   6 +-
 .../lj-928-int-min-negation.test.lua          | 121 ++++++++++++++++++
 16 files changed, 164 insertions(+), 55 deletions(-)
 create mode 100644 test/tarantool-tests/lj-928-int-min-negation.test.lua

diff --git a/src/lib_base.c b/src/lib_base.c
index eb6da054..ad151975 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -301,7 +301,7 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
 	while (lj_char_isspace((unsigned char)(*ep))) ep++;
 	if (*ep == '\0') {
 	  if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) {
-	    if (neg) ul = -ul;
+	    if (neg) ul = ~ul+1u;
 	    setintV(L->base-1-LJ_FR2, (int32_t)ul);
 	  } else {
 	    lua_Number n = (lua_Number)ul;
diff --git a/src/lib_bit.c b/src/lib_bit.c
index c979a448..6dbaf351 100644
--- a/src/lib_bit.c
+++ b/src/lib_bit.c
@@ -155,7 +155,8 @@ LJLIB_CF(bit_tohex)		LJLIB_REC(.)
 #endif
   SBuf *sb = lj_buf_tmp_(L);
   SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
-  if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
+  if (n < 0) { n = (int32_t)(~(uint32_t)n+1u); sf |= STRFMT_F_UPPER; }
+  if ((uint32_t)n > 254) n = 254;
   sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
 #if LJ_HASFFI
   if (n < 16) b &= ((uint64_t)1 << 4*n)-1;
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index 597c6d62..3aed0da3 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -1843,7 +1843,7 @@ static void asm_arithov(ASMState *as, IRIns *ir)
   lj_assertA(!irt_is64(ir->t), "bad usage");
   if (irref_isk(ir->op2)) {
     int k = IR(ir->op2)->i;
-    if (ir->o == IR_SUBOV) k = -k;
+    if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u);
     if (checki16(k)) {  /* (dest < left) == (k >= 0 ? 1 : 0) */
       left = ra_alloc1(as, ir->op1, RSET_GPR);
       asm_guard(as, k >= 0 ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
diff --git a/src/lj_carith.c b/src/lj_carith.c
index 1d9d6fe1..90b3220f 100644
--- a/src/lj_carith.c
+++ b/src/lj_carith.c
@@ -159,11 +159,6 @@ static int carith_ptr(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
 }
 
 /* 64 bit integer arithmetic. */
-#if LUAJIT_USE_UBSAN
-/* See https://github.com/LuaJIT/LuaJIT/issues/928. */
-static int carith_int64(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
-  __attribute__((no_sanitize("signed-integer-overflow")));
-#endif
 static int carith_int64(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
 {
   if (ctype_isnum(ca->ct[0]->info) && ca->ct[0]->size <= 8 &&
@@ -216,7 +211,7 @@ static int carith_int64(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
       else
 	*up = lj_carith_powu64(u0, u1);
       break;
-    case MM_unm: *up = (uint64_t)-(int64_t)u0; break;
+    case MM_unm: *up = ~u0+1u; break;
     default:
       lj_assertL(0, "bad metamethod %d", mm);
       break;
diff --git a/src/lj_cparse.c b/src/lj_cparse.c
index 8506d719..9f3b032a 100644
--- a/src/lj_cparse.c
+++ b/src/lj_cparse.c
@@ -488,7 +488,7 @@ static void cp_expr_prefix(CPState *cp, CPValue *k)
   } else if (cp_opt(cp, '+')) {
     cp_expr_unary(cp, k);  /* Nothing to do (well, integer promotion). */
   } else if (cp_opt(cp, '-')) {
-    cp_expr_unary(cp, k); k->i32 = -k->i32;
+    cp_expr_unary(cp, k); k->i32 = (int32_t)(~(uint32_t)k->i32+1);
   } else if (cp_opt(cp, '~')) {
     cp_expr_unary(cp, k); k->i32 = ~k->i32;
   } else if (cp_opt(cp, '!')) {
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index e17e512f..255bfa45 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -1877,7 +1877,8 @@ TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr)
   } else {
     n = id ? 16 : 8;
   }
-  if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
+  if (n < 0) { n = (int32_t)(~n+1u); sf |= STRFMT_F_UPPER; }
+  if ((uint32_t)n > 254) n = 254;
   sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
   if (id) {
     tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
diff --git a/src/lj_ctype.c b/src/lj_ctype.c
index 83042118..53b83031 100644
--- a/src/lj_ctype.c
+++ b/src/lj_ctype.c
@@ -582,7 +582,7 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned)
   if (isunsigned) {
     *--p = 'U';
   } else if ((int64_t)n < 0) {
-    n = (uint64_t)-(int64_t)n;
+    n = ~n+1u;
     sign = 1;
   }
   do { *--p = (char)('0' + n % 10); } while (n /= 10);
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h
index ee299821..e8b33662 100644
--- a/src/lj_emit_arm.h
+++ b/src/lj_emit_arm.h
@@ -157,7 +157,7 @@ static int emit_kdelta2(ASMState *as, Reg rd, int32_t i)
       if (other) {
 	int32_t delta = i - other;
 	uint32_t sh, inv = 0, k2, k;
-	if (delta < 0) { delta = -delta; inv = ARMI_ADD^ARMI_SUB; }
+	if (delta < 0) { delta = (int32_t)(~(uint32_t)delta+1u); inv = ARMI_ADD^ARMI_SUB; }
 	sh = lj_ffs(delta) & ~1;
 	k2 = emit_isk12(0, delta & (255 << sh));
 	k = emit_isk12(0, delta & ~(255 << sh));
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
index 96fbab72..e1a9d3e4 100644
--- a/src/lj_emit_arm64.h
+++ b/src/lj_emit_arm64.h
@@ -27,8 +27,8 @@ static uint64_t get_k64val(ASMState *as, IRRef ref)
 /* Encode constant in K12 format for data processing instructions. */
 static uint32_t emit_isk12(int64_t n)
 {
-  uint64_t k = (n < 0) ? -n : n;
-  uint32_t m = (n < 0) ? 0x40000000 : 0;
+  uint64_t k = n < 0 ? ~(uint64_t)n+1u : (uint64_t)n;
+  uint32_t m = n < 0 ? 0x40000000 : 0;
   if (k < 0x1000) {
     return A64I_K12|m|A64F_U12(k);
   } else if ((k & 0xfff000) == k) {
@@ -177,7 +177,7 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
 	emit_dm(as, A64I_MOVx, rd, r);
 	return 1;
       } else {
-	uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta);
+	uint32_t k12 = emit_isk12(delta < 0 ? (int64_t)(~(uint64_t)delta+1u) : delta);
 	if (k12) {
 	  emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r);
 	  return 1;
@@ -415,7 +415,8 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
 {
   if (ofs)
     emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r,
-		 ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r));
+		 ofs < 0 ? (int32_t)(~(uint32_t)ofs+1u) : ofs,
+		 rset_exclude(RSET_GPR, r));
 }
 
 #define emit_spsub(as, ofs)	emit_addptr(as, RID_SP, -(ofs))
diff --git a/src/lj_obj.h b/src/lj_obj.h
index a38911d9..69e94ff2 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -405,7 +405,7 @@ typedef struct GCproto {
 #define PROTO_UV_IMMUTABLE	0x4000	/* Immutable upvalue. */
 
 #define proto_kgc(pt, idx) \
-  check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \
+  check_exp((uintptr_t)(intptr_t)(idx) >= ~(uintptr_t)(pt)->sizekgc+1u, \
 	    gcref(mref((pt)->k, GCRef)[(idx)]))
 #define proto_knumtv(pt, idx) \
   check_exp((uintptr_t)(idx) < (pt)->sizekn, &mref((pt)->k, TValue)[(idx)])
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index b9326c65..e2171e1b 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -272,7 +272,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
   case IR_SUB: k1 -= k2; break;
   case IR_MUL: k1 *= k2; break;
   case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
-  case IR_NEG: k1 = -k1; break;
+  case IR_NEG: k1 = (int32_t)(~(uint32_t)k1+1u); break;
   case IR_BAND: k1 &= k2; break;
   case IR_BOR: k1 |= k2; break;
   case IR_BXOR: k1 ^= k2; break;
@@ -1337,7 +1337,7 @@ LJFOLDF(simplify_intsub_k)
   if (fright->i == 0)  /* i - 0 ==> i */
     return LEFTFOLD;
   fins->o = IR_ADD;  /* i - k ==> i + (-k) */
-  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
+  fins->op2 = (IRRef1)lj_ir_kint(J, (int32_t)(~(uint32_t)fright->i+1u));  /* Overflow for -2^31 ok. */
   return RETRYFOLD;
 }
 
@@ -1368,7 +1368,7 @@ LJFOLDF(simplify_intsub_k64)
   if (k == 0)  /* i - 0 ==> i */
     return LEFTFOLD;
   fins->o = IR_ADD;  /* i - k ==> i + (-k) */
-  fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
+  fins->op2 = (IRRef1)lj_ir_kint64(J, ~k+1u);
   return RETRYFOLD;
 }
 
diff --git a/src/lj_parse.c b/src/lj_parse.c
index acceed17..9b45b103 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -939,11 +939,6 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2)
 }
 
 /* Emit unary operator. */
-#if LUAJIT_USE_UBSAN
-/* See https://github.com/LuaJIT/LuaJIT/issues/928. */
-static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
-  __attribute__((no_sanitize("signed-integer-overflow")));
-#endif
 static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
 {
   if (op == BC_NOT) {
@@ -975,22 +970,22 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
 #if LJ_HASFFI
       if (e->k == VKCDATA) {  /* Fold in-place since cdata is not interned. */
 	GCcdata *cd = cdataV(&e->u.nval);
-	int64_t *p = (int64_t *)cdataptr(cd);
+	uint64_t *p = (uint64_t *)cdataptr(cd);
 	if (cd->ctypeid == CTID_COMPLEX_DOUBLE)
-	  p[1] ^= (int64_t)U64x(80000000,00000000);
+	  p[1] ^= U64x(80000000,00000000);
 	else
-	  *p = -*p;
+	  *p = ~*p+1u;
 	return;
       } else
 #endif
       if (expr_isnumk(e) && !expr_numiszero(e)) {  /* Avoid folding to -0. */
 	TValue *o = expr_numtv(e);
 	if (tvisint(o)) {
-	  int32_t k = intV(o);
-	  if (k == -k)
+	  int32_t k = intV(o), negk = (int32_t)(~(uint32_t)k+1u);
+	  if (k == negk)
 	    setnumV(o, -(lua_Number)k);
 	  else
-	    setintV(o, -k);
+	    setintV(o, negk);
 	  return;
 	} else {
 	  o->u64 ^= U64x(80000000,00000000);
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c
index 9592eff1..b6f2c04e 100644
--- a/src/lj_strfmt.c
+++ b/src/lj_strfmt.c
@@ -93,15 +93,10 @@ retlit:
   { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }
 
 /* Write integer to buffer. */
-#if LUAJIT_USE_UBSAN
-/* See https://github.com/LuaJIT/LuaJIT/issues/928. */
-char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
-  __attribute__((no_sanitize("signed-integer-overflow")));
-#endif
 char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
 {
   uint32_t u = (uint32_t)k;
-  if (k < 0) { u = (uint32_t)-k; *p++ = '-'; }
+  if (k < 0) { u = ~u+1u; *p++ = '-'; }
   if (u < 10000) {
     if (u < 10) goto dig1;
     if (u < 100) goto dig2;
@@ -269,7 +264,7 @@ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
   /* Figure out signed prefixes. */
   if (STRFMT_TYPE(sf) == STRFMT_INT) {
     if ((int64_t)k < 0) {
-      k = (uint64_t)-(int64_t)k;
+      k = ~k+1u;
       prefix = 256 + '-';
     } else if ((sf & STRFMT_F_PLUS)) {
       prefix = 256 + '+';
diff --git a/src/lj_strscan.c b/src/lj_strscan.c
index 129010fd..36199d48 100644
--- a/src/lj_strscan.c
+++ b/src/lj_strscan.c
@@ -124,19 +124,19 @@ static StrScanFmt strscan_hex(const uint8_t *p, TValue *o,
   case STRSCAN_INT:
     if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg &&
 	!(x == 0 && neg)) {
-      o->i = neg ? -(int32_t)x : (int32_t)x;
+      o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
       return STRSCAN_INT;  /* Fast path for 32 bit integers. */
     }
     if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; }
     /* fallthrough */
   case STRSCAN_U32:
     if (dig > 8) return STRSCAN_ERROR;
-    o->i = neg ? -(int32_t)x : (int32_t)x;
+    o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
     return STRSCAN_U32;
   case STRSCAN_I64:
   case STRSCAN_U64:
     if (dig > 16) return STRSCAN_ERROR;
-    o->u64 = neg ? (uint64_t)-(int64_t)x : x;
+    o->u64 = neg ? ~x+1u : x;
     return fmt;
   default:
     break;
@@ -168,12 +168,12 @@ static StrScanFmt strscan_oct(const uint8_t *p, TValue *o,
     /* fallthrough */
   case STRSCAN_U32:
     if ((x >> 32)) return STRSCAN_ERROR;
-    o->i = neg ? -(int32_t)x : (int32_t)x;
+    o->i = neg ? (int32_t)(~(uint32_t)x+1u) : (int32_t)x;
     break;
   default:
   case STRSCAN_I64:
   case STRSCAN_U64:
-    o->u64 = neg ? (uint64_t)-(int64_t)x : x;
+    o->u64 = neg ? ~x+1u : x;
     break;
   }
   return fmt;
@@ -229,18 +229,18 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o,
       switch (fmt) {
       case STRSCAN_INT:
 	if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) {
-	  o->i = neg ? -(int32_t)x : (int32_t)x;
+	  o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
 	  return STRSCAN_INT;  /* Fast path for 32 bit integers. */
 	}
 	if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; goto plainnumber; }
 	/* fallthrough */
       case STRSCAN_U32:
 	if ((x >> 32) != 0) return STRSCAN_ERROR;
-	o->i = neg ? -(int32_t)x : (int32_t)x;
+	o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
 	return STRSCAN_U32;
       case STRSCAN_I64:
       case STRSCAN_U64:
-	o->u64 = neg ? (uint64_t)-(int64_t)x : x;
+	o->u64 = neg ? ~x+1u : x;
 	return fmt;
       default:
       plainnumber:  /* Fast path for plain numbers < 2^63. */
@@ -348,18 +348,18 @@ static StrScanFmt strscan_bin(const uint8_t *p, TValue *o,
   switch (fmt) {
   case STRSCAN_INT:
     if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) {
-      o->i = neg ? -(int32_t)x : (int32_t)x;
+      o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
       return STRSCAN_INT;  /* Fast path for 32 bit integers. */
     }
     if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; }
     /* fallthrough */
   case STRSCAN_U32:
     if (dig > 32) return STRSCAN_ERROR;
-    o->i = neg ? -(int32_t)x : (int32_t)x;
+    o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
     return STRSCAN_U32;
   case STRSCAN_I64:
   case STRSCAN_U64:
-    o->u64 = neg ? (uint64_t)-(int64_t)x : x;
+    o->u64 = neg ? ~x+1u : x;
     return fmt;
   default:
     break;
@@ -468,7 +468,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
 	if (xx >= STRSCAN_MAXEXP) return STRSCAN_ERROR;
 	p++;
       }
-      ex += negx ? -(int32_t)xx : (int32_t)xx;
+      ex += negx ? (int32_t)(~xx+1u) : (int32_t)xx;
     }
 
     /* Parse suffix. */
@@ -507,7 +507,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
 	o->n = -0.0;
 	return STRSCAN_NUM;
       } else {
-	o->i = neg ? -(int32_t)x : (int32_t)x;
+	o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
 	return STRSCAN_INT;
       }
     }
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index 506867f8..faebe719 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -64,11 +64,11 @@ int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
   uint32_t y, ua, ub;
   /* This must be checked before using this function. */
   lj_assertX(b != 0, "modulo with zero divisor");
-  ua = a < 0 ? (uint32_t)-a : (uint32_t)a;
-  ub = b < 0 ? (uint32_t)-b : (uint32_t)b;
+  ua = a < 0 ? ~(uint32_t)a+1u : (uint32_t)a;
+  ub = b < 0 ? ~(uint32_t)b+1u : (uint32_t)b;
   y = ua % ub;
   if (y != 0 && (a^b) < 0) y = y - ub;
-  if (((int32_t)y^b) < 0) y = (uint32_t)-(int32_t)y;
+  if (((int32_t)y^b) < 0) y = ~y+1u;
   return (int32_t)y;
 }
 #endif
diff --git a/test/tarantool-tests/lj-928-int-min-negation.test.lua b/test/tarantool-tests/lj-928-int-min-negation.test.lua
new file mode 100644
index 00000000..26f4ed8e
--- /dev/null
+++ b/test/tarantool-tests/lj-928-int-min-negation.test.lua
@@ -0,0 +1,121 @@
+local tap = require('tap')
+
+-- Test file to demonstrate LuaJIT's UBSan failures during
+-- `INT*_MIN` negation.
+-- See also: https://github.com/LuaJIT/LuaJIT/issues/928.
+
+local test = tap.test('lj-928-int-min-negation.'):skipcond({
+  ['Test requires JIT enabled'] = not jit.status(),
+})
+
+local INT32_MIN = -0x80000000
+local INT64_MIN = -0x8000000000000000
+local TOBIT_CHAR_MAX = 254
+
+-- XXX: Many tests (`tonumber()`-related) fail under UBSan
+-- with DUALNUM enabled. They are included to avoid regressions in
+-- the future if such a build becomes the default.
+local ffi = require('ffi')
+local LL_T = ffi.typeof(1LL)
+
+test:plan(14)
+
+jit.opt.start('hotloop=1')
+
+-- Temporary variable for the results.
+local r
+
+-- <src/lj_vmmath.c>:`lj_vm_modi()`
+for _ = 1, 4 do
+  -- Use additional variables to avoid folding during parsing.
+  -- Operands should be constants on the trace.
+  local x = -0x80000000
+  local y = -0x80000000
+  r = x % y
+end
+test:is(r, 0, 'no UB during lj_vm_modi')
+
+-- <src/lj_strfmt.c>:`lj_strfmt_wint()`
+for _ = 1, 4 do
+  -- Operand should be the constant on the trace.
+  r = tostring(bit.tobit(0x80000000))
+end
+test:is(r, '-2147483648', 'no UB during lj_strfmt_wint')
+
+-- <src/lj_strfmt.c>:`lj_strfmt_putfxint()`
+test:is(('%d'):format(INT64_MIN), '-9223372036854775808',
+        'no UB during lj_strfmt_putfxint')
+
+-- <src/lj_parse.c>:`bcemit_unop()`
+local int64_min_cdata = -0x8000000000000000LL
+test:ok(true, 'no UB during bcemit_unop')
+
+-- <src/lj_carith.c>:`carith_int64()`
+-- Use the additional variable to avoid folding during
+-- `bcemit_unop()`.
+test:is(-int64_min_cdata, int64_min_cdata, 'no UB during carith_int64')
+
+-- <src/lj_ctype.c>:`lj_ctype_repr_int64()`
+-- Use cast to separate the test case from `bcemit_unop()`.
+test:is(tostring(LL_T(INT64_MIN)), '-9223372036854775808LL',
+        'no UB during lj_ctype_repr_int64')
+
+local TOHEX_EXPECTED = ('0'):rep(TOBIT_CHAR_MAX)
+-- <src/lib_bit.c>:`bit_tohex()`
+-- The second argument is the number of hex digits to produce.
+-- A negative value selects uppercase output.
+test:is(bit.tohex(0, INT32_MIN), TOHEX_EXPECTED, 'no UB during bit_tohex')
+
+-- <src/lj_crecord.c>:`recff_bit64_tohex()`
+-- The second argument is the number of hex digits to produce.
+-- A negative value selects uppercase output.
+for _ = 1, 4 do
+  -- The second argument should be the constant on the trace.
+  r = bit.tohex(0, -0x80000000)
+end
+test:is(r, TOHEX_EXPECTED, 'no UB during recording bit.tohex')
+
+-- <src/lj_opt_fold.c>:`simplify_intsub_k()`
+r = 0
+for _ = 1, 4 do
+  r = r - 0x8000000000000000LL
+end
+test:is(r, 0LL, 'no UB during simplify_intsub_k')
+
+-- <src/lj_strscan.c>:`strscan_hex()`
+test:is(tonumber('-0x80000000'), INT32_MIN, 'no UB during strscan_hex')
+
+-- <src/lj_strscan.c>:`strscan_bin()`
+test:is(tonumber('-0b10000000000000000000000000000000'), INT32_MIN,
+        'no UB during strscan_bin')
+
+-- <src/lj_strscan.c>:`lj_strscan_scan()`
+test:is(tonumber('-2147483648'), INT32_MIN, 'no UB during strscan_scan')
+
+-- Test for 32bit long, just in case.
+-- <src/lib_base.c>:`tonumber()`
+test:is(tonumber('-2000000000000000', 4), INT32_MIN,
+        'no UB during tonumber, base 4')
+
+-- <src/lj_cparse.c>:`cp_expr_prefix()`
+-- According to ISO/IEC 9899:2023 [1]:
+-- | Each constant expression shall evaluate to a constant that is
+-- | in the range of representable values for its type.
+-- It means that since 0x80000000 does not fit in the int32_t
+-- range, -0x80000000 does not fit in the int32_t range either.
+--
+-- In the case when the enumeration has no fixed underlying type,
+-- the type of the enum is implementation defined [2][3].
+--
+-- Hence, we used -INT32_MAX - 1 since both values fit into
+-- int32_t, so it can't be ambiguous.
+--
+-- luacheck: ignore (too long line)
+-- [1]: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3096.pdf#subsection.6.2.6
+-- [2]: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf#%5B%7B%22num%22%3A232%2C%22gen%22%3A0%7D%2C%7B%22name%22%3A%22Fit%22%7D%5D
+-- [3]: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3096.pdf#subsubsection.6.7.2.2
+ffi.cdef[[typedef enum {enum_int32_min = -0x7fffffff - 1} enum_t;]]
+test:is(ffi.new('enum_t', 'enum_int32_min'), LL_T(INT32_MIN),
+        'no UB during cp_expr_prefix')
+
+test:done(true)
-- 
2.45.1

next prev parent reply	other threads:[~2024-06-25 15:55 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-25 15:54 [Tarantool-patches] [PATCH luajit 0/2] Fix UBSan warnings Sergey Kaplun via Tarantool-patches
2024-06-25 15:54 ` [Tarantool-patches] [PATCH luajit 1/2] Prevent sanitizer warning in snap_restoredata() Sergey Kaplun via Tarantool-patches
2024-07-01  8:44   ` Maxim Kokryashkin via Tarantool-patches
2024-07-04  7:58   ` Sergey Bronnikov via Tarantool-patches
2024-07-04  8:41     ` Sergey Kaplun via Tarantool-patches
2024-07-04 14:59       ` Sergey Bronnikov via Tarantool-patches
2024-06-25 15:54 ` Sergey Kaplun via Tarantool-patches [this message]
2024-07-01  9:11   ` [Tarantool-patches] [PATCH luajit 2/2] Avoid negation of signed integers in C that may hold INT*_MIN Maxim Kokryashkin via Tarantool-patches
2024-07-01 10:12     ` Sergey Kaplun via Tarantool-patches
2024-07-04  8:08   ` Sergey Bronnikov via Tarantool-patches
2024-07-04  8:40     ` Sergey Kaplun via Tarantool-patches
2024-07-04 14:59       ` Sergey Bronnikov via Tarantool-patches
2024-07-09  8:08 ` [Tarantool-patches] [PATCH luajit 0/2] Fix UBSan warnings Sergey Kaplun via Tarantool-patches
2024-07-09  8:15 ` Sergey Kaplun via Tarantool-patches

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e586c7e8418c500e190b330529d51fac32fa6df5.1719329795.git.skaplun@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=m.kokryashkin@tarantool.org \
    --cc=sergeyb@tarantool.org \
    --cc=skaplun@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH luajit 2/2] Avoid negation of signed integers in C that may hold INT*_MIN.' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox