[Tarantool-patches] [PATCH 1/1] tuple: don't truncate float in :update()

Sun Jan 26 19:44:23 MSK 2020

Before the patch there were the rules:
* float +/- double = double
* double +/- double = double
* float +/- float = float

The rules were applied regardless of values. That led to a problem
when float + float exceeding maximal float value could fit into
double, but was stored as an infinity.

The patch makes so that if a floating point arithmetic operation
result fits into float, it is stored as float. Otherwise as
double. Regardless of initial types.

This alongside saves some memory for cases when doubles can be
stored as floats, and therefore takes 4 less bytes. Although
these cases are rare, because any not integer value stored in a
double may have a long garbage tail in its fraction.

Closes #4701
---
Branch: https://github.com/tarantool/tarantool/tree/gerold103/gh-4701-update-float-truncate
Issue: https://github.com/tarantool/tarantool/issues/4701

I am not sure about the patch correctness. Perhaps we should not
save double + double as float even when it fits. It would break
DOUBLE data type, which we are going to introduce, because from
what I understood, it is going to store MP_DOUBLE only.

On the other hand, DOUBLE is not implemented yet, and when it will
be implemented, we may decide to allow to store MP_FLOAT there.

 src/box/xrow_update_field.c | 18 ++++++++--
 test/box/update.result      | 66 +++++++++++++++++++++++++++++++++++++
 test/box/update.test.lua    | 46 ++++++++++++++++++++++++++
 3 files changed, 127 insertions(+), 3 deletions(-)

diff --git a/src/box/xrow_update_field.c b/src/box/xrow_update_field.c
index 7c0f5fb5e..31429ee37 100644
--- a/src/box/xrow_update_field.c
+++ b/src/box/xrow_update_field.c
@@ -400,13 +400,25 @@ xrow_update_arith_make(struct xrow_update_op *op,
 			unreachable();
 			break;
 		}
-		if (lowest_type == XUPDATE_TYPE_DOUBLE) {
+		float fc = (float) c;
+		/*
+		 * A value may be saved as double even if it looks
+		 * like fitting a float. For example, 0.01 + 0.01
+		 * may be stored as double. This is because
+		 * 0.01 may be stored as 0.009999999999999, what
+		 * looks like double precision. And there is no
+		 * way how to check if this is actually 0.01.
+		 * By the same reason FLT_MAX can't be used to
+		 * detect whether a value fits float, because it
+		 * may be <= FLT_MAX, but may have a double
+		 * precision in its fraction part.
+		 */
+		if (c != (double) fc) {
 			ret->type = XUPDATE_TYPE_DOUBLE;
 			ret->dbl = c;
 		} else {
-			assert(lowest_type == XUPDATE_TYPE_FLOAT);
 			ret->type = XUPDATE_TYPE_FLOAT;
-			ret->flt = (float)c;
+			ret->flt = fc;
 		}
 	} else {
 		decimal_t a, b, c;
diff --git a/test/box/update.result b/test/box/update.result
index 28ba47831..0085f1c24 100644
--- a/test/box/update.result
+++ b/test/box/update.result
@@ -1647,3 +1647,69 @@ t:update({{'=', '[2].d.g', 6000}, {'#', '[2].d.old.old', 1}})
 s:drop()
 ---
 ...
+--
+-- gh-4701: arith operations should not truncate result when
+-- float + float fits double.
+--
+msgpackffi = require('msgpackffi')
+---
+...
+mp_array_1 = 0x91
+---
+...
+mp_double = 0xcb
+---
+...
+mp_float = 0xca
+---
+...
+flt_max = 3.402823466e+38
+---
+...
+uint_max = 18446744073709551615ULL
+---
+...
+-- Double + double is float if result fits.                                     \
+-- Double + double is double if result does not fit float.                      \
+-- Double + float is float if result fits.                                      \
+-- Double + float is double if result does not fit float.                       \
+-- Float + float is float when no overflow.                                     \
+-- Float + float is double when overflow.                                       \
+-- Float + int is float, because no such int exists, which could                \
+-- change a max float, even being stored as double.                             \
+-- Precision matters too. Double is used when need to avoid                     \
+-- precision loss.                                                              \
+tests = {                                                                       \
+    {{'double', 1}, {'double', 1}, mp_float},                                   \
+    {{'double', flt_max}, {'double', flt_max}, mp_double},                      \
+    {{'double', 1}, {'float', 1}, mp_float},                                    \
+    {{'double', flt_max}, {'float', flt_max}, mp_double},                       \
+    {{'float', 1}, {'float', 1}, mp_float},                                     \
+    {{'float', flt_max}, {'float', flt_max}, mp_double},                        \
+    {{'float', -flt_max}, {'float', -flt_max}, mp_double},                      \
+    {{'float', 1}, {'int', 1}, mp_float},                                       \
+    {{'float', flt_max}, {'uint64_t', uint_max}, mp_float},                     \
+    {{'float', 1.0001}, {'double', 1.0000000000001}, mp_double},                \
+}
+---
+...
+err = nil
+---
+...
+for i, test in pairs(tests) do                                                  \
+    local val1 = ffi.cast(test[1][1], test[1][2])                               \
+    local val2 = ffi.cast(test[2][1], test[2][2])                               \
+    local t = box.tuple.new({val1})                                             \
+    t = t:update({{'+', 1, val2}})                                              \
+    t = msgpackffi.encode(t)                                                    \
+    if t:byte(1) ~= mp_array_1 or t:byte(2) ~= test[3] then                     \
+        err = {i, t:byte(1), t:byte(2)}                                         \
+        break                                                                   \
+    end                                                                         \
+end
+---
+...
+err
+---
+- null
+...
diff --git a/test/box/update.test.lua b/test/box/update.test.lua
index 314ebef05..89306ccef 100644
--- a/test/box/update.test.lua
+++ b/test/box/update.test.lua
@@ -597,3 +597,49 @@ t:update({{'=', '[2].d.g', 6000}, {'=', '[2].d.new.new', -1}})
 t:update({{'=', '[2].d.g', 6000}, {'#', '[2].d.old.old', 1}})
 
 s:drop()
+
+--
+-- gh-4701: arith operations should not truncate result when
+-- float + float fits double.
+--
+msgpackffi = require('msgpackffi')
+mp_array_1 = 0x91
+mp_double = 0xcb
+mp_float = 0xca
+flt_max = 3.402823466e+38
+uint_max = 18446744073709551615ULL
+tests = {                                                                       \
+-- Double + double is float if result fits.                                     \
+    {{'double', 1}, {'double', 1}, mp_float},                                   \
+-- Double + double is double if result does not fit float.                      \
+    {{'double', flt_max}, {'double', flt_max}, mp_double},                      \
+-- Double + float is float if result fits.                                      \
+    {{'double', 1}, {'float', 1}, mp_float},                                    \
+-- Double + float is double if result does not fit float.                       \
+    {{'double', flt_max}, {'float', flt_max}, mp_double},                       \
+-- Float + float is float when no overflow.                                     \
+    {{'float', 1}, {'float', 1}, mp_float},                                     \
+-- Float + float is double when overflow.                                       \
+    {{'float', flt_max}, {'float', flt_max}, mp_double},                        \
+    {{'float', -flt_max}, {'float', -flt_max}, mp_double},                      \
+-- Float + int is float, because no such int exists, which could                \
+-- change a max float, even being stored as double.                             \
+    {{'float', 1}, {'int', 1}, mp_float},                                       \
+    {{'float', flt_max}, {'uint64_t', uint_max}, mp_float},                     \
+-- Precision matters too. Double is used when need to avoid                     \
+-- precision loss.                                                              \
+    {{'float', 1.0001}, {'double', 1.0000000000001}, mp_double},                \
+}
+err = nil
+for i, test in pairs(tests) do                                                  \
+    local val1 = ffi.cast(test[1][1], test[1][2])                               \
+    local val2 = ffi.cast(test[2][1], test[2][2])                               \
+    local t = box.tuple.new({val1})                                             \
+    t = t:update({{'+', 1, val2}})                                              \
+    t = msgpackffi.encode(t)                                                    \
+    if t:byte(1) ~= mp_array_1 or t:byte(2) ~= test[3] then                     \
+        err = {i, t:byte(1), t:byte(2)}                                         \
+        break                                                                   \
+    end                                                                         \
+end
+err
-- 
2.21.1 (Apple Git-122.3)