[PATCH] tuple: fix hashing of integer numbers

Vladimir Davydov vdavydov.dev at gmail.com
Tue Jan 22 19:13:52 MSK 2019


Integer numbers stored in tuples as MP_FLOAT/MP_DOUBLE are hashed
differently from integer numbers stored as MP_INT/MP_UINT. This breaks
select() for memtx hash indexes and vinyl indexes (the latter use bloom
filters). Fix this by converting MP_FLOAT/MP_DOUBLE to MP_INT/MP_UINT
before hashing if the value can be stored as an integer. This is
consistent with the behavior of tuple comparators, which treat MP_FLOAT
and MP_INT as equal in case they represent the same number.

Closes #3907
---
https://github.com/tarantool/tarantool/issues/3907
https://github.com/tarantool/tarantool/commits/dv/gh-3907-fix-integer-number-hashing

 src/box/tuple_hash.cc     | 29 ++++++++++++++++++++++++++
 test/box/hash.result      | 46 +++++++++++++++++++++++++++++++++++++++++
 test/box/hash.test.lua    | 15 ++++++++++++++
 test/vinyl/bloom.result   | 52 +++++++++++++++++++++++++++++++++++++++++++++++
 test/vinyl/bloom.test.lua | 18 ++++++++++++++++
 5 files changed, 160 insertions(+)

diff --git a/src/box/tuple_hash.cc b/src/box/tuple_hash.cc
index b394804f..96802ad0 100644
--- a/src/box/tuple_hash.cc
+++ b/src/box/tuple_hash.cc
@@ -32,6 +32,7 @@
 #include "tuple_hash.h"
 #include "third_party/PMurHash.h"
 #include "coll.h"
+#include <math.h>
 
 /* Tuple and key hasher */
 namespace {
@@ -282,6 +283,34 @@ tuple_hash_field(uint32_t *ph1, uint32_t *pcarry, const char **field,
 		if (coll != NULL)
 			return coll->hash(f, size, ph1, pcarry, coll);
 		break;
+	case MP_FLOAT:
+	case MP_DOUBLE: {
+		/*
+		 * If a floating point number can be stored as an integer,
+		 * convert it to MP_INT/MP_UINT before hashing so that we
+		 * can select integer values by floating point keys and
+		 * vice versa.
+		 */
+		double iptr;
+		double val = mp_typeof(**field) == MP_FLOAT ?
+			     mp_decode_float(field) :
+			     mp_decode_double(field);
+		if (!isfinite(val) || modf(val, &iptr) != 0 ||
+		    val < -exp(63) || val >= exp(64)) {
+			size = *field - f;
+			break;
+		}
+		char *data;
+		char buf[9]; /* enough to store MP_INT/MP_UINT */
+		if (val >= 0)
+			data = mp_encode_uint(buf, (uint64_t)val);
+		else
+			data = mp_encode_int(buf, (int64_t)val);
+		size = data - buf;
+		assert(size <= sizeof(buf));
+		f = buf;
+		break;
+	}
 	default:
 		mp_next(field);
 		size = *field - f;  /* calculate the size of field */
diff --git a/test/box/hash.result b/test/box/hash.result
index 6893a1be..f0c30ade 100644
--- a/test/box/hash.result
+++ b/test/box/hash.result
@@ -786,3 +786,49 @@ space:select({1}, {iterator = 'BITS_ALL_SET' } )
 space:drop()
 ---
 ...
+-- gh-3907: check that integer numbers stored as MP_FLOAT/MP_DOUBLE
+-- are hashed as MP_INT/MP_UINT.
+ffi = require('ffi')
+---
+...
+s = box.schema.space.create('test')
+---
+...
+_ = s:create_index('primary', {type = 'hash', parts = {1, 'number'}})
+---
+...
+s:insert{ffi.new('double', 0)}
+---
+- [0]
+...
+s:insert{ffi.new('double', -1)}
+---
+- [-1]
+...
+s:insert{ffi.new('double', 9007199254740992)}
+---
+- [9007199254740992]
+...
+s:insert{ffi.new('double', -9007199254740994)}
+---
+- [-9007199254740994]
+...
+s:get(0LL)
+---
+- [0]
+...
+s:get(-1LL)
+---
+- [-1]
+...
+s:get(9007199254740992LL)
+---
+- [9007199254740992]
+...
+s:get(-9007199254740994LL)
+---
+- [-9007199254740994]
+...
+s:drop()
+---
+...
diff --git a/test/box/hash.test.lua b/test/box/hash.test.lua
index 51192851..d97312e3 100644
--- a/test/box/hash.test.lua
+++ b/test/box/hash.test.lua
@@ -332,3 +332,18 @@ space = box.schema.space.create('test')
 index = space:create_index('primary', { type = 'hash' })
 space:select({1}, {iterator = 'BITS_ALL_SET' } )
 space:drop()
+
+-- gh-3907: check that integer numbers stored as MP_FLOAT/MP_DOUBLE
+-- are hashed as MP_INT/MP_UINT.
+ffi = require('ffi')
+s = box.schema.space.create('test')
+_ = s:create_index('primary', {type = 'hash', parts = {1, 'number'}})
+s:insert{ffi.new('double', 0)}
+s:insert{ffi.new('double', -1)}
+s:insert{ffi.new('double', 9007199254740992)}
+s:insert{ffi.new('double', -9007199254740994)}
+s:get(0LL)
+s:get(-1LL)
+s:get(9007199254740992LL)
+s:get(-9007199254740994LL)
+s:drop()
diff --git a/test/vinyl/bloom.result b/test/vinyl/bloom.result
index a1f9aa99..970067fa 100644
--- a/test/vinyl/bloom.result
+++ b/test/vinyl/bloom.result
@@ -313,3 +313,55 @@ s:drop()
 box.cfg{vinyl_cache = vinyl_cache}
 ---
 ...
+--
+-- gh-3907: check that integer numbers stored as MP_FLOAT/MP_DOUBLE
+-- are hashed as MP_INT/MP_UINT.
+--
+ffi = require('ffi')
+---
+...
+s = box.schema.space.create('test', {engine = 'vinyl'})
+---
+...
+_ = s:create_index('primary', {parts = {1, 'number'}})
+---
+...
+s:replace{ffi.new('double', 0)}
+---
+- [0]
+...
+s:replace{ffi.new('double', -1)}
+---
+- [-1]
+...
+s:replace{ffi.new('double', 9007199254740992)}
+---
+- [9007199254740992]
+...
+s:replace{ffi.new('double', -9007199254740994)}
+---
+- [-9007199254740994]
+...
+box.snapshot()
+---
+- ok
+...
+s:get(0LL)
+---
+- [0]
+...
+s:get(-1LL)
+---
+- [-1]
+...
+s:get(9007199254740992LL)
+---
+- [9007199254740992]
+...
+s:get(-9007199254740994LL)
+---
+- [-9007199254740994]
+...
+s:drop()
+---
+...
diff --git a/test/vinyl/bloom.test.lua b/test/vinyl/bloom.test.lua
index 9fa789f0..27e078d6 100644
--- a/test/vinyl/bloom.test.lua
+++ b/test/vinyl/bloom.test.lua
@@ -133,3 +133,21 @@ new_seeks() < 20
 s:drop()
 
 box.cfg{vinyl_cache = vinyl_cache}
+
+--
+-- gh-3907: check that integer numbers stored as MP_FLOAT/MP_DOUBLE
+-- are hashed as MP_INT/MP_UINT.
+--
+ffi = require('ffi')
+s = box.schema.space.create('test', {engine = 'vinyl'})
+_ = s:create_index('primary', {parts = {1, 'number'}})
+s:replace{ffi.new('double', 0)}
+s:replace{ffi.new('double', -1)}
+s:replace{ffi.new('double', 9007199254740992)}
+s:replace{ffi.new('double', -9007199254740994)}
+box.snapshot()
+s:get(0LL)
+s:get(-1LL)
+s:get(9007199254740992LL)
+s:get(-9007199254740994LL)
+s:drop()
-- 
2.11.0




More information about the Tarantool-patches mailing list