From: Vladimir Davydov <vdavydov.dev@gmail.com> To: tarantool-patches@freelists.org Subject: [PATCH] tuple: fix hashing of integer numbers Date: Tue, 22 Jan 2019 19:13:52 +0300 [thread overview] Message-ID: <3bb1fa694ee45642ab6b3c09926f3c577a109fab.1548171646.git.vdavydov.dev@gmail.com> (raw) Integer numbers stored in tuples as MP_FLOAT/MP_DOUBLE are hashed differently from integer numbers stored as MP_INT/MP_UINT. This breaks select() for memtx hash indexes and vinyl indexes (the latter use bloom filters). Fix this by converting MP_FLOAT/MP_DOUBLE to MP_INT/MP_UINT before hashing if the value can be stored as an integer. This is consistent with the behavior of tuple comparators, which treat MP_FLOAT and MP_INT as equal in case they represent the same number. Closes #3907 --- https://github.com/tarantool/tarantool/issues/3907 https://github.com/tarantool/tarantool/commits/dv/gh-3907-fix-integer-number-hashing src/box/tuple_hash.cc | 29 ++++++++++++++++++++++++++ test/box/hash.result | 46 +++++++++++++++++++++++++++++++++++++++++ test/box/hash.test.lua | 15 ++++++++++++++ test/vinyl/bloom.result | 52 +++++++++++++++++++++++++++++++++++++++++++++++ test/vinyl/bloom.test.lua | 18 ++++++++++++++++ 5 files changed, 160 insertions(+) diff --git a/src/box/tuple_hash.cc b/src/box/tuple_hash.cc index b394804f..96802ad0 100644 --- a/src/box/tuple_hash.cc +++ b/src/box/tuple_hash.cc @@ -32,6 +32,7 @@ #include "tuple_hash.h" #include "third_party/PMurHash.h" #include "coll.h" +#include <math.h> /* Tuple and key hasher */ namespace { @@ -282,6 +283,34 @@ tuple_hash_field(uint32_t *ph1, uint32_t *pcarry, const char **field, if (coll != NULL) return coll->hash(f, size, ph1, pcarry, coll); break; + case MP_FLOAT: + case MP_DOUBLE: { + /* + * If a floating point number can be stored as an integer, + * convert it to MP_INT/MP_UINT before hashing so that we + * can select integer values by floating point keys and + * vice versa. + */ + double iptr; + double val = mp_typeof(**field) == MP_FLOAT ? + mp_decode_float(field) : + mp_decode_double(field); + if (!isfinite(val) || modf(val, &iptr) != 0 || + val < -exp(63) || val >= exp(64)) { + size = *field - f; + break; + } + char *data; + char buf[9]; /* enough to store MP_INT/MP_UINT */ + if (val >= 0) + data = mp_encode_uint(buf, (uint64_t)val); + else + data = mp_encode_int(buf, (int64_t)val); + size = data - buf; + assert(size <= sizeof(buf)); + f = buf; + break; + } default: mp_next(field); size = *field - f; /* calculate the size of field */ diff --git a/test/box/hash.result b/test/box/hash.result index 6893a1be..f0c30ade 100644 --- a/test/box/hash.result +++ b/test/box/hash.result @@ -786,3 +786,49 @@ space:select({1}, {iterator = 'BITS_ALL_SET' } ) space:drop() --- ... +-- gh-3907: check that integer numbers stored as MP_FLOAT/MP_DOUBLE +-- are hashed as MP_INT/MP_UINT. +ffi = require('ffi') +--- +... +s = box.schema.space.create('test') +--- +... +_ = s:create_index('primary', {type = 'hash', parts = {1, 'number'}}) +--- +... +s:insert{ffi.new('double', 0)} +--- +- [0] +... +s:insert{ffi.new('double', -1)} +--- +- [-1] +... +s:insert{ffi.new('double', 9007199254740992)} +--- +- [9007199254740992] +... +s:insert{ffi.new('double', -9007199254740994)} +--- +- [-9007199254740994] +... +s:get(0LL) +--- +- [0] +... +s:get(-1LL) +--- +- [-1] +... +s:get(9007199254740992LL) +--- +- [9007199254740992] +... +s:get(-9007199254740994LL) +--- +- [-9007199254740994] +... +s:drop() +--- +... diff --git a/test/box/hash.test.lua b/test/box/hash.test.lua index 51192851..d97312e3 100644 --- a/test/box/hash.test.lua +++ b/test/box/hash.test.lua @@ -332,3 +332,18 @@ space = box.schema.space.create('test') index = space:create_index('primary', { type = 'hash' }) space:select({1}, {iterator = 'BITS_ALL_SET' } ) space:drop() + +-- gh-3907: check that integer numbers stored as MP_FLOAT/MP_DOUBLE +-- are hashed as MP_INT/MP_UINT. +ffi = require('ffi') +s = box.schema.space.create('test') +_ = s:create_index('primary', {type = 'hash', parts = {1, 'number'}}) +s:insert{ffi.new('double', 0)} +s:insert{ffi.new('double', -1)} +s:insert{ffi.new('double', 9007199254740992)} +s:insert{ffi.new('double', -9007199254740994)} +s:get(0LL) +s:get(-1LL) +s:get(9007199254740992LL) +s:get(-9007199254740994LL) +s:drop() diff --git a/test/vinyl/bloom.result b/test/vinyl/bloom.result index a1f9aa99..970067fa 100644 --- a/test/vinyl/bloom.result +++ b/test/vinyl/bloom.result @@ -313,3 +313,55 @@ s:drop() box.cfg{vinyl_cache = vinyl_cache} --- ... +-- +-- gh-3907: check that integer numbers stored as MP_FLOAT/MP_DOUBLE +-- are hashed as MP_INT/MP_UINT. +-- +ffi = require('ffi') +--- +... +s = box.schema.space.create('test', {engine = 'vinyl'}) +--- +... +_ = s:create_index('primary', {parts = {1, 'number'}}) +--- +... +s:replace{ffi.new('double', 0)} +--- +- [0] +... +s:replace{ffi.new('double', -1)} +--- +- [-1] +... +s:replace{ffi.new('double', 9007199254740992)} +--- +- [9007199254740992] +... +s:replace{ffi.new('double', -9007199254740994)} +--- +- [-9007199254740994] +... +box.snapshot() +--- +- ok +... +s:get(0LL) +--- +- [0] +... +s:get(-1LL) +--- +- [-1] +... +s:get(9007199254740992LL) +--- +- [9007199254740992] +... +s:get(-9007199254740994LL) +--- +- [-9007199254740994] +... +s:drop() +--- +... diff --git a/test/vinyl/bloom.test.lua b/test/vinyl/bloom.test.lua index 9fa789f0..27e078d6 100644 --- a/test/vinyl/bloom.test.lua +++ b/test/vinyl/bloom.test.lua @@ -133,3 +133,21 @@ new_seeks() < 20 s:drop() box.cfg{vinyl_cache = vinyl_cache} + +-- +-- gh-3907: check that integer numbers stored as MP_FLOAT/MP_DOUBLE +-- are hashed as MP_INT/MP_UINT. +-- +ffi = require('ffi') +s = box.schema.space.create('test', {engine = 'vinyl'}) +_ = s:create_index('primary', {parts = {1, 'number'}}) +s:replace{ffi.new('double', 0)} +s:replace{ffi.new('double', -1)} +s:replace{ffi.new('double', 9007199254740992)} +s:replace{ffi.new('double', -9007199254740994)} +box.snapshot() +s:get(0LL) +s:get(-1LL) +s:get(9007199254740992LL) +s:get(-9007199254740994LL) +s:drop() -- 2.11.0
next reply other threads:[~2019-01-22 16:13 UTC|newest] Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top 2019-01-22 16:13 Vladimir Davydov [this message] 2019-01-29 13:33 ` [tarantool-patches] " Kirill Yukhin
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=3bb1fa694ee45642ab6b3c09926f3c577a109fab.1548171646.git.vdavydov.dev@gmail.com \ --to=vdavydov.dev@gmail.com \ --cc=tarantool-patches@freelists.org \ --subject='Re: [PATCH] tuple: fix hashing of integer numbers' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox