From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: From: Vladimir Davydov Subject: [PATCH v2 1/5] bloom: use malloc for bitmap allocations Date: Wed, 28 Mar 2018 22:04:57 +0300 Message-Id: In-Reply-To: References: In-Reply-To: References: To: kostja@tarantool.org Cc: tarantool-patches@freelists.org List-ID: There's absolutely no point in using mmap() instead of malloc() for bitmap allocation - malloc() will fallback on mmap() anyway provided the allocation is large enough. Note about the unit test: since we don't round the bloom filter size up to a multiple of page size anymore, we have to use a more sophisticated hash function for the test to pass. --- src/lib/salad/bloom.c | 62 ++++++++++++++++++++------------------------------ test/unit/bloom.cc | 7 +++--- test/unit/bloom.result | 4 ++-- test/vinyl/info.result | 16 ++++++------- 4 files changed, 38 insertions(+), 51 deletions(-) diff --git a/src/lib/salad/bloom.c b/src/lib/salad/bloom.c index 642024d0..a33d2177 100644 --- a/src/lib/salad/bloom.c +++ b/src/lib/salad/bloom.c @@ -30,8 +30,10 @@ */ #include "bloom.h" -#include -#include +#include +#include +#include +#include #include #include #include @@ -42,41 +44,30 @@ bloom_create(struct bloom *bloom, uint32_t number_of_values, double false_positive_rate, struct quota *quota) { /* Optimal hash_count and bit count calculation */ - bloom->hash_count = (uint32_t) - (log(false_positive_rate) / log(0.5) + 0.99); - /* Number of bits */ - uint64_t m = (uint64_t) - (number_of_values * bloom->hash_count / log(2) + 0.5); - /* mmap page size */ - uint64_t page_size = sysconf(_SC_PAGE_SIZE); - /* Number of bits in one page */ - uint64_t b = page_size * CHAR_BIT; - /* number of pages, round up */ - uint64_t p = (uint32_t)((m + b - 1) / b); - /* bit array size in bytes */ - size_t mmap_size = p * page_size; - bloom->table_size = p * page_size / sizeof(struct bloom_block); - if (quota_use(quota, mmap_size) < 0) { - bloom->table = NULL; + uint16_t hash_count = ceil(log(false_positive_rate) / log(0.5)); + uint64_t bit_count = ceil(number_of_values * hash_count / log(2)); + uint32_t block_bits = CHAR_BIT * sizeof(struct bloom_block); + uint32_t block_count = (bit_count + block_bits - 1) / block_bits; + + if (quota_use(quota, block_count * sizeof(*bloom->table)) < 0) return -1; - } - bloom->table = (struct bloom_block *) - mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (bloom->table == MAP_FAILED) { - bloom->table = NULL; - quota_release(quota, mmap_size); + + bloom->table = calloc(block_count, sizeof(*bloom->table)); + if (bloom->table == NULL) { + quota_release(quota, block_count * sizeof(*bloom->table)); return -1; } + + bloom->table_size = block_count; + bloom->hash_count = hash_count; return 0; } void bloom_destroy(struct bloom *bloom, struct quota *quota) { - size_t mmap_size = bloom->table_size * sizeof(struct bloom_block); - munmap(bloom->table, mmap_size); - quota_release(quota, mmap_size); + quota_release(quota, bloom->table_size * sizeof(*bloom->table)); + free(bloom->table); } size_t @@ -96,20 +87,17 @@ bloom_store(const struct bloom *bloom, char *table) int bloom_load_table(struct bloom *bloom, const char *table, struct quota *quota) { - size_t mmap_size = bloom->table_size * sizeof(struct bloom_block); - if (quota_use(quota, mmap_size) < 0) { + size_t size = bloom->table_size * sizeof(struct bloom_block); + if (quota_use(quota, size) < 0) { bloom->table = NULL; return -1; } - bloom->table = (struct bloom_block *) - mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (bloom->table == MAP_FAILED) { - bloom->table = NULL; - quota_release(quota, mmap_size); + bloom->table = malloc(size); + if (bloom->table == NULL) { + quota_release(quota, size); return -1; } - memcpy(bloom->table, table, mmap_size); + memcpy(bloom->table, table, size); return 0; } diff --git a/test/unit/bloom.cc b/test/unit/bloom.cc index 5e387f7f..e78ab73e 100644 --- a/test/unit/bloom.cc +++ b/test/unit/bloom.cc @@ -7,7 +7,7 @@ using namespace std; uint32_t h(uint32_t i) { - return i; + return i * 2654435761; } void @@ -44,8 +44,7 @@ simple_test() bloom_destroy(&bloom, &q); } double fp_rate = (double)false_positive / tests; - double excess = fp_rate / p; - if (fp_rate > p) + if (fp_rate > p + 0.001) fp_rate_too_big++; } cout << "error_count = " << error_count << endl; @@ -95,7 +94,7 @@ store_load_test() } double fp_rate = (double)false_positive / tests; double excess = fp_rate / p; - if (fp_rate > p) + if (fp_rate > p + 0.001) fp_rate_too_big++; } cout << "error_count = " << error_count << endl; diff --git a/test/unit/bloom.result b/test/unit/bloom.result index a7d5a5bd..39177f51 100644 --- a/test/unit/bloom.result +++ b/test/unit/bloom.result @@ -9,10 +9,10 @@ fp_rate_too_big = 0 memory after destruction = 0 *** spectrum_test *** -bloom table size = 128 +bloom table size = 79 error_count = 0 fpr_rate_is_good = 1 -bloom table size = 128 +bloom table size = 106 error_count = 0 fpr_rate_is_good = 1 memory after destruction = 0 diff --git a/test/vinyl/info.result b/test/vinyl/info.result index 02123600..4ca0f7f3 100644 --- a/test/vinyl/info.result +++ b/test/vinyl/info.result @@ -266,7 +266,7 @@ stat_diff(istat(), st) bytes: 26049 index_size: 294 rows: 25 - bloom_size: 4096 + bloom_size: 64 pages: 7 bytes: 26049 bytes_compressed: @@ -982,7 +982,7 @@ istat() bytes: 0 pages: 25 bytes_compressed: - bloom_size: 8192 + bloom_size: 128 txw: bytes: 0 rows: 0 @@ -1120,8 +1120,8 @@ i1:len(), i2:len() ... i1:bsize(), i2:bsize() --- -- 4390 -- 4946 +- 358 +- 914 ... s:bsize() == st1.disk.bytes --- @@ -1166,8 +1166,8 @@ i1:len(), i2:len() ... i1:bsize(), i2:bsize() --- -- 53542 -- 54098 +- 49510 +- 50066 ... s:bsize() == st1.memory.bytes + st1.disk.bytes --- @@ -1216,8 +1216,8 @@ i1:len(), i2:len() ... i1:bsize(), i2:bsize() --- -- 4390 -- 4946 +- 358 +- 914 ... s:bsize() == st1.disk.bytes --- -- 2.11.0