[PATCH v2 1/5] bloom: use malloc for bitmap allocations
Vladimir Davydov
vdavydov.dev at gmail.com
Wed Mar 28 22:04:57 MSK 2018
There's absolutely no point in using mmap() instead of malloc() for
bitmap allocation - malloc() will fallback on mmap() anyway provided
the allocation is large enough.
Note about the unit test: since we don't round the bloom filter size up
to a multiple of page size anymore, we have to use a more sophisticated
hash function for the test to pass.
---
src/lib/salad/bloom.c | 62 ++++++++++++++++++++------------------------------
test/unit/bloom.cc | 7 +++---
test/unit/bloom.result | 4 ++--
test/vinyl/info.result | 16 ++++++-------
4 files changed, 38 insertions(+), 51 deletions(-)
diff --git a/src/lib/salad/bloom.c b/src/lib/salad/bloom.c
index 642024d0..a33d2177 100644
--- a/src/lib/salad/bloom.c
+++ b/src/lib/salad/bloom.c
@@ -30,8 +30,10 @@
*/
#include "bloom.h"
-#include <unistd.h>
-#include <sys/mman.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
#include <limits.h>
#include <math.h>
#include <assert.h>
@@ -42,41 +44,30 @@ bloom_create(struct bloom *bloom, uint32_t number_of_values,
double false_positive_rate, struct quota *quota)
{
/* Optimal hash_count and bit count calculation */
- bloom->hash_count = (uint32_t)
- (log(false_positive_rate) / log(0.5) + 0.99);
- /* Number of bits */
- uint64_t m = (uint64_t)
- (number_of_values * bloom->hash_count / log(2) + 0.5);
- /* mmap page size */
- uint64_t page_size = sysconf(_SC_PAGE_SIZE);
- /* Number of bits in one page */
- uint64_t b = page_size * CHAR_BIT;
- /* number of pages, round up */
- uint64_t p = (uint32_t)((m + b - 1) / b);
- /* bit array size in bytes */
- size_t mmap_size = p * page_size;
- bloom->table_size = p * page_size / sizeof(struct bloom_block);
- if (quota_use(quota, mmap_size) < 0) {
- bloom->table = NULL;
+ uint16_t hash_count = ceil(log(false_positive_rate) / log(0.5));
+ uint64_t bit_count = ceil(number_of_values * hash_count / log(2));
+ uint32_t block_bits = CHAR_BIT * sizeof(struct bloom_block);
+ uint32_t block_count = (bit_count + block_bits - 1) / block_bits;
+
+ if (quota_use(quota, block_count * sizeof(*bloom->table)) < 0)
return -1;
- }
- bloom->table = (struct bloom_block *)
- mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (bloom->table == MAP_FAILED) {
- bloom->table = NULL;
- quota_release(quota, mmap_size);
+
+ bloom->table = calloc(block_count, sizeof(*bloom->table));
+ if (bloom->table == NULL) {
+ quota_release(quota, block_count * sizeof(*bloom->table));
return -1;
}
+
+ bloom->table_size = block_count;
+ bloom->hash_count = hash_count;
return 0;
}
void
bloom_destroy(struct bloom *bloom, struct quota *quota)
{
- size_t mmap_size = bloom->table_size * sizeof(struct bloom_block);
- munmap(bloom->table, mmap_size);
- quota_release(quota, mmap_size);
+ quota_release(quota, bloom->table_size * sizeof(*bloom->table));
+ free(bloom->table);
}
size_t
@@ -96,20 +87,17 @@ bloom_store(const struct bloom *bloom, char *table)
int
bloom_load_table(struct bloom *bloom, const char *table, struct quota *quota)
{
- size_t mmap_size = bloom->table_size * sizeof(struct bloom_block);
- if (quota_use(quota, mmap_size) < 0) {
+ size_t size = bloom->table_size * sizeof(struct bloom_block);
+ if (quota_use(quota, size) < 0) {
bloom->table = NULL;
return -1;
}
- bloom->table = (struct bloom_block *)
- mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (bloom->table == MAP_FAILED) {
- bloom->table = NULL;
- quota_release(quota, mmap_size);
+ bloom->table = malloc(size);
+ if (bloom->table == NULL) {
+ quota_release(quota, size);
return -1;
}
- memcpy(bloom->table, table, mmap_size);
+ memcpy(bloom->table, table, size);
return 0;
}
diff --git a/test/unit/bloom.cc b/test/unit/bloom.cc
index 5e387f7f..e78ab73e 100644
--- a/test/unit/bloom.cc
+++ b/test/unit/bloom.cc
@@ -7,7 +7,7 @@ using namespace std;
uint32_t h(uint32_t i)
{
- return i;
+ return i * 2654435761;
}
void
@@ -44,8 +44,7 @@ simple_test()
bloom_destroy(&bloom, &q);
}
double fp_rate = (double)false_positive / tests;
- double excess = fp_rate / p;
- if (fp_rate > p)
+ if (fp_rate > p + 0.001)
fp_rate_too_big++;
}
cout << "error_count = " << error_count << endl;
@@ -95,7 +94,7 @@ store_load_test()
}
double fp_rate = (double)false_positive / tests;
double excess = fp_rate / p;
- if (fp_rate > p)
+ if (fp_rate > p + 0.001)
fp_rate_too_big++;
}
cout << "error_count = " << error_count << endl;
diff --git a/test/unit/bloom.result b/test/unit/bloom.result
index a7d5a5bd..39177f51 100644
--- a/test/unit/bloom.result
+++ b/test/unit/bloom.result
@@ -9,10 +9,10 @@ fp_rate_too_big = 0
memory after destruction = 0
*** spectrum_test ***
-bloom table size = 128
+bloom table size = 79
error_count = 0
fpr_rate_is_good = 1
-bloom table size = 128
+bloom table size = 106
error_count = 0
fpr_rate_is_good = 1
memory after destruction = 0
diff --git a/test/vinyl/info.result b/test/vinyl/info.result
index 02123600..4ca0f7f3 100644
--- a/test/vinyl/info.result
+++ b/test/vinyl/info.result
@@ -266,7 +266,7 @@ stat_diff(istat(), st)
bytes: 26049
index_size: 294
rows: 25
- bloom_size: 4096
+ bloom_size: 64
pages: 7
bytes: 26049
bytes_compressed: <bytes_compressed>
@@ -982,7 +982,7 @@ istat()
bytes: 0
pages: 25
bytes_compressed: <bytes_compressed>
- bloom_size: 8192
+ bloom_size: 128
txw:
bytes: 0
rows: 0
@@ -1120,8 +1120,8 @@ i1:len(), i2:len()
...
i1:bsize(), i2:bsize()
---
-- 4390
-- 4946
+- 358
+- 914
...
s:bsize() == st1.disk.bytes
---
@@ -1166,8 +1166,8 @@ i1:len(), i2:len()
...
i1:bsize(), i2:bsize()
---
-- 53542
-- 54098
+- 49510
+- 50066
...
s:bsize() == st1.memory.bytes + st1.disk.bytes
---
@@ -1216,8 +1216,8 @@ i1:len(), i2:len()
...
i1:bsize(), i2:bsize()
---
-- 4390
-- 4946
+- 358
+- 914
...
s:bsize() == st1.disk.bytes
---
--
2.11.0
More information about the Tarantool-patches
mailing list