From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH v2 1/5] bloom: use malloc for bitmap allocations
Date: Wed, 28 Mar 2018 22:04:57 +0300 [thread overview]
Message-ID: <d0b4a2082492d1c9d08034be395cc97a21ac83b4.1522262496.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1522262496.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1522262496.git.vdavydov.dev@gmail.com>
There's absolutely no point in using mmap() instead of malloc() for
bitmap allocation - malloc() will fallback on mmap() anyway provided
the allocation is large enough.
Note about the unit test: since we don't round the bloom filter size up
to a multiple of page size anymore, we have to use a more sophisticated
hash function for the test to pass.
---
src/lib/salad/bloom.c | 62 ++++++++++++++++++++------------------------------
test/unit/bloom.cc | 7 +++---
test/unit/bloom.result | 4 ++--
test/vinyl/info.result | 16 ++++++-------
4 files changed, 38 insertions(+), 51 deletions(-)
diff --git a/src/lib/salad/bloom.c b/src/lib/salad/bloom.c
index 642024d0..a33d2177 100644
--- a/src/lib/salad/bloom.c
+++ b/src/lib/salad/bloom.c
@@ -30,8 +30,10 @@
*/
#include "bloom.h"
-#include <unistd.h>
-#include <sys/mman.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
#include <limits.h>
#include <math.h>
#include <assert.h>
@@ -42,41 +44,30 @@ bloom_create(struct bloom *bloom, uint32_t number_of_values,
double false_positive_rate, struct quota *quota)
{
/* Optimal hash_count and bit count calculation */
- bloom->hash_count = (uint32_t)
- (log(false_positive_rate) / log(0.5) + 0.99);
- /* Number of bits */
- uint64_t m = (uint64_t)
- (number_of_values * bloom->hash_count / log(2) + 0.5);
- /* mmap page size */
- uint64_t page_size = sysconf(_SC_PAGE_SIZE);
- /* Number of bits in one page */
- uint64_t b = page_size * CHAR_BIT;
- /* number of pages, round up */
- uint64_t p = (uint32_t)((m + b - 1) / b);
- /* bit array size in bytes */
- size_t mmap_size = p * page_size;
- bloom->table_size = p * page_size / sizeof(struct bloom_block);
- if (quota_use(quota, mmap_size) < 0) {
- bloom->table = NULL;
+ uint16_t hash_count = ceil(log(false_positive_rate) / log(0.5));
+ uint64_t bit_count = ceil(number_of_values * hash_count / log(2));
+ uint32_t block_bits = CHAR_BIT * sizeof(struct bloom_block);
+ uint32_t block_count = (bit_count + block_bits - 1) / block_bits;
+
+ if (quota_use(quota, block_count * sizeof(*bloom->table)) < 0)
return -1;
- }
- bloom->table = (struct bloom_block *)
- mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (bloom->table == MAP_FAILED) {
- bloom->table = NULL;
- quota_release(quota, mmap_size);
+
+ bloom->table = calloc(block_count, sizeof(*bloom->table));
+ if (bloom->table == NULL) {
+ quota_release(quota, block_count * sizeof(*bloom->table));
return -1;
}
+
+ bloom->table_size = block_count;
+ bloom->hash_count = hash_count;
return 0;
}
void
bloom_destroy(struct bloom *bloom, struct quota *quota)
{
- size_t mmap_size = bloom->table_size * sizeof(struct bloom_block);
- munmap(bloom->table, mmap_size);
- quota_release(quota, mmap_size);
+ quota_release(quota, bloom->table_size * sizeof(*bloom->table));
+ free(bloom->table);
}
size_t
@@ -96,20 +87,17 @@ bloom_store(const struct bloom *bloom, char *table)
int
bloom_load_table(struct bloom *bloom, const char *table, struct quota *quota)
{
- size_t mmap_size = bloom->table_size * sizeof(struct bloom_block);
- if (quota_use(quota, mmap_size) < 0) {
+ size_t size = bloom->table_size * sizeof(struct bloom_block);
+ if (quota_use(quota, size) < 0) {
bloom->table = NULL;
return -1;
}
- bloom->table = (struct bloom_block *)
- mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (bloom->table == MAP_FAILED) {
- bloom->table = NULL;
- quota_release(quota, mmap_size);
+ bloom->table = malloc(size);
+ if (bloom->table == NULL) {
+ quota_release(quota, size);
return -1;
}
- memcpy(bloom->table, table, mmap_size);
+ memcpy(bloom->table, table, size);
return 0;
}
diff --git a/test/unit/bloom.cc b/test/unit/bloom.cc
index 5e387f7f..e78ab73e 100644
--- a/test/unit/bloom.cc
+++ b/test/unit/bloom.cc
@@ -7,7 +7,7 @@ using namespace std;
uint32_t h(uint32_t i)
{
- return i;
+ return i * 2654435761;
}
void
@@ -44,8 +44,7 @@ simple_test()
bloom_destroy(&bloom, &q);
}
double fp_rate = (double)false_positive / tests;
- double excess = fp_rate / p;
- if (fp_rate > p)
+ if (fp_rate > p + 0.001)
fp_rate_too_big++;
}
cout << "error_count = " << error_count << endl;
@@ -95,7 +94,7 @@ store_load_test()
}
double fp_rate = (double)false_positive / tests;
double excess = fp_rate / p;
- if (fp_rate > p)
+ if (fp_rate > p + 0.001)
fp_rate_too_big++;
}
cout << "error_count = " << error_count << endl;
diff --git a/test/unit/bloom.result b/test/unit/bloom.result
index a7d5a5bd..39177f51 100644
--- a/test/unit/bloom.result
+++ b/test/unit/bloom.result
@@ -9,10 +9,10 @@ fp_rate_too_big = 0
memory after destruction = 0
*** spectrum_test ***
-bloom table size = 128
+bloom table size = 79
error_count = 0
fpr_rate_is_good = 1
-bloom table size = 128
+bloom table size = 106
error_count = 0
fpr_rate_is_good = 1
memory after destruction = 0
diff --git a/test/vinyl/info.result b/test/vinyl/info.result
index 02123600..4ca0f7f3 100644
--- a/test/vinyl/info.result
+++ b/test/vinyl/info.result
@@ -266,7 +266,7 @@ stat_diff(istat(), st)
bytes: 26049
index_size: 294
rows: 25
- bloom_size: 4096
+ bloom_size: 64
pages: 7
bytes: 26049
bytes_compressed: <bytes_compressed>
@@ -982,7 +982,7 @@ istat()
bytes: 0
pages: 25
bytes_compressed: <bytes_compressed>
- bloom_size: 8192
+ bloom_size: 128
txw:
bytes: 0
rows: 0
@@ -1120,8 +1120,8 @@ i1:len(), i2:len()
...
i1:bsize(), i2:bsize()
---
-- 4390
-- 4946
+- 358
+- 914
...
s:bsize() == st1.disk.bytes
---
@@ -1166,8 +1166,8 @@ i1:len(), i2:len()
...
i1:bsize(), i2:bsize()
---
-- 53542
-- 54098
+- 49510
+- 50066
...
s:bsize() == st1.memory.bytes + st1.disk.bytes
---
@@ -1216,8 +1216,8 @@ i1:len(), i2:len()
...
i1:bsize(), i2:bsize()
---
-- 4390
-- 4946
+- 358
+- 914
...
s:bsize() == st1.disk.bytes
---
--
2.11.0
next prev parent reply other threads:[~2018-03-28 19:04 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-28 19:04 [PATCH v2 0/5] vinyl: multi-part bloom filter Vladimir Davydov
2018-03-28 19:04 ` Vladimir Davydov [this message]
2018-03-28 19:04 ` [PATCH v2 2/5] bloom: rename bloom_possible_has to bloom_maybe_has Vladimir Davydov
2018-03-28 19:04 ` [PATCH v2 3/5] vinyl: introduce bloom filters for partial key lookups Vladimir Davydov
2018-03-28 19:05 ` [PATCH v2 4/5] bloom: optimize tuple bloom filter size Vladimir Davydov
2018-03-28 19:05 ` [PATCH v2 5/5] bloom: drop spectrum Vladimir Davydov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=d0b4a2082492d1c9d08034be395cc97a21ac83b4.1522262496.git.vdavydov.dev@gmail.com \
--to=vdavydov.dev@gmail.com \
--cc=kostja@tarantool.org \
--cc=tarantool-patches@freelists.org \
--subject='Re: [PATCH v2 1/5] bloom: use malloc for bitmap allocations' \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox