Tarantool development patches archive
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov.dev@gmail.com>
To: kostja@tarantool.org
Cc: tarantool-patches@freelists.org
Subject: [PATCH v2 1/5] bloom: use malloc for bitmap allocations
Date: Wed, 28 Mar 2018 22:04:57 +0300	[thread overview]
Message-ID: <d0b4a2082492d1c9d08034be395cc97a21ac83b4.1522262496.git.vdavydov.dev@gmail.com> (raw)
In-Reply-To: <cover.1522262496.git.vdavydov.dev@gmail.com>
In-Reply-To: <cover.1522262496.git.vdavydov.dev@gmail.com>

There's absolutely no point in using mmap() instead of malloc() for
bitmap allocation - malloc() will fallback on mmap() anyway provided
the allocation is large enough.

Note about the unit test: since we don't round the bloom filter size up
to a multiple of page size anymore, we have to use a more sophisticated
hash function for the test to pass.
---
 src/lib/salad/bloom.c  | 62 ++++++++++++++++++++------------------------------
 test/unit/bloom.cc     |  7 +++---
 test/unit/bloom.result |  4 ++--
 test/vinyl/info.result | 16 ++++++-------
 4 files changed, 38 insertions(+), 51 deletions(-)

diff --git a/src/lib/salad/bloom.c b/src/lib/salad/bloom.c
index 642024d0..a33d2177 100644
--- a/src/lib/salad/bloom.c
+++ b/src/lib/salad/bloom.c
@@ -30,8 +30,10 @@
  */
 
 #include "bloom.h"
-#include <unistd.h>
-#include <sys/mman.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
 #include <limits.h>
 #include <math.h>
 #include <assert.h>
@@ -42,41 +44,30 @@ bloom_create(struct bloom *bloom, uint32_t number_of_values,
 	     double false_positive_rate, struct quota *quota)
 {
 	/* Optimal hash_count and bit count calculation */
-	bloom->hash_count = (uint32_t)
-		(log(false_positive_rate) / log(0.5) + 0.99);
-	/* Number of bits */
-	uint64_t m = (uint64_t)
-		(number_of_values * bloom->hash_count / log(2) + 0.5);
-	/* mmap page size */
-	uint64_t page_size = sysconf(_SC_PAGE_SIZE);
-	/* Number of bits in one page */
-	uint64_t b = page_size * CHAR_BIT;
-	/* number of pages, round up */
-	uint64_t p = (uint32_t)((m + b - 1) / b);
-	/* bit array size in bytes */
-	size_t mmap_size = p * page_size;
-	bloom->table_size = p * page_size / sizeof(struct bloom_block);
-	if (quota_use(quota, mmap_size) < 0) {
-		bloom->table = NULL;
+	uint16_t hash_count = ceil(log(false_positive_rate) / log(0.5));
+	uint64_t bit_count = ceil(number_of_values * hash_count / log(2));
+	uint32_t block_bits = CHAR_BIT * sizeof(struct bloom_block);
+	uint32_t block_count = (bit_count + block_bits - 1) / block_bits;
+
+	if (quota_use(quota, block_count * sizeof(*bloom->table)) < 0)
 		return -1;
-	}
-	bloom->table = (struct bloom_block *)
-		mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
-		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (bloom->table == MAP_FAILED) {
-		bloom->table = NULL;
-		quota_release(quota, mmap_size);
+
+	bloom->table = calloc(block_count, sizeof(*bloom->table));
+	if (bloom->table == NULL) {
+		quota_release(quota, block_count * sizeof(*bloom->table));
 		return -1;
 	}
+
+	bloom->table_size = block_count;
+	bloom->hash_count = hash_count;
 	return 0;
 }
 
 void
 bloom_destroy(struct bloom *bloom, struct quota *quota)
 {
-	size_t mmap_size = bloom->table_size * sizeof(struct bloom_block);
-	munmap(bloom->table, mmap_size);
-	quota_release(quota, mmap_size);
+	quota_release(quota, bloom->table_size * sizeof(*bloom->table));
+	free(bloom->table);
 }
 
 size_t
@@ -96,20 +87,17 @@ bloom_store(const struct bloom *bloom, char *table)
 int
 bloom_load_table(struct bloom *bloom, const char *table, struct quota *quota)
 {
-	size_t mmap_size = bloom->table_size * sizeof(struct bloom_block);
-	if (quota_use(quota, mmap_size) < 0) {
+	size_t size = bloom->table_size * sizeof(struct bloom_block);
+	if (quota_use(quota, size) < 0) {
 		bloom->table = NULL;
 		return -1;
 	}
-	bloom->table = (struct bloom_block *)
-		mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
-		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (bloom->table == MAP_FAILED) {
-		bloom->table = NULL;
-		quota_release(quota, mmap_size);
+	bloom->table = malloc(size);
+	if (bloom->table == NULL) {
+		quota_release(quota, size);
 		return -1;
 	}
-	memcpy(bloom->table, table, mmap_size);
+	memcpy(bloom->table, table, size);
 	return 0;
 }
 
diff --git a/test/unit/bloom.cc b/test/unit/bloom.cc
index 5e387f7f..e78ab73e 100644
--- a/test/unit/bloom.cc
+++ b/test/unit/bloom.cc
@@ -7,7 +7,7 @@ using namespace std;
 
 uint32_t h(uint32_t i)
 {
-	return i;
+	return i * 2654435761;
 }
 
 void
@@ -44,8 +44,7 @@ simple_test()
 			bloom_destroy(&bloom, &q);
 		}
 		double fp_rate = (double)false_positive / tests;
-		double excess = fp_rate / p;
-		if (fp_rate > p)
+		if (fp_rate > p + 0.001)
 			fp_rate_too_big++;
 	}
 	cout << "error_count = " << error_count << endl;
@@ -95,7 +94,7 @@ store_load_test()
 		}
 		double fp_rate = (double)false_positive / tests;
 		double excess = fp_rate / p;
-		if (fp_rate > p)
+		if (fp_rate > p + 0.001)
 			fp_rate_too_big++;
 	}
 	cout << "error_count = " << error_count << endl;
diff --git a/test/unit/bloom.result b/test/unit/bloom.result
index a7d5a5bd..39177f51 100644
--- a/test/unit/bloom.result
+++ b/test/unit/bloom.result
@@ -9,10 +9,10 @@ fp_rate_too_big = 0
 memory after destruction = 0
 
 *** spectrum_test ***
-bloom table size = 128
+bloom table size = 79
 error_count = 0
 fpr_rate_is_good = 1
-bloom table size = 128
+bloom table size = 106
 error_count = 0
 fpr_rate_is_good = 1
 memory after destruction = 0
diff --git a/test/vinyl/info.result b/test/vinyl/info.result
index 02123600..4ca0f7f3 100644
--- a/test/vinyl/info.result
+++ b/test/vinyl/info.result
@@ -266,7 +266,7 @@ stat_diff(istat(), st)
         bytes: 26049
     index_size: 294
     rows: 25
-    bloom_size: 4096
+    bloom_size: 64
     pages: 7
     bytes: 26049
     bytes_compressed: <bytes_compressed>
@@ -982,7 +982,7 @@ istat()
         bytes: 0
     pages: 25
     bytes_compressed: <bytes_compressed>
-    bloom_size: 8192
+    bloom_size: 128
   txw:
     bytes: 0
     rows: 0
@@ -1120,8 +1120,8 @@ i1:len(), i2:len()
 ...
 i1:bsize(), i2:bsize()
 ---
-- 4390
-- 4946
+- 358
+- 914
 ...
 s:bsize() == st1.disk.bytes
 ---
@@ -1166,8 +1166,8 @@ i1:len(), i2:len()
 ...
 i1:bsize(), i2:bsize()
 ---
-- 53542
-- 54098
+- 49510
+- 50066
 ...
 s:bsize() == st1.memory.bytes + st1.disk.bytes
 ---
@@ -1216,8 +1216,8 @@ i1:len(), i2:len()
 ...
 i1:bsize(), i2:bsize()
 ---
-- 4390
-- 4946
+- 358
+- 914
 ...
 s:bsize() == st1.disk.bytes
 ---
-- 
2.11.0

  reply	other threads:[~2018-03-28 19:04 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-28 19:04 [PATCH v2 0/5] vinyl: multi-part bloom filter Vladimir Davydov
2018-03-28 19:04 ` Vladimir Davydov [this message]
2018-03-28 19:04 ` [PATCH v2 2/5] bloom: rename bloom_possible_has to bloom_maybe_has Vladimir Davydov
2018-03-28 19:04 ` [PATCH v2 3/5] vinyl: introduce bloom filters for partial key lookups Vladimir Davydov
2018-03-28 19:05 ` [PATCH v2 4/5] bloom: optimize tuple bloom filter size Vladimir Davydov
2018-03-28 19:05 ` [PATCH v2 5/5] bloom: drop spectrum Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d0b4a2082492d1c9d08034be395cc97a21ac83b4.1522262496.git.vdavydov.dev@gmail.com \
    --to=vdavydov.dev@gmail.com \
    --cc=kostja@tarantool.org \
    --cc=tarantool-patches@freelists.org \
    --subject='Re: [PATCH v2 1/5] bloom: use malloc for bitmap allocations' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox