[Tarantool-patches] [PATCH v2 04/10] crc32: align memory access

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Thu May 28 02:32:23 MSK 2020


There is some assembly working with a byte array like with an
array of unsigned long values. That is incorrect, because the byte
array may be not aligned by 'unsigned long'.

The patch makes crc32 calculate the hash on the prefix
byte-by-byte, then word-by-word on aligned addresses, and again
byte-by-byte on a tail which is less than word.

Assuming that the word-by-word part is the longest, this should
reduce number of memory/cache loads in ~x2 times. Because in case
of a not aligned word load it was necessary to load 2 words, and
then merge them into one. When addresses are aligned, this is only
1 load.

Part of #4609
---
 src/cpu_feature.c        | 31 ++++++++++++-----
 test/unit/CMakeLists.txt |  3 ++
 test/unit/crc32.c        | 73 ++++++++++++++++++++++++++++++++++++++++
 test/unit/crc32.result   | 11 ++++++
 4 files changed, 110 insertions(+), 8 deletions(-)
 create mode 100644 test/unit/crc32.c
 create mode 100644 test/unit/crc32.result

diff --git a/src/cpu_feature.c b/src/cpu_feature.c
index 98567ccb3..9bf6223de 100644
--- a/src/cpu_feature.c
+++ b/src/cpu_feature.c
@@ -29,6 +29,7 @@
  * SUCH DAMAGE.
  */
 #include "trivia/config.h"
+#include "trivia/util.h"
 #include <sys/types.h>
 #include <errno.h>
 #include <stdlib.h>
@@ -50,7 +51,7 @@
 
 
 static uint32_t
-crc32c_hw_byte(uint32_t crc, unsigned char const *data, unsigned int length)
+crc32c_hw_byte(uint32_t crc, char const *data, unsigned int length)
 {
 	while (length--) {
 		__asm__ __volatile__(
@@ -68,6 +69,26 @@ crc32c_hw_byte(uint32_t crc, unsigned char const *data, unsigned int length)
 uint32_t
 crc32c_hw(uint32_t crc, const char *buf, unsigned int len)
 {
+	const int align = alignof(unsigned long);
+	unsigned long addr = (unsigned long)buf;
+	unsigned int not_aligned_prefix =
+		((addr - 1 + align) & ~(align - 1)) - addr;
+	/*
+	 * Calculate CRC32 for the prefix byte-by-byte so as to
+	 * then use aligned words to calculate the rest. This is
+	 * twice less loads, because every load takes exactly one
+	 * word from memory. Not 2 words, which would need to be
+	 * partially merged then.
+	 * But the main reason is that unaligned loads are just
+	 * unsafe, because this is an undefined behaviour.
+	 */
+	if (not_aligned_prefix < len) {
+		crc = crc32c_hw_byte(crc, buf, not_aligned_prefix);
+		buf += not_aligned_prefix;
+		len -= not_aligned_prefix;
+	} else {
+		return crc32c_hw_byte(crc, buf, len);
+	}
 	unsigned int iquotient = len / SCALE_F;
 	unsigned int iremainder = len % SCALE_F;
 	unsigned long *ptmp = (unsigned long *)buf;
@@ -80,13 +101,7 @@ crc32c_hw(uint32_t crc, const char *buf, unsigned int len)
 		);
 		ptmp++;
 	}
-
-	if (iremainder) {
-		crc = crc32c_hw_byte(crc, (unsigned char const*)ptmp,
-							iremainder);
-	}
-
-	return crc;
+	return crc32c_hw_byte(crc, (const char *)ptmp, iremainder);
 }
 
 bool
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 3bdfc6bb1..672122118 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -138,6 +138,9 @@ target_link_libraries(scramble.test scramble)
 add_executable(guava.test guava.c)
 target_link_libraries(guava.test salad small)
 
+add_executable(crc32.test crc32.c)
+target_link_libraries(crc32.test unit crc32)
+
 add_executable(find_path.test find_path.c
     ${CMAKE_SOURCE_DIR}/src/find_path.c
 )
diff --git a/test/unit/crc32.c b/test/unit/crc32.c
new file mode 100644
index 000000000..7493bc0db
--- /dev/null
+++ b/test/unit/crc32.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2010-2020, Tarantool AUTHORS, please see AUTHORS file.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the
+ *    following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY <COPYRIGHT HOLDER> ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * <COPYRIGHT HOLDER> OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+ * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "unit.h"
+#include "crc32.h"
+
+static void
+test_alignment(void)
+{
+	header();
+	plan(4);
+
+	unsigned long buf[1024];
+	char *str = (char *)buf;
+
+	strcpy(str, "1234567891234567");
+	uint32_t crc = crc32_calc(0, str, strlen(str));
+	is(crc, 3333896965, "aligned crc32 buffer without a tail");
+
+	strcpy(str, "12345678912345678");
+	crc = crc32_calc(0, str, strlen(str));
+	is(crc, 2400039513, "aligned crc32 buffer with a tail");
+
+	crc = crc32_calc(0, str + 2, strlen(str) - 2);
+	is(crc, 984331636, "not aligned crc32 buffer with a tail");
+
+	strcpy(str, "1234");
+	crc = crc32_calc(0, str + 2, strlen(str) - 2);
+	is(crc, 2211472564, "not aligned buffer less than a word");
+
+	check_plan();
+	footer();
+}
+
+int
+main(void)
+{
+	crc32_init();
+
+	header();
+	plan(1);
+	test_alignment();
+	int rc = check_plan();
+	footer();
+	return rc;
+}
diff --git a/test/unit/crc32.result b/test/unit/crc32.result
new file mode 100644
index 000000000..99b62c22a
--- /dev/null
+++ b/test/unit/crc32.result
@@ -0,0 +1,11 @@
+	*** main ***
+1..1
+	*** test_alignment ***
+    1..4
+    ok 1 - aligned crc32 buffer without a tail
+    ok 2 - aligned crc32 buffer with a tail
+    ok 3 - not aligned crc32 buffer with a tail
+    ok 4 - not aligned buffer less than a word
+ok 1 - subtests
+	*** test_alignment: done ***
+	*** main: done ***
-- 
2.21.1 (Apple Git-122.3)



More information about the Tarantool-patches mailing list