[PATCH 1/3] erofs-utils: lib: use xxh64() for faster filtering first for dedupe
Gao Xiang
hsiangkao at linux.alibaba.com
Sat Sep 23 04:30:53 AEST 2023
Let's check if xxh64 equals when rolling back on global compressed
deduplication.
As a result, it could decrease time by 26.4% (6m57.990s -> 5m7.755s)
on a dataset with "-Ededupe -C8192".
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
include/erofs/defs.h | 5 +++
include/erofs/xxhash.h | 27 -------------
lib/Makefile.am | 4 +-
lib/dedupe.c | 9 +++++
lib/xattr.c | 2 +-
lib/xxhash.c | 92 +++++++++++++++++++++++++++++++++++++++++-
lib/xxhash.h | 40 ++++++++++++++++++
7 files changed, 147 insertions(+), 32 deletions(-)
delete mode 100644 include/erofs/xxhash.h
create mode 100644 lib/xxhash.h
diff --git a/include/erofs/defs.h b/include/erofs/defs.h
index fefa7e7..e7384a1 100644
--- a/include/erofs/defs.h
+++ b/include/erofs/defs.h
@@ -204,6 +204,11 @@ static inline void put_unaligned_le32(u32 val, void *p)
__put_unaligned_t(__le32, cpu_to_le32(val), p);
}
+static inline u32 get_unaligned_le64(const void *p)
+{
+ return le64_to_cpu(__get_unaligned_t(__le64, p));
+}
+
/**
* ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value
* @n - parameter
diff --git a/include/erofs/xxhash.h b/include/erofs/xxhash.h
deleted file mode 100644
index 5441209..0000000
--- a/include/erofs/xxhash.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0+ */
-#ifndef __EROFS_XXHASH_H
-#define __EROFS_XXHASH_H
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-#include <stdint.h>
-
-/**
- * xxh32() - calculate the 32-bit hash of the input with a given seed.
- *
- * @input: The data to hash.
- * @length: The length of the data to hash.
- * @seed: The seed can be used to alter the result predictably.
- *
- * Return: The 32-bit hash of the data.
- */
-uint32_t xxh32(const void *input, size_t length, uint32_t seed);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 483d410..470e095 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -25,9 +25,9 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
$(top_srcdir)/include/erofs/xattr.h \
$(top_srcdir)/include/erofs/compress_hints.h \
$(top_srcdir)/include/erofs/fragments.h \
- $(top_srcdir)/include/erofs/xxhash.h \
$(top_srcdir)/include/erofs/rebuild.h \
- $(top_srcdir)/lib/liberofs_private.h
+ $(top_srcdir)/lib/liberofs_private.h \
+ $(top_srcdir)/lib/xxhash.h
noinst_HEADERS += compressor.h
liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
diff --git a/lib/dedupe.c b/lib/dedupe.c
index 17da452..2f86f8d 100644
--- a/lib/dedupe.c
+++ b/lib/dedupe.c
@@ -6,6 +6,7 @@
#include "erofs/print.h"
#include "rb_tree.h"
#include "rolling_hash.h"
+#include "xxhash.h"
#include "sha256.h"
unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
@@ -66,6 +67,7 @@ static struct rb_tree *dedupe_tree, *dedupe_subtree;
struct z_erofs_dedupe_item {
long long hash;
u8 prefix_sha256[32];
+ u64 prefix_xxh64;
erofs_blk_t compressed_blkaddr;
unsigned int compressed_blks;
@@ -102,6 +104,7 @@ int z_erofs_dedupe_match(struct z_erofs_dedupe_ctx *ctx)
for (; cur >= ctx->start; --cur) {
struct z_erofs_dedupe_item *e;
unsigned int extra;
+ u64 xxh64_csum;
u8 sha256[32];
if (initial) {
@@ -120,6 +123,10 @@ int z_erofs_dedupe_match(struct z_erofs_dedupe_ctx *ctx)
continue;
}
+ xxh64_csum = xxh64(cur, window_size, 0);
+ if (e->prefix_xxh64 != xxh64_csum)
+ continue;
+
erofs_sha256(cur, window_size, sha256);
if (memcmp(sha256, e->prefix_sha256, sizeof(sha256)))
continue;
@@ -155,6 +162,8 @@ int z_erofs_dedupe_insert(struct z_erofs_inmem_extent *e,
di->original_length = e->length;
erofs_sha256(original_data, window_size, di->prefix_sha256);
+
+ di->prefix_xxh64 = xxh64(original_data, window_size, 0);
di->hash = erofs_rolling_hash_init(original_data,
window_size, true);
memcpy(di->extra_data, original_data + window_size,
diff --git a/lib/xattr.c b/lib/xattr.c
index 6c8ebf4..77c8c3a 100644
--- a/lib/xattr.c
+++ b/lib/xattr.c
@@ -18,7 +18,7 @@
#include "erofs/cache.h"
#include "erofs/io.h"
#include "erofs/fragments.h"
-#include "erofs/xxhash.h"
+#include "xxhash.h"
#include "liberofs_private.h"
#ifndef XATTR_SYSTEM_PREFIX
diff --git a/lib/xxhash.c b/lib/xxhash.c
index 7289c77..2768375 100644
--- a/lib/xxhash.c
+++ b/lib/xxhash.c
@@ -43,14 +43,14 @@
* - xxHash homepage: https://cyan4973.github.io/xxHash/
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
-
#include "erofs/defs.h"
-#include "erofs/xxhash.h"
+#include "xxhash.h"
/*-*************************************
* Macros
**************************************/
#define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r)))
+#define xxh_rotl64(x, r) ((x << r) | (x >> (64 - r)))
/*-*************************************
* Constants
@@ -61,6 +61,12 @@ static const uint32_t PRIME32_3 = 3266489917U;
static const uint32_t PRIME32_4 = 668265263U;
static const uint32_t PRIME32_5 = 374761393U;
+static const uint64_t PRIME64_1 = 11400714785074694791ULL;
+static const uint64_t PRIME64_2 = 14029467366897019727ULL;
+static const uint64_t PRIME64_3 = 1609587929392839161ULL;
+static const uint64_t PRIME64_4 = 9650029242287828579ULL;
+static const uint64_t PRIME64_5 = 2870177450012600261ULL;
+
/*-***************************
* Simple Hash Functions
****************************/
@@ -124,3 +130,85 @@ uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
return h32;
}
+
+static uint64_t xxh64_round(uint64_t acc, const uint64_t input)
+{
+ acc += input * PRIME64_2;
+ acc = xxh_rotl64(acc, 31);
+ acc *= PRIME64_1;
+ return acc;
+}
+
+static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val)
+{
+ val = xxh64_round(0, val);
+ acc ^= val;
+ acc = acc * PRIME64_1 + PRIME64_4;
+ return acc;
+}
+
+uint64_t xxh64(const void *input, const size_t len, const uint64_t seed)
+{
+ const uint8_t *p = (const uint8_t *)input;
+ const uint8_t *const b_end = p + len;
+ uint64_t h64;
+
+ if (len >= 32) {
+ const uint8_t *const limit = b_end - 32;
+ uint64_t v1 = seed + PRIME64_1 + PRIME64_2;
+ uint64_t v2 = seed + PRIME64_2;
+ uint64_t v3 = seed + 0;
+ uint64_t v4 = seed - PRIME64_1;
+
+ do {
+ v1 = xxh64_round(v1, get_unaligned_le64(p));
+ p += 8;
+ v2 = xxh64_round(v2, get_unaligned_le64(p));
+ p += 8;
+ v3 = xxh64_round(v3, get_unaligned_le64(p));
+ p += 8;
+ v4 = xxh64_round(v4, get_unaligned_le64(p));
+ p += 8;
+ } while (p <= limit);
+
+ h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
+ xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
+ h64 = xxh64_merge_round(h64, v1);
+ h64 = xxh64_merge_round(h64, v2);
+ h64 = xxh64_merge_round(h64, v3);
+ h64 = xxh64_merge_round(h64, v4);
+
+ } else {
+ h64 = seed + PRIME64_5;
+ }
+
+ h64 += (uint64_t)len;
+
+ while (p + 8 <= b_end) {
+ const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
+
+ h64 ^= k1;
+ h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
+ p += 8;
+ }
+
+ if (p + 4 <= b_end) {
+ h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
+ h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+ p += 4;
+ }
+
+ while (p < b_end) {
+ h64 ^= (*p) * PRIME64_5;
+ h64 = xxh_rotl64(h64, 11) * PRIME64_1;
+ p++;
+ }
+
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 >> 29;
+ h64 *= PRIME64_3;
+ h64 ^= h64 >> 32;
+
+ return h64;
+}
diff --git a/lib/xxhash.h b/lib/xxhash.h
new file mode 100644
index 0000000..723c3a5
--- /dev/null
+++ b/lib/xxhash.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0+ */
+#ifndef __EROFS_LIB_XXHASH_H
+#define __EROFS_LIB_XXHASH_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include <stdint.h>
+
+/*
+ * xxh32() - calculate the 32-bit hash of the input with a given seed.
+ *
+ * @input: The data to hash.
+ * @length: The length of the data to hash.
+ * @seed: The seed can be used to alter the result predictably.
+ *
+ * Return: The 32-bit hash of the data.
+ */
+uint32_t xxh32(const void *input, size_t length, uint32_t seed);
+
+/*
+ * xxh64() - calculate the 64-bit hash of the input with a given seed.
+ *
+ * @input: The data to hash.
+ * @length: The length of the data to hash.
+ * @seed: The seed can be used to alter the result predictably.
+ *
+ * This function runs 2x faster on 64-bit systems, but slower on 32-bit systems.
+ *
+ * Return: The 64-bit hash of the data.
+ */
+uint64_t xxh64(const void *input, const size_t len, const uint64_t seed);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--
2.39.3
More information about the Linux-erofs
mailing list