[PATCH 1/4] erofs-utils: switch to effective unaligned access

Gao Xiang hsiangkao at linux.alibaba.com
Mon Jul 10 04:25:08 AEST 2023


Introduce generic get_unaligned()/put_unaligned() helpers and switch to
efficient unaligned access in preparation for the LZ77 matchfinder.
Note that erofs_memcmp2() is still not very efficient.

Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
 include/erofs/defs.h     | 24 ++++++++++++++++++++++--
 include/erofs/internal.h |  2 --
 lib/dedupe.c             | 23 ++++++++++++++++++-----
 3 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/include/erofs/defs.h b/include/erofs/defs.h
index e5aa23c..44af557 100644
--- a/include/erofs/defs.h
+++ b/include/erofs/defs.h
@@ -179,9 +179,29 @@ typedef int64_t         s64;
 #define __maybe_unused      __attribute__((__unused__))
 #endif
 
-static inline u32 get_unaligned_le32(const u8 *p)
+#define __packed __attribute__((__packed__))
+
+#define __get_unaligned_t(type, ptr) ({						\
+	const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr);	\
+	__pptr->x;								\
+})
+
+#define __put_unaligned_t(type, val, ptr) do {					\
+	struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr);		\
+	__pptr->x = (val);							\
+} while (0)
+
+#define get_unaligned(ptr)	__get_unaligned_t(typeof(*(ptr)), (ptr))
+#define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr))
+
+static inline u32 get_unaligned_le32(const void *p)
+{
+	return le32_to_cpu(__get_unaligned_t(__le32, p));
+}
+
+static inline void put_unaligned_le32(u32 val, void *p)
 {
-	return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
+	__put_unaligned_t(__le32, cpu_to_le32(val), p);
 }
 
 /**
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index ab964d4..aad2115 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -17,8 +17,6 @@ extern "C"
 
 typedef unsigned short umode_t;
 
-#define __packed __attribute__((__packed__))
-
 #include "erofs_fs.h"
 #include <fcntl.h>
 #include <sys/types.h> /* for off_t definition */
diff --git a/lib/dedupe.c b/lib/dedupe.c
index 0a69b8f..17da452 100644
--- a/lib/dedupe.c
+++ b/lib/dedupe.c
@@ -11,12 +11,14 @@
 unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
 			    unsigned long sz)
 {
+	const unsigned long *a1, *a2;
 	unsigned long n = sz;
 
-	if (sz >= sizeof(long) && ((long)s1 & (sizeof(long) - 1)) ==
-			((long)s2 & (sizeof(long) - 1))) {
-		const unsigned long *a1, *a2;
+	if (sz < sizeof(long))
+		goto out_bytes;
 
+	if (((long)s1 & (sizeof(long) - 1)) ==
+			((long)s2 & (sizeof(long) - 1))) {
 		while ((long)s1 & (sizeof(long) - 1)) {
 			if (*s1 != *s2)
 				break;
@@ -34,9 +36,20 @@ unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
 			++a2;
 			sz -= sizeof(long);
 		}
-		s1 = (const u8 *)a1;
-		s2 = (const u8 *)a2;
+	} else {
+		a1 = (const unsigned long *)s1;
+		a2 = (const unsigned long *)s2;
+		do {
+			if (get_unaligned(a1) != get_unaligned(a2))
+				break;
+			++a1;
+			++a2;
+			sz -= sizeof(long);
+		} while (sz >= sizeof(long));
 	}
+	s1 = (const u8 *)a1;
+	s2 = (const u8 *)a2;
+out_bytes:
 	while (sz) {
 		if (*s1 != *s2)
 			break;
-- 
2.24.4



More information about the Linux-erofs mailing list