[PATCH 1/2] erofs-utils: tar: support gzip index generation

Gao Xiang hsiangkao at linux.alibaba.com
Wed Sep 17 18:16:52 AEST 2025


Let's support generating AWS SOCI-compatible zinfo (version 2).

Since an OCI image layer cannot be randomly accessed, the new gzip
index can be used for OCI gzip random access.

Example:
 $ mkfs.erofs --tar=i --gzinfo=foo.zinfo foo.tarmeta.erofs foo.tgz

Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
 include/erofs/tar.h  |   7 +-
 lib/Makefile.am      |   2 +
 lib/gzran.c          | 216 +++++++++++++++++++++++++++++++++++++++++++
 lib/liberofs_gzran.h |  21 +++++
 lib/tar.c            |  20 ++++
 mkfs/main.c          |  23 +++++
 6 files changed, 288 insertions(+), 1 deletion(-)
 create mode 100644 lib/gzran.c
 create mode 100644 lib/liberofs_gzran.h

diff --git a/include/erofs/tar.h b/include/erofs/tar.h
index 3bd4b15..cdaef31 100644
--- a/include/erofs/tar.h
+++ b/include/erofs/tar.h
@@ -24,12 +24,17 @@ struct erofs_pax_header {
 #define EROFS_IOS_DECODER_NONE		0
 #define EROFS_IOS_DECODER_GZIP		1
 #define EROFS_IOS_DECODER_LIBLZMA	2
+#define EROFS_IOS_DECODER_GZRAN		3
 
 struct erofs_iostream_liblzma;
+struct erofs_gzran_builder;
 
 struct erofs_iostream {
 	union {
-		struct erofs_vfile vf;
+		struct {
+			struct erofs_vfile vf;
+			struct erofs_gzran_builder *gb;
+		};
 		void *handler;
 #ifdef HAVE_LIBLZMA
 		struct erofs_iostream_liblzma *lzma;
diff --git a/lib/Makefile.am b/lib/Makefile.am
index daf937c..72aa0e8 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -31,6 +31,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
       $(top_srcdir)/lib/liberofs_cache.h \
       $(top_srcdir)/lib/liberofs_private.h \
       $(top_srcdir)/lib/liberofs_xxhash.h \
+      $(top_srcdir)/lib/liberofs_gzran.h \
       $(top_srcdir)/lib/liberofs_metabox.h \
       $(top_srcdir)/lib/liberofs_nbd.h \
       $(top_srcdir)/lib/liberofs_s3.h
@@ -86,3 +87,4 @@ endif
 liberofs_la_SOURCES += remotes/oci.c
 liberofs_la_CFLAGS += ${libcurl_CFLAGS} ${json_c_CFLAGS}
 liberofs_la_LDFLAGS += ${libcurl_LIBS} ${json_c_LIBS}
+liberofs_la_SOURCES += gzran.c
diff --git a/lib/gzran.c b/lib/gzran.c
new file mode 100644
index 0000000..ce2759b
--- /dev/null
+++ b/lib/gzran.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * Copyright (C) 2025 Alibaba Cloud
+ */
+#include "erofs/list.h"
+#include "erofs/err.h"
+#include "liberofs_gzran.h"
+#include <stdlib.h>
+
+#ifdef HAVE_ZLIB
+#include <zlib.h>
+struct erofs_gzran_cutpoint {	/* one random-access point taken by erofs_gzran_builder_read() */
+	u8	window[EROFS_GZRAN_WINSIZE];	/* preceding 32K of uncompressed data */
+	u64	outpos;			/* corresponding offset in uncompressed data */
+	u64	in_bitpos;		/* (consumed input bytes << 3) | (strm->data_type & 7) */
+};
+
+struct erofs_gzran_cutpoint_item {	/* heap-allocated list node wrapping one cut point */
+	struct erofs_gzran_cutpoint	cp;
+	struct list_head		list;	/* linked into erofs_gzran_builder.items */
+};
+
+struct erofs_gzran_builder {	/* state for inflating a stream while collecting cut points */
+	struct list_head items;	/* erofs_gzran_cutpoint_item nodes, appended in creation order */
+	struct erofs_vfile *vf;	/* compressed input, read via erofs_io_read() */
+	z_stream strm;		/* zlib inflate state */
+	u64 totout, totin;	/* running totals of bytes produced / consumed by inflate() */
+	u32 entries;		/* number of nodes on @items */
+	u32 span_size;		/* a new cut point needs more than this many output bytes since the last */
+	u8 window[EROFS_GZRAN_WINSIZE];	/* inflate() output buffer, refilled from its start on every read */
+	u8 src[1 << 14];	/* 16K staging buffer for compressed input */
+};
+
+/*
+ * Create a builder that inflates zlib/gzip data from @vf, taking a cut point
+ * roughly every @span_size output bytes.  Returns ERR_PTR() on failure.
+ */
+struct erofs_gzran_builder *erofs_gzran_builder_init(struct erofs_vfile *vf,
+						     u32 span_size)
+{
+	struct erofs_gzran_builder *gb;
+	z_stream *strm;
+
+	gb = calloc(1, sizeof(*gb));	/* zeroed: counters start at 0, exported window bytes are defined */
+	if (!gb)
+		return ERR_PTR(-ENOMEM);
+	strm = &gb->strm;
+	strm->zalloc = Z_NULL;
+	strm->zfree = Z_NULL;
+	strm->opaque = Z_NULL;
+	strm->next_in = Z_NULL;
+	if (inflateInit2(strm, 47) != Z_OK) {	/* 47 = 32 + 15: auto zlib/gzip, 32K window */
+		free(gb);	/* don't leak the builder when zlib setup fails */
+		return ERR_PTR(-EFAULT);
+	}
+	gb->vf = vf;
+	gb->span_size = span_size;
+	init_list_head(&gb->items);
+	return gb;
+}
+
+/* inflate up to 32K into @window, recording cut points; returns bytes copied, or erofs_io_read()'s <= 0 */
+int erofs_gzran_builder_read(struct erofs_gzran_builder *gb, char *window)
+{
+	struct erofs_gzran_cutpoint_item *ci;
+	struct erofs_gzran_cutpoint *cp;
+	z_stream *strm = &gb->strm;
+	struct erofs_vfile *vf = gb->vf;
+	int read, ret;
+	u64 last;
+
+	strm->avail_out = sizeof(gb->window);	/* every call refills gb->window from its start */
+	strm->next_out = gb->window;
+	do {
+		/* refill the compressed input buffer once inflate() has drained it */
+		if (!strm->avail_in) {
+			read = erofs_io_read(vf, gb->src, sizeof(gb->src));
+			if (read <= 0)
+				return read;	/* no input or I/O error: output pending in gb->window is not copied */
+			strm->avail_in = read;
+			strm->next_in = gb->src;
+		}
+		gb->totin += strm->avail_in;	/* add what is available now ... */
+		gb->totout += strm->avail_out;
+
+		ret = inflate(strm, Z_BLOCK);	/* return at end of block */
+		gb->totin -= strm->avail_in;	/* ... minus what is left = net consumed / produced */
+		gb->totout -= strm->avail_out;
+
+		if (ret == Z_NEED_DICT)	/* preset dictionaries are treated as corrupt data */
+			ret = Z_DATA_ERROR;
+		if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
+			return -EIO;
+		if (ret == Z_STREAM_END)
+			break;
+
+		ci = list_empty(&gb->items) ? NULL :	/* most recently added cut point, if any */
+			list_last_entry(&gb->items,
+					struct erofs_gzran_cutpoint_item,
+					list);
+		last = ci ? ci->cp.outpos : 0;
+		if ((strm->data_type & 128) && !(strm->data_type & 64) &&	/* zlib: at a block boundary, not in the last block */
+		    (gb->totout == 0 || gb->totout - last > gb->span_size)) {	/* first point, or span exceeded */
+			ci = malloc(sizeof(*ci));
+			if (!ci)
+				return -ENOMEM;
+			init_list_head(&ci->list);
+			cp = &ci->cp;
+
+			cp->in_bitpos = (gb->totin << 3) | (strm->data_type & 7);	/* zlib: low bits = unused bits of last input byte */
+			cp->outpos = gb->totout;
+			read = sizeof(gb->window) - strm->avail_out;	/* bytes written to gb->window in this call */
+			if (strm->avail_out)	/* unwritten tail of gb->window goes first */
+				memcpy(cp->window, gb->window + read, strm->avail_out);
+			if (read)	/* then the bytes produced so far in this call */
+				memcpy(cp->window + strm->avail_out, gb->window, read);
+			list_add_tail(&ci->list, &gb->items);
+			gb->entries++;
+		}
+	} while (strm->avail_out);	/* stop once the 32K output buffer is full */
+
+	read = sizeof(gb->window) - strm->avail_out;
+	memcpy(window, gb->window, read);
+	return read;
+}
+
+struct aws_soci_zinfo_header {	/* written at offset 0 of the zinfo file; 12 bytes */
+	__le32 have;		/* number of checkpoints that follow */
+	__le64 span_size;	/* builder's span_size */
+} __packed;
+
+struct aws_soci_zinfo_ckpt {	/* one on-disk checkpoint; 17 + EROFS_GZRAN_WINSIZE bytes */
+	__le64 in;	/* cut point's in_bitpos >> 3 */
+	__le64 out;	/* cut point's outpos */
+	__u8 bits;	/* cut point's in_bitpos & 7 */
+	u8 window[EROFS_GZRAN_WINSIZE];	/* cut point's window contents */
+} __packed;
+
+/*
+ * Generate AWS SOCI-compatible on-disk zinfo version 2 in @zinfo_vf: a header
+ * followed by one checkpoint per cut point.  Returns 0 or a negative errno.
+ */
+int erofs_gzran_builder_export_zinfo(struct erofs_gzran_builder *gb,
+				     struct erofs_vfile *zinfo_vf)
+{
+	union {
+		struct aws_soci_zinfo_header h;
+		struct aws_soci_zinfo_ckpt c;
+	} u;
+	struct erofs_gzran_cutpoint_item *ci;
+	u64 pos = sizeof(u.h);	/* checkpoints start right after the header */
+	int ret;
+
+	BUILD_BUG_ON(sizeof(u.h) != 12);
+	BUILD_BUG_ON(sizeof(u.c) != 17 + EROFS_GZRAN_WINSIZE);
+	u.h = (struct aws_soci_zinfo_header) {
+		.have = cpu_to_le32(gb->entries),
+		.span_size = cpu_to_le64(gb->span_size),
+	};
+	ret = erofs_io_pwrite(zinfo_vf, &u.h, 0, sizeof(u.h));
+	if (ret < 0)
+		return ret;
+	if (ret != sizeof(u.h))	/* short write */
+		return -EIO;
+	list_for_each_entry(ci, &gb->items, list) {
+		u.c.in = cpu_to_le64(ci->cp.in_bitpos >> 3);
+		u.c.out = cpu_to_le64(ci->cp.outpos);
+		u.c.bits = ci->cp.in_bitpos & 7;	/* single byte: no endian conversion */
+		memcpy(u.c.window, ci->cp.window, EROFS_GZRAN_WINSIZE);
+		ret = erofs_io_pwrite(zinfo_vf, &u.c, pos, sizeof(u.c));
+		if (ret < 0)
+			return ret;
+		if (ret != sizeof(u.c))	/* short write */
+			return -EIO;
+		pos += sizeof(u.c);
+	}
+	return 0;
+}
+
+int erofs_gzran_builder_final(struct erofs_gzran_builder *gb)
+{
+	struct erofs_gzran_cutpoint_item *ci, *n;
+	int ret;
+
+	ret = inflateEnd(&gb->strm);
+	if (ret != Z_OK)
+		return -EFAULT;
+	list_for_each_entry_safe(ci, n, &gb->items, list) {
+		list_del(&ci->list);
+		free(ci);
+		--gb->entries;
+	}
+	DBG_BUGON(gb->entries);
+	free(gb);
+	return 0;
+}
+#else
+struct erofs_gzran_builder *erofs_gzran_builder_init(struct erofs_vfile *vf,
+						     u32 span_size)
+{
+	return ERR_PTR(-EOPNOTSUPP);	/* HAVE_ZLIB is not defined: no builder can be created */
+}
+int erofs_gzran_builder_read(struct erofs_gzran_builder *gb, char *window)
+{
+	return -EOPNOTSUPP;	/* HAVE_ZLIB is not defined: report it instead of returning 0 (EOF) */
+}
+int erofs_gzran_builder_export_zinfo(struct erofs_gzran_builder *gb,
+				     struct erofs_vfile *zinfo_vf)
+{
+	return -EOPNOTSUPP;	/* HAVE_ZLIB is not defined: nothing to export */
+}
+int erofs_gzran_builder_final(struct erofs_gzran_builder *gb)
+{
+	return 0;	/* HAVE_ZLIB is not defined: the init stub never returns a builder */
+}
+#endif
diff --git a/lib/liberofs_gzran.h b/lib/liberofs_gzran.h
new file mode 100644
index 0000000..4764506
--- /dev/null
+++ b/lib/liberofs_gzran.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+/*
+ * Copyright (C) 2025 Alibaba Cloud
+ */
+#ifndef __EROFS_LIB_LIBEROFS_GZRAN_H
+#define __EROFS_LIB_LIBEROFS_GZRAN_H
+
+#include "erofs/io.h"
+
+#define EROFS_GZRAN_WINSIZE	32768	/* bytes of uncompressed history kept per cut point (32K) */
+
+struct erofs_gzran_builder;
+
+struct erofs_gzran_builder *erofs_gzran_builder_init(struct erofs_vfile *vf,
+						     u32 span_size);
+int erofs_gzran_builder_read(struct erofs_gzran_builder *gb, char *window);
+int erofs_gzran_builder_export_zinfo(struct erofs_gzran_builder *gb,
+				     struct erofs_vfile *zinfo_vf);
+int erofs_gzran_builder_final(struct erofs_gzran_builder *gb);
+
+#endif
diff --git a/lib/tar.c b/lib/tar.c
index 687da6c..8d068cb 100644
--- a/lib/tar.c
+++ b/lib/tar.c
@@ -17,6 +17,7 @@
 #endif
 #include "liberofs_base64.h"
 #include "liberofs_cache.h"
+#include "liberofs_gzran.h"
 
 /* This file is a tape/volume header.  Ignore it on extraction.  */
 #define GNUTYPE_VOLHDR 'V'
@@ -65,6 +66,9 @@ void erofs_iostream_close(struct erofs_iostream *ios)
 		free(ios->lzma);
 #endif
 		return;
+	} else if (ios->decoder == EROFS_IOS_DECODER_GZRAN) {
+		erofs_gzran_builder_final(ios->gb);
+		return;
 	}
 	erofs_io_close(&ios->vf);
 }
@@ -105,6 +109,14 @@ int erofs_iostream_open(struct erofs_iostream *ios, int fd, int decoder)
 #else
 		return -EOPNOTSUPP;
 #endif
+	} else if (decoder == EROFS_IOS_DECODER_GZRAN) {
+		ios->vf.fd = fd;
+		ios->feof = false;
+		ios->sz = 0;
+		ios->bufsize = EROFS_GZRAN_WINSIZE * 2;
+		ios->gb = erofs_gzran_builder_init(&ios->vf, 4194304);
+		if (IS_ERR(ios->gb))
+			return PTR_ERR(ios->gb);
 	} else {
 		ios->vf.fd = fd;
 		fsz = lseek(fd, 0, SEEK_END);
@@ -204,6 +216,14 @@ int erofs_iostream_read(struct erofs_iostream *ios, void **buf, u64 bytes)
 #else
 			return -EOPNOTSUPP;
 #endif
+		} else if (ios->decoder == EROFS_IOS_DECODER_GZRAN) {
+			ret = erofs_gzran_builder_read(ios->gb, ios->buffer + rabytes);
+			if (ret < 0)
+				return ret;
+			ios->tail += ret;
+			DBG_BUGON(ios->tail > ios->bufsize);
+			if (!ret)
+				ios->feof = true;
 		} else {
 			ret = erofs_io_read(&ios->vf, ios->buffer + rabytes,
 					    ios->bufsize - rabytes);
diff --git a/mkfs/main.c b/mkfs/main.c
index a8208d4..3cf30c6 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -33,6 +33,7 @@
 #include "../lib/liberofs_metabox.h"
 #include "../lib/liberofs_s3.h"
 #include "../lib/liberofs_oci.h"
+#include "../lib/liberofs_gzran.h"
 #include "../lib/compressor.h"
 
 static struct option long_options[] = {
@@ -72,6 +73,7 @@ static struct option long_options[] = {
 #ifdef HAVE_ZLIB
 	{"gzip", no_argument, NULL, 518},
 	{"ungzip", optional_argument, NULL, 518},
+	{"gzinfo", optional_argument, NULL, 535},
 #endif
 #ifdef HAVE_LIBLZMA
 	{"unlzma", optional_argument, NULL, 519},
@@ -233,6 +235,9 @@ static void usage(int argc, char **argv)
 #ifdef HAVE_LIBLZMA
 		" --unxz[=X]            try to filter the tarball stream through xz/lzma/lzip\n"
 		"                       (and optionally dump the raw stream to X together)\n"
+#endif
+#ifdef HAVE_ZLIB
+		" --gzinfo[=X]          generate AWS SOCI-compatible zinfo in order to support random gzip access\n"
 #endif
 		" --vmdk-desc=X         generate a VMDK descriptor file to merge sub-filesystems\n"
 #ifdef EROFS_MT_ENABLED
@@ -298,6 +303,7 @@ static bool valid_fixeduuid;
 static unsigned int dsunit;
 static int tarerofs_decoder;
 static FILE *vmdk_dcf;
+static char *mkfs_aws_zinfo_file;
 
 static int erofs_mkfs_feat_set_legacy_compress(bool en, const char *val,
 					       unsigned int vallen)
@@ -1338,6 +1344,11 @@ static int mkfs_parse_options_cfg(struct erofs_importer_params *params,
 			source_mode = EROFS_MKFS_SOURCE_OCI;
 			break;
 #endif
+		case 535:
+			if (optarg)
+				mkfs_aws_zinfo_file = strdup(optarg);
+			tarerofs_decoder = EROFS_IOS_DECODER_GZRAN;
+			break;
 		case 'V':
 			version();
 			exit(0);
@@ -1920,6 +1931,18 @@ exit:
 	erofs_rebuild_cleanup();
 	erofs_diskbuf_exit();
 	if (source_mode == EROFS_MKFS_SOURCE_TAR) {
+		if (mkfs_aws_zinfo_file) {
+			struct erofs_vfile vf;
+			int fd;
+
+			fd = open(mkfs_aws_zinfo_file, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+			if (fd < 0) {
+				err = -errno;
+			} else {
+				vf = (struct erofs_vfile){ .fd = fd };
+				err = erofs_gzran_builder_export_zinfo(erofstar.ios.gb, &vf);
+			}
+		}
 		erofs_iostream_close(&erofstar.ios);
 		if (erofstar.ios.dumpfd >= 0)
 			close(erofstar.ios.dumpfd);
-- 
2.43.5



More information about the Linux-erofs mailing list