[PATCH v2] erofs-utils: add source support for tarindex and gzran

Gao Xiang hsiangkao at linux.alibaba.com
Sun Sep 28 18:16:20 AEST 2025



On 2025/9/28 16:02, ChengyuZhu6 wrote:
> From: Chengyu Zhu <hudsonzhu at tencent.com>
> 
> Add support for combining tarindex files with remote OCI blobs
> through a new source mechanism. This enables local metadata
> storage while keeping blob data in remote registries.
> 
> e.g.:
> mount.erofs -t erofs.nbd -o \
> 'oci.blob=13b7e9...,oci.platform=linux/amd64,\
> oci.tarindex=ubuntu.erofs,oci.zinfo=ubuntu.zinfo' \
> ubuntu:20.04 /mnt
> 
> Signed-off-by: Chengyu Zhu <hudsonzhu at tencent.com>
> ---
>   lib/liberofs_oci.h |   2 +
>   lib/remotes/oci.c  |  20 ---
>   mount/main.c       | 397 ++++++++++++++++++++++++++++++++++++++++++++-
>   3 files changed, 392 insertions(+), 27 deletions(-)
> 
> diff --git a/lib/liberofs_oci.h b/lib/liberofs_oci.h
> index 71c8879..5298f18 100644
> --- a/lib/liberofs_oci.h
> +++ b/lib/liberofs_oci.h
> @@ -35,6 +35,8 @@ struct ocierofs_config {
>   	char *password;
>   	char *blob_digest;
>   	int layer_index;
> +	char *tarindex_path;
> +	char *zinfo_path;
>   };
>   
>   struct ocierofs_layer_info {
> diff --git a/lib/remotes/oci.c b/lib/remotes/oci.c
> index b2f1f59..b25e0b2 100644
> --- a/lib/remotes/oci.c
> +++ b/lib/remotes/oci.c
> @@ -1461,19 +1461,6 @@ static void ocierofs_io_close(struct erofs_vfile *vfile)
>   	*(struct ocierofs_iostream **)vfile->payload = NULL;
>   }
>   
> -static int ocierofs_is_erofs_native_image(struct ocierofs_ctx *ctx)
> -{
> -	if (ctx->layer_count > 0 && ctx->layers[0] &&
> -	    ctx->layers[0]->media_type) {
> -		const char *media_type = ctx->layers[0]->media_type;
> -		size_t len = strlen(media_type);
> -
> -		if (len >= 6 && strcmp(media_type + len - 6, ".erofs") == 0)
> -			return 0;
> -	}
> -	return -ENOENT;
> -}
> -
>   static struct erofs_vfops ocierofs_io_vfops = {
>   	.pread = ocierofs_io_pread,
>   	.read = ocierofs_io_read,
> @@ -1497,13 +1484,6 @@ int ocierofs_io_open(struct erofs_vfile *vfile, const struct ocierofs_config *cf
>   		return err;
>   	}
>   
> -	err = ocierofs_is_erofs_native_image(ctx);
> -	if (err) {
> -		ocierofs_ctx_cleanup(ctx);
> -		free(ctx);
> -		return err;
> -	}
> -
>   	oci_iostream = calloc(1, sizeof(*oci_iostream));
>   	if (!oci_iostream) {
>   		ocierofs_ctx_cleanup(ctx);
> diff --git a/mount/main.c b/mount/main.c
> index eb0dd01..619bf13 100644
> --- a/mount/main.c
> +++ b/mount/main.c
> @@ -16,6 +16,8 @@
>   #include "erofs/io.h"
>   #include "../lib/liberofs_nbd.h"
>   #include "../lib/liberofs_oci.h"
> +#include "../lib/liberofs_gzran.h"
> +
>   #ifdef HAVE_LINUX_LOOP_H
>   #include <linux/loop.h>
>   #else
> @@ -35,6 +37,9 @@ struct loop_info {
>   #include <sys/sysmacros.h>
>   #endif
>   
> +/* Device boundary probe */
> +#define EROFS_IS_DEVICE_PROBE(off) ((off) >= (INT64_MAX >> 9))

I meant just adding a macro, such as:

#define EROFSMOUNT_NBD_DISK_SIZE	(INT64_MAX >> 9)

> +
>   enum erofs_backend_drv {
>   	EROFSAUTO,
>   	EROFSLOCAL,
> @@ -141,7 +146,25 @@ static int erofsmount_parse_oci_option(const char *option)
>   						if (!oci_cfg->password)
>   							return -ENOMEM;
>   					} else {
> -						return -EINVAL;
> +						p = strstr(option, "oci.tarindex=");
> +						if (p != NULL) {
> +							p += strlen("oci.tarindex=");
> +							free(oci_cfg->tarindex_path);
> +							oci_cfg->tarindex_path = strdup(p);
> +							if (!oci_cfg->tarindex_path)
> +								return -ENOMEM;
> +						} else {
> +							p = strstr(option, "oci.zinfo=");
> +							if (p != NULL) {
> +								p += strlen("oci.zinfo=");
> +								free(oci_cfg->zinfo_path);
> +								oci_cfg->zinfo_path = strdup(p);
> +								if (!oci_cfg->zinfo_path)
> +									return -ENOMEM;
> +							} else {
> +								return -EINVAL;
> +							}
> +						}
>   					}
>   				}
>   			}
> @@ -332,11 +355,284 @@ static int erofsmount_fuse(const char *source, const char *mountpoint,
>   	return 0;
>   }
>   
> +struct erofsmount_tarindex_source {
> +	struct erofs_vfile *tarindex_vf;
> +	struct erofs_vfile *zinfo_vf;
> +	u64 tarindex_size;
> +};
> +
>   struct erofsmount_nbd_ctx {
>   	struct erofs_vfile vd;		/* virtual device */
>   	struct erofs_vfile sk;		/* socket file */
>   };
>   
> +static ssize_t erofs_tarindex_pread(struct erofs_vfile *vf, void *buf,
> +				  size_t count, u64 offset)
> +{
> +	struct erofsmount_tarindex_source *hs;
> +	ssize_t local_read, remote_read;
> +	u64 local_part, remote_part, remote_offset;

`index_part` and `tardata_part` may be clearer names than `local_part` and `remote_part`.

> +
> +	hs = *(struct erofsmount_tarindex_source **)vf->payload;
> +	if (!hs)
> +		return -EINVAL;
> +
> +	/* Handle device boundary probe requests */
> +	if (EROFS_IS_DEVICE_PROBE(offset))
> +		return 0;
> +
> +	if (offset >= hs->tarindex_size) {
> +		remote_offset = offset - hs->tarindex_size;
> +
> +		return hs->zinfo_vf->ops->pread(hs->zinfo_vf, buf, count, remote_offset);
> +	}
> +
> +	if (offset + count <= hs->tarindex_size)
> +		return erofs_io_pread(hs->tarindex_vf, buf, count, offset);
> +
> +	local_part = hs->tarindex_size - offset;
> +	remote_part = count - local_part;
> +
> +	local_read = erofs_io_pread(hs->tarindex_vf, buf, local_part, offset);
> +	if (local_read < 0)
> +		return local_read;
> +
> +	remote_read = hs->zinfo_vf->ops->pread(hs->zinfo_vf,
> +					      (char *)buf + local_read,
> +					      remote_part, 0);
> +	if (remote_read < 0)
> +		return remote_read;
> +	return local_read + remote_read;
> +}
> +
> +static void erofs_tarindex_close(struct erofs_vfile *vf)
> +{
> +	struct erofsmount_tarindex_source *hs;
> +
> +	if (!vf)
> +		return;
> +
> +	hs = *(struct erofsmount_tarindex_source **)vf->payload;
> +	if (!hs)
> +		return;
> +
> +	if (hs->tarindex_size > 0) {
> +		erofs_io_close(hs->tarindex_vf);
> +		free(hs->tarindex_vf);
> +	}
> +
> +	if (hs->zinfo_vf)
> +		erofs_io_close(hs->zinfo_vf);
> +
> +	free(hs);
> +}
> +
> +static int load_file_to_buf(const char *path, void **out, unsigned int *out_len)
> +{
> +	FILE *fp = NULL;
> +	void *buf = NULL;
> +	int ret = 0;
> +	long sz;
> +	size_t num;
> +
> +	fp = fopen(path, "rb");
> +	if (!fp)
> +		return -errno;
> +
> +	if (fseek(fp, 0, SEEK_END) != 0) {
> +		ret = -errno;
> +		goto out;
> +	}
> +	sz = ftell(fp);
> +	if (sz < 0) {
> +		ret = -errno;
> +		goto out;
> +	}
> +	if (fseek(fp, 0, SEEK_SET) != 0) {
> +		ret = -errno;
> +		goto out;
> +	}
> +	if (sz == 0) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	buf = malloc((size_t)sz);
> +	if (!buf) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
> +	num = fread(buf, 1, (size_t)sz, fp);
> +	if (num != (size_t)sz) {
> +		ret = -EIO;
> +		goto out;
> +	}
> +
> +	*out = buf;
> +	*out_len = (unsigned int)sz;
> +	buf = NULL;
> +
> +out:
> +	if (fp)
> +		fclose(fp);
> +	if (ret < 0 && buf)
> +		free(buf);
> +	return ret;
> +}
> +
> +static int erofsmount_init_gzran(struct erofs_vfile **zinfo_vf,
> +				  const struct ocierofs_config *oci_cfg,
> +				  const char *zinfo_path)
> +{
> +	int err = 0;
> +	void *zinfo_data = NULL;
> +	unsigned int zinfo_len = 0;
> +	struct erofs_vfile *oci_vf = NULL;
> +
> +	oci_vf = malloc(sizeof(*oci_vf));
> +	if (!oci_vf) {
> +		err = -ENOMEM;
> +		goto cleanup;
> +	}
> +
> +	err = ocierofs_io_open(oci_vf, oci_cfg);
> +	if (err) {
> +		free(oci_vf);
> +		goto cleanup;
> +	}
> +
> +	/* If no zinfo_path, return oci_vf directly for tar format */
> +	if (!zinfo_path) {
> +		*zinfo_vf = oci_vf;
> +		return 0;
> +	}
> +
> +	err = load_file_to_buf(zinfo_path, &zinfo_data, &zinfo_len);
> +	if (err) {
> +		erofs_io_close(oci_vf);
> +		free(oci_vf);
> +		return err;
> +	}
> +
> +	*zinfo_vf = erofs_gzran_zinfo_open(oci_vf, zinfo_data, zinfo_len);
> +	if (IS_ERR(*zinfo_vf)) {
> +		err = PTR_ERR(*zinfo_vf);
> +		*zinfo_vf = NULL;
> +		erofs_io_close(oci_vf);
> +		free(oci_vf);
> +		goto cleanup;
> +	}
> +
> +	free(zinfo_data);
> +	return 0;
> +
> +cleanup:
> +	if (zinfo_data)
> +		free(zinfo_data);
> +	return err;
> +}
> +
> +static ssize_t erofs_tarindex_sendfile(struct erofs_vfile *vout, struct erofs_vfile *vin,
> +				      off_t *pos, size_t count)
> +{
> +	static char buf[32768];
> +	ssize_t total_written = 0, ret = 0, written;
> +	size_t to_read;
> +	u64 read_offset;
> +
> +	while (count > 0) {
> +		to_read = min_t(size_t, count, sizeof(buf));
> +		read_offset = pos ? *pos : 0;
> +
> +		ret = erofs_tarindex_pread(vin, buf, to_read, read_offset);
> +		if (ret <= 0) {
> +			if (ret < 0 && total_written == 0)
> +				return ret;
> +			break;
> +		}
> +
> +		written = __erofs_io_write(vout->fd, buf, ret);
> +		if (written < 0) {
> +			ret = -errno;
> +			break;
> +		}
> +		if (written != ret)
> +			ret = written;
> +
> +		total_written += ret;
> +		count -= ret;
> +		if (pos)
> +			*pos += ret;
> +	}
> +	return count;
> +}
> +
> +static struct erofs_vfops tarindex_vfile_ops = {
> +	.pread = erofs_tarindex_pread,
> +	.sendfile = erofs_tarindex_sendfile,
> +	.close = erofs_tarindex_close,
> +};
> +
> +/*
> + * Create tarindex source for gzran+oci hybrid mode with three scenarios:
> + * 1. tarindex + zinfo: Remote data is tar.gzip format
> + * 2. tarindex only: Remote data is tar format
> + */
> +static int erofs_create_tarindex_source(struct erofs_vfile *out_vf,
> +				      const struct ocierofs_config *oci_cfg,
> +				      const char *tarindex_path,
> +				      const char *zinfo_path)
> +{
> +	struct erofsmount_tarindex_source *hs;
> +	int err;
> +	struct stat st;
> +
> +	hs = calloc(1, sizeof(*hs));
> +	if (!hs)
> +		return -ENOMEM;
> +
> +	if (tarindex_path) {
> +		hs->tarindex_vf = malloc(sizeof(*hs->tarindex_vf));
> +		if (!hs->tarindex_vf) {
> +			err = -ENOMEM;
> +			goto cleanup;
> +		}
> +
> +		hs->tarindex_vf->fd = open(tarindex_path, O_RDONLY);

Let's avoid accessing `hs->tarindex_vf->fd` directly.

I suggest using a local variable `vf` directly instead.


Otherwise it looks good to me.

Thanks,
Gao Xiang


More information about the Linux-erofs mailing list