[PATCH 4/5] erofs-utils: mkfs: add rebuild FULLDATA for combined EROFS images

zhaoyifan (H) zhaoyifan28 at huawei.com
Thu Mar 5 13:34:49 AEDT 2026


On 2026/3/3 4:01, Lucas Karpinski wrote:
> This patch introduces experimental support for merging multiple source
> images in mkfs. Each source image uses its stored UUID as the device table
> tag. The raw block data from each source is copied using
> erofs_copy_file_range.
>
> This does not yet support chunk-based files or compressed
> images.
>
> Signed-off-by: Lucas Karpinski <lkarpinski at nvidia.com>
> ---
>   lib/cache.c            |   6 ++
>   lib/liberofs_cache.h   |   1 +
>   lib/liberofs_rebuild.h |   5 ++
>   lib/rebuild.c          | 169 ++++++++++++++++++++++++++++++++++++++++++++++++-
>   mkfs/main.c            |   6 +-
>   5 files changed, 183 insertions(+), 4 deletions(-)
>
> diff --git a/lib/cache.c b/lib/cache.c
> index 4c7c386..49742bc 100644
> --- a/lib/cache.c
> +++ b/lib/cache.c
> @@ -544,6 +544,12 @@ erofs_blk_t erofs_total_metablocks(struct erofs_bufmgr *bmgr)
>   	return bmgr->metablkcnt;
>   }
>   
> +void erofs_bset_tail(struct erofs_bufmgr *bmgr, erofs_blk_t blkaddr)
> +{
> +	if (blkaddr > bmgr->tail_blkaddr)
> +		bmgr->tail_blkaddr = blkaddr;
> +}
> +
>   void erofs_buffer_exit(struct erofs_bufmgr *bmgr)
>   {
>   	DBG_BUGON(__erofs_bflush(bmgr, NULL, true));
> diff --git a/lib/liberofs_cache.h b/lib/liberofs_cache.h
> index baac609..55e8f25 100644
> --- a/lib/liberofs_cache.h
> +++ b/lib/liberofs_cache.h
> @@ -138,6 +138,7 @@ int erofs_bflush(struct erofs_bufmgr *bmgr,
>   		 struct erofs_buffer_block *bb);
>   
>   void erofs_bdrop(struct erofs_buffer_head *bh, bool tryrevoke);
> +void erofs_bset_tail(struct erofs_bufmgr *bmgr, erofs_blk_t blkaddr);
>   void erofs_buffer_exit(struct erofs_bufmgr *bmgr);
>   
>   #ifdef __cplusplus
> diff --git a/lib/liberofs_rebuild.h b/lib/liberofs_rebuild.h
> index d8c4c8a..32d9e2f 100644
> --- a/lib/liberofs_rebuild.h
> +++ b/lib/liberofs_rebuild.h
> @@ -17,6 +17,11 @@ int erofs_rebuild_load_tree(struct erofs_inode *root, struct erofs_sb_info *sbi,
>   			    enum erofs_rebuild_datamode mode,
>   			    erofs_blk_t uniaddr_offset);
>   
> +int erofs_rebuild_load_trees_full(struct erofs_inode *root,
> +				  struct erofs_sb_info *sbi,
> +				  struct list_head *src_list,
> +				  unsigned int src_count);
> +
>   int erofs_rebuild_load_basedir(struct erofs_inode *dir, u64 *nr_subdirs,
>   			       unsigned int *i_nlink);
>   #endif
> diff --git a/lib/rebuild.c b/lib/rebuild.c
> index 7e62bc9..16ef0cf 100644
> --- a/lib/rebuild.c
> +++ b/lib/rebuild.c
> @@ -14,8 +14,10 @@
>   #include "erofs/xattr.h"
>   #include "erofs/blobchunk.h"
>   #include "erofs/internal.h"
> +#include "erofs/io.h"
>   #include "liberofs_rebuild.h"
>   #include "liberofs_uuid.h"
> +#include "liberofs_cache.h"
>   
>   #ifdef HAVE_LINUX_AUFS_TYPE_H
>   #include <linux/aufs_type.h>
> @@ -221,9 +223,60 @@ err:
>   	return ret;
>   }
>   
> +static int erofs_rebuild_write_full_data(struct erofs_inode *inode,
> +					 erofs_blk_t uniaddr_offset)
> +{
> +	struct erofs_sb_info *src_sbi = inode->sbi;
> +	int err = 0;
> +
> +	if (inode->datalayout == EROFS_INODE_FLAT_PLAIN) {
> +		if (inode->u.i_blkaddr != EROFS_NULL_ADDR)
> +			inode->u.i_blkaddr += uniaddr_offset;
> +	} else if (inode->datalayout == EROFS_INODE_FLAT_INLINE) {
> +		erofs_blk_t nblocks = erofs_blknr(src_sbi, inode->i_size);
> +		unsigned int inline_size = inode->i_size % erofs_blksiz(src_sbi);
> +
> +		if (nblocks > 0 && inode->u.i_blkaddr != EROFS_NULL_ADDR)
> +			inode->u.i_blkaddr += uniaddr_offset;
> +
> +		inode->idata_size = inline_size;
> +		if (inline_size > 0) {
> +			struct erofs_vfile vf;
> +			erofs_off_t tail_offset = erofs_pos(src_sbi, nblocks);
> +
> +			inode->idata = malloc(inline_size);
> +			if (!inode->idata)
> +				return -ENOMEM;
> +			err = erofs_iopen(&vf, inode);
> +			if (err) {
> +				free(inode->idata);
> +				inode->idata = NULL;
> +				return err;
> +			}
> +			err = erofs_pread(&vf, inode->idata, inline_size,
> +					  tail_offset);
> +			if (err) {
> +				free(inode->idata);
> +				inode->idata = NULL;
> +				return err;
> +			}
> +		}
> +	} else if (inode->datalayout == EROFS_INODE_CHUNK_BASED) {
> +		erofs_err("chunk-based files not yet supported: %s",
> +			  inode->i_srcpath);
> +		err = -EOPNOTSUPP;
> +	} else if (is_inode_layout_compression(inode)) {
> +		erofs_err("compressed files not yet supported: %s",
> +			  inode->i_srcpath);
> +		err = -EOPNOTSUPP;
> +	}
> +	return err;
> +}
> +
>   static int erofs_rebuild_update_inode(struct erofs_sb_info *dst_sb,
>   				      struct erofs_inode *inode,
> -				      enum erofs_rebuild_datamode datamode)
> +				      enum erofs_rebuild_datamode datamode,
> +				      erofs_blk_t uniaddr_offset)
>   {
>   	int err = 0;
>   
> @@ -265,6 +318,8 @@ static int erofs_rebuild_update_inode(struct erofs_sb_info *dst_sb,
>   			err = erofs_rebuild_write_blob_index(dst_sb, inode);
>   		else if (datamode == EROFS_REBUILD_DATA_RESVSP)
>   			inode->datasource = EROFS_INODE_DATA_SOURCE_RESVSP;
> +		else if (datamode == EROFS_REBUILD_DATA_FULL)
> +			err = erofs_rebuild_write_full_data(inode, uniaddr_offset);
>   		else
>   			err = -EOPNOTSUPP;
>   		break;
> @@ -387,7 +442,8 @@ static int erofs_rebuild_dirent_iter(struct erofs_dir_context *ctx)
>   			inode->i_nlink = 1;
>   
>   			ret = erofs_rebuild_update_inode(&g_sbi, inode,
> -							 rctx->datamode);
> +							 rctx->datamode,
> +							 rctx->uniaddr_offset);
>   			if (ret) {
>   				erofs_iput(inode);
>   				goto out;
> @@ -425,6 +481,7 @@ int erofs_rebuild_load_tree(struct erofs_inode *root, struct erofs_sb_info *sbi,
>   {
>   	struct erofs_inode inode = {};
>   	struct erofs_rebuild_dir_context ctx;
> +	struct erofs_inode *mergedir;
>   	char uuid_str[37];
>   	char *fsid = sbi->devname;
>   	int ret;
> @@ -447,16 +504,19 @@ int erofs_rebuild_load_tree(struct erofs_inode *root, struct erofs_sb_info *sbi,
>   		erofs_err("failed to read root inode of %s", fsid);
>   		return ret;
>   	}
> +
> +	mergedir = root;
>   	inode.i_srcpath = strdup("/");
>   
>   	ctx = (struct erofs_rebuild_dir_context) {
>   		.ctx.dir = &inode,
>   		.ctx.cb = erofs_rebuild_dirent_iter,
> -		.mergedir = root,
> +		.mergedir = mergedir,
>   		.datamode = mode,
>   		.uniaddr_offset = uniaddr_offset,
>   	};
>   	ret = erofs_iterate_dir(&ctx.ctx, false);
> +
>   	free(inode.i_srcpath);
>   	return ret;
>   }
> @@ -556,3 +616,106 @@ int erofs_rebuild_load_basedir(struct erofs_inode *dir, u64 *nr_subdirs,
>   	};
>   	return erofs_iterate_dir(&ctx.ctx, false);
>   }
> +
> +static int erofs_rebuild_copy_src_blocks(struct erofs_sb_info *sbi,
> +					 struct list_head *src_list)
> +{
> +	struct erofs_device_info *devs = sbi->devs;
> +	struct erofs_sb_info *src;
> +	erofs_blk_t current_addr = sbi->primarydevice_blocks;
> +	int idx = 0;
> +
> +	list_for_each_entry(src, src_list, list) {
> +		erofs_blk_t src_blocks = devs[idx].blocks;
> +		u64 src_off = 0, dst_off;
> +		u64 len;
> +		int src_fd, dst_fd;
> +
> +		devs[idx].uniaddr = current_addr;
> +
> +		erofs_info("Copying %s: %u blocks at unified address %u",
> +			   src->devname, src_blocks, current_addr);
> +
> +		src_fd = src->bdev.fd;
> +		dst_fd = sbi->bdev.fd;
> +
> +		if (src_fd < 0 || dst_fd < 0) {
> +			erofs_err("failed to get file descriptors");
> +			return -EINVAL;
> +		}
> +
> +		dst_off = erofs_pos(sbi, current_addr);
> +		len = erofs_pos(src, src_blocks);
> +
> +		while (len > 0) {
> +			ssize_t copied = erofs_copy_file_range(
> +				src_fd, &src_off, dst_fd, &dst_off, len);
> +			if (copied < 0) {
> +				erofs_err("failed to copy data from %s: %s",
> +					  src->devname, erofs_strerror(-copied));
> +				return copied;
> +			}
> +			if (copied == 0)
> +				break;
> +			len -= copied;
> +		}
> +
> +		current_addr += src_blocks;
> +		idx++;
> +	}
> +	sbi->primarydevice_blocks = current_addr;
> +	return 0;
> +}
> +
> +int erofs_rebuild_load_trees_full(struct erofs_inode *root,
> +				  struct erofs_sb_info *sbi,
> +				  struct list_head *src_list,
> +				  unsigned int src_count)

Hi Karpinski,


Thanks for your patches; they work well in my simple test.


I think this function shares a lot of logic with
erofs_rebuild_load_trees.

Could we integrate it with the existing logic?


Thanks,

Yifan

> +{
> +	struct erofs_device_info *devs;
> +	struct erofs_sb_info *src;
> +	int ret, idx = 0;
> +
> +	ret = erofs_mkfs_init_devices(sbi, src_count);
> +	if (ret) {
> +		erofs_err("failed to initialize devices: %s",
> +			  erofs_strerror(ret));
> +		return ret;
> +	}
> +	devs = sbi->devs;
> +
> +	/* Read source superblocks and populate device table */
> +	list_for_each_entry(src, src_list, list) {
> +		ret = erofs_read_superblock(src);
> +		if (ret) {
> +			erofs_err("failed to read superblock of %s: %s",
> +				  src->devname, erofs_strerror(ret));
> +			return ret;
> +		}
> +		devs[idx].blocks = src->primarydevice_blocks;
> +		erofs_uuid_unparse_as_tag(src->uuid, (char *)devs[idx].tag);
> +		idx++;
> +	}
> +
> +	/* Copy source data blocks */
> +	ret = erofs_rebuild_copy_src_blocks(sbi, src_list);
> +	if (ret)
> +		return ret;
> +
> +	/* Advance buffer manager past copied data */
> +	erofs_bset_tail(sbi->bmgr, sbi->primarydevice_blocks);
> +
> +	/* Load filesystem trees with unified block addresses */
> +	idx = 0;
> +	list_for_each_entry(src, src_list, list) {
> +		ret = erofs_rebuild_load_tree(root, src,
> +					      EROFS_REBUILD_DATA_FULL,
> +					      devs[idx].uniaddr);
> +		if (ret) {
> +			erofs_err("failed to load %s", src->devname);
> +			return ret;
> +		}
> +		idx++;
> +	}
> +	return 0;
> +}
> diff --git a/mkfs/main.c b/mkfs/main.c
> index a8f9a5e..124a024 100644
> --- a/mkfs/main.c
> +++ b/mkfs/main.c
> @@ -15,9 +15,11 @@
>   #include <getopt.h>
>   #include "erofs/config.h"
>   #include "erofs/print.h"
> +#include "erofs/io.h"
>   #include "erofs/importer.h"
>   #include "erofs/diskbuf.h"
>   #include "erofs/inode.h"
> +#include "erofs/dir.h"
>   #include "erofs/tar.h"
>   #include "erofs/dedupe.h"
>   #include "erofs/xattr.h"
> @@ -1726,7 +1728,9 @@ static int erofs_mkfs_rebuild_load_trees(struct erofs_inode *root)
>   		break;
>   	case EROFS_MKFS_DATA_IMPORT_FULLDATA:
>   		datamode = EROFS_REBUILD_DATA_FULL;
> -		break;
> +		return erofs_rebuild_load_trees_full(root, &g_sbi,
> +						     &rebuild_src_list,
> +						     rebuild_src_count);
>   	case EROFS_MKFS_DATA_IMPORT_RVSP:
>   		datamode = EROFS_REBUILD_DATA_RESVSP;
>   		break;
>


More information about the Linux-erofs mailing list