[PATCH 3/3] erofs-utils: mkfs: add optional support for inode meta zone
Gao Xiang
hsiangkao at linux.alibaba.com
Fri Nov 7 21:06:09 AEDT 2025
Many use cases benefit from concentrating inode metadata in one place,
such as image filesystems primarily accessed over a network (e.g., EROFS
native full container images). Otherwise, scattered on-disk inodes
increase network access overhead and make metadata prefetching difficult
to implement; such prefetching keeps systems from getting stuck on
metadata I/Os when, for example, the network fails.
Usage:
`--ZI` or `--ZI=1` Enable inode meta zone;
`--ZI=0` Disable inode meta zone (default).
Closes: https://lore.kernel.org/r/20250422123612.261764-1-lihongbo22@huawei.com
Cc: Hongbo Li <lihongbo22 at huawei.com>
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
include/erofs/internal.h | 12 ++--
lib/importer.c | 10 ++-
lib/inode.c | 46 ++++++++------
lib/liberofs_metabox.h | 14 ++++-
lib/metabox.c | 127 ++++++++++++++++++++++++++++++---------
lib/super.c | 10 ++-
lib/xattr.c | 2 +-
mkfs/main.c | 12 +++-
8 files changed, 168 insertions(+), 65 deletions(-)
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 610650138bee..62594b877151 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -97,7 +97,7 @@ struct erofs_sb_info {
u64 total_blocks;
u64 primarydevice_blocks;
- u32 meta_blkaddr;
+ s32 meta_blkaddr;
u32 xattr_blkaddr;
u32 feature_compat;
@@ -150,7 +150,7 @@ struct erofs_sb_info {
struct erofs_bufmgr *bmgr;
struct erofs_xattrmgr *xamgr;
struct z_erofs_mgr *zmgr;
- struct erofs_metaboxmgr *m2gr;
+ struct erofs_metamgr *m2gr, *mxgr;
struct erofs_packed_inode *packedinode;
struct erofs_buffer_head *bh_sb;
struct erofs_buffer_head *bh_devt;
@@ -309,8 +309,8 @@ static inline bool erofs_inode_in_metabox(struct erofs_inode *inode)
static inline erofs_off_t erofs_iloc(struct erofs_inode *inode)
{
struct erofs_sb_info *sbi = inode->sbi;
- erofs_off_t base = erofs_inode_in_metabox(inode) ? 0 :
- erofs_pos(sbi, sbi->meta_blkaddr);
+ s64 base = erofs_inode_in_metabox(inode) ? 0 :
+ (s64)erofs_pos(sbi, sbi->meta_blkaddr);
return base + ((inode->nid & EROFS_DIRENT_NID_MASK) << EROFS_ISLOTBITS);
}
@@ -434,8 +434,8 @@ int erofs_mkfs_init_devices(struct erofs_sb_info *sbi, unsigned int devices);
int erofs_write_device_table(struct erofs_sb_info *sbi);
int erofs_enable_sb_chksum(struct erofs_sb_info *sbi, u32 *crc);
int erofs_superblock_csum_verify(struct erofs_sb_info *sbi);
-int erofs_mkfs_format_fs(struct erofs_sb_info *sbi,
- unsigned int blkszbits, unsigned int dsunit);
+int erofs_mkfs_format_fs(struct erofs_sb_info *sbi, unsigned int blkszbits,
+ unsigned int dsunit, bool metazone);
int erofs_mkfs_load_fs(struct erofs_sb_info *sbi, unsigned int dsunit);
/* namei.c */
diff --git a/lib/importer.c b/lib/importer.c
index c73dde2529b7..958a433b9eaa 100644
--- a/lib/importer.c
+++ b/lib/importer.c
@@ -69,8 +69,8 @@ int erofs_importer_init(struct erofs_importer *im)
goto out_err;
}
- subsys = "metabox";
- err = erofs_metabox_init(sbi);
+ subsys = "metadata";
+ err = erofs_metadata_init(sbi);
if (err)
goto out_err;
@@ -107,6 +107,10 @@ int erofs_importer_flush_all(struct erofs_importer *im)
if (err)
return err;
+ err = erofs_metazone_flush(sbi);
+ if (err)
+ return err;
+
fsalignblks = im->params->fsalignblks ?
roundup_pow_of_two(im->params->fsalignblks) : 1;
sbi->primarydevice_blocks = roundup(erofs_mapbh(sbi->bmgr, NULL),
@@ -128,6 +132,6 @@ void erofs_importer_exit(struct erofs_importer *im)
struct erofs_sb_info *sbi = im->sbi;
z_erofs_dedupe_ext_exit();
- erofs_metabox_exit(sbi);
+ erofs_metadata_exit(sbi);
erofs_packedfile_exit(sbi);
}
diff --git a/lib/inode.c b/lib/inode.c
index 09b2e507c609..64f6bc34610f 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -376,18 +376,19 @@ erofs_nid_t erofs_lookupnid(struct erofs_inode *inode)
{
struct erofs_buffer_head *const bh = inode->bh;
struct erofs_sb_info *sbi = inode->sbi;
- erofs_off_t off, meta_offset;
+ erofs_off_t off;
+ s64 meta_offset;
erofs_nid_t nid;
if (bh && inode->nid == EROFS_NID_UNALLOCATED) {
erofs_mapbh(NULL, bh->block);
off = erofs_btell(bh, false);
- if (!inode->in_metabox) {
- meta_offset = erofs_pos(sbi, sbi->meta_blkaddr);
- DBG_BUGON(off < meta_offset);
- } else {
+ if (inode->in_metabox) {
meta_offset = 0;
+ } else {
+ meta_offset = (s64)erofs_pos(sbi, sbi->meta_blkaddr);
+ DBG_BUGON(off < meta_offset && !sbi->m2gr);
}
nid = (off - meta_offset) >> EROFS_ISLOTBITS;
@@ -718,8 +719,8 @@ int erofs_iflush(struct erofs_inode *inode)
struct erofs_sb_info *sbi = inode->sbi;
struct erofs_buffer_head *bh = inode->bh;
erofs_off_t off = erofs_iloc(inode);
- struct erofs_bufmgr *ibmgr = inode->in_metabox ?
- erofs_metabox_bmgr(sbi) : sbi->bmgr;
+ struct erofs_bufmgr *ibmgr =
+ erofs_metadata_bmgr(sbi, inode->in_metabox) ?: sbi->bmgr;
union {
struct erofs_inode_compact dic;
struct erofs_inode_extended die;
@@ -921,12 +922,9 @@ static int erofs_prepare_inode_buffer(struct erofs_importer *im,
if (inode->extent_isize)
inodesize = roundup(inodesize, 8) + inode->extent_isize;
- if (!erofs_is_special_identifier(inode->i_srcpath) &&
- erofs_metabox_bmgr(sbi))
+ if (!erofs_is_special_identifier(inode->i_srcpath) && sbi->mxgr)
inode->in_metabox = true;
-
- if (inode->in_metabox)
- ibmgr = erofs_metabox_bmgr(sbi) ?: sbi->bmgr;
+ ibmgr = erofs_metadata_bmgr(sbi, inode->in_metabox) ?: sbi->bmgr;
if (inode->datalayout == EROFS_INODE_FLAT_PLAIN)
goto noinline;
@@ -1000,8 +998,8 @@ static int erofs_bh_flush_write_inline(struct erofs_buffer_head *bh)
{
struct erofs_inode *const inode = bh->fsprivate;
struct erofs_sb_info *sbi = inode->sbi;
- struct erofs_bufmgr *ibmgr = inode->in_metabox ?
- erofs_metabox_bmgr(sbi) : sbi->bmgr;
+ struct erofs_bufmgr *ibmgr =
+ erofs_metadata_bmgr(sbi, inode->in_metabox) ?: sbi->bmgr;
const erofs_off_t off = erofs_btell(bh, false);
int ret;
@@ -1360,21 +1358,29 @@ static void erofs_fixup_meta_blkaddr(struct erofs_inode *root)
const erofs_off_t rootnid_maxoffset = 0xffff << EROFS_ISLOTBITS;
struct erofs_buffer_head *const bh = root->bh;
struct erofs_sb_info *sbi = root->sbi;
- erofs_off_t meta_offset = 0;
+ int bsz = erofs_blksiz(sbi);
+ /*
+  * Byte offset subtracted from the root inode position to form its NID.
+  * It must start at 0 because the metabox paths below never assign it,
+  * and it must be 64-bit (signed, since it can be -bsz) because the root
+  * inode position is an erofs_off_t that may exceed INT_MAX.
+  */
+ s64 meta_offset = 0;
erofs_off_t off;
erofs_mapbh(NULL, bh->block);
off = erofs_btell(bh, false);
- if (!root->in_metabox && off > rootnid_maxoffset)
- meta_offset = round_up(off - rootnid_maxoffset,
- erofs_blksiz(sbi));
- else if (root->in_metabox && !erofs_sb_has_48bit(sbi)) {
+ if (!root->in_metabox) {
+ if (!off) {
+ DBG_BUGON(!sbi->m2gr);
+ DBG_BUGON(sbi->meta_blkaddr != -1);
+ meta_offset = -bsz; /* avoid NID 0 */
+ } else if (off > rootnid_maxoffset) {
+ meta_offset = round_up(off - rootnid_maxoffset, bsz);
+ sbi->meta_blkaddr = erofs_blknr(sbi, meta_offset);
+ } else {
+ meta_offset = 0;
+ }
+ } else if (!erofs_sb_has_48bit(sbi)) {
sbi->build_time = sbi->epoch;
sbi->epoch = max_t(s64, 0, (s64)sbi->build_time - UINT32_MAX);
sbi->build_time -= sbi->epoch;
erofs_sb_set_48bit(sbi);
}
- sbi->meta_blkaddr = erofs_blknr(sbi, meta_offset);
root->nid = ((off - meta_offset) >> EROFS_ISLOTBITS) |
((u64)root->in_metabox << EROFS_DIRENT_NID_METABOX_BIT);
}
diff --git a/lib/liberofs_metabox.h b/lib/liberofs_metabox.h
index d8896c01c298..bf4051cf18e2 100644
--- a/lib/liberofs_metabox.h
+++ b/lib/liberofs_metabox.h
@@ -4,6 +4,8 @@
#include "erofs/internal.h"
+#define EROFS_META_NEW_ADDR ((u32)-1ULL)
+
extern const char *erofs_metabox_identifier;
#define EROFS_METABOX_INODE erofs_metabox_identifier
@@ -12,11 +14,17 @@ static inline bool erofs_is_metabox_inode(struct erofs_inode *inode)
return inode->i_srcpath == EROFS_METABOX_INODE;
}
+/*
+ * Report whether an inode meta zone is in use for @sbi: either its manager
+ * (sbi->m2gr) already exists, or meta_blkaddr still carries the
+ * EROFS_META_NEW_ADDR placeholder.
+ */
+static inline bool erofs_has_meta_zone(struct erofs_sb_info *sbi)
+{
+	if (sbi->m2gr)
+		return true;
+	return sbi->meta_blkaddr == EROFS_META_NEW_ADDR;
+}
+
struct erofs_importer;
-void erofs_metabox_exit(struct erofs_sb_info *sbi);
-int erofs_metabox_init(struct erofs_sb_info *sbi);
-struct erofs_bufmgr *erofs_metabox_bmgr(struct erofs_sb_info *sbi);
+void erofs_metadata_exit(struct erofs_sb_info *sbi);
+int erofs_metadata_init(struct erofs_sb_info *sbi);
+struct erofs_bufmgr *erofs_metadata_bmgr(struct erofs_sb_info *sbi, bool mbox);
int erofs_metabox_iflush(struct erofs_importer *im);
+int erofs_metazone_flush(struct erofs_sb_info *sbi);
#endif
diff --git a/lib/metabox.c b/lib/metabox.c
index bf188f6db0f5..37267ddb73cf 100644
--- a/lib/metabox.c
+++ b/lib/metabox.c
@@ -2,81 +2,152 @@
#include <stdlib.h>
#include "erofs/inode.h"
#include "erofs/importer.h"
+#include "erofs/print.h"
#include "liberofs_cache.h"
#include "liberofs_private.h"
#include "liberofs_metabox.h"
const char *erofs_metabox_identifier = "metabox";
-struct erofs_metaboxmgr {
+struct erofs_metamgr {
struct erofs_vfile vf;
struct erofs_bufmgr *bmgr;
};
-void erofs_metabox_exit(struct erofs_sb_info *sbi)
+static void erofs_metamgr_exit(struct erofs_metamgr *m2gr)
{
- struct erofs_metaboxmgr *m2gr = sbi->m2gr;
-
- if (!m2gr)
- return;
DBG_BUGON(!m2gr->bmgr);
erofs_buffer_exit(m2gr->bmgr);
erofs_io_close(&m2gr->vf);
free(m2gr);
}
-
-int erofs_metabox_init(struct erofs_sb_info *sbi)
+/*
+ * Back @m2gr with a temporary file obtained from erofs_tmpfile() and create
+ * a buffer manager on top of it.
+ *
+ * Returns 0 on success, the negative value from erofs_tmpfile() if no
+ * temporary file could be created, or -ENOMEM if the buffer manager could
+ * not be set up.  @m2gr itself is never freed here; that is the caller's job.
+ */
+static int erofs_metamgr_init(struct erofs_sb_info *sbi,
+			      struct erofs_metamgr *m2gr)
{
- struct erofs_metaboxmgr *m2gr;
int ret;
- if (!erofs_sb_has_metabox(sbi))
- return 0;
-
- m2gr = malloc(sizeof(*m2gr));
- if (!m2gr)
- return -ENOMEM;
-
ret = erofs_tmpfile();
if (ret < 0)
- goto out_err;
+ return ret;
m2gr->vf = (struct erofs_vfile){ .fd = ret };
m2gr->bmgr = erofs_buffer_init(sbi, 0, &m2gr->vf);
- if (m2gr->bmgr) {
+	if (!m2gr->bmgr) {
+		/* the caller only frees @m2gr, so release the tmpfile here */
+		erofs_io_close(&m2gr->vf);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+/*
+ * Release the meta zone manager (sbi->m2gr) and the metabox manager
+ * (sbi->mxgr) of @sbi, whichever of them exist, and clear both pointers.
+ */
+void erofs_metadata_exit(struct erofs_sb_info *sbi)
+{
+	struct erofs_metamgr **const slots[] = { &sbi->m2gr, &sbi->mxgr };
+	unsigned int i;
+
+	for (i = 0; i < sizeof(slots) / sizeof(slots[0]); ++i) {
+		if (!*slots[i])
+			continue;
+		erofs_metamgr_exit(*slots[i]);
+		*slots[i] = NULL;	/* a second call then finds nothing to do */
+	}
+}
+
+int erofs_metadata_init(struct erofs_sb_info *sbi)
+{
+ struct erofs_metamgr *m2gr;
+ int ret;
+
+ if (!sbi->m2gr && sbi->meta_blkaddr == EROFS_META_NEW_ADDR) {
+ m2gr = malloc(sizeof(*m2gr));
+ if (!m2gr)
+ return -ENOMEM;
+ ret = erofs_metamgr_init(sbi, m2gr);
+ if (ret)
+ goto err_free;
sbi->m2gr = m2gr;
- return 0;
}
- ret = -ENOMEM;
-out_err:
+
+ if (!sbi->mxgr && erofs_sb_has_metabox(sbi)) {
+ m2gr = malloc(sizeof(*m2gr));
+ if (!m2gr)
+ return -ENOMEM;
+ ret = erofs_metamgr_init(sbi, m2gr);
+ if (ret)
+ goto err_free;
+ sbi->mxgr = m2gr;
+ }
+ return 0;
+err_free:
free(m2gr);
return ret;
}
-struct erofs_bufmgr *erofs_metabox_bmgr(struct erofs_sb_info *sbi)
+/*
+ * Return the buffer manager of the metabox manager (@mbox == true) or of
+ * the meta zone manager (@mbox == false); NULL if that manager is absent.
+ */
+struct erofs_bufmgr *erofs_metadata_bmgr(struct erofs_sb_info *sbi, bool mbox)
{
- return sbi->m2gr ? sbi->m2gr->bmgr : NULL;
+	struct erofs_metamgr *mgr = mbox ? sbi->mxgr : sbi->m2gr;
+
+	return mgr ? mgr->bmgr : NULL;
}
int erofs_metabox_iflush(struct erofs_importer *im)
{
struct erofs_sb_info *sbi = im->sbi;
- struct erofs_metaboxmgr *m2gr = sbi->m2gr;
+ struct erofs_metamgr *mxgr = sbi->mxgr;
struct erofs_inode *inode;
int err;
- if (!m2gr || !erofs_sb_has_metabox(sbi))
+ if (!mxgr || !erofs_sb_has_metabox(sbi))
return -EINVAL;
- err = erofs_bflush(m2gr->bmgr, NULL);
+ err = erofs_bflush(mxgr->bmgr, NULL);
if (err)
return err;
- if (erofs_io_lseek(&m2gr->vf, 0, SEEK_END) <= 0)
+ if (erofs_io_lseek(&mxgr->vf, 0, SEEK_END) <= 0)
return 0;
- inode = erofs_mkfs_build_special_from_fd(im, m2gr->vf.fd,
+ inode = erofs_mkfs_build_special_from_fd(im, mxgr->vf.fd,
EROFS_METABOX_INODE);
sbi->metabox_nid = erofs_lookupnid(inode);
erofs_iput(inode);
return 0;
}
+
+/*
+ * Flush the meta zone staged behind sbi->m2gr and copy it into a DATA
+ * buffer allocated from the primary buffer manager, then add the resulting
+ * block address to sbi->meta_blkaddr.
+ *
+ * Returns 0 on success or when no meta zone manager exists; otherwise the
+ * negative error from flushing, allocating or copying.  sbi->meta_blkaddr
+ * is left untouched on failure.
+ */
+int erofs_metazone_flush(struct erofs_sb_info *sbi)
+{
+	struct erofs_metamgr *m2gr = sbi->m2gr;
+	struct erofs_buffer_head *bh;
+	struct erofs_bufmgr *m2bgr;
+	erofs_blk_t meta_blkaddr;
+	u64 length, pos_out;
+	int ret, count;
+
+	if (!m2gr)
+		return 0;
+	m2bgr = m2gr->bmgr;
+
+	ret = erofs_bflush(m2bgr, NULL);
+	if (ret)
+		return ret;
+
+	/* widen before shifting so the byte length cannot wrap */
+	length = (u64)erofs_mapbh(m2bgr, NULL) << sbi->blkszbits;
+	bh = erofs_balloc(sbi->bmgr, DATA, length, 0);
+	/* failures are checked ERR_PTR-style, as for erofs_reserve_sb() */
+	if (IS_ERR(bh))
+		return PTR_ERR(bh);
+
+	erofs_mapbh(NULL, bh->block);
+	pos_out = erofs_btell(bh, false);
+	meta_blkaddr = pos_out >> sbi->blkszbits;
+	do {
+		/* the copy length is passed as an int, so go in chunks */
+		count = min_t(erofs_off_t, length, INT_MAX);
+		ret = erofs_io_xcopy(sbi->bmgr->vf, pos_out,
+				     &m2gr->vf, count, false);
+		if (ret < 0)
+			break;
+		pos_out += count;
+	} while (length -= count);
+	bh->op = &erofs_drop_directly_bhops;
+	erofs_bdrop(bh, false);
+	/* report a failed copy instead of publishing a bogus meta_blkaddr */
+	if (ret < 0)
+		return ret;
+	sbi->meta_blkaddr += meta_blkaddr;
+	return 0;
+}
diff --git a/lib/super.c b/lib/super.c
index 9760265aa754..d626c7cdc76f 100644
--- a/lib/super.c
+++ b/lib/super.c
@@ -8,6 +8,7 @@
#include "erofs/xattr.h"
#include "liberofs_cache.h"
#include "liberofs_compress.h"
+#include "liberofs_metabox.h"
static bool check_layout_compatibility(struct erofs_sb_info *sbi,
struct erofs_super_block *dsb)
@@ -418,8 +419,8 @@ out:
return 0;
}
-int erofs_mkfs_format_fs(struct erofs_sb_info *sbi,
- unsigned int blkszbits, unsigned int dsunit)
+int erofs_mkfs_format_fs(struct erofs_sb_info *sbi, unsigned int blkszbits,
+ unsigned int dsunit, bool metazone)
{
struct erofs_buffer_head *bh;
struct erofs_bufmgr *bmgr;
@@ -430,7 +431,10 @@ int erofs_mkfs_format_fs(struct erofs_sb_info *sbi,
return -ENOMEM;
sbi->bmgr = bmgr;
bmgr->dsunit = dsunit;
-
+ if (metazone)
+ sbi->meta_blkaddr = EROFS_META_NEW_ADDR;
+ else
+ sbi->meta_blkaddr = 0;
bh = erofs_reserve_sb(bmgr);
if (IS_ERR(bh))
return PTR_ERR(bh);
diff --git a/lib/xattr.c b/lib/xattr.c
index fc22c817f136..8f0332b44a02 100644
--- a/lib/xattr.c
+++ b/lib/xattr.c
@@ -828,7 +828,7 @@ int erofs_xattr_flush_name_prefixes(struct erofs_importer *im, bool plain)
if (!plain) {
if (erofs_sb_has_metabox(sbi)) {
- bmgr = erofs_metabox_bmgr(sbi);
+ bmgr = erofs_metadata_bmgr(sbi, true);
vf = bmgr->vf;
} else if (may_fragments) {
erofs_sb_set_fragments(sbi);
diff --git a/mkfs/main.c b/mkfs/main.c
index 4de298b6dedd..76bf84348364 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -101,6 +101,7 @@ static struct option long_options[] = {
{"oci", optional_argument, NULL, 534},
#endif
{"zD", optional_argument, NULL, 536},
+ {"ZI", optional_argument, NULL, 537},
{0, 0, 0, 0},
};
@@ -176,6 +177,7 @@ static void usage(int argc, char **argv)
" --mkfs-time the timestamp is applied as build time only\n"
" -UX use a given filesystem UUID\n"
" --zD[=<0|1>] specify directory compression: 0=disable [default], 1=enable\n"
+ " --ZI[=<0|1>] specify the separate inode metadata zone availability: 0=disable [default], 1=enable\n"
" --all-root make all files owned by root\n"
#ifdef EROFS_MT_ENABLED
" --async-queue-limit=# specify the maximum number of entries in the multi-threaded job queue\n"
@@ -269,6 +271,7 @@ static void version(void)
static struct erofsmkfs_cfg {
/* < 0, xattr disabled and >= INT_MAX, always use inline xattrs */
long inlinexattr_tolerance;
+ bool inode_metazone;
} mkfscfg = {
.inlinexattr_tolerance = 2,
};
@@ -1412,6 +1415,12 @@ static int mkfs_parse_options_cfg(struct erofs_importer_params *params,
else
params->compress_dir = false;
break;
+		case 537:
+			/* a bare `--ZI` or `--ZI=1` enables the inode meta zone */
+			if (!optarg || !strcmp(optarg, "1"))
+				mkfscfg.inode_metazone = true;
+			else
+				mkfscfg.inode_metazone = false;
+			break;
case 'V':
version();
exit(0);
@@ -1787,7 +1796,8 @@ int main(int argc, char **argv)
}
if (!incremental_mode)
- err = erofs_mkfs_format_fs(&g_sbi, mkfs_blkszbits, dsunit);
+ err = erofs_mkfs_format_fs(&g_sbi, mkfs_blkszbits, dsunit,
+ mkfscfg.inode_metazone);
else
err = erofs_mkfs_load_fs(&g_sbi, dsunit);
if (err)
--
2.43.5
More information about the Linux-erofs
mailing list