[PATCH v2] erofs-utils: lib: introduce diskbuf
Gao Xiang
xiang at kernel.org
Wed Sep 20 13:46:55 AEST 2023
From: Gao Xiang <hsiangkao at linux.alibaba.com>
Previously, each tar data file was kept as a separate temporary file before
landing in the target image, since the input stream may be non-seekable.
That is somewhat inefficient. Let's introduce a new diskbuf approach to
manage those buffers. Later, each stream can be redirected to blob
files for external reference.
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
changes since v1:
- fix two issues reported by Jingbo.
include/erofs/blobchunk.h | 3 +-
include/erofs/diskbuf.h | 30 ++++++++
include/erofs/internal.h | 8 ++-
lib/Makefile.am | 3 +-
lib/blobchunk.c | 7 +-
lib/diskbuf.c | 146 ++++++++++++++++++++++++++++++++++++++
lib/inode.c | 35 +++++----
lib/io.c | 1 +
lib/tar.c | 21 ++++--
mkfs/main.c | 10 +++
10 files changed, 236 insertions(+), 28 deletions(-)
create mode 100644 include/erofs/diskbuf.h
create mode 100644 lib/diskbuf.c
diff --git a/include/erofs/blobchunk.h b/include/erofs/blobchunk.h
index fb85d8e..89c8048 100644
--- a/include/erofs/blobchunk.h
+++ b/include/erofs/blobchunk.h
@@ -17,7 +17,8 @@ extern "C"
struct erofs_blobchunk *erofs_get_unhashed_chunk(unsigned int device_id,
erofs_blk_t blkaddr, erofs_off_t sourceoffset);
int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, erofs_off_t off);
-int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd);
+int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd,
+ erofs_off_t startoff);
int tarerofs_write_chunkes(struct erofs_inode *inode, erofs_off_t data_offset);
int erofs_mkfs_dump_blobs(struct erofs_sb_info *sbi);
void erofs_blob_exit(void);
diff --git a/include/erofs/diskbuf.h b/include/erofs/diskbuf.h
new file mode 100644
index 0000000..29d9fe2
--- /dev/null
+++ b/include/erofs/diskbuf.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+#ifndef __EROFS_DISKBUF_H
+#define __EROFS_DISKBUF_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "erofs/defs.h"
+
+/*
+ * Handle for one buffer stored in a diskbuf stream.  lib/diskbuf.c fills it
+ * in when a buffer is reserved and resets `sp` to NULL when it is closed.
+ */
+struct erofs_diskbuf {
+ void *sp; /* backing stream (private to lib/diskbuf.c); NULL if unbound */
+ u64 offset; /* start of the buffer, relative to the stream's base position */
+};
+
+/*
+ * Seek the backing fd to the start of db's buffer and return that fd.
+ * If off is non-NULL it receives the absolute file position that was set.
+ * Returns -1 if db is not bound to a stream, -E2BIG if the seek fails.
+ */
+int erofs_diskbuf_getfd(struct erofs_diskbuf *db, u64 *off);
+
+/*
+ * Bind db to stream `sid`, starting at the stream's alignment-rounded tail,
+ * and take a reference on the stream.  If off is non-NULL it is set to db's
+ * offset plus the stream's base position.  Returns the stream fd, or -EIO if
+ * the fd cannot be repositioned.
+ */
+int erofs_diskbuf_reserve(struct erofs_diskbuf *db, int sid, u64 *off);
+/* Advance the stream tail by `len` bytes once db's payload has been written. */
+void erofs_diskbuf_commit(struct erofs_diskbuf *db, u64 len);
+/* Drop db's reference on its stream and unbind it (db->sp becomes NULL). */
+void erofs_diskbuf_close(struct erofs_diskbuf *db);
+
+/* Allocate and open `nstrms` streams; returns 0 or a negative errno. */
+int erofs_diskbuf_init(unsigned int nstrms);
+/* Close the fds of all streams opened by erofs_diskbuf_init(). */
+void erofs_diskbuf_exit(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 616cd3a..d859905 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -108,7 +108,7 @@ struct erofs_sb_info {
u8 xattr_prefix_count;
struct erofs_xattr_prefix_item *xattr_prefixes;
- int devfd;
+ int devfd, devblksz;
u64 devsz;
dev_t dev;
unsigned int nblobs;
@@ -151,6 +151,8 @@ EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER)
#define EROFS_I_EA_INITED (1 << 0)
#define EROFS_I_Z_INITED (1 << 1)
+struct erofs_diskbuf;
+
struct erofs_inode {
struct list_head i_hash, i_subdirs, i_xattrs;
@@ -190,7 +192,7 @@ struct erofs_inode {
char *i_srcpath;
union {
char *i_link;
- FILE *i_tmpfile;
+ struct erofs_diskbuf *i_diskbuf;
};
unsigned char datalayout;
unsigned char inode_isize;
@@ -198,7 +200,7 @@ struct erofs_inode {
unsigned short idata_size;
bool compressed_idata;
bool lazy_tailblock;
- bool with_tmpfile;
+ bool with_diskbuf;
bool opaque;
/* OVL: non-merge dir that may contain whiteout entries */
bool whiteouts;
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 8a45bd6..483d410 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -9,6 +9,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
$(top_srcdir)/include/erofs/config.h \
$(top_srcdir)/include/erofs/decompress.h \
$(top_srcdir)/include/erofs/defs.h \
+ $(top_srcdir)/include/erofs/diskbuf.h \
$(top_srcdir)/include/erofs/err.h \
$(top_srcdir)/include/erofs/exclude.h \
$(top_srcdir)/include/erofs/flex-array.h \
@@ -33,7 +34,7 @@ liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
namei.c data.c compress.c compressor.c zmap.c decompress.c \
compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
fragments.c rb_tree.c dedupe.c uuid_unparse.c uuid.c tar.c \
- block_list.c xxhash.c rebuild.c
+ block_list.c xxhash.c rebuild.c diskbuf.c
liberofs_la_CFLAGS = -Wall ${libuuid_CFLAGS} -I$(top_srcdir)/include
if ENABLE_LZ4
diff --git a/lib/blobchunk.c b/lib/blobchunk.c
index a599f3a..317a279 100644
--- a/lib/blobchunk.c
+++ b/lib/blobchunk.c
@@ -223,7 +223,8 @@ out:
return 0;
}
-int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd)
+int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd,
+ erofs_off_t startoff)
{
struct erofs_sb_info *sbi = inode->sbi;
unsigned int chunkbits = cfg.c_chunkbits;
@@ -237,7 +238,7 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd)
#ifdef SEEK_DATA
/* if the file is fully sparsed, use one big chunk instead */
- if (lseek(fd, 0, SEEK_DATA) < 0 && errno == ENXIO) {
+ if (lseek(fd, startoff, SEEK_DATA) < 0 && errno == ENXIO) {
chunkbits = ilog2(inode->i_size - 1) + 1;
if (chunkbits < sbi->blkszbits)
chunkbits = sbi->blkszbits;
@@ -269,7 +270,7 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd)
lastch = NULL;
minextblks = BLK_ROUND_UP(sbi, inode->i_size);
- for (pos = 0; pos < inode->i_size; pos += len) {
+ for (pos = startoff; pos < startoff + inode->i_size; pos += len) {
#ifdef SEEK_DATA
off_t offset = lseek(fd, pos, SEEK_DATA);
diff --git a/lib/diskbuf.c b/lib/diskbuf.c
new file mode 100644
index 0000000..bc14502
--- /dev/null
+++ b/lib/diskbuf.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include "erofs/diskbuf.h"
+#include "erofs/internal.h"
+#include "erofs/print.h"
+#include <stdio.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+/*
+ * A simple approach to avoid creating too many temporary files: buffers are
+ * appended back to back into a shared stream, each one starting at an offset
+ * rounded up to `alignsize`.  `dbufstrm` is the stream array allocated by
+ * erofs_diskbuf_init(); its last entry is a sentinel with fd < 0.
+ */
+static struct erofs_diskbufstrm {
+ u64 count; /* users: 1 after init, +1 per reserve, -1 per close */
+ u64 tailoffset, devpos; /* append offset (relative) and base position in fd */
+ int fd; /* backing descriptor; negative on the sentinel entry */
+ unsigned int alignsize; /* max(st_blksize, page size) of the backing fd */
+ bool locked; /* set by reserve and checked by commit; never cleared here */
+} *dbufstrm;
+
+/*
+ * Position the backing fd at the first byte of @db's buffer and hand it out.
+ *
+ * @db:   bound diskbuf handle
+ * @fpos: if non-NULL, receives the absolute position the fd was seeked to
+ *
+ * Returns the stream fd on success, -1 if @db is not bound to any stream and
+ * -E2BIG if the seek does not land on the expected position.
+ */
+int erofs_diskbuf_getfd(struct erofs_diskbuf *db, u64 *fpos)
+{
+	const struct erofs_diskbufstrm *backing = db->sp;
+	u64 abspos;
+
+	if (backing == NULL)
+		return -1;
+
+	/* db->offset is stream-relative; devpos is the stream's base in fd */
+	abspos = db->offset + backing->devpos;
+	if (lseek(backing->fd, abspos, SEEK_SET) != abspos)
+		return -E2BIG;
+
+	if (fpos != NULL)
+		*fpos = abspos;
+	return backing->fd;
+}
+
+/*
+ * Reserve a new buffer at the tail of stream @sid and bind @db to it.
+ *
+ * If the stream tail is not a multiple of the stream's alignment, it is
+ * rounded up and the fd is repositioned to the new tail; an already aligned
+ * tail is used as is without touching the fd position.  The caller is
+ * expected to write its payload to the returned fd and then report the
+ * length through erofs_diskbuf_commit().
+ *
+ * @db:  handle to fill in; its offset and stream pointer are overwritten
+ * @sid: index of the stream to use (not range-checked here)
+ * @off: if non-NULL, receives the absolute position of the new buffer in
+ *       the returned fd
+ *
+ * Returns the stream fd, or -EIO if the fd cannot be repositioned.
+ */
+int erofs_diskbuf_reserve(struct erofs_diskbuf *db, int sid, u64 *off)
+{
+	struct erofs_diskbufstrm *strm = dbufstrm + sid;
+
+	if (strm->tailoffset & (strm->alignsize - 1)) {
+		strm->tailoffset = round_up(strm->tailoffset, strm->alignsize);
+		if (lseek(strm->fd, strm->tailoffset + strm->devpos,
+			  SEEK_SET) != strm->tailoffset + strm->devpos)
+			return -EIO;
+	}
+	/*
+	 * Record the new start before deriving *off from it; otherwise the
+	 * caller would be told where this handle's previous buffer lived.
+	 */
+	db->offset = strm->tailoffset;
+	if (off)
+		*off = db->offset + strm->devpos;
+	db->sp = strm;
+	++strm->count;
+	strm->locked = true;	/* TODO: need a real lock for MT */
+	return strm->fd;
+}
+
+/*
+ * Account for @len bytes written to @db's buffer by moving the tail of its
+ * stream forward.  @db must be the buffer most recently reserved on that
+ * stream (its offset has to equal the current tail); this is only enforced
+ * through DBG_BUGON().
+ */
+void erofs_diskbuf_commit(struct erofs_diskbuf *db, u64 len)
+{
+	struct erofs_diskbufstrm *owner = db->sp;
+
+	DBG_BUGON(!owner);
+	DBG_BUGON(!owner->locked);
+	DBG_BUGON(db->offset != owner->tailoffset);
+
+	owner->tailoffset = owner->tailoffset + len;
+}
+
+/*
+ * Release @db's reference on its stream and unbind the handle.  The stream
+ * itself keeps the reference taken at init time, hence the count must still
+ * be above 1 on entry (checked through DBG_BUGON() only).
+ */
+void erofs_diskbuf_close(struct erofs_diskbuf *db)
+{
+	struct erofs_diskbufstrm *owner = db->sp;
+
+	DBG_BUGON(!owner);
+	DBG_BUGON(owner->count <= 1);
+
+	owner->count -= 1;
+	db->sp = NULL;
+}
+
+/*
+ * Create an anonymous temporary file in $TMPDIR, or in /tmp when TMPDIR is
+ * not set.  The name is removed right after creation, so the file is only
+ * reachable through the returned descriptor; its mode is then set to
+ * 0666 masked by the current umask.
+ *
+ * Returns the open fd, or -errno if mkstemp() fails.
+ */
+int erofs_tmpfile(void)
+{
+#define TRAILER "tmp.XXXXXXXXXX"
+	const char *tmpdir = getenv("TMPDIR");
+	char path[PATH_MAX];
+	umode_t mask;
+	int fd;
+
+	if (!tmpdir)
+		tmpdir = "/tmp";
+	(void)snprintf(path, sizeof(path), "%s/" TRAILER, tmpdir);
+
+	fd = mkstemp(path);
+	if (fd < 0)
+		return -errno;
+	unlink(path);
+
+	/* the umask can only be read by setting it: set 0, then restore */
+	mask = umask(0);
+	(void)umask(mask);
+	(void)fchmod(fd, 0666 & ~mask);
+	return fd;
+}
+
+/*
+ * Set up @nstrms diskbuf streams followed by a sentinel entry with fd < 0.
+ *
+ * Stream 0 first tries to reuse the output device: when sbi.devsz is
+ * INT64_MAX, sbi.devfd is extended to twice (1ULL << 40) bytes and the stream
+ * buffers its data from offset 1ULL << 40 on a dup()ed descriptor.  Any stream
+ * that cannot do so is backed by a file from erofs_tmpfile().
+ *
+ * Returns 0 on success or a negative errno.  On failure the streams that were
+ * fully set up stay registered so erofs_diskbuf_exit() can close them; the
+ * failing stream and all later ones are left with fd < 0.
+ */
+int erofs_diskbuf_init(unsigned int nstrms)
+{
+	struct erofs_diskbufstrm *strm;
+	unsigned int i;
+	int err;
+
+	strm = calloc(nstrms + 1, sizeof(*strm));
+	if (!strm)
+		return -ENOMEM;
+	/*
+	 * calloc() leaves fd == 0, which is a valid descriptor: mark every
+	 * slot (sentinel included) as unused so that a partial failure can
+	 * never make erofs_diskbuf_exit() close fd 0.
+	 */
+	for (i = 0; i <= nstrms; ++i)
+		strm[i].fd = -1;
+	dbufstrm = strm;
+
+	for (; strm < dbufstrm + nstrms; ++strm) {
+		struct stat st;
+
+		/* try to use the devfd for regfiles on stream 0 */
+		if (strm == dbufstrm && sbi.devsz == INT64_MAX) {
+			strm->devpos = 1ULL << 40;
+			if (!ftruncate(sbi.devfd, strm->devpos << 1)) {
+				strm->fd = dup(sbi.devfd);
+				if (strm->fd < 0)
+					return -errno;
+				if (lseek(strm->fd, strm->devpos,
+					  SEEK_SET) != strm->devpos) {
+					err = -EIO;
+					goto err_closefd;
+				}
+				goto setupone;
+			}
+		}
+		strm->devpos = 0;
+		strm->fd = erofs_tmpfile();
+		if (strm->fd < 0)
+			return strm->fd;	/* already a negative errno */
+setupone:
+		strm->tailoffset = 0;
+		strm->count = 1;
+		if (fstat(strm->fd, &st)) {
+			err = -errno;
+			goto err_closefd;
+		}
+		strm->alignsize = max_t(u32, st.st_blksize, getpagesize());
+	}
+	return 0;
+
+err_closefd:
+	/* do not leave a half-initialized stream behind for _exit() */
+	close(strm->fd);
+	strm->fd = -1;
+	return err;
+}
+
+/*
+ * Close the descriptor of every stream, walking the array until the first
+ * entry with fd < 0.  Does nothing if no stream array was ever allocated.
+ * Every buffer is expected to be closed by now (count back to 1); the array
+ * itself is not freed here.
+ */
+void erofs_diskbuf_exit(void)
+{
+	struct erofs_diskbufstrm *cur = dbufstrm;
+
+	if (!cur)
+		return;
+
+	while (cur->fd >= 0) {
+		DBG_BUGON(cur->count != 1);
+
+		close(cur->fd);
+		cur->fd = -1;
+		++cur;
+	}
+}
diff --git a/lib/inode.c b/lib/inode.c
index 37aa79e..d321602 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -16,6 +16,7 @@
#endif
#include <dirent.h>
#include "erofs/print.h"
+#include "erofs/diskbuf.h"
#include "erofs/inode.h"
#include "erofs/cache.h"
#include "erofs/io.h"
@@ -121,10 +122,12 @@ unsigned int erofs_iput(struct erofs_inode *inode)
list_del(&inode->i_hash);
if (inode->i_srcpath)
free(inode->i_srcpath);
- if (inode->with_tmpfile)
- fclose(inode->i_tmpfile);
- else if (inode->i_link)
+ if (inode->with_diskbuf) {
+ erofs_diskbuf_close(inode->i_diskbuf);
+ free(inode->i_diskbuf);
+ } else if (inode->i_link) {
free(inode->i_link);
+ }
free(inode);
return 0;
}
@@ -454,12 +457,11 @@ static int write_uncompressed_file_from_fd(struct erofs_inode *inode, int fd)
return 0;
}
-int erofs_write_file(struct erofs_inode *inode, int fd)
+int erofs_write_file(struct erofs_inode *inode, int fd, u64 fpos)
{
int ret;
- if (!inode->i_size)
- return 0;
+ DBG_BUGON(!inode->i_size);
if (cfg.c_chunkbits) {
inode->u.chunkbits = cfg.c_chunkbits;
@@ -467,7 +469,7 @@ int erofs_write_file(struct erofs_inode *inode, int fd)
inode->u.chunkformat = 0;
if (cfg.c_force_chunkformat == FORCE_INODE_CHUNK_INDEXES)
inode->u.chunkformat = EROFS_CHUNK_FORMAT_INDEXES;
- return erofs_blob_write_chunked_file(inode, fd);
+ return erofs_blob_write_chunked_file(inode, fd, fpos);
}
if (cfg.c_compr_alg[0] && erofs_file_is_compressible(inode)) {
@@ -475,7 +477,7 @@ int erofs_write_file(struct erofs_inode *inode, int fd)
if (!ret || ret != -ENOSPC)
return ret;
- ret = lseek(fd, 0, SEEK_SET);
+ ret = lseek(fd, fpos, SEEK_SET);
if (ret < 0)
return -errno;
}
@@ -1096,7 +1098,7 @@ static int erofs_mkfs_build_tree(struct erofs_inode *dir, struct list_head *dirs
if (fd < 0)
return -errno;
- ret = erofs_write_file(dir, fd);
+ ret = erofs_write_file(dir, fd, 0);
close(fd);
} else {
ret = 0;
@@ -1358,11 +1360,16 @@ int erofs_rebuild_dump_tree(struct erofs_inode *dir)
ret = erofs_write_file_from_buffer(dir, dir->i_link);
free(dir->i_link);
dir->i_link = NULL;
- } else if (dir->i_tmpfile) {
- ret = erofs_write_file(dir, fileno(dir->i_tmpfile));
- fclose(dir->i_tmpfile);
- dir->i_tmpfile = NULL;
- dir->with_tmpfile = false;
+ } else if (dir->with_diskbuf) {
+ u64 fpos;
+
+ ret = erofs_diskbuf_getfd(dir->i_diskbuf, &fpos);
+ if (ret >= 0)
+ ret = erofs_write_file(dir, ret, fpos);
+ erofs_diskbuf_close(dir->i_diskbuf);
+ free(dir->i_diskbuf);
+ dir->i_diskbuf = NULL;
+ dir->with_diskbuf = false;
} else {
ret = 0;
}
diff --git a/lib/io.c b/lib/io.c
index 602ac68..c92f16c 100644
--- a/lib/io.c
+++ b/lib/io.c
@@ -119,6 +119,7 @@ repeat:
}
/* INT64_MAX is the limit of kernel vfs */
sbi->devsz = INT64_MAX;
+ sbi->devblksz = st.st_blksize;
break;
default:
erofs_err("bad file type (%s, %o).", dev, st.st_mode);
diff --git a/lib/tar.c b/lib/tar.c
index 0f0e7c5..08a140d 100644
--- a/lib/tar.c
+++ b/lib/tar.c
@@ -5,6 +5,7 @@
#include <sys/stat.h>
#include "erofs/print.h"
#include "erofs/cache.h"
+#include "erofs/diskbuf.h"
#include "erofs/inode.h"
#include "erofs/list.h"
#include "erofs/tar.h"
@@ -407,24 +408,32 @@ static int tarerofs_write_file_data(struct erofs_inode *inode,
struct erofs_tarfile *tar)
{
unsigned int j, rem;
+ int fd;
+ u64 off;
char buf[65536];
- if (!inode->i_tmpfile) {
- inode->i_tmpfile = tmpfile();
- if (!inode->i_tmpfile)
+ if (!inode->i_diskbuf) {
+ inode->i_diskbuf = calloc(1, sizeof(*inode->i_diskbuf));
+ if (!inode->i_diskbuf)
return -ENOSPC;
+ } else {
+ erofs_diskbuf_close(inode->i_diskbuf);
}
+ fd = erofs_diskbuf_reserve(inode->i_diskbuf, 0, &off);
+ if (fd < 0)
+ return -EBADF;
+
for (j = inode->i_size; j; ) {
rem = min_t(unsigned int, sizeof(buf), j);
if (erofs_read_from_fd(tar->fd, buf, rem) != rem ||
- fwrite(buf, rem, 1, inode->i_tmpfile) != 1)
+ write(fd, buf, rem) != rem)
return -EIO;
j -= rem;
}
- fseek(inode->i_tmpfile, 0, SEEK_SET);
- inode->with_tmpfile = true;
+ erofs_diskbuf_commit(inode->i_diskbuf, inode->i_size);
+ inode->with_diskbuf = true;
return 0;
}
diff --git a/mkfs/main.c b/mkfs/main.c
index a765743..ea868bb 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -15,6 +15,7 @@
#include "erofs/config.h"
#include "erofs/print.h"
#include "erofs/cache.h"
+#include "erofs/diskbuf.h"
#include "erofs/inode.h"
#include "erofs/tar.h"
#include "erofs/io.h"
@@ -938,6 +939,14 @@ int main(int argc, char **argv)
return 1;
}
+ if (tar_mode && !erofstar.index_mode) {
+ err = erofs_diskbuf_init(1);
+ if (err) {
+ erofs_err("failed to initialize diskbuf: %s",
+ strerror(-err));
+ goto exit;
+ }
+ }
#ifdef WITH_ANDROID
if (cfg.fs_config_file &&
load_canned_fs_config(cfg.fs_config_file) < 0) {
@@ -1171,6 +1180,7 @@ exit:
erofs_packedfile_exit();
erofs_xattr_cleanup_name_prefixes();
erofs_rebuild_cleanup();
+ erofs_diskbuf_exit();
erofs_exit_configure();
if (err) {
--
2.30.2
More information about the Linux-erofs
mailing list