[PATCH] erofs-utils: lib: introduce diskbuf

Gao Xiang hsiangkao at linux.alibaba.com
Wed Sep 20 13:35:28 AEST 2023



On 2023/9/20 11:32, Jingbo Xu wrote:
> 
> 
> On 9/15/23 11:29 PM, Gao Xiang wrote:
>> Previously, each tar data file will be kept as a temporary file before
>> landing to the target image since the input stream may be non-seekable.
>>
>> It's somewhat ineffective.  Let's introduce a new diskbuf approach to
>> manage those buffers.  Laterly, each stream can be redirected to blob
>> files for external reference.
>>
>> Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
>> ---
>>   include/erofs/diskbuf.h  |  30 ++++++++
>>   include/erofs/internal.h |   8 ++-
>>   lib/Makefile.am          |   3 +-
>>   lib/diskbuf.c            | 147 +++++++++++++++++++++++++++++++++++++++
>>   lib/inode.c              |  33 +++++----
>>   lib/io.c                 |   1 +
>>   lib/tar.c                |  21 ++++--
>>   mkfs/main.c              |  10 +++
>>   8 files changed, 230 insertions(+), 23 deletions(-)
>>   create mode 100644 include/erofs/diskbuf.h
>>   create mode 100644 lib/diskbuf.c
>>
>> diff --git a/include/erofs/diskbuf.h b/include/erofs/diskbuf.h
>> new file mode 100644
>> index 0000000..29d9fe2
>> --- /dev/null
>> +++ b/include/erofs/diskbuf.h
>> @@ -0,0 +1,30 @@
>> +/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
>> +#ifndef __EROFS_DISKBUF_H
>> +#define __EROFS_DISKBUF_H
>> +
>> +#ifdef __cplusplus
>> +extern "C"
>> +{
>> +#endif
>> +
>> +#include "erofs/defs.h"
>> +
>> +struct erofs_diskbuf {
>> +	void *sp;		/* internal stream pointer */
>> +	u64 offset;		/* internal offset */
>> +};
>> +
>> +int erofs_diskbuf_getfd(struct erofs_diskbuf *db, u64 *off);
>> +
>> +int erofs_diskbuf_reserve(struct erofs_diskbuf *db, int sid, u64 *off);
>> +void erofs_diskbuf_commit(struct erofs_diskbuf *db, u64 len);
>> +void erofs_diskbuf_close(struct erofs_diskbuf *db);
>> +
>> +int erofs_diskbuf_init(unsigned int nstrms);
>> +void erofs_diskbuf_exit(void);
>> +
>> +#ifdef __cplusplus
>> +}
>> +#endif
>> +
>> +#endif
>> diff --git a/include/erofs/internal.h b/include/erofs/internal.h
>> index 19b912b..a3c6fcd 100644
>> --- a/include/erofs/internal.h
>> +++ b/include/erofs/internal.h
>> @@ -107,7 +107,7 @@ struct erofs_sb_info {
>>   	u8 xattr_prefix_count;
>>   	struct erofs_xattr_prefix_item *xattr_prefixes;
>>   
>> -	int devfd;
>> +	int devfd, devblksz;
>>   	u64 devsz;
>>   	dev_t dev;
>>   	unsigned int nblobs;
>> @@ -150,6 +150,8 @@ EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER)
>>   #define EROFS_I_EA_INITED	(1 << 0)
>>   #define EROFS_I_Z_INITED	(1 << 1)
>>   
>> +struct erofs_diskbuf;
>> +
>>   struct erofs_inode {
>>   	struct list_head i_hash, i_subdirs, i_xattrs;
>>   
>> @@ -189,7 +191,7 @@ struct erofs_inode {
>>   	char *i_srcpath;
>>   	union {
>>   		char *i_link;
>> -		FILE *i_tmpfile;
>> +		struct erofs_diskbuf *i_diskbuf;
>>   	};
>>   	unsigned char datalayout;
>>   	unsigned char inode_isize;
>> @@ -197,7 +199,7 @@ struct erofs_inode {
>>   	unsigned short idata_size;
>>   	bool compressed_idata;
>>   	bool lazy_tailblock;
>> -	bool with_tmpfile;
>> +	bool with_diskbuf;
>>   	bool opaque;
>>   	/* OVL: non-merge dir that may contain whiteout entries */
>>   	bool whiteouts;
>> diff --git a/lib/Makefile.am b/lib/Makefile.am
>> index 8a45bd6..483d410 100644
>> --- a/lib/Makefile.am
>> +++ b/lib/Makefile.am
>> @@ -9,6 +9,7 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
>>         $(top_srcdir)/include/erofs/config.h \
>>         $(top_srcdir)/include/erofs/decompress.h \
>>         $(top_srcdir)/include/erofs/defs.h \
>> +      $(top_srcdir)/include/erofs/diskbuf.h \
>>         $(top_srcdir)/include/erofs/err.h \
>>         $(top_srcdir)/include/erofs/exclude.h \
>>         $(top_srcdir)/include/erofs/flex-array.h \
>> @@ -33,7 +34,7 @@ liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
>>   		      namei.c data.c compress.c compressor.c zmap.c decompress.c \
>>   		      compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
>>   		      fragments.c rb_tree.c dedupe.c uuid_unparse.c uuid.c tar.c \
>> -		      block_list.c xxhash.c rebuild.c
>> +		      block_list.c xxhash.c rebuild.c diskbuf.c
>>   
>>   liberofs_la_CFLAGS = -Wall ${libuuid_CFLAGS} -I$(top_srcdir)/include
>>   if ENABLE_LZ4
>> diff --git a/lib/diskbuf.c b/lib/diskbuf.c
>> new file mode 100644
>> index 0000000..004b1c0
>> --- /dev/null
>> +++ b/lib/diskbuf.c
>> @@ -0,0 +1,147 @@
>> +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
>> +#include "erofs/diskbuf.h"
>> +#include "erofs/internal.h"
>> +#include "erofs/print.h"
>> +#include <stdio.h>
>> +#include <errno.h>
>> +#include <sys/stat.h>
>> +#include <unistd.h>
>> +#include <stdlib.h>
>> +
>> +/* A simple approach to avoid creating too many temporary files */
>> +static struct erofs_diskbufstrm {
>> +	u64 count;
>> +	u64 tailoffset, devpos;
>> +	int fd;
>> +	unsigned int alignsize;
>> +	bool locked;
>> +} *dbufstrm;
>> +
>> +int erofs_diskbuf_getfd(struct erofs_diskbuf *db, u64 *fpos)
>> +{
>> +	const struct erofs_diskbufstrm *strm = db->sp;
>> +	u64 offset;
>> +
>> +	if (!strm)
>> +		return -1;
>> +	offset = db->offset + strm->devpos;
>> +	if (lseek(strm->fd, offset, SEEK_SET) != offset)
>> +		return -E2BIG;
>> +	if (fpos)
>> +		*fpos = offset;
>> +	return strm->fd;
>> +}
>> +
>> +int erofs_diskbuf_reserve(struct erofs_diskbuf *db, int sid, u64 *off)
>> +{
>> +	struct erofs_diskbufstrm *strm = dbufstrm + sid;
>> +
>> +	if (strm->tailoffset & (strm->alignsize - 1)) {
>> +		strm->tailoffset = round_up(strm->tailoffset, strm->alignsize);
>> +		if (lseek(strm->fd, strm->tailoffset + strm->devpos,
>> +			  SEEK_SET) != strm->tailoffset + strm->devpos)
>> +			return -EIO;
>> +	}
>> +	if (*off)
>> +		*off = db->offset + strm->devpos;
> 
> 
> if (off)
> 	*off = db->offset + strm->devpos;

Thanks, fixed.

> 
> ?
> 
>> +	db->offset = strm->tailoffset;
>> +	db->sp = strm;
>> +	++strm->count;
>> +	strm->locked = true;	/* TODO: need a real lock for MT */
>> +	return strm->fd;
>> +}
>> +
>> +void erofs_diskbuf_commit(struct erofs_diskbuf *db, u64 len)
>> +{
>> +	struct erofs_diskbufstrm *strm = db->sp;
>> +
>> +	DBG_BUGON(!strm);
>> +	DBG_BUGON(!strm->locked);
>> +	DBG_BUGON(strm->tailoffset != db->offset);
>> +	strm->tailoffset += len;
>> +}
>> +
>> +void erofs_diskbuf_close(struct erofs_diskbuf *db)
>> +{
>> +	struct erofs_diskbufstrm *strm = db->sp;
>> +
>> +	DBG_BUGON(!strm);
>> +	DBG_BUGON(strm->count <= 1);
>> +	--strm->count;
>> +	db->sp = NULL;
>> +}
>> +
>> +int erofs_tmpfile(void)
>> +{
>> +#define	TRAILER		"tmp.XXXXXXXXXX"
>> +	char buf[PATH_MAX];
>> +	int fd;
>> +	umode_t u;
>> +
>> +	(void)snprintf(buf, sizeof(buf), "%s/" TRAILER,
>> +		       getenv("TMPDIR") ?: "/tmp");
>> +
>> +	fd = mkstemp(buf);
>> +	if (fd < 0)
>> +		return -errno;
>> +
>> +	unlink(buf);
>> +	u = umask(0);
>> +	(void)umask(u);
>> +	(void)fchmod(fd, 0666 & ~u);
>> +	return fd;
>> +}
>> +
>> +int erofs_diskbuf_init(unsigned int nstrms)
>> +{
>> +	struct erofs_diskbufstrm *strm;
>> +
>> +	strm = calloc(nstrms + 1, sizeof(*strm));
>> +	if (!strm)
>> +		return -ENOMEM;
>> +	strm[nstrms].fd = -1;
>> +	dbufstrm = strm;
>> +
>> +	for (; strm < dbufstrm + nstrms; ++strm) {
>> +		struct stat st;
>> +
>> +		/* try to use the devfd for regfiles on stream 0 */
>> +		if (strm == dbufstrm && sbi.devsz == INT64_MAX) {
>> +			strm->devpos = 1ULL << 40;
>> +			if (!ftruncate(sbi.devfd, strm->devpos << 1)) {
>> +				strm->fd = dup(sbi.devfd);
>> +				if (lseek(strm->fd, strm->devpos,
>> +					  SEEK_SET) != strm->devpos)
>> +					return -EIO;
>> +				goto setupone;
>> +			}
>> +		}
>> +		strm->devpos = 0;
>> +		strm->fd = erofs_tmpfile();
>> +		if (strm->fd < 0)
>> +			return -ENOSPC;
>> +setupone:
>> +		strm->tailoffset = 0;
>> +		strm->count = 1;
>> +		if (fstat(strm->fd, &st))
>> +			return -errno;
>> +		strm->alignsize = max_t(u32, st.st_blksize, getpagesize());
>> +		++strm;
>> +	}
>> +	return 0;
>> +}
>> +
>> +void erofs_diskbuf_exit(void)
>> +{
>> +	struct erofs_diskbufstrm *strm;
>> +
>> +	if (!dbufstrm)
>> +		return;
>> +
>> +	for (strm = dbufstrm; strm->fd >= 0; ++strm) {
>> +		DBG_BUGON(strm->count != 1);
>> +
>> +		close(strm->fd);
>> +		strm->fd = -1;
>> +	}
>> +}
>> diff --git a/lib/inode.c b/lib/inode.c
>> index 37aa79e..4dc8260 100644
>> --- a/lib/inode.c
>> +++ b/lib/inode.c
>> @@ -16,6 +16,7 @@
>>   #endif
>>   #include <dirent.h>
>>   #include "erofs/print.h"
>> +#include "erofs/diskbuf.h"
>>   #include "erofs/inode.h"
>>   #include "erofs/cache.h"
>>   #include "erofs/io.h"
>> @@ -121,10 +122,12 @@ unsigned int erofs_iput(struct erofs_inode *inode)
>>   	list_del(&inode->i_hash);
>>   	if (inode->i_srcpath)
>>   		free(inode->i_srcpath);
>> -	if (inode->with_tmpfile)
>> -		fclose(inode->i_tmpfile);
>> -	else if (inode->i_link)
>> +	if (inode->with_diskbuf) {
>> +		erofs_diskbuf_close(inode->i_diskbuf);
>> +		free(inode->i_diskbuf);
>> +	} else if (inode->i_link) {
>>   		free(inode->i_link);
>> +	}
>>   	free(inode);
>>   	return 0;
>>   }
>> @@ -454,12 +457,11 @@ static int write_uncompressed_file_from_fd(struct erofs_inode *inode, int fd)
>>   	return 0;
>>   }
>>   
>> -int erofs_write_file(struct erofs_inode *inode, int fd)
>> +int erofs_write_file(struct erofs_inode *inode, int fd, u64 fpos)
>>   {
>>   	int ret;
>>   
>> -	if (!inode->i_size)
>> -		return 0;
>> +	DBG_BUGON(!inode->i_size);
>>   
>>   	if (cfg.c_chunkbits) {
>>   		inode->u.chunkbits = cfg.c_chunkbits;
>> @@ -475,7 +477,7 @@ int erofs_write_file(struct erofs_inode *inode, int fd)
>>   		if (!ret || ret != -ENOSPC)
>>   			return ret;
>>   
>> -		ret = lseek(fd, 0, SEEK_SET);
>> +		ret = lseek(fd, fpos, SEEK_SET);
>>   		if (ret < 0)
>>   			return -errno;
>>   	}
> 
> 
> It seems that erofs_blob_write_chunked_file() has not been adapted to
> the new erofs_diskbuf API?

I have no idea why it needs to be adapted, doesn't it work?

Thanks,
Gao Xiang


More information about the Linux-erofs mailing list