[PATCH] erofs-utils: support chunk-based sparse files
Gao Xiang
hsiangkao at linux.alibaba.com
Fri Jan 27 21:08:11 AEDT 2023
Scan for holes in chunk-based inodes when either --chunksize=# or
-Ededupe (without compression) is specified, so that sparse files
can be generated.  The kernel has supported such sparse chunk-based
files since Linux 5.15.
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
lib/blobchunk.c | 71 ++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 55 insertions(+), 16 deletions(-)
diff --git a/lib/blobchunk.c b/lib/blobchunk.c
index 77b0c17..55ea061 100644
--- a/lib/blobchunk.c
+++ b/lib/blobchunk.c
@@ -18,7 +18,7 @@ void erofs_sha256(const unsigned char *in, unsigned long in_size,
struct erofs_blobchunk {
struct hashmap_entry ent;
char sha256[32];
- unsigned int chunksize;
+ erofs_off_t chunksize;
erofs_blk_t blkaddr;
};
@@ -27,9 +27,12 @@ static FILE *blobfile;
static erofs_blk_t remapped_base;
static bool multidev;
static struct erofs_buffer_head *bh_devt;
+struct erofs_blobchunk erofs_holechunk = {
+ .blkaddr = EROFS_NULL_ADDR,
+};
static struct erofs_blobchunk *erofs_blob_getchunk(int fd,
- unsigned int chunksize)
+ erofs_off_t chunksize)
{
static u8 zeroed[EROFS_BLKSIZ];
u8 *chunkdata, sha256[32];
@@ -129,7 +132,11 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode,
chunk = *(void **)(inode->chunkindexes + src);
- idx.blkaddr = base_blkaddr + chunk->blkaddr;
+ if (chunk->blkaddr != EROFS_NULL_ADDR)
+ idx.blkaddr = base_blkaddr + chunk->blkaddr;
+ else
+ idx.blkaddr = EROFS_NULL_ADDR;
+
if (extent_start != EROFS_NULL_ADDR &&
idx.blkaddr == extent_end + 1) {
extent_end = idx.blkaddr;
@@ -163,14 +170,28 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode,
int erofs_blob_write_chunked_file(struct erofs_inode *inode)
{
- unsigned int chunksize = 1 << cfg.c_chunkbits;
- unsigned int count = DIV_ROUND_UP(inode->i_size, chunksize);
+ unsigned int chunkbits = cfg.c_chunkbits;
+ unsigned int count, unit;
struct erofs_inode_chunk_index *idx;
- erofs_off_t pos, len;
- unsigned int unit;
+ erofs_off_t pos, len, chunksize;
int fd, ret;
- inode->u.chunkformat |= inode->u.chunkbits - LOG_BLOCK_SIZE;
+ fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
+ if (fd < 0)
+ return -errno;
+#ifdef SEEK_DATA
+ /* if the file is fully sparsed, use one big chunk instead */
+ if (lseek(fd, 0, SEEK_DATA) < 0 && errno == ENXIO) {
+ chunkbits = ilog2(inode->i_size - 1) + 1;
+ if (chunkbits < LOG_BLOCK_SIZE)
+ chunkbits = LOG_BLOCK_SIZE;
+ }
+#endif
+ if (chunkbits - LOG_BLOCK_SIZE > EROFS_CHUNK_FORMAT_BLKBITS_MASK)
+ chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + LOG_BLOCK_SIZE;
+ chunksize = 1ULL << chunkbits;
+ count = DIV_ROUND_UP(inode->i_size, chunksize);
+ inode->u.chunkformat |= chunkbits - LOG_BLOCK_SIZE;
if (multidev)
inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES;
@@ -181,24 +202,41 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode)
inode->extent_isize = count * unit;
idx = malloc(count * max(sizeof(*idx), sizeof(void *)));
- if (!idx)
+ if (!idx) {
+ close(fd);
return -ENOMEM;
- inode->chunkindexes = idx;
-
- fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
- if (fd < 0) {
- ret = -errno;
- goto err;
}
+ inode->chunkindexes = idx;
for (pos = 0; pos < inode->i_size; pos += len) {
struct erofs_blobchunk *chunk;
+#ifdef SEEK_DATA
+ off64_t offset = lseek(fd, pos, SEEK_DATA);
+
+ if (offset < 0) {
+ if (errno != ENXIO)
+ offset = pos;
+ else
+ offset = ((pos >> chunkbits) + 1) << chunkbits;
+ } else {
+ offset &= ~(chunksize - 1);
+ }
+
+ if (offset > pos) {
+ len = 0;
+ do {
+ *(void **)idx++ = &erofs_holechunk;
+ pos += chunksize;
+ } while (pos < offset);
+ DBG_BUGON(pos != offset);
+ continue;
+ }
+#endif
len = min_t(u64, inode->i_size - pos, chunksize);
chunk = erofs_blob_getchunk(fd, len);
if (IS_ERR(chunk)) {
ret = PTR_ERR(chunk);
- close(fd);
goto err;
}
*(void **)idx++ = chunk;
@@ -207,6 +245,7 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode)
close(fd);
return 0;
err:
+ close(fd);
free(inode->chunkindexes);
inode->chunkindexes = NULL;
return ret;
--
2.24.4
More information about the Linux-erofs
mailing list