[RFCv2] erofs-utils: code for detecting and tracking holes in uncompressed sparse files.
Pratik Shinde
pratikshinde320 at gmail.com
Tue Dec 24 04:29:38 AEDT 2019
Made some changes based on the comments on the previous patch:
1) Defined an on-disk structure for representing a hole.
2) Maintain a list of these structures (per file) and dump the list to
disk at the time of writing the inode to disk.
---
include/erofs/internal.h | 8 +++-
lib/inode.c | 108 ++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 110 insertions(+), 6 deletions(-)
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index e13adda..863ef8a 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -63,7 +63,7 @@ struct erofs_sb_info {
extern struct erofs_sb_info sbi;
struct erofs_inode {
- struct list_head i_hash, i_subdirs, i_xattrs;
+ struct list_head i_hash, i_subdirs, i_xattrs, i_holes;
unsigned int i_count;
struct erofs_inode *i_parent;
@@ -93,6 +93,7 @@ struct erofs_inode {
unsigned int xattr_isize;
unsigned int extent_isize;
+ unsigned int holes_isize;
erofs_nid_t nid;
struct erofs_buffer_head *bh;
@@ -139,5 +140,10 @@ static inline const char *erofs_strerror(int err)
return msg;
}
+struct erofs_hole { /* one contiguous run of hole blocks found in a source file */
+ erofs_blk_t st; /* block index at which the hole starts */
+ u32 len; /* length of the hole, in blocks */
+ struct list_head next; /* links this entry into a list of holes (e.g. inode->i_holes) */
+};
#endif
diff --git a/lib/inode.c b/lib/inode.c
index 0e19b11..20bbf06 100644
--- a/lib/inode.c
+++ b/lib/inode.c
@@ -38,6 +38,85 @@ static unsigned char erofs_type_by_mode[S_IFMT >> S_SHIFT] = {
struct list_head inode_hashtable[NR_INODE_HASHTABLE];
+
+/*
+ * A byte range [start, end) is tracked as a hole only when both ends sit on
+ * a block boundary, i.e. when it covers whole blocks exactly.
+ */
+#define IS_HOLE(start, end) ((start) % EROFS_BLKSIZ == 0 && \
+			     (end) % EROFS_BLKSIZ == 0)
+#define HOLE_BLK (-1)
+
+/*
+ * erofs_detect_holes - find the block-aligned holes of a sparse source file
+ * @inode:  inode whose i_srcpath names the file to scan
+ * @holes:  list that receives one struct erofs_hole per detected hole,
+ *          appended in increasing file-offset order
+ * @htimes: incremented once for every entry appended to @holes
+ *
+ * The file is walked with lseek(SEEK_HOLE) / lseek(SEEK_DATA).  Holes whose
+ * start or end is not block aligned are ignored.
+ *
+ * Returns the total number of hole blocks.  On failure a negative errno
+ * value is returned converted to unsigned int (the return type is kept for
+ * existing callers, which must cast to int to test for an error); entries
+ * already appended to @holes are left on the list.
+ */
+unsigned int erofs_detect_holes(struct erofs_inode *inode,
+				struct list_head *holes, unsigned int *htimes)
+{
+	unsigned int nholes = 0;
+	/* off_t is signed, so lseek() failures (-1) can actually be detected */
+	off_t len, data, hole;
+	struct erofs_hole *eh;
+	int fd, err = 0;
+
+	fd = open(inode->i_srcpath, O_RDONLY);
+	if (fd < 0)
+		return -errno;
+
+	len = lseek(fd, 0, SEEK_END);
+	if (len < 0) {
+		err = -errno;
+		goto out;
+	}
+
+	data = 0;
+	while (data < len) {
+		hole = lseek(fd, data, SEEK_HOLE);
+		if (hole < 0) {
+			err = -errno;
+			goto out;
+		}
+		/* only the implicit hole at EOF is left */
+		if (hole >= len)
+			break;
+
+		data = lseek(fd, hole, SEEK_DATA);
+		if (data < 0) {
+			/* ENXIO: no data follows, the hole runs up to EOF */
+			if (errno != ENXIO) {
+				err = -errno;
+				goto out;
+			}
+			data = len;
+		}
+		if (data < hole) {
+			err = -EINVAL;
+			goto out;
+		}
+		if (!IS_HOLE(hole, data))
+			continue;
+
+		eh = malloc(sizeof(*eh));
+		if (!eh) {
+			err = -ENOMEM;
+			goto out;
+		}
+		/* byte offsets -> block numbers */
+		eh->st = hole / EROFS_BLKSIZ;
+		eh->len = (data - hole) / EROFS_BLKSIZ;
+		list_add_tail(&eh->next, holes);
+		nholes += eh->len;
+		*htimes += 1;
+	}
+out:
+	close(fd);
+	return err ? (unsigned int)err : nholes;
+}
+
+/*
+ * erofs_ishole - tell whether a block lies inside one of the recorded holes
+ * @blk:   block index to look up
+ * @holes: head of a list of struct erofs_hole, sorted by ascending start
+ *         block
+ *
+ * Returns true if @blk falls within [st, st + len) of some entry.
+ *
+ * NOTE(review): @holes is received by value, so the address of this local
+ * copy is not a member of the circular list and cannot be used as the
+ * iteration sentinel.  The real head is recovered from the ring instead;
+ * taking a pointer would be cleaner but needs the callers to change too.
+ */
+bool erofs_ishole(erofs_blk_t blk, struct list_head holes)
+{
+	/*
+	 * The last entry's ->next points back at the real head.  For an empty
+	 * list ->prev is the real head itself, which points to itself.
+	 */
+	struct list_head *head = holes.prev->next;
+	struct erofs_hole *eh;
+
+	list_for_each_entry(eh, head, next) {
+		/* entries are sorted: nothing further can contain blk */
+		if (eh->st > blk)
+			break;
+		/* st <= blk here, so the subtraction cannot wrap */
+		if (blk - eh->st < eh->len)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * erofs_create_holes_buffer - serialize a list of holes and release it
+ * @holes: list of struct erofs_hole; every entry that gets serialized is
+ *         unlinked from the list and freed
+ * @size:  size in bytes of the buffer to build
+ *
+ * Each entry is emitted as two consecutive little-endian 32-bit values:
+ * start block, then length in blocks.
+ *
+ * Returns a heap buffer of @size bytes that the caller must free(),
+ * ERR_PTR(-ENOMEM) if the allocation fails, or ERR_PTR(-EINVAL) if @size is
+ * too small to hold every entry (entries not yet serialized stay on @holes).
+ */
+char *erofs_create_holes_buffer(struct list_head *holes, unsigned int size)
+{
+	struct erofs_hole *eh, *n;
+	unsigned int p = 0;
+	char *buf;
+
+	/* zero-filled so no uninitialized heap bytes can reach the image */
+	buf = calloc(1, size);
+	if (buf == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* _safe variant: the current entry is freed inside the loop body */
+	list_for_each_entry_safe(eh, n, holes, next) {
+		/* p never exceeds size, so size - p cannot wrap */
+		if (size - p < 2 * sizeof(__le32)) {
+			free(buf);
+			return ERR_PTR(-EINVAL);
+		}
+		*(__le32 *)(buf + p) = cpu_to_le32(eh->st);
+		p += sizeof(__le32);
+		*(__le32 *)(buf + p) = cpu_to_le32(eh->len);
+		p += sizeof(__le32);
+		list_del(&eh->next);
+		free(eh);
+	}
+	return buf;
+}
+
void erofs_inode_manager_init(void)
{
unsigned int i;
@@ -304,7 +383,7 @@ static bool erofs_file_is_compressible(struct erofs_inode *inode)
int erofs_write_file(struct erofs_inode *inode)
{
- unsigned int nblocks, i;
+ unsigned int nblocks, i, nholes, hitems = 0;
int ret, fd;
if (!inode->i_size) {
@@ -322,16 +401,24 @@ int erofs_write_file(struct erofs_inode *inode)
/* fallback to all data uncompressed */
inode->datalayout = EROFS_INODE_FLAT_INLINE;
nblocks = inode->i_size / EROFS_BLKSIZ;
-
- ret = __allocate_inode_bh_data(inode, nblocks);
+ nholes = erofs_detect_holes(inode, &inode->i_holes, &hitems);
+ if (nholes < 0)
+ return nholes;
+ inode->holes_isize = (sizeof(struct erofs_hole) -
+ sizeof(struct list_head)) * hitems;
+ if (nblocks < 0)
+ return nblocks;
+ ret = __allocate_inode_bh_data(inode, nblocks - nholes);
if (ret)
return ret;
-
fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
if (fd < 0)
return -errno;
for (i = 0; i < nblocks; ++i) {
+ if (erofs_ishole(i, inode->i_holes)) {
+ continue;
+ }
char buf[EROFS_BLKSIZ];
ret = read(fd, buf, EROFS_BLKSIZ);
@@ -479,8 +566,19 @@ static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh)
if (ret)
return false;
free(inode->compressmeta);
+ off += inode->extent_isize;
}
+ if (inode->holes_isize) {
+ char *holes = erofs_create_holes_buffer(&inode->i_holes,
+ inode->holes_isize);
+ if (IS_ERR(holes))
+ return false;
+ ret = dev_write(holes, off, inode->holes_isize);
+ free(holes);
+ if (ret)
+ return false;
+ }
inode->bh = NULL;
erofs_iput(inode);
return erofs_bh_flush_generic_end(bh);
@@ -737,6 +835,7 @@ struct erofs_inode *erofs_new_inode(void)
init_list_head(&inode->i_subdirs);
init_list_head(&inode->i_xattrs);
+ init_list_head(&inode->i_holes);
inode->idata_size = 0;
inode->xattr_isize = 0;
@@ -961,4 +1060,3 @@ struct erofs_inode *erofs_mkfs_build_tree_from_path(struct erofs_inode *parent,
return erofs_mkfs_build_tree(inode);
}
-
--
2.9.3
More information about the Linux-erofs
mailing list