[RFC PATCH] fs/hugetlb: Fix UBSAN warning reported on hugetlb

Aneesh Kumar K.V aneesh.kumar at linux.ibm.com
Thu Sep 8 17:26:59 AEST 2022


Powerpc architecture supports 16GB hugetlb pages with hash translation. For 4K
page size, this is implemented as a hugepage directory entry at the PGD level
and for 64K it is implemented as a huge page pte at the PUD level

Hugetlbfs sets up file system blocksize same as page size and this patch
switches blocks size usage with 16GB hugetlb to use size_t type.

We only change generic code and hugetlbfs related usage of i_blocksize(). Other
fs specific usage is left unchanged in this patch. A large part of this change
is not relevant to hugetlb, but it is changed to make sure we track block size
using size_t in generic code.

Only functionality w.r.t getattr is observed to be impacted by this change.

The below test shows the user-visible change.

 struct stat a;
 stat("/mnt/a", &a);
 printf("st_blksize = %ld\n", a.st_blksize);

Without patch
 # ./a.out  /mnt/a
 st_blksize = 0
 #

With patch
 # ./a.out /mnt/a
 st_blksize = 17179869184
 #

Statx still has the problem

 # stat /mnt/a
   File: /mnt/a
   Size: 0               Blocks: 0          IO Block: 512    regular empty file
 Device: 2eh/46d Inode: 74584       Links: 1
 Access: (0644/-rw-r--r--)  Uid: (    0/    root)   Gid: (    0/    root)
 Context: unconfined_u:object_r:hugetlbfs_t:s0
 Access: 2022-09-07 11:42:14.620239084 -0500
 Modify: 2022-09-07 11:42:14.620239084 -0500
 Change: 2022-09-07 11:42:14.620239084 -0500
  Birth: -

because it uses __u32 stx_blksize in uapi.

struct statx {
	/* 0x00 */
	__u32	stx_mask;	/* What results were written [uncond] */
	__u32	stx_blksize;	/* Preferred general I/O size [uncond] */

Fixing statx requires a syscall change where we add STATX_64BLOCKSIZE.

The change also fixes the below report warning.

 UBSAN: shift-out-of-bounds in ./include/linux/fs.h:709:12
 shift exponent 34 is too large for 32-bit type 'int'
 CPU: 67 PID: 1632 Comm: bash Not tainted 6.0.0-rc2-00327-gee88a56e8517-dirty #1
 Call Trace:
 [c000000021517990] [c000000000cb21e4] dump_stack_lvl+0x98/0xe0 (unreliable)
 [c0000000215179d0] [c000000000cacf60] ubsan_epilogue+0x18/0x70
 [c000000021517a30] [c000000000cac44c] __ubsan_handle_shift_out_of_bounds+0x1bc/0x390
 [c000000021517b30] [c00000000067e5b8] generic_fillattr+0x1b8/0x1d0
 [c000000021517b70] [c00000000067e6ec] vfs_getattr_nosec+0x11c/0x140
 [c000000021517bb0] [c00000000067e888] vfs_statx+0xd8/0x1d0
 [c000000021517c30] [c00000000067f658] vfs_fstatat+0x88/0xd0
 [c000000021517c80] [c00000000067f6e0] __do_sys_newstat+0x40/0x90
 [c000000021517d50] [c00000000003cde0] system_call_exception+0x250/0x600
 [c000000021517e10] [c00000000000c3bc] system_call_common+0xec/0x250

Cc: David Howells <dhowells at redhat.com>
Cc: linux-fsdevel at vger.kernel.org
Cc: Al Viro <viro at zeniv.linux.org.uk>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar at linux.ibm.com>
---
 fs/buffer.c            | 6 +++---
 fs/dax.c               | 2 +-
 fs/iomap/buffered-io.c | 6 +++---
 fs/iomap/direct-io.c   | 2 +-
 include/linux/fs.h     | 4 ++--
 include/linux/stat.h   | 2 +-
 mm/truncate.c          | 2 +-
 7 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 55e762a58eb6..15def791325e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2376,7 +2376,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
 {
 	struct inode *inode = mapping->host;
 	const struct address_space_operations *aops = mapping->a_ops;
-	unsigned int blocksize = i_blocksize(inode);
+	size_t blocksize = i_blocksize(inode);
 	struct page *page;
 	void *fsdata;
 	pgoff_t index, curidx;
@@ -2454,7 +2454,7 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
 			get_block_t *get_block, loff_t *bytes)
 {
 	struct inode *inode = mapping->host;
-	unsigned int blocksize = i_blocksize(inode);
+	size_t blocksize = i_blocksize(inode);
 	unsigned int zerofrom;
 	int err;
 
@@ -2542,7 +2542,7 @@ int block_truncate_page(struct address_space *mapping,
 {
 	pgoff_t index = from >> PAGE_SHIFT;
 	unsigned offset = from & (PAGE_SIZE-1);
-	unsigned blocksize;
+	size_t blocksize;
 	sector_t iblock;
 	unsigned length, pos;
 	struct inode *inode = mapping->host;
diff --git a/fs/dax.c b/fs/dax.c
index c440dcef4b1b..66673ef56695 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1304,7 +1304,7 @@ EXPORT_SYMBOL_GPL(dax_zero_range);
 int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 		const struct iomap_ops *ops)
 {
-	unsigned int blocksize = i_blocksize(inode);
+	size_t blocksize = i_blocksize(inode);
 	unsigned int off = pos & (blocksize - 1);
 
 	/* Block boundary? Nothing to do */
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index ca5c62901541..4b67018c6e71 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -955,7 +955,7 @@ int
 iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 		const struct iomap_ops *ops)
 {
-	unsigned int blocksize = i_blocksize(inode);
+	size_t blocksize = i_blocksize(inode);
 	unsigned int off = pos & (blocksize - 1);
 
 	/* Block boundary? Nothing to do */
@@ -1297,7 +1297,7 @@ iomap_add_to_ioend(struct inode *inode, loff_t pos, struct folio *folio,
 		struct writeback_control *wbc, struct list_head *iolist)
 {
 	sector_t sector = iomap_sector(&wpc->iomap, pos);
-	unsigned len = i_blocksize(inode);
+	size_t len = i_blocksize(inode);
 	size_t poff = offset_in_folio(folio, pos);
 
 	if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos, sector)) {
@@ -1340,7 +1340,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 {
 	struct iomap_page *iop = iomap_page_create(inode, folio, 0);
 	struct iomap_ioend *ioend, *next;
-	unsigned len = i_blocksize(inode);
+	size_t len = i_blocksize(inode);
 	unsigned nblocks = i_blocks_per_folio(inode, folio);
 	u64 pos = folio_pos(folio);
 	int error = 0, count = 0, i;
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 4eb559a16c9e..d17d9e11cd35 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -241,7 +241,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 	const struct iomap *iomap = &iter->iomap;
 	struct inode *inode = iter->inode;
 	unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
-	unsigned int fs_block_size = i_blocksize(inode), pad;
+	size_t fs_block_size = i_blocksize(inode), pad;
 	loff_t length = iomap_length(iter);
 	loff_t pos = iter->pos;
 	blk_opf_t bio_opf;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9eced4cc286e..7fedf9dbcac3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -704,9 +704,9 @@ struct inode {
 
 struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode);
 
-static inline unsigned int i_blocksize(const struct inode *node)
+static inline size_t i_blocksize(const struct inode *node)
 {
-	return (1 << node->i_blkbits);
+	return (1UL << node->i_blkbits);
 }
 
 static inline int inode_unhashed(struct inode *inode)
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 7df06931f25d..f362a8d1af0c 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -23,7 +23,7 @@ struct kstat {
 	u32		result_mask;	/* What fields the user got */
 	umode_t		mode;
 	unsigned int	nlink;
-	uint32_t	blksize;	/* Preferred I/O size */
+	size_t		blksize;	/* Preferred I/O size */
 	u64		attributes;
 	u64		attributes_mask;
 #define KSTAT_ATTR_FS_IOC_FLAGS				\
diff --git a/mm/truncate.c b/mm/truncate.c
index 0b0708bf935f..9d4b298d4f83 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -800,7 +800,7 @@ EXPORT_SYMBOL(truncate_setsize);
  */
 void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
 {
-	int bsize = i_blocksize(inode);
+	size_t bsize = i_blocksize(inode);
 	loff_t rounded_from;
 	struct page *page;
 	pgoff_t index;
-- 
2.37.3



More information about the Linuxppc-dev mailing list