[bug report] data corruption of init process

Gao Xiang hsiangkao at linux.alibaba.com
Tue Jan 21 14:33:07 AEDT 2025



On 2025/1/21 11:27, Gao Xiang wrote:
> 
> 
> On 2025/1/21 01:36, Stefan Kerkmann wrote:
>> Hi Gao,
>>
>> I have enabled KASAN and applied your requested changes, but nothing suspicious happened.
> 
> Sigh...
> 
>>
>>> - Could we get the exact file offset in `init` at which the init process
>>>    crashed?  It will help us to chase down the primary scene.
>>
>> I'll try to track that down. If you have any hints on how to do that, let me know :-).
> 
> Many thanks for the test...
> I just hacked up some code (untested), as below:
> 
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 1dcddfe537ee..868fea16732f 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -873,6 +873,8 @@ static void synchronize_group_exit(struct task_struct *tsk, long code)
>       spin_unlock_irq(&sighand->siglock);
>   }
> 
> +extern struct inode *erofs_iget(struct super_block *sb, u64 nid);
> +
>   void __noreturn do_exit(long code)
>   {
>       struct task_struct *tsk = current;
> @@ -903,9 +905,59 @@ void __noreturn do_exit(long code)
>            * If the last thread of global init has exited, panic
>            * immediately to get a useable coredump.
>            */
> -        if (unlikely(is_global_init(tsk)))
> +        if (unlikely(is_global_init(tsk))) {
> +            struct path path;
> +            struct inode *inode;
> +
> +            get_fs_pwd(tsk->fs, &path);
> +
> +            inode = d_inode(path.dentry);
> +            if (inode && inode->i_sb->s_magic == EROFS_SUPER_MAGIC_V1) {
> +                struct inode *inode;
> +                int i = 0;
> +
> +                inode = erofs_iget(inode->i_sb, 190291);
> +                if (IS_ERR(inode))
> +                    goto skip;
> +
> +                for (i = 0; i < 30; ++i) {
> +                    struct page *page = find_get_page(inode->i_mapping, i);
> +                    void *data;
> +
> +                    if (!page)
> +                        continue;
> +                    data = kmap_local_page(page);
> +
> +                    hash = fnv_32_buf(data, PAGE_SIZE, FNV1_32_INIT);
> +                    pr_err("%px i_ino %lu, index %lu dst %px (%x) err %d",
> +                           page, page->mapping->host->i_ino, i, ptr, hash);

Maybe use a slightly different style for this print message, e.g.:

		"exit: %px i_ino %lu, index %lu dst %px (%x)"

Likewise for the other pr_err() below.

Anyway, it's somewhat hacky code; I just wonder whether it works and
whether the output helps us find out which page is corrupted.


> +                    kunmap_local(data);
> +                }
> +                iput(inode);
> +
> +                inode = erofs_iget(inode->i_sb, 868416);
> +                if (IS_ERR(inode))
> +                    goto skip;
> +
> +                for (i = 0; i < 19; ++i) {
> +                    struct page *page = find_get_page(inode->i_mapping, i);
> +                    void *data;
> +
> +                    if (!page)
> +                        continue;
> +                    data = kmap_local_page(page);
> +                    hash = fnv_32_buf(data, PAGE_SIZE, FNV1_32_INIT);
> +                    pr_err("%px i_ino %lu, index %lu dst %px (%x) err %d",
> +                           page, page->mapping->host->i_ino, i, ptr, hash);
> +                    kunmap_local(data);
> +                }
> +                iput(inode);
> +            }
> +skip:
>               panic("Attempted to kill init! exitcode=0x%08x\n",
>                   tsk->signal->group_exit_code ?: (int)code);
> +        }
> +
> 
>   #ifdef CONFIG_POSIX_TIMERS
>           hrtimer_cancel(&tsk->signal->real_timer);
> 
> You could follow this idea to dump the page cache data when init
> is killed; I am curious about the output...
> 
> 
> Thanks,
> Gao Xiang



More information about the Linux-erofs mailing list