[PATCH] xfs: introduce "metasync" api to sync metadata to fsblock

Pingfan Liu kernelfans at gmail.com
Mon Oct 14 01:37:00 AEDT 2019


When using fadump (fireware assist dump) mode on powerpc, a mismatch
between grub xfs driver and kernel xfs driver has been obsevered.  Note:
fadump boots up in the following sequence: fireware -> grub reads kernel
and initramfs -> kernel boots.

The process to reproduce this mismatch:
  - On powerpc, boot kernel with fadump=on and edit /etc/kdump.conf.
  - Replacing "path /var/crash" with "path /var/crashnew", then, "kdumpctl
    restart" to rebuild the initramfs. Detail about the rebuilding looks
    like: mkdumprd /boot/initramfs-`uname -r`.img.tmp;
          mv /boot/initramfs-`uname -r`.img.tmp /boot/initramfs-`uname -r`.img
          sync
  - "echo c >/proc/sysrq-trigger".

The result:
The dump image will not be saved under /var/crashnew/* as expected, but
still saved under /var/crash.

The root cause:
As Eric pointed out that on xfs, 'sync' ensures the consistency by writing
back metadata to xlog, but not necessary to fsblock. This raises issue if
grub can not replay the xlog before accessing the xfs files. Since the
above dir entry of initramfs should be saved as inline data with xfs_inode,
so xfs_fs_sync_fs() does not guarantee it written to fsblock.

umount can be used to write metadata fsblock, but the filesystem can not be
umounted if still in use.

There are two ways to fix this mismatch, either grub or xfs. It may be
easier to do this in xfs side by introducing an interface to flush metadata
to fsblock explicitly.

With this patch, metadata can be written to fsblock by:
  # update AIL
  sync
  # new introduced interface to flush metadata to fsblock
  mount -o remount,metasync mountpoint

Signed-off-by: Pingfan Liu <kernelfans at gmail.com>
Cc: "Darrick J. Wong" <darrick.wong at oracle.com>
Cc: Dave Chinner <dchinner at redhat.com>
Cc: Eric Sandeen <esandeen at redhat.com>
Cc: Hari Bathini <hbathini at linux.ibm.com>
Cc: linuxppc-dev at lists.ozlabs.org
To: linux-xfs at vger.kernel.org
---
 fs/xfs/xfs_mount.h      |  1 +
 fs/xfs/xfs_super.c      | 15 ++++++++++++++-
 fs/xfs/xfs_trans.h      |  2 ++
 fs/xfs/xfs_trans_ail.c  | 26 +++++++++++++++++++++++++-
 fs/xfs/xfs_trans_priv.h |  1 +
 5 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index fdb60e0..85f32e6 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -243,6 +243,7 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_FILESTREAMS	(1ULL << 24)	/* enable the filestreams
 						   allocator */
 #define XFS_MOUNT_NOATTR2	(1ULL << 25)	/* disable use of attr2 format */
+#define XFS_MOUNT_METASYNC	(1ull << 26)	/* write meta to fsblock */
 
 #define XFS_MOUNT_DAX		(1ULL << 62)	/* TEST ONLY! */
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 8d1df9f..41df810 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -59,7 +59,7 @@ enum {
 	Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
 	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
 	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
-	Opt_discard, Opt_nodiscard, Opt_dax, Opt_err,
+	Opt_discard, Opt_nodiscard, Opt_dax, Opt_metasync, Opt_err
 };
 
 static const match_table_t tokens = {
@@ -106,6 +106,7 @@ static const match_table_t tokens = {
 	{Opt_discard,	"discard"},	/* Discard unused blocks */
 	{Opt_nodiscard,	"nodiscard"},	/* Do not discard unused blocks */
 	{Opt_dax,	"dax"},		/* Enable direct access to bdev pages */
+	{Opt_metasync,	"metasync"},	/* one shot to write meta to fsblock */
 	{Opt_err,	NULL},
 };
 
@@ -338,6 +339,9 @@ xfs_parseargs(
 			mp->m_flags |= XFS_MOUNT_DAX;
 			break;
 #endif
+		case Opt_metasync:
+			mp->m_flags |= XFS_MOUNT_METASYNC;
+			break;
 		default:
 			xfs_warn(mp, "unknown mount option [%s].", p);
 			return -EINVAL;
@@ -1259,6 +1263,9 @@ xfs_fs_remount(
 			mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
 			mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
 			break;
+		case Opt_metasync:
+			mp->m_flags |= XFS_MOUNT_METASYNC;
+			break;
 		default:
 			/*
 			 * Logically we would return an error here to prevent
@@ -1286,6 +1293,12 @@ xfs_fs_remount(
 		}
 	}
 
+	if (mp->m_flags & XFS_MOUNT_METASYNC) {
+		xfs_ail_push_sync(mp->m_ail);
+		/* one shot flag */
+		mp->m_flags &= ~XFS_MOUNT_METASYNC;
+	}
+
 	/* ro -> rw */
 	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) {
 		if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 64d7f17..fcdb902 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -242,6 +242,8 @@ void		xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *,
 void		xfs_trans_buf_copy_type(struct xfs_buf *dst_bp,
 					struct xfs_buf *src_bp);
 
+void		xfs_ail_push_sync(struct xfs_ail *ailp);
+
 extern kmem_zone_t	*xfs_trans_zone;
 
 #endif	/* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 6ccfd75..b8d8df1 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -488,7 +488,11 @@ xfsaild_push(
 	xfs_trans_ail_cursor_done(&cur);
 	spin_unlock(&ailp->ail_lock);
 
-	if (xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list))
+	if (unlikely(mp->m_flags & XFS_MOUNT_METASYNC)) {
+		xfs_buf_delwri_submit(&ailp->ail_buf_list);
+		ailp->ail_log_flush++;
+		wake_up_all(&ailp->pushed_que);
+	} else if (xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list))
 		ailp->ail_log_flush++;
 
 	if (!count || XFS_LSN_CMP(lsn, target) >= 0) {
@@ -641,6 +645,25 @@ xfs_ail_push(
 	wake_up_process(ailp->ail_task);
 }
 
+void
+xfs_ail_push_sync(
+	struct xfs_ail		*ailp)
+{
+	xfs_lsn_t		sync_lsn;
+	DEFINE_WAIT(wait);
+
+	sync_lsn = xfs_ail_max_lsn(ailp);
+	for (;;) {
+		xfs_ail_push(ailp, sync_lsn);
+		prepare_to_wait(&ailp->pushed_que, &wait, TASK_INTERRUPTIBLE);
+		if (XFS_LSN_CMP(READ_ONCE(ailp->ail_target_prev),
+			sync_lsn) >= 0)
+			break;
+		schedule();
+	}
+	finish_wait(&ailp->pushed_que, &wait);
+}
+
 /*
  * Push out all items in the AIL immediately
  */
@@ -834,6 +857,7 @@ xfs_trans_ail_init(
 	spin_lock_init(&ailp->ail_lock);
 	INIT_LIST_HEAD(&ailp->ail_buf_list);
 	init_waitqueue_head(&ailp->ail_empty);
+	init_waitqueue_head(&ailp->pushed_que);
 
 	ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
 			ailp->ail_mount->m_fsname);
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 2e073c1..9fe3cc6 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -61,6 +61,7 @@ struct xfs_ail {
 	int			ail_log_flush;
 	struct list_head	ail_buf_list;
 	wait_queue_head_t	ail_empty;
+	wait_queue_head_t	pushed_que;
 };
 
 /*
-- 
2.7.5



More information about the Linuxppc-dev mailing list