[PATCH v4 4/4] mm: use vma_start_write_killable() in process_vma_walk_lock()
Suren Baghdasaryan
surenb at google.com
Tue Mar 24 06:29:14 AEDT 2026
On Mon, Mar 23, 2026 at 11:04 AM Lorenzo Stoakes (Oracle)
<ljs at kernel.org> wrote:
>
> On Sat, Mar 21, 2026 at 10:43:08PM -0700, Suren Baghdasaryan wrote:
> > Replace vma_start_write() with vma_start_write_killable() when
> > process_vma_walk_lock() is used with PGWALK_WRLOCK option.
> > Adjust its direct and indirect users to check for a possible error
> > and handle it. Ensure users handle EINTR correctly and do not ignore
> > it.
> >
> > Signed-off-by: Suren Baghdasaryan <surenb at google.com>
> > ---
> > fs/proc/task_mmu.c | 5 ++++-
> > mm/mempolicy.c | 1 +
> > mm/pagewalk.c | 20 ++++++++++++++------
> > 3 files changed, 19 insertions(+), 7 deletions(-)
> >
> > diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> > index e091931d7ca1..2fe3d11aad03 100644
> > --- a/fs/proc/task_mmu.c
> > +++ b/fs/proc/task_mmu.c
> > @@ -1797,6 +1797,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
> > struct clear_refs_private cp = {
> > .type = type,
> > };
> > + int err;
>
> Maybe better to make it a ssize_t given return type of function?
Ack.
>
> >
> > if (mmap_write_lock_killable(mm)) {
> > count = -EINTR;
> > @@ -1824,7 +1825,9 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
> > 0, mm, 0, -1UL);
> > mmu_notifier_invalidate_range_start(&range);
> > }
> > - walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
> > + err = walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
> > + if (err)
> > + count = err;
>
> Hmm this is gross, but it's an established pattern here, ugh.
>
> Now we have an err though, could we update:
>
> if (mmap_write_lock_killable(mm)) {
> - count = -EINTR;
> + err = -EINTR;
> goto out_mm;
> }
>
> Then we can just do:
>
> + err = walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp);
>
> And at the end do:
>
> return err ?: count;
>
> Which possibly _necessitates_ err being a ssize_t.
Sounds doable. Let me try that.
>
> > if (type == CLEAR_REFS_SOFT_DIRTY) {
> > mmu_notifier_invalidate_range_end(&range);
> > flush_tlb_mm(mm);
> > diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> > index 929e843543cf..bb5b0e83ce0f 100644
> > --- a/mm/mempolicy.c
> > +++ b/mm/mempolicy.c
> > @@ -969,6 +969,7 @@ static const struct mm_walk_ops queue_pages_lock_vma_walk_ops = {
> > * (a hugetlbfs page or a transparent huge page being counted as 1).
> > * -EIO - a misplaced page found, when MPOL_MF_STRICT specified without MOVEs.
> > * -EFAULT - a hole in the memory range, when MPOL_MF_DISCONTIG_OK unspecified.
> > + * -EINTR - walk got terminated due to pending fatal signal.
>
> Thanks!
>
> > */
> > static long
> > queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
> > diff --git a/mm/pagewalk.c b/mm/pagewalk.c
> > index eda74273c8ec..a42cd6a6d812 100644
> > --- a/mm/pagewalk.c
> > +++ b/mm/pagewalk.c
> > @@ -438,14 +438,13 @@ static inline void process_mm_walk_lock(struct mm_struct *mm,
> > mmap_assert_write_locked(mm);
> > }
> >
> > -static inline void process_vma_walk_lock(struct vm_area_struct *vma,
> > +static inline int process_vma_walk_lock(struct vm_area_struct *vma,
>
> NIT: Don't need this to be an inline any longer. May as well fix up while we're
> here.
Ack.
>
> > enum page_walk_lock walk_lock)
> > {
> > #ifdef CONFIG_PER_VMA_LOCK
> > switch (walk_lock) {
> > case PGWALK_WRLOCK:
> > - vma_start_write(vma);
> > - break;
> > + return vma_start_write_killable(vma);
>
> LGTM
>
> > case PGWALK_WRLOCK_VERIFY:
> > vma_assert_write_locked(vma);
> > break;
> > @@ -457,6 +456,7 @@ static inline void process_vma_walk_lock(struct vm_area_struct *vma,
> > break;
> > }
> > #endif
> > + return 0;
> > }
> >
> > /*
> > @@ -500,7 +500,9 @@ int walk_page_range_mm_unsafe(struct mm_struct *mm, unsigned long start,
> > if (ops->pte_hole)
> > err = ops->pte_hole(start, next, -1, &walk);
> > } else { /* inside vma */
> > - process_vma_walk_lock(vma, ops->walk_lock);
> > + err = process_vma_walk_lock(vma, ops->walk_lock);
> > + if (err)
> > + break;
>
> In every other case we set walk.vma = vma or NULL. Is it a problem not setting
> it at all in this code path?
IIUC the other cases set walk.vma because they later call
ops->pte_hole(..., walk). In our case we immediately break out of the
loop and exit the function, which pushes "walk" variable out of scope.
So, walk.vma won't be used and setting it would achieve nothing.
>
> > walk.vma = vma;
> > next = min(end, vma->vm_end);
> > vma = find_vma(mm, vma->vm_end);
> > @@ -717,6 +719,7 @@ int walk_page_range_vma_unsafe(struct vm_area_struct *vma, unsigned long start,
> > .vma = vma,
> > .private = private,
> > };
> > + int err;
> >
> > if (start >= end || !walk.mm)
> > return -EINVAL;
> > @@ -724,7 +727,9 @@ int walk_page_range_vma_unsafe(struct vm_area_struct *vma, unsigned long start,
> > return -EINVAL;
> >
> > process_mm_walk_lock(walk.mm, ops->walk_lock);
> > - process_vma_walk_lock(vma, ops->walk_lock);
> > + err = process_vma_walk_lock(vma, ops->walk_lock);
> > + if (err)
> > + return err;
>
> LGTM
>
> > return __walk_page_range(start, end, &walk);
> > }
> >
> > @@ -747,6 +752,7 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
> > .vma = vma,
> > .private = private,
> > };
> > + int err;
> >
> > if (!walk.mm)
> > return -EINVAL;
> > @@ -754,7 +760,9 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
> > return -EINVAL;
> >
> > process_mm_walk_lock(walk.mm, ops->walk_lock);
> > - process_vma_walk_lock(vma, ops->walk_lock);
> > + err = process_vma_walk_lock(vma, ops->walk_lock);
> > + if (err)
> > + return err;
>
> LGTM
>
> > return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
> > }
> >
> > --
> > 2.53.0.1018.g2bb0e51243-goog
> >
>
> Thanks, Lorenzo
More information about the Linuxppc-dev
mailing list