[PATCH v3 22/23] mm/vma: convert vma_modify_flags[_uffd]() to use vma_flags_t

Fri Mar 20 23:15:36 AEDT 2026

On Fri, Mar 20, 2026 at 12:56:56PM +0100, Vlastimil Babka (SUSE) wrote:
> On 3/20/26 12:08, Lorenzo Stoakes (Oracle) wrote:
> > On Fri, Mar 20, 2026 at 11:39:58AM +0100, Vlastimil Babka (SUSE) wrote:
> >> On 3/18/26 16:50, Lorenzo Stoakes (Oracle) wrote:
> >> > Update the vma_modify_flags() and vma_modify_flags_uffd() functions to
> >> > accept a vma_flags_t parameter rather than a vm_flags_t one, and propagate
> >> > the changes as needed to implement this change.
> >> >
> >> > Finally, update the VMA tests to reflect this.
> >> >
> >> > Signed-off-by: Lorenzo Stoakes (Oracle) <ljs at kernel.org>
> >>
> >> > --- a/mm/mlock.c
> >> > +++ b/mm/mlock.c
> >> > @@ -415,13 +415,14 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
> >> >   * @vma - vma containing range to be mlock()ed or munlock()ed
> >> >   * @start - start address in @vma of the range
> >> >   * @end - end of range in @vma
> >> > - * @newflags - the new set of flags for @vma.
> >> > + * @new_vma_flags - the new set of flags for @vma.
> >> >   *
> >> >   * Called for mlock(), mlock2() and mlockall(), to set @vma VM_LOCKED;
> >> >   * called for munlock() and munlockall(), to clear VM_LOCKED from @vma.
> >> >   */
> >> >  static void mlock_vma_pages_range(struct vm_area_struct *vma,
> >> > -	unsigned long start, unsigned long end, vm_flags_t newflags)
> >> > +	unsigned long start, unsigned long end,
> >> > +	vma_flags_t *new_vma_flags)
> >> >  {
> >> >  	static const struct mm_walk_ops mlock_walk_ops = {
> >> >  		.pmd_entry = mlock_pte_range,
> >> > @@ -439,18 +440,18 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
> >> >  	 * combination should not be visible to other mmap_lock users;
> >> >  	 * but WRITE_ONCE so rmap walkers must see VM_IO if VM_LOCKED.
> >> >  	 */
> >> > -	if (newflags & VM_LOCKED)
> >> > -		newflags |= VM_IO;
> >> > +	if (vma_flags_test(new_vma_flags, VMA_LOCKED_BIT))
> >> > +		vma_flags_set(new_vma_flags, VMA_IO_BIT);
> >> >  	vma_start_write(vma);
> >> > -	vm_flags_reset_once(vma, newflags);
> >> > +	WRITE_ONCE(vma->flags, *new_vma_flags);
> >>
> >> It's not clear to me, how is switching from vm_flags_t to vma_flags_t
> >> allowing us to simply do WRITE_ONCE() instead of the full logic of
> >> vm_flags_reset_once()? Won't it fail to compile once once flags are more
> >> than single word? Or worse, will compile but silently allow tearing?
> >
> > We only care about tearing in the flags that can be contained within a
> > system word, but true we should probably do this more carefully, as I did
> > for vm_flags_reset_once().
> >
> > I will reimplement this as a new, hideous, helper function.
> >
> > I am not a fan of this being a thing to handle races, it's a hack. But I
> > guess that should be addressed separately.
>
> Right, thanks!

No problem. I might be able to avoid it being too hideous let's see :)

>
> >>
> >> >  	}
> >> >  }
> >> >
> >> > @@ -467,20 +468,22 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
> >> >  	       struct vm_area_struct **prev, unsigned long start,
> >> >  	       unsigned long end, vm_flags_t newflags)
> >> >  {
> >> > +	vma_flags_t new_vma_flags = legacy_to_vma_flags(newflags);
> >> > +	const vma_flags_t old_vma_flags = vma->flags;
> >> >  	struct mm_struct *mm = vma->vm_mm;
> >> >  	int nr_pages;
> >> >  	int ret = 0;
> >> > -	vm_flags_t oldflags = vma->vm_flags;
> >> >
> >> > -	if (newflags == oldflags || vma_is_secretmem(vma) ||
> >> > -	    !vma_supports_mlock(vma))
> >> > +	if (vma_flags_same_pair(&old_vma_flags, &new_vma_flags) ||
> >> > +	    vma_is_secretmem(vma) || !vma_supports_mlock(vma)) {
> >> >  		/*
> >> >  		 * Don't set VM_LOCKED or VM_LOCKONFAULT and don't count.
> >> >  		 * For secretmem, don't allow the memory to be unlocked.
> >> >  		 */
> >> >  		goto out;
> >> > +	}
> >> >
> >> > -	vma = vma_modify_flags(vmi, *prev, vma, start, end, &newflags);
> >> > +	vma = vma_modify_flags(vmi, *prev, vma, start, end, &new_vma_flags);
> >> >  	if (IS_ERR(vma)) {
> >> >  		ret = PTR_ERR(vma);
> >> >  		goto out;
> >> > @@ -490,9 +493,9 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
> >> >  	 * Keep track of amount of locked VM.
> >> >  	 */
> >> >  	nr_pages = (end - start) >> PAGE_SHIFT;
> >> > -	if (!(newflags & VM_LOCKED))
> >> > +	if (!vma_flags_test(&new_vma_flags, VMA_LOCKED_BIT))
> >> >  		nr_pages = -nr_pages;
> >> > -	else if (oldflags & VM_LOCKED)
> >> > +	else if (vma_flags_test(&old_vma_flags, VMA_LOCKED_BIT))
> >> >  		nr_pages = 0;
> >> >  	mm->locked_vm += nr_pages;
> >> >
> >> > @@ -501,12 +504,13 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
> >> >  	 * It's okay if try_to_unmap_one unmaps a page just after we
> >> >  	 * set VM_LOCKED, populate_vma_page_range will bring it back.
> >> >  	 */
> >> > -	if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
> >> > +	if (vma_flags_test(&new_vma_flags, VMA_LOCKED_BIT) &&
> >> > +	    vma_flags_test(&old_vma_flags, VMA_LOCKED_BIT)) {
> >> >  		/* No work to do, and mlocking twice would be wrong */
> >> >  		vma_start_write(vma);
> >> > -		vm_flags_reset(vma, newflags);
> >> > +		vma->flags = new_vma_flags;
> >>
> >> This also does lot less than vm_flags_reset()?
> >
> > Well let's look:
> >
> > 	VM_WARN_ON_ONCE(!pgtable_supports_soft_dirty() && (flags & VM_SOFTDIRTY));
> >
> > Are we really at a point where this is problematic? Do we hit this? Why are
> > we specifically checking only this case on every single instance of
> > resetting VMA flags?
>
> I'll admit I don't know, but sounds like we can stop being so paranoid when
> converting code to the new API.

Yeah I think so. I think Suren is ok with it, probably :P

>
> >
> > 	vma_assert_write_locked(vma);
> >
> > Note the vma_start_write() line above. I want to separate vma_flags_t
> > helpers from asserts like that, because:
> >
> > 1. We might be operating on a VMA that is not yet added to the tree
> > 2. We might be operating on a VMA that is now detached
> > 3. Really in all but core code, you should be using vma_desc_xxx().
> > 4. Other VMA fields are manipulated with no such checks.
> > 5. It'd be egregious to have to add variants of flag functions just to
> >    account for cases such as the above, especially when we don't do so for
> >    other VMA fields. Drivers are the problematic cases and why it was
> >    especially important (and also for debug as VMA locks were introduced),
> >    the mmap_prepare work is solving this generally.
>
> Perfectly reasonable!
>
> > 	vm_flags_init(vma, flags);
> >
> > Analysing what's in this function:
> >
> > 	VM_WARN_ON_ONCE(!pgtable_supports_soft_dirty() && (flags & VM_SOFTDIRTY));
> >
> > Duplicated.
> >
> > 	vma_flags_clear_all(&vma->flags);
> >
> > Only necessary while you're only setting the first system word of
> > vma->flags.
> >
> > 	vma_flags_overwrite_word(&vma->flags, flags);
> >
> > Again only necessary when you're only setting the first system word.
> >
> > So yeah it's doing the equivalent and (intentionally) eliminating some
> > noise.
>
> Ack.
>
> > But I'll add the S/D check back I guess.
>
> Thanks for the detailed explanation. It's fine to drop legacy stuff, it just
> wasn't obvious if intentional or by mistake. Maybe just mention the
> intention in the changelog?

Yeah that's a fair point will do!

>
> Thanks!
>
> >>
> >> >  	} else {
> >> > -		mlock_vma_pages_range(vma, start, end, newflags);
> >> > +		mlock_vma_pages_range(vma, start, end, &new_vma_flags);
> >> >  	}
> >> >  out:
> >> >  	*prev = vma;
> >> > diff --git a/mm/mprotect.c b/mm/mprotect.c
> >> > index eaa724b99908..2b8a85689ab7 100644
> >> > --- a/mm/mprotect.c
> >> > +++ b/mm/mprotect.c
> >> > @@ -756,13 +756,11 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
> >> >  		vma_flags_clear(&new_vma_flags, VMA_ACCOUNT_BIT);
> >> >  	}
> >> >
> >> > -	newflags = vma_flags_to_legacy(new_vma_flags);
> >> > -	vma = vma_modify_flags(vmi, *pprev, vma, start, end, &newflags);
> >> > +	vma = vma_modify_flags(vmi, *pprev, vma, start, end, &new_vma_flags);
> >> >  	if (IS_ERR(vma)) {
> >> >  		error = PTR_ERR(vma);
> >> >  		goto fail;
> >> >  	}
> >> > -	new_vma_flags = legacy_to_vma_flags(newflags);
> >> >
> >> >  	*pprev = vma;
> >> >
> >> > @@ -771,7 +769,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
> >> >  	 * held in write mode.
> >> >  	 */
> >> >  	vma_start_write(vma);
> >> > -	vm_flags_reset_once(vma, newflags);
> >> > +	WRITE_ONCE(vma->flags, new_vma_flags);
> >>
> >> Ditto.
> >
> > I mean overall these cases are hacks in my opinion to work around code that
> > should have solved their problem another way.
> >
> > But sure, as above I'll add a helper function or such.
>
> Great.
>
> >>
> >> >  	if (vma_wants_manual_pte_write_upgrade(vma))
> >> >  		mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
> >> >  	vma_set_page_prot(vma);
> >> > @@ -796,6 +794,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb,
> >> >  	}
> >> >
> >> >  	vm_stat_account(mm, vma_flags_to_legacy(old_vma_flags), -nrpages);
> >> > +	newflags = vma_flags_to_legacy(new_vma_flags);
> >> >  	vm_stat_account(mm, newflags, nrpages);
> >> >  	perf_event_mmap(vma);
> >> >  	return 0;
> >> > diff --git a/mm/mseal.c b/mm/mseal.c
> >> > index 316b5e1dec78..603df53ad267 100644
>

Cheers, Lorenzo