[PATCH v12 19/31] mm: protect the RB tree with a sequence lock

Jerome Glisse jglisse at redhat.com
Tue Apr 23 06:33:51 AEST 2019


On Tue, Apr 16, 2019 at 03:45:10PM +0200, Laurent Dufour wrote:
> Introduce a per mm_struct seqlock, the mm_seq field, to protect changes
> made to the MM RB tree. This allows the RB tree to be walked without
> grabbing the mmap_sem; once the walk is done, the sequence counter is
> checked to confirm it remained stable during the walk.
> 
> The mm seqlock is held while inserting entries into and removing entries
> from the MM RB tree.  Later in this series, it will be checked when looking
> up a VMA without holding the mmap_sem.
> 
> This is based on the initial work from Peter Zijlstra:
> https://lore.kernel.org/linux-mm/20100104182813.479668508@chello.nl/
> 
> Signed-off-by: Laurent Dufour <ldufour at linux.ibm.com>

Reviewed-by: Jérôme Glisse <jglisse at redhat.com>
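
As a side note for anyone following along: the read side that this patch
enables later in the series would look roughly like the sketch below. The
helper names (find_vma_speculative(), __find_vma_rb()) are made up for
illustration and are not part of this patch, and the sketch assumes the
VMAs and RB nodes stay valid for the duration of the walk, which other
patches in the series have to guarantee (RCU-style freeing). The pattern
itself is just the standard seqlock retry loop:

#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
/*
 * Illustrative only: lockless VMA lookup validated against mm->mm_seq.
 * Assumes the nodes being walked remain valid during the walk (handled
 * elsewhere in the series).
 */
static struct vm_area_struct *find_vma_speculative(struct mm_struct *mm,
						   unsigned long addr)
{
	struct vm_area_struct *vma;
	unsigned int seq;

	do {
		seq = read_seqbegin(&mm->mm_seq);
		vma = __find_vma_rb(mm, addr);	/* hypothetical raw RB walk */
	} while (read_seqretry(&mm->mm_seq, seq));

	return vma;
}
#endif

If read_seqretry() reports that a writer ran during the walk, the lookup
is simply retried (or, in the full series, presumably falls back to the
mmap_sem path).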

> ---
>  include/linux/mm_types.h |  3 +++
>  kernel/fork.c            |  3 +++
>  mm/init-mm.c             |  3 +++
>  mm/mmap.c                | 48 +++++++++++++++++++++++++++++++---------
>  4 files changed, 46 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index e78f72eb2576..24b3f8ce9e42 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -358,6 +358,9 @@ struct mm_struct {
>  	struct {
>  		struct vm_area_struct *mmap;		/* list of VMAs */
>  		struct rb_root mm_rb;
> +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
> +		seqlock_t mm_seq;
> +#endif
>  		u64 vmacache_seqnum;                   /* per-thread vmacache */
>  #ifdef CONFIG_MMU
>  		unsigned long (*get_unmapped_area) (struct file *filp,
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 2992d2c95256..3a1739197ebc 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1008,6 +1008,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
>  	mm->mmap = NULL;
>  	mm->mm_rb = RB_ROOT;
>  	mm->vmacache_seqnum = 0;
> +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
> +	seqlock_init(&mm->mm_seq);
> +#endif
>  	atomic_set(&mm->mm_users, 1);
>  	atomic_set(&mm->mm_count, 1);
>  	init_rwsem(&mm->mmap_sem);
> diff --git a/mm/init-mm.c b/mm/init-mm.c
> index a787a319211e..69346b883a4e 100644
> --- a/mm/init-mm.c
> +++ b/mm/init-mm.c
> @@ -27,6 +27,9 @@
>   */
>  struct mm_struct init_mm = {
>  	.mm_rb		= RB_ROOT,
> +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
> +	.mm_seq		= __SEQLOCK_UNLOCKED(init_mm.mm_seq),
> +#endif
>  	.pgd		= swapper_pg_dir,
>  	.mm_users	= ATOMIC_INIT(2),
>  	.mm_count	= ATOMIC_INIT(1),
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 13460b38b0fb..f7f6027a7dff 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -170,6 +170,24 @@ void unlink_file_vma(struct vm_area_struct *vma)
>  	}
>  }
>  
> +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
> +static inline void mm_write_seqlock(struct mm_struct *mm)
> +{
> +	write_seqlock(&mm->mm_seq);
> +}
> +static inline void mm_write_sequnlock(struct mm_struct *mm)
> +{
> +	write_sequnlock(&mm->mm_seq);
> +}
> +#else
> +static inline void mm_write_seqlock(struct mm_struct *mm)
> +{
> +}
> +static inline void mm_write_sequnlock(struct mm_struct *mm)
> +{
> +}
> +#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
> +
>  /*
>   * Close a vm structure and free it, returning the next.
>   */
> @@ -445,26 +463,32 @@ static void vma_gap_update(struct vm_area_struct *vma)
>  }
>  
>  static inline void vma_rb_insert(struct vm_area_struct *vma,
> -				 struct rb_root *root)
> +				 struct mm_struct *mm)
>  {
> +	struct rb_root *root = &mm->mm_rb;
> +
>  	/* All rb_subtree_gap values must be consistent prior to insertion */
>  	validate_mm_rb(root, NULL);
>  
>  	rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
>  }
>  
> -static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
> +static void __vma_rb_erase(struct vm_area_struct *vma, struct mm_struct *mm)
>  {
> +	struct rb_root *root = &mm->mm_rb;
> +
>  	/*
>  	 * Note rb_erase_augmented is a fairly large inline function,
>  	 * so make sure we instantiate it only once with our desired
>  	 * augmented rbtree callbacks.
>  	 */
> +	mm_write_seqlock(mm);
>  	rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
> +	mm_write_sequnlock(mm);	/* wmb */
>  }
>  
>  static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
> -						struct rb_root *root,
> +						struct mm_struct *mm,
>  						struct vm_area_struct *ignore)
>  {
>  	/*
> @@ -472,21 +496,21 @@ static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
>  	 * with the possible exception of the "next" vma being erased if
>  	 * next->vm_start was reduced.
>  	 */
> -	validate_mm_rb(root, ignore);
> +	validate_mm_rb(&mm->mm_rb, ignore);
>  
> -	__vma_rb_erase(vma, root);
> +	__vma_rb_erase(vma, mm);
>  }
>  
>  static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
> -					 struct rb_root *root)
> +					 struct mm_struct *mm)
>  {
>  	/*
>  	 * All rb_subtree_gap values must be consistent prior to erase,
>  	 * with the possible exception of the vma being erased.
>  	 */
> -	validate_mm_rb(root, vma);
> +	validate_mm_rb(&mm->mm_rb, vma);
>  
> -	__vma_rb_erase(vma, root);
> +	__vma_rb_erase(vma, mm);
>  }
>  
>  /*
> @@ -601,10 +625,12 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
>  	 * immediately update the gap to the correct value. Finally we
>  	 * rebalance the rbtree after all augmented values have been set.
>  	 */
> +	mm_write_seqlock(mm);
>  	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
>  	vma->rb_subtree_gap = 0;
>  	vma_gap_update(vma);
> -	vma_rb_insert(vma, &mm->mm_rb);
> +	vma_rb_insert(vma, mm);
> +	mm_write_sequnlock(mm);
>  }
>  
>  static void __vma_link_file(struct vm_area_struct *vma)
> @@ -680,7 +706,7 @@ static __always_inline void __vma_unlink_common(struct mm_struct *mm,
>  {
>  	struct vm_area_struct *next;
>  
> -	vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
> +	vma_rb_erase_ignore(vma, mm, ignore);
>  	next = vma->vm_next;
>  	if (has_prev)
>  		prev->vm_next = next;
> @@ -2674,7 +2700,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
>  	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
>  	vma->vm_prev = NULL;
>  	do {
> -		vma_rb_erase(vma, &mm->mm_rb);
> +		vma_rb_erase(vma, mm);
>  		mm->map_count--;
>  		tail_vma = vma;
>  		vma = vma->vm_next;
> -- 
> 2.21.0
> 

