[RFC] implicit hugetlb pages (hugetlb_dyn_as)

David Gibson david at gibson.dropbear.id.au
Mon Jan 12 13:35:15 EST 2004


On Fri, Jan 09, 2004 at 01:33:18PM -0800, Adam Litke wrote:
>
> hugetlb_dyn_as (2.6.0):
>   This patch adds support for dynamic resizing of the address space
> region used to address hugetlb pages.  This region starts empty and
> grows down from f0000000 in segment sized increments as needed.

Have you considered, instead of using a single hugepage range, using
a bitmask indicating which segments are hugepage segments?  That's
more flexible, and I suspect it may actually lead to some
implementation simplifications, especially if you want to support
shrinking the hugepage region.
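
Something along these lines, say (the field and macro names here are
just placeholders, not from your patch):

/* Sketch: track hugepage segments with a bitmask rather than a
 * single (base, end) range.  A 32-bit address space has sixteen
 * 256MB segments, so a u16 is enough. */
typedef struct {
	unsigned long flags;
#ifdef CONFIG_HUGETLB_PAGE
	u16 htlb_segs;	/* bit n set => segment n is a hugepage segment */
#endif
} mm_context_t;

#define in_hugepage_area_32(context, addr) \
	((context).htlb_segs & (1U << ((addr) >> 28)))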

> Requires hugetlb_implicit and mmu_context_to_struct.

This change is orthogonal to the hugetlb_implicit stuff; it would be
nice to make it independent of that patch.

Comments on some details below:

> diff -purN linux-2.6.0-implicit/arch/ppc64/kernel/setup.c linux-2.6.0-implicit+dynas/arch/ppc64/kernel/setup.c
> +++ linux-2.6.0-implicit+dynas/arch/ppc64/kernel/setup.c	2004-01-09 11:06:23.000000000 -0800
> @@ -523,6 +523,9 @@ void __init setup_arch(char **cmdline_p)
>  	init_mm.end_code = (unsigned long) _etext;
>  	init_mm.end_data = (unsigned long) _edata;
>  	init_mm.brk = klimit;
> +#ifdef CONFIG_HUGETLB_PAGE
> +	init_mm.context.hugetlb_base = TASK_HPAGE_BASE_32;
> +#endif

Erm... this appears to be giving init the largest possible hugetlb
range, which seems odd.  I can't see why init would want hugepages.
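
If the idea is that init should start with an empty hugepage range,
presumably this wants to match what init_new_context() does below,
i.e. something like (with an empty range being hugetlb_base ==
TASK_HPAGE_END_32):

#ifdef CONFIG_HUGETLB_PAGE
	/* empty 32-bit hugepage range, as in init_new_context() */
	init_mm.context.hugetlb_base = TASK_HPAGE_END_32;
#endif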

>  	/* Save unparsed command line copy for /proc/cmdline */
>  	strcpy(saved_command_line, cmd_line);
> diff -purN linux-2.6.0-implicit/arch/ppc64/mm/hugetlbpage.c linux-2.6.0-implicit+dynas/arch/ppc64/mm/hugetlbpage.c
> +++ linux-2.6.0-implicit+dynas/arch/ppc64/mm/hugetlbpage.c	2004-01-09 11:14:31.000000000 -0800
> @@ -249,14 +249,14 @@ static int open_32bit_htlbpage_range(str
>  		return 0; /* The window is already open */
>
>  	/* Check no VMAs are in the region */
> -	vma = find_vma(mm, TASK_HPAGE_BASE_32);
> +	vma = find_vma(mm, mm->context.hugetlb_base);
>
>  	if (vma && (vma->vm_start < TASK_HPAGE_END_32))
>  		return -EBUSY;
>
>  	/* Clean up any leftover PTE pages in the region */
>  	spin_lock(&mm->page_table_lock);
> -	for (addr = TASK_HPAGE_BASE_32; addr < TASK_HPAGE_END_32;
> +	for (addr = mm->context.hugetlb_base; addr < TASK_HPAGE_END_32;
>  	     addr += PMD_SIZE) {
>  		pgd_t *pgd = pgd_offset(mm, addr);
>  		pmd_t *pmd = pmd_offset(pgd, addr);
> @@ -590,6 +590,32 @@ full_search:
>  	}
>  }
>
> +unsigned long grow_hugetlb_region(unsigned long hpage_base, unsigned long len)
> +{
> +	struct vm_area_struct *vma = NULL;
> +	unsigned long i, new_base, vma_start = hpage_base;

i is an unused variable.

> +	vma = find_vma(current->mm, vma_start);
> +	vma_start = (vma && vma->vm_start < TASK_HPAGE_END_32) ?
> +		vma->vm_start : TASK_HPAGE_END_32;
> +	printk("First vma in hugetlb region starts at: %lx\n", vma_start);

> +	new_base = _ALIGN_DOWN(vma_start - len, 256<<20);
> +	if (new_base < TASK_HPAGE_BASE_32)
> +		return -ENOMEM;

> +	printk("Try to move hugetlb_base down to: %lx\n", new_base);
> +	vma = find_vma(current->mm, new_base);
> +	if (vma && vma->vm_start < hpage_base) {
> +		printk("Found vma at %lx aborting\n", vma->vm_start);
> +		return -ENOMEM;
> +	}
> +
> +	current->mm->context.hugetlb_base = new_base;
> +	printk("Area clean returning an area at: %lx\n", vma_start-len);
> +	return vma_start - len;
> +}

This isn't quite sufficient.  There could be non-hugepage SLB entries
in place for the segments which have now become hugepage segments, so
you'll need to flush those.  The simplest approach is probably an IPI
to slbia on all CPUs, like we do in open_32bit_htlbpage_range() when
we set the LOW_HPAGES flag.
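
Roughly this (do_slbia is a made-up name for the helper; the 2.6
on_each_cpu() takes retry and wait arguments):

static void do_slbia(void *unused)
{
	/* invalidate every SLB entry on this cpu, so that no stale
	 * non-hugepage entries survive for the new hugepage segments */
	asm volatile("isync; slbia; isync" : : : "memory");
}

	/* in grow_hugetlb_region(), once hugetlb_base has moved: */
	mb();
	on_each_cpu(do_slbia, NULL, 0, 1);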

Speaking of which, you no longer need the LOW_HPAGES flag; instead
you can just test whether the (32bit) hugepage range is non-empty.
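
i.e. something like (the macro name is just for illustration):

/* the 32-bit hugepage range is in use iff it is non-empty */
#define low_hpages(context) \
	((context).hugetlb_base < TASK_HPAGE_END_32)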

>  unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
>  					unsigned long len, unsigned long pgoff,
>  					unsigned long flags)
> @@ -610,7 +636,7 @@ unsigned long hugetlb_get_unmapped_area(
>  		if (err)
>  			return err; /* Should this just be EINVAL? */
>
> -		base = TASK_HPAGE_BASE_32;
> +		base = current->mm->context.hugetlb_base;
>  		end = TASK_HPAGE_END_32;
>  	} else {
>  		base = TASK_HPAGE_BASE;
> @@ -624,7 +650,7 @@ unsigned long hugetlb_get_unmapped_area(
>  	for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
>  		/* At this point:  (!vma || addr < vma->vm_end). */
>  		if (addr + len > end)
> -			return -ENOMEM;
> +			break; /* We couldn't find an area */
>  		if (!vma || (addr + len) <= vma->vm_start)
>  			return addr;
>  		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
> @@ -633,6 +659,8 @@ unsigned long hugetlb_get_unmapped_area(
>  		 * this alignment shouldn't have skipped over any
>  		 * other vmas */
>  	}
> +	/* Get the space by expanding the hugetlb region */
> +	return grow_hugetlb_region(base, len);
>  }
>
>  static inline unsigned long computeHugeHptePP(unsigned int hugepte)
> diff -purN linux-2.6.0-implicit/fs/hugetlbfs/inode.c linux-2.6.0-implicit+dynas/fs/hugetlbfs/inode.c
> +++ linux-2.6.0-implicit+dynas/fs/hugetlbfs/inode.c	2004-01-09 11:16:30.000000000 -0800
> @@ -155,6 +155,7 @@ try_hugetlb_get_unmapped_area(struct fil
>  	if (*flags & MAP_HUGETLB) {
>  		if (pre_error)
>  			return pre_error;
> +		printk("Doing explicit hugetlb mmap\n");
>  		return hugetlb_get_unmapped_area(NULL, addr, len, pgoff, *flags);
>  	}
>
> @@ -165,10 +166,13 @@ try_hugetlb_get_unmapped_area(struct fil
>  	if (mmap_hugetlb_implicit(len)) {
>  		if (pre_error)
>  			goto out;
> +		printk("Doing implicit hugetlb mmap...");
>  		addr = hugetlb_get_unmapped_area(NULL, addr, len, pgoff, *flags);
> -		if (IS_ERR((void *)addr))
> +		if (IS_ERR((void *)addr)) {
> +			printk("failed - falling back.\n");
>  			goto out;
> -		else {
> +		} else {
> +			printk("succeeded.\n");
>  			*flags |= MAP_HUGETLB;
>  			return addr;

Afaict this is the only dependence on the implicit hugepage patch,
and all it does is add some debugging printk()s.  Best to ditch it so
the patches become independent.

> diff -purN linux-2.6.0-implicit/include/asm-ppc64/mmu.h linux-2.6.0-implicit+dynas/include/asm-ppc64/mmu.h
> +++ linux-2.6.0-implicit+dynas/include/asm-ppc64/mmu.h	2004-01-09 11:17:35.000000000 -0800
> @@ -18,6 +18,9 @@
>  /* Time to allow for more things here */
>  typedef struct {
>  	unsigned long flags;
> +#ifdef CONFIG_HUGETLB_PAGE
> +	unsigned long hugetlb_base;
> +#endif
>  } mm_context_t;
>
>  #ifdef CONFIG_HUGETLB_PAGE
> diff -purN linux-2.6.0-implicit/include/asm-ppc64/mmu_context.h linux-2.6.0-implicit+dynas/include/asm-ppc64/mmu_context.h
> +++ linux-2.6.0-implicit+dynas/include/asm-ppc64/mmu_context.h	2004-01-09 11:18:44.000000000 -0800
> @@ -90,6 +90,9 @@ init_new_context(struct task_struct *tsk
>
>  	head = mmu_context_queue.head;
>  	mm->context = mmu_context_queue.elements[head];
> +#ifdef CONFIG_HUGETLB_PAGE
> +	mm->context.hugetlb_base = TASK_HPAGE_END_32;
> +#endif
>
>  	head = (head < LAST_USER_CONTEXT-1) ? head+1 : 0;
>  	mmu_context_queue.head = head;
> diff -purN linux-2.6.0-implicit/include/asm-ppc64/page.h linux-2.6.0-implicit+dynas/include/asm-ppc64/page.h
> +++ linux-2.6.0-implicit+dynas/include/asm-ppc64/page.h	2004-01-09 11:22:58.000000000 -0800
> @@ -33,22 +33,28 @@
>  #define TASK_HPAGE_BASE 	(0x0000010000000000UL)
>  #define TASK_HPAGE_END 	(0x0000018000000000UL)
>
> -/* For 32-bit processes the hugepage range is 2-3G */
> -#define TASK_HPAGE_BASE_32	(0x80000000UL)
> -#define TASK_HPAGE_END_32	(0xc0000000UL)
> +/*
> + * We have much greater contention for segments in a
> + * 32-bit address space.  Therefore, the region reserved
> + * for huge pages is dynamically resized.  These values
> + * define the maximum range allowed for huge pages.
> + */
> +#define TASK_HPAGE_BASE_32	(0x40000000UL)
> +#define TASK_HPAGE_END_32	(0xf0000000UL)
>
>  #define ARCH_HAS_HUGEPAGE_ONLY_RANGE
>  #define is_hugepage_only_range(addr, len) \
>  	( ((addr > (TASK_HPAGE_BASE-len)) && (addr < TASK_HPAGE_END)) || \
>  	  ((current->mm->context.flags & CONTEXT_LOW_HPAGES) && \
> -	   (addr > (TASK_HPAGE_BASE_32-len)) && (addr < TASK_HPAGE_END_32)) )
> +	   (addr > (current->mm->context.hugetlb_base-len)) && \
> +	    (addr < TASK_HPAGE_END_32)) )

I assume context.hugetlb_base is supposed to be protected by the
mmap_sem.  Have you double checked to make sure that all the callers
of this macro hold the mmap_sem?
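
That is, each caller ought to look something like this (sketch only,
not code from the patch):

	down_read(&current->mm->mmap_sem);
	if (is_hugepage_only_range(addr, len)) {
		/* ... treat the range as hugepage-only ... */
	}
	up_read(&current->mm->mmap_sem);

Otherwise hugetlb_base can move underneath the test.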

>  #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
>  #define in_hugepage_area(context, addr) \
>  	((cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) && \
>  	 ((((addr) >= TASK_HPAGE_BASE) && ((addr) < TASK_HPAGE_END)) || \
>  	  (((context.flags) & CONTEXT_LOW_HPAGES) && \
> -	   (((addr) >= TASK_HPAGE_BASE_32) && ((addr) < TASK_HPAGE_END_32)))))
> +	   (((addr) >= context.hugetlb_base) && ((addr) < TASK_HPAGE_END_32)))))


>  #else /* !CONFIG_HUGETLB_PAGE */

--
David Gibson			| For every complex problem there is a
david AT gibson.dropbear.id.au	| solution which is simple, neat and
				| wrong.
http://www.ozlabs.org/people/dgibson
