ppc64 PTE hacks

Anton Blanchard anton at samba.org
Wed Dec 24 10:56:32 EST 2003


Hi,

I just remembered we never merged this patch from Paul. It would be
great to get rid of the flush_tlb_* functions.

Anton

----- Forwarded message from Paul Mackerras <paulus at samba.org> -----

From: Paul Mackerras <paulus at samba.org>
To: anton at samba.org
Subject: ppc64 PTE hacks

Anton,

Here is the patch that changes the HPTE handling so that we queue up
an HPTE invalidation at the time we change the Linux PTE, instead of
later in the flush_tlb_* calls.
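
In outline: each CPU keeps a small batch of (pte, address) pairs.
Whenever a Linux PTE with _PAGE_HASHPTE set is changed, the
corresponding HPTE invalidation is queued in that batch, and the
batch is drained when it fills up, when an entry for a different
context arrives, or at context switch.  A minimal sketch of the
queueing logic (condensed from the hpte_update() hunk below; the
names here are simplified, it is not a drop-in):

	#define TLB_BATCH_NR 192	/* same as PPC64_TLB_BATCH_NR */

	struct tlb_batch {
		unsigned long index;	/* entries queued so far */
		unsigned long context;	/* MMU context they belong to */
		unsigned long pte[TLB_BATCH_NR];
		unsigned long addr[TLB_BATCH_NR];
	};

	/* Stands in for __flush_tlb_pending(), which calls
	 * flush_hash_page()/flush_hash_range() and resets the batch. */
	static void flush_batch(struct tlb_batch *batch)
	{
		batch->index = 0;
	}

	/* Queue one HPTE invalidation at Linux PTE update time. */
	static void queue_hpte_invalidate(struct tlb_batch *batch,
					  unsigned long context,
					  unsigned long pte,
					  unsigned long addr)
	{
		/* a batch only ever holds entries for one context */
		if (batch->index != 0 && context != batch->context)
			flush_batch(batch);
		if (batch->index == 0)
			batch->context = context;
		batch->pte[batch->index] = pte;
		batch->addr[batch->index] = addr;
		if (++batch->index >= TLB_BATCH_NR)
			flush_batch(batch);	/* full, flush eagerly */
	}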

Could you run some benchmarks for me with and without this patch on a
decent-sized POWER4 box sometime?

(I just noticed that this patch gives a net removal of 66 lines from
the kernel, which is nice. :)

Thanks,
Paul.
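
P.S. For orientation, the changes below are, roughly: the open-coded
flush_tlb_mm/flush_tlb_page/__flush_tlb_range walkers in
arch/ppc64/mm/init.c go away; a new arch/ppc64/mm/tlb.c provides
hpte_update() and __flush_tlb_pending(); the PTE accessors in
pgtable.h queue an invalidation via hpte_update() whenever they clear
a hashed PTE; and the pending batch is drained by flush_tlb_pending()
at context switch (process.c), from tlb_flush() in asm/tlb.h, and
from the flush_tlb_* macros in tlbflush.h.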

diff -urN linux-2.5/arch/ppc64/kernel/process.c ppc64/arch/ppc64/kernel/process.c
--- linux-2.5/arch/ppc64/kernel/process.c	2003-02-23 21:45:50.000000000 +1100
+++ ppc64/arch/ppc64/kernel/process.c	2003-03-19 16:37:25.000000000 +1100
@@ -45,6 +45,7 @@
 #include <asm/machdep.h>
 #include <asm/iSeries/HvCallHpt.h>
 #include <asm/hardirq.h>
+#include <asm/tlbflush.h>

 struct task_struct *last_task_used_math = NULL;

@@ -103,6 +104,8 @@
 		giveup_fpu(prev);
 #endif /* CONFIG_SMP */

+	flush_tlb_pending();
+
 	new_thread = &new->thread;
 	old_thread = &current->thread;

diff -urN linux-2.5/arch/ppc64/mm/Makefile ppc64/arch/ppc64/mm/Makefile
--- linux-2.5/arch/ppc64/mm/Makefile	2002-12-16 10:50:39.000000000 +1100
+++ ppc64/arch/ppc64/mm/Makefile	2003-02-24 17:14:52.000000000 +1100
@@ -4,5 +4,5 @@

 EXTRA_CFLAGS += -mno-minimal-toc

-obj-y := fault.o init.o extable.o imalloc.o
+obj-y := fault.o init.o extable.o imalloc.o tlb.o
 obj-$(CONFIG_DISCONTIGMEM) += numa.o
diff -urN linux-2.5/arch/ppc64/mm/init.c ppc64/arch/ppc64/mm/init.c
--- linux-2.5/arch/ppc64/mm/init.c	2003-02-23 21:45:50.000000000 +1100
+++ ppc64/arch/ppc64/mm/init.c	2003-02-24 17:15:30.000000000 +1100
@@ -242,147 +242,6 @@
 	}
 }

-void
-flush_tlb_mm(struct mm_struct *mm)
-{
-	struct vm_area_struct *mp;
-
-	spin_lock(&mm->page_table_lock);
-
-	for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
-		__flush_tlb_range(mm, mp->vm_start, mp->vm_end);
-
-	/* XXX are there races with checking cpu_vm_mask? - Anton */
-	mm->cpu_vm_mask = 0;
-
-	spin_unlock(&mm->page_table_lock);
-}
-
-/*
- * Callers should hold the mm->page_table_lock
- */
-void
-flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
-{
-	unsigned long context = 0;
-	pgd_t *pgd;
-	pmd_t *pmd;
-	pte_t *ptep;
-	pte_t pte;
-	int local = 0;
-
-	switch( REGION_ID(vmaddr) ) {
-	case VMALLOC_REGION_ID:
-		pgd = pgd_offset_k( vmaddr );
-		break;
-	case IO_REGION_ID:
-		pgd = pgd_offset_i( vmaddr );
-		break;
-	case USER_REGION_ID:
-		pgd = pgd_offset( vma->vm_mm, vmaddr );
-		context = vma->vm_mm->context;
-
-		/* XXX are there races with checking cpu_vm_mask? - Anton */
-		if (vma->vm_mm->cpu_vm_mask == (1 << smp_processor_id()))
-			local = 1;
-
-		break;
-	default:
-		panic("flush_tlb_page: invalid region 0x%016lx", vmaddr);
-
-	}
-
-	if (!pgd_none(*pgd)) {
-		pmd = pmd_offset(pgd, vmaddr);
-		if (!pmd_none(*pmd)) {
-			ptep = pte_offset_kernel(pmd, vmaddr);
-			/* Check if HPTE might exist and flush it if so */
-			pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
-			if ( pte_val(pte) & _PAGE_HASHPTE ) {
-				flush_hash_page(context, vmaddr, pte, local);
-			}
-		}
-	}
-}
-
-struct ppc64_tlb_batch ppc64_tlb_batch[NR_CPUS];
-
-void
-__flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-	pgd_t *pgd;
-	pmd_t *pmd;
-	pte_t *ptep;
-	pte_t pte;
-	unsigned long pgd_end, pmd_end;
-	unsigned long context = 0;
-	struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[smp_processor_id()];
-	unsigned long i = 0;
-	int local = 0;
-
-	switch(REGION_ID(start)) {
-	case VMALLOC_REGION_ID:
-		pgd = pgd_offset_k(start);
-		break;
-	case IO_REGION_ID:
-		pgd = pgd_offset_i(start);
-		break;
-	case USER_REGION_ID:
-		pgd = pgd_offset(mm, start);
-		context = mm->context;
-
-		/* XXX are there races with checking cpu_vm_mask? - Anton */
-		if (mm->cpu_vm_mask == (1 << smp_processor_id()))
-			local = 1;
-
-		break;
-	default:
-		panic("flush_tlb_range: invalid region for start (%016lx) and end (%016lx)\n", start, end);
-	}
-
-	do {
-		pgd_end = (start + PGDIR_SIZE) & PGDIR_MASK;
-		if (pgd_end > end)
-			pgd_end = end;
-		if (!pgd_none(*pgd)) {
-			pmd = pmd_offset(pgd, start);
-			do {
-				pmd_end = (start + PMD_SIZE) & PMD_MASK;
-				if (pmd_end > end)
-					pmd_end = end;
-				if (!pmd_none(*pmd)) {
-					ptep = pte_offset_kernel(pmd, start);
-					do {
-						if (pte_val(*ptep) & _PAGE_HASHPTE) {
-							pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
-							if (pte_val(pte) & _PAGE_HASHPTE) {
-								batch->pte[i] = pte;
-								batch->addr[i] = start;
-								i++;
-								if (i == PPC64_TLB_BATCH_NR) {
-									flush_hash_range(context, i, local);
-									i = 0;
-								}
-							}
-						}
-						start += PAGE_SIZE;
-						++ptep;
-					} while (start < pmd_end);
-				} else {
-					start = pmd_end;
-				}
-				++pmd;
-			} while (start < pgd_end);
-		} else {
-			start = pgd_end;
-		}
-		++pgd;
-	} while (start < end);
-
-	if (i)
-		flush_hash_range(context, i, local);
-}
-
 void free_initmem(void)
 {
 	unsigned long addr;
diff -urN linux-2.5/arch/ppc64/mm/tlb.c ppc64/arch/ppc64/mm/tlb.c
--- linux-2.5/arch/ppc64/mm/tlb.c	Thu Jan 01 10:00:00 1970
+++ ppc64/arch/ppc64/mm/tlb.c	Tue Feb 25 15:51:52 2003
@@ -0,0 +1,96 @@
+/*
+ * This file contains the routines for flushing entries from the
+ * TLB and MMU hash table.
+ *
+ *  Derived from arch/ppc64/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt at linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus at cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort at cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov at cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Dave Engebretsen <engebret at us.ibm.com>
+ *      Rework for PPC64 port.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+#if 0
+struct ppc64_tlb_batch {
+	unsigned long index;
+	unsigned long context;
+	struct mm_struct *mm;
+	pte_t pte[PPC64_TLB_BATCH_NR];
+	unsigned long addr[PPC64_TLB_BATCH_NR];
+	//unsigned long vaddr[PPC64_TLB_BATCH_NR];
+};
+#endif
+
+struct ppc64_tlb_batch ppc64_tlb_batch[NR_CPUS];
+
+/*
+ * Update the MMU hash table to correspond with a change to
+ * a Linux PTE.  If wrprot is true, it is permissible to
+ * change the existing HPTE to read-only rather than removing it
+ * (if we remove it we should clear the _PAGE_HPTEFLAGS bits).
+ */
+void hpte_update(pte_t *ptep, unsigned long pte, int wrprot)
+{
+	struct page *ptepage;
+	struct mm_struct *mm;
+	unsigned long addr;
+	int i;
+	unsigned long context = 0;
+	struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[smp_processor_id()];
+
+	ptepage = virt_to_page(ptep);
+	mm = (struct mm_struct *) ptepage->mapping;
+	addr = ptepage->index + (((unsigned long)ptep & ~PAGE_MASK) << 9);
+	if (REGION_ID(addr) == USER_REGION_ID)
+		context = mm->context;
+	i = batch->index;
+	if (unlikely(i != 0 && context != batch->context)) {
+		flush_tlb_pending();
+		i = 0;
+	}
+	if (i == 0) {
+		batch->context = context;
+		batch->mm = mm;
+	}
+	batch->pte[i] = __pte(pte);
+	batch->addr[i] = addr;
+	batch->index = ++i;
+	if (i >= PPC64_TLB_BATCH_NR)
+		flush_tlb_pending();
+}
+
+void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
+{
+	int i;
+	int local = 0;
+
+	i = batch->index;
+	if (batch->mm->cpu_vm_mask == (1UL << smp_processor_id()))
+		local = 1;
+	if (i == 1)
+		flush_hash_page(batch->context, batch->addr[0], batch->pte[0],
+				local);
+	else
+		flush_hash_range(batch->context, i, local);
+	batch->index = 0;
+}
diff -urN linux-2.5/include/asm-ppc64/pgtable.h ppc64/include/asm-ppc64/pgtable.h
--- linux-2.5/include/asm-ppc64/pgtable.h	2003-02-27 08:12:37.000000000 +1100
+++ ppc64/include/asm-ppc64/pgtable.h	2003-03-19 16:03:12.000000000 +1100
@@ -10,6 +10,7 @@
 #include <asm/processor.h>		/* For TASK_SIZE */
 #include <asm/mmu.h>
 #include <asm/page.h>
+#include <asm/tlbflush.h>
 #endif /* __ASSEMBLY__ */

 /* PMD_SHIFT determines what a second-level page table entry can map */
@@ -262,64 +263,85 @@

 /* Atomic PTE updates */

-static inline unsigned long pte_update( pte_t *p, unsigned long clr,
-					unsigned long set )
+static inline unsigned long pte_update(pte_t *p, unsigned long clr)
 {
 	unsigned long old, tmp;

 	__asm__ __volatile__(
 	"1:	ldarx	%0,0,%3		# pte_update\n\
 	andc	%1,%0,%4 \n\
-	or	%1,%1,%5 \n\
 	stdcx.	%1,0,%3 \n\
 	bne-	1b"
 	: "=&r" (old), "=&r" (tmp), "=m" (*p)
-	: "r" (p), "r" (clr), "r" (set), "m" (*p)
+	: "r" (p), "r" (clr), "m" (*p)
 	: "cc" );
 	return old;
 }

+/* PTE updating functions */
+extern void hpte_update(pte_t *ptep, unsigned long pte, int wrprot);
+
 static inline int ptep_test_and_clear_young(pte_t *ptep)
 {
-	return (pte_update(ptep, _PAGE_ACCESSED, 0) & _PAGE_ACCESSED) != 0;
+	unsigned long old;
+
+	old = pte_update(ptep, _PAGE_ACCESSED | _PAGE_HPTEFLAGS);
+	if (old & _PAGE_HASHPTE) {
+		hpte_update(ptep, old, 0);
+		flush_tlb_pending();	/* XXX generic code doesn't flush */
+	}
+	return (old & _PAGE_ACCESSED) != 0;
 }

 static inline int ptep_test_and_clear_dirty(pte_t *ptep)
 {
-	return (pte_update(ptep, _PAGE_DIRTY, 0) & _PAGE_DIRTY) != 0;
-}
+	unsigned long old;

-static inline pte_t ptep_get_and_clear(pte_t *ptep)
-{
-	return __pte(pte_update(ptep, ~_PAGE_HPTEFLAGS, 0));
+	old = pte_update(ptep, _PAGE_DIRTY);
+	if ((~old & (_PAGE_HASHPTE | _PAGE_RW | _PAGE_DIRTY)) == 0)
+		hpte_update(ptep, old, 1);
+	return (old & _PAGE_DIRTY) != 0;
 }

 static inline void ptep_set_wrprotect(pte_t *ptep)
 {
-	pte_update(ptep, _PAGE_RW, 0);
+	unsigned long old;
+
+	old = pte_update(ptep, _PAGE_RW);
+	if ((~old & (_PAGE_HASHPTE | _PAGE_RW | _PAGE_DIRTY)) == 0)
+		hpte_update(ptep, old, 1);
 }

-static inline void ptep_mkdirty(pte_t *ptep)
+static inline pte_t ptep_get_and_clear(pte_t *ptep)
 {
-	pte_update(ptep, 0, _PAGE_DIRTY);
+	unsigned long old = pte_update(ptep, ~0UL);
+
+	if (old & _PAGE_HASHPTE)
+		hpte_update(ptep, old, 0);
+	return __pte(old);
 }

-#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
+static inline void pte_clear(pte_t * ptep)
+{
+	unsigned long old = pte_update(ptep, ~0UL);
+
+	if (old & _PAGE_HASHPTE)
+		hpte_update(ptep, old, 0);
+}

 /*
  * set_pte stores a linux PTE into the linux page table.
- * On machines which use an MMU hash table we avoid changing the
- * _PAGE_HASHPTE bit.
  */
 static inline void set_pte(pte_t *ptep, pte_t pte)
 {
-	pte_update(ptep, ~_PAGE_HPTEFLAGS, pte_val(pte) & ~_PAGE_HPTEFLAGS);
+	if (pte_present(*ptep))
+		pte_clear(ptep);
+	if (pte_present(pte))
+		flush_tlb_pending();
+	*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
 }

-static inline void pte_clear(pte_t * ptep)
-{
-	pte_update(ptep, ~_PAGE_HPTEFLAGS, 0);
-}
+#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)

 extern unsigned long ioremap_bot, ioremap_base;

diff -urN linux-2.5/include/asm-ppc64/tlb.h ppc64/include/asm-ppc64/tlb.h
--- linux-2.5/include/asm-ppc64/tlb.h	2003-01-12 18:45:40.000000000 +1100
+++ ppc64/include/asm-ppc64/tlb.h	2003-02-25 15:52:01.000000000 +1100
@@ -13,11 +13,10 @@
 #define _PPC64_TLB_H

 #include <asm/pgtable.h>
-#include <asm/tlbflush.h>
 #include <asm/page.h>
 #include <asm/mmu.h>

-static inline void tlb_flush(struct mmu_gather *tlb);
+#define tlb_flush(tlb)	flush_tlb_pending()

 /* Avoid pulling in another include just for this */
 #define check_pgt_cache()	do { } while (0)
@@ -29,61 +28,6 @@
 #define tlb_start_vma(tlb, vma)	do { } while (0)
 #define tlb_end_vma(tlb, vma)	do { } while (0)

-/* Should make this at least as large as the generic batch size, but it
- * takes up too much space */
-#define PPC64_TLB_BATCH_NR 192
-
-struct ppc64_tlb_batch {
-	unsigned long index;
-	pte_t pte[PPC64_TLB_BATCH_NR];
-	unsigned long addr[PPC64_TLB_BATCH_NR];
-	unsigned long vaddr[PPC64_TLB_BATCH_NR];
-};
-
-extern struct ppc64_tlb_batch ppc64_tlb_batch[NR_CPUS];
-
-static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
-					unsigned long address)
-{
-	int cpu = smp_processor_id();
-	struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[cpu];
-	unsigned long i = batch->index;
-	pte_t pte;
-
-	if (pte_val(*ptep) & _PAGE_HASHPTE) {
-		pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
-		if (pte_val(pte) & _PAGE_HASHPTE) {
-
-			batch->pte[i] = pte;
-			batch->addr[i] = address;
-			i++;
-
-			if (i == PPC64_TLB_BATCH_NR) {
-				int local = 0;
-
-				if (tlb->mm->cpu_vm_mask == (1UL << cpu))
-					local = 1;
-
-				flush_hash_range(tlb->mm->context, i, local);
-				i = 0;
-			}
-		}
-	}
-
-	batch->index = i;
-}
-
-static inline void tlb_flush(struct mmu_gather *tlb)
-{
-	int cpu = smp_processor_id();
-	struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[cpu];
-	int local = 0;
-
-	if (tlb->mm->cpu_vm_mask == (1UL << smp_processor_id()))
-		local = 1;
-
-	flush_hash_range(tlb->mm->context, batch->index, local);
-	batch->index = 0;
-}
+#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)

 #endif /* _PPC64_TLB_H */
diff -urN linux-2.5/include/asm-ppc64/tlbflush.h ppc64/include/asm-ppc64/tlbflush.h
--- linux-2.5/include/asm-ppc64/tlbflush.h	2002-06-07 18:21:41.000000000 +1000
+++ ppc64/include/asm-ppc64/tlbflush.h	2003-02-25 15:51:59.000000000 +1100
@@ -1,10 +1,6 @@
 #ifndef _PPC64_TLBFLUSH_H
 #define _PPC64_TLBFLUSH_H

-#include <linux/threads.h>
-#include <linux/mm.h>
-#include <asm/page.h>
-
 /*
  * TLB flushing:
  *
@@ -15,22 +11,36 @@
  *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */

-extern void flush_tlb_mm(struct mm_struct *mm);
-extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern void __flush_tlb_range(struct mm_struct *mm,
-			    unsigned long start, unsigned long end);
-#define flush_tlb_range(vma, start, end) \
-	__flush_tlb_range(vma->vm_mm, start, end)
+struct mm_struct;
+
+#define PPC64_TLB_BATCH_NR 192
+
+struct ppc64_tlb_batch {
+	unsigned long index;
+	unsigned long context;
+	struct mm_struct *mm;
+	pte_t pte[PPC64_TLB_BATCH_NR];
+	unsigned long addr[PPC64_TLB_BATCH_NR];
+	unsigned long vaddr[PPC64_TLB_BATCH_NR];
+};

-#define flush_tlb_kernel_range(start, end) \
-	__flush_tlb_range(&init_mm, (start), (end))
+extern struct ppc64_tlb_batch ppc64_tlb_batch[];
+extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);

-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
+static inline void flush_tlb_pending(void)
 {
-	/* PPC has hw page tables. */
+	struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[smp_processor_id()];
+
+	if (batch->index)
+		__flush_tlb_pending(batch);
 }

+#define flush_tlb_mm(mm)			flush_tlb_pending()
+#define flush_tlb_page(vma, addr)		flush_tlb_pending()
+#define flush_tlb_range(vma, start, end)	flush_tlb_pending()
+#define flush_tlb_kernel_range(start, end)	flush_tlb_pending()
+#define flush_tlb_pgtables(mm, start, end)	do { } while (0)
+
 extern void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
 			    int local);
 void flush_hash_range(unsigned long context, unsigned long number, int local);

----- End forwarded message -----
