ppc64 PTE hacks
Anton Blanchard
anton at samba.org
Wed Dec 24 10:56:32 EST 2003
Hi,
I just remembered we never merged this patch from Paul. It would be
great to get rid of the flush_tlb_* functions.
Anton
----- Forwarded message from Paul Mackerras <paulus at samba.org> -----
From: Paul Mackerras <paulus at samba.org>
To: anton at samba.org
Subject: ppc64 PTE hacks
Anton,
Here is the patch that changes the HPTE handling so that we queue up
an HPTE invalidation at the time we change the Linux PTE, instead of
later in the flush_tlb_* call.
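In outline, the batching works like the following self-contained
sketch (simplified to a single CPU, with flush_hash_range() stubbed
out to print; tlb_batch, queue_hpte_invalidate() and flush_pending()
are stand-ins for the real ppc64_tlb_batch, hpte_update() and
flush_tlb_pending() in the patch below, which also track whether the
flush can be done locally):

#include <stdio.h>

#define TLB_BATCH_NR 192	/* matches PPC64_TLB_BATCH_NR */

struct tlb_batch {
	unsigned long index;	/* number of queued invalidations */
	unsigned long context;	/* MMU context of the queued entries */
	unsigned long pte[TLB_BATCH_NR];
	unsigned long addr[TLB_BATCH_NR];
};

static struct tlb_batch batch;	/* one per CPU in the real code */

/* Stand-in for the real hash-table invalidation. */
static void flush_hash_range(unsigned long context, unsigned long n)
{
	printf("invalidate %lu HPTEs in context %lu\n", n, context);
}

/* Called at context switch, tlb_flush() and the flush_tlb_* macros. */
static void flush_pending(void)
{
	if (batch.index)
		flush_hash_range(batch.context, batch.index);
	batch.index = 0;
}

/* Called whenever a Linux PTE with _PAGE_HASHPTE set is changed. */
static void queue_hpte_invalidate(unsigned long context,
				  unsigned long pte, unsigned long addr)
{
	if (batch.index && context != batch.context)
		flush_pending();	/* one context per batch */
	if (batch.index == 0)
		batch.context = context;
	batch.pte[batch.index] = pte;
	batch.addr[batch.index] = addr;
	if (++batch.index >= TLB_BATCH_NR)
		flush_pending();	/* batch full, flush eagerly */
}

int main(void)
{
	queue_hpte_invalidate(1, 0x100, 0x1000);
	queue_hpte_invalidate(1, 0x200, 0x2000);
	queue_hpte_invalidate(2, 0x300, 0x3000);	/* context change flushes */
	flush_pending();
	return 0;
}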
Could you run some benchmarks for me with and without this patch on a
decent-sized POWER4 box sometime?
(I just noticed that this patch gives a net removal of 66 lines from
the kernel, which is nice. :)
Thanks,
Paul.
diff -urN linux-2.5/arch/ppc64/kernel/process.c ppc64/arch/ppc64/kernel/process.c
--- linux-2.5/arch/ppc64/kernel/process.c 2003-02-23 21:45:50.000000000 +1100
+++ ppc64/arch/ppc64/kernel/process.c 2003-03-19 16:37:25.000000000 +1100
@@ -45,6 +45,7 @@
#include <asm/machdep.h>
#include <asm/iSeries/HvCallHpt.h>
#include <asm/hardirq.h>
+#include <asm/tlbflush.h>
struct task_struct *last_task_used_math = NULL;
@@ -103,6 +104,8 @@
giveup_fpu(prev);
#endif /* CONFIG_SMP */
+ flush_tlb_pending();
+
new_thread = &new->thread;
old_thread = &current->thread;
diff -urN linux-2.5/arch/ppc64/mm/Makefile ppc64/arch/ppc64/mm/Makefile
--- linux-2.5/arch/ppc64/mm/Makefile 2002-12-16 10:50:39.000000000 +1100
+++ ppc64/arch/ppc64/mm/Makefile 2003-02-24 17:14:52.000000000 +1100
@@ -4,5 +4,5 @@
EXTRA_CFLAGS += -mno-minimal-toc
-obj-y := fault.o init.o extable.o imalloc.o
+obj-y := fault.o init.o extable.o imalloc.o tlb.o
obj-$(CONFIG_DISCONTIGMEM) += numa.o
diff -urN linux-2.5/arch/ppc64/mm/init.c ppc64/arch/ppc64/mm/init.c
--- linux-2.5/arch/ppc64/mm/init.c 2003-02-23 21:45:50.000000000 +1100
+++ ppc64/arch/ppc64/mm/init.c 2003-02-24 17:15:30.000000000 +1100
@@ -242,147 +242,6 @@
}
}
-void
-flush_tlb_mm(struct mm_struct *mm)
-{
- struct vm_area_struct *mp;
-
- spin_lock(&mm->page_table_lock);
-
- for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
- __flush_tlb_range(mm, mp->vm_start, mp->vm_end);
-
- /* XXX are there races with checking cpu_vm_mask? - Anton */
- mm->cpu_vm_mask = 0;
-
- spin_unlock(&mm->page_table_lock);
-}
-
-/*
- * Callers should hold the mm->page_table_lock
- */
-void
-flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
-{
- unsigned long context = 0;
- pgd_t *pgd;
- pmd_t *pmd;
- pte_t *ptep;
- pte_t pte;
- int local = 0;
-
- switch( REGION_ID(vmaddr) ) {
- case VMALLOC_REGION_ID:
- pgd = pgd_offset_k( vmaddr );
- break;
- case IO_REGION_ID:
- pgd = pgd_offset_i( vmaddr );
- break;
- case USER_REGION_ID:
- pgd = pgd_offset( vma->vm_mm, vmaddr );
- context = vma->vm_mm->context;
-
- /* XXX are there races with checking cpu_vm_mask? - Anton */
- if (vma->vm_mm->cpu_vm_mask == (1 << smp_processor_id()))
- local = 1;
-
- break;
- default:
- panic("flush_tlb_page: invalid region 0x%016lx", vmaddr);
-
- }
-
- if (!pgd_none(*pgd)) {
- pmd = pmd_offset(pgd, vmaddr);
- if (!pmd_none(*pmd)) {
- ptep = pte_offset_kernel(pmd, vmaddr);
- /* Check if HPTE might exist and flush it if so */
- pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
- if ( pte_val(pte) & _PAGE_HASHPTE ) {
- flush_hash_page(context, vmaddr, pte, local);
- }
- }
- }
-}
-
-struct ppc64_tlb_batch ppc64_tlb_batch[NR_CPUS];
-
-void
-__flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
- pgd_t *pgd;
- pmd_t *pmd;
- pte_t *ptep;
- pte_t pte;
- unsigned long pgd_end, pmd_end;
- unsigned long context = 0;
- struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[smp_processor_id()];
- unsigned long i = 0;
- int local = 0;
-
- switch(REGION_ID(start)) {
- case VMALLOC_REGION_ID:
- pgd = pgd_offset_k(start);
- break;
- case IO_REGION_ID:
- pgd = pgd_offset_i(start);
- break;
- case USER_REGION_ID:
- pgd = pgd_offset(mm, start);
- context = mm->context;
-
- /* XXX are there races with checking cpu_vm_mask? - Anton */
- if (mm->cpu_vm_mask == (1 << smp_processor_id()))
- local = 1;
-
- break;
- default:
- panic("flush_tlb_range: invalid region for start (%016lx) and end (%016lx)\n", start, end);
- }
-
- do {
- pgd_end = (start + PGDIR_SIZE) & PGDIR_MASK;
- if (pgd_end > end)
- pgd_end = end;
- if (!pgd_none(*pgd)) {
- pmd = pmd_offset(pgd, start);
- do {
- pmd_end = (start + PMD_SIZE) & PMD_MASK;
- if (pmd_end > end)
- pmd_end = end;
- if (!pmd_none(*pmd)) {
- ptep = pte_offset_kernel(pmd, start);
- do {
- if (pte_val(*ptep) & _PAGE_HASHPTE) {
- pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
- if (pte_val(pte) & _PAGE_HASHPTE) {
- batch->pte[i] = pte;
- batch->addr[i] = start;
- i++;
- if (i == PPC64_TLB_BATCH_NR) {
- flush_hash_range(context, i, local);
- i = 0;
- }
- }
- }
- start += PAGE_SIZE;
- ++ptep;
- } while (start < pmd_end);
- } else {
- start = pmd_end;
- }
- ++pmd;
- } while (start < pgd_end);
- } else {
- start = pgd_end;
- }
- ++pgd;
- } while (start < end);
-
- if (i)
- flush_hash_range(context, i, local);
-}
-
void free_initmem(void)
{
unsigned long addr;
diff -urN linux-2.5/arch/ppc64/mm/tlb.c ppc64/arch/ppc64/mm/tlb.c
--- linux-2.5/arch/ppc64/mm/tlb.c Thu Jan 01 10:00:00 1970
+++ ppc64/arch/ppc64/mm/tlb.c Tue Feb 25 15:51:52 2003
@@ -0,0 +1,96 @@
+/*
+ * This file contains the routines for flushing entries from the
+ * TLB and MMU hash table.
+ *
+ * Derived from arch/ppc64/mm/init.c:
+ * Copyright (C) 1995-1996 Gary Thomas (gdt at linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (paulus at cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort at cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ * Amiga/APUS changes by Jesper Skov (jskov at cygnus.co.uk).
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ *
+ * Dave Engebretsen <engebret at us.ibm.com>
+ * Rework for PPC64 port.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+#if 0
+struct ppc64_tlb_batch {
+ unsigned long index;
+ unsigned long context;
+ struct mm_struct *mm;
+ pte_t pte[PPC64_TLB_BATCH_NR];
+ unsigned long addr[PPC64_TLB_BATCH_NR];
+ //unsigned long vaddr[PPC64_TLB_BATCH_NR];
+};
+#endif
+
+struct ppc64_tlb_batch ppc64_tlb_batch[NR_CPUS];
+
+/*
+ * Update the MMU hash table to correspond with a change to
+ * a Linux PTE. If wrprot is true, it is permissible to
+ * change the existing HPTE to read-only rather than removing it
+ * (if we remove it we should clear the _PAGE_HPTEFLAGS bits).
+ */
+void hpte_update(pte_t *ptep, unsigned long pte, int wrprot)
+{
+ struct page *ptepage;
+ struct mm_struct *mm;
+ unsigned long addr;
+ int i;
+ unsigned long context = 0;
+ struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[smp_processor_id()];
+
+ ptepage = virt_to_page(ptep);
+ mm = (struct mm_struct *) ptepage->mapping;
+ addr = ptepage->index + (((unsigned long)ptep & ~PAGE_MASK) << 9);
+ if (REGION_ID(addr) == USER_REGION_ID)
+ context = mm->context;
+ i = batch->index;
+ if (unlikely(i != 0 && context != batch->context)) {
+ flush_tlb_pending();
+ i = 0;
+ }
+ if (i == 0) {
+ batch->context = context;
+ batch->mm = mm;
+ }
+ batch->pte[i] = __pte(pte);
+ batch->addr[i] = addr;
+ batch->index = ++i;
+ if (i >= PPC64_TLB_BATCH_NR)
+ flush_tlb_pending();
+}
+
+void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
+{
+ int i;
+ int local = 0;
+
+ i = batch->index;
+ if (batch->mm->cpu_vm_mask == (1UL << smp_processor_id()))
+ local = 1;
+ if (i == 1)
+ flush_hash_page(batch->context, batch->addr[0], batch->pte[0],
+ local);
+ else
+ flush_hash_range(batch->context, i, local);
+ batch->index = 0;
+}
diff -urN linux-2.5/include/asm-ppc64/pgtable.h ppc64/include/asm-ppc64/pgtable.h
--- linux-2.5/include/asm-ppc64/pgtable.h 2003-02-27 08:12:37.000000000 +1100
+++ ppc64/include/asm-ppc64/pgtable.h 2003-03-19 16:03:12.000000000 +1100
@@ -10,6 +10,7 @@
#include <asm/processor.h> /* For TASK_SIZE */
#include <asm/mmu.h>
#include <asm/page.h>
+#include <asm/tlbflush.h>
#endif /* __ASSEMBLY__ */
/* PMD_SHIFT determines what a second-level page table entry can map */
@@ -262,64 +263,85 @@
/* Atomic PTE updates */
-static inline unsigned long pte_update( pte_t *p, unsigned long clr,
- unsigned long set )
+static inline unsigned long pte_update(pte_t *p, unsigned long clr)
{
unsigned long old, tmp;
__asm__ __volatile__(
"1: ldarx %0,0,%3 # pte_update\n\
andc %1,%0,%4 \n\
- or %1,%1,%5 \n\
stdcx. %1,0,%3 \n\
bne- 1b"
: "=&r" (old), "=&r" (tmp), "=m" (*p)
- : "r" (p), "r" (clr), "r" (set), "m" (*p)
+ : "r" (p), "r" (clr), "m" (*p)
: "cc" );
return old;
}
+/* PTE updating functions */
+extern void hpte_update(pte_t *ptep, unsigned long pte, int wrprot);
+
static inline int ptep_test_and_clear_young(pte_t *ptep)
{
- return (pte_update(ptep, _PAGE_ACCESSED, 0) & _PAGE_ACCESSED) != 0;
+ unsigned long old;
+
+ old = pte_update(ptep, _PAGE_ACCESSED | _PAGE_HPTEFLAGS);
+ if (old & _PAGE_HASHPTE) {
+ hpte_update(ptep, old, 0);
+ flush_tlb_pending(); /* XXX generic code doesn't flush */
+ }
+ return (old & _PAGE_ACCESSED) != 0;
}
static inline int ptep_test_and_clear_dirty(pte_t *ptep)
{
- return (pte_update(ptep, _PAGE_DIRTY, 0) & _PAGE_DIRTY) != 0;
-}
+ unsigned long old;
-static inline pte_t ptep_get_and_clear(pte_t *ptep)
-{
- return __pte(pte_update(ptep, ~_PAGE_HPTEFLAGS, 0));
+ old = pte_update(ptep, _PAGE_DIRTY);
+ if ((~old & (_PAGE_HASHPTE | _PAGE_RW | _PAGE_DIRTY)) == 0)
+ hpte_update(ptep, old, 1);
+ return (old & _PAGE_DIRTY) != 0;
}
static inline void ptep_set_wrprotect(pte_t *ptep)
{
- pte_update(ptep, _PAGE_RW, 0);
+ unsigned long old;
+
+ old = pte_update(ptep, _PAGE_RW);
+ if ((~old & (_PAGE_HASHPTE | _PAGE_RW | _PAGE_DIRTY)) == 0)
+ hpte_update(ptep, old, 1);
}
-static inline void ptep_mkdirty(pte_t *ptep)
+static inline pte_t ptep_get_and_clear(pte_t *ptep)
{
- pte_update(ptep, 0, _PAGE_DIRTY);
+ unsigned long old = pte_update(ptep, ~0UL);
+
+ if (old & _PAGE_HASHPTE)
+ hpte_update(ptep, old, 0);
+ return __pte(old);
}
-#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
+static inline void pte_clear(pte_t * ptep)
+{
+ unsigned long old = pte_update(ptep, ~0UL);
+
+ if (old & _PAGE_HASHPTE)
+ hpte_update(ptep, old, 0);
+}
/*
* set_pte stores a linux PTE into the linux page table.
- * On machines which use an MMU hash table we avoid changing the
- * _PAGE_HASHPTE bit.
*/
static inline void set_pte(pte_t *ptep, pte_t pte)
{
- pte_update(ptep, ~_PAGE_HPTEFLAGS, pte_val(pte) & ~_PAGE_HPTEFLAGS);
+ if (pte_present(*ptep))
+ pte_clear(ptep);
+ if (pte_present(pte))
+ flush_tlb_pending();
+ *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}
-static inline void pte_clear(pte_t * ptep)
-{
- pte_update(ptep, ~_PAGE_HPTEFLAGS, 0);
-}
+#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
extern unsigned long ioremap_bot, ioremap_base;
diff -urN linux-2.5/include/asm-ppc64/tlb.h ppc64/include/asm-ppc64/tlb.h
--- linux-2.5/include/asm-ppc64/tlb.h 2003-01-12 18:45:40.000000000 +1100
+++ ppc64/include/asm-ppc64/tlb.h 2003-02-25 15:52:01.000000000 +1100
@@ -13,11 +13,10 @@
#define _PPC64_TLB_H
#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
#include <asm/page.h>
#include <asm/mmu.h>
-static inline void tlb_flush(struct mmu_gather *tlb);
+#define tlb_flush(tlb) flush_tlb_pending()
/* Avoid pulling in another include just for this */
#define check_pgt_cache() do { } while (0)
@@ -29,61 +28,6 @@
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
-/* Should make this at least as large as the generic batch size, but it
- * takes up too much space */
-#define PPC64_TLB_BATCH_NR 192
-
-struct ppc64_tlb_batch {
- unsigned long index;
- pte_t pte[PPC64_TLB_BATCH_NR];
- unsigned long addr[PPC64_TLB_BATCH_NR];
- unsigned long vaddr[PPC64_TLB_BATCH_NR];
-};
-
-extern struct ppc64_tlb_batch ppc64_tlb_batch[NR_CPUS];
-
-static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
- unsigned long address)
-{
- int cpu = smp_processor_id();
- struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[cpu];
- unsigned long i = batch->index;
- pte_t pte;
-
- if (pte_val(*ptep) & _PAGE_HASHPTE) {
- pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0));
- if (pte_val(pte) & _PAGE_HASHPTE) {
-
- batch->pte[i] = pte;
- batch->addr[i] = address;
- i++;
-
- if (i == PPC64_TLB_BATCH_NR) {
- int local = 0;
-
- if (tlb->mm->cpu_vm_mask == (1UL << cpu))
- local = 1;
-
- flush_hash_range(tlb->mm->context, i, local);
- i = 0;
- }
- }
- }
-
- batch->index = i;
-}
-
-static inline void tlb_flush(struct mmu_gather *tlb)
-{
- int cpu = smp_processor_id();
- struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[cpu];
- int local = 0;
-
- if (tlb->mm->cpu_vm_mask == (1UL << smp_processor_id()))
- local = 1;
-
- flush_hash_range(tlb->mm->context, batch->index, local);
- batch->index = 0;
-}
+#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)
#endif /* _PPC64_TLB_H */
diff -urN linux-2.5/include/asm-ppc64/tlbflush.h ppc64/include/asm-ppc64/tlbflush.h
--- linux-2.5/include/asm-ppc64/tlbflush.h 2002-06-07 18:21:41.000000000 +1000
+++ ppc64/include/asm-ppc64/tlbflush.h 2003-02-25 15:51:59.000000000 +1100
@@ -1,10 +1,6 @@
#ifndef _PPC64_TLBFLUSH_H
#define _PPC64_TLBFLUSH_H
-#include <linux/threads.h>
-#include <linux/mm.h>
-#include <asm/page.h>
-
/*
* TLB flushing:
*
@@ -15,22 +11,36 @@
* - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
*/
-extern void flush_tlb_mm(struct mm_struct *mm);
-extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern void __flush_tlb_range(struct mm_struct *mm,
- unsigned long start, unsigned long end);
-#define flush_tlb_range(vma, start, end) \
- __flush_tlb_range(vma->vm_mm, start, end)
+struct mm_struct;
+
+#define PPC64_TLB_BATCH_NR 192
+
+struct ppc64_tlb_batch {
+ unsigned long index;
+ unsigned long context;
+ struct mm_struct *mm;
+ pte_t pte[PPC64_TLB_BATCH_NR];
+ unsigned long addr[PPC64_TLB_BATCH_NR];
+ unsigned long vaddr[PPC64_TLB_BATCH_NR];
+};
-#define flush_tlb_kernel_range(start, end) \
- __flush_tlb_range(&init_mm, (start), (end))
+extern struct ppc64_tlb_batch ppc64_tlb_batch[];
+extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
- unsigned long start, unsigned long end)
+static inline void flush_tlb_pending(void)
{
- /* PPC has hw page tables. */
+ struct ppc64_tlb_batch *batch = &ppc64_tlb_batch[smp_processor_id()];
+
+ if (batch->index)
+ __flush_tlb_pending(batch);
}
+#define flush_tlb_mm(mm) flush_tlb_pending()
+#define flush_tlb_page(vma, addr) flush_tlb_pending()
+#define flush_tlb_range(vma, start, end) flush_tlb_pending()
+#define flush_tlb_kernel_range(start, end) flush_tlb_pending()
+#define flush_tlb_pgtables(mm, start, end) do { } while (0)
+
extern void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte,
int local);
void flush_hash_range(unsigned long context, unsigned long number, int local);
----- End forwarded message -----