[RFC/PATCH] powerpc: Experimental 64-bit BookE support (v2)
Benjamin Herrenschmidt
benh at kernel.crashing.org
Wed Jun 3 17:20:22 EST 2009
This is still experiemental... among other things, don't
try to enable SPARSEMEM_VMEMMAP or HUGETLBFS...
Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
---
NOTE: Kumar, I haven't addressed most of your comments yet ! I will
though... this is mostly a refresh and make-it-work-again on top
of upstream + the previously posted patches (including the RFC
patch I sent separately for adding an argument to __pte_free_tlb)
v2. - SMP works :-) Threads not yet
- Various other misc fixes and copy/paste errors, the handling of
the HW tablewalk should be correct now among others
- Removed spurrious 32-bit Book3E stuff for now
arch/powerpc/include/asm/exception-64e.h | 223 ++++++++
arch/powerpc/include/asm/hw_irq.h | 5
arch/powerpc/include/asm/mmu-book3e.h | 152 +++++
arch/powerpc/include/asm/mmu.h | 1
arch/powerpc/include/asm/mmu_context.h | 9
arch/powerpc/include/asm/paca.h | 23
arch/powerpc/include/asm/page.h | 4
arch/powerpc/include/asm/page_64.h | 10
arch/powerpc/include/asm/pgalloc.h | 21
arch/powerpc/include/asm/pgtable-ppc64.h | 61 +-
arch/powerpc/include/asm/ppc-opcode.h | 6
arch/powerpc/include/asm/ppc_asm.h | 8
arch/powerpc/include/asm/pte-book3e.h | 70 ++
arch/powerpc/include/asm/pte-common.h | 3
arch/powerpc/include/asm/reg.h | 10
arch/powerpc/include/asm/reg_booke.h | 35 +
arch/powerpc/include/asm/tlbflush.h | 11
arch/powerpc/kernel/Makefile | 10
arch/powerpc/kernel/asm-offsets.c | 19
arch/powerpc/kernel/cputable.c | 27 -
arch/powerpc/kernel/entry_64.S | 102 ++-
arch/powerpc/kernel/exceptions-64e.S | 794 +++++++++++++++++++++++++++++++
arch/powerpc/kernel/head_64.S | 45 +
arch/powerpc/kernel/paca.c | 3
arch/powerpc/kernel/process.c | 2
arch/powerpc/kernel/setup_64.c | 27 +
arch/powerpc/mm/Makefile | 1
arch/powerpc/mm/init_64.c | 2
arch/powerpc/mm/mmu_context_hash64.c | 5
arch/powerpc/mm/mmu_decl.h | 26 -
arch/powerpc/mm/pgtable_64.c | 60 ++
arch/powerpc/mm/tlb_book3e.c | 165 ++++++
arch/powerpc/mm/tlb_hash64.c | 5
arch/powerpc/mm/tlb_low_64e.S | 733 ++++++++++++++++++++++++++++
arch/powerpc/mm/tlb_nohash.c | 69 ++
arch/powerpc/mm/tlb_nohash_low.S | 79 +++
arch/powerpc/platforms/Kconfig.cputype | 45 +
arch/powerpc/xmon/xmon.c | 2
38 files changed, 2748 insertions(+), 125 deletions(-)
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-work/arch/powerpc/include/asm/pte-book3e.h 2009-06-03 15:13:29.000000000 +1000
@@ -0,0 +1,70 @@
+#ifndef _ASM_POWERPC_PTE_BOOK3E_H
+#define _ASM_POWERPC_PTE_BOOK3E_H
+#ifdef __KERNEL__
+
+/* PTE bit definitions for processors compliant to the Book3E
+ * architecture 2.06 or later. The position of the PTE bits
+ * matches the HW definition of the optional Embedded Page Table
+ * category.
+ */
+
+/* Architected bits */
+#define _PAGE_PRESENT 0x000001 /* software: pte contains a translation */
+#define _PAGE_FILE 0x000002 /* (!present only) software: pte holds file offset */
+#define _PAGE_SW1 0x000002
+#define _PAGE_BAP_SR 0x000004
+#define _PAGE_BAP_UR 0x000008
+#define _PAGE_BAP_SW 0x000010
+#define _PAGE_BAP_UW 0x000020
+#define _PAGE_BAP_SX 0x000040
+#define _PAGE_BAP_UX 0x000080
+#define _PAGE_PSIZE_MSK 0x000f00
+#define _PAGE_PSIZE_4K 0x000200
+#define _PAGE_PSIZE_64K 0x000600
+#define _PAGE_PSIZE_1M 0x000a00
+#define _PAGE_PSIZE_16M 0x000e00
+#define _PAGE_DIRTY 0x001000 /* C: page changed */
+#define _PAGE_SW0 0x002000
+#define _PAGE_U3 0x004000
+#define _PAGE_U2 0x008000
+#define _PAGE_U1 0x010000
+#define _PAGE_U0 0x020000
+#define _PAGE_ACCESSED 0x040000
+#define _PAGE_LENDIAN 0x080000
+#define _PAGE_GUARDED 0x100000
+#define _PAGE_COHERENT 0x200000 /* M: enforce memory coherence */
+#define _PAGE_NO_CACHE 0x400000 /* I: cache inhibit */
+#define _PAGE_WRITETHRU 0x800000 /* W: cache write-through */
+
+/* "Higher level" linux bit combinations */
+#define _PAGE_EXEC _PAGE_BAP_SX /* Can be executed from potentially */
+#define _PAGE_HWEXEC _PAGE_BAP_UX /* .. and was cache cleaned */
+#define _PAGE_RW (_PAGE_BAP_SW | _PAGE_BAP_UW) /* User write permission */
+#define _PAGE_KERNEL_RW (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY)
+#define _PAGE_KERNEL_RO (_PAGE_BAP_SR)
+#define _PAGE_USER (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */
+
+#define _PAGE_HASHPTE 0
+#define _PAGE_BUSY 0
+
+#define _PAGE_SPECIAL _PAGE_SW0
+
+/* Flags to be preserved on PTE modifications */
+#define _PAGE_HPTEFLAGS _PAGE_BUSY
+
+/* Base page size */
+#ifdef CONFIG_PPC_64K_PAGES
+#define _PAGE_PSIZE _PAGE_PSIZE_64K
+#define PTE_RPN_SHIFT (28)
+#else
+#define _PAGE_PSIZE _PAGE_PSIZE_4K
+#define PTE_RPN_SHIFT (24)
+#endif
+
+/* On 32-bit, we never clear the top part of the PTE */
+#ifdef CONFIG_PPC32
+#define _PTE_NONE_MASK 0xffffffff00000000ULL
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_PTE_FSL_BOOKE_H */
Index: linux-work/arch/powerpc/include/asm/pgtable-ppc64.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/pgtable-ppc64.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/pgtable-ppc64.h 2009-06-03 15:13:29.000000000 +1000
@@ -5,11 +5,6 @@
* the ppc64 hashed page table.
*/
-#ifndef __ASSEMBLY__
-#include <linux/stddef.h>
-#include <asm/tlbflush.h>
-#endif /* __ASSEMBLY__ */
-
#ifdef CONFIG_PPC_64K_PAGES
#include <asm/pgtable-ppc64-64k.h>
#else
@@ -38,26 +33,46 @@
#endif
/*
- * Define the address range of the vmalloc VM area.
+ * Define the address range of the kernel non-linear virtual area
+ */
+
+#ifdef CONFIG_PPC_BOOK3E
+#define KERN_VIRT_START ASM_CONST(0x8000000000000000)
+#else
+#define KERN_VIRT_START ASM_CONST(0xD000000000000000)
+#endif
+#define KERN_VIRT_SIZE PGTABLE_RANGE
+
+/*
+ * The vmalloc space starts at the beginning of that region, and
+ * occupies half of it on hash CPUs and a quarter of it on Book3E
*/
-#define VMALLOC_START ASM_CONST(0xD000000000000000)
-#define VMALLOC_SIZE (PGTABLE_RANGE >> 1)
-#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
+#define VMALLOC_START KERN_VIRT_START
+#ifdef CONFIG_PPC_BOOK3E
+#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 2)
+#else
+#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
+#endif
+#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
/*
- * Define the address ranges for MMIO and IO space :
+ * The second half of the kernel virtual space is used for IO mappings,
+ * it's itself carved into the PIO region (ISA and PHB IO space) and
+ * the ioremap space
*
- * ISA_IO_BASE = VMALLOC_END, 64K reserved area
+ * ISA_IO_BASE = KERN_IO_START, 64K reserved area
* PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
* IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
*/
+#define KERN_IO_START (KERN_VIRT_START + (KERN_VIRT_SIZE >> 1))
#define FULL_IO_SIZE 0x80000000ul
-#define ISA_IO_BASE (VMALLOC_END)
-#define ISA_IO_END (VMALLOC_END + 0x10000ul)
+#define ISA_IO_BASE (KERN_IO_START)
+#define ISA_IO_END (KERN_IO_START + 0x10000ul)
#define PHB_IO_BASE (ISA_IO_END)
-#define PHB_IO_END (VMALLOC_END + FULL_IO_SIZE)
+#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
#define IOREMAP_BASE (PHB_IO_END)
-#define IOREMAP_END (VMALLOC_START + PGTABLE_RANGE)
+#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE)
+
/*
* Region IDs
@@ -72,19 +87,28 @@
#define USER_REGION_ID (0UL)
/*
- * Defines the address of the vmemap area, in its own region
+ * Defines the address of the vmemap area, in its own region on
+ * hash table CPUs and after the vmalloc space on Book3E
*/
+#ifdef CONFIG_PPC_BOOK3E
+#define VMEMMAP_BASE VMALLOC_END
+#define VMEMMAP_END KERN_IO_START
+#else
#define VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT)
+#endif
#define vmemmap ((struct page *)VMEMMAP_BASE)
/*
* Include the PTE bits definitions
*/
+#ifdef CONFIG_PPC_BOOK3S
#include <asm/pte-hash64.h>
+#else
+#include <asm/pte-book3e.h>
+#endif
#include <asm/pte-common.h>
-
#ifdef CONFIG_PPC_MM_SLICES
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
@@ -92,6 +116,9 @@
#ifndef __ASSEMBLY__
+#include <linux/stddef.h>
+#include <asm/tlbflush.h>
+
/*
* This is the default implementation of various PTE accessors, it's
* used in all cases except Book3S with 64K pages where we have a
Index: linux-work/arch/powerpc/include/asm/pte-common.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/pte-common.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/pte-common.h 2009-06-03 15:13:29.000000000 +1000
@@ -34,6 +34,9 @@
#ifndef _PAGE_4K_PFN
#define _PAGE_4K_PFN 0
#endif
+#ifndef _PAGE_SAO
+#define _PAGE_SAO 0
+#endif
#ifndef _PAGE_PSIZE
#define _PAGE_PSIZE 0
#endif
Index: linux-work/arch/powerpc/include/asm/mmu-book3e.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/mmu-book3e.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/mmu-book3e.h 2009-06-03 15:13:29.000000000 +1000
@@ -38,6 +38,10 @@
#define BOOK3E_PAGESZ_1TB 30
#define BOOK3E_PAGESZ_2TB 31
+/*
+ * Old MAS bit definitions
+ */
+
#define MAS0_TLBSEL(x) ((x << 28) & 0x30000000)
#define MAS0_ESEL(x) ((x << 16) & 0x0FFF0000)
#define MAS0_NV(x) ((x) & 0x00000FFF)
@@ -91,6 +95,127 @@
#define MAS7_RPN 0xFFFFFFFF
+/*
+ * New MAS bit definitions
+ */
+
+#define SPRN_MAS0_HES 0x00004000
+#define SPRN_MAS0_WQ_ALLWAYS 0x00000000
+#define SPRN_MAS0_WQ_COND 0x00001000
+#define SPRN_MAS0_WQ_CLR_RSRV 0x00002000
+
+#define SPRN_MAS1_V 0x80000000
+#define SPRN_MAS1_IPROT 0x40000000
+#define SPRN_MAS1_TSIZE_MASK 0x00000f80
+#define SPRN_MAS1_TSIZE_SHIFT 7
+#define SPRN_MAS1_TS 0x00001000
+#define SPRN_MAS1_IND 0x00002000
+
+#define SPRN_MAS2_E 0x00000001
+#define SPRN_MAS2_G 0x00000002
+#define SPRN_MAS2_M 0x00000004
+#define SPRN_MAS2_I 0x00000008
+#define SPRN_MAS2_W 0x00000010
+
+#define SPRN_MAS3_SR 0x00000001
+#define SPRN_MAS3_UR 0x00000002
+#define SPRN_MAS3_SW 0x00000004
+#define SPRN_MAS3_UW 0x00000008
+#define SPRN_MAS3_SX 0x00000010
+#define SPRN_MAS3_UX 0x00000020
+#define SPRN_MAS3_SPSIZE 0x0000003e
+#define SPRN_MAS3_SPSIZE_SHIFT 1
+#define SPRN_MAS3_U3 0x00000040
+#define SPRN_MAS3_U2 0x00000080
+#define SPRN_MAS3_U1 0x00000100
+#define SPRN_MAS3_U0 0x00000200
+
+#define SPRN_MAS4_WIMGED_MASK 0x0000001f /* Default WIMGE */
+#define SPRN_MAS4_WIMGED_SHIFT 0
+#define SPRN_MAS4_VLED 0x00000020 /* Default VLE */
+#define SPRN_MAS4_ACMD 0x000000c0 /* Default ACM */
+#define SPRN_MAS4_ACMD_SHIFT 6
+#define SPRN_MAS4_TSIZED_MASK 0x00000f80 /* Default TSIZE */
+#define SPRN_MAS4_TSIZED_SHIFT 7
+#define SPRN_MAS4_INDD 0x00008000 /* Default IND */
+
+#define SPRN_MAS6_SIND 0x00000002 /* Indirect page */
+#define SPRN_MAS6_SIND_SHIFT 1
+#define SPRN_MAS6_SPID_MASK 0x3fff0000
+#define SPRN_MAS6_SPID_SHIFT 16
+#define SPRN_MAS6_ISIZE_MASK 0x00000f80
+#define SPRN_MAS6_ISIZE_SHIFT 7
+
+/* TLBnCFG encoding */
+#define SPRN_TLBnCFG_N_ENTRY 0x00000fff /* number of entries */
+#define SPRN_TLBnCFG_HES 0x00002000 /* HW select supported */
+#define SPRN_TLBnCFG_IPROT 0x00008000 /* IPROT supported */
+#define SPRN_TLBnCFG_GTWE 0x00010000 /* Guest can write */
+#define SPRN_TLBnCFG_IND 0x00020000 /* IND entries supported */
+#define SPRN_TLBnCFG_PT 0x00040000 /* Can load from page table */
+#define SPRN_TLBnCFG_ASSOC 0xff000000 /* Associativity */
+
+/* TLBnPS encoding */
+#define SPRN_TLBnPS_4K 0x00000004
+#define SPRN_TLBnPS_8K 0x00000008
+#define SPRN_TLBnPS_16K 0x00000010
+#define SPRN_TLBnPS_32K 0x00000020
+#define SPRN_TLBnPS_64K 0x00000040
+#define SPRN_TLBnPS_128K 0x00000080
+#define SPRN_TLBnPS_256K 0x00000100
+#define SPRN_TLBnPS_512K 0x00000200
+#define SPRN_TLBnPS_1M 0x00000400
+#define SPRN_TLBnPS_2M 0x00000800
+#define SPRN_TLBnPS_4M 0x00001000
+#define SPRN_TLBnPS_8M 0x00002000
+#define SPRN_TLBnPS_16M 0x00004000
+#define SPRN_TLBnPS_32M 0x00008000
+#define SPRN_TLBnPS_64M 0x00010000
+#define SPRN_TLBnPS_128M 0x00020000
+#define SPRN_TLBnPS_256M 0x00040000
+#define SPRN_TLBnPS_512M 0x00080000
+#define SPRN_TLBnPS_1G 0x00100000
+#define SPRN_TLBnPS_2G 0x00200000
+#define SPRN_TLBnPS_4G 0x00400000
+#define SPRN_TLBnPS_8G 0x00800000
+#define SPRN_TLBnPS_16G 0x01000000
+#define SPRN_TLBnPS_32G 0x02000000
+#define SPRN_TLBnPS_64G 0x04000000
+#define SPRN_TLBnPS_128G 0x08000000
+#define SPRN_TLBnPS_256G 0x10000000
+
+/* tlbilx action encoding */
+#define TLBILX_T_ALL 0
+#define TLBILX_T_TID 1
+#define TLBILX_T_FULLMATCH 3
+#define TLBILX_T_CLASS0 4
+#define TLBILX_T_CLASS1 5
+#define TLBILX_T_CLASS2 6
+#define TLBILX_T_CLASS3 7
+
+/*
+ * The kernel use the constants below to index in the page sizes array.
+ * The use of fixed constants for this purpose is better for performances
+ * of the low level hash refill handlers.
+ *
+ * A non supported page size has a "shift" field set to 0
+ *
+ * Any new page size being implemented can get a new entry in here. Whether
+ * the kernel will use it or not is a different matter though. The actual page
+ * size used by hugetlbfs is not defined here and may be made variable
+ *
+ * Note: This is identical to the definition in mmu-hash64.h and possibly
+ * to be moved to a common file. This is also only used on 64-bit for now
+ */
+
+#define MMU_PAGE_4K 0
+#define MMU_PAGE_64K 1
+#define MMU_PAGE_1M 2
+#define MMU_PAGE_16M 3
+#define MMU_PAGE_256M 4
+#define MMU_PAGE_1G 5
+#define MMU_PAGE_COUNT 6
+
#ifndef __ASSEMBLY__
extern unsigned int tlbcam_index;
@@ -100,6 +225,33 @@ typedef struct {
unsigned int active;
unsigned long vdso_base;
} mm_context_t;
+
+/* Page size definitions, common between 32 and 64-bit
+ *
+ * shift : is the "PAGE_SHIFT" value for that page size
+ * penc : is the pte encoding mask
+ *
+ */
+struct mmu_psize_def
+{
+ unsigned int shift; /* number of bits */
+ unsigned int enc; /* PTE encoding */
+};
+extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+
+/* The page sizes use the same names as 64-bit hash but are
+ * constants
+ */
+#if defined(CONFIG_PPC_4K_PAGES)
+#define mmu_virtual_psize MMU_PAGE_4K
+#elif defined(CONFIG_PPC_64K_PAGES)
+#define mmu_virtual_psize MMU_PAGE_64K
+#else
+#error Unsupported page size
+#endif
+
+extern int mmu_linear_psize;
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_POWERPC_MMU_BOOK3E_H_ */
Index: linux-work/arch/powerpc/include/asm/page_64.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/page_64.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/page_64.h 2009-06-03 15:13:29.000000000 +1000
@@ -135,12 +135,22 @@ extern void slice_set_range_psize(struct
#endif /* __ASSEMBLY__ */
#else
#define slice_init()
+#ifdef CONFIG_PPC_STD_MMU_64
#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
#define slice_set_user_psize(mm, psize) \
do { \
(mm)->context.user_psize = (psize); \
(mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
} while (0)
+#else /* CONFIG_PPC_STD_MMU_64 */
+#ifdef CONFIG_PPC_64K_PAGES
+#define get_slice_psize(mm, addr) MMU_PAGE_64K
+#else /* CONFIG_PPC_64K_PAGES */
+#define get_slice_psize(mm, addr) MMU_PAGE_4K
+#endif /* !CONFIG_PPC_64K_PAGES */
+#define slice_set_user_psize(mm, psize) do { BUG(); } while(0)
+#endif /* !CONFIG_PPC_STD_MMU_64 */
+
#define slice_set_range_psize(mm, start, len, psize) \
slice_set_user_psize((mm), (psize))
#define slice_mm_new_context(mm) 1
Index: linux-work/arch/powerpc/mm/pgtable_64.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/pgtable_64.c 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/mm/pgtable_64.c 2009-06-03 16:55:50.000000000 +1000
@@ -33,6 +33,8 @@
#include <linux/stddef.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/lmb.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
@@ -55,19 +57,39 @@
unsigned long ioremap_bot = IOREMAP_BASE;
+
+#ifdef CONFIG_PPC_MMU_NOHASH
+static void *early_alloc_pgtable(unsigned long size)
+{
+ void *pt;
+
+ if (init_bootmem_done)
+ pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS));
+ else
+ pt = __va(lmb_alloc_base(size, size,
+ __pa(MAX_DMA_ADDRESS)));
+ memset(pt, 0, size);
+
+ printk("early_alloc_pgtable(0x%lx) -> 0x%p (using %s)\n",
+ size, pt, init_bootmem_done ? "bootmem" : "lmb");
+
+ return pt;
+}
+#endif /* CONFIG_PPC_MMU_NOHASH */
+
/*
- * map_io_page currently only called by __ioremap
- * map_io_page adds an entry to the ioremap page table
+ * map_kernel_page currently only called by __ioremap
+ * map_kernel_page adds an entry to the ioremap page table
* and adds an entry to the HPT, possibly bolting it
*/
-static int map_io_page(unsigned long ea, unsigned long pa, int flags)
+static int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
{
pgd_t *pgdp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
- if (mem_init_done) {
+ if (slab_is_available()) {
pgdp = pgd_offset_k(ea);
pudp = pud_alloc(&init_mm, pgdp, ea);
if (!pudp)
@@ -81,6 +103,33 @@ static int map_io_page(unsigned long ea,
set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
__pgprot(flags)));
} else {
+#ifdef CONFIG_PPC_MMU_NOHASH
+ /* Warning ! This will blow up if bootmem is not initialized
+ * which our ppc64 code is keen to do that, we'll need to
+ * fix it and/or be more careful
+ */
+ pgdp = pgd_offset_k(ea);
+ if (pgd_none(*pgdp)) {
+ pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
+ BUG_ON(pudp == NULL);
+ pgd_populate(&init_mm, pgdp, pudp);
+ }
+ pudp = pud_offset(pgdp, ea);
+ if (pud_none(*pudp)) {
+ pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
+ BUG_ON(pmdp == NULL);
+ pud_populate(&init_mm, pudp, pmdp);
+ }
+ pmdp = pmd_offset(pudp, ea);
+ if (!pmd_present(*pmdp)) {
+ ptep = early_alloc_pgtable(PAGE_SIZE);
+ BUG_ON(ptep == NULL);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+ ptep = pte_offset_kernel(pmdp, ea);
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
+ __pgprot(flags)));
+#else /* CONFIG_PPC_MMU_NOHASH */
/*
* If the mm subsystem is not fully up, we cannot create a
* linux page table entry for this mapping. Simply bolt an
@@ -93,6 +142,7 @@ static int map_io_page(unsigned long ea,
"memory at %016lx !\n", pa);
return -ENOMEM;
}
+#endif /* !CONFIG_PPC_MMU_NOHASH */
}
return 0;
}
@@ -124,7 +174,7 @@ void __iomem * __ioremap_at(phys_addr_t
WARN_ON(size & ~PAGE_MASK);
for (i = 0; i < size; i += PAGE_SIZE)
- if (map_io_page((unsigned long)ea+i, pa+i, flags))
+ if (map_kernel_page((unsigned long)ea+i, pa+i, flags))
return NULL;
return (void __iomem *)ea;
Index: linux-work/arch/powerpc/include/asm/exception-64e.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-work/arch/powerpc/include/asm/exception-64e.h 2009-06-03 15:13:29.000000000 +1000
@@ -0,0 +1,223 @@
+/*
+ * Definitions for use by exception code on Book3-E
+ *
+ * Copyright (C) 2008 Ben. Herrenschmidt (benh at kernel.crashing.org), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef __EXCEPTION_64E_H__
+#define __EXCEPTION_64E_H__
+
+/*
+ * SPRGs usage an other considerations...
+ *
+ * Since TLB miss and other standard exceptions can be interrupted by
+ * critical exceptions which can themselves be interrupted by machine
+ * checks, and since the two later can themselves cause a TLB miss when
+ * hitting the linear mapping for the kernel stacks, we need to be a bit
+ * creative on how we use SPRGs.
+ *
+ * The base idea is that we have one SRPG reserved for critical and one
+ * for machine check interrupts. Those are used to save a GPR that can
+ * then be used to get the PACA, and store as much context as we need
+ * to save in there. That includes saving the SPRGs used by the TLB miss
+ * handler for linear mapping misses and the associated SRR0/1 due to
+ * the above re-entrancy issue.
+ *
+ * So here's the current usage pattern. It's done regardless of which
+ * SPRGs are user-readable though, thus we might have to change some of
+ * this later. In order to do that more easily, we use special constants
+ * for naming them
+ *
+ * WARNING: Some of these SPRGs are user readable. We need to do something
+ * about it as some point by making sure they can't be used to leak kernel
+ * critical data
+ */
+
+/* Machine check reserved SPRG */
+#define SPRN_SPRG_MCHECK SPRN_SPRG8
+
+/* Critical interrupt reserved SPRG */
+#define SPRN_SPRG_CRIT SPRN_SPRG7
+
+/* Debug interrupt reserved SPRG */
+#define SPRN_SPRG_DBG SPRN_SPRG6
+
+/* PACA (can't be changed without updating kernel code all over though) */
+#define SPRN_SPRG_PACA SPRN_SPRG3
+
+/* Current TLB miss exception frame in PACA (can't be changed without updating
+ * the code in setup_paca() */
+#define SPRN_SPRG_TLB_EXFRAME SPRN_SPRG2
+#define SPRN_SPRG_TLB_R12 SPRN_SPRG1
+
+/* Normal exceptions only use one SPRG to save r13 and it's not clobbered
+ * by TLB misses, though it could be if the exception code was causing a
+ * protection fault. Howeverm such a fault would be fatal anyway.
+ */
+#define SPRN_SPRG_GEN SPRN_SPRG0
+
+
+/* We are out of SPRGs so we save some things in the PACA. The normal
+ * exception frame is smaller than the CRIT or MC one though
+ */
+#define EX_R1 (0 * 8)
+#define EX_CR (1 * 8)
+#define EX_R10 (2 * 8)
+#define EX_R11 (3 * 8)
+#define EX_R14 (4 * 8)
+#define EX_R15 (5 * 8)
+
+/* The TLB miss exception uses different slots */
+
+#define EX_TLB_R10 ( 0 * 8)
+#define EX_TLB_R11 ( 1 * 8)
+#define EX_TLB_R12 ( 2 * 8)
+#define EX_TLB_R13 ( 3 * 8)
+#define EX_TLB_R14 ( 4 * 8)
+#define EX_TLB_R15 ( 5 * 8)
+#define EX_TLB_R16 ( 6 * 8)
+#define EX_TLB_CR ( 7 * 8)
+#define EX_TLB_DEAR ( 8 * 8) /* Level 0 and 2 only */
+#define EX_TLB_ESR ( 9 * 8) /* Level 0 and 2 only */
+#define EX_TLB_SRR0 (10 * 8)
+#define EX_TLB_SRR1 (11 * 8)
+#define EX_TLB_MMUCR0 (12 * 8) /* Level 0 */
+#define EX_TLB_MAS1 (12 * 8) /* Level 0 */
+#define EX_TLB_MAS2 (13 * 8) /* Level 0 */
+#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
+#define EX_TLB_R8 (14 * 8)
+#define EX_TLB_R9 (15 * 8)
+#define EX_TLB_LR (16 * 8)
+#define EX_TLB_SIZE (17 * 8)
+#else
+#define EX_TLB_SIZE (14 * 8)
+#endif
+
+#define START_EXCEPTION(label) \
+ .globl exc_##label##_book3e; \
+exc_##label##_book3e:
+
+/* TLB miss exception prolog
+ *
+ * This prolog handles re-entrancy (up to 3 levels supported in the PACA
+ * though we currently don't test for overflow). It provides you with a
+ * re-entrancy safe working space of r10...r16 and CR with r12 being used
+ * as the exception area pointer in the PACA for that level of re-entrancy
+ * and r13 containing the PACA pointer.
+ *
+ * SRR0 and SRR1 are saved, but DEAR and ESR are not, since they don't apply
+ * as-is for instruction exceptions. It's up to the actual exception code
+ * to save them as well if required.
+ */
+#define TLB_MISS_PROLOG \
+ mtspr SPRN_SPRG_TLB_R12,r12; \
+ mfspr r12,SPRN_SPRG_TLB_EXFRAME; \
+ std r10,EX_TLB_R10(r12); \
+ mfcr r10; \
+ std r11,EX_TLB_R11(r12); \
+ mfspr r11,SPRN_SPRG_TLB_R12; \
+ std r13,EX_TLB_R13(r12); \
+ mfspr r13,SPRN_SPRG_PACA; \
+ std r14,EX_TLB_R14(r12); \
+ addi r14,r12,EX_TLB_SIZE; \
+ std r15,EX_TLB_R15(r12); \
+ mfspr r15,SPRN_SRR1; \
+ std r16,EX_TLB_R16(r12); \
+ mfspr r16,SPRN_SRR0; \
+ std r10,EX_TLB_CR(r12); \
+ std r11,EX_TLB_R12(r12); \
+ mtspr SPRN_SPRG_TLB_EXFRAME,r14; \
+ std r15,EX_TLB_SRR1(r12); \
+ std r16,EX_TLB_SRR0(r12); \
+ TLB_MISS_PROLOG_STATS
+
+/* And these are the matching epilogs that restores things
+ *
+ * There are 3 epilogs:
+ *
+ * - SUCCESS : Unwinds one level
+ * - ERROR : restore from level 0 and reset
+ * - ERROR_SPECIAL : restore from current level and reset
+ *
+ * Normal errors use ERROR, that is, they restore the initial fault context
+ * and trigger a fault. However, there is a special case for linear mapping
+ * errors. Those should basically never happen, but if they do happen, we
+ * want the error to point out the context that did that linear mapping
+ * fault, not the initial level 0 (basically, we got a bogus PGF or something
+ * like that). For userland errors on the linear mapping, there is no
+ * difference since those are always level 0 anyway
+ */
+
+#define TLB_MISS_RESTORE(freg) \
+ ld r14,EX_TLB_CR(r12); \
+ ld r10,EX_TLB_R10(r12); \
+ ld r15,EX_TLB_SRR0(r12); \
+ ld r16,EX_TLB_SRR1(r12); \
+ mtspr SPRN_SPRG_TLB_EXFRAME,freg; \
+ ld r11,EX_TLB_R11(r12); \
+ mtcr r14; \
+ ld r13,EX_TLB_R13(r12); \
+ ld r14,EX_TLB_R14(r12); \
+ mtspr SPRN_SRR0,r15; \
+ ld r15,EX_TLB_R15(r12); \
+ mtspr SPRN_SRR1,r16; \
+ TLB_MISS_RESTORE_STATS \
+ ld r16,EX_TLB_R16(r12); \
+ ld r12,EX_TLB_R12(r12); \
+
+#define TLB_MISS_EPILOG_SUCCESS \
+ TLB_MISS_RESTORE(r12)
+
+#define TLB_MISS_EPILOG_ERROR \
+ addi r12,r13,PACA_EXTLB; \
+ TLB_MISS_RESTORE(r12)
+
+#define TLB_MISS_EPILOG_ERROR_SPECIAL \
+ addi r11,r13,PACA_EXTLB; \
+ TLB_MISS_RESTORE(r11)
+
+#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
+#define TLB_MISS_PROLOG_STATS \
+ mflr r10; \
+ std r8,EX_TLB_R8(r12); \
+ std r9,EX_TLB_R9(r12); \
+ std r10,EX_TLB_LR(r12);
+#define TLB_MISS_RESTORE_STATS \
+ ld r16,EX_TLB_LR(r12); \
+ ld r9,EX_TLB_R9(r12); \
+ ld r8,EX_TLB_R8(r12); \
+ mtlr r16;
+#define TLB_MISS_STATS_D(name) \
+ addi r9,r13,MMSTAT_DSTATS+name; \
+ bl .tlb_stat_inc;
+#define TLB_MISS_STATS_I(name) \
+ addi r9,r13,MMSTAT_ISTATS+name; \
+ bl .tlb_stat_inc;
+#define TLB_MISS_STATS_X(name) \
+ ld r8,PACA_EXTLB+EX_TLB_ESR(r13); \
+ cmpdi cr2,r8,-1; \
+ beq cr2,61f; \
+ addi r9,r13,MMSTAT_DSTATS+name; \
+ b 62f; \
+61: addi r9,r13,MMSTAT_ISTATS+name; \
+62: bl .tlb_stat_inc;
+#define TLB_MISS_STATS_SAVE_INFO \
+ std r14,EX_TLB_ESR(r12); /* save ESR */ \
+
+
+#else
+#define TLB_MISS_PROLOG_STATS
+#define TLB_MISS_RESTORE_STATS
+#define TLB_MISS_STATS_D(name)
+#define TLB_MISS_STATS_I(name)
+#define TLB_MISS_STATS_X(name)
+#define TLB_MISS_STATS_Y(name)
+#define TLB_MISS_STATS_SAVE_INFO
+#endif
+
+
+#endif /* __EXCEPTIONS_64E_H__ */
Index: linux-work/arch/powerpc/kernel/Makefile
===================================================================
--- linux-work.orig/arch/powerpc/kernel/Makefile 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/kernel/Makefile 2009-06-03 15:13:29.000000000 +1000
@@ -31,10 +31,10 @@ obj-y := cputable.o ptrace.o syscalls
obj-y += vdso32/
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
- paca.o cpu_setup_ppc970.o \
- cpu_setup_pa6t.o \
- firmware.o nvram_64.o
+ paca.o nvram_64.o firmware.o
+obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
+obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o
obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o
obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
@@ -61,8 +61,8 @@ obj-$(CONFIG_MODULES) += module.o modul
obj-$(CONFIG_44x) += cpu_setup_44x.o
obj-$(CONFIG_FSL_BOOKE) += cpu_setup_fsl_booke.o dbell.o
-extra-$(CONFIG_PPC_STD_MMU) := head_32.o
-extra-$(CONFIG_PPC64) := head_64.o
+extra-y := head_$(CONFIG_WORD_SIZE).o
+extra-$(CONFIG_PPC_BOOK3E_32) := head_new_booke.o
extra-$(CONFIG_40x) := head_40x.o
extra-$(CONFIG_44x) := head_44x.o
extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
Index: linux-work/arch/powerpc/include/asm/reg.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/reg.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/reg.h 2009-06-03 15:13:29.000000000 +1000
@@ -98,19 +98,15 @@
#define MSR_RI __MASK(MSR_RI_LG) /* Recoverable Exception */
#define MSR_LE __MASK(MSR_LE_LG) /* Little Endian */
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC_BOOK3S_64)
+/* Server variant */
#define MSR_ MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV
#define MSR_KERNEL MSR_ | MSR_SF
-
#define MSR_USER32 MSR_ | MSR_PR | MSR_EE
#define MSR_USER64 MSR_USER32 | MSR_SF
-
-#else /* 32-bit */
+#elif defined(CONFIG_PPC_BOOK3S_32)
/* Default MSR for kernel mode. */
-#ifndef MSR_KERNEL /* reg_booke.h also defines this */
#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR)
-#endif
-
#define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE)
#endif
Index: linux-work/arch/powerpc/include/asm/reg_booke.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/reg_booke.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/reg_booke.h 2009-06-03 15:13:29.000000000 +1000
@@ -18,18 +18,26 @@
#define MSR_IS MSR_IR /* Instruction Space */
#define MSR_DS MSR_DR /* Data Space */
#define MSR_PMM (1<<2) /* Performance monitor mark bit */
+#define MSR_CM (1<<31) /* Computation Mode (0=32-bit, 1=64-bit) */
-/* Default MSR for kernel mode. */
-#if defined (CONFIG_40x)
+#if defined(CONFIG_PPC_BOOK3E_64)
+#define MSR_ MSR_ME | MSR_CE
+#define MSR_KERNEL MSR_ | MSR_CM
+#define MSR_USER32 MSR_ | MSR_PR | MSR_EE
+#define MSR_USER64 MSR_USER32 | MSR_CM
+#elif defined (CONFIG_40x)
#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_CE)
-#elif defined(CONFIG_BOOKE)
+#define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE)
+#else
#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_CE)
+#define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE)
#endif
/* Special Purpose Registers (SPRNs)*/
#define SPRN_DECAR 0x036 /* Decrementer Auto Reload Register */
#define SPRN_IVPR 0x03F /* Interrupt Vector Prefix Register */
#define SPRN_USPRG0 0x100 /* User Special Purpose Register General 0 */
+#define SPRN_SPRG3R 0x103 /* Special Purpose Register General 4 Read */
#define SPRN_SPRG4R 0x104 /* Special Purpose Register General 4 Read */
#define SPRN_SPRG5R 0x105 /* Special Purpose Register General 5 Read */
#define SPRN_SPRG6R 0x106 /* Special Purpose Register General 6 Read */
@@ -38,11 +46,18 @@
#define SPRN_SPRG5W 0x115 /* Special Purpose Register General 5 Write */
#define SPRN_SPRG6W 0x116 /* Special Purpose Register General 6 Write */
#define SPRN_SPRG7W 0x117 /* Special Purpose Register General 7 Write */
+#define SPRN_EPCR 0x133 /* Embedded Processor Control Register */
#define SPRN_DBCR2 0x136 /* Debug Control Register 2 */
#define SPRN_IAC3 0x13A /* Instruction Address Compare 3 */
#define SPRN_IAC4 0x13B /* Instruction Address Compare 4 */
#define SPRN_DVC1 0x13E /* Data Value Compare Register 1 */
#define SPRN_DVC2 0x13F /* Data Value Compare Register 2 */
+#define SPRN_MAS8 0x155 /* MMU Assist Register 8 */
+#define SPRN_TLB0PS 0x158 /* TLB 0 Page Size Register */
+#define SPRN_MAS5_MAS6 0x15c /* MMU Assist Register 5 || 6 */
+#define SPRN_MAS8_MAS1 0x15d /* MMU Assist Register 8 || 1 */
+#define SPRN_MAS7_MAS3 0x174 /* MMU Assist Register 7 || 3 */
+#define SPRN_MAS0_MAS1 0x175 /* MMU Assist Register 0 || 1 */
#define SPRN_IVOR0 0x190 /* Interrupt Vector Offset Register 0 */
#define SPRN_IVOR1 0x191 /* Interrupt Vector Offset Register 1 */
#define SPRN_IVOR2 0x192 /* Interrupt Vector Offset Register 2 */
@@ -423,6 +438,19 @@
#define SGR_NORMAL 0 /* Speculative fetching allowed. */
#define SGR_GUARDED 1 /* Speculative fetching disallowed. */
+/* Bit definitions for EPCR */
+#define SPRN_EPCR_EXTGS 0x80000000
+#define SPRN_EPCR_DTLBGS 0x40000000
+#define SPRN_EPCR_ITLBGS 0x20000000
+#define SPRN_EPCR_DSIGS 0x10000000
+#define SPRN_EPCR_ISIGS 0x08000000
+#define SPRN_EPCR_DUVD 0x04000000
+#define SPRN_EPCR_ICM 0x02000000
+#define SPRN_EPCR_GICM 0x01000000
+#define SPRN_EPCR_DGTMI 0x00800000
+#define SPRN_EPCR_DMIUH 0x00400000
+
+
/*
* The IBM-403 is an even more odd special case, as it is much
* older than the IBM-405 series. We put these down here incase someone
@@ -478,3 +506,4 @@
#endif /* 403GCX */
#endif /* __ASM_POWERPC_REG_BOOKE_H__ */
#endif /* __KERNEL__ */
+
Index: linux-work/arch/powerpc/mm/tlb_low_64e.S
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-work/arch/powerpc/mm/tlb_low_64e.S 2009-06-03 15:13:29.000000000 +1000
@@ -0,0 +1,733 @@
+/*
+ * Low leve TLB miss handlers for Book3E
+ *
+ * Copyright (C) 2008-2009
+ * Ben. Herrenschmidt (benh at kernel.crashing.org), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/pgtable.h>
+#include <asm/reg.h>
+#include <asm/exception-64e.h>
+#include <asm/ppc-opcode.h>
+
+#ifdef CONFIG_PPC_64K_PAGES
+#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1)
+#else
+#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE)
+#endif
+#define VPTE_PUD_SHIFT (VPTE_PMD_SHIFT + PMD_INDEX_SIZE)
+#define VPTE_PGD_SHIFT (VPTE_PUD_SHIFT + PUD_INDEX_SIZE)
+#define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE)
+
+
+/**********************************************************************
+ * *
+ * TLB miss handling for Book3E with TLB reservation and HES support *
+ * *
+ **********************************************************************/
+
+
+/* Data TLB miss */
+ START_EXCEPTION(data_tlb_miss)
+ TLB_MISS_PROLOG
+
+ /* Now we handle the fault proper. We only save DEAR in normal
+ * fault case since that's the only interesting values here.
+ * We could probably also optimize by not saving SRR0/1 in the
+ * linear mapping case but I'll leave that for later
+ */
+ mfspr r14,SPRN_ESR
+ mfspr r16,SPRN_DEAR /* get faulting address */
+ srdi r15,r16,60 /* get region */
+ cmpldi cr0,r15,0xc /* linear mapping ? */
+ TLB_MISS_STATS_SAVE_INFO
+ beq tlb_load_linear /* yes -> go to linear map load */
+
+ /* The page tables are mapped virtually linear. At this point, though,
+ * we don't know whether we are trying to fault in a first level
+ * virtual address or a virtual page table address. We can get that
+ * from bit 0x1 of the region ID which we have set for a page table
+ */
+ andi. r10,r15,0x1
+ bne- virt_page_table_tlb_miss
+
+ std r14,EX_TLB_ESR(r12); /* save ESR */
+ std r16,EX_TLB_DEAR(r12); /* save DEAR */
+
+ /* We need _PAGE_PRESENT and _PAGE_ACCESSED set */
+ li r11,_PAGE_PRESENT
+ oris r11,r11,_PAGE_ACCESSED at h
+
+ /* We do the user/kernel test for the PID here along with the RW test
+ */
+ cmpldi cr0,r15,0 /* Check for user region */
+
+ /* We pre-test some combination of permissions to avoid double
+ * faults:
+ *
+ * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE
+ * ESR_ST is 0x00800000
+ * _PAGE_BAP_SW is 0x00000010
+ * So the shift is >> 19. This tests for supervisor writeability.
+ * If the page happens to be supervisor writeable and not user
+ * writeable, we will take a new fault later, but that should be
+ * a rare enough case.
+ *
+ * We also move ESR_ST in _PAGE_DIRTY position
+ * _PAGE_DIRTY is 0x00001000 so the shift is >> 11
+ *
+ * MAS1 is preset for all we need except for TID that needs to
+ * be cleared for kernel translations
+ */
+ rlwimi r11,r14,32-19,27,27
+ rlwimi r11,r14,32-16,19,19
+ beq normal_tlb_miss
+ /* XXX replace the RMW cycles with immediate loads + writes */
+1: mfspr r10,SPRN_MAS1
+ cmpldi cr0,r15,8 /* Check for vmalloc region */
+ rlwinm r10,r10,0,16,1 /* Clear TID */
+ mtspr SPRN_MAS1,r10
+ beq+ normal_tlb_miss
+
+ /* We got a crappy address, just fault with whatever DEAR and ESR
+ * are here
+ */
+ TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
+ TLB_MISS_EPILOG_ERROR
+ b exc_data_storage_book3e
+
+/* Instruction TLB miss */
+ START_EXCEPTION(instruction_tlb_miss)
+ TLB_MISS_PROLOG
+
+ /* If we take a recursive fault, the second level handler may need
+ * to know whether we are handling a data or instruction fault in
+ * order to get to the right store fault handler. We provide that
+ * info by writing a crazy value in ESR in our exception frame
+ */
+ li r14,-1 /* store to exception frame is done later */
+
+ /* Now we handle the fault proper. We only save DEAR in the non
+ * linear mapping case since we know the linear mapping case will
+ * not re-enter. We could indeed optimize and also not save SRR0/1
+ * in the linear mapping case but I'll leave that for later
+ *
+ * Faulting address is SRR0 which is already in r16
+ */
+ srdi r15,r16,60 /* get region */
+ cmpldi cr0,r15,0xc /* linear mapping ? */
+ TLB_MISS_STATS_SAVE_INFO
+ beq tlb_load_linear /* yes -> go to linear map load */
+
+ /* We do the user/kernel test for the PID here along with the RW test
+ */
+ li r11,_PAGE_PRESENT|_PAGE_HWEXEC /* Base perm */
+ oris r11,r11,_PAGE_ACCESSED at h
+
+ cmpldi cr0,r15,0 /* Check for user region */
+ std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */
+ beq normal_tlb_miss
+ /* XXX replace the RMW cycles with immediate loads + writes */
+1: mfspr r10,SPRN_MAS1
+ cmpldi cr0,r15,8 /* Check for vmalloc region */
+ rlwinm r10,r10,0,16,1 /* Clear TID */
+ mtspr SPRN_MAS1,r10
+ beq+ normal_tlb_miss
+
+ /* We got a crappy address, just fault */
+ TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
+ TLB_MISS_EPILOG_ERROR
+ b exc_instruction_storage_book3e
+
+/*
+ * This is the guts of the first-level TLB miss handler for direct
+ * misses. We are entered with:
+ *
+ * r16 = faulting address
+ * r15 = region ID
+ * r14 = crap (free to use)
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = PTE permission mask
+ * r10 = crap (free to use)
+ */
+normal_tlb_miss:
+ /* So we first construct the page table address. We do that by
+ * shifting the bottom of the address (not the region ID) by
+ * PAGE_SHIFT-3, clearing the bottom 3 bits (get a PTE ptr) and
+ * or'ing the fourth high bit.
+ *
+ * NOTE: For 64K pages, we do things slightly differently in
+ * order to handle the weird page table format used by linux
+ */
+ ori r10,r15,0x1
+#ifdef CONFIG_PPC_64K_PAGES
+ /* For the top bits, 16 bytes per PTE */
+ rldicl r14,r16,64-(PAGE_SHIFT-4),PAGE_SHIFT-4+4
+ /* Now create the bottom bits as 0 in position 0x8000 and
+ * the rest calculated for 8 bytes per PTE
+ */
+ rldicl r15,r16,64-(PAGE_SHIFT-3),64-15
+ /* Insert the bottom bits in */
+ rlwimi r14,r15,0,16,31
+#else
+ rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4
+#endif
+ sldi r15,r10,60
+ clrrdi r14,r14,3
+ or r10,r15,r14
+
+ /* Set the TLB reservation and seach for existing entry. Then load
+ * the entry.
+ */
+ PPC_TLBSRX_DOT(0,r16)
+ ld r14,0(r10)
+ beq normal_tlb_miss_done
+
+finish_normal_tlb_miss:
+ /* Check if required permissions are met */
+ andc. r15,r11,r14
+ bne- normal_tlb_miss_access_fault
+
+ /* Now we build the MAS:
+ *
+ * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG
+ * MAS 1 : Almost fully setup
+ * - PID already updated by caller if necessary
+ * - TSIZE need change if !base page size, not
+ * yet implemented for now
+ * MAS 2 : Defaults not useful, need to be redone
+ * MAS 3+7 : Needs to be done
+ *
+ * TODO: mix up code below for better scheduling
+ */
+ clrrdi r11,r16,12 /* Clear low crap in EA */
+ rlwimi r11,r14,32-19,27,31 /* Insert WIMGE */
+ mtspr SPRN_MAS2,r11
+
+ /* Check page size, if not standard, update MAS1 */
+ rldicl r11,r14,64-8,64-8
+#ifdef CONFIG_PPC_64K_PAGES
+ cmpldi cr0,r11,BOOK3E_PAGESZ_64K
+#else
+ cmpldi cr0,r11,BOOK3E_PAGESZ_4K
+#endif
+ beq- 1f
+ mfspr r11,SPRN_MAS1
+ rlwimi r11,r14,31,21,24
+ rlwinm r11,r11,0,21,19
+ mtspr SPRN_MAS1,r11
+1:
+ /* Move RPN in position */
+ rldicr r11,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
+ clrldi r15,r11,12 /* Clear crap at the top */
+ rlwimi r15,r14,32-8,22,25 /* Move in U bits */
+ rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */
+
+ /* Mask out SW and UW if !DIRTY (XXX optimize this !) */
+ andi. r11,r14,_PAGE_DIRTY
+ bne 1f
+ li r11,SPRN_MAS3_SW|SPRN_MAS3_UW
+ andc r15,r15,r11
+1: mtspr SPRN_MAS7_MAS3,r15
+
+ tlbwe
+
+normal_tlb_miss_done:
+ /* We don't bother with restoring DEAR or ESR since we know we are
+ * level 0 and just going back to userland. They are only needed
+ * if you are going to take an access fault
+ */
+ TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
+ TLB_MISS_EPILOG_SUCCESS
+ rfi
+
+normal_tlb_miss_access_fault:
+ /* We need to check if it was an instruction miss */
+ andi. r10,r11,_PAGE_HWEXEC
+ bne 1f
+ ld r14,EX_TLB_DEAR(r12)
+ ld r15,EX_TLB_ESR(r12)
+ mtspr SPRN_DEAR,r14
+ mtspr SPRN_ESR,r15
+ TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
+ TLB_MISS_EPILOG_ERROR
+ b exc_data_storage_book3e
+1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
+ TLB_MISS_EPILOG_ERROR
+ b exc_instruction_storage_book3e
+
+
+/*
+ * This is the guts of the second-level TLB miss handler for direct
+ * misses. We are entered with:
+ *
+ * r16 = virtual page table faulting address
+ * r15 = region (top 4 bits of address)
+ * r14 = crap (free to use)
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = crap (free to use)
+ * r10 = crap (free to use)
+ *
+ * Note that this should only ever be called as a second level handler
+ * with the current scheme when using SW load.
+ * That means we can always get the original fault DEAR at
+ * EX_TLB_DEAR-EX_TLB_SIZE(r12)
+ *
+ * It can be re-entered by the linear mapping miss handler. However, to
+ * avoid too much complication, it will restart the whole fault at level
+ * 0 so we don't care too much about clobbers
+ *
+ * XXX That code was written back when we couldn't clobber r14. We can now,
+ * so we could probably optimize things a bit
+ */
+virt_page_table_tlb_miss:
+ /* Are we hitting a kernel page table ? */
+ andi. r10,r15,0x8
+
+ /* The cool thing now is that r10 contains 0 for user and 8 for kernel,
+ * and we happen to have the swapper_pg_dir at offset 8 from the user
+ * pgdir in the PACA :-).
+ */
+ add r11,r10,r13
+
+ /* If kernel, we need to clear MAS1 TID */
+ beq 1f
+ /* XXX replace the RMW cycles with immediate loads + writes */
+ mfspr r10,SPRN_MAS1
+ rlwinm r10,r10,0,16,1 /* Clear TID */
+ mtspr SPRN_MAS1,r10
+1:
+ /* Search if we already have a TLB entry for that virtual address, and
+ * if we do, bail out.
+ */
+ PPC_TLBSRX_DOT(0,r16)
+ beq virt_page_table_tlb_miss_done
+
+ /* Now, we need to walk the page tables. First check if we are in
+ * range.
+ */
+ rldicl. r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4
+ bne- virt_page_table_tlb_miss_fault
+
+ /* Get the PGD pointer */
+ ld r15,PACAPGD(r11)
+ cmpldi cr0,r15,0
+ beq- virt_page_table_tlb_miss_fault
+
+ /* Get to PGD entry */
+ rldicl r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3
+ clrrdi r10,r11,3
+ ldx r15,r10,r15
+ cmpldi cr0,r15,0
+ beq virt_page_table_tlb_miss_fault
+
+#ifndef CONFIG_PPC_64K_PAGES
+ /* Get to PUD entry */
+ rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3
+ clrrdi r10,r11,3
+ ldx r15,r10,r15
+ cmpldi cr0,r15,0
+ beq virt_page_table_tlb_miss_fault
+#endif /* CONFIG_PPC_64K_PAGES */
+
+ /* Get to PMD entry */
+ rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3
+ clrrdi r10,r11,3
+ ldx r15,r10,r15
+ cmpldi cr0,r15,0
+ beq virt_page_table_tlb_miss_fault
+
+ /* Ok, we're all right, we can now create a kernel translation for
+ * a 4K or 64K page from r16 -> r15.
+ */
+ /* Now we build the MAS:
+ *
+ * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG
+ * MAS 1 : Almost fully setup
+ * - PID already updated by caller if necessary
+ * - TSIZE for now is base page size always
+ * MAS 2 : Use defaults
+ * MAS 3+7 : Needs to be done
+ *
+ * So we only do MAS 2 and 3 for now...
+ */
+ clrldi r11,r15,4 /* remove region ID from RPN */
+ ori r10,r11,1 /* Or-in SR */
+ mtspr SPRN_MAS7_MAS3,r10
+
+ tlbwe
+
+virt_page_table_tlb_miss_done:
+
+ /* We have overriden MAS2:EPN but currently our primary TLB miss
+ * handler will always restore it so that should not be an issue,
+ * if we ever optimize the primary handler to not write MAS2 on
+ * some cases, we'll have to restore MAS2:EPN here based on the
+ * original fault's DEAR. If we do that we have to modify the
+ * ITLB miss handler to also store SRR0 in the exception frame
+ * as DEAR.
+ *
+ * However, one nasty thing we did is we cleared the reservation
+ * (well, potentially we did). We do a trick here thus if we
+ * are not a level 0 exception (we interrupted the TLB miss) we
+ * offset the return address by -4 in order to replay the tlbsrx
+ * instruction there
+ */
+ subf r10,r13,r12
+ cmpldi cr0,r10,PACA_EXTLB+EX_TLB_SIZE
+ bne- 1f
+ ld r11,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13)
+ addi r10,r11,-4
+ std r10,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13)
+1:
+ /* Return to caller, normal case */
+ TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK);
+ TLB_MISS_EPILOG_SUCCESS
+ rfi
+
+virt_page_table_tlb_miss_fault:
+ /* If we fault here, things are a little bit tricky. We need to call
+ * either data or instruction store fault, and we need to retreive
+ * the original fault address and ESR (for data).
+ *
+ * The thing is, we know that in normal circumstances, this is
+ * always called as a second level tlb miss for SW load or as a first
+ * level TLB miss for HW load, so we should be able to peek at the
+ * relevant informations in the first exception frame in the PACA.
+ *
+ * However, we do need to double check that, because we may just hit
+ * a stray kernel pointer or a userland attack trying to hit those
+ * areas. If that is the case, we do a data fault. (We can't get here
+ * from an instruction tlb miss anyway).
+ *
+ * Note also that when going to a fault, we must unwind the previous
+ * level as well. Since we are doing that, we don't need to clear or
+ * restore the TLB reservation neither.
+ */
+ subf r10,r13,r12
+ cmpldi cr0,r10,PACA_EXTLB+EX_TLB_SIZE
+ bne- virt_page_table_tlb_miss_whacko_fault
+
+ /* We dig the original DEAR and ESR from slot 0 */
+ ld r15,EX_TLB_DEAR+PACA_EXTLB(r13)
+ ld r16,EX_TLB_ESR+PACA_EXTLB(r13)
+
+ /* We check for the "special" ESR value for instruction faults */
+ cmpdi cr0,r16,-1
+ beq 1f
+ mtspr SPRN_DEAR,r15
+ mtspr SPRN_ESR,r16
+ TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT);
+ TLB_MISS_EPILOG_ERROR
+ b exc_data_storage_book3e
+1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT);
+ TLB_MISS_EPILOG_ERROR
+ b exc_instruction_storage_book3e
+
+virt_page_table_tlb_miss_whacko_fault:
+ /* The linear fault will restart everything so ESR and DEAR will
+ * not have been clobbered, let's just fault with what we have
+ */
+ TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_FAULT);
+ TLB_MISS_EPILOG_ERROR
+ b exc_data_storage_book3e
+
+
+/**************************************************************
+ * *
+ * TLB miss handling for Book3E with hw page table support *
+ * *
+ **************************************************************/
+
+
+/* Data TLB miss */
+ START_EXCEPTION(data_tlb_miss_htw)
+ TLB_MISS_PROLOG
+
+ /* Now we handle the fault proper. We only save DEAR in normal
+ * fault case since that's the only interesting values here.
+ * We could probably also optimize by not saving SRR0/1 in the
+ * linear mapping case but I'll leave that for later
+ */
+ mfspr r14,SPRN_ESR
+ mfspr r16,SPRN_DEAR /* get faulting address */
+ srdi r11,r16,60 /* get region */
+ cmpldi cr0,r11,0xc /* linear mapping ? */
+ TLB_MISS_STATS_SAVE_INFO
+ beq tlb_load_linear /* yes -> go to linear map load */
+
+ /* We do the user/kernel test for the PID here along with the RW test
+ */
+ cmpldi cr0,r11,0 /* Check for user region */
+ ld r15,PACAPGD(r13) /* Load user pgdir */
+ beq htw_tlb_miss
+
+ /* XXX replace the RMW cycles with immediate loads + writes */
+1: mfspr r10,SPRN_MAS1
+ cmpldi cr0,r11,8 /* Check for vmalloc region */
+ rlwinm r10,r10,0,16,1 /* Clear TID */
+ mtspr SPRN_MAS1,r10
+ ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */
+ beq+ htw_tlb_miss
+
+ /* We got a crappy address, just fault with whatever DEAR and ESR
+ * are here
+ */
+ TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
+ TLB_MISS_EPILOG_ERROR
+ b exc_data_storage_book3e
+
+/* Instruction TLB miss */
+ START_EXCEPTION(instruction_tlb_miss_htw)
+ TLB_MISS_PROLOG
+
+ /* If we take a recursive fault, the second level handler may need
+ * to know whether we are handling a data or instruction fault in
+ * order to get to the right store fault handler. We provide that
+ * info by keeping a crazy value for ESR in r14
+ */
+ li r14,-1 /* store to exception frame is done later */
+
+ /* Now we handle the fault proper. We only save DEAR in the non
+ * linear mapping case since we know the linear mapping case will
+ * not re-enter. We could indeed optimize and also not save SRR0/1
+ * in the linear mapping case but I'll leave that for later
+ *
+ * Faulting address is SRR0 which is already in r16
+ */
+ srdi r11,r16,60 /* get region */
+ cmpldi cr0,r11,0xc /* linear mapping ? */
+ TLB_MISS_STATS_SAVE_INFO
+ beq tlb_load_linear /* yes -> go to linear map load */
+
+ /* We do the user/kernel test for the PID here along with the RW test
+ */
+ cmpldi cr0,r11,0 /* Check for user region */
+ ld r15,PACAPGD(r13) /* Load user pgdir */
+ beq htw_tlb_miss
+
+ /* XXX replace the RMW cycles with immediate loads + writes */
+1: mfspr r10,SPRN_MAS1
+ cmpldi cr0,r11,8 /* Check for vmalloc region */
+ rlwinm r10,r10,0,16,1 /* Clear TID */
+ mtspr SPRN_MAS1,r10
+ ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */
+ beq+ htw_tlb_miss
+
+ /* We got a crappy address, just fault */
+ TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
+ TLB_MISS_EPILOG_ERROR
+ b exc_instruction_storage_book3e
+
+
+/*
+ * This is the guts of the second-level TLB miss handler for direct
+ * misses. We are entered with:
+ *
+ * r16 = virtual page table faulting address
+ * r15 = PGD pointer
+ * r14 = ESR
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = crap (free to use)
+ * r10 = crap (free to use)
+ *
+ * It can be re-entered by the linear mapping miss handler. However, to
+ * avoid too much complication, it will save/restore things for us
+ */
+htw_tlb_miss:
+ /* Search if we already have a TLB entry for that virtual address, and
+ * if we do, bail out.
+ *
+ * MAS1:IND should be already set based on MAS4
+ */
+ PPC_TLBSRX_DOT(0,r16)
+ beq htw_tlb_miss_done
+
+ /* Now, we need to walk the page tables. First check if we are in
+ * range.
+ */
+ rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+ bne- htw_tlb_miss_fault
+
+ /* Get the PGD pointer */
+ cmpldi cr0,r15,0
+ beq- htw_tlb_miss_fault
+
+ /* Get to PGD entry */
+ rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3
+ clrrdi r10,r11,3
+ ldx r15,r10,r15
+ cmpldi cr0,r15,0
+ beq htw_tlb_miss_fault
+
+#ifndef CONFIG_PPC_64K_PAGES
+ /* Get to PUD entry */
+ rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3
+ clrrdi r10,r11,3
+ ldx r15,r10,r15
+ cmpldi cr0,r15,0
+ beq htw_tlb_miss_fault
+#endif /* CONFIG_PPC_64K_PAGES */
+
+ /* Get to PMD entry */
+ rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3
+ clrrdi r10,r11,3
+ ldx r15,r10,r15
+ cmpldi cr0,r15,0
+ beq htw_tlb_miss_fault
+
+ /* Ok, we're all right, we can now create an indirect entry for
+ * a 1M or 256M page.
+ *
+ * The last trick is now that because we use "half" pages for
+ * the HTW (1M IND is 2K and 256M IND is 32K) we need to account
+ * for an added LSB bit to the RPN. For 64K pages, there is no
+ * problem as we already use 32K arrays (half PTE pages), but for
+ * 4K page we need to extract a bit from the virtual address and
+ * insert it into the "PA52" bit of the RPN.
+ */
+#ifndef CONFIG_PPC_64K_PAGES
+ rlwimi r15,r16,32-9,20,20
+#endif
+ /* Now we build the MAS:
+ *
+ * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG
+ * MAS 1 : Almost fully setup
+ * - PID already updated by caller if necessary
+ * - TSIZE for now is base ind page size always
+ * MAS 2 : Use defaults
+ * MAS 3+7 : Needs to be done
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+ ori r10,r15,(BOOK3E_PAGESZ_64K << SPRN_MAS3_SPSIZE_SHIFT)
+#else
+ ori r10,r15,(BOOK3E_PAGESZ_4K << SPRN_MAS3_SPSIZE_SHIFT)
+#endif
+ mtspr SPRN_MAS7_MAS3,r10
+
+ tlbwe
+
+htw_tlb_miss_done:
+ /* We don't bother with restoring DEAR or ESR since we know we are
+ * level 0 and just going back to userland. They are only needed
+ * if you are going to take an access fault
+ */
+ TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK)
+ TLB_MISS_EPILOG_SUCCESS
+ rfi
+
+htw_tlb_miss_fault:
+ /* We need to check if it was an instruction miss. We know this
+ * though because r14 would contain -1
+ */
+ cmpdi cr0,r14,-1
+ beq 1f
+ mtspr SPRN_DEAR,r16
+ mtspr SPRN_ESR,r14
+ TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT)
+ TLB_MISS_EPILOG_ERROR
+ b exc_data_storage_book3e
+1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT)
+ TLB_MISS_EPILOG_ERROR
+ b exc_instruction_storage_book3e
+
+/*
+ * This is the guts of "any" level TLB miss handler for kernel linear
+ * mapping misses. We are entered with:
+ *
+ *
+ * r16 = faulting address
+ * r15 = crap (free to use)
+ * r14 = ESR (data) or -1 (instruction)
+ * r13 = PACA
+ * r12 = TLB exception frame in PACA
+ * r11 = crap (free to use)
+ * r10 = crap (free to use)
+ *
+ * In addition we know that we will not re-enter, so in theory, we could
+ * use a simpler epilog not restoring SRR0/1 etc.. but we'll do that later.
+ *
+ * We also need to be careful about MAS registers here & TLB reservation,
+ * as we know we'll have clobbered them if we interrupt the main TLB miss
+ * handlers in which case we probably want to do a full restart at level
+ * 0 rather than saving / restoring the MAS.
+ *
+ * Note: If we care about performance of that core, we can easily shuffle
+ * a few things around
+ */
+tlb_load_linear:
+ /* For now, we assume the linear mapping is contiguous and stops at
+ * linear_map_top. We also assume the size is a multiple of 1G, thus
+ * we only use 1G pages for now. That might have to be changed in a
+ * final implementation, especially when dealing with hypervisors
+ */
+ ld r11,PACATOC(r13)
+ ld r11,linear_map_top at got(r11)
+ ld r10,0(r11)
+ cmpld cr0,r10,r16
+ bge tlb_load_linear_fault
+
+ /* MAS1 need whole new setup. */
+ li r15,(BOOK3E_PAGESZ_1GB<<SPRN_MAS1_TSIZE_SHIFT)
+ oris r15,r15,SPRN_MAS1_V at h /* MAS1 needs V and TSIZE */
+ mtspr SPRN_MAS1,r15
+
+ /* Already somebody there ? */
+ PPC_TLBSRX_DOT(0,r16)
+ beq tlb_load_linear_done
+
+ /* Now we build the remaining MAS. MAS0 and 2 should be fine
+ * with their defaults, which leaves us with MAS 3 and 7. The
+ * mapping is linear, so we just take the address, clear the
+ * region bits, and or in the permission bits which are currently
+ * hard wired
+ */
+ clrrdi r10,r16,30 /* 1G page index */
+ clrldi r10,r10,4 /* clear region bits */
+ ori r10,r10,SPRN_MAS3_SR|SPRN_MAS3_SW|SPRN_MAS3_SX
+ mtspr SPRN_MAS7_MAS3,r10
+
+ tlbwe
+
+tlb_load_linear_done:
+ /* We use the "error" epilog for success as we do want to
+ * restore to the initial faulting context, whatever it was.
+ * We do that because we can't resume a fault within a TLB
+ * miss handler, due to MAS and TLB reservation being clobbered.
+ */
+ TLB_MISS_STATS_X(MMSTAT_TLB_MISS_LINEAR)
+ TLB_MISS_EPILOG_ERROR
+ rfi
+
+tlb_load_linear_fault:
+ /* We keep the DEAR and ESR around, this shouldn't have happened */
+ cmpdi cr0,r14,-1
+ beq 1f
+ TLB_MISS_EPILOG_ERROR_SPECIAL
+ b exc_data_storage_book3e
+1: TLB_MISS_EPILOG_ERROR_SPECIAL
+ b exc_instruction_storage_book3e
+
+
+#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
+.tlb_stat_inc:
+1: ldarx r8,0,r9
+ addi r8,r8,1
+ stdcx. r8,0,r9
+ bne- 1b
+ blr
+#endif
Index: linux-work/arch/powerpc/kernel/exceptions-64e.S
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-work/arch/powerpc/kernel/exceptions-64e.S 2009-06-03 16:18:23.000000000 +1000
@@ -0,0 +1,794 @@
+/*
+ * Boot code and exception vectors for Book3E processors
+ *
+ * Copyright (C) 2007 Ben. Herrenschmidt (benh at kernel.crashing.org), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/setup.h>
+#include <asm/thread_info.h>
+#include <asm/reg.h>
+#include <asm/exception-64e.h>
+#include <asm/bug.h>
+#include <asm/irqflags.h>
+#include <asm/ptrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/mmu.h>
+
+/* XXX This will ultimately add space for a special exception save
+ * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
+ * when taking special interrupts. For now we don't support that,
+ * special interrupts from within a non-standard level will probably
+ * blow you up
+ */
+#define SPECIAL_EXC_FRAME_SIZE INT_FRAME_SIZE
+
+/* Exception prolog code for all exceptions */
+#define EXCEPTION_PROLOG(n, type, addition) \
+ mtspr SPRN_SPRG_##type,r13; /* get spare registers */ \
+ mfspr r13,SPRN_SPRG_PACA; /* get PACA */ \
+ std r10,PACA_EX##type+EX_R10(r13); \
+ std r11,PACA_EX##type+EX_R11(r13); \
+ mfcr r10; /* save CR */ \
+ addition; /* additional code for that exc. */ \
+ std r1,PACA_EX##type+EX_R1(r13); /* save old r1 in the PACA */ \
+ stw r10,PACA_EX##type+EX_CR(r13); /* save old CR in the PACA */ \
+ mfspr r11,SPRN_##type##_SRR1;/* what are we coming from */ \
+ type##_SET_KSTACK; /* get special stack if necessary */\
+ andi. r10,r11,MSR_PR; /* save stack pointer */ \
+ beq 1f; /* branch around if supervisor */ \
+ ld r1,PACAKSAVE(r13); /* get kernel stack coming from usr */\
+1: cmpdi cr1,r1,0; /* check if SP makes sense */ \
+ bge- cr1,exc_##n##_bad_stack;/* bad stack (TODO: out of line) */ \
+ mfspr r10,SPRN_##type##_SRR0; /* read SRR0 before touching stack */
+
+/* Exception type-specific macros */
+#define GEN_SET_KSTACK \
+ subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */
+#define SPRN_GEN_SRR0 SPRN_SRR0
+#define SPRN_GEN_SRR1 SPRN_SRR1
+
+#define CRIT_SET_KSTACK \
+ ld r1,PACA_CRIT_STACK(r13); \
+ subi r1,r1,SPECIAL_EXC_FRAME_SIZE;
+#define SPRN_CRIT_SRR0 SPRN_CSRR0
+#define SPRN_CRIT_SRR1 SPRN_CSRR1
+
+#define DBG_SET_KSTACK \
+ ld r1,PACA_DBG_STACK(r13); \
+ subi r1,r1,SPECIAL_EXC_FRAME_SIZE;
+#define SPRN_DBG_SRR0 SPRN_DSRR0
+#define SPRN_DBG_SRR1 SPRN_DSRR1
+
+#define MC_SET_KSTACK \
+ ld r1,PACA_MC_STACK(r13); \
+ subi r1,r1,SPECIAL_EXC_FRAME_SIZE;
+#define SPRN_MC_SRR0 SPRN_MCSRR0
+#define SPRN_MC_SRR1 SPRN_MCSRR1
+
+#define NORMAL_EXCEPTION_PROLOG(n, addition) \
+ EXCEPTION_PROLOG(n, GEN, addition##_GEN)
+
+#define CRIT_EXCEPTION_PROLOG(n, addition) \
+ EXCEPTION_PROLOG(n, CRIT, addition##_CRIT)
+
+#define DBG_EXCEPTION_PROLOG(n, addition) \
+ EXCEPTION_PROLOG(n, DBG, addition##_DBG)
+
+#define MC_EXCEPTION_PROLOG(n, addition) \
+ EXCEPTION_PROLOG(n, MC, addition##_MC)
+
+
+/* Variants of the "addition" argument for the prolog
+ */
+#define PROLOG_ADDITION_NONE_GEN
+#define PROLOG_ADDITION_NONE_CRIT
+#define PROLOG_ADDITION_NONE_DBG
+#define PROLOG_ADDITION_NONE_MC
+
+#define PROLOG_ADDITION_MASKABLE_GEN \
+ lbz r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \
+ cmpwi cr0,r11,0; /* yes -> go out of line */ \
+ beq masked_interrupt_book3e;
+
+#define PROLOG_ADDITION_2REGS_GEN \
+ std r14,PACA_EXGEN+EX_R14(r13); \
+ std r15,PACA_EXGEN+EX_R15(r13)
+
+#define PROLOG_ADDITION_1REG_GEN \
+ std r14,PACA_EXGEN+EX_R14(r13);
+
+#define PROLOG_ADDITION_2REGS_CRIT \
+ std r14,PACA_EXCRIT+EX_R14(r13); \
+ std r15,PACA_EXCRIT+EX_R15(r13)
+
+#define PROLOG_ADDITION_2REGS_DBG \
+ std r14,PACA_EXDBG+EX_R14(r13); \
+ std r15,PACA_EXDBG+EX_R15(r13)
+
+#define PROLOG_ADDITION_2REGS_MC \
+ std r14,PACA_EXMC+EX_R14(r13); \
+ std r15,PACA_EXMC+EX_R15(r13)
+
+/* Core exception code for all exceptions except TLB misses.
+ * XXX: Needs to make SPRN_SPRG_GEN depend on exception type
+ */
+#define EXCEPTION_COMMON(n, excf, ints) \
+ std r0,GPR0(r1); /* save r0 in stackframe */ \
+ std r2,GPR2(r1); /* save r2 in stackframe */ \
+ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
+ SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
+ std r9,GPR9(r1); /* save r9 in stackframe */ \
+ std r10,_NIP(r1); /* save SRR0 to stackframe */ \
+ std r11,_MSR(r1); /* save SRR1 to stackframe */ \
+ ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */ \
+ ld r3,excf+EX_R10(r13); /* get back r10 */ \
+ ld r4,excf+EX_R11(r13); /* get back r11 */ \
+ mfspr r5,SPRN_SPRG_GEN; /* get back r13 */ \
+ std r12,GPR12(r1); /* save r12 in stackframe */ \
+ ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
+ mflr r6; /* save LR in stackframe */ \
+ mfctr r7; /* save CTR in stackframe */ \
+ mfspr r8,SPRN_XER; /* save XER in stackframe */ \
+ ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \
+ lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \
+ lbz r11,PACASOFTIRQEN(r13); /* get current IRQ softe */ \
+ ld r12,exception_marker at toc(r2); \
+ li r0,0; \
+ std r3,GPR10(r1); /* save r10 to stackframe */ \
+ std r4,GPR11(r1); /* save r11 to stackframe */ \
+ std r5,GPR13(r1); /* save it to stackframe */ \
+ std r6,_LINK(r1); \
+ std r7,_CTR(r1); \
+ std r8,_XER(r1); \
+ li r3,(n)+1; /* indicate partial regs in trap */ \
+ std r9,0(r1); /* store stack frame back link */ \
+ std r10,_CCR(r1); /* store orig CR in stackframe */ \
+ std r9,GPR1(r1); /* store stack frame back link */ \
+ std r11,SOFTE(r1); /* and save it to stackframe */ \
+ std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \
+ std r3,_TRAP(r1); /* set trap number */ \
+ std r0,RESULT(r1); /* clear regs->result */ \
+ ints;
+
+/* Variants for the "ints" argument */
+#define INTS_KEEP
+#define INTS_DISABLE_SOFT \
+ stb r0,PACASOFTIRQEN(r13); /* mark interrupts soft-disabled */ \
+ TRACE_DISABLE_INTS;
+#define INTS_DISABLE_HARD \
+ stb r0,PACAHARDIRQEN(r13); /* and hard disabled */
+#define INTS_DISABLE_ALL \
+ INTS_DISABLE_SOFT \
+ INTS_DISABLE_HARD
+
+/* This is called by exceptions that used INTS_KEEP (that is did not clear
+ * neither soft nor hard IRQ indicators in the PACA. This will restore MSR:EE
+ * to it's previous value
+ *
+ * XXX In the long run, we may want to open-code it in order to separate the
+ * load from the wrtee, thus limiting the latency caused by the dependency
+ * but at this point, I'll favor code clarity until we have a near to final
+ * implementation
+ */
+#define INTS_RESTORE_HARD \
+ ld r11,_MSR(r1); \
+ wrtee r11;
+
+/* XXX FIXME: Restore r14/r15 when necessary */
+#define BAD_STACK_TRAMPOLINE(n) \
+exc_##n##_bad_stack: \
+ li r1,(n); /* get exception number */ \
+ sth r1,PACA_TRAP_SAVE(r13); /* store trap */ \
+ b bad_stack_book3e; /* bad stack error */
+
+#define EXCEPTION_STUB(loc, label) \
+ . = interrupt_base_book3e + loc; \
+ nop; /* To make debug interrupts happy */ \
+ b exc_##label##_book3e;
+
+#define ACK_NONE(r)
+#define ACK_DEC(r) \
+ lis r,TSR_DIS at h; \
+ mtspr SPRN_TSR,r
+#define ACK_FIT(r) \
+ lis r,TSR_FIS at h; \
+ mtspr SPRN_TSR,r
+
+#define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack) \
+ START_EXCEPTION(label); \
+ NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE) \
+ EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE_ALL) \
+ ack(r8); \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ bl hdlr; \
+ b .ret_from_except_lite;
+
+/* This value is used to mark exception frames on the stack. */
+ .section ".toc","aw"
+exception_marker:
+ .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
+
+
+/*
+ * And here we have the exception vectors !
+ */
+
+ .text
+ .balign 0x1000
+ .globl interrupt_base_book3e
+interrupt_base_book3e: /* fake trap */
+ /* Note: If real debug exceptions are supported by the HW, the vector
+ * below will have to be patched up to point to an appropriate handler
+ */
+ EXCEPTION_STUB(0x000, machine_check) /* 0x0200 */
+ EXCEPTION_STUB(0x020, critical_input) /* 0x0580 */
+ EXCEPTION_STUB(0x040, debug_crit) /* 0x0d00 */
+ EXCEPTION_STUB(0x060, data_storage) /* 0x0300 */
+ EXCEPTION_STUB(0x080, instruction_storage) /* 0x0400 */
+ EXCEPTION_STUB(0x0a0, external_input) /* 0x0500 */
+ EXCEPTION_STUB(0x0c0, alignment) /* 0x0600 */
+ EXCEPTION_STUB(0x0e0, program) /* 0x0700 */
+ EXCEPTION_STUB(0x100, fp_unavailable) /* 0x0800 */
+ EXCEPTION_STUB(0x120, system_call) /* 0x0c00 */
+ EXCEPTION_STUB(0x140, ap_unavailable) /* 0x0f20 */
+ EXCEPTION_STUB(0x160, decrementer) /* 0x0900 */
+ EXCEPTION_STUB(0x180, fixed_interval) /* 0x0980 */
+ EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */
+ EXCEPTION_STUB(0x1c0, data_tlb_miss)
+ EXCEPTION_STUB(0x1e0, instruction_tlb_miss)
+
+#if 0
+ EXCEPTION_STUB(0x280, processor_doorbell)
+ EXCEPTION_STUB(0x220, processor_doorbell_crit)
+#endif
+ .globl interrupt_end_book3e
+interrupt_end_book3e:
+
+/* Critical Input Interrupt */
+ START_EXCEPTION(critical_input);
+ CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE)
+// EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE_ALL)
+// bl special_reg_save_crit
+// addi r3,r1,STACK_FRAME_OVERHEAD
+// bl .critical_exception
+// b ret_from_crit_except
+ b .
+
+/* Machine Check Interrupt */
+ START_EXCEPTION(machine_check);
+ CRIT_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE)
+// EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE_ALL)
+// bl special_reg_save_mc
+// addi r3,r1,STACK_FRAME_OVERHEAD
+// bl .machine_check_exception
+// b ret_from_mc_except
+ b .
+
+/* Data Storage Interrupt */
+ START_EXCEPTION(data_storage)
+ NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS)
+ mfspr r14,SPRN_DEAR
+ mfspr r15,SPRN_ESR
+ EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_KEEP)
+ b storage_fault_common
+
+/* Instruction Storage Interrupt */
+ START_EXCEPTION(instruction_storage);
+ NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS)
+ li r15,0
+ mr r14,r10
+ EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_KEEP)
+ b storage_fault_common
+
+/* External Input Interrupt */
+ MASKABLE_EXCEPTION(0x500, external_input, .do_IRQ, ACK_NONE)
+
+/* Alignment */
+ START_EXCEPTION(alignment);
+ NORMAL_EXCEPTION_PROLOG(0x600, PROLOG_ADDITION_2REGS)
+ mfspr r14,SPRN_DEAR
+ mfspr r15,SPRN_ESR
+ EXCEPTION_COMMON(0x600, PACA_EXGEN, INTS_KEEP)
+ b alignment_more /* no room, go out of line */
+
+/* Program Interrupt */
+ START_EXCEPTION(program);
+ NORMAL_EXCEPTION_PROLOG(0x700, PROLOG_ADDITION_1REG)
+ mfspr r14,SPRN_ESR
+ EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE_SOFT)
+ std r14,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ bl .save_nvgprs
+ INTS_RESTORE_HARD
+ bl .program_check_exception
+ b .ret_from_except
+
+/* Floating Point Unavailable Interrupt */
+ START_EXCEPTION(fp_unavailable);
+ NORMAL_EXCEPTION_PROLOG(0x800, PROLOG_ADDITION_NONE)
+ /* we can probably do a shorter exception entry for that one... */
+ EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP)
+ bne 1f /* if from user, just load it up */
+ bl .save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ INTS_RESTORE_HARD
+ bl .kernel_fp_unavailable_exception
+ BUG_OPCODE
+1: ld r12,_MSR(r1)
+ bl .load_up_fpu
+ b fast_exception_return
+
+/* Decrementer Interrupt */
+ MASKABLE_EXCEPTION(0x900, decrementer, .timer_interrupt, ACK_DEC)
+
+/* Fixed Interval Timer Interrupt */
+ MASKABLE_EXCEPTION(0x980, fixed_interval, .unknown_exception, ACK_FIT)
+
+/* Watchdog Timer Interrupt */
+ START_EXCEPTION(watchdog);
+ CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE)
+// EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE_ALL)
+// bl special_reg_save_crit
+// addi r3,r1,STACK_FRAME_OVERHEAD
+// bl .unknown_exception
+// b ret_from_crit_except
+ b .
+
+/* System Call Interrupt */
+ START_EXCEPTION(system_call)
+ mr r9,r13 /* keep a copy of userland r13 */
+ mfspr r11,SPRN_SRR0 /* get return address */
+ mfspr r12,SPRN_SRR1 /* get previous MSR */
+ mfspr r13,SPRN_SPRG_PACA /* get our PACA */
+ b system_call_common
+
+/* Auxillary Processor Unavailable Interrupt */
+ START_EXCEPTION(ap_unavailable);
+ NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_KEEP)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .save_nvgprs
+ INTS_RESTORE_HARD
+ bl .unknown_exception
+ b .ret_from_except
+
+/* Debug exception as a critical interrupt*/
+ START_EXCEPTION(debug_crit);
+ CRIT_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS)
+
+ /*
+ * If there is a single step or branch-taken exception in an
+ * exception entry sequence, it was probably meant to apply to
+ * the code where the exception occurred (since exception entry
+ * doesn't turn off DE automatically). We simulate the effect
+ * of turning off DE on entry to an exception handler by turning
+ * off DE in the CSRR1 value and clearing the debug status.
+ */
+
+ mfspr r14,SPRN_DBSR /* check single-step/branch taken */
+ andis. r15,r14,DBSR_IC at h
+ beq+ 1f
+
+ LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
+ LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e)
+ cmpld cr0,r10,r14
+ cmpld cr1,r10,r15
+ blt+ cr0,1f
+ bge+ cr1,1f
+
+ /* here it looks like we got an inappropriate debug exception. */
+ lis r14,DBSR_IC at h /* clear the IC event */
+ rlwinm r11,r11,0,~MSR_DE /* clear DE in the CSRR1 value */
+ mtspr SPRN_DBSR,r14
+ mtspr SPRN_CSRR1,r11
+ lwz r10,PACA_EXCRIT+EX_CR(r13) /* restore registers */
+ ld r1,PACA_EXCRIT+EX_R1(r13)
+ ld r14,PACA_EXCRIT+EX_R14(r13)
+ ld r15,PACA_EXCRIT+EX_R15(r13)
+ mtcr r10
+ ld r10,PACA_EXCRIT+EX_R10(r13) /* restore registers */
+ ld r11,PACA_EXCRIT+EX_R11(r13)
+ mfspr r13,SPRN_SPRG_CRIT
+ rfci
+
+ /* Normal debug exception */
+ /* XXX We only handle coming from userspace for now since we can't
+ * quite save properly an interrupted kernel state yet
+ */
+1: andi. r14,r11,MSR_PR; /* check for userspace again */
+ beq kernel_dbg_exc; /* if from kernel mode */
+
+ /* Now we mash up things to make it look like we are coming on a
+ * normal exception
+ */
+ mfspr r15,SPRN_SPRG_CRIT
+ mtspr SPRN_SPRG_GEN,r15
+ mfspr r14,SPRN_DBSR
+ EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE_ALL)
+ std r14,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ mr r4,r14
+ ld r14,PACA_EXCRIT+EX_R14(r13)
+ ld r15,PACA_EXCRIT+EX_R15(r13)
+ bl .save_nvgprs
+ bl .DebugException
+ b .ret_from_except
+
+kernel_dbg_exc:
+ b . /* NYI */
+
+
+/*
+ * An interrupt came in while soft-disabled; clear EE in SRR1,
+ * clear paca->hard_enabled and return.
+ */
+masked_interrupt_book3e:
+ mtcr r10
+ stb r11,PACAHARDIRQEN(r13)
+ mfspr r10,SPRN_SRR1
+ rldicl r11,r10,48,1 /* clear MSR_EE */
+ rotldi r10,r11,16
+ mtspr SPRN_SRR1,r10
+ ld r10,PACA_EXGEN+EX_R10(r13); /* restore registers */
+ ld r11,PACA_EXGEN+EX_R11(r13);
+ mfspr r13,SPRN_SPRG_GEN;
+ rfi
+ b .
+
+/*
+ * This is called from 0x300 and 0x400 handlers after the prologs with
+ * r14 and r15 containing the fault address and error code, with the
+ * original values stashed away in the PACA
+ */
+storage_fault_common:
+ std r14,_DAR(r1)
+ std r15,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ mr r4,r14
+ mr r5,r15
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
+ INTS_RESTORE_HARD
+ bl .do_page_fault
+ cmpdi r3,0
+ bne- 1f
+ b .ret_from_except_lite
+1: bl .save_nvgprs
+ mr r5,r3
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ ld r4,_DAR(r1)
+ bl .bad_page_fault
+ b .ret_from_except
+
+/*
+ * Alignment exception doesn't fit entirely in the 0x100 bytes so it
+ * continues here.
+ */
+alignment_more:
+ std r14,_DAR(r1)
+ std r15,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
+ bl .save_nvgprs
+ INTS_RESTORE_HARD
+ bl .alignment_exception
+ b .ret_from_except
+
+/*
+ * We branch here from entry_64.S for the last stage of the exception
+ * return code path. MSR:EE is expected to be off at that point
+ */
+_GLOBAL(exception_return_book3e)
+ b 1f
+
+/* This is the return from load_up_fpu fast path which could do with
+ * less GPR restores in fact, but for now we have a single return path
+ */
+ .globl fast_exception_return
+fast_exception_return:
+ wrteei 0
+1: mr r0,r13
+ ld r10,_MSR(r1)
+ REST_4GPRS(2, r1)
+ andi. r6,r10,MSR_PR
+ REST_2GPRS(6, r1)
+ beq 1f
+ ACCOUNT_CPU_USER_EXIT(r10, r11)
+ ld r0,GPR13(r1)
+
+1: stdcx. r0,0,r1 /* to clear the reservation */
+
+ ld r8,_CCR(r1)
+ ld r9,_LINK(r1)
+ ld r10,_CTR(r1)
+ ld r11,_XER(r1)
+ mtcr r8
+ mtlr r9
+ mtctr r10
+ mtxer r11
+ REST_2GPRS(8, r1)
+ ld r10,GPR10(r1)
+ ld r11,GPR11(r1)
+ ld r12,GPR12(r1)
+ mtspr SPRN_SPRG_GEN,r0
+
+ std r10,PACA_EXGEN+EX_R10(r13);
+ std r11,PACA_EXGEN+EX_R11(r13);
+ ld r10,_NIP(r1)
+ ld r11,_MSR(r1)
+ ld r0,GPR0(r1)
+ ld r1,GPR1(r1)
+ mtspr SPRN_SRR0,r10
+ mtspr SPRN_SRR1,r11
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ mfspr r13,SPRN_SPRG_GEN
+ rfi
+
+/*
+ * Trampolines used when spotting a bad kernel stack pointer in
+ * the exception entry code.
+ *
+ * TODO: move some bits like SRR0 read to trampoline, pass PACA
+ * index around, etc... to handle crit & mcheck
+ */
+BAD_STACK_TRAMPOLINE(0x000)
+BAD_STACK_TRAMPOLINE(0x100)
+BAD_STACK_TRAMPOLINE(0x200)
+BAD_STACK_TRAMPOLINE(0x300)
+BAD_STACK_TRAMPOLINE(0x400)
+BAD_STACK_TRAMPOLINE(0x500)
+BAD_STACK_TRAMPOLINE(0x600)
+BAD_STACK_TRAMPOLINE(0x700)
+BAD_STACK_TRAMPOLINE(0x800)
+BAD_STACK_TRAMPOLINE(0x900)
+BAD_STACK_TRAMPOLINE(0x980)
+BAD_STACK_TRAMPOLINE(0x9f0)
+BAD_STACK_TRAMPOLINE(0xa00)
+BAD_STACK_TRAMPOLINE(0xb00)
+BAD_STACK_TRAMPOLINE(0xc00)
+BAD_STACK_TRAMPOLINE(0xd00)
+BAD_STACK_TRAMPOLINE(0xe00)
+BAD_STACK_TRAMPOLINE(0xf00)
+BAD_STACK_TRAMPOLINE(0xf20)
+
+ .globl bad_stack_book3e
+bad_stack_book3e:
+ mfspr r10,SPRN_SRR0; /* read SRR0 before touching stack */
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,64+INT_FRAME_SIZE
+ std r10,_NIP(r1)
+ std r11,_MSR(r1)
+ ld r10,PACA_EXGEN+EX_R1(r13) /* FIXME for crit & mcheck */
+ lwz r11,PACA_EXGEN+EX_CR(r13) /* FIXME for crit & mcheck */
+ std r10,GPR1(r1)
+ std r11,_CCR(r1)
+ mfspr r10,SPRN_DEAR
+ mfspr r11,SPRN_ESR
+ std r10,_DAR(r1)
+ std r11,_DSISR(r1)
+ std r0,GPR0(r1); /* save r0 in stackframe */ \
+ std r2,GPR2(r1); /* save r2 in stackframe */ \
+ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
+ SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
+ std r9,GPR9(r1); /* save r9 in stackframe */ \
+ ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \
+ ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \
+ mfspr r5,SPRN_SPRG_GEN; /* get back r13 XXX can be wrong */ \
+ std r3,GPR10(r1); /* save r10 to stackframe */ \
+ std r4,GPR11(r1); /* save r11 to stackframe */ \
+ std r12,GPR12(r1); /* save r12 in stackframe */ \
+ std r5,GPR13(r1); /* save it to stackframe */ \
+ mflr r10
+ mfctr r11
+ mfxer r12
+ std r10,_LINK(r1)
+ std r11,_CTR(r1)
+ std r12,_XER(r1)
+ SAVE_10GPRS(14,r1)
+ SAVE_8GPRS(24,r1)
+ lhz r12,PACA_TRAP_SAVE(r13)
+ std r12,_TRAP(r1)
+ addi r11,r1,INT_FRAME_SIZE
+ std r11,0(r1)
+ li r12,0
+ std r12,0(r11)
+ ld r2,PACATOC(r13)
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .kernel_bad_stack
+ b 1b
+
+/*
+ * Setup the initial TLB for a core. This current implementation
+ * assume that whatever we are running off will not conflict with
+ * the new mapping at PAGE_OFFSET.
+ * We also make various assumptions about the processor we run on,
+ * this might have to be made more flexible based on the content
+ * of MMUCFG and friends.
+ */
+_GLOBAL(initial_tlb_book3e)
+
+ /* Setup MAS 0,1,2,3 and 7 for tlbwe of a 1G entry that maps the
+ * kernel linear mapping. We also set MAS8 once for all here though
+ * that will have to be made dependent on whether we are running under
+ * a hypervisor I suppose.
+ */
+ li r3,SPRN_MAS0_HES | SPRN_MAS0_WQ_ALLWAYS
+ mtspr SPRN_MAS0,r3
+ lis r3,(SPRN_MAS1_V | SPRN_MAS1_IPROT)@h
+ ori r3,r3,BOOK3E_PAGESZ_1GB << SPRN_MAS1_TSIZE_SHIFT
+ mtspr SPRN_MAS1,r3
+ LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET | SPRN_MAS2_M)
+ mtspr SPRN_MAS2,r3
+ li r3,SPRN_MAS3_SR | SPRN_MAS3_SW | SPRN_MAS3_SX
+ mtspr SPRN_MAS7_MAS3,r3
+ li r3,0
+ mtspr SPRN_MAS8,r3
+
+ /* Write the TLB entry */
+ tlbwe
+
+ /* Now we branch the new virtual address mapped by this entry */
+ LOAD_REG_IMMEDIATE(r3,1f)
+ mtctr r3
+ bctr
+
+1: /* We are now running at PAGE_OFFSET, clean the TLB of everything
+ * else (XXX we should scan for bolted crap from the firmware too)
+ */
+ PPC_TLBILX(0,0,0)
+ sync
+ isync
+
+ /* We translate LR and return */
+ mflr r3
+ tovirt(r3,r3)
+ mtlr r3
+ blr
+
+/*
+ * Main entry (boot CPU, thread 0)
+ *
+ * We enter here from head_64.S, possibly after the prom_init trampoline
+ * with r3 and r4 already saved to r31 and 30 respectively and in 64 bits
+ * mode. Anything else is as it was left by the bootloader
+ *
+ * Initial requirements of this port:
+ *
+ * - Kernel loaded at 0 physical
+ * - A good lump of memory mapped 0:0 by UTLB entry 0
+ * - MSR:IS & MSR:DS set to 0
+ *
+ * Note that some of the above requirements will be relaxed in the future
+ * as the kernel becomes smarter at dealing with different initial conditions
+ * but for now you have to be careful
+ */
+_GLOBAL(start_initialization_book3e)
+ mflr r28
+
+ /* First, we need to setup some initial TLBs to map the kernel
+ * text, data and bss at PAGE_OFFSET. We don't have a real mode
+ * and always use AS 0, so we just set it up to match our link
+ * address and never use 0 based addresses.
+ */
+ bl .initial_tlb_book3e
+
+ /* Init global core bits */
+ bl .init_core_book3e
+
+ /* Init per-thread bits */
+ bl .init_thread_book3e
+
+ /* Return to common init code */
+ tovirt(r28,r28)
+ mtlr r28
+ blr
+
+
+/*
+ * Secondary core/processor entry
+ *
+ * This is entered for thread 0 of a secondary core, all other threads
+ * are expected to be stopped. It's similar to start_initialization_book3e
+ * except that it's generally entered from the holding loop in head_64.S
+ * after CPUs have been gathered by Open Firmware.
+ *
+ * We assume we are in 32 bits mode running with whatever TLB entry was
+ * set for us by the firmware or POR engine.
+ */
+_GLOBAL(book3e_secondary_core_init_64bit)
+ li r4,1
+ b .generic_secondary_smp_init
+
+_GLOBAL(book3e_secondary_core_init)
+ mflr r28
+ mr r24,r3
+ cmplwi r4,0
+ bne 2f
+
+ /* Setup TLB for this core */
+ bl .initial_tlb_book3e
+
+ /* get the TOC pointer (virtual address) */
+ bl .relative_toc
+
+ /* Init global core bits */
+2: bl .init_core_book3e
+
+ /* Init per-thread bits */
+ bl .init_thread_book3e
+
+ /* Return to common init code */
+ tovirt(r28,r28)
+ mtlr r28
+ blr
+
+/*
+ * Secondary thread entry
+ *
+ * This is entered in 32 bits mode, we are mapped by a temporary
+ * ERAT entry covering that thread only that we need to get rid of.
+ * r3 contains our linux CPU number. We are getting in here via RTAS
+ * with the current implementation.
+ */
+_GLOBAL(book3e_secondary_thread_init_64bit)
+ mr r24,r3
+ b 2f
+
+_GLOBAL(book3e_secondary_thread_init)
+ mr r24,r3
+
+ /* Turn on 64-bit mode */
+ bl .enable_64b_mode
+
+ /* We need to get away from our 32 bits address and jump to
+ * the proper full 64 bits address
+ */
+ LOAD_REG_IMMEDIATE(r3,1f)
+ mtctr r3
+ bctr
+1: isync
+
+ /* Init per-thread bits */
+2: bl .init_thread_book3e
+
+ /* Jump to common init code */
+ mr r3,r24
+ b .generic_secondary_smp_init
+
+
+_STATIC(init_core_book3e)
+ /* Establish the interrupt vector base */
+ LOAD_REG_IMMEDIATE(r3, interrupt_base_book3e)
+ mtspr SPRN_IVPR,r3
+ sync
+ blr
+
+_STATIC(init_thread_book3e)
+ lis r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h
+ mtspr SPRN_EPCR,r3
+
+ /* Make sure interrupts are off */
+ wrteei 0
+
+ /* disable watchdog and FIT and enable DEC interrupts */
+ lis r3,TCR_DIE at h
+ mtspr SPRN_TCR,r3
+
+ blr
+
+
+
Index: linux-work/arch/powerpc/include/asm/ppc_asm.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/ppc_asm.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/ppc_asm.h 2009-06-03 15:13:29.000000000 +1000
@@ -376,7 +376,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
#endif
-#if defined(CONFIG_BOOKE)
+/* XXX Implement __toreal and __tovirt that can be used by Book3E 64-bit
+ * where it wants it to avoid having the ifdef CONFIG_PPC64 below
+ */
+#if defined(CONFIG_BOOKE) && !defined(CONFIG_PPC64)
#define toreal(rd)
#define fromreal(rd)
@@ -426,10 +429,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
.previous
#endif
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
#define RFI rfid
#define MTMSRD(r) mtmsrd r
-
#else
#define FIX_SRR1(ra, rb)
#ifndef CONFIG_40x
Index: linux-work/arch/powerpc/kernel/head_64.S
===================================================================
--- linux-work.orig/arch/powerpc/kernel/head_64.S 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/kernel/head_64.S 2009-06-03 16:30:43.000000000 +1000
@@ -122,10 +122,11 @@ __run_at_load:
*/
.globl __secondary_hold
__secondary_hold:
+#ifndef CONFIG_PPC_BOOK3E
mfmsr r24
ori r24,r24,MSR_RI
mtmsrd r24 /* RI on */
-
+#endif
/* Grab our physical cpu number */
mr r24,r3
@@ -144,6 +145,7 @@ __secondary_hold:
ld r4,0(r4) /* deref function descriptor */
mtctr r4
mr r3,r24
+ li r4,0
bctr
#else
BUG_OPCODE
@@ -172,13 +174,20 @@ exception_marker:
*/
_GLOBAL(generic_secondary_smp_init)
mr r24,r3
-
+ mr r25,r4
+
/* turn on 64-bit mode */
bl .enable_64b_mode
/* get the TOC pointer (real address) */
bl .relative_toc
+#ifdef CONFIG_PPC_BOOK3E
+ /* Book3E initialization */
+ mr r3,r24
+ mr r4,r25
+ bl .book3e_secondary_core_init
+#endif
/* Set up a paca value for this processor. Since we have the
* physical cpu id in r24, we need to search the pacas to find
* which logical id maps to our physical one.
@@ -197,6 +206,11 @@ _GLOBAL(generic_secondary_smp_init)
b .kexec_wait /* next kernel might do better */
2: mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */
+#ifdef CONFIG_PPC_BOOK3E
+ addi r12,r13,PACA_EXTLB /* and TLB exc frame in SPRG2 */
+ mtspr SPRN_SPRG2,r12
+#endif
+
/* From now on, r24 is expected to be logical cpuid */
mr r24,r5
3: HMT_LOW
@@ -232,6 +246,7 @@ _GLOBAL(generic_secondary_smp_init)
* Turn the MMU off.
* Assumes we're mapped EA == RA if the MMU is on.
*/
+#ifdef CONFIG_PPC_BOOK3S
_STATIC(__mmu_off)
mfmsr r3
andi. r0,r3,MSR_IR|MSR_DR
@@ -243,6 +258,7 @@ _STATIC(__mmu_off)
sync
rfid
b . /* prevent speculative execution */
+#endif
/*
@@ -280,6 +296,10 @@ _GLOBAL(__start_initialization_multiplat
mr r31,r3
mr r30,r4
+#ifdef CONFIG_PPC_BOOK3E
+ bl .start_initialization_book3e
+ b .__after_prom_start
+#else
/* Setup some critical 970 SPRs before switching MMU off */
mfspr r0,SPRN_PVR
srwi r0,r0,16
@@ -297,6 +317,7 @@ _GLOBAL(__start_initialization_multiplat
/* Switch off MMU if not already off */
bl .__mmu_off
b .__after_prom_start
+#endif /* CONFIG_PPC_BOOK3E */
_INIT_STATIC(__boot_from_prom)
#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
@@ -359,10 +380,16 @@ _STATIC(__after_prom_start)
* Note: This process overwrites the OF exception vectors.
*/
li r3,0 /* target addr */
+#ifdef CONFIG_PPC_BOOK3E
+ tovirt(r3,r3) /* on booke, we already run at PAGE_OFFSET */
+#endif
mr. r4,r26 /* In some cases the loader may */
beq 9f /* have already put us at zero */
li r6,0x100 /* Start offset, the first 0x100 */
/* bytes were copied earlier. */
+#ifdef CONFIG_PPC_BOOK3E
+ tovirt(r6,r6) /* on booke, we already run at PAGE_OFFSET */
+#endif
#ifdef CONFIG_CRASH_DUMP
/*
@@ -508,6 +535,9 @@ _GLOBAL(pmac_secondary_start)
* r13 = paca virtual address
* SPRG3 = paca virtual address
*/
+ .section ".text";
+ .align 2 ;
+
.globl __secondary_start
__secondary_start:
/* Set thread priority to MEDIUM */
@@ -544,7 +574,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISER
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- rfid
+ RFI
b . /* prevent speculative execution */
/*
@@ -565,11 +595,16 @@ _GLOBAL(start_secondary_prolog)
*/
_GLOBAL(enable_64b_mode)
mfmsr r11 /* grab the current MSR */
+#ifdef CONFIG_PPC_BOOK3E
+ oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */
+ mtmsr r11
+#else /* CONFIG_PPC_BOOK3E */
li r12,(MSR_SF | MSR_ISF)@highest
sldi r12,r12,48
or r11,r11,r12
mtmsrd r11
isync
+#endif
blr
/*
@@ -613,9 +648,11 @@ _INIT_STATIC(start_here_multiplatform)
bdnz 3b
4:
+#ifndef CONFIG_PPC_BOOK3E
mfmsr r6
ori r6,r6,MSR_RI
mtmsrd r6 /* RI on */
+#endif
#ifdef CONFIG_RELOCATABLE
/* Save the physical address we're running at in kernstart_addr */
@@ -648,7 +685,7 @@ _INIT_STATIC(start_here_multiplatform)
ld r4,PACAKMSR(r13)
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- rfid
+ RFI
b . /* prevent speculative execution */
/* This is where all platforms converge execution */
Index: linux-work/arch/powerpc/kernel/entry_64.S
===================================================================
--- linux-work.orig/arch/powerpc/kernel/entry_64.S 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/kernel/entry_64.S 2009-06-03 17:15:23.000000000 +1000
@@ -124,6 +124,15 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISER
ori r11,r11,MSR_EE
mtmsrd r11,1
+ /* Hard enable interrupts */
+#ifdef CONFIG_PPC_BOOK3E
+ wrteei 1
+#else
+ mfmsr r11
+ ori r11,r11,MSR_EE
+ mtmsrd r11,1
+#endif /* CONFIG_PPC_BOOK3E */
+
#ifdef SHOW_SYSCALLS
bl .do_show_syscall
REST_GPR(0,r1)
@@ -168,15 +177,25 @@ syscall_exit:
#endif
clrrdi r12,r1,THREAD_SHIFT
- /* disable interrupts so current_thread_info()->flags can't change,
- and so that we don't get interrupted after loading SRR0/1. */
ld r8,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3S
+ /* No MSR:RI on BookE */
andi. r10,r8,MSR_RI
beq- unrecov_restore
+#endif
+
+ /* Disable interrupts so current_thread_info()->flags can't change,
+ * and so that we don't get interrupted after loading SRR0/1.
+ */
+#ifdef CONFIG_PPC_BOOK3E
+ wrteei 0
+#else
mfmsr r10
rldicl r10,r10,48,1
rotldi r10,r10,16
mtmsrd r10,1
+#endif /* CONFIG_PPC_BOOK3E */
+
ld r9,TI_FLAGS(r12)
li r11,-_LAST_ERRNO
andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
@@ -194,9 +213,13 @@ syscall_error_cont:
* userspace and we take an exception after restoring r13,
* we end up corrupting the userspace r13 value.
*/
+#ifdef CONFIG_PPC_BOOK3S
+ /* No MSR:RI on BookE */
li r12,MSR_RI
andc r11,r10,r12
mtmsrd r11,1 /* clear MSR.RI */
+#endif /* CONFIG_PPC_BOOK3S */
+
beq- 1f
ACCOUNT_CPU_USER_EXIT(r11, r12)
ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
@@ -206,7 +229,7 @@ syscall_error_cont:
mtcr r5
mtspr SPRN_SRR0,r7
mtspr SPRN_SRR1,r8
- rfid
+ RFI
b . /* prevent speculative execution */
syscall_error:
@@ -276,9 +299,13 @@ syscall_exit_work:
beq .ret_from_except_lite
/* Re-enable interrupts */
+#ifdef CONFIG_PPC_BOOK3E
+ wrteei 1
+#else
mfmsr r10
ori r10,r10,MSR_EE
mtmsrd r10,1
+#endif /* CONFIG_PPC_BOOK3E */
bl .save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -380,7 +407,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
and. r0,r0,r22
beq+ 1f
andc r22,r22,r0
- mtmsrd r22
+ MTMSRD(r22)
isync
1: std r20,_NIP(r1)
mfcr r23
@@ -399,6 +426,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
std r6,PACACURRENT(r13) /* Set new 'current' */
ld r8,KSP(r4) /* new stack pointer */
+#ifdef CONFIG_PPC_BOOK3S
BEGIN_FTR_SECTION
BEGIN_FTR_SECTION_NESTED(95)
clrrdi r6,r8,28 /* get its ESID */
@@ -445,8 +473,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT
slbie r6 /* Workaround POWER5 < DD2.1 issue */
slbmte r7,r0
isync
-
2:
+#endif /* !CONFIG_PPC_BOOK3S */
+
clrrdi r7,r8,THREAD_SHIFT /* base of new stack */
/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
because we don't need to leave the 288-byte ABI gap at the
@@ -490,10 +519,14 @@ _GLOBAL(ret_from_except_lite)
* can't change between when we test it and when we return
* from the interrupt.
*/
+#ifdef CONFIG_PPC_BOOK3E
+ wrteei 0
+#else
mfmsr r10 /* Get current interrupt state */
rldicl r9,r10,48,1 /* clear MSR_EE */
rotldi r9,r9,16
mtmsrd r9,1 /* Update machine state */
+#endif /* CONFIG_PPC_BOOK3E */
#ifdef CONFIG_PREEMPT
clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */
@@ -531,6 +564,9 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_
rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */
stb r4,PACAHARDIRQEN(r13)
+#ifdef CONFIG_PPC_BOOK3E
+ b .exception_return_book3e
+#else
ld r4,_CTR(r1)
ld r0,_LINK(r1)
mtctr r4
@@ -579,6 +615,8 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_
rfid
b . /* prevent speculative execution */
+#endif /* CONFIG_PPC_BOOK3E */
+
iseries_check_pending_irqs:
#ifdef CONFIG_PPC_ISERIES
ld r5,SOFTE(r1)
@@ -629,6 +667,11 @@ do_work:
li r0,1
stb r0,PACASOFTIRQEN(r13)
stb r0,PACAHARDIRQEN(r13)
+#ifdef CONFIG_PPC_BOOK3E
+ wrteei 1
+ bl .preempt_schedule
+ wrteei 0
+#else
ori r10,r10,MSR_EE
mtmsrd r10,1 /* reenable interrupts */
bl .preempt_schedule
@@ -637,6 +680,7 @@ do_work:
rldicl r10,r10,48,1 /* disable interrupts again */
rotldi r10,r10,16
mtmsrd r10,1
+#endif /* CONFIG_PPC_BOOK3E */
ld r4,TI_FLAGS(r9)
andi. r0,r4,_TIF_NEED_RESCHED
bne 1b
@@ -645,8 +689,12 @@ do_work:
user_work:
#endif
/* Enable interrupts */
+#ifdef CONFIG_PPC_BOOK3E
+ wrteei 1
+#else
ori r10,r10,MSR_EE
mtmsrd r10,1
+#endif /* CONFIG_PPC_BOOK3E */
andi. r0,r4,_TIF_NEED_RESCHED
beq 1f
@@ -814,33 +862,25 @@ _GLOBAL(enter_prom)
* of all registers that it saves. We therefore save those registers
* PROM might touch to the stack. (r0, r3-r13 are caller saved)
*/
- SAVE_8GPRS(2, r1)
+ SAVE_GPR(2, r1)
SAVE_GPR(13, r1)
SAVE_8GPRS(14, r1)
SAVE_10GPRS(22, r1)
- mfcr r4
- std r4,_CCR(r1)
- mfctr r5
- std r5,_CTR(r1)
- mfspr r6,SPRN_XER
- std r6,_XER(r1)
- mfdar r7
- std r7,_DAR(r1)
- mfdsisr r8
- std r8,_DSISR(r1)
- mfsrr0 r9
- std r9,_SRR0(r1)
- mfsrr1 r10
- std r10,_SRR1(r1)
+ mfcr r10
mfmsr r11
+ std r10,_CCR(r1)
std r11,_MSR(r1)
/* Get the PROM entrypoint */
- ld r0,GPR4(r1)
- mtlr r0
+ mtlr r4
/* Switch MSR to 32 bits mode
*/
+#ifdef CONFIG_PPC_BOOK3E
+ oris r11,r11,0x8000 /* XXX use rlwim */
+ xoris r11,r11,0x8000
+ mtmsr r11
+#else /* CONFIG_PPC_BOOK3E */
mfmsr r11
li r12,1
rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
@@ -849,10 +889,10 @@ _GLOBAL(enter_prom)
rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
andc r11,r11,r12
mtmsrd r11
+#endif /* CONFIG_PPC_BOOK3E */
isync
- /* Restore arguments & enter PROM here... */
- ld r3,GPR3(r1)
+ /* Enter PROM here... */
blrl
/* Just make sure that r1 top 32 bits didn't get
@@ -862,7 +902,7 @@ _GLOBAL(enter_prom)
/* Restore the MSR (back to 64 bits) */
ld r0,_MSR(r1)
- mtmsrd r0
+ MTMSRD(r0)
isync
/* Restore other registers */
@@ -872,18 +912,6 @@ _GLOBAL(enter_prom)
REST_10GPRS(22, r1)
ld r4,_CCR(r1)
mtcr r4
- ld r5,_CTR(r1)
- mtctr r5
- ld r6,_XER(r1)
- mtspr SPRN_XER,r6
- ld r7,_DAR(r1)
- mtdar r7
- ld r8,_DSISR(r1)
- mtdsisr r8
- ld r9,_SRR0(r1)
- mtsrr0 r9
- ld r10,_SRR1(r1)
- mtsrr1 r10
addi r1,r1,PROM_FRAME_SIZE
ld r0,16(r1)
Index: linux-work/arch/powerpc/include/asm/mmu.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/mmu.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/mmu.h 2009-06-03 15:57:15.000000000 +1000
@@ -17,6 +17,7 @@
#define MMU_FTR_TYPE_40x ASM_CONST(0x00000004)
#define MMU_FTR_TYPE_44x ASM_CONST(0x00000008)
#define MMU_FTR_TYPE_FSL_E ASM_CONST(0x00000010)
+#define MMU_FTR_TYPE_3E ASM_CONST(0x00000020)
/*
* This is individual features
Index: linux-work/arch/powerpc/include/asm/paca.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/paca.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/paca.h 2009-06-03 15:13:29.000000000 +1000
@@ -14,9 +14,11 @@
#define _ASM_POWERPC_PACA_H
#ifdef __KERNEL__
-#include <asm/types.h>
-#include <asm/lppaca.h>
-#include <asm/mmu.h>
+#include <asm/types.h>
+#include <asm/lppaca.h>
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/exception-64e.h>
register struct paca_struct *local_paca asm("r13");
@@ -91,6 +93,21 @@ struct paca_struct {
u16 slb_cache[SLB_CACHE_ENTRIES];
#endif /* CONFIG_PPC_STD_MMU_64 */
+#ifdef CONFIG_PPC_BOOK3E
+ pgd_t *pgd; /* Current PGD */
+ pgd_t *kernel_pgd; /* Kernel PGD */
+ u64 exgen[8] __attribute__((aligned(0x80)));
+ u64 extlb[EX_TLB_SIZE*3] __attribute__((aligned(0x80)));
+ u64 exmc[8]; /* used for machine checks */
+ u64 excrit[8]; /* used for crit interrupts */
+ u64 exdbg[8]; /* used for debug interrupts */
+
+ /* Kernel stack pointers for use by special exceptions */
+ void *mc_kstack;
+ void *crit_kstack;
+ void *dbg_kstack;
+#endif /* CONFIG_PPC_BOOK3E */
+
mm_context_t context;
/*
Index: linux-work/arch/powerpc/include/asm/ppc-opcode.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/ppc-opcode.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/ppc-opcode.h 2009-06-03 16:00:05.000000000 +1000
@@ -48,6 +48,8 @@
#define PPC_INST_TLBIE 0x7c000264
#define PPC_INST_TLBILX 0x7c000024
#define PPC_INST_WAIT 0x7c00007c
+#define PPC_INST_TLBIVAX 0x7c000624
+#define PPC_INST_TLBSRX_DOT 0x7c0006a5
/* macros to insert fields into opcodes */
#define __PPC_RA(a) (((a) & 0x1f) << 16)
@@ -76,6 +78,10 @@
__PPC_WC(w))
#define PPC_TLBIE(lp,a) stringify_in_c(.long PPC_INST_TLBIE | \
__PPC_RB(a) | __PPC_RS(lp))
+#define PPC_TLBSRX_DOT(a,b) stringify_in_c(.long PPC_INST_TLBSRX_DOT | \
+ __PPC_RA(a) | __PPC_RB(b))
+#define PPC_TLBIVAX(a,b) stringify_in_c(.long PPC_INST_TLBIVAX | \
+ __PPC_RA(a) | __PPC_RB(b))
/*
* Define what the VSX XX1 form instructions will look like, then add
Index: linux-work/arch/powerpc/include/asm/tlbflush.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/tlbflush.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/tlbflush.h 2009-06-03 15:13:29.000000000 +1000
@@ -6,7 +6,7 @@
*
* - flush_tlb_mm(mm) flushes the specified mm context TLB's
* - flush_tlb_page(vma, vmaddr) flushes one page
- * - local_flush_tlb_mm(mm) flushes the specified mm context on
+ * - local_flush_tlb_mm(mm, full) flushes the specified mm context on
* the local processor
* - local_flush_tlb_page(vma, vmaddr) flushes one page on the local processor
* - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB
@@ -29,7 +29,8 @@
* specific tlbie's
*/
-#include <linux/mm.h>
+struct vm_area_struct;
+struct mm_struct;
#define MMU_NO_CONTEXT ((unsigned int)-1)
@@ -40,12 +41,18 @@ extern void flush_tlb_kernel_range(unsig
extern void local_flush_tlb_mm(struct mm_struct *mm);
extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+ int psize, int ind);
+
#ifdef CONFIG_SMP
extern void flush_tlb_mm(struct mm_struct *mm);
extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+ int psize, int ind);
#else
#define flush_tlb_mm(mm) local_flush_tlb_mm(mm)
#define flush_tlb_page(vma,addr) local_flush_tlb_page(vma,addr)
+#define __flush_tlb_page(mm,addr,p,i) __local_flush_tlb_page(mm,addr,p,i)
#endif
#define flush_tlb_page_nohash(vma,addr) flush_tlb_page(vma,addr)
Index: linux-work/arch/powerpc/kernel/asm-offsets.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/asm-offsets.c 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/kernel/asm-offsets.c 2009-06-03 15:13:29.000000000 +1000
@@ -52,9 +52,11 @@
#include <linux/kvm_host.h>
#endif
+#ifdef CONFIG_PPC32
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
#include "head_booke.h"
#endif
+#endif
#if defined(CONFIG_FSL_BOOKE)
#include "../mm/mmu_decl.h"
@@ -137,6 +139,20 @@ int main(void)
context.high_slices_psize));
DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
#endif /* CONFIG_PPC_MM_SLICES */
+
+#ifdef CONFIG_PPC_BOOK3E
+ DEFINE(PACAPGD, offsetof(struct paca_struct, pgd));
+ DEFINE(PACA_KERNELPGD, offsetof(struct paca_struct, kernel_pgd));
+ DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
+ DEFINE(PACA_EXTLB, offsetof(struct paca_struct, extlb));
+ DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
+ DEFINE(PACA_EXCRIT, offsetof(struct paca_struct, excrit));
+ DEFINE(PACA_EXDBG, offsetof(struct paca_struct, exdbg));
+ DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack));
+ DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack));
+ DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack));
+#endif /* CONFIG_PPC_BOOK3E */
+
#ifdef CONFIG_PPC_STD_MMU_64
DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
@@ -259,6 +275,7 @@ int main(void)
DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
#endif /* CONFIG_PPC64 */
+#if defined(CONFIG_PPC32)
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
@@ -277,7 +294,7 @@ int main(void)
DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1));
DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit));
#endif
-
+#endif
DEFINE(CLONE_VM, CLONE_VM);
DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
Index: linux-work/arch/powerpc/kernel/cputable.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/cputable.c 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/kernel/cputable.c 2009-06-03 15:57:33.000000000 +1000
@@ -93,7 +93,7 @@ extern void __restore_cpu_power7(void);
PPC_FEATURE_BOOKE)
static struct cpu_spec __initdata cpu_specs[] = {
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
{ /* Power3 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00400000,
@@ -508,7 +508,30 @@ static struct cpu_spec __initdata cpu_sp
.machine_check = machine_check_generic,
.platform = "power4",
}
-#endif /* CONFIG_PPC64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_PPC_BOOK3E_64
+ { /* This is a default entry to get going, to be replaced by
+ * a real one at some stage
+ */
+#define CPU_FTRS_BASE_BOOK3E (CPU_FTR_USE_TB | \
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_SMT | \
+ CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "Book3E",
+ .cpu_features = CPU_FTRS_BASE_BOOK3E,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX |
+ MMU_FTR_USE_TLBIVAX_BCAST |
+ MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 0,
+ .machine_check = machine_check_generic,
+ .platform = "power6",
+ },
+#endif
+
#ifdef CONFIG_PPC32
#if CLASSIC_PPC
{ /* 601 */
Index: linux-work/arch/powerpc/mm/Makefile
===================================================================
--- linux-work.orig/arch/powerpc/mm/Makefile 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/mm/Makefile 2009-06-03 15:13:29.000000000 +1000
@@ -11,6 +11,7 @@ obj-y := fault.o mem.o pgtable.o gup.
pgtable_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
tlb_nohash_low.o
+obj-$(CONFIG_PPC_BOOK3E) += tlb_book3e.o tlb_low_$(CONFIG_WORD_SIZE)e.o
obj-$(CONFIG_PPC64) += mmap_64.o
hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \
Index: linux-work/arch/powerpc/mm/init_64.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/init_64.c 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/mm/init_64.c 2009-06-03 15:13:29.000000000 +1000
@@ -76,6 +76,8 @@
#endif
#endif /* CONFIG_PPC_STD_MMU_64 */
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
phys_addr_t memstart_addr = ~0;
phys_addr_t kernstart_addr;
Index: linux-work/arch/powerpc/mm/mmu_decl.h
===================================================================
--- linux-work.orig/arch/powerpc/mm/mmu_decl.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/mm/mmu_decl.h 2009-06-03 15:13:29.000000000 +1000
@@ -36,32 +36,42 @@ static inline void _tlbil_pid(unsigned i
{
asm volatile ("sync; tlbia; isync" : : : "memory");
}
+#define _tlbil_pid_noind(pid) _tlbil_pid(pid)
+
#else /* CONFIG_40x || CONFIG_8xx */
extern void _tlbil_all(void);
extern void _tlbil_pid(unsigned int pid);
+#ifdef CONFIG_PPC_BOOK3E
+extern void _tlbil_pid_noind(unsigned int pid);
+#else
+#define _tlbil_pid_noind(pid) _tlbil_pid(pid)
+#endif
#endif /* !(CONFIG_40x || CONFIG_8xx) */
/*
* On 8xx, we directly inline tlbie, on others, it's extern
*/
#ifdef CONFIG_8xx
-static inline void _tlbil_va(unsigned long address, unsigned int pid)
+static inline void _tlbil_va(unsigned long address, unsigned int pid,
+ unsigned int psize, unsigned int ind)
{
asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
}
#else /* CONFIG_8xx */
-extern void _tlbil_va(unsigned long address, unsigned int pid);
+extern void _tlbil_va(unsigned long address, unsigned int pid,
+ unsigned int psize, unsigned int ind);
#endif /* CONIFG_8xx */
-/*
- * As of today, we don't support tlbivax broadcast on any
- * implementation. When that becomes the case, this will be
- * an extern.
- */
-static inline void _tlbivax_bcast(unsigned long address, unsigned int pid)
+#ifdef CONFIG_PPC_BOOK3E
+extern void _tlbivax_bcast(unsigned long address, unsigned int pid,
+ unsigned int psize, unsigned int ind);
+#else
+static inline void _tlbivax_bcast(unsigned long address, unsigned int pid,
+ unsigned int psize, unsigned int ind)
{
BUG();
}
+#endif
#else /* CONFIG_PPC_MMU_NOHASH */
Index: linux-work/arch/powerpc/mm/tlb_hash64.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/tlb_hash64.c 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/mm/tlb_hash64.c 2009-06-03 15:13:29.000000000 +1000
@@ -33,11 +33,6 @@
DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
-/* This is declared as we are using the more or less generic
- * arch/powerpc/include/asm/tlb.h file -- tgall
- */
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
/*
* A linux PTE was changed and the corresponding hash table entry
* neesd to be flushed. This function will either perform the flush
Index: linux-work/arch/powerpc/mm/tlb_book3e.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-work/arch/powerpc/mm/tlb_book3e.c 2009-06-03 15:13:29.000000000 +1000
@@ -0,0 +1,165 @@
+/*
+ * This file contains support code for Book3E TLB handling
+ *
+ * Copyright 2009 Ben Herrenschmidt <benh at kernel.crashing.org>
+ * IBM Corp.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/preempt.h>
+#include <linux/spinlock.h>
+#include <linux/lmb.h>
+
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+#include <asm/code-patching.h>
+
+#include "mmu_decl.h"
+
+/* Page size used for the linear mapping */
+int mmu_linear_psize;
+
+/* Page size used for PTE pages */
+int mmu_pte_psize;
+
+/* Is HW tablewalk enabled ? */
+int book3e_htw_enabled;
+
+/* Top of linear mapping */
+unsigned long linear_map_top;
+
+/*
+ * Handling of virtual linear page tables or indirect TLB entries
+ * flushing when PTE pages are freed
+ */
+void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
+{
+ if (book3e_htw_enabled) {
+ unsigned long start = address & PMD_MASK;
+ unsigned long end = address + PMD_SIZE;
+ unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
+ int enc = mmu_psize_defs[mmu_pte_psize].enc;
+
+ /* This isn't the most optimal, ideally we would factor out the
+ * while preempt & CPU mask mucking around, or even the IPI but
+ * it will do for now
+ */
+ while (start < end) {
+ __flush_tlb_page(tlb->mm, start, enc, 1);
+ start += size;
+ }
+ } else {
+ unsigned long rmask = 0xf000000000000000ul;
+ unsigned long rid = (address & rmask) | 0x1000000000000000ul;
+ unsigned long vpte = address & ~rmask;
+
+#ifdef CONFIG_PPC_64K_PAGES
+ vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful;
+#else
+ vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
+#endif
+ vpte |= rid;
+ __flush_tlb_page(tlb->mm, vpte, mmu_pte_psize, 0);
+ }
+}
+
+/*
+ * Early initialization of the MMU TLB code
+ */
+static void __early_init_mmu(int boot_cpu)
+{
+ extern unsigned int interrupt_base_book3e;
+ extern unsigned int exc_data_tlb_miss_htw_book3e;
+ extern unsigned int exc_instruction_tlb_miss_htw_book3e;
+
+ unsigned int *ibase = &interrupt_base_book3e;
+ unsigned int mas4;
+
+ /* XXX This will have to be decided at runtime, but right
+ * now our boot and TLB miss code hard wires it
+ */
+ mmu_linear_psize = MMU_PAGE_1G;
+
+
+ /* Check if HW tablewalk is present, and if yes, enable it by:
+ *
+ * - patching the TLB miss handlers to branch to the
+ * one dedicates to it
+ *
+ * - setting the global book3e_htw_enabled
+ *
+ * - Set MAS4:INDD and default page size
+ */
+
+ /* XXX This code only checks for TLB 0 capabilities and doesn't
+ * check what page size combos are supported by the HW. It
+ * also doesn't handle the case where a separate array holds
+ * the IND entries from the array loaded by the PT.
+ */
+ if (boot_cpu) {
+ unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG);
+
+ /* Check if HW loader is supported */
+ if ((tlb0cfg & SPRN_TLBnCFG_IND) &&
+ (tlb0cfg & SPRN_TLBnCFG_PT)) {
+ patch_branch(ibase + (0x1c0 / 4),
+ (unsigned long)&exc_data_tlb_miss_htw_book3e, 0);
+ patch_branch(ibase + (0x1e0 / 4),
+ (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0);
+ book3e_htw_enabled = 1;
+ }
+ printk(KERN_INFO "MMU: Book3E Page Tables %s\n",
+ book3e_htw_enabled ? "Enabled" : "Disabled");
+ }
+
+ /* Set MAS4 based on page table setting */
+
+ mas4 = 0x4 << SPRN_MAS4_WIMGED_SHIFT;
+ if (book3e_htw_enabled) {
+ mas4 |= mas4 | SPRN_MAS4_INDD;
+#ifdef CONFIG_PPC_64K_PAGES
+ mas4 |= BOOK3E_PAGESZ_256M << SPRN_MAS4_TSIZED_SHIFT;
+ mmu_pte_psize = MMU_PAGE_256M;
+#else
+ mas4 |= BOOK3E_PAGESZ_1M << SPRN_MAS4_TSIZED_SHIFT;
+ mmu_pte_psize = MMU_PAGE_1M;
+#endif
+ } else {
+#ifdef CONFIG_PPC_64K_PAGES
+ mas4 |= BOOK3E_PAGESZ_64K << SPRN_MAS4_TSIZED_SHIFT;
+#else
+ mas4 |= BOOK3E_PAGESZ_4K << SPRN_MAS4_TSIZED_SHIFT;
+#endif
+ mmu_pte_psize = mmu_virtual_psize;
+ }
+ mtspr(SPRN_MAS4, mas4);
+
+ /* Set the global containing the top of the linear mapping
+ * for use by the TLB miss code
+ */
+ linear_map_top = lmb_end_of_DRAM();
+
+ /* A sync won't hurt us after mucking around with
+ * the MMU configuration
+ */
+ mb();
+}
+
+void __init early_init_mmu(void)
+{
+ __early_init_mmu(1);
+}
+
+void __cpuinit early_init_mmu_secondary(void)
+{
+ __early_init_mmu(0);
+}
Index: linux-work/arch/powerpc/mm/tlb_nohash.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/tlb_nohash.c 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/mm/tlb_nohash.c 2009-06-03 15:19:30.000000000 +1000
@@ -40,6 +40,33 @@
#include "mmu_decl.h"
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ .enc = BOOK3E_PAGESZ_4K,
+ },
+ [MMU_PAGE_64K] = {
+ .shift = 16,
+ .enc = BOOK3E_PAGESZ_64K,
+ },
+ [MMU_PAGE_1M] = {
+ .shift = 20,
+ .enc = BOOK3E_PAGESZ_1M,
+ },
+ [MMU_PAGE_16M] = {
+ .shift = 24,
+ .enc = BOOK3E_PAGESZ_16M,
+ },
+ [MMU_PAGE_256M] = {
+ .shift = 28,
+ .enc = BOOK3E_PAGESZ_256M,
+ },
+ [MMU_PAGE_1G] = {
+ .shift = 30,
+ .enc = BOOK3E_PAGESZ_1GB,
+ },
+};
+
/*
* Base TLB flushing operations:
*
@@ -67,16 +94,23 @@ void local_flush_tlb_mm(struct mm_struct
}
EXPORT_SYMBOL(local_flush_tlb_mm);
-void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+ int psize, int ind)
{
unsigned int pid;
preempt_disable();
- pid = vma ? vma->vm_mm->context.id : 0;
+ pid = mm ? mm->context.id : 0;
if (pid != MMU_NO_CONTEXT)
- _tlbil_va(vmaddr, pid);
+ _tlbil_va(vmaddr, pid, psize, ind);
preempt_enable();
}
+
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+ __local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
+ mmu_psize_defs[mmu_virtual_psize].enc, 0);
+}
EXPORT_SYMBOL(local_flush_tlb_page);
@@ -90,6 +124,8 @@ static DEFINE_SPINLOCK(tlbivax_lock);
struct tlb_flush_param {
unsigned long addr;
unsigned int pid;
+ unsigned int psize;
+ unsigned int ind;
};
static void do_flush_tlb_mm_ipi(void *param)
@@ -103,7 +139,7 @@ static void do_flush_tlb_page_ipi(void *
{
struct tlb_flush_param *p = param;
- _tlbil_va(p->addr, p->pid);
+ _tlbil_va(p->addr, p->pid, p->psize, p->ind);
}
@@ -143,41 +179,53 @@ void flush_tlb_mm(struct mm_struct *mm)
}
EXPORT_SYMBOL(flush_tlb_mm);
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, int psize, int ind)
{
struct cpumask *cpu_mask;
unsigned int pid;
preempt_disable();
- pid = vma ? vma->vm_mm->context.id : 0;
+ pid = mm ? mm->context.id : 0;
if (unlikely(pid == MMU_NO_CONTEXT))
goto bail;
- cpu_mask = mm_cpumask(vma->vm_mm);
+ cpu_mask = mm_cpumask(mm);
if (!cpumask_equal(cpu_mask, cpumask_of(smp_processor_id()))) {
/* If broadcast tlbivax is supported, use it */
if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) {
int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL);
if (lock)
spin_lock(&tlbivax_lock);
- _tlbivax_bcast(vmaddr, pid);
+ _tlbivax_bcast(vmaddr, pid, psize, ind);
if (lock)
spin_unlock(&tlbivax_lock);
goto bail;
} else {
- struct tlb_flush_param p = { .pid = pid, .addr = vmaddr };
+ struct tlb_flush_param p = {
+ .pid = pid,
+ .addr = vmaddr,
+ .psize = psize,
+ .ind = ind,
+ };
/* Ignores smp_processor_id() even if set in cpu_mask */
smp_call_function_many(cpu_mask,
do_flush_tlb_page_ipi, &p, 1);
}
}
- _tlbil_va(vmaddr, pid);
+ _tlbil_va(vmaddr, pid, psize, ind);
bail:
preempt_enable();
}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+ __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
+ mmu_psize_defs[mmu_virtual_psize].enc, 0);
+}
EXPORT_SYMBOL(flush_tlb_page);
#endif /* CONFIG_SMP */
+
/*
* Flush kernel TLB entries in the given range
*/
@@ -207,3 +255,4 @@ void flush_tlb_range(struct vm_area_stru
flush_tlb_mm(vma->vm_mm);
}
EXPORT_SYMBOL(flush_tlb_range);
+
Index: linux-work/arch/powerpc/include/asm/pgalloc.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/pgalloc.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/pgalloc.h 2009-06-03 15:13:29.000000000 +1000
@@ -4,6 +4,12 @@
#include <linux/mm.h>
+#ifdef CONFIG_PPC_BOOK3E
+extern void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address);
+#else
+#define tlb_flush_pgtable(tlb, address) do { } while(0)
+#endif
+
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_page((unsigned long)pte);
@@ -38,14 +44,19 @@ static inline pgtable_free_t pgtable_fre
extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
#ifdef CONFIG_SMP
-#define __pte_free_tlb(tlb,ptepage,address) \
-do { \
- pgtable_page_dtor(ptepage); \
+#define __pte_free_tlb(tlb,ptepage,address) \
+do { \
+ pgtable_page_dtor(ptepage); \
+ tlb_flush_pgtable(tlb, address); \
pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
- PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1)); \
+ PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1)); \
} while (0)
#else
-#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, (pte))
+#define __pte_free_tlb(tlb,pte,address) \
+do { \
+ tlb_flush_pgtable(tlb, address); \
+ pte_free((tlb)->mm, (pte)); \
+} while (0)
#endif
Index: linux-work/arch/powerpc/include/asm/mmu_context.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/mmu_context.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/mmu_context.h 2009-06-03 15:13:29.000000000 +1000
@@ -36,8 +36,13 @@ static inline void switch_mm(struct mm_s
/* 32-bit keeps track of the current PGDIR in the thread struct */
#ifdef CONFIG_PPC32
tsk->thread.pgdir = next->pgd;
+
#endif /* CONFIG_PPC32 */
+ /* 64-bit Book3E keeps track of current PGD in the PACA */
+#ifdef CONFIG_PPC_BOOK3E_64
+ get_paca()->pgd = next->pgd;
+#endif
/* Nothing else to do if we aren't actually switching */
if (prev == next)
return;
@@ -84,6 +89,10 @@ static inline void activate_mm(struct mm
static inline void enter_lazy_tlb(struct mm_struct *mm,
struct task_struct *tsk)
{
+ /* 64-bit Book3E keeps track of current PGD in the PACA */
+#ifdef CONFIG_PPC_BOOK3E_64
+ get_paca()->pgd = NULL;
+#endif
}
#endif /* __KERNEL__ */
Index: linux-work/arch/powerpc/kernel/setup_64.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/setup_64.c 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/kernel/setup_64.c 2009-06-03 17:15:23.000000000 +1000
@@ -61,6 +61,7 @@
#include <asm/xmon.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
+#include <asm/mmu_context.h>
#include "setup.h"
@@ -146,6 +147,9 @@ void __init setup_paca(int cpu)
{
local_paca = &paca[cpu];
mtspr(SPRN_SPRG3, local_paca);
+#ifdef CONFIG_PPC_BOOK3E
+ mtspr(SPRN_SPRG2, local_paca->extlb);
+#endif
}
/*
@@ -452,6 +456,24 @@ static void __init irqstack_early_init(v
#define irqstack_early_init()
#endif
+#ifdef CONFIG_PPC_BOOK3E
+static void __init exc_lvl_early_init(void)
+{
+ unsigned int i;
+
+ for_each_possible_cpu(i) {
+ critirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+ dbgirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+ mcheckirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+ }
+}
+#else
+#define exc_lvl_early_init()
+#endif
+
/*
* Stack space used when we detect a bad kernel stack pointer, and
* early in SMP boots before relocation is enabled.
@@ -511,6 +533,7 @@ void __init setup_arch(char **cmdline_p)
init_mm.brk = klimit;
irqstack_early_init();
+ exc_lvl_early_init();
emergency_stack_init();
#ifdef CONFIG_PPC_STD_MMU_64
@@ -528,6 +551,10 @@ void __init setup_arch(char **cmdline_p)
ppc_md.setup_arch();
paging_init();
+
+ /* Initialize the MMU context management stuff */
+ mmu_context_init();
+
ppc64_boot_msg(0x15, "Setup Done");
}
Index: linux-work/arch/powerpc/kernel/paca.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/paca.c 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/kernel/paca.c 2009-06-03 15:13:29.000000000 +1000
@@ -13,6 +13,7 @@
#include <asm/lppaca.h>
#include <asm/paca.h>
#include <asm/sections.h>
+#include <asm/pgtable.h>
/* This symbol is provided by the linker - let it fill in the paca
* field correctly */
@@ -87,6 +88,8 @@ void __init initialise_pacas(void)
#ifdef CONFIG_PPC_BOOK3S
new_paca->lppaca_ptr = &lppaca[cpu];
+#else
+ new_paca->kernel_pgd = swapper_pg_dir;
#endif
new_paca->lock_token = 0x8000;
new_paca->paca_index = cpu;
Index: linux-work/arch/powerpc/include/asm/hw_irq.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/hw_irq.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/hw_irq.h 2009-06-03 15:13:29.000000000 +1000
@@ -49,8 +49,13 @@ extern void iseries_handle_interrupts(vo
#define raw_irqs_disabled() (local_get_flags() == 0)
#define raw_irqs_disabled_flags(flags) ((flags) == 0)
+#ifdef CONFIG_PPC_BOOK3E
+#define __hard_irq_enable() __asm__ __volatile__("wrteei 1": : :"memory");
+#define __hard_irq_disable() __asm__ __volatile__("wrteei 0": : :"memory");
+#else
#define __hard_irq_enable() __mtmsrd(mfmsr() | MSR_EE, 1)
#define __hard_irq_disable() __mtmsrd(mfmsr() & ~MSR_EE, 1)
+#endif
#define hard_irq_disable() \
do { \
Index: linux-work/arch/powerpc/xmon/xmon.c
===================================================================
--- linux-work.orig/arch/powerpc/xmon/xmon.c 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/xmon/xmon.c 2009-06-03 15:13:29.000000000 +1000
@@ -2570,7 +2570,7 @@ static void xmon_print_symbol(unsigned l
printf("%s", after);
}
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
static void dump_slb(void)
{
int i;
Index: linux-work/arch/powerpc/kernel/process.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/process.c 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/kernel/process.c 2009-06-03 15:13:29.000000000 +1000
@@ -664,6 +664,7 @@ int copy_thread(unsigned long clone_flag
sp_vsid |= SLB_VSID_KERNEL | llp;
p->thread.ksp_vsid = sp_vsid;
}
+#endif /* CONFIG_PPC_STD_MMU_64 */
/*
* The PPC64 ABI makes use of a TOC to contain function
@@ -671,6 +672,7 @@ int copy_thread(unsigned long clone_flag
* to the TOC entry. The first entry is a pointer to the actual
* function.
*/
+#ifdef CONFIG_PPC64
kregs->nip = *((unsigned long *)ret_from_fork);
#else
kregs->nip = (unsigned long)ret_from_fork;
Index: linux-work/arch/powerpc/mm/mmu_context_hash64.c
===================================================================
--- linux-work.orig/arch/powerpc/mm/mmu_context_hash64.c 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/mm/mmu_context_hash64.c 2009-06-03 15:13:29.000000000 +1000
@@ -76,3 +76,8 @@ void destroy_context(struct mm_struct *m
mm->context.id = NO_CONTEXT;
}
+
+void __init mmu_context_init(void)
+{
+}
+
Index: linux-work/arch/powerpc/include/asm/page.h
===================================================================
--- linux-work.orig/arch/powerpc/include/asm/page.h 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/include/asm/page.h 2009-06-03 15:13:29.000000000 +1000
@@ -139,7 +139,11 @@ extern phys_addr_t kernstart_addr;
* Don't compare things with KERNELBASE or PAGE_OFFSET to test for
* "kernelness", use is_kernel_addr() - it should do what you want.
*/
+#ifdef CONFIG_PPC_BOOK3E_64
+#define is_kernel_addr(x) ((x) >= 0x8000000000000000ul)
+#else
#define is_kernel_addr(x) ((x) >= PAGE_OFFSET)
+#endif
#ifndef __ASSEMBLY__
Index: linux-work/arch/powerpc/mm/tlb_nohash_low.S
===================================================================
--- linux-work.orig/arch/powerpc/mm/tlb_nohash_low.S 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/mm/tlb_nohash_low.S 2009-06-03 15:58:21.000000000 +1000
@@ -191,6 +191,85 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_US
isync
1: wrtee r10
blr
+#elif defined(CONFIG_PPC_BOOK3E)
+/*
+ * New Book3E (>= 2.06) implementation
+ *
+ * Note: We may be able to get away without the interrupt masking stuff
+ * if we save/restore MAS6 on exceptions that might modify it
+ */
+_GLOBAL(_tlbil_pid)
+ slwi r4,r3,SPRN_MAS6_SPID_SHIFT
+ mfmsr r10
+ wrteei 0
+ mtspr SPRN_MAS6,r4
+ PPC_TLBILX_PID(0,0)
+ wrtee r10
+ msync
+ isync
+ blr
+
+_GLOBAL(_tlbil_pid_noind)
+ slwi r4,r3,SPRN_MAS6_SPID_SHIFT
+ mfmsr r10
+ ori r4,r4,SPRN_MAS6_SIND
+ wrteei 0
+ mtspr SPRN_MAS6,r4
+ PPC_TLBILX_PID(0,0)
+ wrtee r10
+ msync
+ isync
+ blr
+
+_GLOBAL(_tlbil_all)
+ PPC_TLBILX_ALL(0,0)
+ msync
+ isync
+ blr
+
+_GLOBAL(_tlbil_va)
+ mfmsr r10
+ wrteei 0
+ cmpwi cr0,r6,0
+ slwi r4,r4,SPRN_MAS6_SPID_SHIFT
+ rlwimi r4,r5,SPRN_MAS6_ISIZE_SHIFT,SPRN_MAS6_ISIZE_MASK
+ beq 1f
+ rlwimi r4,r6,SPRN_MAS6_SIND_SHIFT,SPRN_MAS6_SIND
+1: mtspr SPRN_MAS6,r4 /* assume AS=0 for now */
+ PPC_TLBILX_VA(0,r3)
+ msync
+ isync
+ wrtee r10
+ blr
+
+_GLOBAL(_tlbivax_bcast)
+ mfmsr r10
+ wrteei 0
+ cmpwi cr0,r6,0
+ slwi r4,r4,SPRN_MAS6_SPID_SHIFT
+ rlwimi r4,r5,SPRN_MAS6_ISIZE_SHIFT,SPRN_MAS6_ISIZE_MASK
+ beq 1f
+ rlwimi r4,r6,SPRN_MAS6_SIND_SHIFT,SPRN_MAS6_SIND
+1: mtspr SPRN_MAS6,r4 /* assume AS=0 for now */
+ PPC_TLBIVAX(0,r3)
+ eieio
+ tlbsync
+ sync
+ wrtee r10
+ blr
+
+_GLOBAL(set_context)
+#ifdef CONFIG_BDI_SWITCH
+ /* Context switch the PTE pointer for the Abatron BDI2000.
+ * The PGDIR is the second parameter.
+ */
+ lis r5, abatron_pteptrs at h
+ ori r5, r5, abatron_pteptrs at l
+ stw r4, 0x4(r5)
+#endif
+ mtspr SPRN_PID,r3
+ isync /* Force context change */
+ blr
#else
#error Unsupported processor type !
#endif
Index: linux-work/arch/powerpc/platforms/Kconfig.cputype
===================================================================
--- linux-work.orig/arch/powerpc/platforms/Kconfig.cputype 2009-06-03 15:11:20.000000000 +1000
+++ linux-work/arch/powerpc/platforms/Kconfig.cputype 2009-06-03 15:16:05.000000000 +1000
@@ -20,7 +20,7 @@ choice
If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx.
-config PPC_BOOK3S
+config PPC_BOOK3S_32
bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
select PPC_FPU
@@ -56,11 +56,34 @@ config E200
endchoice
-config PPC_BOOK3S
- default y
- depends on PPC64
+choice
+ prompt "Processor Type"
+ depends on PPC64
+ help
+ There are two families of 64 bit PowerPC chips supported.
+ The most common ones are the desktop and server CPUs
+ (POWER3, RS64, POWER4, POWER5, POWER5+, POWER6, ...)
+
+ The other are the "embedded" processors compliant with the
+ "Book 3E" variant of the architecture
+
+config PPC_BOOK3S_64
+ bool "Server processors"
select PPC_FPU
+config PPC_BOOK3E_64
+ bool "Embedded processors"
+ select PPC_FPU # Make it a choice ?
+
+endchoice
+
+config PPC_BOOK3E
+ def_bool y
+ depends on PPC_BOOK3E_32 || PPC_BOOK3E_64
+
+config PPC_BOOK3S
+ def_bool y
+ depends on PPC_BOOK3S_32 || PPC_BOOK3S_64
config POWER4_ONLY
bool "Optimize for POWER4"
@@ -120,7 +143,7 @@ config 4xx
config BOOKE
bool
- depends on E200 || E500 || 44x
+ depends on E200 || E500 || 44x || PPC_BOOK3E
default y
config FSL_BOOKE
@@ -218,9 +241,17 @@ config PPC_MMU_NOHASH
def_bool y
depends on !PPC_STD_MMU
+config PPC_MMU_NOHASH_32
+ def_bool y
+ depends on PPC_MMU_NOHASH && PPC32
+
+config PPC_MMU_NOHASH_64
+ def_bool y
+ depends on PPC_MMU_NOHASH && PPC64
+
config PPC_BOOK3E_MMU
def_bool y
- depends on FSL_BOOKE
+ depends on FSL_BOOKE || PPC_BOOK3E
config PPC_MM_SLICES
bool
@@ -243,7 +274,7 @@ config VIRT_CPU_ACCOUNTING
If in doubt, say Y here.
config SMP
- depends on PPC_STD_MMU || FSL_BOOKE
+ depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE
bool "Symmetric multi-processing support"
---help---
This enables support for systems with more than one CPU. If you have
More information about the Linuxppc-dev
mailing list