[PATCH 2/2] powerpc/e6500: TLB miss handler with hardware tablewalk support
Benjamin Herrenschmidt
benh at kernel.crashing.org
Fri Sep 7 14:41:20 EST 2012
On Thu, 2012-06-14 at 18:41 -0500, Scott Wood wrote:
> There are a few things that make the existing hw tablewalk handlers
> unsuitable for e6500:
>
> - Indirect entries go in TLB1 (though the resulting direct entries go in
> TLB0).
>
> - It has threads, but no "tlbsrx." -- so we need a spinlock and
> a normal "tlbsx". Because we need this lock, hardware tablewalk
> is mandatory on e6500 unless we want to add spinlock+tlbsx to
> the normal bolted TLB miss handler.
>
> - TLB1 has no HES (nor next-victim hint) so we need software round robin
> (TODO: integrate this round robin data with hugetlb/KVM)
>
> - The existing tablewalk handlers map half of a page table at a time,
> because IBM hardware has a fixed 1MiB indirect page size. e6500
> has variable size indirect entries, with a minimum of 2MiB.
> So we can't do the half-page indirect mapping, and even if we
> could it would be less efficient than mapping the full page.
>
> - Like on e5500, the linear mapping is bolted, so we don't need the
> overhead of supporting nested tlb misses.
>
> Note that hardware tablewalk does not work in rev1 of e6500.
> We do not expect to support e6500 rev1 in mainline Linux.
>
> Signed-off-by: Scott Wood <scottwood at freescale.com>
> ---
> arch/powerpc/include/asm/mmu-book3e.h | 13 +++
> arch/powerpc/include/asm/mmu.h | 21 ++--
> arch/powerpc/include/asm/paca.h | 6 +
> arch/powerpc/kernel/asm-offsets.c | 10 ++
> arch/powerpc/kernel/paca.c | 5 +
> arch/powerpc/kernel/setup_64.c | 33 +++++++
> arch/powerpc/mm/fsl_booke_mmu.c | 8 ++
> arch/powerpc/mm/tlb_low_64e.S | 167 +++++++++++++++++++++++++++++++++
> arch/powerpc/mm/tlb_nohash.c | 109 ++++++++++++++++------
> 9 files changed, 335 insertions(+), 37 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
> index eeabcdb..3072aa0 100644
> --- a/arch/powerpc/include/asm/mmu-book3e.h
> +++ b/arch/powerpc/include/asm/mmu-book3e.h
> @@ -264,8 +264,21 @@ extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
> extern int mmu_linear_psize;
> extern int mmu_vmemmap_psize;
>
> +struct book3e_tlb_per_core {
> + /* For software way selection, as on Freescale TLB1 */
> + u8 esel_next, esel_max, esel_first;
> +
> + /* Per-core spinlock for e6500 TLB handlers (no tlbsrx.) */
> + u8 lock;
> +};
I'm no fan of the name ... how about tlb_core_data? Probably don't even
need the book3e prefix, really.
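e.g. (purely a naming suggestion, contents unchanged from your patch):

/* Same fields, just the shorter name: */
struct tlb_core_data {
	/* For software way selection, as on Freescale TLB1 */
	u8 esel_next, esel_max, esel_first;

	/* Per-core spinlock for e6500 TLB handlers (no tlbsrx.) */
	u8 lock;
};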
> #ifdef CONFIG_PPC64
> extern unsigned long linear_map_top;
> +extern int book3e_htw_mode;
> +
> +#define PPC_HTW_NONE 0
> +#define PPC_HTW_IBM 1
> +#define PPC_HTW_E6500 2
Sad :-( Wonder why we bother with an architecture, really ...
> /*
> * 64-bit booke platforms don't load the tlb in the tlb miss handler code.
> diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
> index a9e9ec6..63d97eb 100644
> --- a/arch/powerpc/include/asm/mmu.h
> +++ b/arch/powerpc/include/asm/mmu.h
> @@ -170,16 +170,17 @@ extern u64 ppc64_rma_size;
> #define MMU_PAGE_64K_AP 3 /* "Admixed pages" (hash64 only) */
> #define MMU_PAGE_256K 4
> #define MMU_PAGE_1M 5
> -#define MMU_PAGE_4M 6
> -#define MMU_PAGE_8M 7
> -#define MMU_PAGE_16M 8
> -#define MMU_PAGE_64M 9
> -#define MMU_PAGE_256M 10
> -#define MMU_PAGE_1G 11
> -#define MMU_PAGE_16G 12
> -#define MMU_PAGE_64G 13
> -
> -#define MMU_PAGE_COUNT 14
> +#define MMU_PAGE_2M 6
> +#define MMU_PAGE_4M 7
> +#define MMU_PAGE_8M 8
> +#define MMU_PAGE_16M 9
> +#define MMU_PAGE_64M 10
> +#define MMU_PAGE_256M 11
> +#define MMU_PAGE_1G 12
> +#define MMU_PAGE_16G 13
> +#define MMU_PAGE_64G 14
> +
> +#define MMU_PAGE_COUNT 15
Let's pray that won't hit a funny bug on server :-)
> #if defined(CONFIG_PPC_STD_MMU_64)
> /* 64-bit classic hash table MMU */
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index daf813f..4e18bb5 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -108,6 +108,12 @@ struct paca_struct {
> /* Keep pgd in the same cacheline as the start of extlb */
> pgd_t *pgd __attribute__((aligned(0x80))); /* Current PGD */
> pgd_t *kernel_pgd; /* Kernel PGD */
> +
> + struct book3e_tlb_per_core tlb_per_core;
> +
> + /* Points to the tlb_per_core of the first thread on this core. */
> + struct book3e_tlb_per_core *tlb_per_core_ptr;
> +
That's gross. Can't you allocate them elsewhere and then populate the
PACA pointers ?
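Something along these lines maybe -- untested sketch, one allocation per
core and every thread's PACA just points at its core's copy (whether
memblock is the right allocator at the point this runs is a guess, and
I've left out the SMT sanity check from your patch):

static void __init setup_tlb_per_core(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		int first = cpu_first_thread_sibling(cpu);

		if (cpu == first) {
			struct book3e_tlb_per_core *tcd;

			tcd = __va(memblock_alloc(sizeof(*tcd), 16));
			memset(tcd, 0, sizeof(*tcd));
			paca[cpu].tlb_per_core_ptr = tcd;
		} else {
			/* first < cpu, so the core's copy already exists */
			paca[cpu].tlb_per_core_ptr =
				paca[first].tlb_per_core_ptr;
		}
	}
}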
> /* We can have up to 3 levels of reentrancy in the TLB miss handler */
> u64 extlb[3][EX_TLB_SIZE / sizeof(u64)];
> u64 exmc[8]; /* used for machine checks */
> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> index 52c7ad7..61f4634 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -168,6 +168,16 @@ int main(void)
> DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack));
> DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack));
> DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack));
> + DEFINE(PACA_TLB_PER_CORE_PTR,
> + offsetof(struct paca_struct, tlb_per_core_ptr));
> +
> + DEFINE(PERCORE_TLB_ESEL_NEXT,
> + offsetof(struct book3e_tlb_per_core, esel_next));
> + DEFINE(PERCORE_TLB_ESEL_MAX,
> + offsetof(struct book3e_tlb_per_core, esel_max));
> + DEFINE(PERCORE_TLB_ESEL_FIRST,
> + offsetof(struct book3e_tlb_per_core, esel_first));
> + DEFINE(PERCORE_TLB_LOCK, offsetof(struct book3e_tlb_per_core, lock));
> #endif /* CONFIG_PPC_BOOK3E */
>
> #ifdef CONFIG_PPC_STD_MMU_64
> diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
> index fbe1a12..65abfc0 100644
> --- a/arch/powerpc/kernel/paca.c
> +++ b/arch/powerpc/kernel/paca.c
> @@ -145,6 +145,11 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
> #ifdef CONFIG_PPC_STD_MMU_64
> new_paca->slb_shadow_ptr = &slb_shadow[cpu];
> #endif /* CONFIG_PPC_STD_MMU_64 */
> +
> +#ifdef CONFIG_PPC_BOOK3E
> + /* For now -- if we have threads this will be adjusted later */
> + new_paca->tlb_per_core_ptr = &new_paca->tlb_per_core;
> +#endif
> }
>
> /* Put the paca pointer into r13 and SPRG_PACA */
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 389bd4f..271b85d 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -102,6 +102,37 @@ int ucache_bsize;
>
> static char *smt_enabled_cmdline;
>
> +#ifdef CONFIG_PPC_BOOK3E
> +static void setup_tlb_per_core(void)
> +{
> + int cpu;
> +
> + for_each_possible_cpu(cpu) {
> + int first = cpu_first_thread_sibling(cpu);
> +
> + paca[cpu].tlb_per_core_ptr = &paca[first].tlb_per_core;
> +
> + /*
> + * If we have threads, we need either tlbsrx.
> + * or e6500 tablewalk mode, or else TLB handlers
> + * will be racy and could produce duplicate entries.
> + */
> + if (smt_enabled_at_boot >= 2 &&
> + !mmu_has_feature(MMU_FTR_USE_TLBRSRV) &&
> + book3e_htw_mode != PPC_HTW_E6500) {
> + /* Should we panic instead? */
> + WARN_ONCE("%s: unsupported MMU configuration -- expect problems\n",
> + __func__);
> + }
> + }
> +}
> +#else
> +static void setup_tlb_per_core(void)
> +{
> +}
> +#endif
> +
> +
> /* Look for ibm,smt-enabled OF option */
> static void check_smt_enabled(void)
> {
> @@ -142,6 +173,8 @@ static void check_smt_enabled(void)
> of_node_put(dn);
> }
> }
> +
> + setup_tlb_per_core();
> }
I'd rather you move that to the caller
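i.e. something like this (from memory the call site is setup_system(),
so take the exact spot with a grain of salt):

	/* in setup_system(), right after the SMT setup */
	check_smt_enabled();
	setup_tlb_per_core();	/* explicit here, not buried in the SMT check */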
> /* Look for smt-enabled= cmdline option */
> diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
> index 07ba45b..bf06d36b 100644
> --- a/arch/powerpc/mm/fsl_booke_mmu.c
> +++ b/arch/powerpc/mm/fsl_booke_mmu.c
> @@ -52,6 +52,7 @@
> #include <asm/smp.h>
> #include <asm/machdep.h>
> #include <asm/setup.h>
> +#include <asm/paca.h>
>
> #include "mmu_decl.h"
>
> @@ -192,6 +193,13 @@ unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
> }
> tlbcam_index = i;
>
> +#ifdef CONFIG_PPC64
> + get_paca()->tlb_per_core.esel_next = i;
> + get_paca()->tlb_per_core.esel_max =
> + mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
> + get_paca()->tlb_per_core.esel_first = i;
> +#endif
> +
> return amount_mapped;
> }
>
> diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
> index efe0f33..8e82772 100644
> --- a/arch/powerpc/mm/tlb_low_64e.S
> +++ b/arch/powerpc/mm/tlb_low_64e.S
> @@ -232,6 +232,173 @@ itlb_miss_fault_bolted:
> beq tlb_miss_common_bolted
> b itlb_miss_kernel_bolted
>
> +/*
> + * TLB miss handling for e6500 and derivatives, using hardware tablewalk.
> + *
> + * Linear mapping is bolted: no virtual page table or nested TLB misses
> + * Indirect entries in TLB1, hardware loads resulting direct entries
> + * into TLB0
> + * No HES or NV hint on TLB1, so we need to do software round-robin
> + * No tlbsrx. so we need a spinlock, and we have to deal
> + * with MAS-damage caused by tlbsx
Ouch ... so for every indirect entry you have to take a lock, backup the
MAS, do a tlbsx, restore the MAS, insert the entry and drop the lock ?
After all that, do you have some bullets left for the HW designers ?
Remind me to also shoot myself for allowing tlbsrx. and HES to be
optional in MAV2 :-(
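For the record, the per-miss dance comes out roughly like this in C-ish
form -- only a sketch of the asm below, tcd_lock()/tcd_unlock() are
made-up names for the lbarx/stbcx. lock, and the real thing obviously
runs in exception context with fixed register usage:

static void e6500_indirect_tlb_miss(struct book3e_tlb_per_core *tcd,
				    unsigned long ea)
{
	unsigned long mas2;

	tcd_lock(&tcd->lock);			/* per-core, no tlbsrx. */
	mas2 = mfspr(SPRN_MAS2);		/* tlbsx clobbers MAS1/MAS2 */
	asm volatile("tlbsx 0,%0" : : "r" (ea));
	if (!(mfspr(SPRN_MAS1) & MAS1_VALID)) {
		/* undo the MAS damage from the failed search */
		mtspr(SPRN_MAS1, mfspr(SPRN_MAS1) | MAS1_VALID);
		mtspr(SPRN_MAS2, mas2);
		/* walk pgd/pud/pmd, set MAS7_MAS3, round-robin the ESEL */
		asm volatile("tlbwe");
	}
	tcd_unlock(&tcd->lock);
}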
> + * 4K pages only
> + */
> +
> + START_EXCEPTION(instruction_tlb_miss_e6500)
> + tlb_prolog_bolted SPRN_SRR0
> +
> + ld r11,PACA_TLB_PER_CORE_PTR(r13)
> + srdi. r15,r16,60 /* get region */
> + ori r16,r16,1
> +
> + TLB_MISS_STATS_SAVE_INFO_BOLTED
> + bne tlb_miss_kernel_e6500 /* user/kernel test */
> +
> + b tlb_miss_common_e6500
> +
> + START_EXCEPTION(data_tlb_miss_e6500)
> + tlb_prolog_bolted SPRN_DEAR
> +
> + ld r11,PACA_TLB_PER_CORE_PTR(r13)
> + srdi. r15,r16,60 /* get region */
> + rldicr r16,r16,0,62
> +
> + TLB_MISS_STATS_SAVE_INFO_BOLTED
> + bne tlb_miss_kernel_e6500 /* user vs kernel check */
> +
> +/*
> + * This is the guts of the TLB miss handler for e6500 and derivatives.
> + * We are entered with:
> + *
> + * r16 = page of faulting address (low bit 0 if data, 1 if instruction)
> + * r15 = crap (free to use)
> + * r14 = page table base
> + * r13 = PACA
> + * r11 = tlb_per_core ptr
> + * r10 = crap (free to use)
> + */
> +tlb_miss_common_e6500:
> + /*
> + * Search if we already have an indirect entry for that virtual
> + * address, and if we do, bail out.
> + *
> + * MAS6:IND should be already set based on MAS4
> + */
> + addi r10,r11,PERCORE_TLB_LOCK
> +1: lbarx r15,0,r10
> + cmpdi r15,0
> + bne 2f
> + li r15,1
> + stbcx. r15,0,r10
No need for barriers here ?
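FWIW the usual lock-acquire pattern (what arch_spin_lock() ends up
doing) has an isync after the successful stbcx. -- rough sketch, same
made-up tcd_lock() name as above:

static inline void tcd_lock(u8 *lock)
{
	unsigned long tmp;

	__asm__ __volatile__(
"1:	lbarx	%0,0,%1\n"
"	cmpdi	%0,0\n"
"	bne-	1b\n"		/* (your version spins on a plain lbz, which is nicer) */
"	li	%0,1\n"
"	stbcx.	%0,0,%1\n"
"	bne-	1b\n"
"	isync"			/* acquire barrier */
	: "=&r" (tmp) : "r" (lock) : "cr0", "memory");
}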
> + bne 1b
> + .subsection 1
> +2: lbz r15,0(r10)
> + cmpdi r15,0
> + bne 2b
> + b 1b
> + .previous
> +
> + mfspr r15,SPRN_MAS2
> +
> + tlbsx 0,r16
> + mfspr r10,SPRN_MAS1
> + andis. r10,r10,MAS1_VALID@h
> + bne tlb_miss_done_e6500
> +
> + /* Undo MAS-damage from the tlbsx */
> + mfspr r10,SPRN_MAS1
> + oris r10,r10,MAS1_VALID@h
> + mtspr SPRN_MAS1,r10
> + mtspr SPRN_MAS2,r15
> +
> + /* Now, we need to walk the page tables. First check if we are in
> + * range.
> + */
> + rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
> + bne- tlb_miss_fault_e6500
> +
> + rldicl r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3
> + cmpldi cr0,r14,0
> + clrrdi r15,r15,3
> + beq- tlb_miss_fault_e6500 /* No PGDIR, bail */
> + ldx r14,r14,r15 /* grab pgd entry */
> +
> + rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
> + clrrdi r15,r15,3
> + cmpdi cr0,r14,0
> + bge tlb_miss_fault_e6500 /* Bad pgd entry or hugepage; bail */
> + ldx r14,r14,r15 /* grab pud entry */
> +
> + rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
> + clrrdi r15,r15,3
> + cmpdi cr0,r14,0
> + bge tlb_miss_fault_e6500
> + ldx r14,r14,r15 /* Grab pmd entry */
> +
> + mfspr r10,SPRN_MAS0
> + cmpdi cr0,r14,0
> + bge tlb_miss_fault_e6500
> +
> + /* Now we build the MAS for a 2M indirect page:
> + *
> + * MAS 0 : ESEL needs to be filled by software round-robin
> + * MAS 1 : Almost fully setup
> + * - PID already updated by caller if necessary
> + * - TSIZE for now is base ind page size always
> + * MAS 2 : Use defaults
> + * MAS 3+7 : Needs to be done
> + */
> +
> + ori r14,r14,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
> + mtspr SPRN_MAS7_MAS3,r14
> +
> + lbz r15,PERCORE_TLB_ESEL_NEXT(r11)
> + lbz r16,PERCORE_TLB_ESEL_MAX(r11)
> + lbz r14,PERCORE_TLB_ESEL_FIRST(r11)
> + rlwimi r10,r15,16,0x00ff0000 /* insert esel_next into MAS0 */
> + addi r15,r15,1 /* increment esel_next */
> + mtspr SPRN_MAS0,r10
> + cmpw r15,r16
> + iseleq r15,r14,r15 /* if next == last use first */
> + stb r15,PERCORE_TLB_ESEL_NEXT(r11)
> +
> + tlbwe
> +
> +tlb_miss_done_e6500:
> + .macro tlb_unlock_e6500
> + li r15,0
> + isync
> + stb r15,PERCORE_TLB_LOCK(r11)
> + .endm
> +
> + tlb_unlock_e6500
> + TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
> + tlb_epilog_bolted
> + rfi
> +
> +tlb_miss_kernel_e6500:
> + mfspr r10,SPRN_MAS1
> + ld r14,PACA_KERNELPGD(r13)
> + cmpldi cr0,r15,8 /* Check for vmalloc region */
> + rlwinm r10,r10,0,16,1 /* Clear TID */
> + mtspr SPRN_MAS1,r10
> + beq+ tlb_miss_common_e6500
> +
> +tlb_miss_fault_e6500:
> + tlb_unlock_e6500
> + /* We need to check if it was an instruction miss */
> + andi. r16,r16,1
> + bne itlb_miss_fault_e6500
> +dtlb_miss_fault_e6500:
> + TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
> + tlb_epilog_bolted
> + b exc_data_storage_book3e
> +itlb_miss_fault_e6500:
> + TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
> + tlb_epilog_bolted
> + b exc_instruction_storage_book3e
> +
> +
> /**********************************************************************
> * *
> * TLB miss handling for Book3E with TLB reservation and HES support *
> diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
> index df32a83..2f09ddf 100644
> --- a/arch/powerpc/mm/tlb_nohash.c
> +++ b/arch/powerpc/mm/tlb_nohash.c
> @@ -43,6 +43,7 @@
> #include <asm/tlb.h>
> #include <asm/code-patching.h>
> #include <asm/hugetlb.h>
> +#include <asm/paca.h>
>
> #include "mmu_decl.h"
>
> @@ -58,6 +59,10 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
> .shift = 12,
> .enc = BOOK3E_PAGESZ_4K,
> },
> + [MMU_PAGE_2M] = {
> + .shift = 21,
> + .enc = BOOK3E_PAGESZ_2M,
> + },
> [MMU_PAGE_4M] = {
> .shift = 22,
> .enc = BOOK3E_PAGESZ_4M,
> @@ -136,7 +141,7 @@ static inline int mmu_get_tsize(int psize)
> int mmu_linear_psize; /* Page size used for the linear mapping */
> int mmu_pte_psize; /* Page size used for PTE pages */
> int mmu_vmemmap_psize; /* Page size used for the virtual mem map */
> -int book3e_htw_enabled; /* Is HW tablewalk enabled ? */
> +int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */
> unsigned long linear_map_top; /* Top of linear mapping */
>
> #endif /* CONFIG_PPC64 */
> @@ -377,7 +382,7 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
> {
> int tsize = mmu_psize_defs[mmu_pte_psize].enc;
>
> - if (book3e_htw_enabled) {
> + if (book3e_htw_mode) {
Make it if (book3e_htw_mode != PPC_HTW_NONE)
> unsigned long start = address & PMD_MASK;
> unsigned long end = address + PMD_SIZE;
> unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
> @@ -413,10 +418,10 @@ static void setup_page_sizes(void)
> int i, psize;
>
> #ifdef CONFIG_PPC_FSL_BOOK3E
> + int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E);
> unsigned int mmucfg = mfspr(SPRN_MMUCFG);
>
> - if (((mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) &&
> - (mmu_has_feature(MMU_FTR_TYPE_FSL_E))) {
> + if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
> unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG);
> unsigned int min_pg, max_pg;
>
> @@ -430,7 +435,7 @@ static void setup_page_sizes(void)
> def = &mmu_psize_defs[psize];
> shift = def->shift;
>
> - if (shift == 0)
> + if (shift == 0 || shift & 1)
> continue;
>
> /* adjust to be in terms of 4^shift Kb */
> @@ -440,7 +445,40 @@ static void setup_page_sizes(void)
> def->flags |= MMU_PAGE_SIZE_DIRECT;
> }
>
> - goto no_indirect;
> + goto out;
> + }
> +
> + if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) {
> + u32 tlb1cfg, tlb1ps;
> +
> + tlb0cfg = mfspr(SPRN_TLB0CFG);
> + tlb1cfg = mfspr(SPRN_TLB1CFG);
> + tlb1ps = mfspr(SPRN_TLB1PS);
> + eptcfg = mfspr(SPRN_EPTCFG);
> +
> + if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT))
> + book3e_htw_mode = PPC_HTW_E6500;
> +
> + /*
> + * We expect 4K subpage size and unrestricted indirect size.
> + * The lack of a restriction on indirect size is a Freescale
> + * extension, indicated by PSn = 0 but SPSn != 0.
> + */
> + if (eptcfg != 2)
> + book3e_htw_mode = PPC_HTW_NONE;
> +
> + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
> + struct mmu_psize_def *def = &mmu_psize_defs[psize];
> +
> + if (tlb1ps & (1U << (def->shift - 10))) {
> + def->flags |= MMU_PAGE_SIZE_DIRECT;
> +
> + if (book3e_htw_mode && psize == MMU_PAGE_2M)
> + def->flags |= MMU_PAGE_SIZE_INDIRECT;
> + }
> + }
> +
> + goto out;
> }
> #endif
>
> @@ -457,8 +495,11 @@ static void setup_page_sizes(void)
> }
>
> /* Indirect page sizes supported ? */
> - if ((tlb0cfg & TLBnCFG_IND) == 0)
> - goto no_indirect;
> + if ((tlb0cfg & TLBnCFG_IND) == 0 ||
> + (tlb0cfg & TLBnCFG_PT) == 0)
> + goto out;
> +
> + book3e_htw_mode = PPC_HTW_IBM;
>
> /* Now, we only deal with one IND page size for each
> * direct size. Hopefully all implementations today are
> @@ -483,8 +524,8 @@ static void setup_page_sizes(void)
> def->ind = ps + 10;
> }
> }
> - no_indirect:
>
> +out:
> /* Cleanup array and print summary */
> pr_info("MMU: Supported page sizes\n");
> for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
> @@ -525,23 +566,23 @@ static void __patch_exception(int exc, unsigned long addr)
>
> static void setup_mmu_htw(void)
> {
> - /* Check if HW tablewalk is present, and if yes, enable it by:
> - *
> - * - patching the TLB miss handlers to branch to the
> - * one dedicates to it
> - *
> - * - setting the global book3e_htw_enabled
> - */
> - unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG);
> + /*
> + * If we want to use HW tablewalk, enable it by patching the TLB miss
> + * handlers to branch to the one dedicated to it.
> + */
>
> - if ((tlb0cfg & TLBnCFG_IND) &&
> - (tlb0cfg & TLBnCFG_PT)) {
> + switch (book3e_htw_mode) {
> + case PPC_HTW_IBM:
> patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);
> patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e);
> - book3e_htw_enabled = 1;
> + break;
> + case PPC_HTW_E6500:
> + patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e);
> + patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e);
> + break;
> }
> pr_info("MMU: Book3E HW tablewalk %s\n",
> - book3e_htw_enabled ? "enabled" : "not supported");
> + book3e_htw_mode ? "enabled" : "not supported");
> }
>
> /*
> @@ -581,8 +622,16 @@ static void __early_init_mmu(int boot_cpu)
> /* Set MAS4 based on page table setting */
>
> mas4 = 0x4 << MAS4_WIMGED_SHIFT;
> - if (book3e_htw_enabled) {
> - mas4 |= mas4 | MAS4_INDD;
> + switch (book3e_htw_mode) {
> + case PPC_HTW_E6500:
> + mas4 |= MAS4_INDD;
> + mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT;
> + mas4 |= MAS4_TLBSELD(1);
> + mmu_pte_psize = MMU_PAGE_2M;
> + break;
> +
> + case PPC_HTW_IBM:
> + mas4 |= MAS4_INDD;
> #ifdef CONFIG_PPC_64K_PAGES
> mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT;
> mmu_pte_psize = MMU_PAGE_256M;
> @@ -590,13 +639,16 @@ static void __early_init_mmu(int boot_cpu)
> mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;
> mmu_pte_psize = MMU_PAGE_1M;
> #endif
> - } else {
> + break;
> +
> + case PPC_HTW_NONE:
> #ifdef CONFIG_PPC_64K_PAGES
> mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT;
> #else
> mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
> #endif
> mmu_pte_psize = mmu_virtual_psize;
> + break;
> }
> mtspr(SPRN_MAS4, mas4);
>
> @@ -616,8 +668,11 @@ static void __early_init_mmu(int boot_cpu)
> /* limit memory so we dont have linear faults */
> memblock_enforce_memory_limit(linear_map_top);
>
> - patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
> - patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e);
> + if (book3e_htw_mode == PPC_HTW_NONE) {
> + patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
> + patch_exception(0x1e0,
> + exc_instruction_tlb_miss_bolted_book3e);
> + }
> }
> #endif
>
Ben.