[RFC NO-MERGE 2/2] arch/powerpc/CAS: Update to new option-vector-5 format for CAS
Suraj Jitindar Singh
sjitindarsingh at gmail.com
Fri Feb 24 14:46:53 AEDT 2017
On Thu, 2017-02-23 at 15:44 +1100, Paul Mackerras wrote:
> On Tue, Feb 21, 2017 at 05:06:11PM +1100, Suraj Jitindar Singh wrote:
> >
> > The CAS process has been updated to change how the host to guest
> Once again, explain CAS; perhaps "The ibm,client-architecture-support
> (CAS) negotiation process has been updated for POWER9 to ..."
>
> >
> > negotiation is done for the new hash/radix mmu as well as the nest
> > mmu,
> > process tables and guest translation shootdown (GTSE).
> >
> > The host tells the guest which options it supports in
> > ibm,arch-vec-5-platform-support. The guest then chooses a subset of
> > these
> > to request in the CAS call and these are agreed to in the
> > ibm,architecture-vec-5 property of the chosen node.
> >
> > Thus we read ibm,arch-vec-5-platform-support and make our selection
> > before
> > calling CAS. We then parse the ibm,architecture-vec-5 property of
> > the
> > chosen node to check whether we should run as hash or radix.
> >
> > Signed-off-by: Suraj Jitindar Singh <sjitindarsingh at gmail.com>
> > ---
> > arch/powerpc/include/asm/prom.h | 16 ++++---
> > arch/powerpc/kernel/prom_init.c | 99
> > +++++++++++++++++++++++++++++++++++++++--
> > arch/powerpc/mm/init_64.c | 31 ++++++++++---
> > 3 files changed, 130 insertions(+), 16 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/prom.h
> > b/arch/powerpc/include/asm/prom.h
> > index 8af2546..19d2e84 100644
> > --- a/arch/powerpc/include/asm/prom.h
> > +++ b/arch/powerpc/include/asm/prom.h
> > @@ -158,12 +158,16 @@ struct of_drconf_cell {
> > #define OV5_PFO_HW_ENCR 0x1120 /* PFO
> > Encryption Accelerator */
> > #define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-
> > Processors supported */
> > #define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation
> > supported */
> > -#define OV5_MMU_RADIX_300 0x1880 /* ISA v3.00 radix
> > MMU supported */
> > -#define OV5_MMU_HASH_300 0x1840 /* ISA v3.00 hash
> > MMU supported */
> > -#define OV5_MMU_SEGM_RADIX 0x1820 /* radix mode (no
> > segmentation) */
> > -#define OV5_MMU_PROC_TBL 0x1810 /* hcall selects SLB
> > or proc table */
> > -#define OV5_MMU_SLB 0x1800 /* always use SLB
> > */
> > -#define OV5_MMU_GTSE 0x1808 /* Guest
> > translation shootdown */
> > +/* MMU Base Architecture */
> > +#define OV5_MMU_HASH_300 0x1800 /* ISA v3.00 Hash
> > MMU Only */
> This is actually legacy HPT as well as ISA v3.00 HPT.
True
>
> >
> > +#define OV5_MMU_RADIX_300 0x1840 /* ISA v3.00 Radix
> > MMU Only */
> > +#define OV5_MMU_EITHER_300 0x1880 /* ISA v3.00 Hash
> > or Radix Supported */
> I wonder if it would work better to have a define for the 2-bit field
> with subsidiary definitions for the field values. Something like
>
> #define OV5_MMU_SELECTION 0x18c0
> #define OV5_MMU_HPT 0x00
> #define OV5_MMU_RADIX 0x40
> #define OV5_MMU_EITHER 0x80
Yep that's clearer
>
> >
> > +#define OV5_NMMU 0x1820 /* Nest MMU
> > Available */
> > +/* Hash Table Extensions */
> > +#define OV5_HASH_SEG_TBL 0x1980 /* In Memory Segment
> > Tables Available */
> > +#define OV5_HASH_GTSE 0x1940 /* Guest
> > Translation Shoot Down Avail */
> > +/* Radix Table Extensions */
> > +#define OV5_RADIX_GTSE 0x1A40 /* Guest
> > Translation Shoot Down Avail */
> >
> > /* Option Vector 6: IBM PAPR hints */
> > #define OV6_LINUX 0x02 /* Linux is our OS */
> > diff --git a/arch/powerpc/kernel/prom_init.c
> > b/arch/powerpc/kernel/prom_init.c
> > index 37b5a29..8272104 100644
> > --- a/arch/powerpc/kernel/prom_init.c
> > +++ b/arch/powerpc/kernel/prom_init.c
> > @@ -168,6 +168,8 @@ static unsigned long __initdata
> > prom_tce_alloc_start;
> > static unsigned long __initdata prom_tce_alloc_end;
> > #endif
> >
> > +static bool __initdata prom_radix_disable;
> > +
> > /* Platforms codes are now obsolete in the kernel. Now only used
> > within this
> > * file and ultimately gone too. Feel free to change them if you
> > need, they
> > * are not shared with anything outside of this file anymore
> > @@ -626,6 +628,12 @@ static void __init early_cmdline_parse(void)
> > prom_memory_limit = ALIGN(prom_memory_limit,
> > 0x1000000);
> > #endif
> > }
> > +
> > + opt = strstr(prom_cmd_line, "disable_radix");
> > + if (opt) {
> > + prom_debug("Radix disabled from cmdline\n");
> > + prom_radix_disable = true;
> > + }
> > }
> >
> > #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
> > @@ -693,8 +701,10 @@ struct option_vector5 {
> > __be16 reserved3;
> > u8 subprocessors;
> > u8 byte22;
> > - u8 intarch;
> > + u8 xive;
> > u8 mmu;
> > + u8 hash_ext;
> > + u8 radix_ext;
> > } __packed;
> >
> > struct option_vector6 {
> > @@ -849,9 +859,10 @@ struct ibm_arch_vec __cacheline_aligned
> > ibm_architecture_vec = {
> > .reserved2 = 0,
> > .reserved3 = 0,
> > .subprocessors = 1,
> > - .intarch = 0,
> > - .mmu = OV5_FEAT(OV5_MMU_RADIX_300) |
> > OV5_FEAT(OV5_MMU_HASH_300) |
> > - OV5_FEAT(OV5_MMU_PROC_TBL) |
> > OV5_FEAT(OV5_MMU_GTSE),
> > + .xive = 0,
> > + .mmu = 0,
> > + .hash_ext = 0,
> > + .radix_ext = 0,
> > },
> >
> > /* option vector 6: IBM PAPR hints */
> > @@ -990,6 +1001,83 @@ static int __init
> > prom_count_smt_threads(void)
> >
> > }
> >
> > +static void __init prom_check_platform_support(void)
> This function gets quite deeply nested. Can you split out some of it
> into a subroutine to make it look cleaner?
This did occur to me, will do.
>
> >
> > +{
> > + int prop_len, i;
> > + bool radix_gtse = false, radix_mmu = false, hash_mmu =
> > false;
> > +
> > + prop_len = prom_getproplen(prom.chosen,
> > + "ibm,arch-vec-5-platform-
> > support");
> > + if (prop_len > 1) {
> > + u8 val[prop_len];
> > + prom_debug("Found ibm,arch-vec-5-platform-support,
> > len: %d\n",
> > + prop_len);
> > + prom_getprop(prom.chosen, "ibm,arch-vec-5-
> > platform-support",
> > + &val, sizeof(val));
> > + for (i = 0; i < prop_len; i += 2) {
> > + u8 b1 = val[i], b2 = val[i+1];
> > + prom_debug("%d: nbyte = %x, val = %x\n",
> > i/2, b1, b2);
> > + switch (b1) {
> > + case OV5_INDX(OV5_XIVE_EXPLOIT):
> > + /* Unimplemented - Ignore */
> > + break;
> > + case OV5_INDX(OV5_MMU_EITHER_300):
> > + if (b2 ==
> > OV5_FEAT(OV5_MMU_EITHER_300)) {
> Here you're trying to check what's in the top 2 bits, so you should
> mask b2 before doing the comparisons, in case other bits (such as the
> nest MMU bit) are set. Future versions of PAPR might define meanings
> for some of other bits, and you don't want all these comparisons to
> stop working then.
True
>
> >
> > + /* Either Available */
> > + prom_debug("MMU - either
> > supported\n");
> > + radix_mmu =
> > !prom_radix_disable;
> > + hash_mmu = true;
> > + } else if (b2 ==
> > OV5_FEAT(OV5_MMU_RADIX_300)) {
> > + /* Radix Only */
> > + prom_debug("MMU - radix
> > only\n");
> > + if (prom_radix_disable) {
> > + prom_printf("WARNI
> > NG: Ignoring "
> > + "cmdli
> > ne option \""
> > + "disab
> > le_radix\"\n"
> You're allowed to have long lines for string constants; that's better
> than splitting them up like this, because splitting them makes it
> much
> harder to grep for the string.
Ok
>
> >
> > + );
> > + }
> > + radix_mmu = true;
> > + } else if (b2 ==
> > OV5_FEAT(OV5_MMU_HASH_300)) {
> > + /* Hash Only */
> > + prom_debug("MMU - hash
> > only\n");
> > + hash_mmu = true;
> > + }
> > + break;
> > + case OV5_INDX(OV5_HASH_SEG_TBL):
> > + /* Unimplemented - Ignore */
> > + break;
> > + case OV5_INDX(OV5_RADIX_GTSE):
> > + if (val[i+1] &
> > OV5_FEAT(OV5_RADIX_GTSE)) {
> Why val[i+1] not b2?
Woops
>
> >
> > + prom_debug("GTSE
> > supported\n");
> > + radix_gtse = true;
> > + }
> > + break;
> > + default:
> > + prom_printf("WARNING: Invalid byte
> > index <%d>"
> > + " in ibm,arch-vec-5-
> > platform-"
> > + "support\n", val[i]);
> I don't think a warning here is appropriate; there could legitimately
> be other values here in future.
Good point, I guess it's not really important if there's anything else
in the vector.
>
> >
> > + break;
> > + }
> > + }
> > + } else {
> > + prom_printf("WARNING: Invalid len <%d> of
> > ibm,arch-vec-5"
> > + "-platform-support\n", prop_len);
> This will be printed on all current machines, won't it? I don't
> think
> a warning is appropriate if the property just doesn't exist.
>
Good point. But if it doesn't exist what should we assume?
Atleast legacy hash support?
> >
> > + }
> > +
> > + if (radix_mmu && radix_gtse) {
> > + /* Radix preferred - but we require GTSE for now
> > */
> > + prom_debug("Asking for radix with GTSE\n");
> > + ibm_architecture_vec.vec5.mmu =
> > OV5_FEAT(OV5_MMU_RADIX_300);
> > + ibm_architecture_vec.vec5.radix_ext =
> > OV5_FEAT(OV5_RADIX_GTSE);
> > + } else if (hash_mmu) {
> > + /* Default to hash mmu (if we can) */
> > + prom_debug("Asking for hash\n");
> > + ibm_architecture_vec.vec5.mmu =
> > OV5_FEAT(OV5_MMU_HASH_300);
> > + } else {
> > + /* Unsupported Configuration */
> > + prom_printf("WARNING: unsupported host platform
> > MMU config\n");
> Won't we get this on all current machines, i.e. if the property
> doesn't exist? We shouldn't print warnings just because we're
> running
> on an old machine (think about a POWER6 partition under PowerVM).
Should we just return an empty (for the new bits atleast) vector then?
>
> >
> > + }
> > +}
> >
> > static void __init prom_send_capabilities(void)
> > {
> > @@ -997,6 +1085,9 @@ static void __init
> > prom_send_capabilities(void)
> > prom_arg_t ret;
> > u32 cores;
> >
> > + /* Check ibm,arch-vec-5-platform-support and fixup vec5 if
> > required */
> > + prom_check_platform_support();
> > +
> > root = call_prom("open", 1, 1, ADDR("/"));
> > if (root != 0) {
> > /* We need to tell the FW about the number of
> > cores we support.
> > diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
> > index 6aa3b76..17262da 100644
> > --- a/arch/powerpc/mm/init_64.c
> > +++ b/arch/powerpc/mm/init_64.c
> > @@ -359,15 +359,34 @@ static void early_check_vec5(void)
> >
> > root = of_get_flat_dt_root();
> > chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
> > - if (chosen == -FDT_ERR_NOTFOUND)
> > + if (chosen == -FDT_ERR_NOTFOUND) {
> > + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
> > return;
> > + }
> > vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-
> > 5", &size);
> > - if (!vec5)
> > + if (!vec5) {
> > + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
> > return;
> > - if (size <= OV5_INDX(OV5_MMU_RADIX_300) ||
> > - !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] &
> > OV5_FEAT(OV5_MMU_RADIX_300)))
> > - /* Hypervisor doesn't support radix */
> > + }
> > + if (size <= OV5_INDX(OV5_MMU_RADIX_300)) {
> > cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
> > + return;
> > + }
> > +
> > + /* Check for supported configuration */
> > + if (vec5[OV5_INDX(OV5_MMU_EITHER_300)] ==
> > OV5_FEAT(OV5_MMU_RADIX_300)) {
> Once again you need to look at just the top 2 bits, not the whole
> byte.
Yep
>
> >
> > + /* Hypervisor only supports radix - check enabled
> > && GTSE */
> > + WARN(!early_radix_enabled() ||
> I would not be at all surprised if a WARN at this early stage won't
> be
> handled properly and will just lead to a silent boot hang with no
> output. Just do a printk or pr_warn.
This printed fine for me, but I can use a printk instead.
>
> >
> > + !(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
> > + OV5_FEAT(OV5_RADIX_GTSE)),
> > + "WARNING: Unsupported MMU configuration in
> > vec5\n");
> > + /* Do radix anyway - the hypervisor said we had to
> > */
> > + cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
> > + } else if (vec5[OV5_INDX(OV5_MMU_EITHER_300)] ==
> > + OV5_FEAT(OV5_MMU_HASH_300)) {
> > + /* Hypervisor only supports hash - disable radix
> > */
> > + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
> > + }
> > }
> >
> > void __init mmu_early_init_devtree(void)
> > @@ -383,7 +402,7 @@ void __init mmu_early_init_devtree(void)
> > * even though the ibm,architecture-vec-5 property created
> > by
> > * skiboot doesn't have the necessary bits set.
> > */
> > - if (early_radix_enabled() && !(mfmsr() & MSR_HV))
> > + if (!(mfmsr() & MSR_HV))
> > early_check_vec5();
> >
> > if (early_radix_enabled())
More information about the Linuxppc-dev
mailing list