[RFC NO-MERGE 2/2] arch/powerpc/CAS: Update to new option-vector-5 format for CAS

Suraj Jitindar Singh sjitindarsingh at gmail.com
Fri Feb 24 14:46:53 AEDT 2017


On Thu, 2017-02-23 at 15:44 +1100, Paul Mackerras wrote:
> On Tue, Feb 21, 2017 at 05:06:11PM +1100, Suraj Jitindar Singh wrote:
> > 
> > The CAS process has been updated to change how the host to guest
> Once again, explain CAS; perhaps "The ibm,client-architecture-support
> (CAS) negotiation process has been updated for POWER9 to ..."
> 
> > 
> > negotiation is done for the new hash/radix mmu as well as the nest
> > mmu,
> > process tables and guest translation shootdown (GTSE).
> > 
> > The host tells the guest which options it supports in
> > ibm,arch-vec-5-platform-support. The guest then chooses a subset of
> > these
> > to request in the CAS call and these are agreed to in the
> > ibm,architecture-vec-5 property of the chosen node.
> > 
> > Thus we read ibm,arch-vec-5-platform-support and make our selection
> > before
> > calling CAS. We then parse the ibm,architecture-vec-5 property of
> > the
> > chosen node to check whether we should run as hash or radix.
> > 
> > Signed-off-by: Suraj Jitindar Singh <sjitindarsingh at gmail.com>
> > ---
> >  arch/powerpc/include/asm/prom.h | 16 ++++---
> >  arch/powerpc/kernel/prom_init.c | 99
> > +++++++++++++++++++++++++++++++++++++++--
> >  arch/powerpc/mm/init_64.c       | 31 ++++++++++---
> >  3 files changed, 130 insertions(+), 16 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/prom.h
> > b/arch/powerpc/include/asm/prom.h
> > index 8af2546..19d2e84 100644
> > --- a/arch/powerpc/include/asm/prom.h
> > +++ b/arch/powerpc/include/asm/prom.h
> > @@ -158,12 +158,16 @@ struct of_drconf_cell {
> >  #define OV5_PFO_HW_ENCR		0x1120	/* PFO
> > Encryption Accelerator */
> >  #define OV5_SUB_PROCESSORS	0x1501	/* 1,2,or 4 Sub-
> > Processors supported */
> >  #define OV5_XIVE_EXPLOIT	0x1701	/* XIVE exploitation
> > supported */
> > -#define OV5_MMU_RADIX_300	0x1880	/* ISA v3.00 radix
> > MMU supported */
> > -#define OV5_MMU_HASH_300	0x1840	/* ISA v3.00 hash
> > MMU supported */
> > -#define OV5_MMU_SEGM_RADIX	0x1820	/* radix mode (no
> > segmentation) */
> > -#define OV5_MMU_PROC_TBL	0x1810	/* hcall selects SLB
> > or proc table */
> > -#define OV5_MMU_SLB		0x1800	/* always use SLB
> > */
> > -#define OV5_MMU_GTSE		0x1808	/* Guest
> > translation shootdown */
> > +/* MMU Base Architecture */
> > +#define OV5_MMU_HASH_300	0x1800	/* ISA v3.00 Hash
> > MMU Only */
> This is actually legacy HPT as well as ISA v3.00 HPT.

True

> 
> > 
> > +#define OV5_MMU_RADIX_300	0x1840	/* ISA v3.00 Radix
> > MMU Only */
> > +#define OV5_MMU_EITHER_300	0x1880	/* ISA v3.00 Hash
> > or Radix Supported */
> I wonder if it would work better to have a define for the 2-bit field
> with subsidiary definitions for the field values.  Something like
> 
> #define OV5_MMU_SELECTION	0x18c0
> #define  OV5_MMU_HPT			0x00
> #define  OV5_MMU_RADIX			0x40
> #define  OV5_MMU_EITHER			0x80

Yep that's clearer

> 
> > 
> > +#define OV5_NMMU		0x1820	/* Nest MMU
> > Available */
> > +/* Hash Table Extensions */
> > +#define OV5_HASH_SEG_TBL	0x1980	/* In Memory Segment
> > Tables Available */
> > +#define OV5_HASH_GTSE		0x1940	/* Guest
> > Translation Shoot Down Avail */
> > +/* Radix Table Extensions */
> > +#define OV5_RADIX_GTSE		0x1A40	/* Guest
> > Translation Shoot Down Avail */
> >  
> >  /* Option Vector 6: IBM PAPR hints */
> >  #define OV6_LINUX		0x02	/* Linux is our OS */
> > diff --git a/arch/powerpc/kernel/prom_init.c
> > b/arch/powerpc/kernel/prom_init.c
> > index 37b5a29..8272104 100644
> > --- a/arch/powerpc/kernel/prom_init.c
> > +++ b/arch/powerpc/kernel/prom_init.c
> > @@ -168,6 +168,8 @@ static unsigned long __initdata
> > prom_tce_alloc_start;
> >  static unsigned long __initdata prom_tce_alloc_end;
> >  #endif
> >  
> > +static bool __initdata prom_radix_disable;
> > +
> >  /* Platforms codes are now obsolete in the kernel. Now only used
> > within this
> >   * file and ultimately gone too. Feel free to change them if you
> > need, they
> >   * are not shared with anything outside of this file anymore
> > @@ -626,6 +628,12 @@ static void __init early_cmdline_parse(void)
> >  		prom_memory_limit = ALIGN(prom_memory_limit,
> > 0x1000000);
> >  #endif
> >  	}
> > +
> > +	opt = strstr(prom_cmd_line, "disable_radix");
> > +	if (opt) {
> > +		prom_debug("Radix disabled from cmdline\n");
> > +		prom_radix_disable = true;
> > +	}
> >  }
> >  
> >  #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
> > @@ -693,8 +701,10 @@ struct option_vector5 {
> >  	__be16 reserved3;
> >  	u8 subprocessors;
> >  	u8 byte22;
> > -	u8 intarch;
> > +	u8 xive;
> >  	u8 mmu;
> > +	u8 hash_ext;
> > +	u8 radix_ext;
> >  } __packed;
> >  
> >  struct option_vector6 {
> > @@ -849,9 +859,10 @@ struct ibm_arch_vec __cacheline_aligned
> > ibm_architecture_vec = {
> >  		.reserved2 = 0,
> >  		.reserved3 = 0,
> >  		.subprocessors = 1,
> > -		.intarch = 0,
> > -		.mmu = OV5_FEAT(OV5_MMU_RADIX_300) |
> > OV5_FEAT(OV5_MMU_HASH_300) |
> > -			OV5_FEAT(OV5_MMU_PROC_TBL) |
> > OV5_FEAT(OV5_MMU_GTSE),
> > +		.xive = 0,
> > +		.mmu = 0,
> > +		.hash_ext = 0,
> > +		.radix_ext = 0,
> >  	},
> >  
> >  	/* option vector 6: IBM PAPR hints */
> > @@ -990,6 +1001,83 @@ static int __init
> > prom_count_smt_threads(void)
> >  
> >  }
> >  
> > +static void __init prom_check_platform_support(void)
> This function gets quite deeply nested.  Can you split out some of it
> into a subroutine to make it look cleaner?

This did occur to me, will do.

> 
> > 
> > +{
> > +	int prop_len, i;
> > +	bool radix_gtse = false, radix_mmu = false, hash_mmu =
> > false;
> > +
> > +	prop_len = prom_getproplen(prom.chosen,
> > +				   "ibm,arch-vec-5-platform-
> > support");
> > +	if (prop_len > 1) {
> > +		u8 val[prop_len];
> > +		prom_debug("Found ibm,arch-vec-5-platform-support, 
> > len: %d\n",
> > +			   prop_len);
> > +		prom_getprop(prom.chosen, "ibm,arch-vec-5-
> > platform-support",
> > +			     &val, sizeof(val));
> > +		for (i = 0; i < prop_len; i += 2) {
> > +			u8 b1 = val[i], b2 = val[i+1];
> > +			prom_debug("%d: nbyte = %x, val = %x\n",
> > i/2, b1, b2);
> > +			switch (b1) {
> > +			case OV5_INDX(OV5_XIVE_EXPLOIT):
> > +				/* Unimplemented - Ignore */
> > +				break;
> > +			case OV5_INDX(OV5_MMU_EITHER_300):
> > +				if (b2 ==
> > OV5_FEAT(OV5_MMU_EITHER_300)) {
> Here you're trying to check what's in the top 2 bits, so you should
> mask b2 before doing the comparisons, in case other bits (such as the
> nest MMU bit) are set.  Future versions of PAPR might define meanings
> for some of other bits, and you don't want all these comparisons to
> stop working then.

True

> 
> > 
> > +					/* Either Available */
> > +					prom_debug("MMU - either
> > supported\n");
> > +					radix_mmu =
> > !prom_radix_disable;
> > +					hash_mmu = true;
> > +				} else if (b2 ==
> > OV5_FEAT(OV5_MMU_RADIX_300)) {
> > +					/* Radix Only */
> > +					prom_debug("MMU - radix
> > only\n");
> > +					if (prom_radix_disable) {
> > +						prom_printf("WARNI
> > NG: Ignoring "
> > +							    "cmdli
> > ne option \""
> > +							    "disab
> > le_radix\"\n"
> You're allowed to have long lines for string constants; that's better
> than splitting them up like this, because splitting them makes it
> much
> harder to grep for the string.

Ok

> 
> > 
> > +							    );
> > +					}
> > +					radix_mmu = true;
> > +				} else if (b2 ==
> > OV5_FEAT(OV5_MMU_HASH_300)) {
> > +					/* Hash Only */
> > +					prom_debug("MMU - hash
> > only\n");
> > +					hash_mmu = true;
> > +				}
> > +				break;
> > +			case OV5_INDX(OV5_HASH_SEG_TBL):
> > +				/* Unimplemented - Ignore */
> > +				break;
> > +			case OV5_INDX(OV5_RADIX_GTSE):
> > +				if (val[i+1] &
> > OV5_FEAT(OV5_RADIX_GTSE)) {
> Why val[i+1] not b2?

Woops

> 
> > 
> > +					prom_debug("GTSE
> > supported\n");
> > +					radix_gtse = true;
> > +				}
> > +				break;
> > +			default:
> > +				prom_printf("WARNING: Invalid byte
> > index <%d>"
> > +					    " in ibm,arch-vec-5-
> > platform-"
> > +					    "support\n", val[i]);
> I don't think a warning here is appropriate; there could legitimately
> be other values here in future.

Good point, I guess it's not really important if there's anything else
in the vector.

> 
> > 
> > +				break;
> > +			}
> > +		}
> > +	} else {
> > +		prom_printf("WARNING: Invalid len <%d> of
> > ibm,arch-vec-5"
> > +			    "-platform-support\n", prop_len);
> This will be printed on all current machines, won't it?  I don't
> think
> a warning is appropriate if the property just doesn't exist.
> 

Good point. But if it doesn't exist what should we assume?
Atleast legacy hash support?

> > 
> > +	}
> > +
> > +	if (radix_mmu && radix_gtse) {
> > +		/* Radix preferred - but we require GTSE for now
> > */
> > +		prom_debug("Asking for radix with GTSE\n");
> > +		ibm_architecture_vec.vec5.mmu =
> > OV5_FEAT(OV5_MMU_RADIX_300);
> > +		ibm_architecture_vec.vec5.radix_ext =
> > OV5_FEAT(OV5_RADIX_GTSE);
> > +	} else if (hash_mmu) {
> > +		/* Default to hash mmu (if we can) */
> > +		prom_debug("Asking for hash\n");
> > +		ibm_architecture_vec.vec5.mmu =
> > OV5_FEAT(OV5_MMU_HASH_300);
> > +	} else {
> > +		/* Unsupported Configuration */
> > +		prom_printf("WARNING: unsupported host platform
> > MMU config\n");
> Won't we get this on all current machines, i.e. if the property
> doesn't exist?  We shouldn't print warnings just because we're
> running
> on an old machine (think about a POWER6 partition under PowerVM).

Should we just return an empty (for the new bits atleast) vector then?

> 
> > 
> > +	}
> > +}
> >  
> >  static void __init prom_send_capabilities(void)
> >  {
> > @@ -997,6 +1085,9 @@ static void __init
> > prom_send_capabilities(void)
> >  	prom_arg_t ret;
> >  	u32 cores;
> >  
> > +	/* Check ibm,arch-vec-5-platform-support and fixup vec5 if
> > required */
> > +	prom_check_platform_support();
> > +
> >  	root = call_prom("open", 1, 1, ADDR("/"));
> >  	if (root != 0) {
> >  		/* We need to tell the FW about the number of
> > cores we support.
> > diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
> > index 6aa3b76..17262da 100644
> > --- a/arch/powerpc/mm/init_64.c
> > +++ b/arch/powerpc/mm/init_64.c
> > @@ -359,15 +359,34 @@ static void early_check_vec5(void)
> >  
> >  	root = of_get_flat_dt_root();
> >  	chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
> > -	if (chosen == -FDT_ERR_NOTFOUND)
> > +	if (chosen == -FDT_ERR_NOTFOUND) {
> > +		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
> >  		return;
> > +	}
> >  	vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-
> > 5", &size);
> > -	if (!vec5)
> > +	if (!vec5) {
> > +		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
> >  		return;
> > -	if (size <= OV5_INDX(OV5_MMU_RADIX_300) ||
> > -	    !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] &
> > OV5_FEAT(OV5_MMU_RADIX_300)))
> > -		/* Hypervisor doesn't support radix */
> > +	}
> > +	if (size <= OV5_INDX(OV5_MMU_RADIX_300)) {
> >  		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
> > +		return;
> > +	}
> > +
> > +	/* Check for supported configuration */
> > +	if (vec5[OV5_INDX(OV5_MMU_EITHER_300)] ==
> > OV5_FEAT(OV5_MMU_RADIX_300)) {
> Once again you need to look at just the top 2 bits, not the whole
> byte.

Yep

> 
> > 
> > +		/* Hypervisor only supports radix - check enabled
> > && GTSE */
> > +		WARN(!early_radix_enabled() ||
> I would not be at all surprised if a WARN at this early stage won't
> be
> handled properly and will just lead to a silent boot hang with no
> output.  Just do a printk or pr_warn.

This printed fine for me, but I can use a printk instead.

> 
> > 
> > +		     !(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
> > +		       OV5_FEAT(OV5_RADIX_GTSE)),
> > +		     "WARNING: Unsupported MMU configuration in
> > vec5\n");
> > +		/* Do radix anyway - the hypervisor said we had to
> > */
> > +		cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
> > +	} else if (vec5[OV5_INDX(OV5_MMU_EITHER_300)] ==
> > +		   OV5_FEAT(OV5_MMU_HASH_300)) {
> > +		/* Hypervisor only supports hash - disable radix
> > */
> > +		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
> > +	}
> >  }
> >  
> >  void __init mmu_early_init_devtree(void)
> > @@ -383,7 +402,7 @@ void __init mmu_early_init_devtree(void)
> >  	 * even though the ibm,architecture-vec-5 property created
> > by
> >  	 * skiboot doesn't have the necessary bits set.
> >  	 */
> > -	if (early_radix_enabled() && !(mfmsr() & MSR_HV))
> > +	if (!(mfmsr() & MSR_HV))
> >  		early_check_vec5();
> >  
> >  	if (early_radix_enabled())


More information about the Linuxppc-dev mailing list