[PATCH 17/18] KVM: PPC: Book3S HV: Enable radix guest support

Suraj Jitindar Singh sjitindarsingh at gmail.com
Mon Jan 23 14:31:43 AEDT 2017


On Thu, 2017-01-12 at 20:07 +1100, Paul Mackerras wrote:
> This adds a few last pieces of the support for radix guests:
> 
> * Implement the backends for the KVM_PPC_CONFIGURE_V3_MMU and
>   KVM_PPC_GET_RMMU_INFO ioctls for radix guests
> 
> * On POWER9, allow secondary threads to be on/off-lined while guests
>   are running.
> 
> * Set up LPCR and the partition table entry for radix guests.
> 
> * Don't allocate the rmap array in the kvm_memory_slot structure
>   on radix.
> 
> * Prevent the AIL field in the LPCR being set for radix guests,
>   since we can't yet handle getting interrupts from the guest with
>   the MMU on.
> 
> * Don't try to initialize the HPT for radix guests, since they don't
>   have an HPT.
> 
> * Take out the code that prevents the HV KVM module from
>   initializing on radix hosts.
> 
> At this stage, we only support radix guests if the host is running
> in radix mode, and only support HPT guests if the host is running in
> HPT mode.  Thus a guest cannot switch from one mode to the other,
> which enables some simplifications.
> 
> Signed-off-by: Paul Mackerras <paulus at ozlabs.org>
> ---
>  arch/powerpc/include/asm/kvm_book3s.h  |  2 +
>  arch/powerpc/kvm/book3s_64_mmu_hv.c    |  1 -
>  arch/powerpc/kvm/book3s_64_mmu_radix.c | 45 ++++++++++++++++
>  arch/powerpc/kvm/book3s_hv.c           | 93
> ++++++++++++++++++++++++----------
>  arch/powerpc/kvm/powerpc.c             |  2 +-
>  5 files changed, 115 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h
> b/arch/powerpc/include/asm/kvm_book3s.h
> index 57dc407..2bf3501 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -189,6 +189,7 @@ extern int kvmppc_book3s_radix_page_fault(struct
> kvm_run *run,
>  			unsigned long ea, unsigned long dsisr);
>  extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t
> eaddr,
>  			struct kvmppc_pte *gpte, bool data, bool
> iswrite);
> +extern int kvmppc_init_vm_radix(struct kvm *kvm);
>  extern void kvmppc_free_radix(struct kvm *kvm);
>  extern int kvmppc_radix_init(void);
>  extern void kvmppc_radix_exit(void);
> @@ -200,6 +201,7 @@ extern int kvm_test_age_radix(struct kvm *kvm,
> struct kvm_memory_slot *memslot,
>  			unsigned long gfn);
>  extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
>  			struct kvm_memory_slot *memslot, unsigned
> long *map);
> +extern int kvmhv_get_rmmu_info(struct kvm *kvm, struct
> kvm_ppc_rmmu_info *info);
>  
>  /* XXX remove this export when load_last_inst() is generic */
>  extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size,
> void *ptr, bool data);
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 7a9afbe..db8de17 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -155,7 +155,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32
> *htab_orderp)
>  
>  void kvmppc_free_hpt(struct kvm *kvm)
>  {
> -	kvmppc_free_lpid(kvm->arch.lpid);
>  	vfree(kvm->arch.revmap);
>  	if (kvm->arch.hpt_cma_alloc)
>  		kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> index 125cc7c..4344651 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> @@ -610,6 +610,51 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm
> *kvm,
>  	return 0;
>  }
>  
> +static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info,
> +				 int psize, int *indexp)
> +{
> +	if (!mmu_psize_defs[psize].shift)
> +		return;
> +	info->ap_encodings[*indexp] = mmu_psize_defs[psize].shift |
> +		(mmu_psize_defs[psize].ap << 29);
> +	++(*indexp);
> +}
> +
> +int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info
> *info)
> +{
> +	int i;
> +
> +	if (!radix_enabled())
> +		return -EINVAL;
> +	memset(info, 0, sizeof(*info));
> +
> +	/* 4k page size */
> +	info->geometries[0].page_shift = 12;
> +	info->geometries[0].level_bits[0] = 9;
> +	for (i = 1; i < 4; ++i)
> +		info->geometries[0].level_bits[i] =
> p9_supported_radix_bits[i];
> +	/* 64k page size */
> +	info->geometries[1].page_shift = 16;
> +	for (i = 0; i < 4; ++i)
> +		info->geometries[1].level_bits[i] =
> p9_supported_radix_bits[i];
> +
> +	i = 0;
> +	add_rmmu_ap_encoding(info, MMU_PAGE_4K, &i);
> +	add_rmmu_ap_encoding(info, MMU_PAGE_64K, &i);
> +	add_rmmu_ap_encoding(info, MMU_PAGE_2M, &i);
> +	add_rmmu_ap_encoding(info, MMU_PAGE_1G, &i);
> +
> +	return 0;
> +}
> +
> +int kvmppc_init_vm_radix(struct kvm *kvm)
> +{
> +	kvm->arch.pgtable = pgd_alloc(kvm->mm);
> +	if (!kvm->arch.pgtable)
> +		return -ENOMEM;
> +	return 0;
> +}
> +
>  void kvmppc_free_radix(struct kvm *kvm)
>  {
>  	unsigned long ig, iu, im;
> diff --git a/arch/powerpc/kvm/book3s_hv.c
> b/arch/powerpc/kvm/book3s_hv.c
> index ab5adcd..14a9efe 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1136,10 +1136,13 @@ static void kvmppc_set_lpcr(struct kvm_vcpu
> *vcpu, u64 new_lpcr,
>  	/*
>  	 * Userspace can only modify DPFD (default prefetch depth),
>  	 * ILE (interrupt little-endian) and TC (translation
> control).
> -	 * On POWER8 userspace can also modify AIL (alt. interrupt
> loc.)
> +	 * On POWER8 userspace can also modify AIL (alt. interrupt
> loc.).
> +	 * On POWER9 with a radix guest, we can't allow AIL to be
> set
> +	 * since we don't yet have KVM handlers in the relocation-on
> +	 * interrupt vectors.
>  	 */
>  	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
> -	if (cpu_has_feature(CPU_FTR_ARCH_207S))
> +	if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
> !kvm_is_radix(kvm))
>  		mask |= LPCR_AIL;
>  
>  	/* Broken 32-bit version of LPCR must not clear top bits */
> @@ -2878,7 +2881,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run
> *run, struct kvm_vcpu *vcpu)
>  	smp_mb();
>  
>  	/* On the first time here, set up HTAB and VRMA */
> -	if (!vcpu->kvm->arch.hpte_setup_done) {
> +	if (!kvm_is_radix(vcpu->kvm) && !vcpu->kvm-
> >arch.hpte_setup_done) {
>  		r = kvmppc_hv_setup_htab_rma(vcpu);
>  		if (r)
>  			goto out;
> @@ -2940,6 +2943,13 @@ static int
> kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
>  {
>  	struct kvm_ppc_one_seg_page_size *sps;
>  
> +	/*
> +	 * Since we don't yet support HPT guests on a radix host,
> +	 * return an error if the host uses radix.
> +	 */
> +	if (radix_enabled())
> +		return -EINVAL;
> +
>  	info->flags = KVM_PPC_PAGE_SIZES_REAL;
>  	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
>  		info->flags |= KVM_PPC_1T_SEGMENTS;
> @@ -3025,6 +3035,15 @@ static void kvmppc_core_free_memslot_hv(struct
> kvm_memory_slot *free,
>  static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot
> *slot,
>  					 unsigned long npages)
>  {
> +	/*
> +	 * For now, if radix_enabled() then we only support radix
> guests,
> +	 * and in that case we don't need the rmap array.
> +	 */
> +	if (radix_enabled()) {
> +		slot->arch.rmap = NULL;
> +		return 0;
> +	}
> +
>  	slot->arch.rmap = vzalloc(npages * sizeof(*slot-
> >arch.rmap));
>  	if (!slot->arch.rmap)
>  		return -ENOMEM;
> @@ -3105,14 +3124,20 @@ static void
> kvmppc_setup_partition_table(struct kvm *kvm)
>  {
>  	unsigned long dw0, dw1;
>  
> -	/* PS field - page size for VRMA */
> -	dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) |
> -		((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1);
> -	/* HTABSIZE and HTABORG fields */
> -	dw0 |= kvm->arch.sdr1;
> +	if (!kvm->arch.radix) {
Could this use kvm_is_radix(kvm) instead of testing kvm->arch.radix directly, for consistency with the other checks in this patch?
> +		/* PS field - page size for VRMA */
> +		dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) |
> +			((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1);
> +		/* HTABSIZE and HTABORG fields */
> +		dw0 |= kvm->arch.sdr1;
>  
> -	/* Second dword as set by userspace */
> -	dw1 = kvm->arch.process_table;
> +		/* Second dword as set by userspace */
> +		dw1 = kvm->arch.process_table;
> +	} else {
> +		dw0 = PATB_HR | radix__get_tree_size() |
> +			__pa(kvm->arch.pgtable) |
> RADIX_PGD_INDEX_SIZE;
> +		dw1 = PATB_GR | kvm->arch.process_table;
> +	}
>  
>  	mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1);
>  }
> @@ -3282,6 +3307,7 @@ static int kvmppc_core_init_vm_hv(struct kvm
> *kvm)
>  {
>  	unsigned long lpcr, lpid;
>  	char buf[32];
> +	int ret;
>  
>  	/* Allocate the guest's logical partition ID */
>  
> @@ -3329,13 +3355,30 @@ static int kvmppc_core_init_vm_hv(struct kvm
> *kvm)
>  		lpcr |= LPCR_HVICE;
>  	}
>  
> +	/*
> +	 * For now, if the host uses radix, the guest must be radix.
> +	 */
> +	if (radix_enabled()) {
> +		kvm->arch.radix = 1;
> +		lpcr &= ~LPCR_VPM1;
> +		lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
> +		ret = kvmppc_init_vm_radix(kvm);
> +		if (ret) {
> +			kvmppc_free_lpid(kvm->arch.lpid);
> +			return ret;
> +		}
> +		kvmppc_setup_partition_table(kvm);
> +	}
> +
>  	kvm->arch.lpcr = lpcr;
>  
>  	/*
>  	 * Work out how many sets the TLB has, for the use of
>  	 * the TLB invalidation loop in book3s_hv_rmhandlers.S.
>  	 */
> -	if (cpu_has_feature(CPU_FTR_ARCH_300))
> +	if (kvm_is_radix(kvm))
> +		kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX;	/
> * 128 */
> +	else if (cpu_has_feature(CPU_FTR_ARCH_300))
>  		kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH;	/*
> 256 */
>  	else if (cpu_has_feature(CPU_FTR_ARCH_207S))
>  		kvm->arch.tlb_sets = POWER8_TLB_SETS;		
> /* 512 */
> @@ -3345,8 +3388,11 @@ static int kvmppc_core_init_vm_hv(struct kvm
> *kvm)
>  	/*
>  	 * Track that we now have a HV mode VM active. This blocks
> secondary
>  	 * CPU threads from coming online.
> +	 * On POWER9, we only need to do this for HPT guests on a
> radix
> +	 * host, which is not yet supported.
>  	 */
> -	kvm_hv_vm_activated();
> +	if (!cpu_has_feature(CPU_FTR_ARCH_300))
> +		kvm_hv_vm_activated();
>  
>  	/*
>  	 * Create a debugfs directory for the VM
> @@ -3372,10 +3418,13 @@ static void kvmppc_core_destroy_vm_hv(struct
> kvm *kvm)
>  {
>  	debugfs_remove_recursive(kvm->arch.debugfs_dir);
>  
> -	kvm_hv_vm_deactivated();
> +	if (!cpu_has_feature(CPU_FTR_ARCH_300))
> +		kvm_hv_vm_deactivated();
>  
>  	kvmppc_free_vcores(kvm);
>  
> +	kvmppc_free_lpid(kvm->arch.lpid);
> +
>  	if (kvm->arch.radix)
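Same here: kvm_is_radix(kvm) rather than kvm->arch.radix, for consistency with the other checks in this patch?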
>  		kvmppc_free_radix(kvm);
>  	else
> @@ -3408,11 +3457,6 @@ static int
> kvmppc_core_check_processor_compat_hv(void)
>  	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
>  	    !cpu_has_feature(CPU_FTR_ARCH_206))
>  		return -EIO;
> -	/*
> -	 * Disable KVM for Power9 in radix mode.
> -	 */
> -	if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
> -		return -EIO;
>  
>  	return 0;
>  }
> @@ -3683,6 +3727,7 @@ static void init_default_hcalls(void)
>  static int kvmhv_configure_mmu(struct kvm *kvm, struct
> kvm_ppc_mmuv3_cfg *cfg)
>  {
>  	unsigned long lpcr;
> +	int radix;
For clarity, 'radix' could be declared as a bool, since it only ever holds the result of a !!() test.
>  
>  	/* If not on a POWER9, reject it */
>  	if (!cpu_has_feature(CPU_FTR_ARCH_300))
> @@ -3692,12 +3737,13 @@ static int kvmhv_configure_mmu(struct kvm
> *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
>  	if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX |
> KVM_PPC_MMUV3_GTSE))
>  		return -EINVAL;
>  
> -	/* We can't do radix yet */
> -	if (cfg->flags & KVM_PPC_MMUV3_RADIX)
> +	/* We can't change a guest to/from radix yet */
> +	radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX);
> +	if (radix != kvm_is_radix(kvm))
>  		return -EINVAL;
>  
>  	/* GR (guest radix) bit in process_table field must match */
> -	if (cfg->process_table & PATB_GR)
> +	if (!!(cfg->process_table & PATB_GR) != radix)
>  		return -EINVAL;
>  
>  	/* Process table size field must be reasonable, i.e. <= 24
> */
> @@ -3713,11 +3759,6 @@ static int kvmhv_configure_mmu(struct kvm
> *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
>  	return 0;
>  }
>  
> -static int kvmhv_get_rmmu_info(struct kvm *kvm, struct
> kvm_ppc_rmmu_info *info)
> -{
> -	return -EINVAL;
> -}
> -
>  static struct kvmppc_ops kvm_ops_hv = {
>  	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
>  	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 1476a48..40a5b2d 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -566,7 +566,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm,
> long ext)
>  		r = kvmppc_hwrng_present();
>  		break;
>  	case KVM_CAP_PPC_MMU_RADIX:
> -		r = !!(0 && hv_enabled && radix_enabled());
> +		r = !!(hv_enabled && radix_enabled());
>  		break;
>  	case KVM_CAP_PPC_MMU_HASH_V3:
>  		r = !!(hv_enabled && !radix_enabled() &&


More information about the Linuxppc-dev mailing list