[PATCH v2 1/2] KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE

David Gibson david at gibson.dropbear.id.au
Thu Dec 17 14:42:15 AEDT 2020


On Wed, Dec 16, 2020 at 02:24:46PM +0530, Bharata B Rao wrote:
> Implement H_RPT_INVALIDATE hcall and add KVM capability
> KVM_CAP_PPC_RPT_INVALIDATE to indicate the support for the same.
> 
> This hcall does two types of TLB invalidations:
> 
> 1. Process-scoped invalidations for guests with LPCR[GTSE]=0.
>    This is currently not used in KVM as GTSE is not usually
>    disabled in KVM.
> 2. Partition-scoped invalidations that an L1 hypervisor does on
>    behalf of an L2 guest. This replaces the uses of the existing
>    hcall H_TLB_INVALIDATE.
> 
> Signed-off-by: Bharata B Rao <bharata at linux.ibm.com>
> ---
>  Documentation/virt/kvm/api.rst                |  17 +++
>  .../include/asm/book3s/64/tlbflush-radix.h    |  18 +++
>  arch/powerpc/include/asm/kvm_book3s.h         |   3 +
>  arch/powerpc/kvm/book3s_hv.c                  | 121 ++++++++++++++++++
>  arch/powerpc/kvm/book3s_hv_nested.c           |  94 ++++++++++++++
>  arch/powerpc/kvm/powerpc.c                    |   3 +
>  arch/powerpc/mm/book3s64/radix_tlb.c          |   4 -
>  include/uapi/linux/kvm.h                      |   1 +
>  8 files changed, 257 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index e00a66d72372..5ce237c0d707 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -6014,6 +6014,23 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space
>  can then handle to implement model specific MSR handling and/or user notifications
>  to inform a user that an MSR was not handled.
>  
> +7.22 KVM_CAP_PPC_RPT_INVALIDATE
> +------------------------------
> +
> +:Capability: KVM_CAP_PPC_RPT_INVALIDATE
> +:Architectures: ppc
> +:Type: vm
> +
> +This capability indicates that the kernel is capable of handling
> +H_RPT_INVALIDATE hcall.
> +
> +In order to enable the use of H_RPT_INVALIDATE in the guest,
> +user space might have to advertise it for the guest. For example,
> +IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
> +present in the "ibm,hypertas-functions" device-tree property.
> +
> +This capability is always enabled.
> +
>  8. Other capabilities.
>  ======================
>  
> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> index 94439e0cefc9..aace7e9b2397 100644
> --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> @@ -4,6 +4,10 @@
>  
>  #include <asm/hvcall.h>
>  
> +#define RIC_FLUSH_TLB 0
> +#define RIC_FLUSH_PWC 1
> +#define RIC_FLUSH_ALL 2
> +
>  struct vm_area_struct;
>  struct mm_struct;
>  struct mmu_gather;
> @@ -21,6 +25,20 @@ static inline u64 psize_to_rpti_pgsize(unsigned long psize)
>  	return H_RPTI_PAGE_ALL;
>  }
>  
> +static inline int rpti_pgsize_to_psize(unsigned long page_size)
> +{
> +	if (page_size == H_RPTI_PAGE_4K)
> +		return MMU_PAGE_4K;
> +	if (page_size == H_RPTI_PAGE_64K)
> +		return MMU_PAGE_64K;
> +	if (page_size == H_RPTI_PAGE_2M)
> +		return MMU_PAGE_2M;
> +	if (page_size == H_RPTI_PAGE_1G)
> +		return MMU_PAGE_1G;
> +	else
> +		return MMU_PAGE_64K; /* Default */
> +}
> +
>  static inline int mmu_get_ap(int psize)
>  {
>  	return mmu_psize_defs[psize].ap;
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> index d32ec9ae73bd..0f1c5fa6e8ce 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -298,6 +298,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
>  void kvmhv_release_all_nested(struct kvm *kvm);
>  long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
>  long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
> +long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
> +			 unsigned long type, unsigned long pg_sizes,
> +			 unsigned long start, unsigned long end);
>  int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
>  			  u64 time_limit, unsigned long lpcr);
>  void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index e3b1839fc251..adf2d1191581 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -904,6 +904,118 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
>  	return yield_count;
>  }
>  
> +static inline void do_tlb_invalidate_all(unsigned long rb, unsigned long rs)
> +{
> +	asm volatile("ptesync" : : : "memory");
> +	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
> +		: : "r"(rb), "i"(1), "i"(1), "i"(RIC_FLUSH_ALL), "r"(rs)
> +		: "memory");
> +	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
> +}
> +
> +static inline void do_tlb_invalidate_pwc(unsigned long rb, unsigned long rs)
> +{
> +	asm volatile("ptesync" : : : "memory");
> +	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
> +		: : "r"(rb), "i"(1), "i"(1), "i"(RIC_FLUSH_PWC), "r"(rs)
> +		: "memory");
> +	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
> +}
> +
> +static inline void do_tlb_invalidate_tlb(unsigned long rb, unsigned long rs)
> +{
> +	asm volatile("ptesync" : : : "memory");
> +	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
> +		: : "r"(rb), "i"(1), "i"(1), "i"(RIC_FLUSH_TLB), "r"(rs)
> +		: "memory");
> +	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
> +}
> +
> +static void do_tlb_invalidate(unsigned long rs, unsigned long target,
> +			      unsigned long type, unsigned long page_size,
> +			      unsigned long ap, unsigned long start,
> +			      unsigned long end)
> +{
> +	unsigned long rb;
> +	unsigned long addr = start;
> +
> +	if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
> +		rb = PPC_BIT(53); /* IS = 1 */
> +		do_tlb_invalidate_all(rb, rs);
> +		return;
> +	}
> +
> +	if (type & H_RPTI_TYPE_PWC) {
> +		rb = PPC_BIT(53); /* IS = 1 */
> +		do_tlb_invalidate_pwc(rb, rs);
> +	}
> +
> +	if (!addr && end == -1) { /* PID */
> +		rb = PPC_BIT(53); /* IS = 1 */
> +		do_tlb_invalidate_tlb(rb, rs);
> +	} else { /* EA */
> +		do {
> +			rb = addr & ~(PPC_BITMASK(52, 63));
> +			rb |= ap << PPC_BITLSHIFT(58);
> +			do_tlb_invalidate_tlb(rb, rs);
> +			addr += page_size;
> +		} while (addr < end);
> +	}
> +}
> +
> +static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
> +				    unsigned long pid, unsigned long target,
> +				    unsigned long type, unsigned long pg_sizes,
> +				    unsigned long start, unsigned long end)
> +{
> +	unsigned long rs, ap, psize;
> +
> +	if (!kvm_is_radix(vcpu->kvm))
> +		return H_FUNCTION;

IIUC The cover note said this case was H_NOT_SUPPORTED, rather than H_FUNCTION.

> +
> +	if (end < start)
> +		return H_P5;
> +
> +	if (type & H_RPTI_TYPE_NESTED) {
> +		if (!nesting_enabled(vcpu->kvm))
> +			return H_FUNCTION;

Likewise, I'm not sure that H_FUNCTION is the right choice here.

> +
> +		/* Support only cores as target */
> +		if (target != H_RPTI_TARGET_CMMU)
> +			return H_P2;
> +
> +		return kvmhv_h_rpti_nested(vcpu, pid,
> +					   (type & ~H_RPTI_TYPE_NESTED),
> +					    pg_sizes, start, end);
> +	}
> +
> +	rs = pid << PPC_BITLSHIFT(31);
> +	rs |= vcpu->kvm->arch.lpid;
> +
> +	if (pg_sizes & H_RPTI_PAGE_64K) {
> +		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_64K);
> +		ap = mmu_get_ap(psize);
> +		do_tlb_invalidate(rs, target, type, (1UL << 16), ap, start,
> +				  end);

Should these be conditional on the TLB flag in type?

> +	}
> +
> +	if (pg_sizes & H_RPTI_PAGE_2M) {
> +		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_2M);
> +		ap = mmu_get_ap(psize);
> +		do_tlb_invalidate(rs, target, type, (1UL << 21), ap, start,
> +				  end);
> +	}
> +
> +	if (pg_sizes & H_RPTI_PAGE_1G) {
> +		psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_1G);
> +		ap = mmu_get_ap(psize);
> +		do_tlb_invalidate(rs, target, type, (1UL << 30), ap, start,
> +				  end);
> +	}
> +
> +	return H_SUCCESS;
> +}
> +
>  int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
>  {
>  	unsigned long req = kvmppc_get_gpr(vcpu, 3);
> @@ -1112,6 +1224,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
>  		 */
>  		ret = kvmppc_h_svm_init_abort(vcpu->kvm);
>  		break;
> +	case H_RPT_INVALIDATE:
> +		ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4),
> +					      kvmppc_get_gpr(vcpu, 5),
> +					      kvmppc_get_gpr(vcpu, 6),
> +					      kvmppc_get_gpr(vcpu, 7),
> +					      kvmppc_get_gpr(vcpu, 8),
> +					      kvmppc_get_gpr(vcpu, 9));
> +		break;
>  
>  	default:
>  		return RESUME_HOST;
> @@ -1158,6 +1278,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
>  	case H_XIRR_X:
>  #endif
>  	case H_PAGE_INIT:
> +	case H_RPT_INVALIDATE:
>  		return 1;
>  	}
>  
> diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
> index 33b58549a9aa..a54ba4b1d4a7 100644
> --- a/arch/powerpc/kvm/book3s_hv_nested.c
> +++ b/arch/powerpc/kvm/book3s_hv_nested.c
> @@ -1149,6 +1149,100 @@ long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
>  	return H_SUCCESS;
>  }
>  
> +static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
> +					 unsigned long lpid,
> +					 unsigned long page_size,
> +					 unsigned long ap,
> +					 unsigned long start,
> +					 unsigned long end)
> +{
> +	unsigned long addr = start;
> +	int ret;
> +
> +	do {
> +		ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
> +						   get_epn(addr));
> +		if (ret)
> +			return ret;
> +		addr += page_size;
> +	} while (addr < end);
> +
> +	return ret;
> +}
> +
> +static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
> +					 unsigned long lpid)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_nested_guest *gp;
> +
> +	gp = kvmhv_get_nested(kvm, lpid, false);
> +	if (gp) {
> +		kvmhv_emulate_tlbie_lpid(vcpu, gp, RIC_FLUSH_ALL);
> +		kvmhv_put_nested(gp);
> +	}
> +	return H_SUCCESS;
> +}
> +
> +long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
> +			 unsigned long type, unsigned long pg_sizes,
> +			 unsigned long start, unsigned long end)
> +{
> +	struct kvm_nested_guest *gp;
> +	long ret;
> +	unsigned long psize, ap;
> +
> +	/*
> +	 * If L2 lpid isn't valid, we need to return H_PARAMETER.
> +	 * Nested KVM issues a L2 lpid flush call when creating
> +	 * partition table entries for L2. This happens even before
> +	 * the corresponding shadow lpid is created in HV. Until
> +	 * this is fixed, ignore such flush requests.
> +	 */
> +	gp = kvmhv_find_nested(vcpu->kvm, lpid);
> +	if (!gp)
> +		return H_SUCCESS;
> +
> +	if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL)
> +		return do_tlb_invalidate_nested_all(vcpu, lpid);
> +
> +	if ((type & H_RPTI_TYPE_TLB) == H_RPTI_TYPE_TLB) {
> +		if (pg_sizes & H_RPTI_PAGE_64K) {
> +			psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_64K);
> +			ap = mmu_get_ap(psize);
> +
> +			ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
> +							   (1UL << 16),
> +							   ap, start, end);
> +			if (ret)
> +				return H_P4;
> +		}
> +
> +		if (pg_sizes & H_RPTI_PAGE_2M) {
> +			psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_2M);
> +			ap = mmu_get_ap(psize);
> +
> +			ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
> +							   (1UL << 21),
> +							   ap, start, end);
> +			if (ret)
> +				return H_P4;
> +		}
> +
> +		if (pg_sizes & H_RPTI_PAGE_1G) {
> +			psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_1G);
> +			ap = mmu_get_ap(psize);
> +
> +			ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
> +							   (1UL << 30),
> +							   ap, start, end);
> +			if (ret)
> +				return H_P4;
> +		}
> +	}
> +	return H_SUCCESS;
> +}
> +
>  /* Used to convert a nested guest real address to a L1 guest real address */
>  static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
>  				       struct kvm_nested_guest *gp,
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 13999123b735..172a89187116 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -678,6 +678,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  		r = hv_enabled && kvmppc_hv_ops->enable_svm &&
>  			!kvmppc_hv_ops->enable_svm(NULL);
>  		break;
> +	case KVM_CAP_PPC_RPT_INVALIDATE:
> +		r = 1;
> +		break;
>  #endif
>  	default:
>  		r = 0;
> diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
> index b487b489d4b6..3a2b12d1d49b 100644
> --- a/arch/powerpc/mm/book3s64/radix_tlb.c
> +++ b/arch/powerpc/mm/book3s64/radix_tlb.c
> @@ -18,10 +18,6 @@
>  #include <asm/cputhreads.h>
>  #include <asm/plpar_wrappers.h>
>  
> -#define RIC_FLUSH_TLB 0
> -#define RIC_FLUSH_PWC 1
> -#define RIC_FLUSH_ALL 2
> -
>  /*
>   * tlbiel instruction for radix, set invalidation
>   * i.e., r=1 and is=01 or is=10 or is=11
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index ca41220b40b8..c9ece825299e 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1053,6 +1053,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_X86_USER_SPACE_MSR 188
>  #define KVM_CAP_X86_MSR_FILTER 189
>  #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
> +#define KVM_CAP_PPC_RPT_INVALIDATE 191
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.ozlabs.org/pipermail/linuxppc-dev/attachments/20201217/b515a1a6/attachment-0001.sig>


More information about the Linuxppc-dev mailing list