[PATCH kernel v2 3/6] KVM: PPC: Account TCE-containing pages in locked_vm

David Gibson david at gibson.dropbear.id.au
Mon Jan 25 10:57:14 AEDT 2016


On Thu, Jan 21, 2016 at 06:39:34PM +1100, Alexey Kardashevskiy wrote:
> At the moment pages used for TCE tables (in addition to pages addressed
> by TCEs) are not counted in locked_vm counter so a malicious userspace
> tool can call ioctl(KVM_CREATE_SPAPR_TCE) as many times as RLIMIT_NOFILE and
> lock a lot of memory.
> 
> This adds counting for pages used for TCE tables.
> 
> This counts the number of pages required for a table plus pages for
> the kvmppc_spapr_tce_table struct (TCE table descriptor) itself.
> 
> This changes release_spapr_tce_table() to store @npages on stack to
> avoid calling kvmppc_stt_npages() in the loop (tiny optimization,
> probably).
> 
> This does not change the amount of (de)allocated memory.
> 
> Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
> ---
> Changes:
> v2:
> * switched from long to unsigned long types
> * added WARN_ON_ONCE() in locked_vm decrement case
> ---
>  arch/powerpc/kvm/book3s_64_vio.c | 55 +++++++++++++++++++++++++++++++++++++---
>  1 file changed, 52 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
> index 9526c34..ea498b4 100644
> --- a/arch/powerpc/kvm/book3s_64_vio.c
> +++ b/arch/powerpc/kvm/book3s_64_vio.c
> @@ -39,19 +39,62 @@
>  
>  #define TCES_PER_PAGE	(PAGE_SIZE / sizeof(u64))
>  
> -static long kvmppc_stt_npages(unsigned long window_size)
> +static unsigned long kvmppc_stt_npages(unsigned long window_size)
>  {
>  	return ALIGN((window_size >> SPAPR_TCE_SHIFT)
>  		     * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
>  }
>  
> +static long kvmppc_account_memlimit(unsigned long npages, bool inc)
> +{
> +	long ret = 0;
> +	const unsigned long bytes = sizeof(struct kvmppc_spapr_tce_table) +
> +			(npages * sizeof(struct page *));
> +	const unsigned long stt_pages = ALIGN(bytes, PAGE_SIZE) / PAGE_SIZE;

Urgh, this is pretty hard to follow, because in some places
npages / stt_pages refer to the number of pages occupied by the
actual TCE tables, and in other places to the number of pages
occupied by the overhead data structures.  Please use different (and
consistently named) variables for the two things to make this clearer.

It also seems odd that the calculation of the overhead pages is done
here, but the base number of pages is calculated in the caller, even
though both quantities come from the stt structure itself.

> +	if (!current || !current->mm)
> +		return ret; /* process exited */
> +
> +	npages += stt_pages;
> +
> +	down_write(&current->mm->mmap_sem);
> +
> +	if (inc) {
> +		unsigned long locked, lock_limit;
> +
> +		locked = current->mm->locked_vm + npages;
> +		lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> +		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
> +			ret = -ENOMEM;
> +		else
> +			current->mm->locked_vm += npages;
> +	} else {
> +		if (WARN_ON_ONCE(npages > current->mm->locked_vm))
> +			npages = current->mm->locked_vm;
> +
> +		current->mm->locked_vm -= npages;
> +	}
> +
> +	pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid,
> +			inc ? '+' : '-',
> +			npages << PAGE_SHIFT,
> +			current->mm->locked_vm << PAGE_SHIFT,
> +			rlimit(RLIMIT_MEMLOCK),
> +			ret ? " - exceeded" : "");
> +
> +	up_write(&current->mm->mmap_sem);
> +
> +	return ret;
> +}
> +
>  static void release_spapr_tce_table(struct rcu_head *head)
>  {
>  	struct kvmppc_spapr_tce_table *stt = container_of(head,
>  			struct kvmppc_spapr_tce_table, rcu);
>  	int i;
> +	unsigned long npages = kvmppc_stt_npages(stt->window_size);
>  
> -	for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
> +	for (i = 0; i < npages; i++)
>  		__free_page(stt->pages[i]);
>  
>  	kfree(stt);
> @@ -89,6 +132,7 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
>  
>  	kvm_put_kvm(stt->kvm);
>  
> +	kvmppc_account_memlimit(kvmppc_stt_npages(stt->window_size), false);
>  	call_rcu(&stt->rcu, release_spapr_tce_table);
>  
>  	return 0;
> @@ -103,7 +147,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>  				   struct kvm_create_spapr_tce *args)
>  {
>  	struct kvmppc_spapr_tce_table *stt = NULL;
> -	long npages;
> +	unsigned long npages;
>  	int ret = -ENOMEM;
>  	int i;
>  
> @@ -114,6 +158,11 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>  	}
>  
>  	npages = kvmppc_stt_npages(args->window_size);
> +	ret = kvmppc_account_memlimit(npages, true);
> +	if (ret) {
> +		stt = NULL;
> +		goto fail;
> +	}
>  
>  	stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
>  		      GFP_KERNEL);

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: not available
URL: <http://lists.ozlabs.org/pipermail/linuxppc-dev/attachments/20160125/e3d5f0d6/attachment-0001.sig>


More information about the Linuxppc-dev mailing list