[PATCH 1/2] powerpc: Partition hibernation support

Michael Ellerman michael at ellerman.id.au
Mon May 10 16:48:22 EST 2010


On Fri, 2010-05-07 at 13:58 -0500, Brian King wrote:
> Enables support for HMC initiated partition hibernation. This is
> a firmware assisted hibernation, since the firmware handles writing
> the memory out to disk, along with other partition information,
> so we just mimic suspend to ram.
> 
> Signed-off-by: Brian King <brking at linux.vnet.ibm.com>

Hi Brian,

A few comments while I wait for my kernels to build ...


> diff -puN /dev/null arch/powerpc/platforms/pseries/suspend.c
> --- /dev/null	2009-12-15 17:58:07.000000000 -0600
> +++ linux-2.6-bjking1/arch/powerpc/platforms/pseries/suspend.c	2010-05-07 13:49:12.000000000 -0500
> @@ -0,0 +1,209 @@
> +/*
> +  * Copyright (C) 2010 Brian King IBM Corporation
> +  *
> +  * This program is free software; you can redistribute it and/or modify
> +  * it under the terms of the GNU General Public License as published by
> +  * the Free Software Foundation; either version 2 of the License, or
> +  * (at your option) any later version.
> +  *
> +  * This program is distributed in the hope that it will be useful,
> +  * but WITHOUT ANY WARRANTY; without even the implied warranty of
> +  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +  * GNU General Public License for more details.
> +  *
> +  * You should have received a copy of the GNU General Public License
> +  * along with this program; if not, write to the Free Software
> +  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
> +  */
> +
> +#include <linux/delay.h>
> +#include <linux/suspend.h>
> +#include <asm/hvcall.h>
> +#include <asm/machdep.h>
> +#include <asm/mmu.h>
> +#include <asm/rtas.h>
> +
> +static u64 stream_id;

I don't see any reasons why this is a global?

> +static struct sys_device suspend_sysdev;
> +static DECLARE_COMPLETION(suspend_work);
> +static struct rtas_suspend_me_data suspend_data;
> +static atomic_t suspending;
> +
> +/**
> + * pseries_suspend_begin - First phase of hibernation
> + *
> + * Check to ensure we are in a valid state to hibernate
> + *
> + * Return value:
> + * 	0 on success / other on failure
> + **/
> +static int pseries_suspend_begin(suspend_state_t state)
> +{
> +	long vs, rc;

I read "vs" as "versus", whereas I think it's meant to be "vasi state".
"state" might be a better name.

> +	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
> +
> +	if (!rtas_service_present("ibm,suspend-me"))
> +		return -ENOSYS;
> +
> +	/* Make sure the state is valid */
> +	rc = plpar_hcall(H_VASI_STATE, retbuf, stream_id);
> +
> +	vs = retbuf[0];
> +
> +	if (rc) {
> +		pr_err("pseries_suspend_begin: vasi_state returned %ld\n",rc);
> +		return rc;
> +	} else if (vs == H_VASI_ENABLED) {
> +		return RTAS_NOT_SUSPENDABLE;

I'm sure if I read PAPR this would make sense, but I haven't, why does
enabled mean we can't suspend?

> +	} else if (vs != H_VASI_SUSPENDING) {
> +		pr_err("pseries_suspend_begin: vasi_state returned state %ld\n", vs);
> +		return -EIO;
> +	}

The comment above "Make sure the state is valid" made me think that was
just a preliminary check before we started suspending. But it seems that
actually starts the suspend?

> +
> +	return 0;
> +}
> +
...
> +/**
> + * pseries_prepare_late - Prepare to suspend all other CPUs
> + *
> + * Return value:
> + * 	0 on success / other on failure
> + **/
> +static int pseries_prepare_late(void)
> +{
> +	atomic_set(&suspending, 1);
> +	atomic_set(&suspend_data.working, 0);
> +	atomic_set(&suspend_data.done, 0);
> +	atomic_set(&suspend_data.error, 0);
> +	suspend_data.token = rtas_token("ibm,suspend-me");

You could look this up at init time.

> +	suspend_data.complete = &suspend_work;
> +	INIT_COMPLETION(suspend_work);
> +	return 0;
> +}
> +
> +/**
> + * store_hibernate - Initiate partition hibernation
> + * @classdev:	sysdev class struct
> + * @attr:		class device attribute struct
> + * @buf:		buffer
> + * @count:		buffer size
> + *
> + * Write the stream ID received from the HMC to this file
> + * to trigger hibernating the partition
> + *
> + * Return value:
> + * 	number of bytes printed to buffer / other on failure
> + **/
> +static ssize_t store_hibernate(struct sysdev_class *classdev,
> +			       struct sysdev_class_attribute *attr,
> +			       const char *buf, size_t count)
> +{
> +	int rc;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	stream_id = simple_strtoul(buf, NULL, 16);

Error handling?

> +	do {
> +		rc = pseries_suspend_begin(PM_SUSPEND_MEM);
> +		if (rc == RTAS_NOT_SUSPENDABLE)
> +			ssleep(1);

Hmm OK. So if we're not suspendable, we just try again? That sounds more
like -EAGAIN to me.

> +	} while (rc == RTAS_NOT_SUSPENDABLE);
> +
> +	if (!rc)
> +		rc = pm_suspend(PM_SUSPEND_MEM);
> +
> +	stream_id = 0;

Same comment before about this being global.

> +	if (!rc)
> +		rc = count;
> +	return rc;
> +}
> +
> +static SYSDEV_CLASS_ATTR(hibernate, S_IWUSR, NULL, store_hibernate);
> +
> +static struct sysdev_class suspend_sysdev_class = {
> +	.name = "power",

"power" like powerpc, or like electricity? If it's the former should it
be "pseries" instead.

...
> +/**
> + * pseries_suspend_init - initcall for pSeries suspend
> + *
> + * Return value:
> + * 	0 on success / other on failure
> + **/
> +static int __init pseries_suspend_init(void)
> +{
> +	int rc;
> +
> +	if ((rc = pseries_suspend_sysfs_register(&suspend_sysdev)))
> +		return rc;
> +
> +	ppc_md.suspend_disable_cpu = pseries_suspend_cpu;
> +	suspend_set_ops(&pseries_suspend_ops);

This unconditionally sets the pseries suspend ops, regardless of whether
we're actually running on a pseries machine, or if the required RTAS
tokens were found.

You should probably use the presence of ibm,suspend-me as the condition?
And while you're looking it up anyway you can store it in
suspend_data :)

> +	return 0;
> +}
> +
> +__initcall(pseries_suspend_init);
> diff -puN arch/powerpc/kernel/rtas.c~powerpc_allarch_pseries_hibernation arch/powerpc/kernel/rtas.c
> --- linux-2.6/arch/powerpc/kernel/rtas.c~powerpc_allarch_pseries_hibernation	2010-05-07 13:49:12.000000000 -0500
> +++ linux-2.6-bjking1/arch/powerpc/kernel/rtas.c	2010-05-07 13:49:12.000000000 -0500
> @@ -47,14 +47,6 @@ struct rtas_t rtas = {
>  };
>  EXPORT_SYMBOL(rtas);
>  
> -struct rtas_suspend_me_data {
> -	atomic_t working; /* number of cpus accessing this struct */
> -	atomic_t done;
> -	int token; /* ibm,suspend-me */
> -	int error;
> -	struct completion *complete; /* wait on this until working == 0 */
> -};
> -
>  DEFINE_SPINLOCK(rtas_data_buf_lock);
>  EXPORT_SYMBOL(rtas_data_buf_lock);
>  
> @@ -711,14 +703,54 @@ void rtas_os_term(char *str)
>  
>  static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
>  #ifdef CONFIG_PPC_PSERIES
> -static void rtas_percpu_suspend_me(void *info)
> +static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done)
> +{
> +	u16 slb_size = mmu_slb_size;
> +	int rc = H_MULTI_THREADS_ACTIVE;
> +	int cpu;
> +
> +	atomic_inc(&data->working);
> +
> +	slb_set_size(SLB_MIN_SIZE);
> +	printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id());
> +
> +	while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) &&
> +	       !atomic_read(&data->error))
> +		rc = rtas_call(data->token, 0, 1, NULL);
> +
> +	if (rc || atomic_read(&data->error)) {
> +		printk(KERN_DEBUG "ibm,suspend-me returned %d\n", rc);
> +		slb_set_size(slb_size);
> +	}
> +
> +	if (atomic_read(&data->error))
> +		rc = atomic_read(&data->error);
> +
> +	atomic_set(&data->error, rc);
> +
> +	if (wake_when_done) {
> +		atomic_set(&data->done, 1);
> +
> +		for_each_online_cpu(cpu)
> +			plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
> +	}
> +
> +	if (atomic_dec_return(&data->working) == 0)
> +		complete(data->complete);
> +
> +	return rc;
> +}
> +
> +int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data)
> +{
> +	return __rtas_suspend_last_cpu(data, 0);
> +}
> +
> +static int __rtas_suspend_cpu(struct rtas_suspend_me_data *data, int wake_when_done)
>  {
>  	long rc = H_SUCCESS;
>  	unsigned long msr_save;
> -	u16 slb_size = mmu_slb_size;
>  	int cpu;
> -	struct rtas_suspend_me_data *data =
> -		(struct rtas_suspend_me_data *)info;
>  
>  	atomic_inc(&data->working);
>  
> @@ -726,7 +758,7 @@ static void rtas_percpu_suspend_me(void
>  	msr_save = mfmsr();
>  	mtmsr(msr_save & ~(MSR_EE));
>  
> -	while (rc == H_SUCCESS && !atomic_read(&data->done))
> +	while (rc == H_SUCCESS && !atomic_read(&data->done) && !atomic_read(&data->error))
>  		rc = plpar_hcall_norets(H_JOIN);
>  
>  	mtmsr(msr_save);
> @@ -738,33 +770,37 @@ static void rtas_percpu_suspend_me(void
>  		/* All other cpus are in H_JOIN, this cpu does
>  		 * the suspend.
>  		 */
> -		slb_set_size(SLB_MIN_SIZE);
> -		printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n",
> -		       smp_processor_id());
> -		data->error = rtas_call(data->token, 0, 1, NULL);
> -
> -		if (data->error) {
> -			printk(KERN_DEBUG "ibm,suspend-me returned %d\n",
> -			       data->error);
> -			slb_set_size(slb_size);
> -		}
> +		return __rtas_suspend_last_cpu(data, wake_when_done);
>  	} else {
>  		printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n",
>  		       smp_processor_id(), rc);
> -		data->error = rc;
> +		atomic_set(&data->error, rc);
>  	}
>  
> -	atomic_set(&data->done, 1);
> +	if (wake_when_done) {
> +		atomic_set(&data->done, 1);
>  
> -	/* This cpu did the suspend or got an error; in either case,
> -	 * we need to prod all other other cpus out of join state.
> -	 * Extra prods are harmless.
> -	 */
> -	for_each_online_cpu(cpu)
> -		plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
> +		/* This cpu did the suspend or got an error; in either case,
> +		 * we need to prod all other other cpus out of join state.
> +		 * Extra prods are harmless.
> +		 */
> +		for_each_online_cpu(cpu)
> +			plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
> +	}
>  out:
>  	if (atomic_dec_return(&data->working) == 0)
>  		complete(data->complete);
> +	return rc;
> +}
> +
> +int rtas_suspend_cpu(struct rtas_suspend_me_data *data)
> +{
> +	return __rtas_suspend_cpu(data, 0);
> +}
> +
> +static void rtas_percpu_suspend_me(void *info)
> +{
> +	__rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1);
>  }

What is the current path via rtas_ibm_suspend_me() used for? It's not
clear to me that you've changed that in a way that is equivalent.

>  
>  static int rtas_ibm_suspend_me(struct rtas_args *args)
> @@ -799,29 +835,41 @@ static int rtas_ibm_suspend_me(struct rt
>  
>  	atomic_set(&data.working, 0);
>  	atomic_set(&data.done, 0);
> +	atomic_set(&data.error, 0);
>  	data.token = rtas_token("ibm,suspend-me");
> -	data.error = 0;
>  	data.complete = &done;
>  
>  	/* Call function on all CPUs.  One of us will make the
>  	 * rtas call
>  	 */
>  	if (on_each_cpu(rtas_percpu_suspend_me, &data, 0))
> -		data.error = -EINVAL;
> +		atomic_set(&data.error, -EINVAL);
>  
>  	wait_for_completion(&done);
>  
> -	if (data.error != 0)
> +	if (atomic_read(&data.error) != 0)
>  		printk(KERN_ERR "Error doing global join\n");
>  
> -	return data.error;
> +	return atomic_read(&data.error);

So while moving the struct definition you've also changed error to be
atomic, and fixed the code here. That would be nicer as a separate
commit, at the very least you should call it out in your changelog.

And why does it need to be atomic?

>  }
>  #else /* CONFIG_PPC_PSERIES */
>  static int rtas_ibm_suspend_me(struct rtas_args *args)
>  {
>  	return -ENOSYS;
>  }
> +
> +int rtas_suspend_cpu(struct rtas_suspend_me_data *data)
> +{
> +	return -ENOSYS;
> +}
> +
> +int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data)
> +{
> +	return -ENOSYS;
> +}

Don't see why you need these empty versions, they're only ever called
from pseries code aren't they?

>  #endif
> +EXPORT_SYMBOL_GPL(rtas_suspend_cpu);
> +EXPORT_SYMBOL_GPL(rtas_suspend_last_cpu);

Don't see why these need to be exported, CONFIG_SUSPEND is bool so
pseries/suspend.c is only ever built-in.

> diff -puN arch/powerpc/include/asm/rtas.h~powerpc_allarch_pseries_hibernation arch/powerpc/include/asm/rtas.h
> --- linux-2.6/arch/powerpc/include/asm/rtas.h~powerpc_allarch_pseries_hibernation	2010-05-07 13:49:12.000000000 -0500
> +++ linux-2.6-bjking1/arch/powerpc/include/asm/rtas.h	2010-05-07 13:49:12.000000000 -0500
> @@ -63,6 +63,14 @@ struct rtas_t {
>  	struct device_node *dev;	/* virtual address pointer */
>  };
>  
> +struct rtas_suspend_me_data {
> +	atomic_t working; /* number of cpus accessing this struct */
> +	atomic_t done;
> +	int token; /* ibm,suspend-me */
> +	atomic_t error;
> +	struct completion *complete; /* wait on this until working == 0 */
> +};
> +
>  /* RTAS event classes */
>  #define RTAS_INTERNAL_ERROR		0x80000000 /* set bit 0 */
>  #define RTAS_EPOW_WARNING		0x40000000 /* set bit 1 */


> diff -puN arch/powerpc/include/asm/hvcall.h~powerpc_allarch_pseries_hibernation arch/powerpc/include/asm/hvcall.h
> --- linux-2.6/arch/powerpc/include/asm/hvcall.h~powerpc_allarch_pseries_hibernation	2010-05-07 13:49:12.000000000 -0500
> +++ linux-2.6-bjking1/arch/powerpc/include/asm/hvcall.h	2010-05-07 13:49:12.000000000 -0500
> @@ -74,6 +74,7 @@
>  #define H_NOT_ENOUGH_RESOURCES -44
>  #define H_R_STATE       -45
>  #define H_RESCINDEND    -46
> +#define H_MULTI_THREADS_ACTIVE -9005

Which means what?

> diff -puN arch/powerpc/include/asm/machdep.h~powerpc_allarch_pseries_hibernation arch/powerpc/include/asm/machdep.h
> --- linux-2.6/arch/powerpc/include/asm/machdep.h~powerpc_allarch_pseries_hibernation	2010-05-07 13:49:12.000000000 -0500
> +++ linux-2.6-bjking1/arch/powerpc/include/asm/machdep.h	2010-05-07 13:49:12.000000000 -0500
> @@ -266,6 +266,7 @@ struct machdep_calls {
>  	void (*suspend_disable_irqs)(void);
>  	void (*suspend_enable_irqs)(void);
>  #endif
> +	int (*suspend_disable_cpu)(void);

Should this be ifdef CONFIG_SUSPEND ? 

> diff -puN arch/powerpc/platforms/pseries/hotplug-cpu.c~powerpc_allarch_pseries_hibernation arch/powerpc/platforms/pseries/hotplug-cpu.c
> --- linux-2.6/arch/powerpc/platforms/pseries/hotplug-cpu.c~powerpc_allarch_pseries_hibernation	2010-05-07 13:49:12.000000000 -0500
> +++ linux-2.6-bjking1/arch/powerpc/platforms/pseries/hotplug-cpu.c	2010-05-07 13:49:12.000000000 -0500
> @@ -116,6 +116,12 @@ static void pseries_mach_cpu_die(void)
>  
>  	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
>  		set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
> +		if (ppc_md.suspend_disable_cpu)
> +			ppc_md.suspend_disable_cpu();
> +	}
> +
> +	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
> +		set_cpu_current_state(cpu, CPU_STATE_INACTIVE);

That looks weird? Do we expect preferred offline state to change?

>  		cede_latency_hint = 2;
>  
>  		get_lppaca()->idle = 1;


cheers
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: This is a digitally signed message part
URL: <http://lists.ozlabs.org/pipermail/linuxppc-dev/attachments/20100510/fd8e281e/attachment-0001.pgp>


More information about the Linuxppc-dev mailing list