[Skiboot] [PATCH 16/28] cpu: Fix problem calling time_wait with lock held

Thu Jul 7 17:35:49 AEST 2016

> cpu: Fix problem calling time_wait with lock held

The problem was it couldn't be called with the lock held?  Maybe this
should read:

  cpu: Allow time_wait to be called with lock held

?

Other than that

Acked-by: Michael Neuling <mikey at neuling.org>

On Thu, 2016-07-07 at 11:50 +1000, Benjamin Herrenschmidt wrote:
> Also make the locking around re-init safer, properly block the
> OS from restarting a thread that was caught for re-init.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>

> ---
>  core/cpu.c    | 24 +++++++++++++++++-------
>  include/cpu.h |  1 +
>  2 files changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/core/cpu.c b/core/cpu.c
> index 8a7a43f..2084150 100644
> --- a/core/cpu.c
> +++ b/core/cpu.c
> @@ -726,6 +726,11 @@ static int64_t opal_start_cpu_thread(uint64_t
> server_no, uint64_t start_address)
>  		prerror("OPAL: CPU not active in OPAL !\n");
>  		return OPAL_WRONG_STATE;
>  	}
> +	if (cpu->in_reinit) {
> +		unlock(&reinit_lock);
> +		prerror("OPAL: CPU being reinitialized !\n");
> +		return OPAL_WRONG_STATE;
> +	}
>  	job = __cpu_queue_job(cpu, "start_thread",
>  			      opal_start_thread_job, (void
> *)start_address,
>  			      true);
> @@ -821,34 +826,37 @@ static int64_t opal_reinit_cpus(uint64_t flags)
>  
>  	prerror("OPAL: Trying a CPU re-init with flags: 0x%llx\n",
> flags);
>  
> + again:
>  	lock(&reinit_lock);
>  
>  	for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu)) {
> -		if (cpu == this_cpu())
> +		if (cpu == this_cpu() || cpu->in_reinit)
>  			continue;
>  		if (cpu->state == cpu_state_os) {
> +			unlock(&reinit_lock);
>  			/*
>  			 * That might be a race with return CPU during
> kexec
>  			 * where we are still, wait a bit and try again
>  			 */
>  			for (i = 0; (i < 1000) &&
>  				     (cpu->state == cpu_state_os); i++)
> {
> -				unlock(&reinit_lock);
>  				time_wait_ms(1);
> -				lock(&reinit_lock);
>  			}
>  			if (cpu->state == cpu_state_os) {
>  				prerror("OPAL: CPU 0x%x not in OPAL
> !\n", cpu->pir);
> -				rc = OPAL_WRONG_STATE;
> -				goto bail;
> +				return OPAL_WRONG_STATE;
>  			}
> +			goto again;
>  		}
> +		cpu->in_reinit = true;
>  	}
>  	/*
>  	 * Now we need to mark ourselves "active" or we'll be skipped
>  	 * by the various "for_each_active_..." calls done by
> slw_reinit()
>  	 */
>  	this_cpu()->state = cpu_state_active;
> +	this_cpu()->in_reinit = true;
> +	unlock(&reinit_lock);
>  
>  	/*
>  	 * If the flags affect endianness and we are on P8 DD2 or later,
> then
> @@ -875,10 +883,12 @@ static int64_t opal_reinit_cpus(uint64_t flags)
>  		rc = slw_reinit(flags);
>  
>  	/* And undo the above */
> +	lock(&reinit_lock);
>  	this_cpu()->state = cpu_state_os;
> -
> -bail:
> +	for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu))
> +		cpu->in_reinit = false;
>  	unlock(&reinit_lock);
> +
>  	return rc;
>  }
>  opal_call(OPAL_REINIT_CPUS, opal_reinit_cpus, 1);
> diff --git a/include/cpu.h b/include/cpu.h
> index 59923d5..62f5629 100644
> --- a/include/cpu.h
> +++ b/include/cpu.h
> @@ -60,6 +60,7 @@ struct cpu_thread {
>  	bool				con_need_flush;
>  	bool				in_mcount;
>  	bool				in_poller;
> +	bool				in_reinit;
>  	uint32_t			hbrt_spec_wakeup; /* primary
> only */
>  	uint64_t			save_l2_fir_action1;
>  	uint64_t			current_token;