[Skiboot] [PATCH 16/28] cpu: Fix problem calling time_wait with lock held

Benjamin Herrenschmidt benh at kernel.crashing.org
Thu Jul 7 17:38:32 AEST 2016


On Thu, 2016-07-07 at 17:35 +1000, Michael Neuling wrote:
> > 
> > cpu: Fix problem calling time_wait with lock held
> 
> The problem was it couldn't be called with the lock held?  Maybe this
> should read:
> 
>   cpu: Allow time_wait to be called with lock held

No. It must not be called with a lock held and we were doing it.

So maybe this should be called: cpu: Don't call time_wait with lock
held.

Cheers,
Ben.

> ?
> 
> Other than that
> 
> Acked-by: Michael Neuling <mikey at neuling.org>
> 
> 
> On Thu, 2016-07-07 at 11:50 +1000, Benjamin Herrenschmidt wrote:
> > Also make the locking around re-init safer, properly block the
> > OS from restarting a thread that was caught for re-init.
> > 
> > Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
> 
> 
> > ---
> >  core/cpu.c    | 24 +++++++++++++++++-------
> >  include/cpu.h |  1 +
> >  2 files changed, 18 insertions(+), 7 deletions(-)
> > 
> > diff --git a/core/cpu.c b/core/cpu.c
> > index 8a7a43f..2084150 100644
> > --- a/core/cpu.c
> > +++ b/core/cpu.c
> > @@ -726,6 +726,11 @@ static int64_t opal_start_cpu_thread(uint64_t
> > server_no, uint64_t start_address)
> >  		prerror("OPAL: CPU not active in OPAL !\n");
> >  		return OPAL_WRONG_STATE;
> >  	}
> > +	if (cpu->in_reinit) {
> > +		unlock(&reinit_lock);
> > +		prerror("OPAL: CPU being reinitialized !\n");
> > +		return OPAL_WRONG_STATE;
> > +	}
> >  	job = __cpu_queue_job(cpu, "start_thread",
> >  			      opal_start_thread_job, (void
> > *)start_address,
> >  			      true);
> > @@ -821,34 +826,37 @@ static int64_t opal_reinit_cpus(uint64_t
> > flags)
> >  
> >  	prerror("OPAL: Trying a CPU re-init with flags: 0x%llx\n",
> > flags);
> >  
> > + again:
> >  	lock(&reinit_lock);
> >  
> >  	for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu)) {
> > -		if (cpu == this_cpu())
> > +		if (cpu == this_cpu() || cpu->in_reinit)
> >  			continue;
> >  		if (cpu->state == cpu_state_os) {
> > +			unlock(&reinit_lock);
> >  			/*
> >  			 * That might be a race with return CPU
> > during
> > kexec
> >  			 * where we are still, wait a bit and try
> > again
> >  			 */
> >  			for (i = 0; (i < 1000) &&
> >  				     (cpu->state == cpu_state_os);
> > i++)
> > {
> > -				unlock(&reinit_lock);
> >  				time_wait_ms(1);
> > -				lock(&reinit_lock);
> >  			}
> >  			if (cpu->state == cpu_state_os) {
> >  				prerror("OPAL: CPU 0x%x not in
> > OPAL
> > !\n", cpu->pir);
> > -				rc = OPAL_WRONG_STATE;
> > -				goto bail;
> > +				return OPAL_WRONG_STATE;
> >  			}
> > +			goto again;
> >  		}
> > +		cpu->in_reinit = true;
> >  	}
> >  	/*
> >  	 * Now we need to mark ourselves "active" or we'll be
> > skipped
> >  	 * by the various "for_each_active_..." calls done by
> > slw_reinit()
> >  	 */
> >  	this_cpu()->state = cpu_state_active;
> > +	this_cpu()->in_reinit = true;
> > +	unlock(&reinit_lock);
> >  
> >  	/*
> >  	 * If the flags affect endianness and we are on P8 DD2 or
> > later,
> > then
> > @@ -875,10 +883,12 @@ static int64_t opal_reinit_cpus(uint64_t
> > flags)
> >  		rc = slw_reinit(flags);
> >  
> >  	/* And undo the above */
> > +	lock(&reinit_lock);
> >  	this_cpu()->state = cpu_state_os;
> > -
> > -bail:
> > +	for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu))
> > +		cpu->in_reinit = false;
> >  	unlock(&reinit_lock);
> > +
> >  	return rc;
> >  }
> >  opal_call(OPAL_REINIT_CPUS, opal_reinit_cpus, 1);
> > diff --git a/include/cpu.h b/include/cpu.h
> > index 59923d5..62f5629 100644
> > --- a/include/cpu.h
> > +++ b/include/cpu.h
> > @@ -60,6 +60,7 @@ struct cpu_thread {
> >  	bool				con_need_flush;
> >  	bool				in_mcount;
> >  	bool				in_poller;
> > +	bool				in_reinit;
> >  	uint32_t			hbrt_spec_wakeup; /*
> > primary
> > only */
> >  	uint64_t			save_l2_fir_action1;
> >  	uint64_t			current_token;


More information about the Skiboot mailing list