[PATCH v3 04/10] powerpc/pseries/vas: Reopen windows with DLPAR core add

Haren Myneni haren at linux.ibm.com
Wed Feb 16 12:38:04 AEDT 2022


On Mon, 2022-02-14 at 13:08 +1000, Nicholas Piggin wrote:
> Excerpts from Haren Myneni's message of January 22, 2022 5:56 am:
> > VAS windows can be closed in the hypervisor due to lost credits
> > when the core is removed. If these credits are available later
> > for core add, reopen these windows and set them active. When the
> > kernel sees page fault on the paste address, it creates new mapping
> > on the new paste address. Then the user space can continue to use
> > these windows and send HW compression requests to NX successfully.
> 
> Any reason to put this before the close windows patch? It would be
> more logical to put it afterwards AFAIKS.

reconfig_open_windows() is just to reopen and set the status flag when
windows are closed. I thought adding handler first before closing /
unmap helps during git bisect. 

I can change. 

> 
> > Signed-off-by: Haren Myneni <haren at linux.ibm.com>
> > ---
> >  arch/powerpc/include/asm/vas.h          |  16 +++
> >  arch/powerpc/platforms/book3s/vas-api.c |   1 +
> >  arch/powerpc/platforms/pseries/vas.c    | 144
> > ++++++++++++++++++++++++
> >  arch/powerpc/platforms/pseries/vas.h    |   8 +-
> >  4 files changed, 163 insertions(+), 6 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/vas.h
> > b/arch/powerpc/include/asm/vas.h
> > index 57573d9c1e09..f1efe86563cc 100644
> > --- a/arch/powerpc/include/asm/vas.h
> > +++ b/arch/powerpc/include/asm/vas.h
> > @@ -29,6 +29,19 @@
> >  #define VAS_THRESH_FIFO_GT_QTR_FULL	2
> >  #define VAS_THRESH_FIFO_GT_EIGHTH_FULL	3
> >  
> > +/*
> > + * VAS window status
> > + */
> > +#define VAS_WIN_ACTIVE		0x0	/* Used in platform
> > independent */
> > +					/* vas mmap() */
> > +/* The hypervisor returns these values */
> > +#define VAS_WIN_CLOSED		0x00000001
> > +#define VAS_WIN_INACTIVE	0x00000002 /* Inactive due to HW
> > failure */
> > +#define VAS_WIN_MOD_IN_PROCESS	0x00000003 /* Process of being
> > modified, */
> 
> While you're moving these and adding a comment, it would be good to 
> list what hcalls they are relevant to. H_QUERY_VAS_WINDOW (which is
> not
> used anywhere yet?) These are also a 1-byte field, so '0x00', '0x01'
> etc
> would be more appropriate.

Yes, these status bits are assigned by the hypervisor and we are not
using / supporting them right now. I just list them as defined in PAPR
NX HCALLs.

For example: OS can modify the existing window with modify HCALL when
the window is used. During this time, the hypervisor return this status
with window query HCALL. 
 
> 
> > +					   /* deallocated, or quiesced
> > */
> > +/* Linux status bits */
> > +#define VAS_WIN_NO_CRED_CLOSE	0x00000004 /* Window is closed
> > due to */
> > +					   /* lost credit */
> 
> This is mixing a user defined bit field with hcall API value field.
> You
> also AFAIKS as yet don't fill in the hypervisor status anywhere.
> 
> I would make this it's own field entirely. A boolean would be nice,
> if
> possible.

Yes, HV status bits are not used here. 

In case if the window status is reported thorugh sysfs in future,
thought that it will be simpler to have one status flag. 

I can add 'hv_status' for the hypervisor status flag in pseries_vas-
window struct and 'status' for linux in vas_window struct. 

We also need one more status for migration. So boolean may not be used.

> 
> >  /*
> >   * Get/Set bit fields
> >   */
> > @@ -59,6 +72,8 @@ struct vas_user_win_ref {
> >  	struct pid *pid;	/* PID of owner */
> >  	struct pid *tgid;	/* Thread group ID of owner */
> >  	struct mm_struct *mm;	/* Linux process mm_struct */
> > +	struct mutex mmap_mutex;	/* protects paste address mmap() */
> > +					/* with DLPAR close/open
> > windows */
> >  };
> >  
> >  /*
> > @@ -67,6 +82,7 @@ struct vas_user_win_ref {
> >  struct vas_window {
> >  	u32 winid;
> >  	u32 wcreds_max;	/* Window credits */
> > +	u32 status;
> >  	enum vas_cop_type cop;
> >  	struct vas_user_win_ref task_ref;
> >  	char *dbgname;
> > diff --git a/arch/powerpc/platforms/book3s/vas-api.c
> > b/arch/powerpc/platforms/book3s/vas-api.c
> > index 4d82c92ddd52..2b0ced611f32 100644
> > --- a/arch/powerpc/platforms/book3s/vas-api.c
> > +++ b/arch/powerpc/platforms/book3s/vas-api.c
> > @@ -316,6 +316,7 @@ static int coproc_ioc_tx_win_open(struct file
> > *fp, unsigned long arg)
> >  		return PTR_ERR(txwin);
> >  	}
> >  
> > +	mutex_init(&txwin->task_ref.mmap_mutex);
> >  	cp_inst->txwin = txwin;
> >  
> >  	return 0;
> > diff --git a/arch/powerpc/platforms/pseries/vas.c
> > b/arch/powerpc/platforms/pseries/vas.c
> > index 2ef56157634f..d9ff73d7704d 100644
> > --- a/arch/powerpc/platforms/pseries/vas.c
> > +++ b/arch/powerpc/platforms/pseries/vas.c
> > @@ -501,6 +501,7 @@ static int __init get_vas_capabilities(u8 feat,
> > enum vas_cop_feat_type type,
> >  	memset(vcaps, 0, sizeof(*vcaps));
> >  	INIT_LIST_HEAD(&vcaps->list);
> >  
> > +	vcaps->feat = feat;
> >  	caps = &vcaps->caps;
> >  
> >  	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
> > @@ -539,6 +540,145 @@ static int __init get_vas_capabilities(u8
> > feat, enum vas_cop_feat_type type,
> >  	return 0;
> >  }
> >  
> > +/*
> > + * VAS windows can be closed due to lost credits when the core is
> > + * removed. So reopen them if credits are available due to DLPAR
> > + * core add and set the window active status. When NX sees the
> > page
> > + * fault on the unmapped paste address, the kernel handles the
> > fault
> > + * by setting the remapping to new paste address if the window is
> > + * active.
> > + */
> > +static int reconfig_open_windows(struct vas_caps *vcaps, int
> > creds)
> > +{
> > +	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
> > +	struct vas_cop_feat_caps *caps = &vcaps->caps;
> > +	struct pseries_vas_window *win = NULL, *tmp;
> > +	int rc, mv_ents = 0;
> > +
> > +	/*
> > +	 * Nothing to do if there are no closed windows.
> > +	 */
> > +	if (!vcaps->close_wins)
> > +		return 0;
> > +
> > +	/*
> > +	 * For the core removal, the hypervisor reduces the credits
> > +	 * assigned to the LPAR and the kernel closes VAS windows
> > +	 * in the hypervisor depends on reduced credits. The kernel
> > +	 * uses LIFO (the last windows that are opened will be closed
> > +	 * first) and expects to open in the same order when credits
> > +	 * are available.
> > +	 * For example, 40 windows are closed when the LPAR lost 2
> > cores
> > +	 * (dedicated). If 1 core is added, this LPAR can have 20 more
> > +	 * credits. It means the kernel can reopen 20 windows. So move
> > +	 * 20 entries in the VAS windows lost and reopen next 20
> > windows.
> > +	 */
> > +	if (vcaps->close_wins > creds)
> > +		mv_ents = vcaps->close_wins - creds;
> > +
> > +	list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
> > +		if (!mv_ents)
> > +			break;
> > +
> > +		mv_ents--;
> > +	}
> > +
> > +	list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list)
> > {
> > +		/*
> > +		 * Nothing to do on this window if it is not closed
> > +		 * with VAS_WIN_NO_CRED_CLOSE
> > +		 */
> > +		if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE))
> > +			continue;
> > +
> > +		rc = allocate_setup_window(win, (u64 *)&domain[0],
> > +					   caps->win_type);
> > +		if (rc)
> > +			return rc;
> > +
> > +		rc = h_modify_vas_window(win);
> > +		if (rc)
> > +			goto out;
> > +
> > +		mutex_lock(&win->vas_win.task_ref.mmap_mutex);
> > +		/*
> > +		 * Set window status to active
> > +		 */
> > +		win->vas_win.status &= ~VAS_WIN_NO_CRED_CLOSE;
> > +		mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
> 
> What's the mutex protecting? If status can change, what happens if
> it 
> changed between checking it above and this point?

We need this mutex to protect status flag between
reconfig_close/open_windows() and VAS mmap handler. 
 We hold pseries_mutex() before calling recnfig_close/popen_windows(),
so not an issue here.

> 
> > +		win->win_type = caps->win_type;
> > +		if (!--vcaps->close_wins)
> > +			break;
> > +	}
> > +
> > +	return 0;
> > +out:
> > +	/*
> > +	 * Window modify HCALL failed. So close the window to the
> > +	 * hypervisor and return.
> > +	 */
> > +	free_irq_setup(win);
> > +	h_deallocate_vas_window(win->vas_win.winid);
> > +	return rc;
> > +}
> > +
> > +/*
> > + * Get new VAS capabilities when the core add/removal
> > configuration
> > + * changes. Reconfig window configurations based on the credits
> > + * availability from this new capabilities.
> > + */
> > +static int vas_reconfig_capabilties(u8 type)
> > +{
> > +	struct hv_vas_cop_feat_caps *hv_caps;
> > +	struct vas_cop_feat_caps *caps;
> > +	int lpar_creds, new_creds;
> > +	struct vas_caps *vcaps;
> > +	int rc = 0;
> > +
> > +	if (type >= VAS_MAX_FEAT_TYPE) {
> > +		pr_err("Invalid credit type %d\n", type);
> > +		return -EINVAL;
> > +	}
> > +
> > +	vcaps = &vascaps[type];
> > +	caps = &vcaps->caps;
> > +
> > +	hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
> > +	if (!hv_caps)
> > +		return -ENOMEM;
> > +
> > +	mutex_lock(&vas_pseries_mutex);
> > +	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, vcaps-
> > >feat,
> > +				      (u64)virt_to_phys(hv_caps));
> > +	if (rc)
> > +		goto out;
> > +
> > +	new_creds = be16_to_cpu(hv_caps->target_lpar_creds);
> > +
> > +	lpar_creds = atomic_read(&caps->target_creds);
> 
> NBD but it would be slightly clearer that new_creds is the absolute 
> value not the number of new credits if you call these two
> new_nr_creds and old_nr_creds or prev_nr_creds.

Sure will change.
> 
> > +
> > +	atomic_set(&caps->target_creds, new_creds);
> > +	/*
> > +	 * The total number of available credits may be decreased or
> > +	 * inceased with DLPAR operation. Means some windows have to be
> > +	 * closed / reopened. Hold the vas_pseries_mutex so that the
> > +	 * the user space can not open new windows.
> > +	 */
> > +	if (lpar_creds <  new_creds) {
> > +		/*
> > +		 * If the existing target credits is less than the new
> > +		 * target, reopen windows if they are closed due to
> > +		 * the previous DLPAR (core removal).
> > +		 */
> > +		rc = reconfig_open_windows(vcaps, new_creds -
> > lpar_creds);
> > +	}
> > +
> > +out:
> > +	mutex_unlock(&vas_pseries_mutex);
> > +	kfree(hv_caps);
> > +	return rc;
> > +}
> > +
> >  /*
> >   * Total number of default credits available (target_credits)
> >   * in LPAR depends on number of cores configured. It varies based
> > on
> > @@ -565,6 +705,10 @@ static int pseries_vas_notifier(struct
> > notifier_block *nb,
> >  	if (!intserv)
> >  		return NOTIFY_OK;
> >  
> > +	rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE);
> > +	if (rc)
> > +		pr_err("Failed reconfig VAS capabilities with
> > DLPAR\n");
> > +
> >  	return rc;
> >  }
> >  
> > diff --git a/arch/powerpc/platforms/pseries/vas.h
> > b/arch/powerpc/platforms/pseries/vas.h
> > index 0538760d13be..45b62565955b 100644
> > --- a/arch/powerpc/platforms/pseries/vas.h
> > +++ b/arch/powerpc/platforms/pseries/vas.h
> > @@ -21,12 +21,6 @@
> >  #define VAS_MOD_WIN_FLAGS	(VAS_MOD_WIN_JOBS_KILL | VAS_MOD_WIN_DR
> > | \
> >  				VAS_MOD_WIN_PR | VAS_MOD_WIN_SF)
> >  
> > -#define VAS_WIN_ACTIVE		0x0
> > -#define VAS_WIN_CLOSED		0x1
> > -#define VAS_WIN_INACTIVE	0x2	/* Inactive due to HW failure */
> > -/* Process of being modified, deallocated, or quiesced */
> > -#define VAS_WIN_MOD_IN_PROCESS	0x3
> > -
> >  #define VAS_COPY_PASTE_USER_MODE	0x00000001
> >  #define VAS_COP_OP_USER_MODE		0x00000010
> >  
> > @@ -84,6 +78,8 @@ struct vas_cop_feat_caps {
> >  struct vas_caps {
> >  	struct vas_cop_feat_caps caps;
> >  	struct list_head list;	/* List of open windows */
> > +	int close_wins;		/* closed windows in the hypervisor
> > for DLPAR */
> 
> 'nr_closed_wins'
> 
> This does not look like a capability. Although I guess neither do
> some
> of the fields (used_creds) in vas_cop_feat_caps. Why not at least put
> this there with the rest of those credits fields?

We can add used_credits() here since it is not part of HV capability.
It was included in previous pathes, So I did not change, 

Thanks
Haren

> 
> Thanks,
> Nick
> 
> > +	u8 feat;		/* Feature type */
> >  };
> >  
> >  /*
> > -- 
> > 2.27.0
> > 
> > 
> > 



More information about the Linuxppc-dev mailing list