[PATCH V2 6/6] crypto/nx: Add P9 NX support for 842 compression engine

Haren Myneni haren at linux.vnet.ibm.com
Sat Jul 22 14:16:51 AEST 2017


On 07/17/2017 11:53 PM, Ram Pai wrote:
> On Mon, Jul 17, 2017 at 04:50:38PM -0700, Haren Myneni wrote:
>>
>> This patch adds P9 NX support for 842 compression engine. Virtual
>> Accelerator Switchboard (VAS) is used to access 842 engine on P9.
>>
>> For each NX engine per chip, setup receive window using
>> vas_rx_win_open() which configures RxFIFO with FIFO address, lpid,
>> pid and tid values. This unique (lpid, pid, tid) combination will
>> be used to identify the target engine.
>>
>> For crypto open request, open send window on the NX engine for
>> the corresponding chip / cpu where the open request is executed.
>> This send window will be closed upon crypto close request.
>>
>> NX provides high and normal priority FIFOs. For compression /
>> decompression requests, we use only high priority FIFOs in kernel.
>>
>> Each NX request will be communicated to VAS using copy/paste
>> instructions with vas_copy_crb() / vas_paste_crb() functions.
>>
>> Signed-off-by: Haren Myneni <haren at us.ibm.com>
>> ---
>>  drivers/crypto/nx/Kconfig          |   1 +
>>  drivers/crypto/nx/nx-842-powernv.c | 369 ++++++++++++++++++++++++++++++++++++-
>>  drivers/crypto/nx/nx-842.c         |   2 +-
>>  3 files changed, 365 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/crypto/nx/Kconfig b/drivers/crypto/nx/Kconfig
>> index ad7552a6998c..cd5dda9c48f4 100644
>> --- a/drivers/crypto/nx/Kconfig
>> +++ b/drivers/crypto/nx/Kconfig
>> @@ -38,6 +38,7 @@ config CRYPTO_DEV_NX_COMPRESS_PSERIES
>>  config CRYPTO_DEV_NX_COMPRESS_POWERNV
>>  	tristate "Compression acceleration support on PowerNV platform"
>>  	depends on PPC_POWERNV
>> +	depends on PPC_VAS
>>  	default y
>>  	help
>>  	  Support for PowerPC Nest (NX) compression acceleration. This
>> diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c
>> index c0dd4c7e17d3..8d9d21420144 100644
>> --- a/drivers/crypto/nx/nx-842-powernv.c
>> +++ b/drivers/crypto/nx/nx-842-powernv.c
>> @@ -23,6 +23,7 @@
>>  #include <asm/prom.h>
>>  #include <asm/icswx.h>
>>  #include <asm/vas.h>
>> +#include <asm/reg.h>
>>
>>  MODULE_LICENSE("GPL");
>>  MODULE_AUTHOR("Dan Streetman <ddstreet at ieee.org>");
>> @@ -32,6 +33,9 @@ MODULE_ALIAS_CRYPTO("842-nx");
>>
>>  #define WORKMEM_ALIGN	(CRB_ALIGN)
>>  #define CSB_WAIT_MAX	(5000) /* ms */
>> +#define VAS_RETRIES	(10)
>> +/* # of requests allowed per RxFIFO at a time. 0 for unlimited */
>> +#define MAX_CREDITS_PER_RXFIFO	(64)
>>
>>  struct nx842_workmem {
>>  	/* Below fields must be properly aligned */
>> @@ -42,16 +46,27 @@ struct nx842_workmem {
>>
>>  	ktime_t start;
>>
>> +	struct vas_window *txwin;	/* Used with VAS function */
>>  	char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
>>  } __packed __aligned(WORKMEM_ALIGN);
>>
>>  struct nx842_coproc {
>>  	unsigned int chip_id;
>>  	unsigned int ct;
>> -	unsigned int ci;
>> +	unsigned int ci;	/* Coprocessor instance, used with icswx */
>> +	struct {
>> +		struct vas_window *rxwin;
>> +		int id;
>> +	} vas;
> 
> ci and vas are mutually exclusive. Could a few bytes be saved by putting them in a union?

We will have only a few coproc entries (one per NX engine per chip), so the memory saved would be negligible.
> 
>>  	struct list_head list;
>>  };
>>
>> +/*
>> + * Send the request to NX engine on the chip for the corresponding CPU
>> + * where the process is executing. Use with VAS function.
>> + */
>> +static DEFINE_PER_CPU(struct nx842_coproc *, coproc_inst);
>> +
>>  /* no cpu hotplug on powernv, so this list never changes after init */
>>  static LIST_HEAD(nx842_coprocs);
>>  static unsigned int nx842_ct;	/* used in icswx function */
>> @@ -513,6 +528,108 @@ static int nx842_exec_icswx(const unsigned char *in, unsigned int inlen,
>>  }
>>
>>  /**
>> + * nx842_exec_vas - compress/decompress data using the 842 algorithm
>> + *
>> + * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
>> + * This compresses or decompresses the provided input buffer into the provided
>> + * output buffer.
>> + *
>> + * Upon return from this function @outlen contains the length of the
>> + * output data.  If there is an error then @outlen will be 0 and an
>> + * error will be specified by the return code from this function.
>> + *
>> + * The @workmem buffer should only be used by one function call at a time.
>> + *
>> + * @in: input buffer pointer
>> + * @inlen: input buffer size
>> + * @out: output buffer pointer
>> + * @outlenp: output buffer size pointer
>> + * @workmem: working memory buffer pointer, size determined by
>> + *           nx842_powernv_driver.workmem_size
>> + * @fc: function code, see CCW Function Codes in nx-842.h
>> + *
>> + * Returns:
>> + *   0		Success, output of length @outlenp stored in the buffer
>> + *		at @out
>> + *   -ENODEV	Hardware unavailable
>> + *   -ENOSPC	Output buffer is too small
>> + *   -EMSGSIZE	Input buffer too large
>> + *   -EINVAL	buffer constraints do not fit nx842_constraints
>> + *   -EPROTO	hardware error during operation
>> + *   -ETIMEDOUT	hardware did not complete operation in reasonable time
>> + *   -EINTR	operation was aborted
>> + */
>> +static int nx842_exec_vas(const unsigned char *in, unsigned int inlen,
>> +				  unsigned char *out, unsigned int *outlenp,
>> +				  void *workmem, int fc)
>> +{
>> +	struct coprocessor_request_block *crb;
>> +	struct coprocessor_status_block *csb;
>> +	struct nx842_workmem *wmem;
>> +	struct vas_window *txwin;
>> +	int ret, i = 0;
>> +	u32 ccw;
>> +	unsigned int outlen = *outlenp;
>> +
>> +	wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
>> +
>> +	*outlenp = 0;
>> +
>> +	crb = &wmem->crb;
>> +	csb = &crb->csb;
>> +
>> +	ret = nx842_config_crb(in, inlen, out, outlen, wmem);
>> +	if (ret)
>> +		return ret;
>> +
>> +	ccw = 0;
>> +	ccw = SET_FIELD(CCW_FC_842, ccw, fc);
>> +	crb->ccw = cpu_to_be32(ccw);
>> +
>> +	txwin = wmem->txwin;
>> +	/* shouldn't happen, we don't load without a coproc */
>> +	if (!txwin) {
>> +		pr_err_ratelimited("NX-842 coprocessor is not available");
>> +		return -ENODEV;
>> +	}
>> +
>> +	wmem->start = ktime_get();
>> +
>> +	/*
>> +	 * VAS copy CRB into L2 cache. Refer <asm/vas.h>.
>> +	 * @crb, @offset and @first (must be true)
>> +	 */
>> +	vas_copy_crb(crb, 0, 1);
>> +
>> +	/*
>> +	 * VAS paste previously copied CRB to NX.
>> +	 * @txwin, @offset, @last (must be true) and @re is expected/assumed
>> +	 * to be true for NX windows.
>> +	 */
>> +	ret = vas_paste_crb(txwin, 0, 1, 1);
>> +
>> +	/*
>> +	 * Retry copy/paste function for VAS failures.
>> +	 */
>> +	while (ret && (i++ < VAS_RETRIES)) {
>> +		vas_copy_crb(crb, 0, 1);
>> +		wmem->start = ktime_get();
> 
> should this be before vas_copy_crb()?

The paste instruction is the one that sends the request to NX, so I set the start time just before the paste rather than before the copy. But I will change it as you suggested; it is needed anyway so that copy/paste can run with preemption disabled.
> 
> 
>> +		ret = vas_paste_crb(wmem->txwin, 0, 1, 1);
>> +	}
>> +
>> +	if (ret) {
>> +		pr_err_ratelimited("VAS copy/paste failed\n");
>> +		return ret;
>> +	}
>> +
>> +	ret = wait_for_csb(wmem, csb);
>> +	if (!ret)
>> +		*outlenp = be32_to_cpu(csb->count);
>> +
>> +	return ret;
>> +}
>> +
>> +/**
>>   * nx842_powernv_compress - Compress data using the 842 algorithm
>>   *
>>   * Compression provided by the NX842 coprocessor on IBM PowerNV systems.
>> @@ -576,6 +693,191 @@ static inline void nx842_add_coprocs_list(struct nx842_coproc *coproc,
>>  	list_add(&coproc->list, &nx842_coprocs);
>>  }
>>
>> +/*
>> + * Identify chip ID for each CPU and save coprocessor address for the
>> + * corresponding NX engine in percpu coproc_inst.
>> + * coproc_inst is used in crypto_init to open send window on the NX instance
>> + * for the corresponding CPU / chip where the open request is executed.
>> + */
>> +static void nx842_set_per_cpu_coproc(struct nx842_coproc *coproc)
>> +{
>> +	unsigned int i, chip_id;
>> +
>> +	for_each_possible_cpu(i) {
>> +		chip_id = cpu_to_chip_id(i);
>> +
>> +		if (coproc->chip_id == chip_id)
>> +			per_cpu(coproc_inst, i) = coproc;
>> +	}
>> +}
>> +
>> +
>> +static struct vas_window *nx842_alloc_txwin(struct nx842_coproc *coproc)
>> +{
>> +	struct vas_window *txwin = NULL;
>> +	struct vas_tx_win_attr txattr;
>> +
>> +	/*
>> +	 * Kernel requests will be high priority. So open send
>> +	 * windows only for high priority RxFIFO entries.
>> +	 */
>> +	vas_init_tx_win_attr(&txattr, coproc->ct);
>> +	txattr.lpid = 0;	/* lpid is 0 for kernel requests */
>> +	txattr.pid = mfspr(SPRN_PID);
>> +
>> +	/*
>> +	 * Open a VAS send window which is used to send request to NX.
>> +	 */
>> +	txwin = vas_tx_win_open(coproc->vas.id, coproc->ct, &txattr);
>> +	if (IS_ERR(txwin)) {
>> +		pr_err("ibm,nx-842: Can not open TX window: %ld\n",
>> +				PTR_ERR(txwin));
>> +		return NULL;
>> +	}
>> +
>> +	return txwin;
>> +}
>> +
>> +static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
>> +					int vasid)
>> +{
>> +	struct vas_window *rxwin = NULL;
>> +	struct vas_rx_win_attr rxattr;
>> +	struct nx842_coproc *coproc;
>> +	u32 lpid, pid, tid, fifo_size;
>> +	u64 rx_fifo;
>> +	const char *priority;
>> +	int ret;
>> +
>> +	ret = of_property_read_u64(dn, "rx-fifo-address", (void *)&rx_fifo);
>> +	if (ret) {
>> +		pr_err("Missing rx-fifo-address property\n");
>> +		return ret;
>> +	}
>> +
>> +	ret = of_property_read_u32(dn, "rx-fifo-size", &fifo_size);
>> +	if (ret) {
>> +		pr_err("Missing rx-fifo-size property\n");
>> +		return ret;
>> +	}
>> +
>> +	ret = of_property_read_u32(dn, "lpid", &lpid);
>> +	if (ret) {
>> +		pr_err("Missing lpid property\n");
>> +		return ret;
>> +	}
>> +
>> +	ret = of_property_read_u32(dn, "pid", &pid);
>> +	if (ret) {
>> +		pr_err("Missing pid property\n");
>> +		return ret;
>> +	}
>> +
>> +	ret = of_property_read_u32(dn, "tid", &tid);
>> +	if (ret) {
>> +		pr_err("Missing tid property\n");
>> +		return ret;
>> +	}
>> +
>> +	ret = of_property_read_string(dn, "priority", &priority);
>> +	if (ret) {
>> +		pr_err("Missing priority property\n");
>> +		return ret;
>> +	}
>> +
>> +	coproc = kzalloc(sizeof(*coproc), GFP_KERNEL);
>> +	if (!coproc)
>> +		return -ENOMEM;
>> +
>> +	if (!strcmp(priority, "High"))
>> +		coproc->ct = VAS_COP_TYPE_842_HIPRI;
>> +	else if (!strcmp(priority, "Normal"))
>> +		coproc->ct = VAS_COP_TYPE_842;
>> +	else {
>> +		pr_err("Invalid RxFIFO priority value\n");
>> +		ret =  -EINVAL;
>> +		goto err_out;
>> +	}
>> +
>> +	vas_init_rx_win_attr(&rxattr, coproc->ct);
>> +	rxattr.rx_fifo = (void *)rx_fifo;
>> +	rxattr.rx_fifo_size = fifo_size;
>> +	rxattr.lnotify_lpid = lpid;
>> +	rxattr.lnotify_pid = pid;
>> +	rxattr.lnotify_tid = tid;
>> +	rxattr.wcreds_max = MAX_CREDITS_PER_RXFIFO;
>> +
>> +	/*
>> +	 * Open a VAS receive window which is used to configure RxFIFO
>> +	 * for NX.
>> +	 */
>> +	rxwin = vas_rx_win_open(vasid, coproc->ct, &rxattr);
>> +	if (IS_ERR(rxwin)) {
>> +		ret = PTR_ERR(rxwin);
>> +		pr_err("setting RxFIFO with VAS failed: %d\n",
>> +			ret);
>> +		goto err_out;
>> +	}
>> +
>> +	coproc->vas.rxwin = rxwin;
>> +	coproc->vas.id = vasid;
>> +	nx842_add_coprocs_list(coproc, chip_id);
>> +
>> +	/*
>> +	 * Kernel requests use only high priority FIFOs. So save coproc
>> +	 * info in percpu coproc_inst which will be used to open send
>> +	 * windows for crypto open requests later.
>> +	 */
>> +	if (coproc->ct == VAS_COP_TYPE_842_HIPRI)
>> +		nx842_set_per_cpu_coproc(coproc);
>> +
>> +	return 0;
>> +
>> +err_out:
>> +	kfree(coproc);
>> +	return ret;
>> +}
>> +
>> +
>> +static int __init nx842_powernv_probe_vas(struct device_node *pn)
>> +{
>> +	struct device_node *dn;
>> +	int chip_id, vasid, rc = 0;
>> +
>> +	chip_id = of_get_ibm_chip_id(pn);
>> +	if (chip_id < 0) {
>> +		pr_err("ibm,chip-id missing\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	dn = of_find_compatible_node(pn, NULL, "ibm,power9-vas-x");
>> +
>> +	if (!dn) {
>> +		pr_err("Missing VAS device node\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (of_property_read_u32(dn, "ibm,vas-id", &vasid)) {
>> +		pr_err("Missing ibm,vas-id device property\n");
>> +		of_node_put(dn);
>> +		return -EINVAL;
>> +	}
>> +
>> +	of_node_put(dn);
>> +
>> +	for_each_child_of_node(pn, dn) {
>> +		if (of_device_is_compatible(dn, "ibm,p9-nx-842")) {
>> +			rc = vas_cfg_coproc_info(dn, chip_id, vasid);
>> +			if (rc) {
>> +				of_node_put(dn);
>> +				return rc;
>> +			}
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>  static int __init nx842_powernv_probe(struct device_node *dn)
>>  {
>>  	struct nx842_coproc *coproc;
>> @@ -622,6 +924,9 @@ static void nx842_delete_coprocs(void)
>>  	struct nx842_coproc *coproc, *n;
>>
>>  	list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) {
>> +		if (coproc->vas.rxwin)
>> +			vas_win_close(coproc->vas.rxwin);
>> +
>>  		list_del(&coproc->list);
>>  		kfree(coproc);
>>  	}
>> @@ -643,6 +948,44 @@ static struct nx842_driver nx842_powernv_driver = {
>>  	.decompress =	nx842_powernv_decompress,
>>  };
>>
>> +static int nx842_powernv_crypto_init_vas(struct crypto_tfm *tfm)
>> +{
>> +	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
>> +	struct nx842_workmem *wmem;
>> +	struct nx842_coproc *coproc;
>> +	int ret;
>> +
>> +	ret = nx842_crypto_init(tfm, &nx842_powernv_driver);
>> +
>> +	if (ret)
>> +		return ret;
>> +
>> +	wmem = (struct nx842_workmem *)ctx->wmem;
> 
> I think you want to align wmem to a WORKMEM_ALIGN boundary.

Correct, I missed it. I will change this in the next version for both init() and exit(). Thanks for the review.
> 
>> +	coproc = per_cpu(coproc_inst, smp_processor_id());
>> +
>> +	ret = -EINVAL;
>> +	if (coproc && coproc->vas.rxwin) {
>> +		wmem->txwin = nx842_alloc_txwin(coproc);
>> +		if (!IS_ERR(wmem->txwin))
>> +			return 0;
>> +
>> +		ret = PTR_ERR(wmem->txwin);
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>> +void nx842_powernv_crypto_exit_vas(struct crypto_tfm *tfm)
>> +{
>> +	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
>> +	struct nx842_workmem *wmem = (struct nx842_workmem *)ctx->wmem;
> 
> here as well.
> 
>> +
>> +	if (wmem && wmem->txwin)
>> +		vas_win_close(wmem->txwin);
>> +
>> +	nx842_crypto_exit(tfm);
>> +}
>> +
>>  static int nx842_powernv_crypto_init(struct crypto_tfm *tfm)
>>  {
>>  	return nx842_crypto_init(tfm, &nx842_powernv_driver);
>> @@ -676,13 +1019,27 @@ static __init int nx842_powernv_init(void)
>>  	BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT);
>>  	BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT);
>>
>> -	for_each_compatible_node(dn, NULL, "ibm,power-nx")
>> -		nx842_powernv_probe(dn);
>> +	for_each_compatible_node(dn, NULL, "ibm,power9-nx") {
>> +		ret = nx842_powernv_probe_vas(dn);
>> +		if (ret) {
>> +			nx842_delete_coprocs();
>> +			return ret;
>> +		}
>> +	}
>>
>> -	if (!nx842_ct)
>> -		return -ENODEV;
>> +	if (list_empty(&nx842_coprocs)) {
>> +		for_each_compatible_node(dn, NULL, "ibm,power-nx")
>> +			nx842_powernv_probe(dn);
>> +
>> +		if (!nx842_ct)
>> +			return -ENODEV;
>>
>> -	nx842_powernv_exec = nx842_exec_icswx;
>> +		nx842_powernv_exec = nx842_exec_icswx;
>> +	} else {
>> +		nx842_powernv_exec = nx842_exec_vas;
>> +		nx842_powernv_alg.cra_init = nx842_powernv_crypto_init_vas;
>> +		nx842_powernv_alg.cra_exit = nx842_powernv_crypto_exit_vas;
>> +	}
>>
>>  	ret = crypto_register_alg(&nx842_powernv_alg);
>>  	if (ret) {
>> diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
>> index d94e25df503b..da3cb8c35ec7 100644
>> --- a/drivers/crypto/nx/nx-842.c
>> +++ b/drivers/crypto/nx/nx-842.c
>> @@ -116,7 +116,7 @@ int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver)
>>
>>  	spin_lock_init(&ctx->lock);
>>  	ctx->driver = driver;
>> -	ctx->wmem = kmalloc(driver->workmem_size, GFP_KERNEL);
>> +	ctx->wmem = kzalloc(driver->workmem_size, GFP_KERNEL);
>>  	ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
>>  	ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
>>  	if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
>> -- 
>> 2.11.0
>>
>>
> 



More information about the Linuxppc-dev mailing list