[PATCH 5/5] crypto: talitos: Add software backlog queue handling
Horia Geantă
horia.geanta at freescale.com
Wed Feb 25 05:21:52 AEDT 2015
On 2/20/2015 6:21 PM, Martin Hicks wrote:
> I was running into situations where the hardware FIFO was filling up, and
> the code was returning EAGAIN to dm-crypt and just dropping the submitted
> crypto request.
>
> This adds support in talitos for a software backlog queue. When requests
> can't be queued to the hardware immediately EBUSY is returned. The queued
> requests are dispatched to the hardware in received order as hardware FIFO
> slots become available.
>
> Signed-off-by: Martin Hicks <mort at bork.org>
Hi Martin,
Thanks for the effort!
Indeed we noticed that talitos (and caam) don't play nicely with
dm-crypt, lacking a backlog mechanism.
Please run checkpatch --strict and fix the errors, warnings.
> ---
> drivers/crypto/talitos.c | 92 +++++++++++++++++++++++++++++++++++-----------
> drivers/crypto/talitos.h | 3 ++
> 2 files changed, 74 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
> index d3472be..226654c 100644
> --- a/drivers/crypto/talitos.c
> +++ b/drivers/crypto/talitos.c
> @@ -183,43 +183,72 @@ static int init_device(struct device *dev)
> }
>
> /**
> - * talitos_submit - submits a descriptor to the device for processing
> + * talitos_handle_queue - performs submissions either of new descriptors
> + * or ones waiting in the queue backlog.
> * @dev: the SEC device to be used
> * @ch: the SEC device channel to be used
> - * @edesc: the descriptor to be processed by the device
> - * @context: a handle for use by caller (optional)
The "context" kernel-doc should have been removed in patch 4/5.
> + * @edesc: the descriptor to be processed by the device (optional)
> *
> * desc must contain valid dma-mapped (bus physical) address pointers.
> * callback must check err and feedback in descriptor header
> - * for device processing status.
> + * for device processing status upon completion.
> */
> -int talitos_submit(struct device *dev, int ch, struct talitos_edesc *edesc)
> +int talitos_handle_queue(struct device *dev, int ch, struct talitos_edesc *edesc)
> {
> struct talitos_private *priv = dev_get_drvdata(dev);
> - struct talitos_request *request = &edesc->req;
> + struct talitos_request *request, *orig_request = NULL;
> + struct crypto_async_request *async_req;
> unsigned long flags;
> int head;
> + int ret = -EINPROGRESS;
>
> spin_lock_irqsave(&priv->chan[ch].head_lock, flags);
>
> + if (edesc) {
> + orig_request = &edesc->req;
> + crypto_enqueue_request(&priv->chan[ch].queue, &orig_request->base);
> + }
The request goes through the SW queue even if there are empty slots in
the HW queue, doing unnecessary crypto_enqueue_request() and
crypto_dequeue_request() calls. Trying to use the HW queue first would be better.
> +
> +flush_another:
> + if (priv->chan[ch].queue.qlen == 0) {
> + spin_unlock_irqrestore(&priv->chan[ch].head_lock, flags);
> + return 0;
> + }
> +
> if (!atomic_inc_not_zero(&priv->chan[ch].submit_count)) {
> /* h/w fifo is full */
> spin_unlock_irqrestore(&priv->chan[ch].head_lock, flags);
> - return -EAGAIN;
> + return -EBUSY;
> }
>
> - head = priv->chan[ch].head;
> + /* Dequeue the oldest request */
> + async_req = crypto_dequeue_request(&priv->chan[ch].queue);
> +
> + request = container_of(async_req, struct talitos_request, base);
> request->dma_desc = dma_map_single(dev, request->desc,
> sizeof(*request->desc),
> DMA_BIDIRECTIONAL);
>
> /* increment fifo head */
> + head = priv->chan[ch].head;
> priv->chan[ch].head = (priv->chan[ch].head + 1) & (priv->fifo_len - 1);
>
> - smp_wmb();
> - priv->chan[ch].fifo[head] = request;
> + spin_unlock_irqrestore(&priv->chan[ch].head_lock, flags);
> +
> + /*
> + * Mark a backlogged request as in-progress, return EBUSY because
> + * the original request that was submitted is backlogged.
s/is backlogged/is backlogged or dropped
Original request will not be enqueued by crypto_enqueue_request() if the
CRYPTO_TFM_REQ_MAY_BACKLOG flag is not set (since SW queue is for
backlog only) - that's the case for IPsec requests.
> + */
> + if (request != orig_request) {
> + struct crypto_async_request *areq = request->context;
> + areq->complete(areq, -EINPROGRESS);
> + ret = -EBUSY;
> + }
> +
> + spin_lock_irqsave(&priv->chan[ch].head_lock, flags);
>
> /* GO! */
> + priv->chan[ch].fifo[head] = request;
> wmb();
> out_be32(priv->chan[ch].reg + TALITOS_FF,
> upper_32_bits(request->dma_desc));
> @@ -228,9 +257,18 @@ int talitos_submit(struct device *dev, int ch, struct talitos_edesc *edesc)
>
> spin_unlock_irqrestore(&priv->chan[ch].head_lock, flags);
>
> - return -EINPROGRESS;
> + /*
> + * When handling the queue via the completion path, queue more
> + * requests if the hardware has room.
> + */
> + if (!edesc) {
> + spin_lock_irqsave(&priv->chan[ch].head_lock, flags);
> + goto flush_another;
> + }
This is better - avoids releasing and reacquiring the lock:
if (!edesc) {
goto flush_another;
}
spin_unlock_irqrestore(&priv->chan[ch].head_lock, flags);
> +
> + return ret;
> }
> -EXPORT_SYMBOL(talitos_submit);
> +EXPORT_SYMBOL(talitos_handle_queue);
>
> /*
> * process what was done, notify callback of error if not
> @@ -284,6 +322,8 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch)
> }
>
> spin_unlock_irqrestore(&priv->chan[ch].tail_lock, flags);
> +
> + talitos_handle_queue(dev, ch, NULL);
> }
>
> /*
> @@ -1038,8 +1078,8 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
> edesc->req.callback = callback;
> edesc->req.context = areq;
>
> - ret = talitos_submit(dev, ctx->ch, edesc);
> - if (ret != -EINPROGRESS) {
> + ret = talitos_handle_queue(dev, ctx->ch, edesc);
> + if (ret != -EINPROGRESS && ret != -EBUSY) {
Again, factor in CRYPTO_TFM_REQ_MAY_BACKLOG.
Only when talitos_handle_queue() returns -EBUSY *and*
CRYPTO_TFM_REQ_MAY_BACKLOG is set, the request is backlogged.
Thus the 2nd condition should be:
!(ret == -EBUSY && areq->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
Other places need similar fix.
> ipsec_esp_unmap(dev, edesc, areq);
> kfree(edesc);
> }
> @@ -1080,6 +1120,7 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
> unsigned int ivsize,
> int icv_stashing,
> u32 cryptoflags,
> + struct crypto_async_request *areq,
> bool encrypt)
> {
> struct talitos_edesc *edesc;
> @@ -1170,6 +1211,8 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
> edesc->dma_len,
> DMA_BIDIRECTIONAL);
> edesc->req.desc = &edesc->desc;
> + /* A copy of the crypto_async_request to use the crypto_queue backlog */
> + memcpy(&edesc->req.base, areq, sizeof(struct crypto_async_request));
Why not have a
struct crypto_async_request *base;
instead of
struct crypto_async_request base;
in talitos_request structure?
This way:
1. memcopy above is avoided
2. talitos_request structure gets smaller - remember that
talitos_request is now embedded in talitos_edesc structure, which gets
kmalloc-ed for every request
That would be similar to previous driver behaviour.
Caller is expected not to destroy the request if the return code from
the Crypto API / backend driver is -EINPROGRESS or -EBUSY (when
MAY_BACKLOG flag is set).
>
> return edesc;
> }
> @@ -1184,7 +1227,7 @@ static struct talitos_edesc *aead_edesc_alloc(struct aead_request *areq, u8 *iv,
> return talitos_edesc_alloc(ctx->dev, areq->assoc, areq->src, areq->dst,
> iv, areq->assoclen, areq->cryptlen,
> ctx->authsize, ivsize, icv_stashing,
> - areq->base.flags, encrypt);
> + areq->base.flags, &areq->base, encrypt);
> }
>
> static int aead_encrypt(struct aead_request *req)
> @@ -1413,8 +1456,8 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
> edesc->req.callback = callback;
> edesc->req.context = areq;
>
> - ret = talitos_submit(dev, ctx->ch, edesc);
> - if (ret != -EINPROGRESS) {
> + ret = talitos_handle_queue(dev, ctx->ch, edesc);
> + if (ret != -EINPROGRESS && ret != -EBUSY) {
> common_nonsnoop_unmap(dev, edesc, areq);
> kfree(edesc);
> }
> @@ -1430,7 +1473,7 @@ static struct talitos_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request *
>
> return talitos_edesc_alloc(ctx->dev, NULL, areq->src, areq->dst,
> areq->info, 0, areq->nbytes, 0, ivsize, 0,
> - areq->base.flags, encrypt);
> + areq->base.flags, &areq->base, encrypt);
> }
>
> static int ablkcipher_encrypt(struct ablkcipher_request *areq)
> @@ -1596,8 +1639,8 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
> edesc->req.callback = callback;
> edesc->req.context = areq;
>
> - ret = talitos_submit(dev, ctx->ch, edesc);
> - if (ret != -EINPROGRESS) {
> + ret = talitos_handle_queue(dev, ctx->ch, edesc);
> + if (ret != -EINPROGRESS && ret != -EBUSY) {
> common_nonsnoop_hash_unmap(dev, edesc, areq);
> kfree(edesc);
> }
> @@ -1612,7 +1655,7 @@ static struct talitos_edesc *ahash_edesc_alloc(struct ahash_request *areq,
> struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
>
> return talitos_edesc_alloc(ctx->dev, NULL, req_ctx->psrc, NULL, NULL, 0,
> - nbytes, 0, 0, 0, areq->base.flags, false);
> + nbytes, 0, 0, 0, areq->base.flags, &areq->base, false);
> }
>
> static int ahash_init(struct ahash_request *areq)
> @@ -2690,6 +2733,13 @@ static int talitos_probe(struct platform_device *ofdev)
> }
>
> atomic_set(&priv->chan[i].submit_count, -priv->chfifo_len);
> +
> + /*
> + * The crypto_queue is used to manage the backlog only. While
> + * the hardware FIFO has space requests are dispatched
> + * immediately.
> + */
> + crypto_init_queue(&priv->chan[i].queue, 0);
> }
>
> dma_set_mask(dev, DMA_BIT_MASK(36));
> diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
> index 91faa76..a6f73e2 100644
> --- a/drivers/crypto/talitos.h
> +++ b/drivers/crypto/talitos.h
> @@ -65,6 +65,7 @@ struct talitos_desc {
> * @context: caller context (optional)
> */
> struct talitos_request {
> + struct crypto_async_request base;
> struct talitos_desc *desc;
> dma_addr_t dma_desc;
> void (*callback) (struct device *dev, struct talitos_desc *desc,
> @@ -91,6 +92,8 @@ struct talitos_channel {
> spinlock_t tail_lock ____cacheline_aligned;
> /* index to next in-progress/done descriptor request */
> int tail;
> +
> + struct crypto_queue queue;
> };
>
> struct talitos_private {
>
More information about the Linuxppc-dev
mailing list