[PATCH V2 3/5] selftests/powerpc: Add NX-GZIP engine compress testcase
Daniel Axtens
dja at axtens.net
Thu Apr 2 17:17:01 AEDT 2020
Raphael Moreira Zinsly <rzinsly at linux.ibm.com> writes:
> Add a compression testcase for the powerpc NX-GZIP engine.
>
> Signed-off-by: Bulent Abali <abali at us.ibm.com>
> Signed-off-by: Raphael Moreira Zinsly <rzinsly at linux.ibm.com>
> ---
> .../selftests/powerpc/nx-gzip/Makefile | 21 +
> .../selftests/powerpc/nx-gzip/gzfht_test.c | 489 ++++++++++++++++++
> .../selftests/powerpc/nx-gzip/gzip_vas.c | 259 ++++++++++
> 3 files changed, 769 insertions(+)
> create mode 100644 tools/testing/selftests/powerpc/nx-gzip/Makefile
> create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
> create mode 100644 tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
>
> diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
> new file mode 100644
> index 000000000000..ab903f63bbbd
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
> @@ -0,0 +1,21 @@
> +CC = gcc
> +CFLAGS = -O3
> +INC = ./inc
> +SRC = gzfht_test.c
> +OBJ = $(SRC:.c=.o)
> +TESTS = gzfht_test
> +EXTRA_SOURCES = gzip_vas.c
> +
> +all: $(TESTS)
> +
> +$(OBJ): %.o: %.c
> + $(CC) $(CFLAGS) -I$(INC) -c $<
> +
> +$(TESTS): $(OBJ)
> + $(CC) $(CFLAGS) -I$(INC) -o $@ $@.o $(EXTRA_SOURCES)
> +
> +run_tests: $(TESTS)
> + ./gzfht_test gzip_vas.c
> +
> +clean:
> + rm -f $(TESTS) *.o *~ *.gz
> diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
> new file mode 100644
> index 000000000000..7a21c25f5611
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
> @@ -0,0 +1,489 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +/* P9 gzip sample code for demonstrating the P9 NX hardware interface.
> + * Not intended for production use or for performance or compression
> + * ratio measurements. For simplicity of demonstration, this sample
> + * code compresses into fixed Huffman blocks only (Deflate btype=1)
> + * and has very simple memory management. Dynamic Huffman blocks
> + * (Deflate btype=2) are more involved as detailed in the user guide.
> + * Note also that /dev/crypto/nx-gzip, VAS and skiboot support are
> + * required.
> + *
> + * Copyright 2020 IBM Corp.
> + *
> + * https://github.com/libnxz/power-gzip for zlib api and other utils
> + *
> + * Author: Bulent Abali <abali at us.ibm.com>
> + *
> + * Definitions of acronyms used here. See
> + * P9 NX Gzip Accelerator User's Manual for details:
> + * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
> + *
> + * adler/crc: 32 bit checksums appended to stream tail
> + * ce: completion extension
> + * cpb: coprocessor parameter block (metadata)
> + * crb: coprocessor request block (command)
> + * csb: coprocessor status block (status)
> + * dht: dynamic huffman table
> + * dde: data descriptor element (address, length)
> + * ddl: list of ddes
> + * dh/fh: dynamic and fixed huffman types
> + * fc: coprocessor function code
> + * histlen: history/dictionary length
> + * history: sliding window of up to 32KB of data
> + * lzcount: Deflate LZ symbol counts
> + * rembytecnt: remaining byte count
> + * sfbt: source final block type; last block's type during decomp
> + * spbc: source processed byte count
> + * subc: source unprocessed bit count
> + * tebc: target ending bit count; valid bits in the last byte
> + * tpbc: target processed byte count
> + * vas: virtual accelerator switch; the user mode interface
> + */
> +
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <stdint.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <sys/time.h>
> +#include <sys/fcntl.h>
> +#include <sys/mman.h>
> +#include <endian.h>
> +#include <bits/endian.h>
> +#include <sys/ioctl.h>
> +#include <assert.h>
> +#include <errno.h>
> +#include <signal.h>
> +#include "nxu.h"
> +#include "nx.h"
> +
> +int nx_dbg;
> +FILE *nx_gzip_log;
> +void *nx_fault_storage_address;
> +
> +#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
> +#define FNAME_MAX 1024
> +#define FEXT ".nx.gz"
> +
> +/*
> + * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
> + */
> +static int compress_fht_sample(char *src, uint32_t srclen, char *dst,
> + uint32_t dstlen, int with_count,
> + struct nx_gzip_crb_cpb_t *cmdp, void *handle)
> +{
> + int cc;
> + uint32_t fc;
> +
> + assert(!!cmdp);
> +
> + put32(cmdp->crb, gzip_fc, 0); /* clear */
> + fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT :
> + GZIP_FC_COMPRESS_RESUME_FHT;
> + putnn(cmdp->crb, gzip_fc, fc);
> + putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */
> + memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
> +
> + /* Section 6.6 programming notes; spbc may be in two different
> + * places depending on FC.
> + */
> + if (!with_count)
> + put32(cmdp->cpb, out_spbc_comp, 0);
> + else
> + put32(cmdp->cpb, out_spbc_comp_with_count, 0);
> +
> + /* Figure 6-3 6-4; CSB location */
> + put64(cmdp->crb, csb_address, 0);
> + put64(cmdp->crb, csb_address,
> + (uint64_t) &cmdp->crb.csb & csb_address_mask);
> +
> + /* Source direct dde (scatter-gather list) */
> + clear_dde(cmdp->crb.source_dde);
> + putnn(cmdp->crb.source_dde, dde_count, 0);
> + put32(cmdp->crb.source_dde, ddebc, srclen);
> + put64(cmdp->crb.source_dde, ddead, (uint64_t) src);
> +
> + /* Target direct dde (scatter-gather list) */
> + clear_dde(cmdp->crb.target_dde);
> + putnn(cmdp->crb.target_dde, dde_count, 0);
> + put32(cmdp->crb.target_dde, ddebc, dstlen);
> + put64(cmdp->crb.target_dde, ddead, (uint64_t) dst);
> +
> + /* Submit the crb, the job descriptor, to the accelerator */
> + nxu_run_job(cmdp, handle);
> +
> + /* Poll for the csb.v bit; you should also consider sleeping
> + * or interrupts.
> + */
> + do { ; } while (getnn(cmdp->crb.csb, csb_v) == 0);
> +
> + /* CC Table 6-8 */
> + cc = getnn(cmdp->crb.csb, csb_cc);
> +
> + return cc;
> +}
> +
> +/*
> + * Prepares a blank no filename no timestamp gzip header and returns
> + * the number of bytes written to buf.
> + * Gzip specification at https://tools.ietf.org/html/rfc1952
> + */
> +int gzip_header_blank(char *buf)
> +{
> + int i = 0;
> +
> + buf[i++] = 0x1f; /* ID1 */
> + buf[i++] = 0x8b; /* ID2 */
> + buf[i++] = 0x08; /* CM */
> + buf[i++] = 0x00; /* FLG */
> + buf[i++] = 0x00; /* MTIME */
> + buf[i++] = 0x00; /* MTIME */
> + buf[i++] = 0x00; /* MTIME */
> + buf[i++] = 0x00; /* MTIME */
> + buf[i++] = 0x04; /* XFL 4=fastest */
> + buf[i++] = 0x03; /* OS UNIX */
> +
> + return i;
> +}
> +
> +/* Caller must free the allocated buffer. Returns nonzero on error. */
> +int read_alloc_input_file(char *fname, char **buf, size_t *bufsize)
> +{
> + struct stat statbuf;
> + FILE *fp;
> + char *p;
> + size_t num_bytes;
> +
> + if (stat(fname, &statbuf)) {
> + perror(fname);
> + return(-1);
> + }
> + fp = fopen(fname, "r");
> + if (fp == NULL) {
> + perror(fname);
> + return(-1);
> + }
> + assert(NULL != (p = (char *) malloc(statbuf.st_size)));
> + num_bytes = fread(p, 1, statbuf.st_size, fp);
> + if (ferror(fp) || (num_bytes != statbuf.st_size)) {
> + perror(fname);
> + return(-1);
> + }
> + *buf = p;
> + *bufsize = num_bytes;
> + return 0;
> +}
> +
> +/* Returns nonzero on error */
> +int write_output_file(char *fname, char *buf, size_t bufsize)
> +{
> + FILE *fp;
> + size_t num_bytes;
> +
> + fp = fopen(fname, "w");
> + if (fp == NULL) {
> + perror(fname);
> + return(-1);
> + }
> + num_bytes = fwrite(buf, 1, bufsize, fp);
> + if (ferror(fp) || (num_bytes != bufsize)) {
> + perror(fname);
> + return(-1);
> + }
> + fclose(fp);
> + return 0;
> +}
> +
> +/*
> + * Z_SYNC_FLUSH as described in zlib.h.
> + * Returns number of appended bytes
> + */
> +int append_sync_flush(char *buf, int tebc, int final)
> +{
> + uint64_t flush;
> + int shift = (tebc & 0x7);
> +
> + if (tebc > 0) {
> + /* Last byte is partially full */
> + buf = buf - 1;
> + *buf = *buf & (unsigned char) ((1<<tebc)-1);
> + } else
> + *buf = 0;
> + flush = ((0x1ULL & final) << shift) | *buf;
> + shift = shift + 3; /* BFINAL and BTYPE written */
> + shift = (shift <= 8) ? 8 : 16;
> + flush |= (0xFFFF0000ULL) << shift; /* Zero length block */
> + shift = shift + 32;
> + while (shift > 0) {
> + *buf++ = (unsigned char) (flush & 0xffULL);
> + flush = flush >> 8;
> + shift = shift - 8;
> + }
> + return(((tebc > 5) || (tebc == 0)) ? 5 : 4);
> +}
> +
> +/*
> + * Fault in pages prior to NX job submission. wr=1 may be required to
> + * touch writeable pages. System zero pages do not fault-in the page as
> + * intended. Typically set wr=1 for NX target pages and set wr=0 for NX
> + * source pages.
> + */
> +static int nx_touch_pages(void *buf, long buf_len, long page_len, int wr)
> +{
> + char *begin = buf;
> + char *end = (char *) buf + buf_len - 1;
> + char t;
> +
> + assert(buf_len >= 0 && !!buf);
> +
> + NXPRT(fprintf(stderr, "touch %p %p len 0x%lx wr=%d\n", buf,
> + (buf + buf_len), buf_len, wr));
> +
> + if (buf_len <= 0 || buf == NULL)
> + return -1;
> +
> + do {
> + t = *begin;
> + if (wr)
> + *begin = t;
> + begin = begin + page_len;
> + } while (begin < end);
> +
> + /* When buf_len is small or buf tail is in another page */
> + t = *end;
> + if (wr)
> + *end = t;
> +
> + return 0;
> +}
> +
> +/*
> + * Final deflate block bit. This call assumes the block
> + * beginning is byte aligned.
> + */
> +static void set_bfinal(void *buf, int bfinal)
> +{
> + char *b = buf;
> +
> + if (bfinal)
> + *b = *b | (unsigned char) 0x01;
> + else
> + *b = *b & (unsigned char) 0xfe;
> +}
> +
> +int compress_file(int argc, char **argv, void *handle)
> +{
> + char *inbuf, *outbuf, *srcbuf, *dstbuf;
> + char outname[FNAME_MAX];
> + uint32_t srclen, dstlen;
> + uint32_t flushlen, chunk;
> + size_t inlen, outlen, dsttotlen, srctotlen;
> + uint32_t crc, spbc, tpbc, tebc;
> + int lzcounts = 0;
> + int cc;
> + int num_hdr_bytes;
> + struct nx_gzip_crb_cpb_t *cmdp;
> + uint32_t pagelen = 65536;
> + int fault_tries = 50;
> +
> + cmdp = (void *)(uintptr_t)
> + aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
> + sizeof(struct nx_gzip_crb_cpb_t));
> +
> + if (argc != 2) {
> + fprintf(stderr, "usage: %s <fname>\n", argv[0]);
> + exit(-1);
> + }
> + if (read_alloc_input_file(argv[1], &inbuf, &inlen))
> + exit(-1);
> + fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen);
> +
> + /* Generous output buffer for header/trailer */
> + outlen = 2 * inlen + 1024;
> +
> + assert(NULL != (outbuf = (char *)malloc(outlen)));
> + nx_touch_pages(outbuf, outlen, pagelen, 1);
> +
> + /* Compress piecemeal in smallish chunks */
> + chunk = 1<<22;
> +
> + /* Write the gzip header to the stream */
> + num_hdr_bytes = gzip_header_blank(outbuf);
> + dstbuf = outbuf + num_hdr_bytes;
> + outlen = outlen - num_hdr_bytes;
> + dsttotlen = num_hdr_bytes;
> +
> + srcbuf = inbuf;
> + srctotlen = 0;
> +
> + /* Init the CRB, the coprocessor request block */
> + memset(&cmdp->crb, 0, sizeof(cmdp->crb));
> +
> + /* Initial gzip crc32 */
> + put32(cmdp->cpb, in_crc, 0);
> +
> + fault_tries = 50;
> +
> + while (inlen > 0) {
> +
> + /* Submit chunk size source data per job */
> + srclen = NX_MIN(chunk, inlen);
> + /* Supply large target in case data expands */
> + dstlen = NX_MIN(2*srclen, outlen);
> +
> + /* Page faults are handled by the user code */
> +
> + /* Fault-in pages; an improved code wouldn't touch so
> + * many pages but would try to estimate the
> + * compression ratio and adjust both the src and dst
> + * touch amounts.
> + */
> + nx_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen,
> + 1);
> + nx_touch_pages(srcbuf, srclen, pagelen, 0);
> + nx_touch_pages(dstbuf, dstlen, pagelen, 1);
> +
> + cc = compress_fht_sample(
> + srcbuf, srclen,
> + dstbuf, dstlen,
> + lzcounts, cmdp, handle);
> +
> + if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC &&
> + cc != ERR_NX_TRANSLATION) {
> + fprintf(stderr, "nx error: cc= %d\n", cc);
> + exit(-1);
> + }
> +
> + /* Page faults are handled by the user code */
> + if (cc == ERR_NX_TRANSLATION) {
> + NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc));
> + NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n",
> + fault_tries,
> + (unsigned long long) cmdp->crb.csb.fsaddr));
> +
> + fault_tries--;
> + if (fault_tries > 0) {
> + continue;
> + } else {
> + fprintf(stderr, "error: cannot progress; ");
> + fprintf(stderr, "too many faults\n");
> + exit(-1);
> + };
> + }
> +
> + fault_tries = 50; /* Reset for the next chunk */
> +
> + inlen = inlen - srclen;
> + srcbuf = srcbuf + srclen;
> + srctotlen = srctotlen + srclen;
> +
> + /* Two possible locations for spbc depending on the function
> + * code.
> + */
> + spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) :
> + get32(cmdp->cpb, out_spbc_comp_with_count);
> + assert(spbc == srclen);
> +
> + /* Target byte count */
> + tpbc = get32(cmdp->crb.csb, tpbc);
> + /* Target ending bit count */
> + tebc = getnn(cmdp->cpb, out_tebc);
> + NXPRT(fprintf(stderr, "compressed chunk %d " spbc));
> + NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc));
> +
> + if (inlen > 0) { /* More chunks to go */
> + set_bfinal(dstbuf, 0);
> + dstbuf = dstbuf + tpbc;
> + dsttotlen = dsttotlen + tpbc;
> + outlen = outlen - tpbc;
> + /* Round up to the next byte with a flush
> + * block; do not set the BFINAL bit.
> + */
> + flushlen = append_sync_flush(dstbuf, tebc, 0);
> + dsttotlen = dsttotlen + flushlen;
> + outlen = outlen - flushlen;
> + dstbuf = dstbuf + flushlen;
> + NXPRT(fprintf(stderr, "added sync_flush %d bytes\n",
> + flushlen));
> + } else { /* Done */
> + /* Set the BFINAL bit of the last block per Deflate
> + * specification.
> + */
> + set_bfinal(dstbuf, 1);
> + dstbuf = dstbuf + tpbc;
> + dsttotlen = dsttotlen + tpbc;
> + outlen = outlen - tpbc;
> + }
> +
> + /* Resuming crc32 for the next chunk */
> + crc = get32(cmdp->cpb, out_crc);
> + put32(cmdp->cpb, in_crc, crc);
> + crc = be32toh(crc);
> + }
> +
> + /* Append crc32 and ISIZE to the end */
> + memcpy(dstbuf, &crc, 4);
> + memcpy(dstbuf+4, &srctotlen, 4);
> + dsttotlen = dsttotlen + 8;
> + outlen = outlen - 8;
> +
> + assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT)));
> + strcpy(outname, argv[1]);
> + strcat(outname, FEXT);
> + if (write_output_file(outname, outbuf, dsttotlen)) {
> + fprintf(stderr, "write error: %s\n", outname);
> + exit(-1);
> + }
> +
> + fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen,
> + dsttotlen);
> + fprintf(stderr, "crc32 checksum = %08x\n", crc);
> +
> + if (inbuf != NULL)
> + free(inbuf);
> +
> + if (outbuf != NULL)
> + free(outbuf);
> +
> + return 0;
> +}
> +
> +void sigsegv_handler(int sig, siginfo_t *info, void *ctx)
> +{
> + fprintf(stderr, "%d: Got signal %d si_code %d, si_addr %p\n", getpid(),
> + sig, info->si_code, info->si_addr);
> +
> + nx_fault_storage_address = info->si_addr;
> +}
> +
> +int main(int argc, char **argv)
> +{
> + int rc;
> + struct sigaction act;
> + void *handle;
> +
> + nx_dbg = 0;
> + nx_gzip_log = NULL;
> + act.sa_handler = 0;
> + act.sa_sigaction = sigsegv_handler;
> + act.sa_flags = SA_SIGINFO;
> + act.sa_restorer = 0;
> + sigemptyset(&act.sa_mask);
> + sigaction(SIGSEGV, &act, NULL);
> +
> + handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
> + if (!handle) {
> + fprintf(stderr, "Unable to init NX, errno %d\n", errno);
> + exit(-1);
> + }
> +
> + rc = compress_file(argc, argv, handle);
> +
> + nx_function_end(handle);
> +
> + return rc;
> +}
> diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
> new file mode 100644
> index 000000000000..d28e1efb527b
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
> @@ -0,0 +1,259 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +/* Copyright 2020 IBM Corp.
> + *
> + * Author: Bulent Abali <abali at us.ibm.com>
> + *
> + */
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <stdint.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <sys/time.h>
> +#include <sys/fcntl.h>
> +#include <sys/mman.h>
> +#include <endian.h>
> +#include <bits/endian.h>
> +#include <sys/ioctl.h>
> +#include <assert.h>
> +#include <errno.h>
> +#include <signal.h>
> +#include "nx-gzip.h"
> +#include "nx.h"
> +#include "copy-paste.h"
> +#include "nxu.h"
> +#include "nx_dbg.h"
> +#include <sys/platform/ppc.h>
> +
> +#define barrier()
> +#define hwsync() ({ asm volatile("hwsync" ::: "memory"); })
This doesn't compile with the clang version I tried, as it doesn't
recognise the 'hwsync' mnemonic. Does
asm volatile("sync" ::: "memory");
do the same thing? clang does recognise that form, but I don't know
whether dropping the 'hw' prefix matters!
Regards,
Daniel
> +
> +#ifndef NX_NO_CPU_PRI
> +#define cpu_pri_default() ({ asm volatile ("or 2, 2, 2"); })
> +#define cpu_pri_low() ({ asm volatile ("or 31, 31, 31"); })
> +#else
> +#define cpu_pri_default()
> +#define cpu_pri_low()
> +#endif
> +
> +void *nx_fault_storage_address;
> +
> +struct nx_handle {
> + int fd;
> + int function;
> + void *paste_addr;
> +};
> +
> +static int open_device_nodes(char *devname, int pri, struct nx_handle *handle)
> +{
> + int rc, fd;
> + void *addr;
> + struct vas_gzip_setup_attr txattr;
> +
> + fd = open(devname, O_RDWR);
> + if (fd < 0) {
> + fprintf(stderr, " open device name %s\n", devname);
> + return -errno;
> + }
> +
> + memset(&txattr, 0, sizeof(txattr));
> + txattr.version = 1;
> + txattr.vas_id = pri;
> + rc = ioctl(fd, VAS_GZIP_TX_WIN_OPEN, (unsigned long)&txattr);
> + if (rc < 0) {
> + fprintf(stderr, "ioctl() n %d, error %d\n", rc, errno);
> + rc = -errno;
> + goto out;
> + }
> +
> + addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0ULL);
> + if (addr == MAP_FAILED) {
> + fprintf(stderr, "mmap() failed, errno %d\n", errno);
> + rc = -errno;
> + goto out;
> + }
> + handle->fd = fd;
> + handle->paste_addr = (void *)((char *)addr + 0x400);
> +
> + rc = 0;
> +out:
> + close(fd);
> + return rc;
> +}
> +
> +void *nx_function_begin(int function, int pri)
> +{
> + int rc;
> + char *devname = "/dev/crypto/nx-gzip";
> + struct nx_handle *nxhandle;
> +
> + if (function != NX_FUNC_COMP_GZIP) {
> + errno = EINVAL;
> + fprintf(stderr, " NX_FUNC_COMP_GZIP not found\n");
> + return NULL;
> + }
> +
> +
> + nxhandle = malloc(sizeof(*nxhandle));
> + if (!nxhandle) {
> + errno = ENOMEM;
> + fprintf(stderr, " No memory\n");
> + return NULL;
> + }
> +
> + nxhandle->function = function;
> + rc = open_device_nodes(devname, pri, nxhandle);
> + if (rc < 0) {
> + errno = -rc;
> + fprintf(stderr, " open_device_nodes failed\n");
> + return NULL;
> + }
> +
> + return nxhandle;
> +}
> +
> +int nx_function_end(void *handle)
> +{
> + int rc = 0;
> + struct nx_handle *nxhandle = handle;
> +
> + rc = munmap(nxhandle->paste_addr - 0x400, 4096);
> + if (rc < 0) {
> + fprintf(stderr, "munmap() failed, errno %d\n", errno);
> + return rc;
> + }
> + close(nxhandle->fd);
> + free(nxhandle);
> +
> + return rc;
> +}
> +
> +static int nx_wait_for_csb(struct nx_gzip_crb_cpb_t *cmdp)
> +{
> + long poll = 0;
> + uint64_t t;
> +
> + /* Save power and let other threads use the h/w. top may show
> + * 100% but only because the OS doesn't know we slowed this
> + * h/w thread while polling. We're letting other threads have
> + * higher throughput on the core.
> + */
> + cpu_pri_low();
> +
> +#define CSB_MAX_POLL 200000000UL
> +#define USLEEP_TH 300000UL
> +
> + t = __ppc_get_timebase();
> +
> + while (getnn(cmdp->crb.csb, csb_v) == 0) {
> + ++poll;
> + hwsync();
> +
> + cpu_pri_low();
> +
> + /* usleep(0) takes around 29000 ticks ~60 us.
> + * 300000 is spinning for about 600 us then
> + * start sleeping.
> + */
> + if ((__ppc_get_timebase() - t) > USLEEP_TH) {
> + cpu_pri_default();
> + usleep(1);
> + }
> +
> + if (poll > CSB_MAX_POLL)
> + break;
> +
> + /* Fault address from signal handler */
> + if (nx_fault_storage_address) {
> + cpu_pri_default();
> + return -EAGAIN;
> + }
> +
> + }
> +
> + cpu_pri_default();
> +
> + /* hw has updated csb and output buffer */
> + hwsync();
> +
> + /* Check CSB flags. */
> + if (getnn(cmdp->crb.csb, csb_v) == 0) {
> + fprintf(stderr, "CSB still not valid after %d polls.\n",
> + (int) poll);
> + prt_err("CSB still not valid after %d polls, giving up.\n",
> + (int) poll);
> + return -ETIMEDOUT;
> + }
> +
> + return 0;
> +}
> +
> +int nxu_run_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
> +{
> + int i, ret, retries;
> + struct nx_handle *nxhandle = handle;
> +
> + assert(handle != NULL);
> + i = 0;
> + retries = 5000;
> + while (i++ < retries) {
> + hwsync();
> + vas_copy(&cmdp->crb, 0);
> + ret = vas_paste(nxhandle->paste_addr, 0);
> + hwsync();
> +
> + NXPRT(fprintf(stderr, "Paste attempt %d/%d returns 0x%x\n",
> + i, retries, ret));
> +
> + if ((ret == 2) || (ret == 3)) {
> +
> + ret = nx_wait_for_csb(cmdp);
> + if (!ret) {
> + goto out;
> + } else if (ret == -EAGAIN) {
> + long x;
> +
> + prt_err("Touching address %p, 0x%lx\n",
> + nx_fault_storage_address,
> + *(long *) nx_fault_storage_address);
> + x = *(long *) nx_fault_storage_address;
> + *(long *) nx_fault_storage_address = x;
> + nx_fault_storage_address = 0;
> + continue;
> + } else {
> + prt_err("wait_for_csb() returns %d\n", ret);
> + break;
> + }
> + } else {
> + if (i < 10) {
> + /* spin for few ticks */
> +#define SPIN_TH 500UL
> + uint64_t fail_spin;
> +
> + fail_spin = __ppc_get_timebase();
> + while ((__ppc_get_timebase() - fail_spin) <
> + SPIN_TH)
> + ;
> + } else {
> + /* sleep */
> + unsigned int pr = 0;
> +
> + if (pr++ % 100 == 0) {
> + prt_err("Paste attempt %d/", i);
> + prt_err("%d, failed pid= %d\n", retries,
> + getpid());
> + }
> + usleep(1);
> + }
> + continue;
> + }
> + }
> +
> +out:
> + cpu_pri_default();
> +
> + return ret;
> +}
> --
> 2.21.0
More information about the Linuxppc-dev
mailing list