[PATCH 2/5] selftests/powerpc: Add header files for NX compression/decompression
Daniel Axtens
dja at axtens.net
Thu Mar 19 09:29:37 AEDT 2020
Raphael Moreira Zinsly <rzinsly at linux.ibm.com> writes:
> Add files to be able to compress and decompress files using the
> powerpc NX-GZIP engine.
>
> Signed-off-by: Bulent Abali <abali at us.ibm.com>
> Signed-off-by: Raphael Moreira Zinsly <rzinsly at linux.ibm.com>
> ---
> .../powerpc/nx-gzip/inc/copy-paste.h | 54 ++
> .../selftests/powerpc/nx-gzip/inc/nx_dbg.h | 95 +++
> .../selftests/powerpc/nx-gzip/inc/nxu.h | 644 ++++++++++++++++++
> 3 files changed, 793 insertions(+)
> create mode 100644 tools/testing/selftests/powerpc/nx-gzip/inc/copy-paste.h
> create mode 100644 tools/testing/selftests/powerpc/nx-gzip/inc/nx_dbg.h
> create mode 100644 tools/testing/selftests/powerpc/nx-gzip/inc/nxu.h
>
> diff --git a/tools/testing/selftests/powerpc/nx-gzip/inc/copy-paste.h b/tools/testing/selftests/powerpc/nx-gzip/inc/copy-paste.h
> new file mode 100644
> index 000000000000..107139b6c7df
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/nx-gzip/inc/copy-paste.h
> @@ -0,0 +1,54 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +#include "nx-helpers.h"
> +
> +/*
> + * Macros taken from arch/powerpc/include/asm/ppc-opcode.h and other
> + * header files.
> + */
> +#define ___PPC_RA(a) (((a) & 0x1f) << 16)
> +#define ___PPC_RB(b) (((b) & 0x1f) << 11)
> +
> +#define PPC_INST_COPY 0x7c20060c
> +#define PPC_INST_PASTE 0x7c20070d
> +
> +#define PPC_COPY(a, b) stringify_in_c(.long PPC_INST_COPY | \
> + ___PPC_RA(a) | ___PPC_RB(b))
> +#define PPC_PASTE(a, b) stringify_in_c(.long PPC_INST_PASTE | \
> + ___PPC_RA(a) | ___PPC_RB(b))
> +#define CR0_SHIFT 28
> +#define CR0_MASK 0xF
> +/*
> + * Copy/paste instructions:
> + *
> + * copy RA,RB
> + * Copy contents of address (RA) + effective_address(RB)
> + * to internal copy-buffer.
> + *
> + * paste RA,RB
> + * Paste contents of internal copy-buffer to the address
> + * (RA) + effective_address(RB)
> + */
> +static inline int vas_copy(void *crb, int offset)
> +{
> + asm volatile(PPC_COPY(%0, %1)";"
> + :
> + : "b" (offset), "b" (crb)
> + : "memory");
> +
> + return 0;
> +}
> +
> +static inline int vas_paste(void *paste_address, int offset)
> +{
> + u32 cr;
> +
> + cr = 0;
> + asm volatile(PPC_PASTE(%1, %2)";"
> + "mfocrf %0, 0x80;"
> + : "=r" (cr)
> + : "b" (offset), "b" (paste_address)
> + : "memory", "cr0");
> +
> + return (cr >> CR0_SHIFT) & CR0_MASK;
> +}
> diff --git a/tools/testing/selftests/powerpc/nx-gzip/inc/nx_dbg.h b/tools/testing/selftests/powerpc/nx-gzip/inc/nx_dbg.h
> new file mode 100644
> index 000000000000..f2c0eee2317e
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/nx-gzip/inc/nx_dbg.h
> @@ -0,0 +1,95 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later
> + *
> + * Copyright 2020 IBM Corporation
> + *
> + */
> +
> +#ifndef _NXU_DBG_H_
> +#define _NXU_DBG_H_
> +
> +#include <sys/file.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <time.h>
> +#include <pthread.h>
> +
> +extern FILE * nx_gzip_log;
> +extern int nx_gzip_trace;
> +extern unsigned int nx_gzip_inflate_impl;
> +extern unsigned int nx_gzip_deflate_impl;
> +extern unsigned int nx_gzip_inflate_flags;
> +extern unsigned int nx_gzip_deflate_flags;
> +
> +extern int nx_dbg;
> +pthread_mutex_t mutex_log;
> +
> +#define nx_gzip_trace_enabled() (nx_gzip_trace & 0x1)
> +#define nx_gzip_hw_trace_enabled() (nx_gzip_trace & 0x2)
> +#define nx_gzip_sw_trace_enabled() (nx_gzip_trace & 0x4)
> +#define nx_gzip_gather_statistics() (nx_gzip_trace & 0x8)
> +#define nx_gzip_per_stream_stat() (nx_gzip_trace & 0x10)
> +
> +#define prt(fmt, ...) do { \
> + pthread_mutex_lock(&mutex_log); \
> + flock(nx_gzip_log->_fileno, LOCK_EX); \
> + time_t t; struct tm *m; time(&t); m = localtime(&t); \
> + fprintf(nx_gzip_log, "[%04d/%02d/%02d %02d:%02d:%02d] " \
> + "pid %d: " fmt, \
> + (int)m->tm_year + 1900, (int)m->tm_mon+1, (int)m->tm_mday, \
> + (int)m->tm_hour, (int)m->tm_min, (int)m->tm_sec, \
> + (int)getpid(), ## __VA_ARGS__); \
> + fflush(nx_gzip_log); \
> + flock(nx_gzip_log->_fileno, LOCK_UN); \
> + pthread_mutex_unlock(&mutex_log); \
> +} while (0)
> +
> +/* Use in case of an error */
> +#define prt_err(fmt, ...) do { if (nx_dbg >= 0) { \
> + prt("%s:%u: Error: "fmt, \
> + __FILE__, __LINE__, ## __VA_ARGS__); \
> +}} while (0)
> +
> +/* Use in case of a warning */
> +#define prt_warn(fmt, ...) do { if (nx_dbg >= 1) { \
> + prt("%s:%u: Warning: "fmt, \
> + __FILE__, __LINE__, ## __VA_ARGS__); \
> +}} while (0)
> +
> +/* Informational printouts */
> +#define prt_info(fmt, ...) do { if (nx_dbg >= 2) { \
> + prt("Info: "fmt, ## __VA_ARGS__); \
> +}} while (0)
> +
> +/* Trace zlib wrapper code */
> +#define prt_trace(fmt, ...) do { if (nx_gzip_trace_enabled()) { \
> + prt("### "fmt, ## __VA_ARGS__); \
> +}} while (0)
> +
> +/* Trace statistics */
> +#define prt_stat(fmt, ...) do { if (nx_gzip_gather_statistics()) { \
> + prt("### "fmt, ## __VA_ARGS__); \
> +}} while (0)
> +
> +/* Trace zlib hardware implementation */
> +#define hw_trace(fmt, ...) do { \
> + if (nx_gzip_hw_trace_enabled()) \
> + fprintf(nx_gzip_log, "hhh " fmt, ## __VA_ARGS__); \
> + } while (0)
> +
> +/* Trace zlib software implementation */
> +#define sw_trace(fmt, ...) do { \
> + if (nx_gzip_sw_trace_enabled()) \
> + fprintf(nx_gzip_log, "sss " fmt, ## __VA_ARGS__); \
> + } while (0)
> +
> +
> +/**
> + * str_to_num - Convert string into number and cope with endings like
> + * KiB for kilobyte
> + * MiB for megabyte
> + * GiB for gigabyte
> + */
> +uint64_t str_to_num(char *str);
> +void nx_lib_debug(int onoff);
> +
> +#endif /* _NXU_DBG_H_ */
> diff --git a/tools/testing/selftests/powerpc/nx-gzip/inc/nxu.h b/tools/testing/selftests/powerpc/nx-gzip/inc/nxu.h
> new file mode 100644
> index 000000000000..faa95ffc162a
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/nx-gzip/inc/nxu.h
> @@ -0,0 +1,644 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later
> + *
> + * Hardware interface of the NX-GZIP compression accelerator
> + *
> + * Copyright (C) IBM Corporation, 2020
> + *
> + * Author: Bulent Abali <abali at us.ibm.com>
> + *
> + */
> +
> +#ifndef _NXU_H
> +#define _NXU_H
> +
> +#include <stdint.h>
> +#include <endian.h>
> +
> +/* deflate */
> +#define LLSZ 286
> +#define DSZ 30
> +
> +/* nx */
> +#define DHTSZ 18
> +#define DHT_MAXSZ 288
> +#define MAX_DDE_COUNT 256
> +
> +/* util */
> +#ifdef NXDBG
> +#define NXPRT(X) do { X; } while (0)
> +#else
> +#define NXPRT(X) do { ; } while (0)
> +#endif
> +
> +#ifdef NXTIMER
> +#include <sys/platform/ppc.h>
> +#define NX_CLK(X) do { X; } while (0)
> +#define nx_get_time() __ppc_get_timebase()
> +#define nx_get_freq() __ppc_get_timebase_freq()
> +#else
> +#define NX_CLK(X) do { ; } while (0)
> +#define nx_get_time() (-1)
> +#define nx_get_freq() (-1)
> +#endif
> +
> +/*
> + * Definitions of acronyms used here. See
> + * P9 NX Gzip Accelerator User's Manual for details
> + *
If I google "P9 NX Gzip Accelerator User's Manual" I just find the
POWER9 Processor User's Manual which seems to be a different
thing... following the links in that document to a GitHub repo I think
the document I'm looking for is
https://github.com/abalib/power-gzip/blob/master/power_nx_gzip_um.pdf
If that's right and the URL is stable, could you include a link in the source
code? (Should perhaps the power-gzip repository be moved from a personal
github account to the open-power organisation?)
Regards,
Daniel
> + * adler/crc: 32 bit checksums appended to stream tail
> + * ce: completion extension
> + * cpb: coprocessor parameter block (metadata)
> + * crb: coprocessor request block (command)
> + * csb: coprocessor status block (status)
> + * dht: dynamic huffman table
> + * dde: data descriptor element (address, length)
> + * ddl: list of ddes
> + * dh/fh: dynamic and fixed huffman types
> + * fc: coprocessor function code
> + * histlen: history/dictionary length
> + * history: sliding window of up to 32KB of data
> + * lzcount: Deflate LZ symbol counts
> + * rembytecnt: remaining byte count
> + * sfbt: source final block type; last block's type during decomp
> + * spbc: source processed byte count
> + * subc: source unprocessed bit count
> + * tebc: target ending bit count; valid bits in the last byte
> + * tpbc: target processed byte count
> + * vas: virtual accelerator switch; the user mode interface
> + */
> +
> +typedef union {
> + uint32_t word[4];
> + uint64_t dword[2];
> +} nx_qw_t __attribute__((aligned (16)));
> +
> +/*
> + * Note: NX registers with fewer than 32 bits are declared by
> + * convention as uint32_t variables in unions. If *_offset and *_mask
> + * are defined for a variable, then use get_ put_ macros to
> + * conveniently access the register fields for endian conversions.
> + */
> +
> +typedef struct {
> + /* Data Descriptor Element, Section 6.4 */
> + union {
> + uint32_t dde_count;
> + /* When dde_count == 0 ddead is a pointer to a data buffer;
> + * ddebc is the buffer length bytes.
> + * When dde_count > 0 dde is an indirect dde; ddead is a pointer
> + * to a contiguous list of direct ddes; ddebc is the total length
> + * of all data pointed to by the list of direct ddes.
> + * Note that only one level of indirection is permitted.
> + * See Section 6.4 of the user manual for additional details
> + */
> + };
> + uint32_t ddebc; /* dde byte count */
> + uint64_t ddead; /* dde address */
> +} nx_dde_t __attribute__((aligned (16)));
> +
> +typedef struct {
> + /* Coprocessor Status Block, Section 6.6 */
> + union {
> + uint32_t csb_v;
> + /* Valid bit. v must be set to 0 by the program
> + * before submitting the coprocessor command.
> + * Software can poll for the v bit
> + */
> +
> + uint32_t csb_f;
> + /* 16B CSB size. Written to 0 by DMA when it writes the CPB */
> +
> + uint32_t csb_cs;
> + /* cs completion sequence; unused */
> +
> + uint32_t csb_cc;
> + /* cc completion code; cc != 0 exception occurred */
> +
> + uint32_t csb_ce;
> + /* ce completion extension */
> +
> + };
> + uint32_t tpbc;
> + /* target processed byte count TPBC */
> +
> + uint64_t fsaddr;
> + /* Section 6.12.1 CSB NonZero error summary. FSA Failing storage
> + * address. Address where error occurred. When available, written
> + * to A field of CSB
> + */
> +} nx_csb_t __attribute__((aligned (16)));
> +
> +typedef struct {
> + /* Coprocessor Completion Block, Section 6.7 */
> +
> + uint32_t reserved[3];
> + union {
> + /* When crb.c==0 (no ccb defined) it is reserved;
> + * When crb.c==1 (ccb defined) it is cm
> + */
> +
> + uint32_t ccb_cm;
> + /* Signal interrupt of crb.c==1 and cm==1 */
> +
> + uint32_t word;
> + /* generic access to the 32bit word */
> + };
> +} nx_ccb_t __attribute__((aligned (16)));
> +
> +typedef struct {
> + /*
> + * CRB operand of the paste coprocessor instruction is stamped
> + * in quadword 4 with the information shown here as it is written
> + * into the receive FIFO of the coprocessor
> + */
> +
> + union {
> + uint32_t vas_buf_num;
> + /* Verification only vas buffer number which correlates to
> + * the low order bits of the atag in the paste command
> + */
> +
> + uint32_t send_wc_id;
> + /* Pointer to Send Window Context that provides for NX address
> + * translation information, such as MSR and LPCR bits, job completion
> + * interrupt RA, PSWID, and job utilization counter.
> + */
> +
> + };
> + union {
> + uint32_t recv_wc_id;
> + /* Pointer to Receive Window Context. NX uses this to return
> + * credits to a Receive FIFO as entries are dequeued.
> + */
> +
> + };
> + uint32_t reserved2;
> + union {
> + uint32_t vas_invalid;
> + /* Invalid bit. If this bit is 1 the CRB is discarded by
> + * NX upon fetching from the receive FIFO. If this bit is 0
> + * the CRB is processed normally. The bit is stamped to 0
> + * by VAS and may be written to 1 by hypervisor while
> + * the CRB is in the receive FIFO (in memory).
> + */
> +
> + };
> +} vas_stamped_crb_t;
> +
> +typedef struct {
> + /*
> + * A CRB that has a translation fault is stamped by NX in quadword 4
> + * and pasted to the Fault Send Window in VAS.
> + */
> + uint64_t fsa;
> + union {
> + uint32_t nxsf_t;
> + uint32_t nxsf_fs;
> + };
> + uint32_t pswid;
> +} nx_stamped_fault_crb_t;
> +
> +typedef union {
> + vas_stamped_crb_t vas;
> + nx_stamped_fault_crb_t nx;
> +} stamped_crb_t;
> +
> +typedef struct {
> + /*
> + * Coprocessor Parameter Block In/Out are used to pass metadata
> + * to/from accelerator. Tables 6.5 and 6.6 of the user manual.
> + */
> +
> + /* CPBInput */
> +
> + struct {
> + union {
> + nx_qw_t qw0;
> + struct {
> + uint32_t in_adler; /* bits 0:31 */
> + uint32_t in_crc; /* bits 32:63 */
> + union {
> + uint32_t in_histlen; /* bits 64:75 */
> + uint32_t in_subc; /* bits 93:95 */
> + };
> + union {
> + uint32_t in_sfbt; /* bits 108:111 */
> + uint32_t in_rembytecnt; /* bits 112:127 */
> + uint32_t in_dhtlen; /* bits 116:127 */
> + };
> + };
> + };
> + union {
> + nx_qw_t in_dht[DHTSZ]; /* qw[1:18] */
> + char in_dht_char[DHT_MAXSZ]; /* byte access */
> + };
> + nx_qw_t reserved[5]; /* qw[19:23] */
> + };
> +
> + /* CPBOutput */
> +
> + volatile struct {
> + union {
> + nx_qw_t qw24;
> + struct {
> + uint32_t out_adler; /* bits 0:31 qw[24] */
> + uint32_t out_crc; /* bits 32:63 qw[24] */
> + union {
> + uint32_t out_tebc; /* bits 77:79 qw[24] */
> + uint32_t out_subc; /* bits 80:95 qw[24] */
> + };
> + union {
> + uint32_t out_sfbt; /* bits 108:111 qw[24] */
> + uint32_t out_rembytecnt; /* bits 112:127 qw[24] */
> + uint32_t out_dhtlen; /* bits 116:127 qw[24] */
> + };
> + };
> + };
> + union {
> + nx_qw_t qw25[79]; /* qw[25:103] */
> + /* qw[25] compress no lzcounts or wrap */
> + uint32_t out_spbc_comp_wrap;
> + uint32_t out_spbc_wrap; /* qw[25] wrap */
> + uint32_t out_spbc_comp; /* qw[25] compress no lzcounts */
> + uint32_t out_lzcount[LLSZ+DSZ]; /* 286 LL and 30 D symbol counts */
> + struct {
> + nx_qw_t out_dht[DHTSZ]; /* qw[25:42] */
> + uint32_t out_spbc_decomp; /* qw[43] decompress */
> + };
> + };
> + /* qw[104] compress with lzcounts */
> + uint32_t out_spbc_comp_with_count;
> + };
> +} nx_gzip_cpb_t __attribute__((aligned (128)));
> +
> +typedef struct {
> + union { /* byte[0:3] */
> + uint32_t gzip_fc; /* bits[24-31] */
> + };
> + uint32_t reserved1; /* byte[4:7] */
> + union {
> + uint64_t csb_address; /* byte[8:15] */
> + struct {
> + uint32_t reserved2;
> + union {
> + uint32_t crb_c;
> + /* c==0 no ccb defined */
> +
> + uint32_t crb_at;
> + /* at==0 address type is ignored;
> + * all addrs effective assumed.
> + */
> +
> + };
> + };
> + };
> + nx_dde_t source_dde; /* byte[16:31] */
> + nx_dde_t target_dde; /* byte[32:47] */
> + volatile nx_ccb_t ccb; /* byte[48:63] */
> + volatile union {
> + /* byte[64:239] shift csb by 128 bytes out of the crb; csb was in crb
> + * earlier; JReilly says csb written with partial inject.
> + */
> + nx_qw_t reserved64[11];
> + stamped_crb_t stamp; /* byte[64:79] */
> + };
> + volatile nx_csb_t csb;
> +} nx_gzip_crb_t __attribute__((aligned (128)));
> +
> +
> +typedef struct {
> + nx_gzip_crb_t crb;
> + nx_gzip_cpb_t cpb;
> +} nx_gzip_crb_cpb_t __attribute__((aligned (2048)));
> +
> +
> +/*
> + * NX hardware convention has the msb bit on the left numbered 0.
> + * The defines below have *_offset defined as the rightmost bit
> + * position of a field. x of size_mask(x) is the field width in bits.
> + */
> +
> +#define size_mask(x) ((1U<<(x))-1)
> +
> +/*
> + * Offsets and Widths within the containing 32 bits of the various NX
> + * gzip hardware registers. Use the getnn/putnn macros to access
> + * these regs
> + */
> +
> +#define dde_count_mask size_mask(8)
> +#define dde_count_offset 23
> +
> +/* CSB */
> +
> +#define csb_v_mask size_mask(1)
> +#define csb_v_offset 0
> +#define csb_f_mask size_mask(1)
> +#define csb_f_offset 6
> +#define csb_cs_mask size_mask(8)
> +#define csb_cs_offset 15
> +#define csb_cc_mask size_mask(8)
> +#define csb_cc_offset 23
> +#define csb_ce_mask size_mask(8)
> +#define csb_ce_offset 31
> +
> +/* CCB */
> +
> +#define ccb_cm_mask size_mask(3)
> +#define ccb_cm_offset 31
> +
> +/* VAS stamped CRB fields */
> +
> +#define vas_buf_num_mask size_mask(6)
> +#define vas_buf_num_offset 5
> +#define send_wc_id_mask size_mask(16)
> +#define send_wc_id_offset 31
> +#define recv_wc_id_mask size_mask(16)
> +#define recv_wc_id_offset 31
> +#define vas_invalid_mask size_mask(1)
> +#define vas_invalid_offset 31
> +
> +/* NX stamped fault CRB fields */
> +
> +#define nxsf_t_mask size_mask(1)
> +#define nxsf_t_offset 23
> +#define nxsf_fs_mask size_mask(8)
> +#define nxsf_fs_offset 31
> +
> +/* CPB input */
> +
> +#define in_histlen_mask size_mask(12)
> +#define in_histlen_offset 11
> +#define in_dhtlen_mask size_mask(12)
> +#define in_dhtlen_offset 31
> +#define in_subc_mask size_mask(3)
> +#define in_subc_offset 31
> +#define in_sfbt_mask size_mask(4)
> +#define in_sfbt_offset 15
> +#define in_rembytecnt_mask size_mask(16)
> +#define in_rembytecnt_offset 31
> +
> +/* CPB output */
> +
> +#define out_tebc_mask size_mask(3)
> +#define out_tebc_offset 15
> +#define out_subc_mask size_mask(16)
> +#define out_subc_offset 31
> +#define out_sfbt_mask size_mask(4)
> +#define out_sfbt_offset 15
> +#define out_rembytecnt_mask size_mask(16)
> +#define out_rembytecnt_offset 31
> +#define out_dhtlen_mask size_mask(12)
> +#define out_dhtlen_offset 31
> +
> +/* CRB */
> +
> +#define gzip_fc_mask size_mask(8)
> +#define gzip_fc_offset 31
> +#define crb_c_mask size_mask(1)
> +#define crb_c_offset 28
> +#define crb_at_mask size_mask(1)
> +#define crb_at_offset 30
> +#define csb_address_mask ~(15UL) /* mask off bottom 4b */
> +
> +/*
> + * Access macros for the registers. Do not access registers directly
> + * because of the endian conversion. P9 processor may run either as
> + * Little or Big endian. However the NX coprocessor regs are always
> + * big endian.
> + * Use the 32 and 64b macros to access respective
> + * register sizes.
> + * Use nn forms for the register fields shorter than 32 bits.
> + */
> +
> +#define getnn(ST, REG) ((be32toh(ST.REG) >> (31-REG##_offset)) \
> + & REG##_mask)
> +#define getpnn(ST, REG) ((be32toh((ST)->REG) >> (31-REG##_offset)) \
> + & REG##_mask)
> +#define get32(ST, REG) (be32toh(ST.REG))
> +#define getp32(ST, REG) (be32toh((ST)->REG))
> +#define get64(ST, REG) (be64toh(ST.REG))
> +#define getp64(ST, REG) (be64toh((ST)->REG))
> +
> +#define unget32(ST, REG) (get32(ST, REG) & ~((REG##_mask) \
> + << (31-REG##_offset)))
> +/* get 32bits less the REG field */
> +
> +#define ungetp32(ST, REG) (getp32(ST, REG) & ~((REG##_mask) \
> + << (31-REG##_offset)))
> +/* get 32bits less the REG field */
> +
> +#define clear_regs(ST) do { memset((void *)(&(ST)), 0, sizeof(ST)); \
> + } while (0)
> +#define clear_dde(ST) do { ST.dde_count = ST.ddebc = 0; ST.ddead = 0; \
> + } while (0)
> +#define clearp_dde(ST) do { (ST)->dde_count = (ST)->ddebc = 0; \
> + (ST)->ddead = 0; \
> + } while (0)
> +#define clear_struct(ST) do { memset((void *)(&(ST)), 0, sizeof(ST)); \
> + } while (0)
> +
> +#define putnn(ST, REG, X) do { ST.REG = htobe32(unget32(ST, REG) | (((X) \
> + & REG##_mask) << (31-REG##_offset))); \
> + } while (0)
> +#define putpnn(ST, REG, X) do { (ST)->REG = htobe32(ungetp32(ST, REG) \
> + | (((X) & REG##_mask) << (31-REG##_offset))); \
> + } while (0)
> +
> +#define put32(ST, REG, X) do { ST.REG = htobe32(X); } while (0)
> +#define putp32(ST, REG, X) do { (ST)->REG = htobe32(X); } while (0)
> +#define put64(ST, REG, X) do { ST.REG = htobe64(X); } while (0)
> +#define putp64(ST, REG, X) do { (ST)->REG = htobe64(X); } while (0)
> +
> +/*
> + * Completion extension ce(0) ce(1) ce(2). Bits ce(3-7)
> + * unused. Section 6.6 Figure 6.7.
> + */
> +
> +#define get_csb_ce(ST) ((uint32_t)getnn(ST, csb_ce))
> +#define get_csb_ce_ms3b(ST) (get_csb_ce(ST) >> 5)
> +#define put_csb_ce_ms3b(ST, X) do { putnn(ST, csb_ce, ((uint32_t)(X) << 5)); \
> + } while (0)
> +
> +#define CSB_CE_PARTIAL 0x4
> +#define CSB_CE_TERMINATE 0x2
> +#define CSB_CE_TPBC_VALID 0x1
> +
> +#define csb_ce_termination(X) (!!((X) & CSB_CE_TERMINATE))
> +/* termination, output buffers may be modified, SPBC/TPBC invalid Fig.6-7 */
> +
> +#define csb_ce_check_completion(X) (!csb_ce_termination(X))
> +/* if not terminated then check full or partial completion */
> +
> +#define csb_ce_partial_completion(X) (!!((X) & CSB_CE_PARTIAL))
> +#define csb_ce_full_completion(X) (!csb_ce_partial_completion(X))
> +#define csb_ce_tpbc_valid(X) (!!((X) & CSB_CE_TPBC_VALID))
> +/* TPBC indicates successfully stored data count */
> +
> +#define csb_ce_default_err(X) csb_ce_termination(X)
> +/* most error CEs have CE(0)=0 and CE(1)=1 */
> +
> +#define csb_ce_cc3_partial(X) csb_ce_partial_completion(X)
> +/* some CC=3 are partially completed, Table 6-8 */
> +
> +#define csb_ce_cc64(X) ((X)&(CSB_CE_PARTIAL \
> + | CSB_CE_TERMINATE) == 0)
> +/* Compression: when TPBC>SPBC then CC=64 Table 6-8; target didn't
> + * compress smaller than source.
> + */
> +
> +/* Decompress SFBT combinations Tables 5-3, 6-4, 6-6 */
> +
> +#define SFBT_BFINAL 0x1
> +#define SFBT_LIT 0x4
> +#define SFBT_FHT 0x5
> +#define SFBT_DHT 0x6
> +#define SFBT_HDR 0x7
> +
> +/*
> + * NX gzip function codes. Table 6.2.
> + * Bits 0:4 are the FC. Bit 5 is used by the DMA controller to
> + * select one of the two Byte Count Limits.
> + */
> +
> +#define GZIP_FC_LIMIT_MASK 0x01
> +#define GZIP_FC_COMPRESS_FHT 0x00
> +#define GZIP_FC_COMPRESS_DHT 0x02
> +#define GZIP_FC_COMPRESS_FHT_COUNT 0x04
> +#define GZIP_FC_COMPRESS_DHT_COUNT 0x06
> +#define GZIP_FC_COMPRESS_RESUME_FHT 0x08
> +#define GZIP_FC_COMPRESS_RESUME_DHT 0x0a
> +#define GZIP_FC_COMPRESS_RESUME_FHT_COUNT 0x0c
> +#define GZIP_FC_COMPRESS_RESUME_DHT_COUNT 0x0e
> +#define GZIP_FC_DECOMPRESS 0x10
> +#define GZIP_FC_DECOMPRESS_SINGLE_BLK_N_SUSPEND 0x12
> +#define GZIP_FC_DECOMPRESS_RESUME 0x14
> +#define GZIP_FC_DECOMPRESS_RESUME_SINGLE_BLK_N_SUSPEND 0x16
> +#define GZIP_FC_WRAP 0x1e
> +
> +#define fc_is_compress(fc) (((fc) & 0x10) == 0)
> +#define fc_has_count(fc) (fc_is_compress(fc) && (((fc) & 0x4) != 0))
> +
> +/* CSB.CC Error codes */
> +
> +#define ERR_NX_OK 0
> +#define ERR_NX_ALIGNMENT 1
> +#define ERR_NX_OPOVERLAP 2
> +#define ERR_NX_DATA_LENGTH 3
> +#define ERR_NX_TRANSLATION 5
> +#define ERR_NX_PROTECTION 6
> +#define ERR_NX_EXTERNAL_UE7 7
> +#define ERR_NX_INVALID_OP 8
> +#define ERR_NX_PRIVILEGE 9
> +#define ERR_NX_INTERNAL_UE 10
> +#define ERR_NX_EXTERN_UE_WR 12
> +#define ERR_NX_TARGET_SPACE 13
> +#define ERR_NX_EXCESSIVE_DDE 14
> +#define ERR_NX_TRANSL_WR 15
> +#define ERR_NX_PROTECT_WR 16
> +#define ERR_NX_SUBFUNCTION 17
> +#define ERR_NX_FUNC_ABORT 18
> +#define ERR_NX_BYTE_MAX 19
> +#define ERR_NX_CORRUPT_CRB 20
> +#define ERR_NX_INVALID_CRB 21
> +#define ERR_NX_INVALID_DDE 30
> +#define ERR_NX_SEGMENTED_DDL 31
> +#define ERR_NX_DDE_OVERFLOW 33
> +#define ERR_NX_TPBC_GT_SPBC 64
> +#define ERR_NX_MISSING_CODE 66
> +#define ERR_NX_INVALID_DIST 67
> +#define ERR_NX_INVALID_DHT 68
> +#define ERR_NX_EXTERNAL_UE90 90
> +#define ERR_NX_WDOG_TIMER 224
> +#define ERR_NX_AT_FAULT 250
> +#define ERR_NX_INTR_SERVER 252
> +#define ERR_NX_UE253 253
> +#define ERR_NX_NO_HW 254
> +#define ERR_NX_HUNG_OP 255
> +#define ERR_NX_END 256
> +
> +/* initial values for non-resume operations */
> +#define INIT_CRC 0 /* crc32(0L, Z_NULL, 0) */
> +#define INIT_ADLER 1 /* adler32(0L, Z_NULL, 0) adler is initialized to 1 */
> +
> +/* prototypes */
> +#ifdef NX_JOB_CALLBACK
> +int nxu_run_job(nx_gzip_crb_cpb_t *c, void *handle,
> + int (*callback)(const void *));
> +#else
> +int nxu_run_job(nx_gzip_crb_cpb_t *c, void *handle);
> +#endif
> +
> +
> +/* caller supplies a print buffer 4*sizeof(crb) */
> +
> +char *nx_crb_str(nx_gzip_crb_t *crb, char *prbuf);
> +char *nx_cpb_str(nx_gzip_cpb_t *cpb, char *prbuf);
> +char *nx_prt_hex(void *cp, int sz, char *prbuf);
> +char *nx_lzcount_str(nx_gzip_cpb_t *cpb, char *prbuf);
> +char *nx_strerror(int e);
> +
> +#ifdef NX_SIM
> +#include <stdio.h>
> +int nx_sim_init(void *ctx);
> +int nx_sim_end(void *ctx);
> +int nxu_run_sim_job(nx_gzip_crb_cpb_t *c, void *ctx);
> +#endif /* NX_SIM */
> +
> +/* Deflate stream manipulation */
> +
> +#define set_final_bit(x) do { x |= (unsigned char)1; } while (0)
> +#define clr_final_bit(x) do { x &= ~(unsigned char)1; } while (0)
> +
> +#define append_empty_fh_blk(p, b) do { *(p) = (2 | (1&(b))); *((p)+1) = 0; \
> + } while (0)
> +/* append 10 bits 0000001b 00...... ;
> + * assumes appending starts on a byte boundary; b is the final bit.
> + */
> +
> +
> +#ifdef NX_842
> +
> +/* 842 Engine */
> +
> +typedef struct {
> + union { /* byte[0:3] */
> + uint32_t eft_fc; /* bits[29-31] */
> + };
> + uint32_t reserved1; /* byte[4:7] */
> + union {
> + uint64_t csb_address; /* byte[8:15] */
> + struct {
> + uint32_t reserved2;
> + union {
> + uint32_t crb_c;
> + /* c==0 no ccb defined */
> +
> + uint32_t crb_at;
> + /* at==0 address type is ignored;
> + all addrs effective assumed */
> +
> + };
> + };
> + };
> + nx_dde_t source_dde; /* byte[16:31] */
> + nx_dde_t target_dde; /* byte[32:47] */
> + nx_ccb_t ccb; /* byte[48:63] */
> + union {
> + nx_qw_t reserved64[3]; /* byte[64:96] */
> + };
> + nx_csb_t csb;
> +} nx_eft_crb_t __attribute__((aligned (128)));
> +
> +/* 842 CRB */
> +
> +#define EFT_FC_MASK size_mask(3)
> +#define EFT_FC_OFFSET 31
> +#define EFT_FC_COMPRESS 0x0
> +#define EFT_FC_COMPRESS_WITH_CRC 0x1
> +#define EFT_FC_DECOMPRESS 0x2
> +#define EFT_FC_DECOMPRESS_WITH_CRC 0x3
> +#define EFT_FC_BLK_DATA_MOVE 0x4
> +#endif /* NX_842 */
> +
> +#endif /* _NXU_H */
> --
> 2.21.0
More information about the Linuxppc-dev
mailing list