linux/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2
   3/* P9 gzip sample code for demonstrating the P9 NX hardware interface.
   4 * Not intended for productive uses or for performance or compression
   5 * ratio measurements.  For simplicity of demonstration, this sample
   6 * code compresses in to fixed Huffman blocks only (Deflate btype=1)
   7 * and has very simple memory management.  Dynamic Huffman blocks
   8 * (Deflate btype=2) are more involved as detailed in the user guide.
   9 * Note also that /dev/crypto/gzip, VAS and skiboot support are
  10 * required.
  11 *
  12 * Copyright 2020 IBM Corp.
  13 *
  14 * https://github.com/libnxz/power-gzip for zlib api and other utils
  15 *
  16 * Author: Bulent Abali <abali@us.ibm.com>
  17 *
  18 * Definitions of acronyms used here. See
  19 * P9 NX Gzip Accelerator User's Manual for details:
  20 * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
  21 *
  22 * adler/crc: 32 bit checksums appended to stream tail
  23 * ce:       completion extension
  24 * cpb:      coprocessor parameter block (metadata)
  25 * crb:      coprocessor request block (command)
  26 * csb:      coprocessor status block (status)
  27 * dht:      dynamic huffman table
  28 * dde:      data descriptor element (address, length)
  29 * ddl:      list of ddes
  30 * dh/fh:    dynamic and fixed huffman types
  31 * fc:       coprocessor function code
  32 * histlen:  history/dictionary length
  33 * history:  sliding window of up to 32KB of data
  34 * lzcount:  Deflate LZ symbol counts
  35 * rembytecnt: remaining byte count
  36 * sfbt:     source final block type; last block's type during decomp
  37 * spbc:     source processed byte count
  38 * subc:     source unprocessed bit count
  39 * tebc:     target ending bit count; valid bits in the last byte
  40 * tpbc:     target processed byte count
  41 * vas:      virtual accelerator switch; the user mode interface
  42 */
  43
  44#define _ISOC11_SOURCE  // For aligned_alloc()
  45#define _DEFAULT_SOURCE // For endian.h
  46
  47#include <stdio.h>
  48#include <stdlib.h>
  49#include <string.h>
  50#include <unistd.h>
  51#include <stdint.h>
  52#include <sys/types.h>
  53#include <sys/stat.h>
  54#include <sys/time.h>
  55#include <sys/fcntl.h>
  56#include <sys/mman.h>
  57#include <endian.h>
  58#include <bits/endian.h>
  59#include <sys/ioctl.h>
  60#include <assert.h>
  61#include <errno.h>
  62#include <signal.h>
  63#include "utils.h"
  64#include "nxu.h"
  65#include "nx.h"
  66
  67int nx_dbg;
  68FILE *nx_gzip_log;
  69
  70#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
  71#define FNAME_MAX 1024
  72#define FEXT ".nx.gz"
  73
  74#define SYSFS_MAX_REQ_BUF_PATH "devices/vio/ibm,compression-v1/nx_gzip_caps/req_max_processed_len"
  75
  76/*
  77 * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
  78 */
  79static int compress_fht_sample(char *src, uint32_t srclen, char *dst,
  80                                uint32_t dstlen, int with_count,
  81                                struct nx_gzip_crb_cpb_t *cmdp, void *handle)
  82{
  83        uint32_t fc;
  84
  85        assert(!!cmdp);
  86
  87        put32(cmdp->crb, gzip_fc, 0);  /* clear */
  88        fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT :
  89                            GZIP_FC_COMPRESS_RESUME_FHT;
  90        putnn(cmdp->crb, gzip_fc, fc);
  91        putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */
  92        memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
  93
  94        /* Section 6.6 programming notes; spbc may be in two different
  95         * places depending on FC.
  96         */
  97        if (!with_count)
  98                put32(cmdp->cpb, out_spbc_comp, 0);
  99        else
 100                put32(cmdp->cpb, out_spbc_comp_with_count, 0);
 101
 102        /* Figure 6-3 6-4; CSB location */
 103        put64(cmdp->crb, csb_address, 0);
 104        put64(cmdp->crb, csb_address,
 105              (uint64_t) &cmdp->crb.csb & csb_address_mask);
 106
 107        /* Source direct dde (scatter-gather list) */
 108        clear_dde(cmdp->crb.source_dde);
 109        putnn(cmdp->crb.source_dde, dde_count, 0);
 110        put32(cmdp->crb.source_dde, ddebc, srclen);
 111        put64(cmdp->crb.source_dde, ddead, (uint64_t) src);
 112
 113        /* Target direct dde (scatter-gather list) */
 114        clear_dde(cmdp->crb.target_dde);
 115        putnn(cmdp->crb.target_dde, dde_count, 0);
 116        put32(cmdp->crb.target_dde, ddebc, dstlen);
 117        put64(cmdp->crb.target_dde, ddead, (uint64_t) dst);
 118
 119        /* Submit the crb, the job descriptor, to the accelerator */
 120        return nxu_submit_job(cmdp, handle);
 121}
 122
 123/*
 124 * Prepares a blank no filename no timestamp gzip header and returns
 125 * the number of bytes written to buf.
 126 * Gzip specification at https://tools.ietf.org/html/rfc1952
 127 */
 128int gzip_header_blank(char *buf)
 129{
 130        int i = 0;
 131
 132        buf[i++] = 0x1f; /* ID1 */
 133        buf[i++] = 0x8b; /* ID2 */
 134        buf[i++] = 0x08; /* CM  */
 135        buf[i++] = 0x00; /* FLG */
 136        buf[i++] = 0x00; /* MTIME */
 137        buf[i++] = 0x00; /* MTIME */
 138        buf[i++] = 0x00; /* MTIME */
 139        buf[i++] = 0x00; /* MTIME */
 140        buf[i++] = 0x04; /* XFL 4=fastest */
 141        buf[i++] = 0x03; /* OS UNIX */
 142
 143        return i;
 144}
 145
 146/* Caller must free the allocated buffer return nonzero on error. */
 147int read_alloc_input_file(char *fname, char **buf, size_t *bufsize)
 148{
 149        struct stat statbuf;
 150        FILE *fp;
 151        char *p;
 152        size_t num_bytes;
 153
 154        if (stat(fname, &statbuf)) {
 155                perror(fname);
 156                return(-1);
 157        }
 158        fp = fopen(fname, "r");
 159        if (fp == NULL) {
 160                perror(fname);
 161                return(-1);
 162        }
 163        assert(NULL != (p = (char *) malloc(statbuf.st_size)));
 164        num_bytes = fread(p, 1, statbuf.st_size, fp);
 165        if (ferror(fp) || (num_bytes != statbuf.st_size)) {
 166                perror(fname);
 167                return(-1);
 168        }
 169        *buf = p;
 170        *bufsize = num_bytes;
 171        return 0;
 172}
 173
 174/* Returns nonzero on error */
 175int write_output_file(char *fname, char *buf, size_t bufsize)
 176{
 177        FILE *fp;
 178        size_t num_bytes;
 179
 180        fp = fopen(fname, "w");
 181        if (fp == NULL) {
 182                perror(fname);
 183                return(-1);
 184        }
 185        num_bytes = fwrite(buf, 1, bufsize, fp);
 186        if (ferror(fp) || (num_bytes != bufsize)) {
 187                perror(fname);
 188                return(-1);
 189        }
 190        fclose(fp);
 191        return 0;
 192}
 193
 194/*
 195 * Z_SYNC_FLUSH as described in zlib.h.
 196 * Returns number of appended bytes
 197 */
 198int append_sync_flush(char *buf, int tebc, int final)
 199{
 200        uint64_t flush;
 201        int shift = (tebc & 0x7);
 202
 203        if (tebc > 0) {
 204                /* Last byte is partially full */
 205                buf = buf - 1;
 206                *buf = *buf & (unsigned char) ((1<<tebc)-1);
 207        } else
 208                *buf = 0;
 209        flush = ((0x1ULL & final) << shift) | *buf;
 210        shift = shift + 3; /* BFINAL and BTYPE written */
 211        shift = (shift <= 8) ? 8 : 16;
 212        flush |= (0xFFFF0000ULL) << shift; /* Zero length block */
 213        shift = shift + 32;
 214        while (shift > 0) {
 215                *buf++ = (unsigned char) (flush & 0xffULL);
 216                flush = flush >> 8;
 217                shift = shift - 8;
 218        }
 219        return(((tebc > 5) || (tebc == 0)) ? 5 : 4);
 220}
 221
 222/*
 223 * Final deflate block bit.  This call assumes the block
 224 * beginning is byte aligned.
 225 */
 226static void set_bfinal(void *buf, int bfinal)
 227{
 228        char *b = buf;
 229
 230        if (bfinal)
 231                *b = *b | (unsigned char) 0x01;
 232        else
 233                *b = *b & (unsigned char) 0xfe;
 234}
 235
 236int compress_file(int argc, char **argv, void *handle)
 237{
 238        char *inbuf, *outbuf, *srcbuf, *dstbuf;
 239        char outname[FNAME_MAX];
 240        uint32_t srclen, dstlen;
 241        uint32_t flushlen, chunk;
 242        size_t inlen, outlen, dsttotlen, srctotlen;
 243        uint32_t crc, spbc, tpbc, tebc;
 244        int lzcounts = 0;
 245        int cc;
 246        int num_hdr_bytes;
 247        struct nx_gzip_crb_cpb_t *cmdp;
 248        uint32_t pagelen = 65536;
 249        int fault_tries = NX_MAX_FAULTS;
 250        char buf[32];
 251
 252        cmdp = (void *)(uintptr_t)
 253                aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
 254                              sizeof(struct nx_gzip_crb_cpb_t));
 255
 256        if (argc != 2) {
 257                fprintf(stderr, "usage: %s <fname>\n", argv[0]);
 258                exit(-1);
 259        }
 260        if (read_alloc_input_file(argv[1], &inbuf, &inlen))
 261                exit(-1);
 262        fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen);
 263
 264        /* Generous output buffer for header/trailer */
 265        outlen = 2 * inlen + 1024;
 266
 267        assert(NULL != (outbuf = (char *)malloc(outlen)));
 268        nxu_touch_pages(outbuf, outlen, pagelen, 1);
 269
 270        /*
 271         * On PowerVM, the hypervisor defines the maximum request buffer
 272         * size is defined and this value is available via sysfs.
 273         */
 274        if (!read_sysfs_file(SYSFS_MAX_REQ_BUF_PATH, buf, sizeof(buf))) {
 275                chunk = atoi(buf);
 276        } else {
 277                /* sysfs entry is not available on PowerNV */
 278                /* Compress piecemeal in smallish chunks */
 279                chunk = 1<<22;
 280        }
 281
 282        /* Write the gzip header to the stream */
 283        num_hdr_bytes = gzip_header_blank(outbuf);
 284        dstbuf    = outbuf + num_hdr_bytes;
 285        outlen    = outlen - num_hdr_bytes;
 286        dsttotlen = num_hdr_bytes;
 287
 288        srcbuf    = inbuf;
 289        srctotlen = 0;
 290
 291        /* Init the CRB, the coprocessor request block */
 292        memset(&cmdp->crb, 0, sizeof(cmdp->crb));
 293
 294        /* Initial gzip crc32 */
 295        put32(cmdp->cpb, in_crc, 0);
 296
 297        while (inlen > 0) {
 298
 299                /* Submit chunk size source data per job */
 300                srclen = NX_MIN(chunk, inlen);
 301                /* Supply large target in case data expands */
 302                dstlen = NX_MIN(2*srclen, outlen);
 303
 304                /* Page faults are handled by the user code */
 305
 306                /* Fault-in pages; an improved code wouldn't touch so
 307                 * many pages but would try to estimate the
 308                 * compression ratio and adjust both the src and dst
 309                 * touch amounts.
 310                 */
 311                nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen,
 312                                1);
 313                nxu_touch_pages(srcbuf, srclen, pagelen, 0);
 314                nxu_touch_pages(dstbuf, dstlen, pagelen, 1);
 315
 316                cc = compress_fht_sample(
 317                        srcbuf, srclen,
 318                        dstbuf, dstlen,
 319                        lzcounts, cmdp, handle);
 320
 321                if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC &&
 322                    cc != ERR_NX_AT_FAULT) {
 323                        fprintf(stderr, "nx error: cc= %d\n", cc);
 324                        exit(-1);
 325                }
 326
 327                /* Page faults are handled by the user code */
 328                if (cc == ERR_NX_AT_FAULT) {
 329                        NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc));
 330                        NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n",
 331                                  fault_tries,
 332                                  (unsigned long long) cmdp->crb.csb.fsaddr));
 333                        fault_tries--;
 334                        if (fault_tries > 0) {
 335                                continue;
 336                        } else {
 337                                fprintf(stderr, "error: cannot progress; ");
 338                                fprintf(stderr, "too many faults\n");
 339                                exit(-1);
 340                        }
 341                }
 342
 343                fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */
 344
 345                inlen     = inlen - srclen;
 346                srcbuf    = srcbuf + srclen;
 347                srctotlen = srctotlen + srclen;
 348
 349                /* Two possible locations for spbc depending on the function
 350                 * code.
 351                 */
 352                spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) :
 353                        get32(cmdp->cpb, out_spbc_comp_with_count);
 354                assert(spbc == srclen);
 355
 356                /* Target byte count */
 357                tpbc = get32(cmdp->crb.csb, tpbc);
 358                /* Target ending bit count */
 359                tebc = getnn(cmdp->cpb, out_tebc);
 360                NXPRT(fprintf(stderr, "compressed chunk %d ", spbc));
 361                NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc));
 362
 363                if (inlen > 0) { /* More chunks to go */
 364                        set_bfinal(dstbuf, 0);
 365                        dstbuf    = dstbuf + tpbc;
 366                        dsttotlen = dsttotlen + tpbc;
 367                        outlen    = outlen - tpbc;
 368                        /* Round up to the next byte with a flush
 369                         * block; do not set the BFINAqL bit.
 370                         */
 371                        flushlen  = append_sync_flush(dstbuf, tebc, 0);
 372                        dsttotlen = dsttotlen + flushlen;
 373                        outlen    = outlen - flushlen;
 374                        dstbuf    = dstbuf + flushlen;
 375                        NXPRT(fprintf(stderr, "added sync_flush %d bytes\n",
 376                                        flushlen));
 377                } else {  /* Done */
 378                        /* Set the BFINAL bit of the last block per Deflate
 379                         * specification.
 380                         */
 381                        set_bfinal(dstbuf, 1);
 382                        dstbuf    = dstbuf + tpbc;
 383                        dsttotlen = dsttotlen + tpbc;
 384                        outlen    = outlen - tpbc;
 385                }
 386
 387                /* Resuming crc32 for the next chunk */
 388                crc = get32(cmdp->cpb, out_crc);
 389                put32(cmdp->cpb, in_crc, crc);
 390                crc = be32toh(crc);
 391        }
 392
 393        /* Append crc32 and ISIZE to the end */
 394        memcpy(dstbuf, &crc, 4);
 395        memcpy(dstbuf+4, &srctotlen, 4);
 396        dsttotlen = dsttotlen + 8;
 397        outlen    = outlen - 8;
 398
 399        assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT)));
 400        strcpy(outname, argv[1]);
 401        strcat(outname, FEXT);
 402        if (write_output_file(outname, outbuf, dsttotlen)) {
 403                fprintf(stderr, "write error: %s\n", outname);
 404                exit(-1);
 405        }
 406
 407        fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen,
 408                dsttotlen);
 409        fprintf(stderr, "crc32 checksum = %08x\n", crc);
 410
 411        if (inbuf != NULL)
 412                free(inbuf);
 413
 414        if (outbuf != NULL)
 415                free(outbuf);
 416
 417        return 0;
 418}
 419
 420int main(int argc, char **argv)
 421{
 422        int rc;
 423        struct sigaction act;
 424        void *handle;
 425
 426        nx_dbg = 0;
 427        nx_gzip_log = NULL;
 428        act.sa_handler = 0;
 429        act.sa_sigaction = nxu_sigsegv_handler;
 430        act.sa_flags = SA_SIGINFO;
 431        act.sa_restorer = 0;
 432        sigemptyset(&act.sa_mask);
 433        sigaction(SIGSEGV, &act, NULL);
 434
 435        handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
 436        if (!handle) {
 437                fprintf(stderr, "Unable to init NX, errno %d\n", errno);
 438                exit(-1);
 439        }
 440
 441        rc = compress_file(argc, argv, handle);
 442
 443        nx_function_end(handle);
 444
 445        return rc;
 446}
 447