linux/drivers/crypto/nx/nx-842.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Cryptographic API for the NX-842 hardware compression.
   4 *
   5 * Copyright (C) IBM Corporation, 2011-2015
   6 *
   7 * Designer of the Power data compression engine:
   8 *   Bulent Abali <abali@us.ibm.com>
   9 *
  10 * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
  11 *                   Seth Jennings <sjenning@linux.vnet.ibm.com>
  12 *
  13 * Rewrite: Dan Streetman <ddstreet@ieee.org>
  14 *
  15 * This is an interface to the NX-842 compression hardware in PowerPC
  16 * processors.  Most of the complexity of this driver is due to the fact that
  17 * the NX-842 compression hardware requires the input and output data buffers
  18 * to be specifically aligned, to be a specific multiple in length, and within
  19 * specific minimum and maximum lengths.  Those restrictions, provided by the
  20 * nx-842 driver via nx842_constraints, mean this driver must use bounce
  21 * buffers and headers to correct misaligned in or out buffers, and to split
  22 * input buffers that are too large.
  23 *
  24 * This driver will fall back to software decompression if the hardware
  25 * decompression fails, so this driver's decompression should never fail as
  26 * long as the provided compressed buffer is valid.  Any compressed buffer
  27 * created by this driver will have a header (except ones where the input
  28 * perfectly matches the constraints); so users of this driver cannot simply
  29 * pass a compressed buffer created by this driver over to the 842 software
  30 * decompression library.  Instead, users must use this driver to decompress;
  31 * if the hardware fails or is unavailable, the compressed buffer will be
  32 * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
  33 * software decompression library.
  34 *
  35 * This does not fall back to software compression, however, since the caller
  36 * of this function is specifically requesting hardware compression; if the
  37 * hardware compression fails, the caller can fall back to software
  38 * compression, and the raw 842 compressed buffer that the software compressor
  39 * creates can be passed to this driver for hardware decompression; any
  40 * buffer without our specific header magic is assumed to be a raw 842 buffer
  41 * and passed directly to the hardware.  Note that the software compression
  42 * library will produce a compressed buffer that is incompatible with the
  43 * hardware decompressor if the original input buffer length is not a multiple
  44 * of 8; if such a compressed buffer is passed to this driver for
  45 * decompression, the hardware will reject it and this driver will then pass
  46 * it over to the software library for decompression.
  47 */
  48
  49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  50
  51#include <linux/vmalloc.h>
  52#include <linux/sw842.h>
  53#include <linux/spinlock.h>
  54
  55#include "nx-842.h"
  56
  57/* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
  58 * template (see lib/842/842.h), so this magic number will never appear at
  59 * the start of a raw 842 compressed buffer.  That is important, as any buffer
  60 * passed to us without this magic is assumed to be a raw 842 compressed
  61 * buffer, and passed directly to the hardware to decompress.
  62 */
  63#define NX842_CRYPTO_MAGIC      (0xf842)
  64#define NX842_CRYPTO_HEADER_SIZE(g)                             \
  65        (sizeof(struct nx842_crypto_header) +                   \
  66         sizeof(struct nx842_crypto_header_group) * (g))
  67#define NX842_CRYPTO_HEADER_MAX_SIZE                            \
  68        NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
  69
  70/* bounce buffer size */
  71#define BOUNCE_BUFFER_ORDER     (2)
  72#define BOUNCE_BUFFER_SIZE                                      \
  73        ((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))
  74
  75/* try longer on comp because we can fallback to sw decomp if hw is busy */
  76#define COMP_BUSY_TIMEOUT       (250) /* ms */
  77#define DECOMP_BUSY_TIMEOUT     (50) /* ms */
  78
  79struct nx842_crypto_param {
  80        u8 *in;
  81        unsigned int iremain;
  82        u8 *out;
  83        unsigned int oremain;
  84        unsigned int ototal;
  85};
  86
  87static int update_param(struct nx842_crypto_param *p,
  88                        unsigned int slen, unsigned int dlen)
  89{
  90        if (p->iremain < slen)
  91                return -EOVERFLOW;
  92        if (p->oremain < dlen)
  93                return -ENOSPC;
  94
  95        p->in += slen;
  96        p->iremain -= slen;
  97        p->out += dlen;
  98        p->oremain -= dlen;
  99        p->ototal += dlen;
 100
 101        return 0;
 102}
 103
 104int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver)
 105{
 106        struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
 107
 108        spin_lock_init(&ctx->lock);
 109        ctx->driver = driver;
 110        ctx->wmem = kmalloc(driver->workmem_size, GFP_KERNEL);
 111        ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
 112        ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
 113        if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
 114                kfree(ctx->wmem);
 115                free_page((unsigned long)ctx->sbounce);
 116                free_page((unsigned long)ctx->dbounce);
 117                return -ENOMEM;
 118        }
 119
 120        return 0;
 121}
 122EXPORT_SYMBOL_GPL(nx842_crypto_init);
 123
 124void nx842_crypto_exit(struct crypto_tfm *tfm)
 125{
 126        struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
 127
 128        kfree(ctx->wmem);
 129        free_page((unsigned long)ctx->sbounce);
 130        free_page((unsigned long)ctx->dbounce);
 131}
 132EXPORT_SYMBOL_GPL(nx842_crypto_exit);
 133
 134static void check_constraints(struct nx842_constraints *c)
 135{
 136        /* limit maximum, to always have enough bounce buffer to decompress */
 137        if (c->maximum > BOUNCE_BUFFER_SIZE)
 138                c->maximum = BOUNCE_BUFFER_SIZE;
 139}
 140
 141static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
 142{
 143        int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
 144
 145        /* compress should have added space for header */
 146        if (s > be16_to_cpu(hdr->group[0].padding)) {
 147                pr_err("Internal error: no space for header\n");
 148                return -EINVAL;
 149        }
 150
 151        memcpy(buf, hdr, s);
 152
 153        print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0);
 154
 155        return 0;
 156}
 157
 158static int compress(struct nx842_crypto_ctx *ctx,
 159                    struct nx842_crypto_param *p,
 160                    struct nx842_crypto_header_group *g,
 161                    struct nx842_constraints *c,
 162                    u16 *ignore,
 163                    unsigned int hdrsize)
 164{
 165        unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
 166        unsigned int adj_slen = slen;
 167        u8 *src = p->in, *dst = p->out;
 168        int ret, dskip = 0;
 169        ktime_t timeout;
 170
 171        if (p->iremain == 0)
 172                return -EOVERFLOW;
 173
 174        if (p->oremain == 0 || hdrsize + c->minimum > dlen)
 175                return -ENOSPC;
 176
 177        if (slen % c->multiple)
 178                adj_slen = round_up(slen, c->multiple);
 179        if (slen < c->minimum)
 180                adj_slen = c->minimum;
 181        if (slen > c->maximum)
 182                adj_slen = slen = c->maximum;
 183        if (adj_slen > slen || (u64)src % c->alignment) {
 184                adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
 185                slen = min(slen, BOUNCE_BUFFER_SIZE);
 186                if (adj_slen > slen)
 187                        memset(ctx->sbounce + slen, 0, adj_slen - slen);
 188                memcpy(ctx->sbounce, src, slen);
 189                src = ctx->sbounce;
 190                slen = adj_slen;
 191                pr_debug("using comp sbounce buffer, len %x\n", slen);
 192        }
 193
 194        dst += hdrsize;
 195        dlen -= hdrsize;
 196
 197        if ((u64)dst % c->alignment) {
 198                dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
 199                dst += dskip;
 200                dlen -= dskip;
 201        }
 202        if (dlen % c->multiple)
 203                dlen = round_down(dlen, c->multiple);
 204        if (dlen < c->minimum) {
 205nospc:
 206                dst = ctx->dbounce;
 207                dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
 208                dlen = round_down(dlen, c->multiple);
 209                dskip = 0;
 210                pr_debug("using comp dbounce buffer, len %x\n", dlen);
 211        }
 212        if (dlen > c->maximum)
 213                dlen = c->maximum;
 214
 215        tmplen = dlen;
 216        timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
 217        do {
 218                dlen = tmplen; /* reset dlen, if we're retrying */
 219                ret = ctx->driver->compress(src, slen, dst, &dlen, ctx->wmem);
 220                /* possibly we should reduce the slen here, instead of
 221                 * retrying with the dbounce buffer?
 222                 */
 223                if (ret == -ENOSPC && dst != ctx->dbounce)
 224                        goto nospc;
 225        } while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
 226        if (ret)
 227                return ret;
 228
 229        dskip += hdrsize;
 230
 231        if (dst == ctx->dbounce)
 232                memcpy(p->out + dskip, dst, dlen);
 233
 234        g->padding = cpu_to_be16(dskip);
 235        g->compressed_length = cpu_to_be32(dlen);
 236        g->uncompressed_length = cpu_to_be32(slen);
 237
 238        if (p->iremain < slen) {
 239                *ignore = slen - p->iremain;
 240                slen = p->iremain;
 241        }
 242
 243        pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
 244                 slen, *ignore, dlen, dskip);
 245
 246        return update_param(p, slen, dskip + dlen);
 247}
 248
 249int nx842_crypto_compress(struct crypto_tfm *tfm,
 250                          const u8 *src, unsigned int slen,
 251                          u8 *dst, unsigned int *dlen)
 252{
 253        struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
 254        struct nx842_crypto_header *hdr = &ctx->header;
 255        struct nx842_crypto_param p;
 256        struct nx842_constraints c = *ctx->driver->constraints;
 257        unsigned int groups, hdrsize, h;
 258        int ret, n;
 259        bool add_header;
 260        u16 ignore = 0;
 261
 262        check_constraints(&c);
 263
 264        p.in = (u8 *)src;
 265        p.iremain = slen;
 266        p.out = dst;
 267        p.oremain = *dlen;
 268        p.ototal = 0;
 269
 270        *dlen = 0;
 271
 272        groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
 273                       DIV_ROUND_UP(p.iremain, c.maximum));
 274        hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);
 275
 276        spin_lock_bh(&ctx->lock);
 277
 278        /* skip adding header if the buffers meet all constraints */
 279        add_header = (p.iremain % c.multiple    ||
 280                      p.iremain < c.minimum     ||
 281                      p.iremain > c.maximum     ||
 282                      (u64)p.in % c.alignment   ||
 283                      p.oremain % c.multiple    ||
 284                      p.oremain < c.minimum     ||
 285                      p.oremain > c.maximum     ||
 286                      (u64)p.out % c.alignment);
 287
 288        hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
 289        hdr->groups = 0;
 290        hdr->ignore = 0;
 291
 292        while (p.iremain > 0) {
 293                n = hdr->groups++;
 294                ret = -ENOSPC;
 295                if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
 296                        goto unlock;
 297
 298                /* header goes before first group */
 299                h = !n && add_header ? hdrsize : 0;
 300
 301                if (ignore)
 302                        pr_warn("internal error, ignore is set %x\n", ignore);
 303
 304                ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
 305                if (ret)
 306                        goto unlock;
 307        }
 308
 309        if (!add_header && hdr->groups > 1) {
 310                pr_err("Internal error: No header but multiple groups\n");
 311                ret = -EINVAL;
 312                goto unlock;
 313        }
 314
 315        /* ignore indicates the input stream needed to be padded */
 316        hdr->ignore = cpu_to_be16(ignore);
 317        if (ignore)
 318                pr_debug("marked %d bytes as ignore\n", ignore);
 319
 320        if (add_header)
 321                ret = nx842_crypto_add_header(hdr, dst);
 322        if (ret)
 323                goto unlock;
 324
 325        *dlen = p.ototal;
 326
 327        pr_debug("compress total slen %x dlen %x\n", slen, *dlen);
 328
 329unlock:
 330        spin_unlock_bh(&ctx->lock);
 331        return ret;
 332}
 333EXPORT_SYMBOL_GPL(nx842_crypto_compress);
 334
 335static int decompress(struct nx842_crypto_ctx *ctx,
 336                      struct nx842_crypto_param *p,
 337                      struct nx842_crypto_header_group *g,
 338                      struct nx842_constraints *c,
 339                      u16 ignore)
 340{
 341        unsigned int slen = be32_to_cpu(g->compressed_length);
 342        unsigned int required_len = be32_to_cpu(g->uncompressed_length);
 343        unsigned int dlen = p->oremain, tmplen;
 344        unsigned int adj_slen = slen;
 345        u8 *src = p->in, *dst = p->out;
 346        u16 padding = be16_to_cpu(g->padding);
 347        int ret, spadding = 0;
 348        ktime_t timeout;
 349
 350        if (!slen || !required_len)
 351                return -EINVAL;
 352
 353        if (p->iremain <= 0 || padding + slen > p->iremain)
 354                return -EOVERFLOW;
 355
 356        if (p->oremain <= 0 || required_len - ignore > p->oremain)
 357                return -ENOSPC;
 358
 359        src += padding;
 360
 361        if (slen % c->multiple)
 362                adj_slen = round_up(slen, c->multiple);
 363        if (slen < c->minimum)
 364                adj_slen = c->minimum;
 365        if (slen > c->maximum)
 366                goto usesw;
 367        if (slen < adj_slen || (u64)src % c->alignment) {
 368                /* we can append padding bytes because the 842 format defines
 369                 * an "end" template (see lib/842/842_decompress.c) and will
 370                 * ignore any bytes following it.
 371                 */
 372                if (slen < adj_slen)
 373                        memset(ctx->sbounce + slen, 0, adj_slen - slen);
 374                memcpy(ctx->sbounce, src, slen);
 375                src = ctx->sbounce;
 376                spadding = adj_slen - slen;
 377                slen = adj_slen;
 378                pr_debug("using decomp sbounce buffer, len %x\n", slen);
 379        }
 380
 381        if (dlen % c->multiple)
 382                dlen = round_down(dlen, c->multiple);
 383        if (dlen < required_len || (u64)dst % c->alignment) {
 384                dst = ctx->dbounce;
 385                dlen = min(required_len, BOUNCE_BUFFER_SIZE);
 386                pr_debug("using decomp dbounce buffer, len %x\n", dlen);
 387        }
 388        if (dlen < c->minimum)
 389                goto usesw;
 390        if (dlen > c->maximum)
 391                dlen = c->maximum;
 392
 393        tmplen = dlen;
 394        timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
 395        do {
 396                dlen = tmplen; /* reset dlen, if we're retrying */
 397                ret = ctx->driver->decompress(src, slen, dst, &dlen, ctx->wmem);
 398        } while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
 399        if (ret) {
 400usesw:
 401                /* reset everything, sw doesn't have constraints */
 402                src = p->in + padding;
 403                slen = be32_to_cpu(g->compressed_length);
 404                spadding = 0;
 405                dst = p->out;
 406                dlen = p->oremain;
 407                if (dlen < required_len) { /* have ignore bytes */
 408                        dst = ctx->dbounce;
 409                        dlen = BOUNCE_BUFFER_SIZE;
 410                }
 411                pr_info_ratelimited("using software 842 decompression\n");
 412                ret = sw842_decompress(src, slen, dst, &dlen);
 413        }
 414        if (ret)
 415                return ret;
 416
 417        slen -= spadding;
 418
 419        dlen -= ignore;
 420        if (ignore)
 421                pr_debug("ignoring last %x bytes\n", ignore);
 422
 423        if (dst == ctx->dbounce)
 424                memcpy(p->out, dst, dlen);
 425
 426        pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
 427                 slen, padding, dlen, ignore);
 428
 429        return update_param(p, slen + padding, dlen);
 430}
 431
 432int nx842_crypto_decompress(struct crypto_tfm *tfm,
 433                            const u8 *src, unsigned int slen,
 434                            u8 *dst, unsigned int *dlen)
 435{
 436        struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
 437        struct nx842_crypto_header *hdr;
 438        struct nx842_crypto_param p;
 439        struct nx842_constraints c = *ctx->driver->constraints;
 440        int n, ret, hdr_len;
 441        u16 ignore = 0;
 442
 443        check_constraints(&c);
 444
 445        p.in = (u8 *)src;
 446        p.iremain = slen;
 447        p.out = dst;
 448        p.oremain = *dlen;
 449        p.ototal = 0;
 450
 451        *dlen = 0;
 452
 453        hdr = (struct nx842_crypto_header *)src;
 454
 455        spin_lock_bh(&ctx->lock);
 456
 457        /* If it doesn't start with our header magic number, assume it's a raw
 458         * 842 compressed buffer and pass it directly to the hardware driver
 459         */
 460        if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
 461                struct nx842_crypto_header_group g = {
 462                        .padding =              0,
 463                        .compressed_length =    cpu_to_be32(p.iremain),
 464                        .uncompressed_length =  cpu_to_be32(p.oremain),
 465                };
 466
 467                ret = decompress(ctx, &p, &g, &c, 0);
 468                if (ret)
 469                        goto unlock;
 470
 471                goto success;
 472        }
 473
 474        if (!hdr->groups) {
 475                pr_err("header has no groups\n");
 476                ret = -EINVAL;
 477                goto unlock;
 478        }
 479        if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
 480                pr_err("header has too many groups %x, max %x\n",
 481                       hdr->groups, NX842_CRYPTO_GROUP_MAX);
 482                ret = -EINVAL;
 483                goto unlock;
 484        }
 485
 486        hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
 487        if (hdr_len > slen) {
 488                ret = -EOVERFLOW;
 489                goto unlock;
 490        }
 491
 492        memcpy(&ctx->header, src, hdr_len);
 493        hdr = &ctx->header;
 494
 495        for (n = 0; n < hdr->groups; n++) {
 496                /* ignore applies to last group */
 497                if (n + 1 == hdr->groups)
 498                        ignore = be16_to_cpu(hdr->ignore);
 499
 500                ret = decompress(ctx, &p, &hdr->group[n], &c, ignore);
 501                if (ret)
 502                        goto unlock;
 503        }
 504
 505success:
 506        *dlen = p.ototal;
 507
 508        pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);
 509
 510        ret = 0;
 511
 512unlock:
 513        spin_unlock_bh(&ctx->lock);
 514
 515        return ret;
 516}
 517EXPORT_SYMBOL_GPL(nx842_crypto_decompress);
 518
 519MODULE_LICENSE("GPL");
 520MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");
 521MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
 522