linux/drivers/crypto/nx/nx-842.c
<<
>>
Prefs
   1/*
   2 * Cryptographic API for the NX-842 hardware compression.
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License as published by
   6 * the Free Software Foundation; either version 2 of the License, or
   7 * (at your option) any later version.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * Copyright (C) IBM Corporation, 2011-2015
  15 *
  16 * Designer of the Power data compression engine:
  17 *   Bulent Abali <abali@us.ibm.com>
  18 *
  19 * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
  20 *                   Seth Jennings <sjenning@linux.vnet.ibm.com>
  21 *
  22 * Rewrite: Dan Streetman <ddstreet@ieee.org>
  23 *
  24 * This is an interface to the NX-842 compression hardware in PowerPC
  25 * processors.  Most of the complexity of this driver is due to the fact that
  26 * the NX-842 compression hardware requires the input and output data buffers
  27 * to be specifically aligned, to be a specific multiple in length, and within
  28 * specific minimum and maximum lengths.  Those restrictions, provided by the
  29 * nx-842 driver via nx842_constraints, mean this driver must use bounce
  30 * buffers and headers to correct misaligned in or out buffers, and to split
  31 * input buffers that are too large.
  32 *
  33 * This driver will fall back to software decompression if the hardware
  34 * decompression fails, so this driver's decompression should never fail as
  35 * long as the provided compressed buffer is valid.  Any compressed buffer
  36 * created by this driver will have a header (except ones where the input
  37 * perfectly matches the constraints); so users of this driver cannot simply
  38 * pass a compressed buffer created by this driver over to the 842 software
  39 * decompression library.  Instead, users must use this driver to decompress;
  40 * if the hardware fails or is unavailable, the compressed buffer will be
  41 * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
  42 * software decompression library.
  43 *
  44 * This does not fall back to software compression, however, since the caller
  45 * of this function is specifically requesting hardware compression; if the
  46 * hardware compression fails, the caller can fall back to software
  47 * compression, and the raw 842 compressed buffer that the software compressor
  48 * creates can be passed to this driver for hardware decompression; any
  49 * buffer without our specific header magic is assumed to be a raw 842 buffer
  50 * and passed directly to the hardware.  Note that the software compression
  51 * library will produce a compressed buffer that is incompatible with the
  52 * hardware decompressor if the original input buffer length is not a multiple
  53 * of 8; if such a compressed buffer is passed to this driver for
  54 * decompression, the hardware will reject it and this driver will then pass
  55 * it over to the software library for decompression.
  56 */
  57
  58#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  59
  60#include <linux/vmalloc.h>
  61#include <linux/sw842.h>
  62#include <linux/spinlock.h>
  63
  64#include "nx-842.h"
  65
  66/* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
  67 * template (see lib/842/842.h), so this magic number will never appear at
  68 * the start of a raw 842 compressed buffer.  That is important, as any buffer
  69 * passed to us without this magic is assumed to be a raw 842 compressed
  70 * buffer, and passed directly to the hardware to decompress.
  71 */
  72#define NX842_CRYPTO_MAGIC      (0xf842)
  73#define NX842_CRYPTO_HEADER_SIZE(g)                             \
  74        (sizeof(struct nx842_crypto_header) +                   \
  75         sizeof(struct nx842_crypto_header_group) * (g))
  76#define NX842_CRYPTO_HEADER_MAX_SIZE                            \
  77        NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
  78
  79/* bounce buffer size */
  80#define BOUNCE_BUFFER_ORDER     (2)
  81#define BOUNCE_BUFFER_SIZE                                      \
  82        ((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))
  83
  84/* try longer on comp because we can fallback to sw decomp if hw is busy */
  85#define COMP_BUSY_TIMEOUT       (250) /* ms */
  86#define DECOMP_BUSY_TIMEOUT     (50) /* ms */
  87
  88struct nx842_crypto_param {
  89        u8 *in;
  90        unsigned int iremain;
  91        u8 *out;
  92        unsigned int oremain;
  93        unsigned int ototal;
  94};
  95
  96static int update_param(struct nx842_crypto_param *p,
  97                        unsigned int slen, unsigned int dlen)
  98{
  99        if (p->iremain < slen)
 100                return -EOVERFLOW;
 101        if (p->oremain < dlen)
 102                return -ENOSPC;
 103
 104        p->in += slen;
 105        p->iremain -= slen;
 106        p->out += dlen;
 107        p->oremain -= dlen;
 108        p->ototal += dlen;
 109
 110        return 0;
 111}
 112
 113int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver)
 114{
 115        struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
 116
 117        spin_lock_init(&ctx->lock);
 118        ctx->driver = driver;
 119        ctx->wmem = kmalloc(driver->workmem_size, GFP_KERNEL);
 120        ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
 121        ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
 122        if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
 123                kfree(ctx->wmem);
 124                free_page((unsigned long)ctx->sbounce);
 125                free_page((unsigned long)ctx->dbounce);
 126                return -ENOMEM;
 127        }
 128
 129        return 0;
 130}
 131EXPORT_SYMBOL_GPL(nx842_crypto_init);
 132
 133void nx842_crypto_exit(struct crypto_tfm *tfm)
 134{
 135        struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
 136
 137        kfree(ctx->wmem);
 138        free_page((unsigned long)ctx->sbounce);
 139        free_page((unsigned long)ctx->dbounce);
 140}
 141EXPORT_SYMBOL_GPL(nx842_crypto_exit);
 142
 143static void check_constraints(struct nx842_constraints *c)
 144{
 145        /* limit maximum, to always have enough bounce buffer to decompress */
 146        if (c->maximum > BOUNCE_BUFFER_SIZE)
 147                c->maximum = BOUNCE_BUFFER_SIZE;
 148}
 149
 150static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
 151{
 152        int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
 153
 154        /* compress should have added space for header */
 155        if (s > be16_to_cpu(hdr->group[0].padding)) {
 156                pr_err("Internal error: no space for header\n");
 157                return -EINVAL;
 158        }
 159
 160        memcpy(buf, hdr, s);
 161
 162        print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0);
 163
 164        return 0;
 165}
 166
 167static int compress(struct nx842_crypto_ctx *ctx,
 168                    struct nx842_crypto_param *p,
 169                    struct nx842_crypto_header_group *g,
 170                    struct nx842_constraints *c,
 171                    u16 *ignore,
 172                    unsigned int hdrsize)
 173{
 174        unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
 175        unsigned int adj_slen = slen;
 176        u8 *src = p->in, *dst = p->out;
 177        int ret, dskip = 0;
 178        ktime_t timeout;
 179
 180        if (p->iremain == 0)
 181                return -EOVERFLOW;
 182
 183        if (p->oremain == 0 || hdrsize + c->minimum > dlen)
 184                return -ENOSPC;
 185
 186        if (slen % c->multiple)
 187                adj_slen = round_up(slen, c->multiple);
 188        if (slen < c->minimum)
 189                adj_slen = c->minimum;
 190        if (slen > c->maximum)
 191                adj_slen = slen = c->maximum;
 192        if (adj_slen > slen || (u64)src % c->alignment) {
 193                adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
 194                slen = min(slen, BOUNCE_BUFFER_SIZE);
 195                if (adj_slen > slen)
 196                        memset(ctx->sbounce + slen, 0, adj_slen - slen);
 197                memcpy(ctx->sbounce, src, slen);
 198                src = ctx->sbounce;
 199                slen = adj_slen;
 200                pr_debug("using comp sbounce buffer, len %x\n", slen);
 201        }
 202
 203        dst += hdrsize;
 204        dlen -= hdrsize;
 205
 206        if ((u64)dst % c->alignment) {
 207                dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
 208                dst += dskip;
 209                dlen -= dskip;
 210        }
 211        if (dlen % c->multiple)
 212                dlen = round_down(dlen, c->multiple);
 213        if (dlen < c->minimum) {
 214nospc:
 215                dst = ctx->dbounce;
 216                dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
 217                dlen = round_down(dlen, c->multiple);
 218                dskip = 0;
 219                pr_debug("using comp dbounce buffer, len %x\n", dlen);
 220        }
 221        if (dlen > c->maximum)
 222                dlen = c->maximum;
 223
 224        tmplen = dlen;
 225        timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
 226        do {
 227                dlen = tmplen; /* reset dlen, if we're retrying */
 228                ret = ctx->driver->compress(src, slen, dst, &dlen, ctx->wmem);
 229                /* possibly we should reduce the slen here, instead of
 230                 * retrying with the dbounce buffer?
 231                 */
 232                if (ret == -ENOSPC && dst != ctx->dbounce)
 233                        goto nospc;
 234        } while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
 235        if (ret)
 236                return ret;
 237
 238        dskip += hdrsize;
 239
 240        if (dst == ctx->dbounce)
 241                memcpy(p->out + dskip, dst, dlen);
 242
 243        g->padding = cpu_to_be16(dskip);
 244        g->compressed_length = cpu_to_be32(dlen);
 245        g->uncompressed_length = cpu_to_be32(slen);
 246
 247        if (p->iremain < slen) {
 248                *ignore = slen - p->iremain;
 249                slen = p->iremain;
 250        }
 251
 252        pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
 253                 slen, *ignore, dlen, dskip);
 254
 255        return update_param(p, slen, dskip + dlen);
 256}
 257
 258int nx842_crypto_compress(struct crypto_tfm *tfm,
 259                          const u8 *src, unsigned int slen,
 260                          u8 *dst, unsigned int *dlen)
 261{
 262        struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
 263        struct nx842_crypto_header *hdr = &ctx->header;
 264        struct nx842_crypto_param p;
 265        struct nx842_constraints c = *ctx->driver->constraints;
 266        unsigned int groups, hdrsize, h;
 267        int ret, n;
 268        bool add_header;
 269        u16 ignore = 0;
 270
 271        check_constraints(&c);
 272
 273        p.in = (u8 *)src;
 274        p.iremain = slen;
 275        p.out = dst;
 276        p.oremain = *dlen;
 277        p.ototal = 0;
 278
 279        *dlen = 0;
 280
 281        groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
 282                       DIV_ROUND_UP(p.iremain, c.maximum));
 283        hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);
 284
 285        spin_lock_bh(&ctx->lock);
 286
 287        /* skip adding header if the buffers meet all constraints */
 288        add_header = (p.iremain % c.multiple    ||
 289                      p.iremain < c.minimum     ||
 290                      p.iremain > c.maximum     ||
 291                      (u64)p.in % c.alignment   ||
 292                      p.oremain % c.multiple    ||
 293                      p.oremain < c.minimum     ||
 294                      p.oremain > c.maximum     ||
 295                      (u64)p.out % c.alignment);
 296
 297        hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
 298        hdr->groups = 0;
 299        hdr->ignore = 0;
 300
 301        while (p.iremain > 0) {
 302                n = hdr->groups++;
 303                ret = -ENOSPC;
 304                if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
 305                        goto unlock;
 306
 307                /* header goes before first group */
 308                h = !n && add_header ? hdrsize : 0;
 309
 310                if (ignore)
 311                        pr_warn("internal error, ignore is set %x\n", ignore);
 312
 313                ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
 314                if (ret)
 315                        goto unlock;
 316        }
 317
 318        if (!add_header && hdr->groups > 1) {
 319                pr_err("Internal error: No header but multiple groups\n");
 320                ret = -EINVAL;
 321                goto unlock;
 322        }
 323
 324        /* ignore indicates the input stream needed to be padded */
 325        hdr->ignore = cpu_to_be16(ignore);
 326        if (ignore)
 327                pr_debug("marked %d bytes as ignore\n", ignore);
 328
 329        if (add_header)
 330                ret = nx842_crypto_add_header(hdr, dst);
 331        if (ret)
 332                goto unlock;
 333
 334        *dlen = p.ototal;
 335
 336        pr_debug("compress total slen %x dlen %x\n", slen, *dlen);
 337
 338unlock:
 339        spin_unlock_bh(&ctx->lock);
 340        return ret;
 341}
 342EXPORT_SYMBOL_GPL(nx842_crypto_compress);
 343
 344static int decompress(struct nx842_crypto_ctx *ctx,
 345                      struct nx842_crypto_param *p,
 346                      struct nx842_crypto_header_group *g,
 347                      struct nx842_constraints *c,
 348                      u16 ignore)
 349{
 350        unsigned int slen = be32_to_cpu(g->compressed_length);
 351        unsigned int required_len = be32_to_cpu(g->uncompressed_length);
 352        unsigned int dlen = p->oremain, tmplen;
 353        unsigned int adj_slen = slen;
 354        u8 *src = p->in, *dst = p->out;
 355        u16 padding = be16_to_cpu(g->padding);
 356        int ret, spadding = 0, dpadding = 0;
 357        ktime_t timeout;
 358
 359        if (!slen || !required_len)
 360                return -EINVAL;
 361
 362        if (p->iremain <= 0 || padding + slen > p->iremain)
 363                return -EOVERFLOW;
 364
 365        if (p->oremain <= 0 || required_len - ignore > p->oremain)
 366                return -ENOSPC;
 367
 368        src += padding;
 369
 370        if (slen % c->multiple)
 371                adj_slen = round_up(slen, c->multiple);
 372        if (slen < c->minimum)
 373                adj_slen = c->minimum;
 374        if (slen > c->maximum)
 375                goto usesw;
 376        if (slen < adj_slen || (u64)src % c->alignment) {
 377                /* we can append padding bytes because the 842 format defines
 378                 * an "end" template (see lib/842/842_decompress.c) and will
 379                 * ignore any bytes following it.
 380                 */
 381                if (slen < adj_slen)
 382                        memset(ctx->sbounce + slen, 0, adj_slen - slen);
 383                memcpy(ctx->sbounce, src, slen);
 384                src = ctx->sbounce;
 385                spadding = adj_slen - slen;
 386                slen = adj_slen;
 387                pr_debug("using decomp sbounce buffer, len %x\n", slen);
 388        }
 389
 390        if (dlen % c->multiple)
 391                dlen = round_down(dlen, c->multiple);
 392        if (dlen < required_len || (u64)dst % c->alignment) {
 393                dst = ctx->dbounce;
 394                dlen = min(required_len, BOUNCE_BUFFER_SIZE);
 395                pr_debug("using decomp dbounce buffer, len %x\n", dlen);
 396        }
 397        if (dlen < c->minimum)
 398                goto usesw;
 399        if (dlen > c->maximum)
 400                dlen = c->maximum;
 401
 402        tmplen = dlen;
 403        timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
 404        do {
 405                dlen = tmplen; /* reset dlen, if we're retrying */
 406                ret = ctx->driver->decompress(src, slen, dst, &dlen, ctx->wmem);
 407        } while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
 408        if (ret) {
 409usesw:
 410                /* reset everything, sw doesn't have constraints */
 411                src = p->in + padding;
 412                slen = be32_to_cpu(g->compressed_length);
 413                spadding = 0;
 414                dst = p->out;
 415                dlen = p->oremain;
 416                dpadding = 0;
 417                if (dlen < required_len) { /* have ignore bytes */
 418                        dst = ctx->dbounce;
 419                        dlen = BOUNCE_BUFFER_SIZE;
 420                }
 421                pr_info_ratelimited("using software 842 decompression\n");
 422                ret = sw842_decompress(src, slen, dst, &dlen);
 423        }
 424        if (ret)
 425                return ret;
 426
 427        slen -= spadding;
 428
 429        dlen -= ignore;
 430        if (ignore)
 431                pr_debug("ignoring last %x bytes\n", ignore);
 432
 433        if (dst == ctx->dbounce)
 434                memcpy(p->out, dst, dlen);
 435
 436        pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
 437                 slen, padding, dlen, ignore);
 438
 439        return update_param(p, slen + padding, dlen);
 440}
 441
 442int nx842_crypto_decompress(struct crypto_tfm *tfm,
 443                            const u8 *src, unsigned int slen,
 444                            u8 *dst, unsigned int *dlen)
 445{
 446        struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
 447        struct nx842_crypto_header *hdr;
 448        struct nx842_crypto_param p;
 449        struct nx842_constraints c = *ctx->driver->constraints;
 450        int n, ret, hdr_len;
 451        u16 ignore = 0;
 452
 453        check_constraints(&c);
 454
 455        p.in = (u8 *)src;
 456        p.iremain = slen;
 457        p.out = dst;
 458        p.oremain = *dlen;
 459        p.ototal = 0;
 460
 461        *dlen = 0;
 462
 463        hdr = (struct nx842_crypto_header *)src;
 464
 465        spin_lock_bh(&ctx->lock);
 466
 467        /* If it doesn't start with our header magic number, assume it's a raw
 468         * 842 compressed buffer and pass it directly to the hardware driver
 469         */
 470        if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
 471                struct nx842_crypto_header_group g = {
 472                        .padding =              0,
 473                        .compressed_length =    cpu_to_be32(p.iremain),
 474                        .uncompressed_length =  cpu_to_be32(p.oremain),
 475                };
 476
 477                ret = decompress(ctx, &p, &g, &c, 0);
 478                if (ret)
 479                        goto unlock;
 480
 481                goto success;
 482        }
 483
 484        if (!hdr->groups) {
 485                pr_err("header has no groups\n");
 486                ret = -EINVAL;
 487                goto unlock;
 488        }
 489        if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
 490                pr_err("header has too many groups %x, max %x\n",
 491                       hdr->groups, NX842_CRYPTO_GROUP_MAX);
 492                ret = -EINVAL;
 493                goto unlock;
 494        }
 495
 496        hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
 497        if (hdr_len > slen) {
 498                ret = -EOVERFLOW;
 499                goto unlock;
 500        }
 501
 502        memcpy(&ctx->header, src, hdr_len);
 503        hdr = &ctx->header;
 504
 505        for (n = 0; n < hdr->groups; n++) {
 506                /* ignore applies to last group */
 507                if (n + 1 == hdr->groups)
 508                        ignore = be16_to_cpu(hdr->ignore);
 509
 510                ret = decompress(ctx, &p, &hdr->group[n], &c, ignore);
 511                if (ret)
 512                        goto unlock;
 513        }
 514
 515success:
 516        *dlen = p.ototal;
 517
 518        pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);
 519
 520        ret = 0;
 521
 522unlock:
 523        spin_unlock_bh(&ctx->lock);
 524
 525        return ret;
 526}
 527EXPORT_SYMBOL_GPL(nx842_crypto_decompress);
 528
 529MODULE_LICENSE("GPL");
 530MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");
 531MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
 532