linux/lib/842/842_compress.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * 842 Software Compression
   4 *
   5 * Copyright (C) 2015 Dan Streetman, IBM Corp
   6 *
   7 * See 842.h for details of the 842 compressed format.
   8 */
   9
  10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  11#define MODULE_NAME "842_compress"
  12
  13#include <linux/hashtable.h>
  14
  15#include "842.h"
  16#include "842_debugfs.h"
  17
  18#define SW842_HASHTABLE8_BITS   (10)
  19#define SW842_HASHTABLE4_BITS   (11)
  20#define SW842_HASHTABLE2_BITS   (10)
  21
  22/* By default, we allow compressing input buffers of any length, but we must
  23 * use the non-standard "short data" template so the decompressor can correctly
  24 * reproduce the uncompressed data buffer at the right length.  However the
  25 * hardware 842 compressor will not recognize the "short data" template, and
  26 * will fail to decompress any compressed buffer containing it (I have no idea
  27 * why anyone would want to use software to compress and hardware to decompress
  28 * but that's beside the point).  This parameter forces the compression
  29 * function to simply reject any input buffer that isn't a multiple of 8 bytes
  30 * long, instead of using the "short data" template, so that all compressed
  31 * buffers produced by this function will be decompressable by the 842 hardware
  32 * decompressor.  Unless you have a specific need for that, leave this disabled
  33 * so that any length buffer can be compressed.
  34 */
  35static bool sw842_strict;
  36module_param_named(strict, sw842_strict, bool, 0644);
  37
  38static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
  39        { I8, N0, N0, N0, 0x19 }, /* 8 */
  40        { I4, I4, N0, N0, 0x18 }, /* 18 */
  41        { I4, I2, I2, N0, 0x17 }, /* 25 */
  42        { I2, I2, I4, N0, 0x13 }, /* 25 */
  43        { I2, I2, I2, I2, 0x12 }, /* 32 */
  44        { I4, I2, D2, N0, 0x16 }, /* 33 */
  45        { I4, D2, I2, N0, 0x15 }, /* 33 */
  46        { I2, D2, I4, N0, 0x0e }, /* 33 */
  47        { D2, I2, I4, N0, 0x09 }, /* 33 */
  48        { I2, I2, I2, D2, 0x11 }, /* 40 */
  49        { I2, I2, D2, I2, 0x10 }, /* 40 */
  50        { I2, D2, I2, I2, 0x0d }, /* 40 */
  51        { D2, I2, I2, I2, 0x08 }, /* 40 */
  52        { I4, D4, N0, N0, 0x14 }, /* 41 */
  53        { D4, I4, N0, N0, 0x04 }, /* 41 */
  54        { I2, I2, D4, N0, 0x0f }, /* 48 */
  55        { I2, D2, I2, D2, 0x0c }, /* 48 */
  56        { I2, D4, I2, N0, 0x0b }, /* 48 */
  57        { D2, I2, I2, D2, 0x07 }, /* 48 */
  58        { D2, I2, D2, I2, 0x06 }, /* 48 */
  59        { D4, I2, I2, N0, 0x03 }, /* 48 */
  60        { I2, D2, D4, N0, 0x0a }, /* 56 */
  61        { D2, I2, D4, N0, 0x05 }, /* 56 */
  62        { D4, I2, D2, N0, 0x02 }, /* 56 */
  63        { D4, D2, I2, N0, 0x01 }, /* 56 */
  64        { D8, N0, N0, N0, 0x00 }, /* 64 */
  65};
  66
  67struct sw842_hlist_node8 {
  68        struct hlist_node node;
  69        u64 data;
  70        u8 index;
  71};
  72
  73struct sw842_hlist_node4 {
  74        struct hlist_node node;
  75        u32 data;
  76        u16 index;
  77};
  78
  79struct sw842_hlist_node2 {
  80        struct hlist_node node;
  81        u16 data;
  82        u8 index;
  83};
  84
  85#define INDEX_NOT_FOUND         (-1)
  86#define INDEX_NOT_CHECKED       (-2)
  87
  88struct sw842_param {
  89        u8 *in;
  90        u8 *instart;
  91        u64 ilen;
  92        u8 *out;
  93        u64 olen;
  94        u8 bit;
  95        u64 data8[1];
  96        u32 data4[2];
  97        u16 data2[4];
  98        int index8[1];
  99        int index4[2];
 100        int index2[4];
 101        DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
 102        DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
 103        DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
 104        struct sw842_hlist_node8 node8[1 << I8_BITS];
 105        struct sw842_hlist_node4 node4[1 << I4_BITS];
 106        struct sw842_hlist_node2 node2[1 << I2_BITS];
 107};
 108
 109#define get_input_data(p, o, b)                                         \
 110        be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
 111
 112#define init_hashtable_nodes(p, b)      do {                    \
 113        int _i;                                                 \
 114        hash_init((p)->htable##b);                              \
 115        for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) {     \
 116                (p)->node##b[_i].index = _i;                    \
 117                (p)->node##b[_i].data = 0;                      \
 118                INIT_HLIST_NODE(&(p)->node##b[_i].node);        \
 119        }                                                       \
 120} while (0)
 121
 122#define find_index(p, b, n)     ({                                      \
 123        struct sw842_hlist_node##b *_n;                                 \
 124        p->index##b[n] = INDEX_NOT_FOUND;                               \
 125        hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) { \
 126                if (p->data##b[n] == _n->data) {                        \
 127                        p->index##b[n] = _n->index;                     \
 128                        break;                                          \
 129                }                                                       \
 130        }                                                               \
 131        p->index##b[n] >= 0;                                            \
 132})
 133
 134#define check_index(p, b, n)                    \
 135        ((p)->index##b[n] == INDEX_NOT_CHECKED  \
 136         ? find_index(p, b, n)                  \
 137         : (p)->index##b[n] >= 0)
 138
 139#define replace_hash(p, b, i, d)        do {                            \
 140        struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)];        \
 141        hash_del(&_n->node);                                            \
 142        _n->data = (p)->data##b[d];                                     \
 143        pr_debug("add hash index%x %x pos %x data %lx\n", b,            \
 144                 (unsigned int)_n->index,                               \
 145                 (unsigned int)((p)->in - (p)->instart),                \
 146                 (unsigned long)_n->data);                              \
 147        hash_add((p)->htable##b, &_n->node, _n->data);                  \
 148} while (0)
 149
 150static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
 151
 152static int add_bits(struct sw842_param *p, u64 d, u8 n);
 153
 154static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
 155{
 156        int ret;
 157
 158        if (n <= s)
 159                return -EINVAL;
 160
 161        ret = add_bits(p, d >> s, n - s);
 162        if (ret)
 163                return ret;
 164        return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
 165}
 166
 167static int add_bits(struct sw842_param *p, u64 d, u8 n)
 168{
 169        int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
 170        u64 o;
 171        u8 *out = p->out;
 172
 173        pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);
 174
 175        if (n > 64)
 176                return -EINVAL;
 177
 178        /* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
 179         * or if we're at the end of the output buffer and would write past end
 180         */
 181        if (bits > 64)
 182                return __split_add_bits(p, d, n, 32);
 183        else if (p->olen < 8 && bits > 32 && bits <= 56)
 184                return __split_add_bits(p, d, n, 16);
 185        else if (p->olen < 4 && bits > 16 && bits <= 24)
 186                return __split_add_bits(p, d, n, 8);
 187
 188        if (DIV_ROUND_UP(bits, 8) > p->olen)
 189                return -ENOSPC;
 190
 191        o = *out & bmask[b];
 192        d <<= s;
 193
 194        if (bits <= 8)
 195                *out = o | d;
 196        else if (bits <= 16)
 197                put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
 198        else if (bits <= 24)
 199                put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
 200        else if (bits <= 32)
 201                put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
 202        else if (bits <= 40)
 203                put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
 204        else if (bits <= 48)
 205                put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
 206        else if (bits <= 56)
 207                put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
 208        else
 209                put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);
 210
 211        p->bit += n;
 212
 213        if (p->bit > 7) {
 214                p->out += p->bit / 8;
 215                p->olen -= p->bit / 8;
 216                p->bit %= 8;
 217        }
 218
 219        return 0;
 220}
 221
 222static int add_template(struct sw842_param *p, u8 c)
 223{
 224        int ret, i, b = 0;
 225        u8 *t = comp_ops[c];
 226        bool inv = false;
 227
 228        if (c >= OPS_MAX)
 229                return -EINVAL;
 230
 231        pr_debug("template %x\n", t[4]);
 232
 233        ret = add_bits(p, t[4], OP_BITS);
 234        if (ret)
 235                return ret;
 236
 237        for (i = 0; i < 4; i++) {
 238                pr_debug("op %x\n", t[i]);
 239
 240                switch (t[i] & OP_AMOUNT) {
 241                case OP_AMOUNT_8:
 242                        if (b)
 243                                inv = true;
 244                        else if (t[i] & OP_ACTION_INDEX)
 245                                ret = add_bits(p, p->index8[0], I8_BITS);
 246                        else if (t[i] & OP_ACTION_DATA)
 247                                ret = add_bits(p, p->data8[0], 64);
 248                        else
 249                                inv = true;
 250                        break;
 251                case OP_AMOUNT_4:
 252                        if (b == 2 && t[i] & OP_ACTION_DATA)
 253                                ret = add_bits(p, get_input_data(p, 2, 32), 32);
 254                        else if (b != 0 && b != 4)
 255                                inv = true;
 256                        else if (t[i] & OP_ACTION_INDEX)
 257                                ret = add_bits(p, p->index4[b >> 2], I4_BITS);
 258                        else if (t[i] & OP_ACTION_DATA)
 259                                ret = add_bits(p, p->data4[b >> 2], 32);
 260                        else
 261                                inv = true;
 262                        break;
 263                case OP_AMOUNT_2:
 264                        if (b != 0 && b != 2 && b != 4 && b != 6)
 265                                inv = true;
 266                        if (t[i] & OP_ACTION_INDEX)
 267                                ret = add_bits(p, p->index2[b >> 1], I2_BITS);
 268                        else if (t[i] & OP_ACTION_DATA)
 269                                ret = add_bits(p, p->data2[b >> 1], 16);
 270                        else
 271                                inv = true;
 272                        break;
 273                case OP_AMOUNT_0:
 274                        inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
 275                        break;
 276                default:
 277                        inv = true;
 278                        break;
 279                }
 280
 281                if (ret)
 282                        return ret;
 283
 284                if (inv) {
 285                        pr_err("Invalid templ %x op %d : %x %x %x %x\n",
 286                               c, i, t[0], t[1], t[2], t[3]);
 287                        return -EINVAL;
 288                }
 289
 290                b += t[i] & OP_AMOUNT;
 291        }
 292
 293        if (b != 8) {
 294                pr_err("Invalid template %x len %x : %x %x %x %x\n",
 295                       c, b, t[0], t[1], t[2], t[3]);
 296                return -EINVAL;
 297        }
 298
 299        if (sw842_template_counts)
 300                atomic_inc(&template_count[t[4]]);
 301
 302        return 0;
 303}
 304
 305static int add_repeat_template(struct sw842_param *p, u8 r)
 306{
 307        int ret;
 308
 309        /* repeat param is 0-based */
 310        if (!r || --r > REPEAT_BITS_MAX)
 311                return -EINVAL;
 312
 313        ret = add_bits(p, OP_REPEAT, OP_BITS);
 314        if (ret)
 315                return ret;
 316
 317        ret = add_bits(p, r, REPEAT_BITS);
 318        if (ret)
 319                return ret;
 320
 321        if (sw842_template_counts)
 322                atomic_inc(&template_repeat_count);
 323
 324        return 0;
 325}
 326
 327static int add_short_data_template(struct sw842_param *p, u8 b)
 328{
 329        int ret, i;
 330
 331        if (!b || b > SHORT_DATA_BITS_MAX)
 332                return -EINVAL;
 333
 334        ret = add_bits(p, OP_SHORT_DATA, OP_BITS);
 335        if (ret)
 336                return ret;
 337
 338        ret = add_bits(p, b, SHORT_DATA_BITS);
 339        if (ret)
 340                return ret;
 341
 342        for (i = 0; i < b; i++) {
 343                ret = add_bits(p, p->in[i], 8);
 344                if (ret)
 345                        return ret;
 346        }
 347
 348        if (sw842_template_counts)
 349                atomic_inc(&template_short_data_count);
 350
 351        return 0;
 352}
 353
 354static int add_zeros_template(struct sw842_param *p)
 355{
 356        int ret = add_bits(p, OP_ZEROS, OP_BITS);
 357
 358        if (ret)
 359                return ret;
 360
 361        if (sw842_template_counts)
 362                atomic_inc(&template_zeros_count);
 363
 364        return 0;
 365}
 366
 367static int add_end_template(struct sw842_param *p)
 368{
 369        int ret = add_bits(p, OP_END, OP_BITS);
 370
 371        if (ret)
 372                return ret;
 373
 374        if (sw842_template_counts)
 375                atomic_inc(&template_end_count);
 376
 377        return 0;
 378}
 379
 380static bool check_template(struct sw842_param *p, u8 c)
 381{
 382        u8 *t = comp_ops[c];
 383        int i, match, b = 0;
 384
 385        if (c >= OPS_MAX)
 386                return false;
 387
 388        for (i = 0; i < 4; i++) {
 389                if (t[i] & OP_ACTION_INDEX) {
 390                        if (t[i] & OP_AMOUNT_2)
 391                                match = check_index(p, 2, b >> 1);
 392                        else if (t[i] & OP_AMOUNT_4)
 393                                match = check_index(p, 4, b >> 2);
 394                        else if (t[i] & OP_AMOUNT_8)
 395                                match = check_index(p, 8, 0);
 396                        else
 397                                return false;
 398                        if (!match)
 399                                return false;
 400                }
 401
 402                b += t[i] & OP_AMOUNT;
 403        }
 404
 405        return true;
 406}
 407
 408static void get_next_data(struct sw842_param *p)
 409{
 410        p->data8[0] = get_input_data(p, 0, 64);
 411        p->data4[0] = get_input_data(p, 0, 32);
 412        p->data4[1] = get_input_data(p, 4, 32);
 413        p->data2[0] = get_input_data(p, 0, 16);
 414        p->data2[1] = get_input_data(p, 2, 16);
 415        p->data2[2] = get_input_data(p, 4, 16);
 416        p->data2[3] = get_input_data(p, 6, 16);
 417}
 418
 419/* update the hashtable entries.
 420 * only call this after finding/adding the current template
 421 * the dataN fields for the current 8 byte block must be already updated
 422 */
 423static void update_hashtables(struct sw842_param *p)
 424{
 425        u64 pos = p->in - p->instart;
 426        u64 n8 = (pos >> 3) % (1 << I8_BITS);
 427        u64 n4 = (pos >> 2) % (1 << I4_BITS);
 428        u64 n2 = (pos >> 1) % (1 << I2_BITS);
 429
 430        replace_hash(p, 8, n8, 0);
 431        replace_hash(p, 4, n4, 0);
 432        replace_hash(p, 4, n4, 1);
 433        replace_hash(p, 2, n2, 0);
 434        replace_hash(p, 2, n2, 1);
 435        replace_hash(p, 2, n2, 2);
 436        replace_hash(p, 2, n2, 3);
 437}
 438
 439/* find the next template to use, and add it
 440 * the p->dataN fields must already be set for the current 8 byte block
 441 */
 442static int process_next(struct sw842_param *p)
 443{
 444        int ret, i;
 445
 446        p->index8[0] = INDEX_NOT_CHECKED;
 447        p->index4[0] = INDEX_NOT_CHECKED;
 448        p->index4[1] = INDEX_NOT_CHECKED;
 449        p->index2[0] = INDEX_NOT_CHECKED;
 450        p->index2[1] = INDEX_NOT_CHECKED;
 451        p->index2[2] = INDEX_NOT_CHECKED;
 452        p->index2[3] = INDEX_NOT_CHECKED;
 453
 454        /* check up to OPS_MAX - 1; last op is our fallback */
 455        for (i = 0; i < OPS_MAX - 1; i++) {
 456                if (check_template(p, i))
 457                        break;
 458        }
 459
 460        ret = add_template(p, i);
 461        if (ret)
 462                return ret;
 463
 464        return 0;
 465}
 466
 467/**
 468 * sw842_compress
 469 *
 470 * Compress the uncompressed buffer of length @ilen at @in to the output buffer
 471 * @out, using no more than @olen bytes, using the 842 compression format.
 472 *
 473 * Returns: 0 on success, error on failure.  The @olen parameter
 474 * will contain the number of output bytes written on success, or
 475 * 0 on error.
 476 */
 477int sw842_compress(const u8 *in, unsigned int ilen,
 478                   u8 *out, unsigned int *olen, void *wmem)
 479{
 480        struct sw842_param *p = (struct sw842_param *)wmem;
 481        int ret;
 482        u64 last, next, pad, total;
 483        u8 repeat_count = 0;
 484        u32 crc;
 485
 486        BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
 487
 488        init_hashtable_nodes(p, 8);
 489        init_hashtable_nodes(p, 4);
 490        init_hashtable_nodes(p, 2);
 491
 492        p->in = (u8 *)in;
 493        p->instart = p->in;
 494        p->ilen = ilen;
 495        p->out = out;
 496        p->olen = *olen;
 497        p->bit = 0;
 498
 499        total = p->olen;
 500
 501        *olen = 0;
 502
 503        /* if using strict mode, we can only compress a multiple of 8 */
 504        if (sw842_strict && (ilen % 8)) {
 505                pr_err("Using strict mode, can't compress len %d\n", ilen);
 506                return -EINVAL;
 507        }
 508
 509        /* let's compress at least 8 bytes, mkay? */
 510        if (unlikely(ilen < 8))
 511                goto skip_comp;
 512
 513        /* make initial 'last' different so we don't match the first time */
 514        last = ~get_unaligned((u64 *)p->in);
 515
 516        while (p->ilen > 7) {
 517                next = get_unaligned((u64 *)p->in);
 518
 519                /* must get the next data, as we need to update the hashtable
 520                 * entries with the new data every time
 521                 */
 522                get_next_data(p);
 523
 524                /* we don't care about endianness in last or next;
 525                 * we're just comparing 8 bytes to another 8 bytes,
 526                 * they're both the same endianness
 527                 */
 528                if (next == last) {
 529                        /* repeat count bits are 0-based, so we stop at +1 */
 530                        if (++repeat_count <= REPEAT_BITS_MAX)
 531                                goto repeat;
 532                }
 533                if (repeat_count) {
 534                        ret = add_repeat_template(p, repeat_count);
 535                        repeat_count = 0;
 536                        if (next == last) /* reached max repeat bits */
 537                                goto repeat;
 538                }
 539
 540                if (next == 0)
 541                        ret = add_zeros_template(p);
 542                else
 543                        ret = process_next(p);
 544
 545                if (ret)
 546                        return ret;
 547
 548repeat:
 549                last = next;
 550                update_hashtables(p);
 551                p->in += 8;
 552                p->ilen -= 8;
 553        }
 554
 555        if (repeat_count) {
 556                ret = add_repeat_template(p, repeat_count);
 557                if (ret)
 558                        return ret;
 559        }
 560
 561skip_comp:
 562        if (p->ilen > 0) {
 563                ret = add_short_data_template(p, p->ilen);
 564                if (ret)
 565                        return ret;
 566
 567                p->in += p->ilen;
 568                p->ilen = 0;
 569        }
 570
 571        ret = add_end_template(p);
 572        if (ret)
 573                return ret;
 574
 575        /*
 576         * crc(0:31) is appended to target data starting with the next
 577         * bit after End of stream template.
 578         * nx842 calculates CRC for data in big-endian format. So doing
 579         * same here so that sw842 decompression can be used for both
 580         * compressed data.
 581         */
 582        crc = crc32_be(0, in, ilen);
 583        ret = add_bits(p, crc, CRC_BITS);
 584        if (ret)
 585                return ret;
 586
 587        if (p->bit) {
 588                p->out++;
 589                p->olen--;
 590                p->bit = 0;
 591        }
 592
 593        /* pad compressed length to multiple of 8 */
 594        pad = (8 - ((total - p->olen) % 8)) % 8;
 595        if (pad) {
 596                if (pad > p->olen) /* we were so close! */
 597                        return -ENOSPC;
 598                memset(p->out, 0, pad);
 599                p->out += pad;
 600                p->olen -= pad;
 601        }
 602
 603        if (unlikely((total - p->olen) > UINT_MAX))
 604                return -ENOSPC;
 605
 606        *olen = total - p->olen;
 607
 608        return 0;
 609}
 610EXPORT_SYMBOL_GPL(sw842_compress);
 611
 612static int __init sw842_init(void)
 613{
 614        if (sw842_template_counts)
 615                sw842_debugfs_create();
 616
 617        return 0;
 618}
 619module_init(sw842_init);
 620
 621static void __exit sw842_exit(void)
 622{
 623        if (sw842_template_counts)
 624                sw842_debugfs_remove();
 625}
 626module_exit(sw842_exit);
 627
 628MODULE_LICENSE("GPL");
 629MODULE_DESCRIPTION("Software 842 Compressor");
 630MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
 631