linux/lib/842/842_compress.c
<<
>>
Prefs
   1/*
   2 * 842 Software Compression
   3 *
   4 * Copyright (C) 2015 Dan Streetman, IBM Corp
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2 of the License, or
   9 * (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * See 842.h for details of the 842 compressed format.
  17 */
  18
  19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  20#define MODULE_NAME "842_compress"
  21
  22#include <linux/hashtable.h>
  23
  24#include "842.h"
  25#include "842_debugfs.h"
  26
   27#define SW842_HASHTABLE8_BITS   (10) /* bits argument of DECLARE_HASHTABLE() for htable8 */
   28#define SW842_HASHTABLE4_BITS   (11) /* bits argument of DECLARE_HASHTABLE() for htable4 */
   29#define SW842_HASHTABLE2_BITS   (10) /* bits argument of DECLARE_HASHTABLE() for htable2 */
  30
  31/* By default, we allow compressing input buffers of any length, but we must
  32 * use the non-standard "short data" template so the decompressor can correctly
  33 * reproduce the uncompressed data buffer at the right length.  However the
  34 * hardware 842 compressor will not recognize the "short data" template, and
  35 * will fail to decompress any compressed buffer containing it (I have no idea
  36 * why anyone would want to use software to compress and hardware to decompress
  37 * but that's beside the point).  This parameter forces the compression
  38 * function to simply reject any input buffer that isn't a multiple of 8 bytes
  39 * long, instead of using the "short data" template, so that all compressed
   40 * buffers produced by this function will be decompressible by the 842 hardware
  41 * decompressor.  Unless you have a specific need for that, leave this disabled
  42 * so that any length buffer can be compressed.
  43 */
   44static bool sw842_strict; /* when set, sw842_compress() rejects input lengths that are not a multiple of 8 */
   45module_param_named(strict, sw842_strict, bool, 0644); /* exposed as module parameter "strict" */
  46
   47static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
        /*
         * Each row holds four ops (columns 0-3, decoded in add_template() and
         * check_template() through the OP_ACTION_* and OP_AMOUNT masks)
         * followed by the template opcode that add_template() emits
         * (column 4).  The I*, D* and N0 constants come from 842.h.
         *
         * process_next() tries the rows in order and uses the first one whose
         * index ops all match, falling back to the last row.  The trailing
         * comment on each row is the size in bits noted by the original
         * author.
         */
   48        { I8, N0, N0, N0, 0x19 }, /* 8 */
   49        { I4, I4, N0, N0, 0x18 }, /* 18 */
   50        { I4, I2, I2, N0, 0x17 }, /* 25 */
   51        { I2, I2, I4, N0, 0x13 }, /* 25 */
   52        { I2, I2, I2, I2, 0x12 }, /* 32 */
   53        { I4, I2, D2, N0, 0x16 }, /* 33 */
   54        { I4, D2, I2, N0, 0x15 }, /* 33 */
   55        { I2, D2, I4, N0, 0x0e }, /* 33 */
   56        { D2, I2, I4, N0, 0x09 }, /* 33 */
   57        { I2, I2, I2, D2, 0x11 }, /* 40 */
   58        { I2, I2, D2, I2, 0x10 }, /* 40 */
   59        { I2, D2, I2, I2, 0x0d }, /* 40 */
   60        { D2, I2, I2, I2, 0x08 }, /* 40 */
   61        { I4, D4, N0, N0, 0x14 }, /* 41 */
   62        { D4, I4, N0, N0, 0x04 }, /* 41 */
   63        { I2, I2, D4, N0, 0x0f }, /* 48 */
   64        { I2, D2, I2, D2, 0x0c }, /* 48 */
   65        { I2, D4, I2, N0, 0x0b }, /* 48 */
   66        { D2, I2, I2, D2, 0x07 }, /* 48 */
   67        { D2, I2, D2, I2, 0x06 }, /* 48 */
   68        { D4, I2, I2, N0, 0x03 }, /* 48 */
   69        { I2, D2, D4, N0, 0x0a }, /* 56 */
   70        { D2, I2, D4, N0, 0x05 }, /* 56 */
   71        { D4, I2, D2, N0, 0x02 }, /* 56 */
   72        { D4, D2, I2, N0, 0x01 }, /* 56 */
   73        { D8, N0, N0, N0, 0x00 }, /* 64 */
   74};
  75
   76struct sw842_hlist_node8 { /* hash table entry for one 8-byte value */
   77        struct hlist_node node; /* linkage into htable8 */
   78        u64 data; /* value stored by replace_hash() from data8[] */
   79        u8 index; /* this entry's slot in node8[]; set once by init_hashtable_nodes() */
   80};
  81
   82struct sw842_hlist_node4 { /* hash table entry for one 4-byte value */
   83        struct hlist_node node; /* linkage into htable4 */
   84        u32 data; /* value stored by replace_hash() from data4[] */
   85        u16 index; /* this entry's slot in node4[]; set once by init_hashtable_nodes() */
   86};
  87
   88struct sw842_hlist_node2 { /* hash table entry for one 2-byte value */
   89        struct hlist_node node; /* linkage into htable2 */
   90        u16 data; /* value stored by replace_hash() from data2[] */
   91        u8 index; /* this entry's slot in node2[]; set once by init_hashtable_nodes() */
   92};
  93
   94#define INDEX_NOT_FOUND         (-1) /* find_index() ran and found no matching entry */
   95#define INDEX_NOT_CHECKED       (-2) /* set by process_next(); check_index() has not looked this value up yet */
  96
   97struct sw842_param { /* compressor state; lives in the caller-supplied wmem */
   98        u8 *in; /* current read position in the input */
   99        u8 *instart; /* start of the input buffer */
  100        u64 ilen; /* input bytes not yet consumed */
  101        u8 *out; /* output byte currently being filled */
  102        u64 olen; /* output bytes still available from out */
  103        u8 bit; /* number of bits already used in *out (0-7 between calls) */
  104        u64 data8[1]; /* current 8-byte block as one big-endian-decoded value */
  105        u32 data4[2]; /* the same block as two 4-byte values */
  106        u16 data2[4]; /* the same block as four 2-byte values */
  107        int index8[1]; /* lookup result for data8[]: node index or INDEX_NOT_* */
  108        int index4[2]; /* lookup results for data4[] */
  109        int index2[4]; /* lookup results for data2[] */
  110        DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS); /* hash of node8[] by data */
  111        DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS); /* hash of node4[] by data */
  112        DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS); /* hash of node2[] by data */
  113        struct sw842_hlist_node8 node8[1 << I8_BITS]; /* one entry per encodable 8-byte index */
  114        struct sw842_hlist_node4 node4[1 << I4_BITS]; /* one entry per encodable 4-byte index */
  115        struct sw842_hlist_node2 node2[1 << I2_BITS]; /* one entry per encodable 2-byte index */
  116};
 117
  118#define get_input_data(p, o, b) /* unaligned b-bit big-endian load at in + o, as a CPU value */ \
  119        be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
 120
  121#define init_hashtable_nodes(p, b)      do { /* empty htable<b>, reset node<b>[] */ \
  122        int _i;                                                 \
  123        hash_init((p)->htable##b);                              \
  124        for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) {     \
  125                (p)->node##b[_i].index = _i; /* index == array slot */ \
  126                (p)->node##b[_i].data = 0;                      \
  127                INIT_HLIST_NODE(&(p)->node##b[_i].node);        \
  128        }                                                       \
  129} while (0)
 130
 131#define find_index(p, b, n)     ({                                      \
 132        struct sw842_hlist_node##b *_n;                                 \
 133        p->index##b[n] = INDEX_NOT_FOUND;                               \
 134        hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) { \
 135                if (p->data##b[n] == _n->data) {                        \
 136                        p->index##b[n] = _n->index;                     \
 137                        break;                                          \
 138                }                                                       \
 139        }                                                               \
 140        p->index##b[n] >= 0;                                            \
 141})
 142
  143#define check_index(p, b, n) /* true if data<b>[n] is in the hash; runs find_index() only on first use */ \
  144        ((p)->index##b[n] == INDEX_NOT_CHECKED  \
  145         ? find_index(p, b, n)                  \
  146         : (p)->index##b[n] >= 0)
 147
  148#define replace_hash(p, b, i, d)        do { /* re-key node<b>[i + d] with data<b>[d] */ \
  149        struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)];        \
  150        hash_del(&_n->node); /* drop the node's previous value */       \
  151        _n->data = (p)->data##b[d];                                     \
  152        pr_debug("add hash index%x %x pos %x data %lx\n", b,            \
  153                 (unsigned int)_n->index,                               \
  154                 (unsigned int)((p)->in - (p)->instart),                \
  155                 (unsigned long)_n->data);                              \
  156        hash_add((p)->htable##b, &_n->node, _n->data);                  \
  157} while (0)
 158
 159static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
 160
 161static int add_bits(struct sw842_param *p, u64 d, u8 n);
 162
 163static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
 164{
 165        int ret;
 166
 167        if (n <= s)
 168                return -EINVAL;
 169
 170        ret = add_bits(p, d >> s, n - s);
 171        if (ret)
 172                return ret;
 173        return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
 174}
 175
 176static int add_bits(struct sw842_param *p, u64 d, u8 n)
 177{
 178        int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
 179        u64 o;
 180        u8 *out = p->out;
 181
 182        pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);
 183
 184        if (n > 64)
 185                return -EINVAL;
 186
 187        /* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
 188         * or if we're at the end of the output buffer and would write past end
 189         */
 190        if (bits > 64)
 191                return __split_add_bits(p, d, n, 32);
 192        else if (p->olen < 8 && bits > 32 && bits <= 56)
 193                return __split_add_bits(p, d, n, 16);
 194        else if (p->olen < 4 && bits > 16 && bits <= 24)
 195                return __split_add_bits(p, d, n, 8);
 196
 197        if (DIV_ROUND_UP(bits, 8) > p->olen)
 198                return -ENOSPC;
 199
 200        o = *out & bmask[b];
 201        d <<= s;
 202
 203        if (bits <= 8)
 204                *out = o | d;
 205        else if (bits <= 16)
 206                put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
 207        else if (bits <= 24)
 208                put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
 209        else if (bits <= 32)
 210                put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
 211        else if (bits <= 40)
 212                put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
 213        else if (bits <= 48)
 214                put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
 215        else if (bits <= 56)
 216                put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
 217        else
 218                put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);
 219
 220        p->bit += n;
 221
 222        if (p->bit > 7) {
 223                p->out += p->bit / 8;
 224                p->olen -= p->bit / 8;
 225                p->bit %= 8;
 226        }
 227
 228        return 0;
 229}
 230
 231static int add_template(struct sw842_param *p, u8 c)
 232{
 233        int ret, i, b = 0;
 234        u8 *t = comp_ops[c];
 235        bool inv = false;
 236
 237        if (c >= OPS_MAX)
 238                return -EINVAL;
 239
 240        pr_debug("template %x\n", t[4]);
 241
 242        ret = add_bits(p, t[4], OP_BITS);
 243        if (ret)
 244                return ret;
 245
 246        for (i = 0; i < 4; i++) {
 247                pr_debug("op %x\n", t[i]);
 248
 249                switch (t[i] & OP_AMOUNT) {
 250                case OP_AMOUNT_8:
 251                        if (b)
 252                                inv = true;
 253                        else if (t[i] & OP_ACTION_INDEX)
 254                                ret = add_bits(p, p->index8[0], I8_BITS);
 255                        else if (t[i] & OP_ACTION_DATA)
 256                                ret = add_bits(p, p->data8[0], 64);
 257                        else
 258                                inv = true;
 259                        break;
 260                case OP_AMOUNT_4:
 261                        if (b == 2 && t[i] & OP_ACTION_DATA)
 262                                ret = add_bits(p, get_input_data(p, 2, 32), 32);
 263                        else if (b != 0 && b != 4)
 264                                inv = true;
 265                        else if (t[i] & OP_ACTION_INDEX)
 266                                ret = add_bits(p, p->index4[b >> 2], I4_BITS);
 267                        else if (t[i] & OP_ACTION_DATA)
 268                                ret = add_bits(p, p->data4[b >> 2], 32);
 269                        else
 270                                inv = true;
 271                        break;
 272                case OP_AMOUNT_2:
 273                        if (b != 0 && b != 2 && b != 4 && b != 6)
 274                                inv = true;
 275                        if (t[i] & OP_ACTION_INDEX)
 276                                ret = add_bits(p, p->index2[b >> 1], I2_BITS);
 277                        else if (t[i] & OP_ACTION_DATA)
 278                                ret = add_bits(p, p->data2[b >> 1], 16);
 279                        else
 280                                inv = true;
 281                        break;
 282                case OP_AMOUNT_0:
 283                        inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
 284                        break;
 285                default:
 286                        inv = true;
 287                        break;
 288                }
 289
 290                if (ret)
 291                        return ret;
 292
 293                if (inv) {
 294                        pr_err("Invalid templ %x op %d : %x %x %x %x\n",
 295                               c, i, t[0], t[1], t[2], t[3]);
 296                        return -EINVAL;
 297                }
 298
 299                b += t[i] & OP_AMOUNT;
 300        }
 301
 302        if (b != 8) {
 303                pr_err("Invalid template %x len %x : %x %x %x %x\n",
 304                       c, b, t[0], t[1], t[2], t[3]);
 305                return -EINVAL;
 306        }
 307
 308        if (sw842_template_counts)
 309                atomic_inc(&template_count[t[4]]);
 310
 311        return 0;
 312}
 313
 314static int add_repeat_template(struct sw842_param *p, u8 r)
 315{
 316        int ret;
 317
 318        /* repeat param is 0-based */
 319        if (!r || --r > REPEAT_BITS_MAX)
 320                return -EINVAL;
 321
 322        ret = add_bits(p, OP_REPEAT, OP_BITS);
 323        if (ret)
 324                return ret;
 325
 326        ret = add_bits(p, r, REPEAT_BITS);
 327        if (ret)
 328                return ret;
 329
 330        if (sw842_template_counts)
 331                atomic_inc(&template_repeat_count);
 332
 333        return 0;
 334}
 335
 336static int add_short_data_template(struct sw842_param *p, u8 b)
 337{
 338        int ret, i;
 339
 340        if (!b || b > SHORT_DATA_BITS_MAX)
 341                return -EINVAL;
 342
 343        ret = add_bits(p, OP_SHORT_DATA, OP_BITS);
 344        if (ret)
 345                return ret;
 346
 347        ret = add_bits(p, b, SHORT_DATA_BITS);
 348        if (ret)
 349                return ret;
 350
 351        for (i = 0; i < b; i++) {
 352                ret = add_bits(p, p->in[i], 8);
 353                if (ret)
 354                        return ret;
 355        }
 356
 357        if (sw842_template_counts)
 358                atomic_inc(&template_short_data_count);
 359
 360        return 0;
 361}
 362
 363static int add_zeros_template(struct sw842_param *p)
 364{
 365        int ret = add_bits(p, OP_ZEROS, OP_BITS);
 366
 367        if (ret)
 368                return ret;
 369
 370        if (sw842_template_counts)
 371                atomic_inc(&template_zeros_count);
 372
 373        return 0;
 374}
 375
 376static int add_end_template(struct sw842_param *p)
 377{
 378        int ret = add_bits(p, OP_END, OP_BITS);
 379
 380        if (ret)
 381                return ret;
 382
 383        if (sw842_template_counts)
 384                atomic_inc(&template_end_count);
 385
 386        return 0;
 387}
 388
 389static bool check_template(struct sw842_param *p, u8 c)
 390{
 391        u8 *t = comp_ops[c];
 392        int i, match, b = 0;
 393
 394        if (c >= OPS_MAX)
 395                return false;
 396
 397        for (i = 0; i < 4; i++) {
 398                if (t[i] & OP_ACTION_INDEX) {
 399                        if (t[i] & OP_AMOUNT_2)
 400                                match = check_index(p, 2, b >> 1);
 401                        else if (t[i] & OP_AMOUNT_4)
 402                                match = check_index(p, 4, b >> 2);
 403                        else if (t[i] & OP_AMOUNT_8)
 404                                match = check_index(p, 8, 0);
 405                        else
 406                                return false;
 407                        if (!match)
 408                                return false;
 409                }
 410
 411                b += t[i] & OP_AMOUNT;
 412        }
 413
 414        return true;
 415}
 416
 417static void get_next_data(struct sw842_param *p)
 418{
 419        p->data8[0] = get_input_data(p, 0, 64);
 420        p->data4[0] = get_input_data(p, 0, 32);
 421        p->data4[1] = get_input_data(p, 4, 32);
 422        p->data2[0] = get_input_data(p, 0, 16);
 423        p->data2[1] = get_input_data(p, 2, 16);
 424        p->data2[2] = get_input_data(p, 4, 16);
 425        p->data2[3] = get_input_data(p, 6, 16);
 426}
 427
 428/* update the hashtable entries.
 429 * only call this after finding/adding the current template
 430 * the dataN fields for the current 8 byte block must be already updated
 431 */
 432static void update_hashtables(struct sw842_param *p)
 433{
 434        u64 pos = p->in - p->instart;
 435        u64 n8 = (pos >> 3) % (1 << I8_BITS);
 436        u64 n4 = (pos >> 2) % (1 << I4_BITS);
 437        u64 n2 = (pos >> 1) % (1 << I2_BITS);
 438
 439        replace_hash(p, 8, n8, 0);
 440        replace_hash(p, 4, n4, 0);
 441        replace_hash(p, 4, n4, 1);
 442        replace_hash(p, 2, n2, 0);
 443        replace_hash(p, 2, n2, 1);
 444        replace_hash(p, 2, n2, 2);
 445        replace_hash(p, 2, n2, 3);
 446}
 447
 448/* find the next template to use, and add it
 449 * the p->dataN fields must already be set for the current 8 byte block
 450 */
 451static int process_next(struct sw842_param *p)
 452{
 453        int ret, i;
 454
 455        p->index8[0] = INDEX_NOT_CHECKED;
 456        p->index4[0] = INDEX_NOT_CHECKED;
 457        p->index4[1] = INDEX_NOT_CHECKED;
 458        p->index2[0] = INDEX_NOT_CHECKED;
 459        p->index2[1] = INDEX_NOT_CHECKED;
 460        p->index2[2] = INDEX_NOT_CHECKED;
 461        p->index2[3] = INDEX_NOT_CHECKED;
 462
 463        /* check up to OPS_MAX - 1; last op is our fallback */
 464        for (i = 0; i < OPS_MAX - 1; i++) {
 465                if (check_template(p, i))
 466                        break;
 467        }
 468
 469        ret = add_template(p, i);
 470        if (ret)
 471                return ret;
 472
 473        return 0;
 474}
 475
 476/**
 477 * sw842_compress
 478 *
 479 * Compress the uncompressed buffer of length @ilen at @in to the output buffer
 480 * @out, using no more than @olen bytes, using the 842 compression format.
 481 *
 482 * Returns: 0 on success, error on failure.  The @olen parameter
 483 * will contain the number of output bytes written on success, or
 484 * 0 on error.
 485 */
 486int sw842_compress(const u8 *in, unsigned int ilen,
 487                   u8 *out, unsigned int *olen, void *wmem)
 488{
 489        struct sw842_param *p = (struct sw842_param *)wmem;
 490        int ret;
 491        u64 last, next, pad, total;
 492        u8 repeat_count = 0;
 493
 494        BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
 495
 496        init_hashtable_nodes(p, 8);
 497        init_hashtable_nodes(p, 4);
 498        init_hashtable_nodes(p, 2);
 499
 500        p->in = (u8 *)in;
 501        p->instart = p->in;
 502        p->ilen = ilen;
 503        p->out = out;
 504        p->olen = *olen;
 505        p->bit = 0;
 506
 507        total = p->olen;
 508
 509        *olen = 0;
 510
 511        /* if using strict mode, we can only compress a multiple of 8 */
 512        if (sw842_strict && (ilen % 8)) {
 513                pr_err("Using strict mode, can't compress len %d\n", ilen);
 514                return -EINVAL;
 515        }
 516
 517        /* let's compress at least 8 bytes, mkay? */
 518        if (unlikely(ilen < 8))
 519                goto skip_comp;
 520
 521        /* make initial 'last' different so we don't match the first time */
 522        last = ~get_unaligned((u64 *)p->in);
 523
 524        while (p->ilen > 7) {
 525                next = get_unaligned((u64 *)p->in);
 526
 527                /* must get the next data, as we need to update the hashtable
 528                 * entries with the new data every time
 529                 */
 530                get_next_data(p);
 531
 532                /* we don't care about endianness in last or next;
 533                 * we're just comparing 8 bytes to another 8 bytes,
 534                 * they're both the same endianness
 535                 */
 536                if (next == last) {
 537                        /* repeat count bits are 0-based, so we stop at +1 */
 538                        if (++repeat_count <= REPEAT_BITS_MAX)
 539                                goto repeat;
 540                }
 541                if (repeat_count) {
 542                        ret = add_repeat_template(p, repeat_count);
 543                        repeat_count = 0;
 544                        if (next == last) /* reached max repeat bits */
 545                                goto repeat;
 546                }
 547
 548                if (next == 0)
 549                        ret = add_zeros_template(p);
 550                else
 551                        ret = process_next(p);
 552
 553                if (ret)
 554                        return ret;
 555
 556repeat:
 557                last = next;
 558                update_hashtables(p);
 559                p->in += 8;
 560                p->ilen -= 8;
 561        }
 562
 563        if (repeat_count) {
 564                ret = add_repeat_template(p, repeat_count);
 565                if (ret)
 566                        return ret;
 567        }
 568
 569skip_comp:
 570        if (p->ilen > 0) {
 571                ret = add_short_data_template(p, p->ilen);
 572                if (ret)
 573                        return ret;
 574
 575                p->in += p->ilen;
 576                p->ilen = 0;
 577        }
 578
 579        ret = add_end_template(p);
 580        if (ret)
 581                return ret;
 582
 583        if (p->bit) {
 584                p->out++;
 585                p->olen--;
 586                p->bit = 0;
 587        }
 588
 589        /* pad compressed length to multiple of 8 */
 590        pad = (8 - ((total - p->olen) % 8)) % 8;
 591        if (pad) {
 592                if (pad > p->olen) /* we were so close! */
 593                        return -ENOSPC;
 594                memset(p->out, 0, pad);
 595                p->out += pad;
 596                p->olen -= pad;
 597        }
 598
 599        if (unlikely((total - p->olen) > UINT_MAX))
 600                return -ENOSPC;
 601
 602        *olen = total - p->olen;
 603
 604        return 0;
 605}
 606EXPORT_SYMBOL_GPL(sw842_compress);
 607
 608static int __init sw842_init(void)
 609{
 610        if (sw842_template_counts)
 611                sw842_debugfs_create();
 612
 613        return 0;
 614}
 615module_init(sw842_init);
 616
 617static void __exit sw842_exit(void)
 618{
 619        if (sw842_template_counts)
 620                sw842_debugfs_remove();
 621}
 622module_exit(sw842_exit);
 623
 624MODULE_LICENSE("GPL");
 625MODULE_DESCRIPTION("Software 842 Compressor");
 626MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
 627