linux/lib/xz/xz_dec_bcj.c
<<
>>
Prefs
   1/*
   2 * Branch/Call/Jump (BCJ) filter decoders
   3 *
   4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
   5 *          Igor Pavlov <https://7-zip.org/>
   6 *
   7 * This file has been put into the public domain.
   8 * You can do whatever you want with this file.
   9 */
  10
  11#include "xz_private.h"
  12
  13/*
  14 * The rest of the file is inside this ifdef. It makes things a little more
  15 * convenient when building without support for any BCJ filters.
  16 */
  17#ifdef XZ_DEC_BCJ
  18
  19struct xz_dec_bcj {
  20        /* Type of the BCJ filter being used */
  21        enum {
  22                BCJ_X86 = 4,        /* x86 or x86-64 */
  23                BCJ_POWERPC = 5,    /* Big endian only */
  24                BCJ_IA64 = 6,       /* Big or little endian */
  25                BCJ_ARM = 7,        /* Little endian only */
  26                BCJ_ARMTHUMB = 8,   /* Little endian only */
  27                BCJ_SPARC = 9       /* Big or little endian */
  28        } type;
  29
  30        /*
  31         * Return value of the next filter in the chain. We need to preserve
  32         * this information across calls, because we must not call the next
  33         * filter anymore once it has returned XZ_STREAM_END.
  34         */
  35        enum xz_ret ret;
  36
  37        /* True if we are operating in single-call mode. */
  38        bool single_call;
  39
  40        /*
  41         * Absolute position relative to the beginning of the uncompressed
  42         * data (in a single .xz Block). We care only about the lowest 32
  43         * bits so this doesn't need to be uint64_t even with big files.
  44         */
  45        uint32_t pos;
  46
  47        /* x86 filter state */
  48        uint32_t x86_prev_mask;
  49
  50        /* Temporary space to hold the variables from struct xz_buf */
  51        uint8_t *out;
  52        size_t out_pos;
  53        size_t out_size;
  54
  55        struct {
  56                /* Amount of already filtered data in the beginning of buf */
  57                size_t filtered;
  58
  59                /* Total amount of data currently stored in buf  */
  60                size_t size;
  61
  62                /*
  63                 * Buffer to hold a mix of filtered and unfiltered data. This
  64                 * needs to be big enough to hold Alignment + 2 * Look-ahead:
  65                 *
  66                 * Type         Alignment   Look-ahead
  67                 * x86              1           4
  68                 * PowerPC          4           0
  69                 * IA-64           16           0
  70                 * ARM              4           0
  71                 * ARM-Thumb        2           2
  72                 * SPARC            4           0
  73                 */
  74                uint8_t buf[16];
  75        } temp;
  76};
  77
  78#ifdef XZ_DEC_X86
  79/*
  80 * This is used to test the most significant byte of a memory address
  81 * in an x86 instruction.
  82 */
  83static inline int bcj_x86_test_msbyte(uint8_t b)
  84{
  85        return b == 0x00 || b == 0xFF;
  86}
  87
  88static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  89{
  90        static const bool mask_to_allowed_status[8]
  91                = { true, true, true, false, true, false, false, false };
  92
  93        static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
  94
  95        size_t i;
  96        size_t prev_pos = (size_t)-1;
  97        uint32_t prev_mask = s->x86_prev_mask;
  98        uint32_t src;
  99        uint32_t dest;
 100        uint32_t j;
 101        uint8_t b;
 102
 103        if (size <= 4)
 104                return 0;
 105
 106        size -= 4;
 107        for (i = 0; i < size; ++i) {
 108                if ((buf[i] & 0xFE) != 0xE8)
 109                        continue;
 110
 111                prev_pos = i - prev_pos;
 112                if (prev_pos > 3) {
 113                        prev_mask = 0;
 114                } else {
 115                        prev_mask = (prev_mask << (prev_pos - 1)) & 7;
 116                        if (prev_mask != 0) {
 117                                b = buf[i + 4 - mask_to_bit_num[prev_mask]];
 118                                if (!mask_to_allowed_status[prev_mask]
 119                                                || bcj_x86_test_msbyte(b)) {
 120                                        prev_pos = i;
 121                                        prev_mask = (prev_mask << 1) | 1;
 122                                        continue;
 123                                }
 124                        }
 125                }
 126
 127                prev_pos = i;
 128
 129                if (bcj_x86_test_msbyte(buf[i + 4])) {
 130                        src = get_unaligned_le32(buf + i + 1);
 131                        while (true) {
 132                                dest = src - (s->pos + (uint32_t)i + 5);
 133                                if (prev_mask == 0)
 134                                        break;
 135
 136                                j = mask_to_bit_num[prev_mask] * 8;
 137                                b = (uint8_t)(dest >> (24 - j));
 138                                if (!bcj_x86_test_msbyte(b))
 139                                        break;
 140
 141                                src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
 142                        }
 143
 144                        dest &= 0x01FFFFFF;
 145                        dest |= (uint32_t)0 - (dest & 0x01000000);
 146                        put_unaligned_le32(dest, buf + i + 1);
 147                        i += 4;
 148                } else {
 149                        prev_mask = (prev_mask << 1) | 1;
 150                }
 151        }
 152
 153        prev_pos = i - prev_pos;
 154        s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
 155        return i;
 156}
 157#endif
 158
 159#ifdef XZ_DEC_POWERPC
 160static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 161{
 162        size_t i;
 163        uint32_t instr;
 164
 165        for (i = 0; i + 4 <= size; i += 4) {
 166                instr = get_unaligned_be32(buf + i);
 167                if ((instr & 0xFC000003) == 0x48000001) {
 168                        instr &= 0x03FFFFFC;
 169                        instr -= s->pos + (uint32_t)i;
 170                        instr &= 0x03FFFFFC;
 171                        instr |= 0x48000001;
 172                        put_unaligned_be32(instr, buf + i);
 173                }
 174        }
 175
 176        return i;
 177}
 178#endif
 179
 180#ifdef XZ_DEC_IA64
 181static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 182{
 183        static const uint8_t branch_table[32] = {
 184                0, 0, 0, 0, 0, 0, 0, 0,
 185                0, 0, 0, 0, 0, 0, 0, 0,
 186                4, 4, 6, 6, 0, 0, 7, 7,
 187                4, 4, 0, 0, 4, 4, 0, 0
 188        };
 189
 190        /*
 191         * The local variables take a little bit stack space, but it's less
 192         * than what LZMA2 decoder takes, so it doesn't make sense to reduce
 193         * stack usage here without doing that for the LZMA2 decoder too.
 194         */
 195
 196        /* Loop counters */
 197        size_t i;
 198        size_t j;
 199
 200        /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
 201        uint32_t slot;
 202
 203        /* Bitwise offset of the instruction indicated by slot */
 204        uint32_t bit_pos;
 205
 206        /* bit_pos split into byte and bit parts */
 207        uint32_t byte_pos;
 208        uint32_t bit_res;
 209
 210        /* Address part of an instruction */
 211        uint32_t addr;
 212
 213        /* Mask used to detect which instructions to convert */
 214        uint32_t mask;
 215
 216        /* 41-bit instruction stored somewhere in the lowest 48 bits */
 217        uint64_t instr;
 218
 219        /* Instruction normalized with bit_res for easier manipulation */
 220        uint64_t norm;
 221
 222        for (i = 0; i + 16 <= size; i += 16) {
 223                mask = branch_table[buf[i] & 0x1F];
 224                for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
 225                        if (((mask >> slot) & 1) == 0)
 226                                continue;
 227
 228                        byte_pos = bit_pos >> 3;
 229                        bit_res = bit_pos & 7;
 230                        instr = 0;
 231                        for (j = 0; j < 6; ++j)
 232                                instr |= (uint64_t)(buf[i + j + byte_pos])
 233                                                << (8 * j);
 234
 235                        norm = instr >> bit_res;
 236
 237                        if (((norm >> 37) & 0x0F) == 0x05
 238                                        && ((norm >> 9) & 0x07) == 0) {
 239                                addr = (norm >> 13) & 0x0FFFFF;
 240                                addr |= ((uint32_t)(norm >> 36) & 1) << 20;
 241                                addr <<= 4;
 242                                addr -= s->pos + (uint32_t)i;
 243                                addr >>= 4;
 244
 245                                norm &= ~((uint64_t)0x8FFFFF << 13);
 246                                norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
 247                                norm |= (uint64_t)(addr & 0x100000)
 248                                                << (36 - 20);
 249
 250                                instr &= (1 << bit_res) - 1;
 251                                instr |= norm << bit_res;
 252
 253                                for (j = 0; j < 6; j++)
 254                                        buf[i + j + byte_pos]
 255                                                = (uint8_t)(instr >> (8 * j));
 256                        }
 257                }
 258        }
 259
 260        return i;
 261}
 262#endif
 263
 264#ifdef XZ_DEC_ARM
 265static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 266{
 267        size_t i;
 268        uint32_t addr;
 269
 270        for (i = 0; i + 4 <= size; i += 4) {
 271                if (buf[i + 3] == 0xEB) {
 272                        addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
 273                                        | ((uint32_t)buf[i + 2] << 16);
 274                        addr <<= 2;
 275                        addr -= s->pos + (uint32_t)i + 8;
 276                        addr >>= 2;
 277                        buf[i] = (uint8_t)addr;
 278                        buf[i + 1] = (uint8_t)(addr >> 8);
 279                        buf[i + 2] = (uint8_t)(addr >> 16);
 280                }
 281        }
 282
 283        return i;
 284}
 285#endif
 286
 287#ifdef XZ_DEC_ARMTHUMB
 288static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 289{
 290        size_t i;
 291        uint32_t addr;
 292
 293        for (i = 0; i + 4 <= size; i += 2) {
 294                if ((buf[i + 1] & 0xF8) == 0xF0
 295                                && (buf[i + 3] & 0xF8) == 0xF8) {
 296                        addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
 297                                        | ((uint32_t)buf[i] << 11)
 298                                        | (((uint32_t)buf[i + 3] & 0x07) << 8)
 299                                        | (uint32_t)buf[i + 2];
 300                        addr <<= 1;
 301                        addr -= s->pos + (uint32_t)i + 4;
 302                        addr >>= 1;
 303                        buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
 304                        buf[i] = (uint8_t)(addr >> 11);
 305                        buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
 306                        buf[i + 2] = (uint8_t)addr;
 307                        i += 2;
 308                }
 309        }
 310
 311        return i;
 312}
 313#endif
 314
 315#ifdef XZ_DEC_SPARC
 316static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 317{
 318        size_t i;
 319        uint32_t instr;
 320
 321        for (i = 0; i + 4 <= size; i += 4) {
 322                instr = get_unaligned_be32(buf + i);
 323                if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
 324                        instr <<= 2;
 325                        instr -= s->pos + (uint32_t)i;
 326                        instr >>= 2;
 327                        instr = ((uint32_t)0x40000000 - (instr & 0x400000))
 328                                        | 0x40000000 | (instr & 0x3FFFFF);
 329                        put_unaligned_be32(instr, buf + i);
 330                }
 331        }
 332
 333        return i;
 334}
 335#endif
 336
 337/*
 338 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
 339 * of data that got filtered.
 340 *
 341 * NOTE: This is implemented as a switch statement to avoid using function
 342 * pointers, which could be problematic in the kernel boot code, which must
 343 * avoid pointers to static data (at least on x86).
 344 */
 345static void bcj_apply(struct xz_dec_bcj *s,
 346                      uint8_t *buf, size_t *pos, size_t size)
 347{
 348        size_t filtered;
 349
 350        buf += *pos;
 351        size -= *pos;
 352
 353        switch (s->type) {
 354#ifdef XZ_DEC_X86
 355        case BCJ_X86:
 356                filtered = bcj_x86(s, buf, size);
 357                break;
 358#endif
 359#ifdef XZ_DEC_POWERPC
 360        case BCJ_POWERPC:
 361                filtered = bcj_powerpc(s, buf, size);
 362                break;
 363#endif
 364#ifdef XZ_DEC_IA64
 365        case BCJ_IA64:
 366                filtered = bcj_ia64(s, buf, size);
 367                break;
 368#endif
 369#ifdef XZ_DEC_ARM
 370        case BCJ_ARM:
 371                filtered = bcj_arm(s, buf, size);
 372                break;
 373#endif
 374#ifdef XZ_DEC_ARMTHUMB
 375        case BCJ_ARMTHUMB:
 376                filtered = bcj_armthumb(s, buf, size);
 377                break;
 378#endif
 379#ifdef XZ_DEC_SPARC
 380        case BCJ_SPARC:
 381                filtered = bcj_sparc(s, buf, size);
 382                break;
 383#endif
 384        default:
 385                /* Never reached but silence compiler warnings. */
 386                filtered = 0;
 387                break;
 388        }
 389
 390        *pos += filtered;
 391        s->pos += filtered;
 392}
 393
 394/*
 395 * Flush pending filtered data from temp to the output buffer.
 396 * Move the remaining mixture of possibly filtered and unfiltered
 397 * data to the beginning of temp.
 398 */
 399static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
 400{
 401        size_t copy_size;
 402
 403        copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
 404        memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
 405        b->out_pos += copy_size;
 406
 407        s->temp.filtered -= copy_size;
 408        s->temp.size -= copy_size;
 409        memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
 410}
 411
 412/*
 413 * The BCJ filter functions are primitive in sense that they process the
 414 * data in chunks of 1-16 bytes. To hide this issue, this function does
 415 * some buffering.
 416 */
 417XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
 418                                     struct xz_dec_lzma2 *lzma2,
 419                                     struct xz_buf *b)
 420{
 421        size_t out_start;
 422
 423        /*
 424         * Flush pending already filtered data to the output buffer. Return
 425         * immediately if we couldn't flush everything, or if the next
 426         * filter in the chain had already returned XZ_STREAM_END.
 427         */
 428        if (s->temp.filtered > 0) {
 429                bcj_flush(s, b);
 430                if (s->temp.filtered > 0)
 431                        return XZ_OK;
 432
 433                if (s->ret == XZ_STREAM_END)
 434                        return XZ_STREAM_END;
 435        }
 436
 437        /*
 438         * If we have more output space than what is currently pending in
 439         * temp, copy the unfiltered data from temp to the output buffer
 440         * and try to fill the output buffer by decoding more data from the
 441         * next filter in the chain. Apply the BCJ filter on the new data
 442         * in the output buffer. If everything cannot be filtered, copy it
 443         * to temp and rewind the output buffer position accordingly.
 444         *
 445         * This needs to be always run when temp.size == 0 to handle a special
 446         * case where the output buffer is full and the next filter has no
 447         * more output coming but hasn't returned XZ_STREAM_END yet.
 448         */
 449        if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
 450                out_start = b->out_pos;
 451                memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
 452                b->out_pos += s->temp.size;
 453
 454                s->ret = xz_dec_lzma2_run(lzma2, b);
 455                if (s->ret != XZ_STREAM_END
 456                                && (s->ret != XZ_OK || s->single_call))
 457                        return s->ret;
 458
 459                bcj_apply(s, b->out, &out_start, b->out_pos);
 460
 461                /*
 462                 * As an exception, if the next filter returned XZ_STREAM_END,
 463                 * we can do that too, since the last few bytes that remain
 464                 * unfiltered are meant to remain unfiltered.
 465                 */
 466                if (s->ret == XZ_STREAM_END)
 467                        return XZ_STREAM_END;
 468
 469                s->temp.size = b->out_pos - out_start;
 470                b->out_pos -= s->temp.size;
 471                memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
 472
 473                /*
 474                 * If there wasn't enough input to the next filter to fill
 475                 * the output buffer with unfiltered data, there's no point
 476                 * to try decoding more data to temp.
 477                 */
 478                if (b->out_pos + s->temp.size < b->out_size)
 479                        return XZ_OK;
 480        }
 481
 482        /*
 483         * We have unfiltered data in temp. If the output buffer isn't full
 484         * yet, try to fill the temp buffer by decoding more data from the
 485         * next filter. Apply the BCJ filter on temp. Then we hopefully can
 486         * fill the actual output buffer by copying filtered data from temp.
 487         * A mix of filtered and unfiltered data may be left in temp; it will
 488         * be taken care on the next call to this function.
 489         */
 490        if (b->out_pos < b->out_size) {
 491                /* Make b->out{,_pos,_size} temporarily point to s->temp. */
 492                s->out = b->out;
 493                s->out_pos = b->out_pos;
 494                s->out_size = b->out_size;
 495                b->out = s->temp.buf;
 496                b->out_pos = s->temp.size;
 497                b->out_size = sizeof(s->temp.buf);
 498
 499                s->ret = xz_dec_lzma2_run(lzma2, b);
 500
 501                s->temp.size = b->out_pos;
 502                b->out = s->out;
 503                b->out_pos = s->out_pos;
 504                b->out_size = s->out_size;
 505
 506                if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
 507                        return s->ret;
 508
 509                bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
 510
 511                /*
 512                 * If the next filter returned XZ_STREAM_END, we mark that
 513                 * everything is filtered, since the last unfiltered bytes
 514                 * of the stream are meant to be left as is.
 515                 */
 516                if (s->ret == XZ_STREAM_END)
 517                        s->temp.filtered = s->temp.size;
 518
 519                bcj_flush(s, b);
 520                if (s->temp.filtered > 0)
 521                        return XZ_OK;
 522        }
 523
 524        return s->ret;
 525}
 526
 527XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool single_call)
 528{
 529        struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
 530        if (s != NULL)
 531                s->single_call = single_call;
 532
 533        return s;
 534}
 535
 536XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
 537{
 538        switch (id) {
 539#ifdef XZ_DEC_X86
 540        case BCJ_X86:
 541#endif
 542#ifdef XZ_DEC_POWERPC
 543        case BCJ_POWERPC:
 544#endif
 545#ifdef XZ_DEC_IA64
 546        case BCJ_IA64:
 547#endif
 548#ifdef XZ_DEC_ARM
 549        case BCJ_ARM:
 550#endif
 551#ifdef XZ_DEC_ARMTHUMB
 552        case BCJ_ARMTHUMB:
 553#endif
 554#ifdef XZ_DEC_SPARC
 555        case BCJ_SPARC:
 556#endif
 557                break;
 558
 559        default:
 560                /* Unsupported Filter ID */
 561                return XZ_OPTIONS_ERROR;
 562        }
 563
 564        s->type = id;
 565        s->ret = XZ_OK;
 566        s->pos = 0;
 567        s->x86_prev_mask = 0;
 568        s->temp.filtered = 0;
 569        s->temp.size = 0;
 570
 571        return XZ_OK;
 572}
 573
 574#endif
 575