busybox/archival/libarchive/unxz/xz_dec_bcj.c
/*
 * Branch/Call/Jump (BCJ) filter decoders
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

#include "xz_private.h"

/*
 * The rest of the file is inside this ifdef. It makes things a little more
 * convenient when building without support for any BCJ filters.
 */
#ifdef XZ_DEC_BCJ

struct xz_dec_bcj {
        /* Type of the BCJ filter being used */
        enum {
                BCJ_X86 = 4,        /* x86 or x86-64 */
                BCJ_POWERPC = 5,    /* Big endian only */
                BCJ_IA64 = 6,       /* Big or little endian */
                BCJ_ARM = 7,        /* Little endian only */
                BCJ_ARMTHUMB = 8,   /* Little endian only */
                BCJ_SPARC = 9       /* Big or little endian */
        } type;

        /*
         * Return value of the next filter in the chain. We need to preserve
         * this information across calls, because we must not call the next
         * filter anymore once it has returned XZ_STREAM_END.
         */
        enum xz_ret ret;

        /* True if we are operating in single-call mode. */
        bool single_call;

        /*
         * Absolute position relative to the beginning of the uncompressed
         * data (in a single .xz Block). We care only about the lowest 32
         * bits so this doesn't need to be uint64_t even with big files.
         */
        uint32_t pos;

        /* x86 filter state */
        uint32_t x86_prev_mask;

        /* Temporary space to hold the variables from struct xz_buf */
        uint8_t *out;
        size_t out_pos;
        size_t out_size;

        struct {
                /* Amount of already filtered data at the beginning of buf */
                size_t filtered;

                /* Total amount of data currently stored in buf */
                size_t size;

                /*
                 * Buffer to hold a mix of filtered and unfiltered data. This
                 * needs to be big enough to hold Alignment + 2 * Look-ahead:
                 *
                 * Type         Alignment   Look-ahead
                 * x86              1           4
                 * PowerPC          4           0
                 * IA-64           16           0
                 * ARM              4           0
                 * ARM-Thumb        2           2
                 * SPARC            4           0
                 */
                uint8_t buf[16];
        } temp;
};

#ifdef XZ_DEC_X86
/*
 * This is used to test the most significant byte of a memory address
 * in an x86 instruction.
 */
static inline int bcj_x86_test_msbyte(uint8_t b)
{
        return b == 0x00 || b == 0xFF;
}

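/*
 * For reference: the x86 BCJ encoder replaces the 32-bit relative
 * displacements of E8 (CALL rel32) and E9 (JMP rel32) instructions with
 * absolute addresses; the decoder below converts them back using the
 * current position s->pos. prev_pos and prev_mask track recently seen
 * 0xE8/0xE9 bytes so that byte sequences that merely look like such
 * instructions get skipped the same way the encoder skipped them.
 */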
static noinline_for_stack size_t XZ_FUNC bcj_x86(
                struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
        static const bool mask_to_allowed_status[8]
                = { true, true, true, false, true, false, false, false };

        static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };

        size_t i;
        size_t prev_pos = (size_t)-1;
        uint32_t prev_mask = s->x86_prev_mask;
        uint32_t src;
        uint32_t dest;
        uint32_t j;
        uint8_t b;

        if (size <= 4)
                return 0;

        size -= 4;
        for (i = 0; i < size; ++i) {
                if ((buf[i] & 0xFE) != 0xE8)
                        continue;

                prev_pos = i - prev_pos;
                if (prev_pos > 3) {
                        prev_mask = 0;
                } else {
                        prev_mask = (prev_mask << (prev_pos - 1)) & 7;
                        if (prev_mask != 0) {
                                b = buf[i + 4 - mask_to_bit_num[prev_mask]];
                                if (!mask_to_allowed_status[prev_mask]
                                                || bcj_x86_test_msbyte(b)) {
                                        prev_pos = i;
                                        prev_mask = (prev_mask << 1) | 1;
                                        continue;
                                }
                        }
                }

                prev_pos = i;

                if (bcj_x86_test_msbyte(buf[i + 4])) {
                        src = get_unaligned_le32(buf + i + 1);
                        while (true) {
                                dest = src - (s->pos + (uint32_t)i + 5);
                                if (prev_mask == 0)
                                        break;

                                j = mask_to_bit_num[prev_mask] * 8;
                                b = (uint8_t)(dest >> (24 - j));
                                if (!bcj_x86_test_msbyte(b))
                                        break;

                                src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
                        }

                        dest &= 0x01FFFFFF;
                        dest |= (uint32_t)0 - (dest & 0x01000000);
                        put_unaligned_le32(dest, buf + i + 1);
                        i += 4;
                } else {
                        prev_mask = (prev_mask << 1) | 1;
                }
        }

        prev_pos = i - prev_pos;
        s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
        return i;
}
#endif

#ifdef XZ_DEC_POWERPC
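/*
 * For reference: 0x48000001 under the mask 0xFC000003 matches the PowerPC
 * "bl" instruction (primary opcode 18, AA=0, LK=1). Its 24-bit branch
 * offset, held in the bits 0x03FFFFFC, was turned into an absolute address
 * by the encoder and is converted back to a relative offset here.
 */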
static noinline_for_stack size_t XZ_FUNC bcj_powerpc(
                struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
        size_t i;
        uint32_t instr;

        for (i = 0; i + 4 <= size; i += 4) {
                instr = get_unaligned_be32(buf + i);
                if ((instr & 0xFC000003) == 0x48000001) {
                        instr &= 0x03FFFFFC;
                        instr -= s->pos + (uint32_t)i;
                        instr &= 0x03FFFFFC;
                        instr |= 0x48000001;
                        put_unaligned_be32(instr, buf + i);
                }
        }

        return i;
}
#endif

#ifdef XZ_DEC_IA64
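/*
 * For reference: IA-64 code is organized as 128-bit bundles: a 5-bit
 * template followed by three 41-bit instruction slots. branch_table is
 * indexed by the template and gives a bitmask of the slots that may hold
 * the branch instructions this filter rewrites; for those slots, the
 * 21-bit address field is extracted, converted from absolute back to
 * relative (in 16-byte units), and written back.
 */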
static noinline_for_stack size_t XZ_FUNC bcj_ia64(
                struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
        static const uint8_t branch_table[32] = {
                0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0,
                4, 4, 6, 6, 0, 0, 7, 7,
                4, 4, 0, 0, 4, 4, 0, 0
        };

        /*
         * The local variables take a little bit of stack space, but it's
         * less than what the LZMA2 decoder takes, so it doesn't make sense
         * to reduce stack usage here without doing that for the LZMA2
         * decoder too.
         */

        /* Loop counters */
        size_t i;
        size_t j;

        /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
        uint32_t slot;

        /* Bitwise offset of the instruction indicated by slot */
        uint32_t bit_pos;

        /* bit_pos split into byte and bit parts */
        uint32_t byte_pos;
        uint32_t bit_res;

        /* Address part of an instruction */
        uint32_t addr;

        /* Mask used to detect which instructions to convert */
        uint32_t mask;

        /* 41-bit instruction stored somewhere in the lowest 48 bits */
        uint64_t instr;

        /* Instruction normalized with bit_res for easier manipulation */
        uint64_t norm;

        for (i = 0; i + 16 <= size; i += 16) {
                mask = branch_table[buf[i] & 0x1F];
                for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
                        if (((mask >> slot) & 1) == 0)
                                continue;

                        byte_pos = bit_pos >> 3;
                        bit_res = bit_pos & 7;
                        instr = 0;
                        for (j = 0; j < 6; ++j)
                                instr |= (uint64_t)(buf[i + j + byte_pos])
                                                << (8 * j);

                        norm = instr >> bit_res;

                        if (((norm >> 37) & 0x0F) == 0x05
                                        && ((norm >> 9) & 0x07) == 0) {
                                addr = (norm >> 13) & 0x0FFFFF;
                                addr |= ((uint32_t)(norm >> 36) & 1) << 20;
                                addr <<= 4;
                                addr -= s->pos + (uint32_t)i;
                                addr >>= 4;

                                norm &= ~((uint64_t)0x8FFFFF << 13);
                                norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
                                norm |= (uint64_t)(addr & 0x100000)
                                                << (36 - 20);

                                instr &= (1 << bit_res) - 1;
                                instr |= norm << bit_res;

                                for (j = 0; j < 6; j++)
                                        buf[i + j + byte_pos]
                                                = (uint8_t)(instr >> (8 * j));
                        }
                }
        }

        return i;
}
#endif

#ifdef XZ_DEC_ARM
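/*
 * For reference: 0xEB in the most significant byte identifies the ARM BL
 * instruction (condition code AL, branch with link). Its 24-bit offset is
 * counted in words and is relative to PC, which points 8 bytes past the
 * instruction; hence the shifts by 2 and the +8 below.
 */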
static noinline_for_stack size_t XZ_FUNC bcj_arm(
                struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
        size_t i;
        uint32_t addr;

        for (i = 0; i + 4 <= size; i += 4) {
                if (buf[i + 3] == 0xEB) {
                        addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
                                        | ((uint32_t)buf[i + 2] << 16);
                        addr <<= 2;
                        addr -= s->pos + (uint32_t)i + 8;
                        addr >>= 2;
                        buf[i] = (uint8_t)addr;
                        buf[i + 1] = (uint8_t)(addr >> 8);
                        buf[i + 2] = (uint8_t)(addr >> 16);
                }
        }

        return i;
}
#endif

#ifdef XZ_DEC_ARMTHUMB
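/*
 * For reference: the Thumb BL instruction is a pair of 16-bit halfwords,
 * 0xF0xx (upper offset bits) followed by 0xF8xx (lower offset bits), which
 * together hold a 22-bit halfword offset relative to PC (4 bytes past the
 * first halfword). The pair is rewritten in place and then skipped with
 * the extra i += 2.
 */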
static noinline_for_stack size_t XZ_FUNC bcj_armthumb(
                struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
        size_t i;
        uint32_t addr;

        for (i = 0; i + 4 <= size; i += 2) {
                if ((buf[i + 1] & 0xF8) == 0xF0
                                && (buf[i + 3] & 0xF8) == 0xF8) {
                        addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
                                        | ((uint32_t)buf[i] << 11)
                                        | (((uint32_t)buf[i + 3] & 0x07) << 8)
                                        | (uint32_t)buf[i + 2];
                        addr <<= 1;
                        addr -= s->pos + (uint32_t)i + 4;
                        addr >>= 1;
                        buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
                        buf[i] = (uint8_t)(addr >> 11);
                        buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
                        buf[i + 2] = (uint8_t)addr;
                        i += 2;
                }
        }

        return i;
}
#endif

#ifdef XZ_DEC_SPARC
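/*
 * For reference (a best-effort reading of the magic numbers): the test
 * below appears to match SPARC CALL instructions (top two bits 01 followed
 * by a 30-bit word displacement) whose displacement fits in 22 bits, i.e.
 * whose upper displacement bits are all zeros or all ones. The final
 * expression rebuilds the instruction with the converted displacement
 * sign-extended back into that same form.
 */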
static noinline_for_stack size_t XZ_FUNC bcj_sparc(
                struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
        size_t i;
        uint32_t instr;

        for (i = 0; i + 4 <= size; i += 4) {
                instr = get_unaligned_be32(buf + i);
                if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
                        instr <<= 2;
                        instr -= s->pos + (uint32_t)i;
                        instr >>= 2;
                        instr = ((uint32_t)0x40000000 - (instr & 0x400000))
                                        | 0x40000000 | (instr & 0x3FFFFF);
                        put_unaligned_be32(instr, buf + i);
                }
        }

        return i;
}
#endif

/*
 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
 * of data that got filtered.
 *
 * NOTE: This is implemented as a switch statement to avoid using function
 * pointers, which may be problematic in the kernel boot code, where pointers
 * to static data must be avoided (at least on x86).
 */
static void XZ_FUNC bcj_apply(struct xz_dec_bcj *s,
                uint8_t *buf, size_t *pos, size_t size)
{
        size_t filtered;

        buf += *pos;
        size -= *pos;

        switch (s->type) {
#ifdef XZ_DEC_X86
        case BCJ_X86:
                filtered = bcj_x86(s, buf, size);
                break;
#endif
#ifdef XZ_DEC_POWERPC
        case BCJ_POWERPC:
                filtered = bcj_powerpc(s, buf, size);
                break;
#endif
#ifdef XZ_DEC_IA64
        case BCJ_IA64:
                filtered = bcj_ia64(s, buf, size);
                break;
#endif
#ifdef XZ_DEC_ARM
        case BCJ_ARM:
                filtered = bcj_arm(s, buf, size);
                break;
#endif
#ifdef XZ_DEC_ARMTHUMB
        case BCJ_ARMTHUMB:
                filtered = bcj_armthumb(s, buf, size);
                break;
#endif
#ifdef XZ_DEC_SPARC
        case BCJ_SPARC:
                filtered = bcj_sparc(s, buf, size);
                break;
#endif
        default:
                /* Never reached but silence compiler warnings. */
                filtered = 0;
                break;
        }

        *pos += filtered;
        s->pos += filtered;
}

/*
 * Flush pending filtered data from temp to the output buffer.
 * Move the remaining mixture of possibly filtered and unfiltered
 * data to the beginning of temp.
 */
static void XZ_FUNC bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
{
        size_t copy_size;

        copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
        memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
        b->out_pos += copy_size;

        s->temp.filtered -= copy_size;
        s->temp.size -= copy_size;
        memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
}

/*
 * The BCJ filter functions are primitive in the sense that they process the
 * data in chunks of 1-16 bytes. To hide this issue, this function does
 * some buffering.
 */
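/*
 * Roughly, the function below works in three stages: (1) flush any already
 * filtered bytes still sitting in temp; (2) let the next filter decode
 * straight into the caller's buffer, filter as much of that as possible,
 * and stash the unfiltered tail in temp; (3) if the output buffer still
 * isn't full, temporarily redirect the xz_buf output to temp itself so
 * that enough look-ahead accumulates for the filter to make progress, then
 * flush the filtered part of temp to the caller.
 */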
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
                struct xz_dec_lzma2 *lzma2, struct xz_buf *b)
{
        size_t out_start;

        /*
         * Flush pending already-filtered data to the output buffer. Return
         * immediately if we couldn't flush everything, or if the next
         * filter in the chain had already returned XZ_STREAM_END.
         */
        if (s->temp.filtered > 0) {
                bcj_flush(s, b);
                if (s->temp.filtered > 0)
                        return XZ_OK;

                if (s->ret == XZ_STREAM_END)
                        return XZ_STREAM_END;
        }

        /*
         * If we have more output space than what is currently pending in
         * temp, copy the unfiltered data from temp to the output buffer
         * and try to fill the output buffer by decoding more data from the
         * next filter in the chain. Apply the BCJ filter on the new data
         * in the output buffer. If not everything can be filtered, copy it
         * to temp and rewind the output buffer position accordingly.
         *
         * This always needs to be run when temp.size == 0 to handle a special
         * case where the output buffer is full and the next filter has no
         * more output coming but hasn't returned XZ_STREAM_END yet.
         */
        if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
                out_start = b->out_pos;
                memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
                b->out_pos += s->temp.size;

                s->ret = xz_dec_lzma2_run(lzma2, b);
                if (s->ret != XZ_STREAM_END
                                && (s->ret != XZ_OK || s->single_call))
                        return s->ret;

                bcj_apply(s, b->out, &out_start, b->out_pos);

                /*
                 * As an exception, if the next filter returned XZ_STREAM_END,
                 * we can return XZ_STREAM_END too, since the last few bytes
                 * that remain unfiltered are meant to remain unfiltered.
                 */
                if (s->ret == XZ_STREAM_END)
                        return XZ_STREAM_END;

                s->temp.size = b->out_pos - out_start;
                b->out_pos -= s->temp.size;
                memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);

                /*
                 * If there wasn't enough input to the next filter to fill
                 * the output buffer with unfiltered data, there's no point
                 * in trying to decode more data to temp.
                 */
                if (b->out_pos + s->temp.size < b->out_size)
                        return XZ_OK;
        }

        /*
         * We have unfiltered data in temp. If the output buffer isn't full
         * yet, try to fill the temp buffer by decoding more data from the
         * next filter. Apply the BCJ filter on temp. Then we hopefully can
         * fill the actual output buffer by copying filtered data from temp.
         * A mix of filtered and unfiltered data may be left in temp; it will
         * be taken care of on the next call to this function.
         */
        if (b->out_pos < b->out_size) {
                /* Make b->out{,_pos,_size} temporarily point to s->temp. */
                s->out = b->out;
                s->out_pos = b->out_pos;
                s->out_size = b->out_size;
                b->out = s->temp.buf;
                b->out_pos = s->temp.size;
                b->out_size = sizeof(s->temp.buf);

                s->ret = xz_dec_lzma2_run(lzma2, b);

                s->temp.size = b->out_pos;
                b->out = s->out;
                b->out_pos = s->out_pos;
                b->out_size = s->out_size;

                if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
                        return s->ret;

                bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);

                /*
                 * If the next filter returned XZ_STREAM_END, we mark that
                 * everything is filtered, since the last unfiltered bytes
                 * of the stream are meant to be left as is.
                 */
                if (s->ret == XZ_STREAM_END)
                        s->temp.filtered = s->temp.size;

                bcj_flush(s, b);
                if (s->temp.filtered > 0)
                        return XZ_OK;
        }

        return s->ret;
}

XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call)
{
        struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (s != NULL)
                s->single_call = single_call;

        return s;
}

XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset(
                struct xz_dec_bcj *s, uint8_t id)
{
        switch (id) {
#ifdef XZ_DEC_X86
        case BCJ_X86:
#endif
#ifdef XZ_DEC_POWERPC
        case BCJ_POWERPC:
#endif
#ifdef XZ_DEC_IA64
        case BCJ_IA64:
#endif
#ifdef XZ_DEC_ARM
        case BCJ_ARM:
#endif
#ifdef XZ_DEC_ARMTHUMB
        case BCJ_ARMTHUMB:
#endif
#ifdef XZ_DEC_SPARC
        case BCJ_SPARC:
#endif
                break;

        default:
                /* Unsupported Filter ID */
                return XZ_OPTIONS_ERROR;
        }

        s->type = id;
        s->ret = XZ_OK;
        s->pos = 0;
        s->x86_prev_mask = 0;
        s->temp.filtered = 0;
        s->temp.size = 0;

        return XZ_OK;
}

#endif