busybox/archival/libunarchive/unxz/xz_dec_bcj.c
<<
>>
Prefs
   1/*
   2 * Branch/Call/Jump (BCJ) filter decoders
   3 *
   4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
   5 *          Igor Pavlov <http://7-zip.org/>
   6 *
   7 * This file has been put into the public domain.
   8 * You can do whatever you want with this file.
   9 */
  10
  11#include "xz_private.h"
  12
  13/*
  14 * The rest of the file is inside this ifdef. It makes things a little more
  15 * convenient when building without support for any BCJ filters.
  16 */
  17#ifdef XZ_DEC_BCJ
  18
  19struct xz_dec_bcj {
  20        /* Type of the BCJ filter being used */
  21        enum {
  22                BCJ_X86 = 4,        /* x86 or x86-64 */
  23                BCJ_POWERPC = 5,    /* Big endian only */
  24                BCJ_IA64 = 6,       /* Big or little endian */
  25                BCJ_ARM = 7,        /* Little endian only */
  26                BCJ_ARMTHUMB = 8,   /* Little endian only */
  27                BCJ_SPARC = 9       /* Big or little endian */
  28        } type;
  29
  30        /*
  31         * Return value of the next filter in the chain. We need to preserve
  32         * this information across calls, because we must not call the next
  33         * filter anymore once it has returned XZ_STREAM_END.
  34         */
  35        enum xz_ret ret;
  36
  37        /* True if we are operating in single-call mode. */
  38        bool single_call;
  39
  40        /*
  41         * Absolute position relative to the beginning of the uncompressed
  42         * data (in a single .xz Block). We care only about the lowest 32
  43         * bits so this doesn't need to be uint64_t even with big files.
  44         */
  45        uint32_t pos;
  46
  47        /* x86 filter state */
  48        uint32_t x86_prev_mask;
  49
  50        /* Temporary space to hold the variables from struct xz_buf */
  51        uint8_t *out;
  52        size_t out_pos;
  53        size_t out_size;
  54
  55        struct {
  56                /* Amount of already filtered data in the beginning of buf */
  57                size_t filtered;
  58
  59                /* Total amount of data currently stored in buf  */
  60                size_t size;
  61
  62                /*
  63                 * Buffer to hold a mix of filtered and unfiltered data. This
  64                 * needs to be big enough to hold Alignment + 2 * Look-ahead:
  65                 *
  66                 * Type         Alignment   Look-ahead
  67                 * x86              1           4
  68                 * PowerPC          4           0
  69                 * IA-64           16           0
  70                 * ARM              4           0
  71                 * ARM-Thumb        2           2
  72                 * SPARC            4           0
  73                 */
  74                uint8_t buf[16];
  75        } temp;
  76};
  77
  78#ifdef XZ_DEC_X86
  79/*
  80 * This is macro used to test the most significant byte of a memory address
  81 * in an x86 instruction.
  82 */
  83#define bcj_x86_test_msbyte(b) ((b) == 0x00 || (b) == 0xFF)
  84
  85static noinline_for_stack size_t XZ_FUNC bcj_x86(
  86                struct xz_dec_bcj *s, uint8_t *buf, size_t size)
  87{
  88        static const bool mask_to_allowed_status[8]
  89                = { true, true, true, false, true, false, false, false };
  90
  91        static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
  92
  93        size_t i;
  94        size_t prev_pos = (size_t)-1;
  95        uint32_t prev_mask = s->x86_prev_mask;
  96        uint32_t src;
  97        uint32_t dest;
  98        uint32_t j;
  99        uint8_t b;
 100
 101        if (size <= 4)
 102                return 0;
 103
 104        size -= 4;
 105        for (i = 0; i < size; ++i) {
 106                if ((buf[i] & 0xFE) != 0xE8)
 107                        continue;
 108
 109                prev_pos = i - prev_pos;
 110                if (prev_pos > 3) {
 111                        prev_mask = 0;
 112                } else {
 113                        prev_mask = (prev_mask << (prev_pos - 1)) & 7;
 114                        if (prev_mask != 0) {
 115                                b = buf[i + 4 - mask_to_bit_num[prev_mask]];
 116                                if (!mask_to_allowed_status[prev_mask]
 117                                                || bcj_x86_test_msbyte(b)) {
 118                                        prev_pos = i;
 119                                        prev_mask = (prev_mask << 1) | 1;
 120                                        continue;
 121                                }
 122                        }
 123                }
 124
 125                prev_pos = i;
 126
 127                if (bcj_x86_test_msbyte(buf[i + 4])) {
 128                        src = get_unaligned_le32(buf + i + 1);
 129                        while (true) {
 130                                dest = src - (s->pos + (uint32_t)i + 5);
 131                                if (prev_mask == 0)
 132                                        break;
 133
 134                                j = mask_to_bit_num[prev_mask] * 8;
 135                                b = (uint8_t)(dest >> (24 - j));
 136                                if (!bcj_x86_test_msbyte(b))
 137                                        break;
 138
 139                                src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
 140                        }
 141
 142                        dest &= 0x01FFFFFF;
 143                        dest |= (uint32_t)0 - (dest & 0x01000000);
 144                        put_unaligned_le32(dest, buf + i + 1);
 145                        i += 4;
 146                } else {
 147                        prev_mask = (prev_mask << 1) | 1;
 148                }
 149        }
 150
 151        prev_pos = i - prev_pos;
 152        s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
 153        return i;
 154}
 155#endif
 156
 157#ifdef XZ_DEC_POWERPC
 158static noinline_for_stack size_t XZ_FUNC bcj_powerpc(
 159                struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 160{
 161        size_t i;
 162        uint32_t instr;
 163
 164        for (i = 0; i + 4 <= size; i += 4) {
 165                instr = get_unaligned_be32(buf + i);
 166                if ((instr & 0xFC000003) == 0x48000001) {
 167                        instr &= 0x03FFFFFC;
 168                        instr -= s->pos + (uint32_t)i;
 169                        instr &= 0x03FFFFFC;
 170                        instr |= 0x48000001;
 171                        put_unaligned_be32(instr, buf + i);
 172                }
 173        }
 174
 175        return i;
 176}
 177#endif
 178
 179#ifdef XZ_DEC_IA64
 180static noinline_for_stack size_t XZ_FUNC bcj_ia64(
 181                struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 182{
 183        static const uint8_t branch_table[32] = {
 184                0, 0, 0, 0, 0, 0, 0, 0,
 185                0, 0, 0, 0, 0, 0, 0, 0,
 186                4, 4, 6, 6, 0, 0, 7, 7,
 187                4, 4, 0, 0, 4, 4, 0, 0
 188        };
 189
 190        /*
 191         * The local variables take a little bit stack space, but it's less
 192         * than what LZMA2 decoder takes, so it doesn't make sense to reduce
 193         * stack usage here without doing that for the LZMA2 decoder too.
 194         */
 195
 196        /* Loop counters */
 197        size_t i;
 198        size_t j;
 199
 200        /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
 201        uint32_t slot;
 202
 203        /* Bitwise offset of the instruction indicated by slot */
 204        uint32_t bit_pos;
 205
 206        /* bit_pos split into byte and bit parts */
 207        uint32_t byte_pos;
 208        uint32_t bit_res;
 209
 210        /* Address part of an instruction */
 211        uint32_t addr;
 212
 213        /* Mask used to detect which instructions to convert */
 214        uint32_t mask;
 215
 216        /* 41-bit instruction stored somewhere in the lowest 48 bits */
 217        uint64_t instr;
 218
 219        /* Instruction normalized with bit_res for easier manipulation */
 220        uint64_t norm;
 221
 222        for (i = 0; i + 16 <= size; i += 16) {
 223                mask = branch_table[buf[i] & 0x1F];
 224                for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
 225                        if (((mask >> slot) & 1) == 0)
 226                                continue;
 227
 228                        byte_pos = bit_pos >> 3;
 229                        bit_res = bit_pos & 7;
 230                        instr = 0;
 231                        for (j = 0; j < 6; ++j)
 232                                instr |= (uint64_t)(buf[i + j + byte_pos])
 233                                                << (8 * j);
 234
 235                        norm = instr >> bit_res;
 236
 237                        if (((norm >> 37) & 0x0F) == 0x05
 238                                        && ((norm >> 9) & 0x07) == 0) {
 239                                addr = (norm >> 13) & 0x0FFFFF;
 240                                addr |= ((uint32_t)(norm >> 36) & 1) << 20;
 241                                addr <<= 4;
 242                                addr -= s->pos + (uint32_t)i;
 243                                addr >>= 4;
 244
 245                                norm &= ~((uint64_t)0x8FFFFF << 13);
 246                                norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
 247                                norm |= (uint64_t)(addr & 0x100000)
 248                                                << (36 - 20);
 249
 250                                instr &= (1 << bit_res) - 1;
 251                                instr |= norm << bit_res;
 252
 253                                for (j = 0; j < 6; j++)
 254                                        buf[i + j + byte_pos]
 255                                                = (uint8_t)(instr >> (8 * j));
 256                        }
 257                }
 258        }
 259
 260        return i;
 261}
 262#endif
 263
 264#ifdef XZ_DEC_ARM
 265static noinline_for_stack size_t XZ_FUNC bcj_arm(
 266                struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 267{
 268        size_t i;
 269        uint32_t addr;
 270
 271        for (i = 0; i + 4 <= size; i += 4) {
 272                if (buf[i + 3] == 0xEB) {
 273                        addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
 274                                        | ((uint32_t)buf[i + 2] << 16);
 275                        addr <<= 2;
 276                        addr -= s->pos + (uint32_t)i + 8;
 277                        addr >>= 2;
 278                        buf[i] = (uint8_t)addr;
 279                        buf[i + 1] = (uint8_t)(addr >> 8);
 280                        buf[i + 2] = (uint8_t)(addr >> 16);
 281                }
 282        }
 283
 284        return i;
 285}
 286#endif
 287
 288#ifdef XZ_DEC_ARMTHUMB
 289static noinline_for_stack size_t XZ_FUNC bcj_armthumb(
 290                struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 291{
 292        size_t i;
 293        uint32_t addr;
 294
 295        for (i = 0; i + 4 <= size; i += 2) {
 296                if ((buf[i + 1] & 0xF8) == 0xF0
 297                                && (buf[i + 3] & 0xF8) == 0xF8) {
 298                        addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
 299                                        | ((uint32_t)buf[i] << 11)
 300                                        | (((uint32_t)buf[i + 3] & 0x07) << 8)
 301                                        | (uint32_t)buf[i + 2];
 302                        addr <<= 1;
 303                        addr -= s->pos + (uint32_t)i + 4;
 304                        addr >>= 1;
 305                        buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
 306                        buf[i] = (uint8_t)(addr >> 11);
 307                        buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
 308                        buf[i + 2] = (uint8_t)addr;
 309                        i += 2;
 310                }
 311        }
 312
 313        return i;
 314}
 315#endif
 316
 317#ifdef XZ_DEC_SPARC
 318static noinline_for_stack size_t XZ_FUNC bcj_sparc(
 319                struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 320{
 321        size_t i;
 322        uint32_t instr;
 323
 324        for (i = 0; i + 4 <= size; i += 4) {
 325                instr = get_unaligned_be32(buf + i);
 326                if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
 327                        instr <<= 2;
 328                        instr -= s->pos + (uint32_t)i;
 329                        instr >>= 2;
 330                        instr = ((uint32_t)0x40000000 - (instr & 0x400000))
 331                                        | 0x40000000 | (instr & 0x3FFFFF);
 332                        put_unaligned_be32(instr, buf + i);
 333                }
 334        }
 335
 336        return i;
 337}
 338#endif
 339
 340/*
 341 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
 342 * of data that got filtered.
 343 *
 344 * NOTE: This is implemented as a switch statement to avoid using function
 345 * pointers, which could be problematic in the kernel boot code, which must
 346 * avoid pointers to static data (at least on x86).
 347 */
 348static void XZ_FUNC bcj_apply(struct xz_dec_bcj *s,
 349                uint8_t *buf, size_t *pos, size_t size)
 350{
 351        size_t filtered;
 352
 353        buf += *pos;
 354        size -= *pos;
 355
 356        switch (s->type) {
 357#ifdef XZ_DEC_X86
 358        case BCJ_X86:
 359                filtered = bcj_x86(s, buf, size);
 360                break;
 361#endif
 362#ifdef XZ_DEC_POWERPC
 363        case BCJ_POWERPC:
 364                filtered = bcj_powerpc(s, buf, size);
 365                break;
 366#endif
 367#ifdef XZ_DEC_IA64
 368        case BCJ_IA64:
 369                filtered = bcj_ia64(s, buf, size);
 370                break;
 371#endif
 372#ifdef XZ_DEC_ARM
 373        case BCJ_ARM:
 374                filtered = bcj_arm(s, buf, size);
 375                break;
 376#endif
 377#ifdef XZ_DEC_ARMTHUMB
 378        case BCJ_ARMTHUMB:
 379                filtered = bcj_armthumb(s, buf, size);
 380                break;
 381#endif
 382#ifdef XZ_DEC_SPARC
 383        case BCJ_SPARC:
 384                filtered = bcj_sparc(s, buf, size);
 385                break;
 386#endif
 387        default:
 388                /* Never reached but silence compiler warnings. */
 389                filtered = 0;
 390                break;
 391        }
 392
 393        *pos += filtered;
 394        s->pos += filtered;
 395}
 396
 397/*
 398 * Flush pending filtered data from temp to the output buffer.
 399 * Move the remaining mixture of possibly filtered and unfiltered
 400 * data to the beginning of temp.
 401 */
 402static void XZ_FUNC bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
 403{
 404        size_t copy_size;
 405
 406        copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
 407        memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
 408        b->out_pos += copy_size;
 409
 410        s->temp.filtered -= copy_size;
 411        s->temp.size -= copy_size;
 412        memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
 413}
 414
 415/*
 416 * The BCJ filter functions are primitive in sense that they process the
 417 * data in chunks of 1-16 bytes. To hide this issue, this function does
 418 * some buffering.
 419 */
 420XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
 421                struct xz_dec_lzma2 *lzma2, struct xz_buf *b)
 422{
 423        size_t out_start;
 424
 425        /*
 426         * Flush pending already filtered data to the output buffer. Return
 427         * immediatelly if we couldn't flush everything, or if the next
 428         * filter in the chain had already returned XZ_STREAM_END.
 429         */
 430        if (s->temp.filtered > 0) {
 431                bcj_flush(s, b);
 432                if (s->temp.filtered > 0)
 433                        return XZ_OK;
 434
 435                if (s->ret == XZ_STREAM_END)
 436                        return XZ_STREAM_END;
 437        }
 438
 439        /*
 440         * If we have more output space than what is currently pending in
 441         * temp, copy the unfiltered data from temp to the output buffer
 442         * and try to fill the output buffer by decoding more data from the
 443         * next filter in the chain. Apply the BCJ filter on the new data
 444         * in the output buffer. If everything cannot be filtered, copy it
 445         * to temp and rewind the output buffer position accordingly.
 446         */
 447        if (s->temp.size < b->out_size - b->out_pos) {
 448                out_start = b->out_pos;
 449                memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
 450                b->out_pos += s->temp.size;
 451
 452                s->ret = xz_dec_lzma2_run(lzma2, b);
 453                if (s->ret != XZ_STREAM_END
 454                                && (s->ret != XZ_OK || s->single_call))
 455                        return s->ret;
 456
 457                bcj_apply(s, b->out, &out_start, b->out_pos);
 458
 459                /*
 460                 * As an exception, if the next filter returned XZ_STREAM_END,
 461                 * we can do that too, since the last few bytes that remain
 462                 * unfiltered are meant to remain unfiltered.
 463                 */
 464                if (s->ret == XZ_STREAM_END)
 465                        return XZ_STREAM_END;
 466
 467                s->temp.size = b->out_pos - out_start;
 468                b->out_pos -= s->temp.size;
 469                memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
 470        }
 471
 472        /*
 473         * If we have unfiltered data in temp, try to fill by decoding more
 474         * data from the next filter. Apply the BCJ filter on temp. Then we
 475         * hopefully can fill the actual output buffer by copying filtered
 476         * data from temp. A mix of filtered and unfiltered data may be left
 477         * in temp; it will be taken care on the next call to this function.
 478         */
 479        if (s->temp.size > 0) {
 480                /* Make b->out{,_pos,_size} temporarily point to s->temp. */
 481                s->out = b->out;
 482                s->out_pos = b->out_pos;
 483                s->out_size = b->out_size;
 484                b->out = s->temp.buf;
 485                b->out_pos = s->temp.size;
 486                b->out_size = sizeof(s->temp.buf);
 487
 488                s->ret = xz_dec_lzma2_run(lzma2, b);
 489
 490                s->temp.size = b->out_pos;
 491                b->out = s->out;
 492                b->out_pos = s->out_pos;
 493                b->out_size = s->out_size;
 494
 495                if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
 496                        return s->ret;
 497
 498                bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
 499
 500                /*
 501                 * If the next filter returned XZ_STREAM_END, we mark that
 502                 * everything is filtered, since the last unfiltered bytes
 503                 * of the stream are meant to be left as is.
 504                 */
 505                if (s->ret == XZ_STREAM_END)
 506                        s->temp.filtered = s->temp.size;
 507
 508                bcj_flush(s, b);
 509                if (s->temp.filtered > 0)
 510                        return XZ_OK;
 511        }
 512
 513        return s->ret;
 514}
 515
 516XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call)
 517{
 518        struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
 519        if (s != NULL)
 520                s->single_call = single_call;
 521
 522        return s;
 523}
 524
 525XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset(
 526                struct xz_dec_bcj *s, uint8_t id)
 527{
 528        switch (id) {
 529#ifdef XZ_DEC_X86
 530        case BCJ_X86:
 531#endif
 532#ifdef XZ_DEC_POWERPC
 533        case BCJ_POWERPC:
 534#endif
 535#ifdef XZ_DEC_IA64
 536        case BCJ_IA64:
 537#endif
 538#ifdef XZ_DEC_ARM
 539        case BCJ_ARM:
 540#endif
 541#ifdef XZ_DEC_ARMTHUMB
 542        case BCJ_ARMTHUMB:
 543#endif
 544#ifdef XZ_DEC_SPARC
 545        case BCJ_SPARC:
 546#endif
 547                break;
 548
 549        default:
 550                /* Unsupported Filter ID */
 551                return XZ_OPTIONS_ERROR;
 552        }
 553
 554        s->type = id;
 555        s->ret = XZ_OK;
 556        s->pos = 0;
 557        s->x86_prev_mask = 0;
 558        s->temp.filtered = 0;
 559        s->temp.size = 0;
 560
 561        return XZ_OK;
 562}
 563
 564#endif
 565