busybox/archival/libarchive/decompress_bunzip2.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
   4 *
   5 * Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
   6 * which also acknowledges contributions by Mike Burrows, David Wheeler,
   7 * Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
   8 * Robert Sedgewick, and Jon L. Bentley.
   9 *
  10 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
  11 */
  12/*
  13        Size and speed optimizations by Manuel Novoa III  (mjn3@codepoet.org).
  14
  15        More efficient reading of Huffman codes, a streamlined read_bunzip()
  16        function, and various other tweaks.  In (limited) tests, approximately
  17        20% faster than bzcat on x86 and about 10% faster on arm.
  18
  19        Note that about 2/3 of the time is spent in read_bunzip() reversing
  20        the Burrows-Wheeler transformation.  Much of that time is delay
  21        resulting from cache misses.
  22
  23        (2010 update by vda: profiled "bzcat <84mbyte.bz2 >/dev/null"
  24        on x86-64 CPU with L2 > 1M: get_next_block is hotter than read_bunzip:
  25        %time seconds   calls function
  26        71.01   12.69     444 get_next_block
  27        28.65    5.12   93065 read_bunzip
  28        00.22    0.04 7736490 get_bits
  29        00.11    0.02      47 dealloc_bunzip
  30        00.00    0.00   93018 full_write
  31        ...)
  32
  33
  34        I would ask that anyone benefiting from this work, especially those
  35        using it in commercial products, consider making a donation to my local
  36        non-profit hospice organization (www.hospiceacadiana.com) in the name of
  37        the woman I loved, Toni W. Hagan, who passed away Feb. 12, 2003.
  38
  39        Manuel
  40 */
  41#include "libbb.h"
  42#include "bb_archive.h"
  43
  44#if 0
  45# define dbg(...) bb_error_msg(__VA_ARGS__)
  46#else
  47# define dbg(...) ((void)0)
  48#endif
  49
  50/* Constants for Huffman coding */
  51#define MAX_GROUPS          6
  52#define GROUP_SIZE          50      /* 64 would have been more efficient */
  53#define MAX_HUFCODE_BITS    20      /* Longest Huffman code allowed */
  54#define MAX_SYMBOLS         258     /* 256 literals + RUNA + RUNB */
  55#define SYMBOL_RUNA         0
  56#define SYMBOL_RUNB         1
  57
  58/* Status return values */
  59#define RETVAL_OK                       0
  60#define RETVAL_LAST_BLOCK               (dbg("%d", __LINE__), -1)
  61#define RETVAL_NOT_BZIP_DATA            (dbg("%d", __LINE__), -2)
  62#define RETVAL_UNEXPECTED_INPUT_EOF     (dbg("%d", __LINE__), -3)
  63#define RETVAL_SHORT_WRITE              (dbg("%d", __LINE__), -4)
  64#define RETVAL_DATA_ERROR               (dbg("%d", __LINE__), -5)
  65#define RETVAL_OUT_OF_MEMORY            (dbg("%d", __LINE__), -6)
  66#define RETVAL_OBSOLETE_INPUT           (dbg("%d", __LINE__), -7)
  67
  68/* Other housekeeping constants */
  69#define IOBUF_SIZE          4096
  70
  71/* This is what we know about each Huffman coding group */
  72struct group_data {
  73        /* We have an extra slot at the end of limit[] for a sentinel value. */
  74        int limit[MAX_HUFCODE_BITS+1], base[MAX_HUFCODE_BITS], permute[MAX_SYMBOLS];
  75        int minLen, maxLen;
  76};
  77
  78/* Structure holding all the housekeeping data, including IO buffers and
  79 * memory that persists between calls to bunzip
  80 * Found the most used member:
  81 *  cat this_file.c | sed -e 's/"/ /g' -e "s/'/ /g" | xargs -n1 \
  82 *  | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER
  83 * and moved it (inbufBitCount) to offset 0.
  84 */
  85struct bunzip_data {
  86        /* I/O tracking data (file handles, buffers, positions, etc.) */
  87        unsigned inbufBitCount, inbufBits;
  88        int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/;
  89        uint8_t *inbuf /*,*outbuf*/;
  90
  91        /* State for interrupting output loop */
  92        int writeCopies, writePos, writeRunCountdown, writeCount;
  93        int writeCurrent; /* actually a uint8_t */
  94
  95        /* The CRC values stored in the block header and calculated from the data */
  96        uint32_t headerCRC, totalCRC, writeCRC;
  97
  98        /* Intermediate buffer and its size (in bytes) */
  99        uint32_t *dbuf;
 100        unsigned dbufSize;
 101
 102        /* For I/O error handling */
 103        jmp_buf *jmpbuf;
 104
 105        /* Big things go last (register-relative addressing can be larger for big offsets) */
 106        uint32_t crc32Table[256];
 107        uint8_t selectors[32768];  /* nSelectors=15 bits */
 108        struct group_data groups[MAX_GROUPS];  /* Huffman coding tables */
 109};
 110typedef struct bunzip_data bunzip_data;
 111
 112
 113/* Return the next nnn bits of input.  All reads from the compressed input
 114   are done through this function.  All reads are big endian */
 115static unsigned get_bits(bunzip_data *bd, int bits_wanted)
 116{
 117        unsigned bits = 0;
 118        /* Cache bd->inbufBitCount in a CPU register (hopefully): */
 119        int bit_count = bd->inbufBitCount;
 120
 121        /* If we need to get more data from the byte buffer, do so.  (Loop getting
 122           one byte at a time to enforce endianness and avoid unaligned access.) */
 123        while (bit_count < bits_wanted) {
 124
 125                /* If we need to read more data from file into byte buffer, do so */
 126                if (bd->inbufPos == bd->inbufCount) {
 127                        /* if "no input fd" case: in_fd == -1, read fails, we jump */
 128                        bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE);
 129                        if (bd->inbufCount <= 0)
 130                                longjmp(*bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF);
 131                        bd->inbufPos = 0;
 132                }
 133
 134                /* Avoid 32-bit overflow (dump bit buffer to top of output) */
 135                if (bit_count >= 24) {
 136                        bits = bd->inbufBits & ((1U << bit_count) - 1);
 137                        bits_wanted -= bit_count;
 138                        bits <<= bits_wanted;
 139                        bit_count = 0;
 140                }
 141
 142                /* Grab next 8 bits of input from buffer. */
 143                bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
 144                bit_count += 8;
 145        }
 146
 147        /* Calculate result */
 148        bit_count -= bits_wanted;
 149        bd->inbufBitCount = bit_count;
 150        bits |= (bd->inbufBits >> bit_count) & ((1 << bits_wanted) - 1);
 151
 152        return bits;
 153}
 154//#define get_bits(bd, n) (dbg("%d:get_bits()", __LINE__), get_bits(bd, n))
 155
 156/* Unpacks the next block and sets up for the inverse Burrows-Wheeler step. */
 157static int get_next_block(bunzip_data *bd)
 158{
 159        int groupCount, selector,
 160                i, j, symCount, symTotal, nSelectors, byteCount[256];
 161        uint8_t uc, symToByte[256], mtfSymbol[256], *selectors;
 162        uint32_t *dbuf;
 163        unsigned origPtr, t;
 164        unsigned dbufCount, runPos;
 165        unsigned runCnt = runCnt; /* for compiler */
 166
 167        dbuf = bd->dbuf;
 168        selectors = bd->selectors;
 169
 170/* In bbox, we are ok with aborting through setjmp which is set up in start_bunzip */
 171#if 0
 172        /* Reset longjmp I/O error handling */
 173        i = setjmp(bd->jmpbuf);
 174        if (i) return i;
 175#endif
 176
 177        /* Read in header signature and CRC, then validate signature.
 178           (last block signature means CRC is for whole file, return now) */
 179        i = get_bits(bd, 24);
 180        j = get_bits(bd, 24);
 181        bd->headerCRC = get_bits(bd, 32);
 182        if ((i == 0x177245) && (j == 0x385090))
 183                return RETVAL_LAST_BLOCK;
 184        if ((i != 0x314159) || (j != 0x265359))
 185                return RETVAL_NOT_BZIP_DATA;
 186
 187        /* We can add support for blockRandomised if anybody complains.  There was
 188           some code for this in busybox 1.0.0-pre3, but nobody ever noticed that
 189           it didn't actually work. */
 190        if (get_bits(bd, 1))
 191                return RETVAL_OBSOLETE_INPUT;
 192        origPtr = get_bits(bd, 24);
 193        if (origPtr > bd->dbufSize)
 194                return RETVAL_DATA_ERROR;
 195
 196        /* mapping table: if some byte values are never used (encoding things
 197           like ascii text), the compression code removes the gaps to have fewer
 198           symbols to deal with, and writes a sparse bitfield indicating which
 199           values were present.  We make a translation table to convert the symbols
 200           back to the corresponding bytes. */
 201        symTotal = 0;
 202        i = 0;
 203        t = get_bits(bd, 16);
 204        do {
 205                if (t & (1 << 15)) {
 206                        unsigned inner_map = get_bits(bd, 16);
 207                        do {
 208                                if (inner_map & (1 << 15))
 209                                        symToByte[symTotal++] = i;
 210                                inner_map <<= 1;
 211                                i++;
 212                        } while (i & 15);
 213                        i -= 16;
 214                }
 215                t <<= 1;
 216                i += 16;
 217        } while (i < 256);
 218
 219        /* How many different Huffman coding groups does this block use? */
 220        groupCount = get_bits(bd, 3);
 221        if (groupCount < 2 || groupCount > MAX_GROUPS)
 222                return RETVAL_DATA_ERROR;
 223
 224        /* nSelectors: Every GROUP_SIZE many symbols we select a new Huffman coding
 225           group.  Read in the group selector list, which is stored as MTF encoded
 226           bit runs.  (MTF=Move To Front, as each value is used it's moved to the
 227           start of the list.) */
 228        for (i = 0; i < groupCount; i++)
 229                mtfSymbol[i] = i;
 230        nSelectors = get_bits(bd, 15);
 231        if (!nSelectors)
 232                return RETVAL_DATA_ERROR;
 233        for (i = 0; i < nSelectors; i++) {
 234                uint8_t tmp_byte;
 235                /* Get next value */
 236                int n = 0;
 237                while (get_bits(bd, 1)) {
 238                        n++;
 239                        if (n >= groupCount)
 240                                return RETVAL_DATA_ERROR;
 241                }
 242                /* Decode MTF to get the next selector */
 243                tmp_byte = mtfSymbol[n];
 244                while (--n >= 0)
 245                        mtfSymbol[n + 1] = mtfSymbol[n];
 246//We catch it later, in the second loop where we use selectors[i].
 247//Maybe this is a better place, though?
 248//              if (tmp_byte >= groupCount) {
 249//                      dbg("%d: selectors[%d]:%d groupCount:%d",
 250//                                      __LINE__, i, tmp_byte, groupCount);
 251//                      return RETVAL_DATA_ERROR;
 252//              }
 253                mtfSymbol[0] = selectors[i] = tmp_byte;
 254        }
 255
 256        /* Read the Huffman coding tables for each group, which code for symTotal
 257           literal symbols, plus two run symbols (RUNA, RUNB) */
 258        symCount = symTotal + 2;
 259        for (j = 0; j < groupCount; j++) {
 260                uint8_t length[MAX_SYMBOLS];
 261                /* 8 bits is ALMOST enough for temp[], see below */
 262                unsigned temp[MAX_HUFCODE_BITS+1];
 263                struct group_data *hufGroup;
 264                int *base, *limit;
 265                int minLen, maxLen, pp, len_m1;
 266
 267                /* Read Huffman code lengths for each symbol.  They're stored in
 268                   a way similar to mtf; record a starting value for the first symbol,
 269                   and an offset from the previous value for every symbol after that.
 270                   (Subtracting 1 before the loop and then adding it back at the end is
 271                   an optimization that makes the test inside the loop simpler: symbol
 272                   length 0 becomes negative, so an unsigned inequality catches it.) */
 273                len_m1 = get_bits(bd, 5) - 1;
 274                for (i = 0; i < symCount; i++) {
 275                        for (;;) {
 276                                int two_bits;
 277                                if ((unsigned)len_m1 > (MAX_HUFCODE_BITS-1))
 278                                        return RETVAL_DATA_ERROR;
 279
 280                                /* If first bit is 0, stop.  Else second bit indicates whether
 281                                   to increment or decrement the value.  Optimization: grab 2
 282                                   bits and unget the second if the first was 0. */
 283                                two_bits = get_bits(bd, 2);
 284                                if (two_bits < 2) {
 285                                        bd->inbufBitCount++;
 286                                        break;
 287                                }
 288
 289                                /* Add one if second bit 1, else subtract 1.  Avoids if/else */
 290                                len_m1 += (((two_bits+1) & 2) - 1);
 291                        }
 292
 293                        /* Correct for the initial -1, to get the final symbol length */
 294                        length[i] = len_m1 + 1;
 295                }
 296
 297                /* Find largest and smallest lengths in this group */
 298                minLen = maxLen = length[0];
 299                for (i = 1; i < symCount; i++) {
 300                        if (length[i] > maxLen)
 301                                maxLen = length[i];
 302                        else if (length[i] < minLen)
 303                                minLen = length[i];
 304                }
 305
 306                /* Calculate permute[], base[], and limit[] tables from length[].
 307                 *
 308                 * permute[] is the lookup table for converting Huffman coded symbols
 309                 * into decoded symbols.  base[] is the amount to subtract from the
 310                 * value of a Huffman symbol of a given length when using permute[].
 311                 *
 312                 * limit[] indicates the largest numerical value a symbol with a given
 313                 * number of bits can have.  This is how the Huffman codes can vary in
 314                 * length: each code with a value>limit[length] needs another bit.
 315                 */
 316                hufGroup = bd->groups + j;
 317                hufGroup->minLen = minLen;
 318                hufGroup->maxLen = maxLen;
 319
 320                /* Note that minLen can't be smaller than 1, so we adjust the base
 321                   and limit array pointers so we're not always wasting the first
 322                   entry.  We do this again when using them (during symbol decoding). */
 323                base = hufGroup->base - 1;
 324                limit = hufGroup->limit - 1;
 325
 326                /* Calculate permute[].  Concurrently, initialize temp[] and limit[]. */
 327                pp = 0;
 328                for (i = minLen; i <= maxLen; i++) {
 329                        int k;
 330                        temp[i] = limit[i] = 0;
 331                        for (k = 0; k < symCount; k++)
 332                                if (length[k] == i)
 333                                        hufGroup->permute[pp++] = k;
 334                }
 335
 336                /* Count symbols coded for at each bit length */
 337                /* NB: in pathological cases, temp[8] can end ip being 256.
 338                 * That's why uint8_t is too small for temp[]. */
 339                for (i = 0; i < symCount; i++)
 340                        temp[length[i]]++;
 341
 342                /* Calculate limit[] (the largest symbol-coding value at each bit
 343                 * length, which is (previous limit<<1)+symbols at this level), and
 344                 * base[] (number of symbols to ignore at each bit length, which is
 345                 * limit minus the cumulative count of symbols coded for already). */
 346                pp = t = 0;
 347                for (i = minLen; i < maxLen;) {
 348                        unsigned temp_i = temp[i];
 349
 350                        pp += temp_i;
 351
 352                        /* We read the largest possible symbol size and then unget bits
 353                           after determining how many we need, and those extra bits could
 354                           be set to anything.  (They're noise from future symbols.)  At
 355                           each level we're really only interested in the first few bits,
 356                           so here we set all the trailing to-be-ignored bits to 1 so they
 357                           don't affect the value>limit[length] comparison. */
 358                        limit[i] = (pp << (maxLen - i)) - 1;
 359                        pp <<= 1;
 360                        t += temp_i;
 361                        base[++i] = pp - t;
 362                }
 363                limit[maxLen] = pp + temp[maxLen] - 1;
 364                limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */
 365                base[minLen] = 0;
 366        }
 367
 368        /* We've finished reading and digesting the block header.  Now read this
 369           block's Huffman coded symbols from the file and undo the Huffman coding
 370           and run length encoding, saving the result into dbuf[dbufCount++] = uc */
 371
 372        /* Initialize symbol occurrence counters and symbol Move To Front table */
 373        /*memset(byteCount, 0, sizeof(byteCount)); - smaller, but slower */
 374        for (i = 0; i < 256; i++) {
 375                byteCount[i] = 0;
 376                mtfSymbol[i] = (uint8_t)i;
 377        }
 378
 379        /* Loop through compressed symbols. */
 380
 381        runPos = dbufCount = selector = 0;
 382        for (;;) {
 383                struct group_data *hufGroup;
 384                int *base, *limit;
 385                int nextSym;
 386                uint8_t ngrp;
 387
 388                /* Fetch next Huffman coding group from list. */
 389                symCount = GROUP_SIZE - 1;
 390                if (selector >= nSelectors)
 391                        return RETVAL_DATA_ERROR;
 392                ngrp = selectors[selector++];
 393                if (ngrp >= groupCount) {
 394                        dbg("%d selectors[%d]:%d groupCount:%d",
 395                                __LINE__, selector-1, ngrp, groupCount);
 396                        return RETVAL_DATA_ERROR;
 397                }
 398                hufGroup = bd->groups + ngrp;
 399                base = hufGroup->base - 1;
 400                limit = hufGroup->limit - 1;
 401
 402 continue_this_group:
 403                /* Read next Huffman-coded symbol. */
 404
 405                /* Note: It is far cheaper to read maxLen bits and back up than it is
 406                   to read minLen bits and then add additional bit at a time, testing
 407                   as we go.  Because there is a trailing last block (with file CRC),
 408                   there is no danger of the overread causing an unexpected EOF for a
 409                   valid compressed file.
 410                 */
 411                if (1) {
 412                        /* As a further optimization, we do the read inline
 413                           (falling back to a call to get_bits if the buffer runs dry).
 414                         */
 415                        int new_cnt;
 416                        while ((new_cnt = bd->inbufBitCount - hufGroup->maxLen) < 0) {
 417                                /* bd->inbufBitCount < hufGroup->maxLen */
 418                                if (bd->inbufPos == bd->inbufCount) {
 419                                        nextSym = get_bits(bd, hufGroup->maxLen);
 420                                        goto got_huff_bits;
 421                                }
 422                                bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
 423                                bd->inbufBitCount += 8;
 424                        };
 425                        bd->inbufBitCount = new_cnt; /* "bd->inbufBitCount -= hufGroup->maxLen;" */
 426                        nextSym = (bd->inbufBits >> new_cnt) & ((1 << hufGroup->maxLen) - 1);
 427 got_huff_bits: ;
 428                } else { /* unoptimized equivalent */
 429                        nextSym = get_bits(bd, hufGroup->maxLen);
 430                }
 431                /* Figure how many bits are in next symbol and unget extras */
 432                i = hufGroup->minLen;
 433                while (nextSym > limit[i])
 434                        ++i;
 435                j = hufGroup->maxLen - i;
 436                if (j < 0)
 437                        return RETVAL_DATA_ERROR;
 438                bd->inbufBitCount += j;
 439
 440                /* Huffman decode value to get nextSym (with bounds checking) */
 441                nextSym = (nextSym >> j) - base[i];
 442                if ((unsigned)nextSym >= MAX_SYMBOLS)
 443                        return RETVAL_DATA_ERROR;
 444                nextSym = hufGroup->permute[nextSym];
 445
 446                /* We have now decoded the symbol, which indicates either a new literal
 447                   byte, or a repeated run of the most recent literal byte.  First,
 448                   check if nextSym indicates a repeated run, and if so loop collecting
 449                   how many times to repeat the last literal. */
 450                if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */
 451
 452                        /* If this is the start of a new run, zero out counter */
 453                        if (runPos == 0) {
 454                                runPos = 1;
 455                                runCnt = 0;
 456                        }
 457
 458                        /* Neat trick that saves 1 symbol: instead of or-ing 0 or 1 at
 459                           each bit position, add 1 or 2 instead.  For example,
 460                           1011 is 1<<0 + 1<<1 + 2<<2.  1010 is 2<<0 + 2<<1 + 1<<2.
 461                           You can make any bit pattern that way using 1 less symbol than
 462                           the basic or 0/1 method (except all bits 0, which would use no
 463                           symbols, but a run of length 0 doesn't mean anything in this
 464                           context).  Thus space is saved. */
 465                        runCnt += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */
 466//The 32-bit overflow of runCnt wasn't yet seen, but probably can happen.
 467//This would be the fix (catches too large count way before it can overflow):
 468//                      if (runCnt > bd->dbufSize) {
 469//                              dbg("runCnt:%u > dbufSize:%u RETVAL_DATA_ERROR",
 470//                                              runCnt, bd->dbufSize);
 471//                              return RETVAL_DATA_ERROR;
 472//                      }
 473                        if (runPos < bd->dbufSize) runPos <<= 1;
 474                        goto end_of_huffman_loop;
 475                }
 476
 477                /* When we hit the first non-run symbol after a run, we now know
 478                   how many times to repeat the last literal, so append that many
 479                   copies to our buffer of decoded symbols (dbuf) now.  (The last
 480                   literal used is the one at the head of the mtfSymbol array.) */
 481                if (runPos != 0) {
 482                        uint8_t tmp_byte;
 483                        if (dbufCount + runCnt > bd->dbufSize) {
 484                                dbg("dbufCount:%u+runCnt:%u %u > dbufSize:%u RETVAL_DATA_ERROR",
 485                                                dbufCount, runCnt, dbufCount + runCnt, bd->dbufSize);
 486                                return RETVAL_DATA_ERROR;
 487                        }
 488                        tmp_byte = symToByte[mtfSymbol[0]];
 489                        byteCount[tmp_byte] += runCnt;
 490                        while ((int)--runCnt >= 0)
 491                                dbuf[dbufCount++] = (uint32_t)tmp_byte;
 492                        runPos = 0;
 493                }
 494
 495                /* Is this the terminating symbol? */
 496                if (nextSym > symTotal) break;
 497
 498                /* At this point, nextSym indicates a new literal character.  Subtract
 499                   one to get the position in the MTF array at which this literal is
 500                   currently to be found.  (Note that the result can't be -1 or 0,
 501                   because 0 and 1 are RUNA and RUNB.  But another instance of the
 502                   first symbol in the mtf array, position 0, would have been handled
 503                   as part of a run above.  Therefore 1 unused mtf position minus
 504                   2 non-literal nextSym values equals -1.) */
 505                if (dbufCount >= bd->dbufSize) return RETVAL_DATA_ERROR;
 506                i = nextSym - 1;
 507                uc = mtfSymbol[i];
 508
 509                /* Adjust the MTF array.  Since we typically expect to move only a
 510                 * small number of symbols, and are bound by 256 in any case, using
 511                 * memmove here would typically be bigger and slower due to function
 512                 * call overhead and other assorted setup costs. */
 513                do {
 514                        mtfSymbol[i] = mtfSymbol[i-1];
 515                } while (--i);
 516                mtfSymbol[0] = uc;
 517                uc = symToByte[uc];
 518
 519                /* We have our literal byte.  Save it into dbuf. */
 520                byteCount[uc]++;
 521                dbuf[dbufCount++] = (uint32_t)uc;
 522
 523                /* Skip group initialization if we're not done with this group.  Done
 524                 * this way to avoid compiler warning. */
 525 end_of_huffman_loop:
 526                if (--symCount >= 0) goto continue_this_group;
 527        }
 528
 529        /* At this point, we've read all the Huffman-coded symbols (and repeated
 530           runs) for this block from the input stream, and decoded them into the
 531           intermediate buffer.  There are dbufCount many decoded bytes in dbuf[].
 532           Now undo the Burrows-Wheeler transform on dbuf.
 533           See http://dogma.net/markn/articles/bwt/bwt.htm
 534         */
 535
 536        /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
 537        j = 0;
 538        for (i = 0; i < 256; i++) {
 539                int tmp_count = j + byteCount[i];
 540                byteCount[i] = j;
 541                j = tmp_count;
 542        }
 543
 544        /* Figure out what order dbuf would be in if we sorted it. */
 545        for (i = 0; i < dbufCount; i++) {
 546                uint8_t tmp_byte = (uint8_t)dbuf[i];
 547                int tmp_count = byteCount[tmp_byte];
 548                dbuf[tmp_count] |= (i << 8);
 549                byteCount[tmp_byte] = tmp_count + 1;
 550        }
 551
 552        /* Decode first byte by hand to initialize "previous" byte.  Note that it
 553           doesn't get output, and if the first three characters are identical
 554           it doesn't qualify as a run (hence writeRunCountdown=5). */
 555        if (dbufCount) {
 556                uint32_t tmp;
 557                if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR;
 558                tmp = dbuf[origPtr];
 559                bd->writeCurrent = (uint8_t)tmp;
 560                bd->writePos = (tmp >> 8);
 561                bd->writeRunCountdown = 5;
 562        }
 563        bd->writeCount = dbufCount;
 564
 565        return RETVAL_OK;
 566}
 567
 568/* Undo Burrows-Wheeler transform on intermediate buffer to produce output.
 569   If start_bunzip was initialized with out_fd=-1, then up to len bytes of
 570   data are written to outbuf.  Return value is number of bytes written or
 571   error (all errors are negative numbers).  If out_fd!=-1, outbuf and len
 572   are ignored, data is written to out_fd and return is RETVAL_OK or error.
 573
 574   NB: read_bunzip returns < 0 on error, or the number of *unfilled* bytes
 575   in outbuf. IOW: on EOF returns len ("all bytes are not filled"), not 0.
 576   (Why? This allows to get rid of one local variable)
 577*/
 578static int read_bunzip(bunzip_data *bd, char *outbuf, int len)
 579{
 580        const uint32_t *dbuf;
 581        int pos, current, previous;
 582        uint32_t CRC;
 583
 584        /* If we already have error/end indicator, return it */
 585        if (bd->writeCount < 0)
 586                return bd->writeCount;
 587
 588        dbuf = bd->dbuf;
 589
 590        /* Register-cached state (hopefully): */
 591        pos = bd->writePos;
 592        current = bd->writeCurrent;
 593        CRC = bd->writeCRC; /* small loss on x86-32 (not enough regs), win on x86-64 */
 594
 595        /* We will always have pending decoded data to write into the output
 596           buffer unless this is the very first call (in which case we haven't
 597           Huffman-decoded a block into the intermediate buffer yet). */
 598        if (bd->writeCopies) {
 599
 600 dec_writeCopies:
 601                /* Inside the loop, writeCopies means extra copies (beyond 1) */
 602                --bd->writeCopies;
 603
 604                /* Loop outputting bytes */
 605                for (;;) {
 606
 607                        /* If the output buffer is full, save cached state and return */
 608                        if (--len < 0) {
 609                                /* Unlikely branch.
 610                                 * Use of "goto" instead of keeping code here
 611                                 * helps compiler to realize this. */
 612                                goto outbuf_full;
 613                        }
 614
 615                        /* Write next byte into output buffer, updating CRC */
 616                        *outbuf++ = current;
 617                        CRC = (CRC << 8) ^ bd->crc32Table[(CRC >> 24) ^ current];
 618
 619                        /* Loop now if we're outputting multiple copies of this byte */
 620                        if (bd->writeCopies) {
 621                                /* Unlikely branch */
 622                                /*--bd->writeCopies;*/
 623                                /*continue;*/
 624                                /* Same, but (ab)using other existing --writeCopies operation
 625                                 * (and this if() compiles into just test+branch pair): */
 626                                goto dec_writeCopies;
 627                        }
 628 decode_next_byte:
 629                        if (--bd->writeCount < 0)
 630                                break; /* input block is fully consumed, need next one */
 631
 632                        /* Follow sequence vector to undo Burrows-Wheeler transform */
 633                        previous = current;
 634                        pos = dbuf[pos];
 635                        current = (uint8_t)pos;
 636                        pos >>= 8;
 637
 638                        /* After 3 consecutive copies of the same byte, the 4th
 639                         * is a repeat count.  We count down from 4 instead
 640                         * of counting up because testing for non-zero is faster */
 641                        if (--bd->writeRunCountdown != 0) {
 642                                if (current != previous)
 643                                        bd->writeRunCountdown = 4;
 644                        } else {
 645                                /* Unlikely branch */
 646                                /* We have a repeated run, this byte indicates the count */
 647                                bd->writeCopies = current;
 648                                current = previous;
 649                                bd->writeRunCountdown = 5;
 650
 651                                /* Sometimes there are just 3 bytes (run length 0) */
 652                                if (!bd->writeCopies) goto decode_next_byte;
 653
 654                                /* Subtract the 1 copy we'd output anyway to get extras */
 655                                --bd->writeCopies;
 656                        }
 657                } /* for(;;) */
 658
 659                /* Decompression of this input block completed successfully */
 660                bd->writeCRC = CRC = ~CRC;
 661                bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ CRC;
 662
 663                /* If this block had a CRC error, force file level CRC error */
 664                if (CRC != bd->headerCRC) {
 665                        bd->totalCRC = bd->headerCRC + 1;
 666                        return RETVAL_LAST_BLOCK;
 667                }
 668        }
 669
 670        /* Refill the intermediate buffer by Huffman-decoding next block of input */
 671        {
 672                int r = get_next_block(bd);
 673                if (r) { /* error/end */
 674                        bd->writeCount = r;
 675                        return (r != RETVAL_LAST_BLOCK) ? r : len;
 676                }
 677        }
 678
 679        CRC = ~0;
 680        pos = bd->writePos;
 681        current = bd->writeCurrent;
 682        goto decode_next_byte;
 683
 684 outbuf_full:
 685        /* Output buffer is full, save cached state and return */
 686        bd->writePos = pos;
 687        bd->writeCurrent = current;
 688        bd->writeCRC = CRC;
 689
 690        bd->writeCopies++;
 691
 692        return 0;
 693}
 694
 695/* Allocate the structure, read file header.  If in_fd==-1, inbuf must contain
 696   a complete bunzip file (len bytes long).  If in_fd!=-1, inbuf and len are
 697   ignored, and data is read from file handle into temporary buffer. */
 698
 699/* Because bunzip2 is used for help text unpacking, and because bb_show_usage()
 700   should work for NOFORK applets too, we must be extremely careful to not leak
 701   any allocations! */
 702static int FAST_FUNC start_bunzip(
 703                void *jmpbuf,
 704                bunzip_data **bdp,
 705                int in_fd,
 706                const void *inbuf, int len)
 707{
 708        bunzip_data *bd;
 709        unsigned i;
 710        enum {
 711                BZh0 = ('B' << 24) + ('Z' << 16) + ('h' << 8) + '0',
 712                h0 = ('h' << 8) + '0',
 713        };
 714
 715        /* Figure out how much data to allocate */
 716        i = sizeof(bunzip_data);
 717        if (in_fd != -1)
 718                i += IOBUF_SIZE;
 719
 720        /* Allocate bunzip_data.  Most fields initialize to zero. */
 721        bd = *bdp = xzalloc(i);
 722
 723        bd->jmpbuf = jmpbuf;
 724
 725        /* Setup input buffer */
 726        bd->in_fd = in_fd;
 727        if (-1 == in_fd) {
 728                /* in this case, bd->inbuf is read-only */
 729                bd->inbuf = (void*)inbuf; /* cast away const-ness */
 730        } else {
 731                bd->inbuf = (uint8_t*)(bd + 1);
 732                memcpy(bd->inbuf, inbuf, len);
 733        }
 734        bd->inbufCount = len;
 735
 736        /* Init the CRC32 table (big endian) */
 737        crc32_filltable(bd->crc32Table, 1);
 738
 739        /* Ensure that file starts with "BZh['1'-'9']." */
 740        /* Update: now caller verifies 1st two bytes, makes .gz/.bz2
 741         * integration easier */
 742        /* was: */
 743        /* i = get_bits(bd, 32); */
 744        /* if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; */
 745        i = get_bits(bd, 16);
 746        if ((unsigned)(i - h0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA;
 747
 748        /* Fourth byte (ascii '1'-'9') indicates block size in units of 100k of
 749           uncompressed data.  Allocate intermediate buffer for block. */
 750        /* bd->dbufSize = 100000 * (i - BZh0); */
 751        bd->dbufSize = 100000 * (i - h0);
 752
 753        /* Cannot use xmalloc - may leak bd in NOFORK case! */
 754        bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(bd->dbuf[0]));
 755        if (!bd->dbuf) {
 756                free(bd);
 757                xfunc_die();
 758        }
 759        return RETVAL_OK;
 760}
 761
 762static void FAST_FUNC dealloc_bunzip(bunzip_data *bd)
 763{
 764        free(bd->dbuf);
 765        free(bd);
 766}
 767
 768
 769/* Decompress src_fd to dst_fd.  Stops at end of bzip data, not end of file. */
 770IF_DESKTOP(long long) int FAST_FUNC
 771unpack_bz2_stream(transformer_state_t *xstate)
 772{
 773        IF_DESKTOP(long long total_written = 0;)
 774        bunzip_data *bd;
 775        char *outbuf;
 776        int i;
 777        unsigned len;
 778
 779        if (check_signature16(xstate, BZIP2_MAGIC))
 780                return -1;
 781
 782        outbuf = xmalloc(IOBUF_SIZE);
 783        len = 0;
 784        while (1) { /* "Process one BZ... stream" loop */
 785                jmp_buf jmpbuf;
 786
 787                /* Setup for I/O error handling via longjmp */
 788                i = setjmp(jmpbuf);
 789                if (i == 0)
 790                        i = start_bunzip(&jmpbuf, &bd, xstate->src_fd, outbuf + 2, len);
 791
 792                if (i == 0) {
 793                        while (1) { /* "Produce some output bytes" loop */
 794                                i = read_bunzip(bd, outbuf, IOBUF_SIZE);
 795                                if (i < 0) /* error? */
 796                                        break;
 797                                i = IOBUF_SIZE - i; /* number of bytes produced */
 798                                if (i == 0) /* EOF? */
 799                                        break;
 800                                if (i != transformer_write(xstate, outbuf, i)) {
 801                                        i = RETVAL_SHORT_WRITE;
 802                                        goto release_mem;
 803                                }
 804                                IF_DESKTOP(total_written += i;)
 805                        }
 806                }
 807
 808                if (i != RETVAL_LAST_BLOCK
 809                /* Observed case when i == RETVAL_OK:
 810                 * "bzcat z.bz2", where "z.bz2" is a bzipped zero-length file
 811                 * (to be exact, z.bz2 is exactly these 14 bytes:
 812                 * 42 5a 68 39 17 72 45 38 50 90 00 00 00 00).
 813                 */
 814                 && i != RETVAL_OK
 815                ) {
 816                        bb_error_msg("bunzip error %d", i);
 817                        break;
 818                }
 819                if (bd->headerCRC != bd->totalCRC) {
 820                        bb_error_msg("CRC error");
 821                        break;
 822                }
 823
 824                /* Successfully unpacked one BZ stream */
 825                i = RETVAL_OK;
 826
 827                /* Do we have "BZ..." after last processed byte?
 828                 * pbzip2 (parallelized bzip2) produces such files.
 829                 */
 830                len = bd->inbufCount - bd->inbufPos;
 831                memcpy(outbuf, &bd->inbuf[bd->inbufPos], len);
 832                if (len < 2) {
 833                        if (safe_read(xstate->src_fd, outbuf + len, 2 - len) != 2 - len)
 834                                break;
 835                        len = 2;
 836                }
 837                if (*(uint16_t*)outbuf != BZIP2_MAGIC) /* "BZ"? */
 838                        break;
 839                dealloc_bunzip(bd);
 840                len -= 2;
 841        }
 842
 843 release_mem:
 844        dealloc_bunzip(bd);
 845        free(outbuf);
 846
 847        return i ? i : IF_DESKTOP(total_written) + 0;
 848}
 849
 850char* FAST_FUNC
 851unpack_bz2_data(const char *packed, int packed_len, int unpacked_len)
 852{
 853        char *outbuf = NULL;
 854        bunzip_data *bd;
 855        int i;
 856        jmp_buf jmpbuf;
 857
 858        /* Setup for I/O error handling via longjmp */
 859        i = setjmp(jmpbuf);
 860        if (i == 0) {
 861                i = start_bunzip(&jmpbuf,
 862                        &bd,
 863                        /* src_fd: */ -1,
 864                        /* inbuf:  */ packed,
 865                        /* len:    */ packed_len
 866                );
 867        }
 868        /* read_bunzip can longjmp and end up here with i != 0
 869         * on read data errors! Not trivial */
 870        if (i == 0) {
 871                /* Cannot use xmalloc: will leak bd in NOFORK case! */
 872                outbuf = malloc_or_warn(unpacked_len);
 873                if (outbuf)
 874                        read_bunzip(bd, outbuf, unpacked_len);
 875        }
 876        dealloc_bunzip(bd);
 877        return outbuf;
 878}
 879
 880#ifdef TESTING
 881
 882static char *const bunzip_errors[] = {
 883        NULL, "Bad file checksum", "Not bzip data",
 884        "Unexpected input EOF", "Unexpected output EOF", "Data error",
 885        "Out of memory", "Obsolete (pre 0.9.5) bzip format not supported"
 886};
 887
 888/* Dumb little test thing, decompress stdin to stdout */
 889int main(int argc, char **argv)
 890{
 891        char c;
 892
 893        int i = unpack_bz2_stream(0, 1);
 894        if (i < 0)
 895                fprintf(stderr, "%s\n", bunzip_errors[-i]);
 896        else if (read(STDIN_FILENO, &c, 1))
 897                fprintf(stderr, "Trailing garbage ignored\n");
 898        return -i;
 899}
 900#endif
 901