toybox/toys/pending/xzcat.c
<<
>>
Prefs
   1/*
   2 * Simple XZ decoder command line tool
   3 *
   4 * Author: Lasse Collin <lasse.collin@tukaani.org>
   5 *
   6 * This file has been put into the public domain.
   7 * You can do whatever you want with this file.
   8 * Modified for toybox by Isaac Dunham
   9USE_XZCAT(NEWTOY(xzcat, NULL, TOYFLAG_USR|TOYFLAG_BIN))
  10
  11config XZCAT
  12  bool "xzcat"
  13  default n
  14  help
  15    usage: xzcat [filename...]
  16    
  17    Decompress listed files to stdout. Use stdin if no files listed.
  18
  19*/
  20#define FOR_xzcat
  21#include "toys.h"
  22
  23// BEGIN xz.h
  24
  25/**
  26 * enum xz_ret - Return codes
  27 * @XZ_OK:                  Everything is OK so far. More input or more
  28 *                          output space is required to continue.
  29 * @XZ_STREAM_END:          Operation finished successfully.
  30 * @XZ_UNSUPPORTED_CHECK:   Integrity check type is not supported. Decoding
  31 *                          is still possible in multi-call mode by simply
  32 *                          calling xz_dec_run() again.
  33 *                          Note that this return value is used only if
  34 *                          XZ_DEC_ANY_CHECK was defined at build time,
  35 *                          which is not used in the kernel. Unsupported
  36 *                          check types return XZ_OPTIONS_ERROR if
  37 *                          XZ_DEC_ANY_CHECK was not defined at build time.
  38 * @XZ_MEM_ERROR:           Allocating memory failed. The amount of memory 
  39 *                          that was tried to be allocated was no more than the
  40 *                          dict_max argument given to xz_dec_init().
  41 * @XZ_MEMLIMIT_ERROR:      A bigger LZMA2 dictionary would be needed than
  42 *                          allowed by the dict_max argument given to
  43 *                          xz_dec_init().
  44 * @XZ_FORMAT_ERROR:        File format was not recognized (wrong magic
  45 *                          bytes).
  46 * @XZ_OPTIONS_ERROR:       This implementation doesn't support the requested
  47 *                          compression options. In the decoder this means
  48 *                          that the header CRC32 matches, but the header
  49 *                          itself specifies something that we don't support.
  50 * @XZ_DATA_ERROR:          Compressed data is corrupt.
  51 * @XZ_BUF_ERROR:           Cannot make any progress. Details are slightly
  52 *                          different between multi-call and single-call
  53 *                          mode; more information below.
  54 *
  55 * XZ_BUF_ERROR is returned when two consecutive calls to XZ code cannot 
  56 * consume any input and cannot produce any new output. This happens when
  57 * there is no new input available, or the output buffer is full while at
  58 * least one output byte is still pending. Assuming your code is not buggy,
  59 * you can get this error only when decoding a compressed stream that is 
  60 * truncated or otherwise corrupt.
  61 */
  62enum xz_ret {
  63  XZ_OK,
  64  XZ_STREAM_END,
  65  XZ_UNSUPPORTED_CHECK,
  66  XZ_MEM_ERROR,
  67  XZ_MEMLIMIT_ERROR,
  68  XZ_FORMAT_ERROR,
  69  XZ_OPTIONS_ERROR,
  70  XZ_DATA_ERROR,
  71  XZ_BUF_ERROR
  72};
  73
  74/**
  75 * struct xz_buf - Passing input and output buffers to XZ code
  76 * @in:         Beginning of the input buffer. This may be NULL if and only
  77 *              if in_pos is equal to in_size.
  78 * @in_pos:     Current position in the input buffer. This must not exceed
  79 *              in_size.
  80 * @in_size:    Size of the input buffer
  81 * @out:        Beginning of the output buffer. This may be NULL if and only
  82 *              if out_pos is equal to out_size.
  83 * @out_pos:    Current position in the output buffer. This must not exceed
  84 *              out_size.
  85 * @out_size:   Size of the output buffer
  86 *
  87 * Only the contents of the output buffer from out[out_pos] onward, and
  88 * the variables in_pos and out_pos are modified by the XZ code.
  89 */
  90struct xz_buf {
  91  const uint8_t *in;
  92  size_t in_pos;
  93  size_t in_size;
  94
  95  uint8_t *out;
  96  size_t out_pos;
  97  size_t out_size;
  98};
  99
 100/**
 101 * struct xz_dec - Opaque type to hold the XZ decoder state
 102 */
 103struct xz_dec;
 104
 105/**
 106 * xz_dec_init() - Allocate and initialize a XZ decoder state
 107 * @mode:       Operation mode
 108 * @dict_max:   Maximum size of the LZMA2 dictionary (history buffer) for
 109 *              multi-call decoding. LZMA2 dictionary is always 2^n bytes
 110 *              or 2^n + 2^(n-1) bytes (the latter sizes are less common
 111 *              in practice), so other values for dict_max don't make sense.
 112 *              In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB,
 113 *              512 KiB, and 1 MiB are probably the only reasonable values,
 114 *              except for kernel and initramfs images where a bigger
 115 *              dictionary can be fine and useful.
 116 *
 117 * dict_max specifies the maximum allowed dictionary size that xz_dec_run()
 118 * may allocate once it has parsed the dictionary size from the stream
 119 * headers. This way excessive allocations can be avoided while still
 120 * limiting the maximum memory usage to a sane value to prevent running the
 121 * system out of memory when decompressing streams from untrusted sources.
 122 *
 123 * On success, xz_dec_init() returns a pointer to struct xz_dec, which is
 124 * ready to be used with xz_dec_run(). If memory allocation fails,
 125 * xz_dec_init() returns NULL.
 126 */
 127struct xz_dec *xz_dec_init(uint32_t dict_max);
 128
 129/**
 130 * xz_dec_run() - Run the XZ decoder
 131 * @s:          Decoder state allocated using xz_dec_init()
 132 * @b:          Input and output buffers
 133 *
 134 * The possible return values depend on build options and operation mode.
 135 * See enum xz_ret for details.
 136 *
 137 * Note that if an error occurs in single-call mode (return value is not
 138 * XZ_STREAM_END), b->in_pos and b->out_pos are not modified and the
 139 * contents of the output buffer from b->out[b->out_pos] onward are
 140 * undefined. This is true even after XZ_BUF_ERROR, because with some filter
 141 * chains, there may be a second pass over the output buffer, and this pass
 142 * cannot be properly done if the output buffer is truncated. Thus, you
 143 * cannot give the single-call decoder a too small buffer and then expect to
 144 * get that amount valid data from the beginning of the stream. You must use
 145 * the multi-call decoder if you don't want to uncompress the whole stream.
 146 */
 147enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b);
 148
 149/**
 150 * xz_dec_reset() - Reset an already allocated decoder state
 151 * @s:          Decoder state allocated using xz_dec_init()
 152 *
 153 * This function can be used to reset the multi-call decoder state without
 154 * freeing and reallocating memory with xz_dec_end() and xz_dec_init().
 155 *
 156 * In single-call mode, xz_dec_reset() is always called in the beginning of
 157 * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in
 158 * multi-call mode.
 159 */
 160void xz_dec_reset(struct xz_dec *s);
 161
 162/**
 163 * xz_dec_end() - Free the memory allocated for the decoder state
 164 * @s:          Decoder state allocated using xz_dec_init(). If s is NULL,
 165 *              this function does nothing.
 166 */
 167void xz_dec_end(struct xz_dec *s);
 168
 169/*
 170 * Update CRC32 value using the polynomial from IEEE-802.3. To start a new
 171 * calculation, the third argument must be zero. To continue the calculation,
 172 * the previously returned value is passed as the third argument.
 173 */
 174static uint32_t xz_crc32_table[256];
 175
 176uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
 177{
 178  crc = ~crc;
 179
 180  while (size != 0) {
 181    crc = xz_crc32_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8);
 182    --size;
 183  }
 184
 185  return ~crc;
 186}
 187
 188static uint64_t xz_crc64_table[256];
 189
 190
 191// END xz.h
 192
 193static uint8_t in[BUFSIZ];
 194static uint8_t out[BUFSIZ];
 195
 196void do_xzcat(int fd, char *name)
 197{
 198  struct xz_buf b;
 199  struct xz_dec *s;
 200  enum xz_ret ret;
 201  const char *msg;
 202
 203  crc_init(xz_crc32_table, 1);
 204  const uint64_t poly = 0xC96C5795D7870F42ULL;
 205  uint32_t i;
 206  uint32_t j;
 207  uint64_t r;
 208
 209  /* initialize CRC64 table*/
 210  for (i = 0; i < 256; ++i) {
 211    r = i;
 212    for (j = 0; j < 8; ++j)
 213      r = (r >> 1) ^ (poly & ~((r & 1) - 1));
 214
 215    xz_crc64_table[i] = r;
 216  }
 217
 218  /*
 219   * Support up to 64 MiB dictionary. The actually needed memory
 220   * is allocated once the headers have been parsed.
 221   */
 222  s = xz_dec_init(1 << 26);
 223  if (s == NULL) {
 224    msg = "Memory allocation failed\n";
 225    goto error;
 226  }
 227
 228  b.in = in;
 229  b.in_pos = 0;
 230  b.in_size = 0;
 231  b.out = out;
 232  b.out_pos = 0;
 233  b.out_size = BUFSIZ;
 234
 235  for (;;) {
 236    if (b.in_pos == b.in_size) {
 237      b.in_size = read(fd, in, sizeof(in));
 238      b.in_pos = 0;
 239    }
 240
 241    ret = xz_dec_run(s, &b);
 242
 243    if (b.out_pos == sizeof(out)) {
 244      if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {
 245        msg = "Write error\n";
 246        goto error;
 247      }
 248
 249      b.out_pos = 0;
 250    }
 251
 252    if (ret == XZ_OK)
 253      continue;
 254
 255    if (ret == XZ_UNSUPPORTED_CHECK)
 256      continue;
 257
 258    if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {
 259      msg = "Write error\n";
 260      goto error;
 261    }
 262
 263    switch (ret) {
 264    case XZ_STREAM_END:
 265      xz_dec_end(s);
 266      return;
 267
 268    case XZ_MEM_ERROR:
 269      msg = "Memory allocation failed\n";
 270      goto error;
 271
 272    case XZ_MEMLIMIT_ERROR:
 273      msg = "Memory usage limit reached\n";
 274      goto error;
 275
 276    case XZ_FORMAT_ERROR:
 277      msg = "Not a .xz file\n";
 278      goto error;
 279
 280    case XZ_OPTIONS_ERROR:
 281      msg = "Unsupported options in the .xz headers\n";
 282      goto error;
 283
 284    case XZ_DATA_ERROR:
 285    case XZ_BUF_ERROR:
 286      msg = "File is corrupt\n";
 287      goto error;
 288
 289    default:
 290      msg = "Bug!\n";
 291      goto error;
 292    }
 293  }
 294
 295error:
 296  xz_dec_end(s);
 297  error_exit("%s", msg);
 298}
 299
 300void xzcat_main(void)
 301{
 302  loopfiles(toys.optargs, do_xzcat);
 303}
 304
 305// BEGIN xz_private.h
 306
 307
 308/* Uncomment as needed to enable BCJ filter decoders. 
 309 * These cost about 2.5 k when all are enabled; SPARC and IA64 make 0.7 k
 310 * */
 311
 312#define XZ_DEC_X86
 313#define XZ_DEC_POWERPC
 314#define XZ_DEC_IA64
 315#define XZ_DEC_ARM
 316#define XZ_DEC_ARMTHUMB
 317#define XZ_DEC_SPARC
 318
 319
 320#define memeq(a, b, size) (memcmp(a, b, size) == 0)
 321
 322/* Inline functions to access unaligned unsigned 32-bit integers */
 323#ifndef get_unaligned_le32
 324static inline uint32_t get_unaligned_le32(const uint8_t *buf)
 325{
 326  return (uint32_t)buf[0]
 327      | ((uint32_t)buf[1] << 8)
 328      | ((uint32_t)buf[2] << 16)
 329      | ((uint32_t)buf[3] << 24);
 330}
 331#endif
 332
 333#ifndef get_unaligned_be32
 334static inline uint32_t get_unaligned_be32(const uint8_t *buf)
 335{
 336  return (uint32_t)(buf[0] << 24)
 337      | ((uint32_t)buf[1] << 16)
 338      | ((uint32_t)buf[2] << 8)
 339      | (uint32_t)buf[3];
 340}
 341#endif
 342
 343#ifndef put_unaligned_le32
 344static inline void put_unaligned_le32(uint32_t val, uint8_t *buf)
 345{
 346  buf[0] = (uint8_t)val;
 347  buf[1] = (uint8_t)(val >> 8);
 348  buf[2] = (uint8_t)(val >> 16);
 349  buf[3] = (uint8_t)(val >> 24);
 350}
 351#endif
 352
 353#ifndef put_unaligned_be32
 354static inline void put_unaligned_be32(uint32_t val, uint8_t *buf)
 355{
 356  buf[0] = (uint8_t)(val >> 24);
 357  buf[1] = (uint8_t)(val >> 16);
 358  buf[2] = (uint8_t)(val >> 8);
 359  buf[3] = (uint8_t)val;
 360}
 361#endif
 362
 363/*
 364 * Use get_unaligned_le32() also for aligned access for simplicity. On
 365 * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr))
 366 * could save a few bytes in code size.
 367 */
 368#ifndef get_le32
 369#       define get_le32 get_unaligned_le32
 370#endif
 371
 372/*
 373 * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
 374 * XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
 375 */
 376#ifndef XZ_DEC_BCJ
 377#       if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
 378      || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
 379      || defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \
 380      || defined(XZ_DEC_SPARC)
 381#               define XZ_DEC_BCJ
 382#       endif
 383#endif
 384
 385/*
 386 * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
 387 * before calling xz_dec_lzma2_run().
 388 */
 389struct xz_dec_lzma2 *xz_dec_lzma2_create(uint32_t dict_max);
 390
 391/*
 392 * Decode the LZMA2 properties (one byte) and reset the decoder. Return
 393 * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
 394 * big enough, and XZ_OPTIONS_ERROR if props indicates something that this
 395 * decoder doesn't support.
 396 */
 397enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s,
 398           uint8_t props);
 399
 400/* Decode raw LZMA2 stream from b->in to b->out. */
 401enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
 402               struct xz_buf *b);
 403
 404// END "xz_private.h"
 405
 406
 407
 408
 409/*
 410 * Branch/Call/Jump (BCJ) filter decoders
 411 * The rest of the code is inside this ifdef. It makes things a little more
 412 * convenient when building without support for any BCJ filters.
 413 */
 414#ifdef XZ_DEC_BCJ
 415
 416struct xz_dec_bcj {
 417  /* Type of the BCJ filter being used */
 418  enum {
 419    BCJ_X86 = 4,        /* x86 or x86-64 */
 420    BCJ_POWERPC = 5,    /* Big endian only */
 421    BCJ_IA64 = 6,       /* Big or little endian */
 422    BCJ_ARM = 7,        /* Little endian only */
 423    BCJ_ARMTHUMB = 8,   /* Little endian only */
 424    BCJ_SPARC = 9       /* Big or little endian */
 425  } type;
 426
 427  /*
 428   * Return value of the next filter in the chain. We need to preserve
 429   * this information across calls, because we must not call the next
 430   * filter anymore once it has returned XZ_STREAM_END.
 431   */
 432  enum xz_ret ret;
 433
 434  /*
 435   * Absolute position relative to the beginning of the uncompressed
 436   * data (in a single .xz Block). We care only about the lowest 32
 437   * bits so this doesn't need to be uint64_t even with big files.
 438   */
 439  uint32_t pos;
 440
 441  /* x86 filter state */
 442  uint32_t x86_prev_mask;
 443
 444  /* Temporary space to hold the variables from struct xz_buf */
 445  uint8_t *out;
 446  size_t out_pos;
 447  size_t out_size;
 448
 449  struct {
 450    /* Amount of already filtered data in the beginning of buf */
 451    size_t filtered;
 452
 453    /* Total amount of data currently stored in buf  */
 454    size_t size;
 455
 456    /*
 457     * Buffer to hold a mix of filtered and unfiltered data. This
 458     * needs to be big enough to hold Alignment + 2 * Look-ahead:
 459     *
 460     * Type         Alignment   Look-ahead
 461     * x86              1           4
 462     * PowerPC          4           0
 463     * IA-64           16           0
 464     * ARM              4           0
 465     * ARM-Thumb        2           2
 466     * SPARC            4           0
 467     */
 468    uint8_t buf[16];
 469  } temp;
 470};
 471
 472/*
 473 * Decode the Filter ID of a BCJ filter. This implementation doesn't
 474 * support custom start offsets, so no decoding of Filter Properties
 475 * is needed. Returns XZ_OK if the given Filter ID is supported.
 476 * Otherwise XZ_OPTIONS_ERROR is returned.
 477 */
 478enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id);
 479
 480/*
 481 * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
 482 * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
 483 * must be called directly.
 484 */
 485enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
 486             struct xz_dec_lzma2 *lzma2,
 487             struct xz_buf *b);
 488
 489#ifdef XZ_DEC_X86
 490/*
 491 * This is used to test the most significant byte of a memory address
 492 * in an x86 instruction.
 493 */
 494static inline int bcj_x86_test_msbyte(uint8_t b)
 495{
 496  return b == 0x00 || b == 0xFF;
 497}
 498
 499static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 500{
 501  static const int mask_to_allowed_status[8]
 502    = { 1,1,1,0,1,0,0,0 };
 503
 504  static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
 505
 506  size_t i;
 507  size_t prev_pos = (size_t)-1;
 508  uint32_t prev_mask = s->x86_prev_mask;
 509  uint32_t src;
 510  uint32_t dest;
 511  uint32_t j;
 512  uint8_t b;
 513
 514  if (size <= 4)
 515    return 0;
 516
 517  size -= 4;
 518  for (i = 0; i < size; ++i) {
 519    if ((buf[i] & 0xFE) != 0xE8)
 520      continue;
 521
 522    prev_pos = i - prev_pos;
 523    if (prev_pos > 3) {
 524      prev_mask = 0;
 525    } else {
 526      prev_mask = (prev_mask << (prev_pos - 1)) & 7;
 527      if (prev_mask != 0) {
 528        b = buf[i + 4 - mask_to_bit_num[prev_mask]];
 529        if (!mask_to_allowed_status[prev_mask]
 530            || bcj_x86_test_msbyte(b)) {
 531          prev_pos = i;
 532          prev_mask = (prev_mask << 1) | 1;
 533          continue;
 534        }
 535      }
 536    }
 537
 538    prev_pos = i;
 539
 540    if (bcj_x86_test_msbyte(buf[i + 4])) {
 541      src = get_unaligned_le32(buf + i + 1);
 542      for (;;) {
 543        dest = src - (s->pos + (uint32_t)i + 5);
 544        if (prev_mask == 0)
 545          break;
 546
 547        j = mask_to_bit_num[prev_mask] * 8;
 548        b = (uint8_t)(dest >> (24 - j));
 549        if (!bcj_x86_test_msbyte(b))
 550          break;
 551
 552        src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
 553      }
 554
 555      dest &= 0x01FFFFFF;
 556      dest |= (uint32_t)0 - (dest & 0x01000000);
 557      put_unaligned_le32(dest, buf + i + 1);
 558      i += 4;
 559    } else {
 560      prev_mask = (prev_mask << 1) | 1;
 561    }
 562  }
 563
 564  prev_pos = i - prev_pos;
 565  s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
 566  return i;
 567}
 568#endif
 569
 570#ifdef XZ_DEC_POWERPC
 571static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 572{
 573  size_t i;
 574  uint32_t instr;
 575
 576  for (i = 0; i + 4 <= size; i += 4) {
 577    instr = get_unaligned_be32(buf + i);
 578    if ((instr & 0xFC000003) == 0x48000001) {
 579      instr &= 0x03FFFFFC;
 580      instr -= s->pos + (uint32_t)i;
 581      instr &= 0x03FFFFFC;
 582      instr |= 0x48000001;
 583      put_unaligned_be32(instr, buf + i);
 584    }
 585  }
 586
 587  return i;
 588}
 589#endif
 590
 591#ifdef XZ_DEC_IA64
 592static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 593{
 594  static const uint8_t branch_table[32] = {
 595    0, 0, 0, 0, 0, 0, 0, 0,
 596    0, 0, 0, 0, 0, 0, 0, 0,
 597    4, 4, 6, 6, 0, 0, 7, 7,
 598    4, 4, 0, 0, 4, 4, 0, 0
 599  };
 600
 601  /*
 602   * The local variables take a little bit stack space, but it's less
 603   * than what LZMA2 decoder takes, so it doesn't make sense to reduce
 604   * stack usage here without doing that for the LZMA2 decoder too.
 605   */
 606
 607  /* Loop counters */
 608  size_t i;
 609  size_t j;
 610
 611  /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
 612  uint32_t slot;
 613
 614  /* Bitwise offset of the instruction indicated by slot */
 615  uint32_t bit_pos;
 616
 617  /* bit_pos split into byte and bit parts */
 618  uint32_t byte_pos;
 619  uint32_t bit_res;
 620
 621  /* Address part of an instruction */
 622  uint32_t addr;
 623
 624  /* Mask used to detect which instructions to convert */
 625  uint32_t mask;
 626
 627  /* 41-bit instruction stored somewhere in the lowest 48 bits */
 628  uint64_t instr;
 629
 630  /* Instruction normalized with bit_res for easier manipulation */
 631  uint64_t norm;
 632
 633  for (i = 0; i + 16 <= size; i += 16) {
 634    mask = branch_table[buf[i] & 0x1F];
 635    for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
 636      if (((mask >> slot) & 1) == 0)
 637        continue;
 638
 639      byte_pos = bit_pos >> 3;
 640      bit_res = bit_pos & 7;
 641      instr = 0;
 642      for (j = 0; j < 6; ++j)
 643        instr |= (uint64_t)(buf[i + j + byte_pos])
 644            << (8 * j);
 645
 646      norm = instr >> bit_res;
 647
 648      if (((norm >> 37) & 0x0F) == 0x05
 649          && ((norm >> 9) & 0x07) == 0) {
 650        addr = (norm >> 13) & 0x0FFFFF;
 651        addr |= ((uint32_t)(norm >> 36) & 1) << 20;
 652        addr <<= 4;
 653        addr -= s->pos + (uint32_t)i;
 654        addr >>= 4;
 655
 656        norm &= ~((uint64_t)0x8FFFFF << 13);
 657        norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
 658        norm |= (uint64_t)(addr & 0x100000)
 659            << (36 - 20);
 660
 661        instr &= (1 << bit_res) - 1;
 662        instr |= norm << bit_res;
 663
 664        for (j = 0; j < 6; j++)
 665          buf[i + j + byte_pos]
 666            = (uint8_t)(instr >> (8 * j));
 667      }
 668    }
 669  }
 670
 671  return i;
 672}
 673#endif
 674
 675#ifdef XZ_DEC_ARM
 676static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 677{
 678  size_t i;
 679  uint32_t addr;
 680
 681  for (i = 0; i + 4 <= size; i += 4) {
 682    if (buf[i + 3] == 0xEB) {
 683      addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
 684          | ((uint32_t)buf[i + 2] << 16);
 685      addr <<= 2;
 686      addr -= s->pos + (uint32_t)i + 8;
 687      addr >>= 2;
 688      buf[i] = (uint8_t)addr;
 689      buf[i + 1] = (uint8_t)(addr >> 8);
 690      buf[i + 2] = (uint8_t)(addr >> 16);
 691    }
 692  }
 693
 694  return i;
 695}
 696#endif
 697
 698#ifdef XZ_DEC_ARMTHUMB
 699static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 700{
 701  size_t i;
 702  uint32_t addr;
 703
 704  for (i = 0; i + 4 <= size; i += 2) {
 705    if ((buf[i + 1] & 0xF8) == 0xF0
 706        && (buf[i + 3] & 0xF8) == 0xF8) {
 707      addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
 708          | ((uint32_t)buf[i] << 11)
 709          | (((uint32_t)buf[i + 3] & 0x07) << 8)
 710          | (uint32_t)buf[i + 2];
 711      addr <<= 1;
 712      addr -= s->pos + (uint32_t)i + 4;
 713      addr >>= 1;
 714      buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
 715      buf[i] = (uint8_t)(addr >> 11);
 716      buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
 717      buf[i + 2] = (uint8_t)addr;
 718      i += 2;
 719    }
 720  }
 721
 722  return i;
 723}
 724#endif
 725
 726#ifdef XZ_DEC_SPARC
 727static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
 728{
 729  size_t i;
 730  uint32_t instr;
 731
 732  for (i = 0; i + 4 <= size; i += 4) {
 733    instr = get_unaligned_be32(buf + i);
 734    if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
 735      instr <<= 2;
 736      instr -= s->pos + (uint32_t)i;
 737      instr >>= 2;
 738      instr = ((uint32_t)0x40000000 - (instr & 0x400000))
 739          | 0x40000000 | (instr & 0x3FFFFF);
 740      put_unaligned_be32(instr, buf + i);
 741    }
 742  }
 743
 744  return i;
 745}
 746#endif
 747
 748/*
 749 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
 750 * of data that got filtered.
 751 *
 752 * NOTE: This is implemented as a switch statement to avoid using function
 753 * pointers, which could be problematic in the kernel boot code, which must
 754 * avoid pointers to static data (at least on x86).
 755 */
 756static void bcj_apply(struct xz_dec_bcj *s,
 757          uint8_t *buf, size_t *pos, size_t size)
 758{
 759  size_t filtered;
 760
 761  buf += *pos;
 762  size -= *pos;
 763
 764  switch (s->type) {
 765#ifdef XZ_DEC_X86
 766  case BCJ_X86:
 767    filtered = bcj_x86(s, buf, size);
 768    break;
 769#endif
 770#ifdef XZ_DEC_POWERPC
 771  case BCJ_POWERPC:
 772    filtered = bcj_powerpc(s, buf, size);
 773    break;
 774#endif
 775#ifdef XZ_DEC_IA64
 776  case BCJ_IA64:
 777    filtered = bcj_ia64(s, buf, size);
 778    break;
 779#endif
 780#ifdef XZ_DEC_ARM
 781  case BCJ_ARM:
 782    filtered = bcj_arm(s, buf, size);
 783    break;
 784#endif
 785#ifdef XZ_DEC_ARMTHUMB
 786  case BCJ_ARMTHUMB:
 787    filtered = bcj_armthumb(s, buf, size);
 788    break;
 789#endif
 790#ifdef XZ_DEC_SPARC
 791  case BCJ_SPARC:
 792    filtered = bcj_sparc(s, buf, size);
 793    break;
 794#endif
 795  default:
 796    /* Never reached but silence compiler warnings. */
 797    filtered = 0;
 798    break;
 799  }
 800
 801  *pos += filtered;
 802  s->pos += filtered;
 803}
 804
 805/*
 806 * Flush pending filtered data from temp to the output buffer.
 807 * Move the remaining mixture of possibly filtered and unfiltered
 808 * data to the beginning of temp.
 809 */
 810static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
 811{
 812  size_t copy_size;
 813
 814  copy_size = minof(s->temp.filtered, b->out_size - b->out_pos);
 815  memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
 816  b->out_pos += copy_size;
 817
 818  s->temp.filtered -= copy_size;
 819  s->temp.size -= copy_size;
 820  memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
 821}
 822
 823/*
 824 * The BCJ filter functions are primitive in sense that they process the
 825 * data in chunks of 1-16 bytes. To hide this issue, this function does
 826 * some buffering.
 827 */
 828enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
 829             struct xz_dec_lzma2 *lzma2,
 830             struct xz_buf *b)
 831{
 832  size_t out_start;
 833
 834  /*
 835   * Flush pending already filtered data to the output buffer. Return
 836   * immediatelly if we couldn't flush everything, or if the next
 837   * filter in the chain had already returned XZ_STREAM_END.
 838   */
 839  if (s->temp.filtered > 0) {
 840    bcj_flush(s, b);
 841    if (s->temp.filtered > 0)
 842      return XZ_OK;
 843
 844    if (s->ret == XZ_STREAM_END)
 845      return XZ_STREAM_END;
 846  }
 847
 848  /*
 849   * If we have more output space than what is currently pending in
 850   * temp, copy the unfiltered data from temp to the output buffer
 851   * and try to fill the output buffer by decoding more data from the
 852   * next filter in the chain. Apply the BCJ filter on the new data
 853   * in the output buffer. If everything cannot be filtered, copy it
 854   * to temp and rewind the output buffer position accordingly.
 855   *
 856   * This needs to be always run when temp.size == 0 to handle a special
 857   * case where the output buffer is full and the next filter has no
 858   * more output coming but hasn't returned XZ_STREAM_END yet.
 859   */
 860  if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
 861    out_start = b->out_pos;
 862    memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
 863    b->out_pos += s->temp.size;
 864
 865    s->ret = xz_dec_lzma2_run(lzma2, b);
 866    if (s->ret != XZ_STREAM_END
 867        && (s->ret != XZ_OK ))
 868      return s->ret;
 869
 870    bcj_apply(s, b->out, &out_start, b->out_pos);
 871
 872    /*
 873     * As an exception, if the next filter returned XZ_STREAM_END,
 874     * we can do that too, since the last few bytes that remain
 875     * unfiltered are meant to remain unfiltered.
 876     */
 877    if (s->ret == XZ_STREAM_END)
 878      return XZ_STREAM_END;
 879
 880    s->temp.size = b->out_pos - out_start;
 881    b->out_pos -= s->temp.size;
 882    memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
 883
 884    /*
 885     * If there wasn't enough input to the next filter to fill
 886     * the output buffer with unfiltered data, there's no point
 887     * to try decoding more data to temp.
 888     */
 889    if (b->out_pos + s->temp.size < b->out_size)
 890      return XZ_OK;
 891  }
 892
 893  /*
 894   * We have unfiltered data in temp. If the output buffer isn't full
 895   * yet, try to fill the temp buffer by decoding more data from the
 896   * next filter. Apply the BCJ filter on temp. Then we hopefully can
 897   * fill the actual output buffer by copying filtered data from temp.
 898   * A mix of filtered and unfiltered data may be left in temp; it will
 899   * be taken care on the next call to this function.
 900   */
 901  if (b->out_pos < b->out_size) {
 902    /* Make b->out{,_pos,_size} temporarily point to s->temp. */
 903    s->out = b->out;
 904    s->out_pos = b->out_pos;
 905    s->out_size = b->out_size;
 906    b->out = s->temp.buf;
 907    b->out_pos = s->temp.size;
 908    b->out_size = sizeof(s->temp.buf);
 909
 910    s->ret = xz_dec_lzma2_run(lzma2, b);
 911
 912    s->temp.size = b->out_pos;
 913    b->out = s->out;
 914    b->out_pos = s->out_pos;
 915    b->out_size = s->out_size;
 916
 917    if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
 918      return s->ret;
 919
 920    bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
 921
 922    /*
 923     * If the next filter returned XZ_STREAM_END, we mark that
 924     * everything is filtered, since the last unfiltered bytes
 925     * of the stream are meant to be left as is.
 926     */
 927    if (s->ret == XZ_STREAM_END)
 928      s->temp.filtered = s->temp.size;
 929
 930    bcj_flush(s, b);
 931    if (s->temp.filtered > 0)
 932      return XZ_OK;
 933  }
 934
 935  return s->ret;
 936}
 937
 938enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
 939{
 940  switch (id) {
 941#ifdef XZ_DEC_X86
 942  case BCJ_X86:
 943#endif
 944#ifdef XZ_DEC_POWERPC
 945  case BCJ_POWERPC:
 946#endif
 947#ifdef XZ_DEC_IA64
 948  case BCJ_IA64:
 949#endif
 950#ifdef XZ_DEC_ARM
 951  case BCJ_ARM:
 952#endif
 953#ifdef XZ_DEC_ARMTHUMB
 954  case BCJ_ARMTHUMB:
 955#endif
 956#ifdef XZ_DEC_SPARC
 957  case BCJ_SPARC:
 958#endif
 959    break;
 960
 961  default:
 962    /* Unsupported Filter ID */
 963    return XZ_OPTIONS_ERROR;
 964  }
 965
 966  s->type = id;
 967  s->ret = XZ_OK;
 968  s->pos = 0;
 969  s->x86_prev_mask = 0;
 970  s->temp.filtered = 0;
 971  s->temp.size = 0;
 972
 973  return XZ_OK;
 974}
 975
 976#endif
 977/*
 978 * LZMA2 decoder
 979 */
 980
 981
 982// BEGIN xz_lzma2.h
 983/*
 984 * LZMA2 definitions
 985 *
 986 */
 987
 988
 989/* Range coder constants */
 990#define RC_SHIFT_BITS 8
 991#define RC_TOP_BITS 24
 992#define RC_TOP_VALUE (1 << RC_TOP_BITS)
 993#define RC_BIT_MODEL_TOTAL_BITS 11
 994#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
 995#define RC_MOVE_BITS 5
 996
 997/*
 998 * Maximum number of position states. A position state is the lowest pb
 999 * number of bits of the current uncompressed offset. In some places there
1000 * are different sets of probabilities for different position states.
1001 */
1002#define POS_STATES_MAX (1 << 4)
1003
1004/*
1005 * This enum is used to track which LZMA symbols have occurred most recently
1006 * and in which order. This information is used to predict the next symbol.
1007 *
1008 * Symbols:
1009 *  - Literal: One 8-bit byte
1010 *  - Match: Repeat a chunk of data at some distance
1011 *  - Long repeat: Multi-byte match at a recently seen distance
1012 *  - Short repeat: One-byte repeat at a recently seen distance
1013 *
1014 * The symbol names are in from STATE_oldest_older_previous. REP means
1015 * either short or long repeated match, and NONLIT means any non-literal.
1016 */
1017enum lzma_state {
1018  STATE_LIT_LIT,
1019  STATE_MATCH_LIT_LIT,
1020  STATE_REP_LIT_LIT,
1021  STATE_SHORTREP_LIT_LIT,
1022  STATE_MATCH_LIT,
1023  STATE_REP_LIT,
1024  STATE_SHORTREP_LIT,
1025  STATE_LIT_MATCH,
1026  STATE_LIT_LONGREP,
1027  STATE_LIT_SHORTREP,
1028  STATE_NONLIT_MATCH,
1029  STATE_NONLIT_REP
1030};
1031
1032/* Total number of states */
1033#define STATES 12
1034
1035/* The lowest 7 states indicate that the previous state was a literal. */
1036#define LIT_STATES 7
1037
1038/* Indicate that the latest symbol was a literal. */
1039static inline void lzma_state_literal(enum lzma_state *state)
1040{
1041  if (*state <= STATE_SHORTREP_LIT_LIT)
1042    *state = STATE_LIT_LIT;
1043  else if (*state <= STATE_LIT_SHORTREP)
1044    *state -= 3;
1045  else
1046    *state -= 6;
1047}
1048
1049/* Indicate that the latest symbol was a match. */
1050static inline void lzma_state_match(enum lzma_state *state)
1051{
1052  *state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH;
1053}
1054
1055/* Indicate that the latest state was a long repeated match. */
1056static inline void lzma_state_long_rep(enum lzma_state *state)
1057{
1058  *state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP;
1059}
1060
1061/* Indicate that the latest symbol was a short match. */
1062static inline void lzma_state_short_rep(enum lzma_state *state)
1063{
1064  *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP;
1065}
1066
1067/* Test if the previous symbol was a literal. */
1068static inline int lzma_state_is_literal(enum lzma_state state)
1069{
1070  return state < LIT_STATES;
1071}
1072
1073/* Each literal coder is divided in three sections:
1074 *   - 0x001-0x0FF: Without match byte
1075 *   - 0x101-0x1FF: With match byte; match bit is 0
1076 *   - 0x201-0x2FF: With match byte; match bit is 1
1077 *
1078 * Match byte is used when the previous LZMA symbol was something else than
1079 * a literal (that is, it was some kind of match).
1080 */
1081#define LITERAL_CODER_SIZE 0x300
1082
1083/* Maximum number of literal coders */
1084#define LITERAL_CODERS_MAX (1 << 4)
1085
1086/* Minimum length of a match is two bytes. */
1087#define MATCH_LEN_MIN 2
1088
1089/* Match length is encoded with 4, 5, or 10 bits.
1090 *
1091 * Length   Bits
1092 *  2-9      4 = Choice=0 + 3 bits
1093 * 10-17     5 = Choice=1 + Choice2=0 + 3 bits
1094 * 18-273   10 = Choice=1 + Choice2=1 + 8 bits
1095 */
1096#define LEN_LOW_BITS 3
1097#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
1098#define LEN_MID_BITS 3
1099#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
1100#define LEN_HIGH_BITS 8
1101#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
1102#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
1103
1104/*
1105 * Maximum length of a match is 273 which is a result of the encoding
1106 * described above.
1107 */
1108#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
1109
1110/*
1111 * Different sets of probabilities are used for match distances that have
1112 * very short match length: Lengths of 2, 3, and 4 bytes have a separate
1113 * set of probabilities for each length. The matches with longer length
1114 * use a shared set of probabilities.
1115 */
1116#define DIST_STATES 4
1117
1118/*
1119 * Get the index of the appropriate probability array for decoding
1120 * the distance slot.
1121 */
1122static inline uint32_t lzma_get_dist_state(uint32_t len)
1123{
1124  return len < DIST_STATES + MATCH_LEN_MIN
1125      ? len - MATCH_LEN_MIN : DIST_STATES - 1;
1126}
1127
1128/*
1129 * The highest two bits of a 32-bit match distance are encoded using six bits.
1130 * This six-bit value is called a distance slot. This way encoding a 32-bit
1131 * value takes 6-36 bits, larger values taking more bits.
1132 */
1133#define DIST_SLOT_BITS 6
1134#define DIST_SLOTS (1 << DIST_SLOT_BITS)
1135
1136/* Match distances up to 127 are fully encoded using probabilities. Since
1137 * the highest two bits (distance slot) are always encoded using six bits,
1138 * the distances 0-3 don't need any additional bits to encode, since the
1139 * distance slot itself is the same as the actual distance. DIST_MODEL_START
1140 * indicates the first distance slot where at least one additional bit is
1141 * needed.
1142 */
1143#define DIST_MODEL_START 4
1144
1145/*
1146 * Match distances greater than 127 are encoded in three pieces:
1147 *   - distance slot: the highest two bits
1148 *   - direct bits: 2-26 bits below the highest two bits
1149 *   - alignment bits: four lowest bits
1150 *
1151 * Direct bits don't use any probabilities.
1152 *
1153 * The distance slot value of 14 is for distances 128-191.
1154 */
1155#define DIST_MODEL_END 14
1156
1157/* Distance slots that indicate a distance <= 127. */
1158#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
1159#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
1160
1161/*
1162 * For match distances greater than 127, only the highest two bits and the
1163 * lowest four bits (alignment) is encoded using probabilities.
1164 */
1165#define ALIGN_BITS 4
1166#define ALIGN_SIZE (1 << ALIGN_BITS)
1167#define ALIGN_MASK (ALIGN_SIZE - 1)
1168
1169/* Total number of all probability variables */
1170#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
1171
1172/*
1173 * LZMA remembers the four most recent match distances. Reusing these
1174 * distances tends to take less space than re-encoding the actual
1175 * distance value.
1176 */
1177#define REPS 4
1178
1179
1180// END xz_lzma2.h
1181
1182/*
1183 * Range decoder initialization eats the first five bytes of each LZMA chunk.
1184 */
1185#define RC_INIT_BYTES 5
1186
1187/*
1188 * Minimum number of usable input buffer to safely decode one LZMA symbol.
1189 * The worst case is that we decode 22 bits using probabilities and 26
1190 * direct bits. This may decode at maximum of 20 bytes of input. However,
1191 * lzma_main() does an extra normalization before returning, thus we
1192 * need to put 21 here.
1193 */
1194#define LZMA_IN_REQUIRED 21
1195
1196/*
1197 * Dictionary (history buffer)
1198 *
1199 * These are always true:
1200 *    start <= pos <= full <= end
1201 *    pos <= limit <= end
1202 *    end == size
1203 *    size <= size_max
1204 *    allocated <= size
1205 *
1206 * Most of these variables are size_t as a relic of single-call mode,
1207 * in which the dictionary variables address the actual output
1208 * buffer directly.
1209 */
1210struct dictionary {
1211  /* Beginning of the history buffer */
1212  uint8_t *buf;
1213
1214  /* Old position in buf (before decoding more data) */
1215  size_t start;
1216
1217  /* Position in buf */
1218  size_t pos;
1219
1220  /*
1221   * How full dictionary is. This is used to detect corrupt input that
1222   * would read beyond the beginning of the uncompressed stream.
1223   */
1224  size_t full;
1225
1226  /* Write limit; we don't write to buf[limit] or later bytes. */
1227  size_t limit;
1228
1229  /* End of the dictionary buffer. This is the same as the dictionary size. */
1230  size_t end;
1231
1232  /*
1233   * Size of the dictionary as specified in Block Header. This is used
1234   * together with "full" to detect corrupt input that would make us
1235   * read beyond the beginning of the uncompressed stream.
1236   */
1237  uint32_t size;
1238
1239  /*
1240   * Maximum allowed dictionary size.
1241   */
1242  uint32_t size_max;
1243
1244  /*
1245   * Amount of memory currently allocated for the dictionary.
1246   */
1247  uint32_t allocated;
1248};
1249
1250/* Range decoder */
1251struct rc_dec {
1252  uint32_t range;
1253  uint32_t code;
1254
1255  /*
1256   * Number of initializing bytes remaining to be read
1257   * by rc_read_init().
1258   */
1259  uint32_t init_bytes_left;
1260
1261  /*
1262   * Buffer from which we read our input. It can be either
1263   * temp.buf or the caller-provided input buffer.
1264   */
1265  const uint8_t *in;
1266  size_t in_pos;
1267  size_t in_limit;
1268};
1269
1270/* Probabilities for a length decoder. */
1271struct lzma_len_dec {
1272  /* Probability of match length being at least 10 */
1273  uint16_t choice;
1274
1275  /* Probability of match length being at least 18 */
1276  uint16_t choice2;
1277
1278  /* Probabilities for match lengths 2-9 */
1279  uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
1280
1281  /* Probabilities for match lengths 10-17 */
1282  uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
1283
1284  /* Probabilities for match lengths 18-273 */
1285  uint16_t high[LEN_HIGH_SYMBOLS];
1286};
1287
1288struct lzma_dec {
1289  /* Distances of latest four matches */
1290  uint32_t rep0;
1291  uint32_t rep1;
1292  uint32_t rep2;
1293  uint32_t rep3;
1294
1295  /* Types of the most recently seen LZMA symbols */
1296  enum lzma_state state;
1297
1298  /*
1299   * Length of a match. This is updated so that dict_repeat can
1300   * be called again to finish repeating the whole match.
1301   */
1302  uint32_t len;
1303
1304  /*
1305   * LZMA properties or related bit masks (number of literal
1306   * context bits, a mask dervied from the number of literal
1307   * position bits, and a mask dervied from the number
1308   * position bits)
1309   */
1310  uint32_t lc;
1311  uint32_t literal_pos_mask; /* (1 << lp) - 1 */
1312  uint32_t pos_mask;         /* (1 << pb) - 1 */
1313
1314  /* If 1, it's a match. Otherwise it's a single 8-bit literal. */
1315  uint16_t is_match[STATES][POS_STATES_MAX];
1316
1317  /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
1318  uint16_t is_rep[STATES];
1319
1320  /*
1321   * If 0, distance of a repeated match is rep0.
1322   * Otherwise check is_rep1.
1323   */
1324  uint16_t is_rep0[STATES];
1325
1326  /*
1327   * If 0, distance of a repeated match is rep1.
1328   * Otherwise check is_rep2.
1329   */
1330  uint16_t is_rep1[STATES];
1331
1332  /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
1333  uint16_t is_rep2[STATES];
1334
1335  /*
1336   * If 1, the repeated match has length of one byte. Otherwise
1337   * the length is decoded from rep_len_decoder.
1338   */
1339  uint16_t is_rep0_long[STATES][POS_STATES_MAX];
1340
1341  /*
1342   * Probability tree for the highest two bits of the match
1343   * distance. There is a separate probability tree for match
1344   * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
1345   */
1346  uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
1347
1348  /*
1349   * Probility trees for additional bits for match distance
1350   * when the distance is in the range [4, 127].
1351   */
1352  uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
1353
1354  /*
1355   * Probability tree for the lowest four bits of a match
1356   * distance that is equal to or greater than 128.
1357   */
1358  uint16_t dist_align[ALIGN_SIZE];
1359
1360  /* Length of a normal match */
1361  struct lzma_len_dec match_len_dec;
1362
1363  /* Length of a repeated match */
1364  struct lzma_len_dec rep_len_dec;
1365
1366  /* Probabilities of literals */
1367  uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
1368};
1369
1370struct lzma2_dec {
1371  /* Position in xz_dec_lzma2_run(). */
1372  enum lzma2_seq {
1373    SEQ_CONTROL,
1374    SEQ_UNCOMPRESSED_1,
1375    SEQ_UNCOMPRESSED_2,
1376    SEQ_COMPRESSED_0,
1377    SEQ_COMPRESSED_1,
1378    SEQ_PROPERTIES,
1379    SEQ_LZMA_PREPARE,
1380    SEQ_LZMA_RUN,
1381    SEQ_COPY
1382  } sequence;
1383
1384  /* Next position after decoding the compressed size of the chunk. */
1385  enum lzma2_seq next_sequence;
1386
1387  /* Uncompressed size of LZMA chunk (2 MiB at maximum) */
1388  uint32_t uncompressed;
1389
1390  /*
1391   * Compressed size of LZMA chunk or compressed/uncompressed
1392   * size of uncompressed chunk (64 KiB at maximum)
1393   */
1394  uint32_t compressed;
1395
1396  /*
1397   * True if dictionary reset is needed. This is false before
1398   * the first chunk (LZMA or uncompressed).
1399   */
1400  int need_dict_reset;
1401
1402  /*
1403   * True if new LZMA properties are needed. This is false
1404   * before the first LZMA chunk.
1405   */
1406  int need_props;
1407};
1408
1409struct xz_dec_lzma2 {
1410  /*
1411   * The order below is important on x86 to reduce code size and
1412   * it shouldn't hurt on other platforms. Everything up to and
1413   * including lzma.pos_mask are in the first 128 bytes on x86-32,
1414   * which allows using smaller instructions to access those
1415   * variables. On x86-64, fewer variables fit into the first 128
1416   * bytes, but this is still the best order without sacrificing
1417   * the readability by splitting the structures.
1418   */
1419  struct rc_dec rc;
1420  struct dictionary dict;
1421  struct lzma2_dec lzma2;
1422  struct lzma_dec lzma;
1423
1424  /*
1425   * Temporary buffer which holds small number of input bytes between
1426   * decoder calls. See lzma2_lzma() for details.
1427   */
1428  struct {
1429    uint32_t size;
1430    uint8_t buf[3 * LZMA_IN_REQUIRED];
1431  } temp;
1432};
1433
1434/**************
1435 * Dictionary *
1436 **************/
1437
1438/* Reset the dictionary state. */
1439static void dict_reset(struct dictionary *dict)
1440{
1441  dict->start = 0;
1442  dict->pos = 0;
1443  dict->limit = 0;
1444  dict->full = 0;
1445}
1446
1447/* Set dictionary write limit */
1448static void dict_limit(struct dictionary *dict, size_t out_max)
1449{
1450  if (dict->end - dict->pos <= out_max)
1451    dict->limit = dict->end;
1452  else
1453    dict->limit = dict->pos + out_max;
1454}
1455
1456/* Return true if at least one byte can be written into the dictionary. */
1457static inline int dict_has_space(const struct dictionary *dict)
1458{
1459  return dict->pos < dict->limit;
1460}
1461
1462/*
1463 * Get a byte from the dictionary at the given distance. The distance is
1464 * assumed to valid, or as a special case, zero when the dictionary is
1465 * still empty. This special case is needed for single-call decoding to
1466 * avoid writing a '\0' to the end of the destination buffer.
1467 */
1468static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
1469{
1470  size_t offset = dict->pos - dist - 1;
1471
1472  if (dist >= dict->pos)
1473    offset += dict->end;
1474
1475  return dict->full > 0 ? dict->buf[offset] : 0;
1476}
1477
1478/*
1479 * Put one byte into the dictionary. It is assumed that there is space for it.
1480 */
1481static inline void dict_put(struct dictionary *dict, uint8_t byte)
1482{
1483  dict->buf[dict->pos++] = byte;
1484
1485  if (dict->full < dict->pos)
1486    dict->full = dict->pos;
1487}
1488
1489/*
1490 * Repeat given number of bytes from the given distance. If the distance is
1491 * invalid, false is returned. On success, true is returned and *len is
1492 * updated to indicate how many bytes were left to be repeated.
1493 */
1494static int dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
1495{
1496  size_t back;
1497  uint32_t left;
1498
1499  if (dist >= dict->full || dist >= dict->size) return 0;
1500
1501  left = minof(dict->limit - dict->pos, *len);
1502  *len -= left;
1503
1504  back = dict->pos - dist - 1;
1505  if (dist >= dict->pos)
1506    back += dict->end;
1507
1508  do {
1509    dict->buf[dict->pos++] = dict->buf[back++];
1510    if (back == dict->end)
1511      back = 0;
1512  } while (--left > 0);
1513
1514  if (dict->full < dict->pos)
1515    dict->full = dict->pos;
1516
1517  return 1;
1518}
1519
1520/* Copy uncompressed data as is from input to dictionary and output buffers. */
1521static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
1522            uint32_t *left)
1523{
1524  size_t copy_size;
1525
1526  while (*left > 0 && b->in_pos < b->in_size
1527      && b->out_pos < b->out_size) {
1528    copy_size = minof(b->in_size - b->in_pos,
1529        b->out_size - b->out_pos);
1530    if (copy_size > dict->end - dict->pos)
1531      copy_size = dict->end - dict->pos;
1532    if (copy_size > *left)
1533      copy_size = *left;
1534
1535    *left -= copy_size;
1536
1537    memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
1538    dict->pos += copy_size;
1539
1540    if (dict->full < dict->pos)
1541      dict->full = dict->pos;
1542
1543    if (dict->pos == dict->end)
1544      dict->pos = 0;
1545
1546    memcpy(b->out + b->out_pos, b->in + b->in_pos,
1547        copy_size);
1548
1549    dict->start = dict->pos;
1550
1551    b->out_pos += copy_size;
1552    b->in_pos += copy_size;
1553  }
1554}
1555
1556/*
1557 * Flush pending data from dictionary to b->out. It is assumed that there is
1558 * enough space in b->out. This is guaranteed because caller uses dict_limit()
1559 * before decoding data into the dictionary.
1560 */
1561static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
1562{
1563  size_t copy_size = dict->pos - dict->start;
1564
1565  if (dict->pos == dict->end)
1566    dict->pos = 0;
1567
1568  memcpy(b->out + b->out_pos, dict->buf + dict->start,
1569      copy_size);
1570
1571  dict->start = dict->pos;
1572  b->out_pos += copy_size;
1573  return copy_size;
1574}
1575
1576/*****************
1577 * Range decoder *
1578 *****************/
1579
1580/* Reset the range decoder. */
1581static void rc_reset(struct rc_dec *rc)
1582{
1583  rc->range = (uint32_t)-1;
1584  rc->code = 0;
1585  rc->init_bytes_left = RC_INIT_BYTES;
1586}
1587
1588/*
1589 * Read the first five initial bytes into rc->code if they haven't been
1590 * read already. (Yes, the first byte gets completely ignored.)
1591 */
1592static int rc_read_init(struct rc_dec *rc, struct xz_buf *b)
1593{
1594  while (rc->init_bytes_left > 0) {
1595    if (b->in_pos == b->in_size) return 0;
1596
1597    rc->code = (rc->code << 8) + b->in[b->in_pos++];
1598    --rc->init_bytes_left;
1599  }
1600
1601  return 1;
1602}
1603
1604/* Return true if there may not be enough input for the next decoding loop. */
1605static inline int rc_limit_exceeded(const struct rc_dec *rc)
1606{
1607  return rc->in_pos > rc->in_limit;
1608}
1609
1610/*
1611 * Return true if it is possible (from point of view of range decoder) that
1612 * we have reached the end of the LZMA chunk.
1613 */
1614static inline int rc_is_finished(const struct rc_dec *rc)
1615{
1616  return rc->code == 0;
1617}
1618
1619/* Read the next input byte if needed. */
1620static inline void rc_normalize(struct rc_dec *rc)
1621{
1622  if (rc->range < RC_TOP_VALUE) {
1623    rc->range <<= RC_SHIFT_BITS;
1624    rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
1625  }
1626}
1627
1628/*
1629 * Decode one bit. In some versions, this function has been splitted in three
1630 * functions so that the compiler is supposed to be able to more easily avoid
1631 * an extra branch. In this particular version of the LZMA decoder, this
1632 * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
1633 * on x86). Using a non-splitted version results in nicer looking code too.
1634 *
1635 * NOTE: This must return an int. Do not make it return a bool or the speed
1636 * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
1637 * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
1638 */
1639static inline int rc_bit(struct rc_dec *rc, uint16_t *prob)
1640{
1641  uint32_t bound;
1642  int bit;
1643
1644  rc_normalize(rc);
1645  bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
1646  if (rc->code < bound) {
1647    rc->range = bound;
1648    *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
1649    bit = 0;
1650  } else {
1651    rc->range -= bound;
1652    rc->code -= bound;
1653    *prob -= *prob >> RC_MOVE_BITS;
1654    bit = 1;
1655  }
1656
1657  return bit;
1658}
1659
1660/* Decode a bittree starting from the most significant bit. */
1661static inline uint32_t rc_bittree(struct rc_dec *rc,
1662             uint16_t *probs, uint32_t limit)
1663{
1664  uint32_t symbol = 1;
1665
1666  do {
1667    if (rc_bit(rc, &probs[symbol]))
1668      symbol = (symbol << 1) + 1;
1669    else
1670      symbol <<= 1;
1671  } while (symbol < limit);
1672
1673  return symbol;
1674}
1675
1676/* Decode a bittree starting from the least significant bit. */
1677static inline void rc_bittree_reverse(struct rc_dec *rc,
1678                 uint16_t *probs,
1679                 uint32_t *dest, uint32_t limit)
1680{
1681  uint32_t symbol = 1;
1682  uint32_t i = 0;
1683
1684  do {
1685    if (rc_bit(rc, &probs[symbol])) {
1686      symbol = (symbol << 1) + 1;
1687      *dest += 1 << i;
1688    } else {
1689      symbol <<= 1;
1690    }
1691  } while (++i < limit);
1692}
1693
1694/* Decode direct bits (fixed fifty-fifty probability) */
1695static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit)
1696{
1697  uint32_t mask;
1698
1699  do {
1700    rc_normalize(rc);
1701    rc->range >>= 1;
1702    rc->code -= rc->range;
1703    mask = (uint32_t)0 - (rc->code >> 31);
1704    rc->code += rc->range & mask;
1705    *dest = (*dest << 1) + (mask + 1);
1706  } while (--limit > 0);
1707}
1708
1709/********
1710 * LZMA *
1711 ********/
1712
1713/* Get pointer to literal coder probability array. */
1714static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s)
1715{
1716  uint32_t prev_byte = dict_get(&s->dict, 0);
1717  uint32_t low = prev_byte >> (8 - s->lzma.lc);
1718  uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
1719  return s->lzma.literal[low + high];
1720}
1721
1722/* Decode a literal (one 8-bit byte) */
1723static void lzma_literal(struct xz_dec_lzma2 *s)
1724{
1725  uint16_t *probs;
1726  uint32_t symbol;
1727  uint32_t match_byte;
1728  uint32_t match_bit;
1729  uint32_t offset;
1730  uint32_t i;
1731
1732  probs = lzma_literal_probs(s);
1733
1734  if (lzma_state_is_literal(s->lzma.state)) {
1735    symbol = rc_bittree(&s->rc, probs, 0x100);
1736  } else {
1737    symbol = 1;
1738    match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
1739    offset = 0x100;
1740
1741    do {
1742      match_bit = match_byte & offset;
1743      match_byte <<= 1;
1744      i = offset + match_bit + symbol;
1745
1746      if (rc_bit(&s->rc, &probs[i])) {
1747        symbol = (symbol << 1) + 1;
1748        offset &= match_bit;
1749      } else {
1750        symbol <<= 1;
1751        offset &= ~match_bit;
1752      }
1753    } while (symbol < 0x100);
1754  }
1755
1756  dict_put(&s->dict, (uint8_t)symbol);
1757  lzma_state_literal(&s->lzma.state);
1758}
1759
1760/* Decode the length of the match into s->lzma.len. */
1761static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
1762         uint32_t pos_state)
1763{
1764  uint16_t *probs;
1765  uint32_t limit;
1766
1767  if (!rc_bit(&s->rc, &l->choice)) {
1768    probs = l->low[pos_state];
1769    limit = LEN_LOW_SYMBOLS;
1770    s->lzma.len = MATCH_LEN_MIN;
1771  } else {
1772    if (!rc_bit(&s->rc, &l->choice2)) {
1773      probs = l->mid[pos_state];
1774      limit = LEN_MID_SYMBOLS;
1775      s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
1776    } else {
1777      probs = l->high;
1778      limit = LEN_HIGH_SYMBOLS;
1779      s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
1780          + LEN_MID_SYMBOLS;
1781    }
1782  }
1783
1784  s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
1785}
1786
1787/* Decode a match. The distance will be stored in s->lzma.rep0. */
1788static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
1789{
1790  uint16_t *probs;
1791  uint32_t dist_slot;
1792  uint32_t limit;
1793
1794  lzma_state_match(&s->lzma.state);
1795
1796  s->lzma.rep3 = s->lzma.rep2;
1797  s->lzma.rep2 = s->lzma.rep1;
1798  s->lzma.rep1 = s->lzma.rep0;
1799
1800  lzma_len(s, &s->lzma.match_len_dec, pos_state);
1801
1802  probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
1803  dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;
1804
1805  if (dist_slot < DIST_MODEL_START) {
1806    s->lzma.rep0 = dist_slot;
1807  } else {
1808    limit = (dist_slot >> 1) - 1;
1809    s->lzma.rep0 = 2 + (dist_slot & 1);
1810
1811    if (dist_slot < DIST_MODEL_END) {
1812      s->lzma.rep0 <<= limit;
1813      probs = s->lzma.dist_special + s->lzma.rep0
1814          - dist_slot - 1;
1815      rc_bittree_reverse(&s->rc, probs,
1816          &s->lzma.rep0, limit);
1817    } else {
1818      rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
1819      s->lzma.rep0 <<= ALIGN_BITS;
1820      rc_bittree_reverse(&s->rc, s->lzma.dist_align,
1821          &s->lzma.rep0, ALIGN_BITS);
1822    }
1823  }
1824}
1825
1826/*
1827 * Decode a repeated match. The distance is one of the four most recently
1828 * seen matches. The distance will be stored in s->lzma.rep0.
1829 */
1830static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
1831{
1832  uint32_t tmp;
1833
1834  if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
1835    if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
1836        s->lzma.state][pos_state])) {
1837      lzma_state_short_rep(&s->lzma.state);
1838      s->lzma.len = 1;
1839      return;
1840    }
1841  } else {
1842    if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
1843      tmp = s->lzma.rep1;
1844    } else {
1845      if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
1846        tmp = s->lzma.rep2;
1847      } else {
1848        tmp = s->lzma.rep3;
1849        s->lzma.rep3 = s->lzma.rep2;
1850      }
1851
1852      s->lzma.rep2 = s->lzma.rep1;
1853    }
1854
1855    s->lzma.rep1 = s->lzma.rep0;
1856    s->lzma.rep0 = tmp;
1857  }
1858
1859  lzma_state_long_rep(&s->lzma.state);
1860  lzma_len(s, &s->lzma.rep_len_dec, pos_state);
1861}
1862
1863/* LZMA decoder core */
1864static int lzma_main(struct xz_dec_lzma2 *s)
1865{
1866  uint32_t pos_state;
1867
1868  /*
1869   * If the dictionary was reached during the previous call, try to
1870   * finish the possibly pending repeat in the dictionary.
1871   */
1872  if (dict_has_space(&s->dict) && s->lzma.len > 0)
1873    dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);
1874
1875  /*
1876   * Decode more LZMA symbols. One iteration may consume up to
1877   * LZMA_IN_REQUIRED - 1 bytes.
1878   */
1879  while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
1880    pos_state = s->dict.pos & s->lzma.pos_mask;
1881
1882    if (!rc_bit(&s->rc, &s->lzma.is_match[
1883        s->lzma.state][pos_state])) {
1884      lzma_literal(s);
1885    } else {
1886      if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
1887        lzma_rep_match(s, pos_state);
1888      else
1889        lzma_match(s, pos_state);
1890
1891      if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
1892        return 0;
1893    }
1894  }
1895
1896  /*
1897   * Having the range decoder always normalized when we are outside
1898   * this function makes it easier to correctly handle end of the chunk.
1899   */
1900  rc_normalize(&s->rc);
1901
1902  return 1;
1903}
1904
1905/*
1906 * Reset the LZMA decoder and range decoder state. Dictionary is nore reset
1907 * here, because LZMA state may be reset without resetting the dictionary.
1908 */
1909static void lzma_reset(struct xz_dec_lzma2 *s)
1910{
1911  uint16_t *probs;
1912  size_t i;
1913
1914  s->lzma.state = STATE_LIT_LIT;
1915  s->lzma.rep0 = 0;
1916  s->lzma.rep1 = 0;
1917  s->lzma.rep2 = 0;
1918  s->lzma.rep3 = 0;
1919
1920  /*
1921   * All probabilities are initialized to the same value. This hack
1922   * makes the code smaller by avoiding a separate loop for each
1923   * probability array.
1924   *
1925   * This could be optimized so that only that part of literal
1926   * probabilities that are actually required. In the common case
1927   * we would write 12 KiB less.
1928   */
1929  probs = s->lzma.is_match[0];
1930  for (i = 0; i < PROBS_TOTAL; ++i)
1931    probs[i] = RC_BIT_MODEL_TOTAL / 2;
1932
1933  rc_reset(&s->rc);
1934}
1935
1936/*
1937 * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
1938 * from the decoded lp and pb values. On success, the LZMA decoder state is
1939 * reset and true is returned.
1940 */
1941static int lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
1942{
1943  if (props > (4 * 5 + 4) * 9 + 8)
1944    return 0;
1945
1946  s->lzma.pos_mask = 0;
1947  while (props >= 9 * 5) {
1948    props -= 9 * 5;
1949    ++s->lzma.pos_mask;
1950  }
1951
1952  s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;
1953
1954  s->lzma.literal_pos_mask = 0;
1955  while (props >= 9) {
1956    props -= 9;
1957    ++s->lzma.literal_pos_mask;
1958  }
1959
1960  s->lzma.lc = props;
1961
1962  if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
1963    return 0;
1964
1965  s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;
1966
1967  lzma_reset(s);
1968
1969  return 1;
1970}
1971
1972/*********
1973 * LZMA2 *
1974 *********/
1975
1976/*
1977 * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
1978 * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
1979 * wrapper function takes care of making the LZMA decoder's assumption safe.
1980 *
1981 * As long as there is plenty of input left to be decoded in the current LZMA
1982 * chunk, we decode directly from the caller-supplied input buffer until
1983 * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
1984 * s->temp.buf, which (hopefully) gets filled on the next call to this
1985 * function. We decode a few bytes from the temporary buffer so that we can
1986 * continue decoding from the caller-supplied input buffer again.
1987 */
1988static int lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
1989{
1990  size_t in_avail;
1991  uint32_t tmp;
1992
1993  in_avail = b->in_size - b->in_pos;
1994  if (s->temp.size > 0 || s->lzma2.compressed == 0) {
1995    tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
1996    if (tmp > s->lzma2.compressed - s->temp.size)
1997      tmp = s->lzma2.compressed - s->temp.size;
1998    if (tmp > in_avail)
1999      tmp = in_avail;
2000
2001    memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);
2002
2003    if (s->temp.size + tmp == s->lzma2.compressed) {
2004      memset(s->temp.buf + s->temp.size + tmp, 0,
2005          sizeof(s->temp.buf)
2006            - s->temp.size - tmp);
2007      s->rc.in_limit = s->temp.size + tmp;
2008    } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
2009      s->temp.size += tmp;
2010      b->in_pos += tmp;
2011      return 1;
2012    } else {
2013      s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
2014    }
2015
2016    s->rc.in = s->temp.buf;
2017    s->rc.in_pos = 0;
2018
2019    if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
2020      return 0;
2021
2022    s->lzma2.compressed -= s->rc.in_pos;
2023
2024    if (s->rc.in_pos < s->temp.size) {
2025      s->temp.size -= s->rc.in_pos;
2026      memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
2027          s->temp.size);
2028      return 1;
2029    }
2030
2031    b->in_pos += s->rc.in_pos - s->temp.size;
2032    s->temp.size = 0;
2033  }
2034
2035  in_avail = b->in_size - b->in_pos;
2036  if (in_avail >= LZMA_IN_REQUIRED) {
2037    s->rc.in = b->in;
2038    s->rc.in_pos = b->in_pos;
2039
2040    if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
2041      s->rc.in_limit = b->in_pos + s->lzma2.compressed;
2042    else
2043      s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;
2044
2045    if (!lzma_main(s))
2046      return 0;
2047
2048    in_avail = s->rc.in_pos - b->in_pos;
2049    if (in_avail > s->lzma2.compressed) return 0;
2050
2051    s->lzma2.compressed -= in_avail;
2052    b->in_pos = s->rc.in_pos;
2053  }
2054
2055  in_avail = b->in_size - b->in_pos;
2056  if (in_avail < LZMA_IN_REQUIRED) {
2057    if (in_avail > s->lzma2.compressed)
2058      in_avail = s->lzma2.compressed;
2059
2060    memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
2061    s->temp.size = in_avail;
2062    b->in_pos += in_avail;
2063  }
2064
2065  return 1;
2066}
2067
2068/*
2069 * Take care of the LZMA2 control layer, and forward the job of actual LZMA
2070 * decoding or copying of uncompressed chunks to other functions.
2071 */
2072enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
2073               struct xz_buf *b)
2074{
2075  uint32_t tmp;
2076
2077  while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
2078    switch (s->lzma2.sequence) {
2079    case SEQ_CONTROL:
2080      /*
2081       * LZMA2 control byte
2082       *
2083       * Exact values:
2084       *   0x00   End marker
2085       *   0x01   Dictionary reset followed by
2086       *          an uncompressed chunk
2087       *   0x02   Uncompressed chunk (no dictionary reset)
2088       *
2089       * Highest three bits (s->control & 0xE0):
2090       *   0xE0   Dictionary reset, new properties and state
2091       *          reset, followed by LZMA compressed chunk
2092       *   0xC0   New properties and state reset, followed
2093       *          by LZMA compressed chunk (no dictionary
2094       *          reset)
2095       *   0xA0   State reset using old properties,
2096       *          followed by LZMA compressed chunk (no
2097       *          dictionary reset)
2098       *   0x80   LZMA chunk (no dictionary or state reset)
2099       *
2100       * For LZMA compressed chunks, the lowest five bits
2101       * (s->control & 1F) are the highest bits of the
2102       * uncompressed size (bits 16-20).
2103       *
2104       * A new LZMA2 stream must begin with a dictionary
2105       * reset. The first LZMA chunk must set new
2106       * properties and reset the LZMA state.
2107       *
2108       * Values that don't match anything described above
2109       * are invalid and we return XZ_DATA_ERROR.
2110       */
2111      tmp = b->in[b->in_pos++];
2112
2113      if (tmp == 0x00)
2114        return XZ_STREAM_END;
2115
2116      if (tmp >= 0xE0 || tmp == 0x01) {
2117        s->lzma2.need_props = 1;
2118        s->lzma2.need_dict_reset = 0;
2119        dict_reset(&s->dict);
2120      } else if (s->lzma2.need_dict_reset) {
2121        return XZ_DATA_ERROR;
2122      }
2123
2124      if (tmp >= 0x80) {
2125        s->lzma2.uncompressed = (tmp & 0x1F) << 16;
2126        s->lzma2.sequence = SEQ_UNCOMPRESSED_1;
2127
2128        if (tmp >= 0xC0) {
2129          /*
2130           * When there are new properties,
2131           * state reset is done at
2132           * SEQ_PROPERTIES.
2133           */
2134          s->lzma2.need_props = 0;
2135          s->lzma2.next_sequence
2136              = SEQ_PROPERTIES;
2137
2138        } else if (s->lzma2.need_props) {
2139          return XZ_DATA_ERROR;
2140
2141        } else {
2142          s->lzma2.next_sequence
2143              = SEQ_LZMA_PREPARE;
2144          if (tmp >= 0xA0)
2145            lzma_reset(s);
2146        }
2147      } else {
2148        if (tmp > 0x02)
2149          return XZ_DATA_ERROR;
2150
2151        s->lzma2.sequence = SEQ_COMPRESSED_0;
2152        s->lzma2.next_sequence = SEQ_COPY;
2153      }
2154
2155      break;
2156
2157    case SEQ_UNCOMPRESSED_1:
2158      s->lzma2.uncompressed
2159          += (uint32_t)b->in[b->in_pos++] << 8;
2160      s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
2161      break;
2162
2163    case SEQ_UNCOMPRESSED_2:
2164      s->lzma2.uncompressed
2165          += (uint32_t)b->in[b->in_pos++] + 1;
2166      s->lzma2.sequence = SEQ_COMPRESSED_0;
2167      break;
2168
2169    case SEQ_COMPRESSED_0:
2170      s->lzma2.compressed
2171          = (uint32_t)b->in[b->in_pos++] << 8;
2172      s->lzma2.sequence = SEQ_COMPRESSED_1;
2173      break;
2174
2175    case SEQ_COMPRESSED_1:
2176      s->lzma2.compressed
2177          += (uint32_t)b->in[b->in_pos++] + 1;
2178      s->lzma2.sequence = s->lzma2.next_sequence;
2179      break;
2180
2181    case SEQ_PROPERTIES:
2182      if (!lzma_props(s, b->in[b->in_pos++]))
2183        return XZ_DATA_ERROR;
2184
2185      s->lzma2.sequence = SEQ_LZMA_PREPARE;
2186
2187    case SEQ_LZMA_PREPARE:
2188      if (s->lzma2.compressed < RC_INIT_BYTES)
2189        return XZ_DATA_ERROR;
2190
2191      if (!rc_read_init(&s->rc, b))
2192        return XZ_OK;
2193
2194      s->lzma2.compressed -= RC_INIT_BYTES;
2195      s->lzma2.sequence = SEQ_LZMA_RUN;
2196
2197    case SEQ_LZMA_RUN:
2198      /*
2199       * Set dictionary limit to indicate how much we want
2200       * to be encoded at maximum. Decode new data into the
2201       * dictionary. Flush the new data from dictionary to
2202       * b->out. Check if we finished decoding this chunk.
2203       * In case the dictionary got full but we didn't fill
2204       * the output buffer yet, we may run this loop
2205       * multiple times without changing s->lzma2.sequence.
2206       */
2207      dict_limit(&s->dict, minof(b->out_size - b->out_pos,
2208          s->lzma2.uncompressed));
2209      if (!lzma2_lzma(s, b))
2210        return XZ_DATA_ERROR;
2211
2212      s->lzma2.uncompressed -= dict_flush(&s->dict, b);
2213
2214      if (s->lzma2.uncompressed == 0) {
2215        if (s->lzma2.compressed > 0 || s->lzma.len > 0
2216            || !rc_is_finished(&s->rc))
2217          return XZ_DATA_ERROR;
2218
2219        rc_reset(&s->rc);
2220        s->lzma2.sequence = SEQ_CONTROL;
2221
2222      } else if (b->out_pos == b->out_size
2223          || (b->in_pos == b->in_size
2224            && s->temp.size
2225            < s->lzma2.compressed)) {
2226        return XZ_OK;
2227      }
2228
2229      break;
2230
2231    case SEQ_COPY:
2232      dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
2233      if (s->lzma2.compressed > 0)
2234        return XZ_OK;
2235
2236      s->lzma2.sequence = SEQ_CONTROL;
2237      break;
2238    }
2239  }
2240
2241  return XZ_OK;
2242}
2243
2244struct xz_dec_lzma2 *xz_dec_lzma2_create(uint32_t dict_max)
2245{
2246  struct xz_dec_lzma2 *s = malloc(sizeof(*s));
2247  if (s == NULL)
2248    return NULL;
2249
2250  s->dict.size_max = dict_max;
2251  s->dict.buf = NULL;
2252  s->dict.allocated = 0;
2253
2254  return s;
2255}
2256
2257enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
2258{
2259  /* This limits dictionary size to 3 GiB to keep parsing simpler. */
2260  if (props > 39)
2261    return XZ_OPTIONS_ERROR;
2262
2263  s->dict.size = 2 + (props & 1);
2264  s->dict.size <<= (props >> 1) + 11;
2265
2266  if (s->dict.size > s->dict.size_max)
2267    return XZ_MEMLIMIT_ERROR;
2268
2269  s->dict.end = s->dict.size;
2270
2271  if (s->dict.allocated < s->dict.size) {
2272    free(s->dict.buf);
2273    s->dict.buf = malloc(s->dict.size);
2274    if (s->dict.buf == NULL) {
2275      s->dict.allocated = 0;
2276      return XZ_MEM_ERROR;
2277    }
2278  }
2279
2280  s->lzma.len = 0;
2281
2282  s->lzma2.sequence = SEQ_CONTROL;
2283  s->lzma2.need_dict_reset = 1;
2284
2285  s->temp.size = 0;
2286
2287  return XZ_OK;
2288}
2289
2290/*
2291 * .xz Stream decoder
2292 */
2293
2294
2295// BEGIN xz_stream.h
2296/*
2297 * Definitions for handling the .xz file format
2298 */
2299
2300/*
2301 * See the .xz file format specification at
2302 * http://tukaani.org/xz/xz-file-format.txt
2303 * to understand the container format.
2304 */
2305
2306#define STREAM_HEADER_SIZE 12
2307
2308#define HEADER_MAGIC "\3757zXZ"
2309#define HEADER_MAGIC_SIZE 6
2310
2311#define FOOTER_MAGIC "YZ"
2312#define FOOTER_MAGIC_SIZE 2
2313
2314/*
2315 * Variable-length integer can hold a 63-bit unsigned integer or a special
2316 * value indicating that the value is unknown.
2317 *
2318 * Experimental: vli_type can be defined to uint32_t to save a few bytes
2319 * in code size (no effect on speed). Doing so limits the uncompressed and
2320 * compressed size of the file to less than 256 MiB and may also weaken
2321 * error detection slightly.
2322 */
2323typedef uint64_t vli_type;
2324
2325#define VLI_MAX ((vli_type)-1 / 2)
2326#define VLI_UNKNOWN ((vli_type)-1)
2327
2328/* Maximum encoded size of a VLI */
2329#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
2330
2331/* Integrity Check types */
2332enum xz_check {
2333  XZ_CHECK_NONE = 0,
2334  XZ_CHECK_CRC32 = 1,
2335  XZ_CHECK_CRC64 = 4,
2336  XZ_CHECK_SHA256 = 10
2337};
2338
2339/* Maximum possible Check ID */
2340#define XZ_CHECK_MAX 15
2341// END xz_stream.h
2342
2343#define IS_CRC64(check_type) ((check_type) == XZ_CHECK_CRC64)
2344
2345/* Hash used to validate the Index field */
2346struct xz_dec_hash {
2347  vli_type unpadded;
2348  vli_type uncompressed;
2349  uint32_t crc32;
2350};
2351
2352struct xz_dec {
2353  /* Position in dec_main() */
2354  enum {
2355    SEQ_STREAM_HEADER,
2356    SEQ_BLOCK_START,
2357    SEQ_BLOCK_HEADER,
2358    SEQ_BLOCK_UNCOMPRESS,
2359    SEQ_BLOCK_PADDING,
2360    SEQ_BLOCK_CHECK,
2361    SEQ_INDEX,
2362    SEQ_INDEX_PADDING,
2363    SEQ_INDEX_CRC32,
2364    SEQ_STREAM_FOOTER
2365  } sequence;
2366
2367  /* Position in variable-length integers and Check fields */
2368  uint32_t pos;
2369
2370  /* Variable-length integer decoded by dec_vli() */
2371  vli_type vli;
2372
2373  /* Saved in_pos and out_pos */
2374  size_t in_start;
2375  size_t out_start;
2376
2377  /* CRC32 or CRC64 value in Block or CRC32 value in Index */
2378  uint64_t crc;
2379
2380  /* Type of the integrity check calculated from uncompressed data */
2381  enum xz_check check_type;
2382
2383  /*
2384   * True if the next call to xz_dec_run() is allowed to return
2385   * XZ_BUF_ERROR.
2386   */
2387  int allow_buf_error;
2388
2389  /* Information stored in Block Header */
2390  struct {
2391    /*
2392     * Value stored in the Compressed Size field, or
2393     * VLI_UNKNOWN if Compressed Size is not present.
2394     */
2395    vli_type compressed;
2396
2397    /*
2398     * Value stored in the Uncompressed Size field, or
2399     * VLI_UNKNOWN if Uncompressed Size is not present.
2400     */
2401    vli_type uncompressed;
2402
2403    /* Size of the Block Header field */
2404    uint32_t size;
2405  } block_header;
2406
2407  /* Information collected when decoding Blocks */
2408  struct {
2409    /* Observed compressed size of the current Block */
2410    vli_type compressed;
2411
2412    /* Observed uncompressed size of the current Block */
2413    vli_type uncompressed;
2414
2415    /* Number of Blocks decoded so far */
2416    vli_type count;
2417
2418    /*
2419     * Hash calculated from the Block sizes. This is used to
2420     * validate the Index field.
2421     */
2422    struct xz_dec_hash hash;
2423  } block;
2424
2425  /* Variables needed when verifying the Index field */
2426  struct {
2427    /* Position in dec_index() */
2428    enum {
2429      SEQ_INDEX_COUNT,
2430      SEQ_INDEX_UNPADDED,
2431      SEQ_INDEX_UNCOMPRESSED
2432    } sequence;
2433
2434    /* Size of the Index in bytes */
2435    vli_type size;
2436
2437    /* Number of Records (matches block.count in valid files) */
2438    vli_type count;
2439
2440    /*
2441     * Hash calculated from the Records (matches block.hash in
2442     * valid files).
2443     */
2444    struct xz_dec_hash hash;
2445  } index;
2446
2447  /*
2448   * Temporary buffer needed to hold Stream Header, Block Header,
2449   * and Stream Footer. The Block Header is the biggest (1 KiB)
2450   * so we reserve space according to that. buf[] has to be aligned
2451   * to a multiple of four bytes; the size_t variables before it
2452   * should guarantee this.
2453   */
2454  struct {
2455    size_t pos;
2456    size_t size;
2457    uint8_t buf[1024];
2458  } temp;
2459
2460  struct xz_dec_lzma2 *lzma2;
2461
2462#ifdef XZ_DEC_BCJ
2463  struct xz_dec_bcj *bcj;
2464  int bcj_active;
2465#endif
2466};
2467
2468/* Sizes of the Check field with different Check IDs */
2469static const uint8_t check_sizes[16] = {
2470  0,
2471  4, 4, 4,
2472  8, 8, 8,
2473  16, 16, 16,
2474  32, 32, 32,
2475  64, 64, 64
2476};
2477
2478/*
2479 * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
2480 * must have set s->temp.pos to indicate how much data we are supposed
2481 * to copy into s->temp.buf. Return true once s->temp.pos has reached
2482 * s->temp.size.
2483 */
2484static int fill_temp(struct xz_dec *s, struct xz_buf *b)
2485{
2486  size_t copy_size = minof(b->in_size - b->in_pos, s->temp.size - s->temp.pos);
2487
2488  memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
2489  b->in_pos += copy_size;
2490  s->temp.pos += copy_size;
2491
2492  if (s->temp.pos == s->temp.size) {
2493    s->temp.pos = 0;
2494    return 1;
2495  }
2496
2497  return 0;
2498}
2499
2500/* Decode a variable-length integer (little-endian base-128 encoding) */
2501static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in,
2502         size_t *in_pos, size_t in_size)
2503{
2504  uint8_t byte;
2505
2506  if (s->pos == 0)
2507    s->vli = 0;
2508
2509  while (*in_pos < in_size) {
2510    byte = in[*in_pos];
2511    ++*in_pos;
2512
2513    s->vli |= (vli_type)(byte & 0x7F) << s->pos;
2514
2515    if ((byte & 0x80) == 0) {
2516      /* Don't allow non-minimal encodings. */
2517      if (byte == 0 && s->pos != 0)
2518        return XZ_DATA_ERROR;
2519
2520      s->pos = 0;
2521      return XZ_STREAM_END;
2522    }
2523
2524    s->pos += 7;
2525    if (s->pos == 7 * VLI_BYTES_MAX)
2526      return XZ_DATA_ERROR;
2527  }
2528
2529  return XZ_OK;
2530}
2531
2532/*
2533 * Decode the Compressed Data field from a Block. Update and validate
2534 * the observed compressed and uncompressed sizes of the Block so that
2535 * they don't exceed the values possibly stored in the Block Header
2536 * (validation assumes that no integer overflow occurs, since vli_type
2537 * is normally uint64_t). Update the CRC32 or CRC64 value if presence of
2538 * the CRC32 or CRC64 field was indicated in Stream Header.
2539 *
2540 * Once the decoding is finished, validate that the observed sizes match
2541 * the sizes possibly stored in the Block Header. Update the hash and
2542 * Block count, which are later used to validate the Index field.
2543 */
2544static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
2545{
2546  enum xz_ret ret;
2547
2548  s->in_start = b->in_pos;
2549  s->out_start = b->out_pos;
2550
2551#ifdef XZ_DEC_BCJ
2552  if (s->bcj_active)
2553    ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
2554  else
2555#endif
2556    ret = xz_dec_lzma2_run(s->lzma2, b);
2557
2558  s->block.compressed += b->in_pos - s->in_start;
2559  s->block.uncompressed += b->out_pos - s->out_start;
2560
2561  /*
2562   * There is no need to separately check for VLI_UNKNOWN, since
2563   * the observed sizes are always smaller than VLI_UNKNOWN.
2564   */
2565  if (s->block.compressed > s->block_header.compressed
2566      || s->block.uncompressed
2567        > s->block_header.uncompressed)
2568    return XZ_DATA_ERROR;
2569
2570  if (s->check_type == XZ_CHECK_CRC32)
2571    s->crc = xz_crc32(b->out + s->out_start,
2572        b->out_pos - s->out_start, s->crc);
2573  else if (s->check_type == XZ_CHECK_CRC64)
2574    s->crc = ~(s->crc);
2575    size_t size = b->out_pos - s->out_start;
2576    uint8_t *buf = b->out + s->out_start;
2577    while (size) {
2578      s->crc = xz_crc64_table[*buf++ ^ (s->crc & 0xFF)] ^ (s->crc >> 8);
2579      --size;
2580    }
2581    s->crc=~(s->crc);
2582
2583  if (ret == XZ_STREAM_END) {
2584    if (s->block_header.compressed != VLI_UNKNOWN
2585        && s->block_header.compressed
2586          != s->block.compressed)
2587      return XZ_DATA_ERROR;
2588
2589    if (s->block_header.uncompressed != VLI_UNKNOWN
2590        && s->block_header.uncompressed
2591          != s->block.uncompressed)
2592      return XZ_DATA_ERROR;
2593
2594    s->block.hash.unpadded += s->block_header.size
2595        + s->block.compressed;
2596
2597    s->block.hash.unpadded += check_sizes[s->check_type];
2598
2599    s->block.hash.uncompressed += s->block.uncompressed;
2600    s->block.hash.crc32 = xz_crc32(
2601        (const uint8_t *)&s->block.hash,
2602        sizeof(s->block.hash), s->block.hash.crc32);
2603
2604    ++s->block.count;
2605  }
2606
2607  return ret;
2608}
2609
2610/* Update the Index size and the CRC32 value. */
2611static void index_update(struct xz_dec *s, const struct xz_buf *b)
2612{
2613  size_t in_used = b->in_pos - s->in_start;
2614  s->index.size += in_used;
2615  s->crc = xz_crc32(b->in + s->in_start, in_used, s->crc);
2616}
2617
2618/*
2619 * Decode the Number of Records, Unpadded Size, and Uncompressed Size
2620 * fields from the Index field. That is, Index Padding and CRC32 are not
2621 * decoded by this function.
2622 *
2623 * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
2624 * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
2625 */
2626static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b)
2627{
2628  enum xz_ret ret;
2629
2630  do {
2631    ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
2632    if (ret != XZ_STREAM_END) {
2633      index_update(s, b);
2634      return ret;
2635    }
2636
2637    switch (s->index.sequence) {
2638    case SEQ_INDEX_COUNT:
2639      s->index.count = s->vli;
2640
2641      /*
2642       * Validate that the Number of Records field
2643       * indicates the same number of Records as
2644       * there were Blocks in the Stream.
2645       */
2646      if (s->index.count != s->block.count)
2647        return XZ_DATA_ERROR;
2648
2649      s->index.sequence = SEQ_INDEX_UNPADDED;
2650      break;
2651
2652    case SEQ_INDEX_UNPADDED:
2653      s->index.hash.unpadded += s->vli;
2654      s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
2655      break;
2656
2657    case SEQ_INDEX_UNCOMPRESSED:
2658      s->index.hash.uncompressed += s->vli;
2659      s->index.hash.crc32 = xz_crc32(
2660          (const uint8_t *)&s->index.hash,
2661          sizeof(s->index.hash),
2662          s->index.hash.crc32);
2663      --s->index.count;
2664      s->index.sequence = SEQ_INDEX_UNPADDED;
2665      break;
2666    }
2667  } while (s->index.count > 0);
2668
2669  return XZ_STREAM_END;
2670}
2671
2672/*
2673 * Validate that the next four or eight input bytes match the value
2674 * of s->crc. s->pos must be zero when starting to validate the first byte.
2675 * The "bits" argument allows using the same code for both CRC32 and CRC64.
2676 */
2677static enum xz_ret crc_validate(struct xz_dec *s, struct xz_buf *b,
2678        uint32_t bits)
2679{
2680  do {
2681    if (b->in_pos == b->in_size)
2682      return XZ_OK;
2683
2684    if (((s->crc >> s->pos) & 0xFF) != b->in[b->in_pos++])
2685      return XZ_DATA_ERROR;
2686
2687    s->pos += 8;
2688
2689  } while (s->pos < bits);
2690
2691  s->crc = 0;
2692  s->pos = 0;
2693
2694  return XZ_STREAM_END;
2695}
2696
2697/*
2698 * Skip over the Check field when the Check ID is not supported.
2699 * Returns true once the whole Check field has been skipped over.
2700 */
2701static int check_skip(struct xz_dec *s, struct xz_buf *b)
2702{
2703  while (s->pos < check_sizes[s->check_type]) {
2704    if (b->in_pos == b->in_size) return 0;
2705
2706    ++b->in_pos;
2707    ++s->pos;
2708  }
2709
2710  s->pos = 0;
2711
2712  return 1;
2713}
2714
2715/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
2716static enum xz_ret dec_stream_header(struct xz_dec *s)
2717{
2718  if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
2719    return XZ_FORMAT_ERROR;
2720
2721  if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
2722      != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
2723    return XZ_DATA_ERROR;
2724
2725  if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
2726    return XZ_OPTIONS_ERROR;
2727
2728  /*
2729   * Of integrity checks, we support none (Check ID = 0),
2730   * CRC32 (Check ID = 1), and optionally CRC64 (Check ID = 4).
2731   * However, if XZ_DEC_ANY_CHECK is defined, we will accept other
2732   * check types too, but then the check won't be verified and
2733   * a warning (XZ_UNSUPPORTED_CHECK) will be given.
2734   */
2735  s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
2736
2737  if (s->check_type > XZ_CHECK_MAX)
2738    return XZ_OPTIONS_ERROR;
2739
2740  if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type))
2741    return XZ_UNSUPPORTED_CHECK;
2742
2743  return XZ_OK;
2744}
2745
2746/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
2747static enum xz_ret dec_stream_footer(struct xz_dec *s)
2748{
2749  if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
2750    return XZ_DATA_ERROR;
2751
2752  if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
2753    return XZ_DATA_ERROR;
2754
2755  /*
2756   * Validate Backward Size. Note that we never added the size of the
2757   * Index CRC32 field to s->index.size, thus we use s->index.size / 4
2758   * instead of s->index.size / 4 - 1.
2759   */
2760  if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
2761    return XZ_DATA_ERROR;
2762
2763  if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
2764    return XZ_DATA_ERROR;
2765
2766  /*
2767   * Use XZ_STREAM_END instead of XZ_OK to be more convenient
2768   * for the caller.
2769   */
2770  return XZ_STREAM_END;
2771}
2772
2773/* Decode the Block Header and initialize the filter chain. */
2774static enum xz_ret dec_block_header(struct xz_dec *s)
2775{
2776  enum xz_ret ret;
2777
2778  /*
2779   * Validate the CRC32. We know that the temp buffer is at least
2780   * eight bytes so this is safe.
2781   */
2782  s->temp.size -= 4;
2783  if (xz_crc32(s->temp.buf, s->temp.size, 0)
2784      != get_le32(s->temp.buf + s->temp.size))
2785    return XZ_DATA_ERROR;
2786
2787  s->temp.pos = 2;
2788
2789  /*
2790   * Catch unsupported Block Flags. We support only one or two filters
2791   * in the chain, so we catch that with the same test.
2792   */
2793#ifdef XZ_DEC_BCJ
2794  if (s->temp.buf[1] & 0x3E)
2795#else
2796  if (s->temp.buf[1] & 0x3F)
2797#endif
2798    return XZ_OPTIONS_ERROR;
2799
2800  /* Compressed Size */
2801  if (s->temp.buf[1] & 0x40) {
2802    if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
2803          != XZ_STREAM_END)
2804      return XZ_DATA_ERROR;
2805
2806    s->block_header.compressed = s->vli;
2807  } else {
2808    s->block_header.compressed = VLI_UNKNOWN;
2809  }
2810
2811  /* Uncompressed Size */
2812  if (s->temp.buf[1] & 0x80) {
2813    if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
2814        != XZ_STREAM_END)
2815      return XZ_DATA_ERROR;
2816
2817    s->block_header.uncompressed = s->vli;
2818  } else {
2819    s->block_header.uncompressed = VLI_UNKNOWN;
2820  }
2821
2822#ifdef XZ_DEC_BCJ
2823  /* If there are two filters, the first one must be a BCJ filter. */
2824  s->bcj_active = s->temp.buf[1] & 0x01;
2825  if (s->bcj_active) {
2826    if (s->temp.size - s->temp.pos < 2)
2827      return XZ_OPTIONS_ERROR;
2828
2829    ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
2830    if (ret != XZ_OK)
2831      return ret;
2832
2833    /*
2834     * We don't support custom start offset,
2835     * so Size of Properties must be zero.
2836     */
2837    if (s->temp.buf[s->temp.pos++] != 0x00)
2838      return XZ_OPTIONS_ERROR;
2839  }
2840#endif
2841
2842  /* Valid Filter Flags always take at least two bytes. */
2843  if (s->temp.size - s->temp.pos < 2)
2844    return XZ_DATA_ERROR;
2845
2846  /* Filter ID = LZMA2 */
2847  if (s->temp.buf[s->temp.pos++] != 0x21)
2848    return XZ_OPTIONS_ERROR;
2849
2850  /* Size of Properties = 1-byte Filter Properties */
2851  if (s->temp.buf[s->temp.pos++] != 0x01)
2852    return XZ_OPTIONS_ERROR;
2853
2854  /* Filter Properties contains LZMA2 dictionary size. */
2855  if (s->temp.size - s->temp.pos < 1)
2856    return XZ_DATA_ERROR;
2857
2858  ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
2859  if (ret != XZ_OK)
2860    return ret;
2861
2862  /* The rest must be Header Padding. */
2863  while (s->temp.pos < s->temp.size)
2864    if (s->temp.buf[s->temp.pos++] != 0x00)
2865      return XZ_OPTIONS_ERROR;
2866
2867  s->temp.pos = 0;
2868  s->block.compressed = 0;
2869  s->block.uncompressed = 0;
2870
2871  return XZ_OK;
2872}
2873
2874static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
2875{
2876  enum xz_ret ret;
2877
2878  /*
2879   * Store the start position for the case when we are in the middle
2880   * of the Index field.
2881   */
2882  s->in_start = b->in_pos;
2883
2884  for (;;) {
2885    switch (s->sequence) {
2886    case SEQ_STREAM_HEADER:
2887      /*
2888       * Stream Header is copied to s->temp, and then
2889       * decoded from there. This way if the caller
2890       * gives us only little input at a time, we can
2891       * still keep the Stream Header decoding code
2892       * simple. Similar approach is used in many places
2893       * in this file.
2894       */
2895      if (!fill_temp(s, b))
2896        return XZ_OK;
2897
2898      /*
2899       * If dec_stream_header() returns
2900       * XZ_UNSUPPORTED_CHECK, it is still possible
2901       * to continue decoding if working in multi-call
2902       * mode. Thus, update s->sequence before calling
2903       * dec_stream_header().
2904       */
2905      s->sequence = SEQ_BLOCK_START;
2906
2907      ret = dec_stream_header(s);
2908      if (ret != XZ_OK)
2909        return ret;
2910
2911    case SEQ_BLOCK_START:
2912      /* We need one byte of input to continue. */
2913      if (b->in_pos == b->in_size)
2914        return XZ_OK;
2915
2916      /* See if this is the beginning of the Index field. */
2917      if (b->in[b->in_pos] == 0) {
2918        s->in_start = b->in_pos++;
2919        s->sequence = SEQ_INDEX;
2920        break;
2921      }
2922
2923      /*
2924       * Calculate the size of the Block Header and
2925       * prepare to decode it.
2926       */
2927      s->block_header.size
2928        = ((uint32_t)b->in[b->in_pos] + 1) * 4;
2929
2930      s->temp.size = s->block_header.size;
2931      s->temp.pos = 0;
2932      s->sequence = SEQ_BLOCK_HEADER;
2933
2934    case SEQ_BLOCK_HEADER:
2935      if (!fill_temp(s, b))
2936        return XZ_OK;
2937
2938      ret = dec_block_header(s);
2939      if (ret != XZ_OK)
2940        return ret;
2941
2942      s->sequence = SEQ_BLOCK_UNCOMPRESS;
2943
2944    case SEQ_BLOCK_UNCOMPRESS:
2945      ret = dec_block(s, b);
2946      if (ret != XZ_STREAM_END)
2947        return ret;
2948
2949      s->sequence = SEQ_BLOCK_PADDING;
2950
2951    case SEQ_BLOCK_PADDING:
2952      /*
2953       * Size of Compressed Data + Block Padding
2954       * must be a multiple of four. We don't need
2955       * s->block.compressed for anything else
2956       * anymore, so we use it here to test the size
2957       * of the Block Padding field.
2958       */
2959      while (s->block.compressed & 3) {
2960        if (b->in_pos == b->in_size)
2961          return XZ_OK;
2962
2963        if (b->in[b->in_pos++] != 0)
2964          return XZ_DATA_ERROR;
2965
2966        ++s->block.compressed;
2967      }
2968
2969      s->sequence = SEQ_BLOCK_CHECK;
2970
2971    case SEQ_BLOCK_CHECK:
2972      if (s->check_type == XZ_CHECK_CRC32) {
2973        ret = crc_validate(s, b, 32);
2974        if (ret != XZ_STREAM_END)
2975          return ret;
2976      }
2977      else if (IS_CRC64(s->check_type)) {
2978        ret = crc_validate(s, b, 64);
2979        if (ret != XZ_STREAM_END)
2980          return ret;
2981      }
2982      else if (!check_skip(s, b)) {
2983        return XZ_OK;
2984      }
2985
2986      s->sequence = SEQ_BLOCK_START;
2987      break;
2988
2989    case SEQ_INDEX:
2990      ret = dec_index(s, b);
2991      if (ret != XZ_STREAM_END)
2992        return ret;
2993
2994      s->sequence = SEQ_INDEX_PADDING;
2995
2996    case SEQ_INDEX_PADDING:
2997      while ((s->index.size + (b->in_pos - s->in_start))
2998          & 3) {
2999        if (b->in_pos == b->in_size) {
3000          index_update(s, b);
3001          return XZ_OK;
3002        }
3003
3004        if (b->in[b->in_pos++] != 0)
3005          return XZ_DATA_ERROR;
3006      }
3007
3008      /* Finish the CRC32 value and Index size. */
3009      index_update(s, b);
3010
3011      /* Compare the hashes to validate the Index field. */
3012      if (!memeq(&s->block.hash, &s->index.hash,
3013          sizeof(s->block.hash)))
3014        return XZ_DATA_ERROR;
3015
3016      s->sequence = SEQ_INDEX_CRC32;
3017
3018    case SEQ_INDEX_CRC32:
3019      ret = crc_validate(s, b, 32);
3020      if (ret != XZ_STREAM_END)
3021        return ret;
3022
3023      s->temp.size = STREAM_HEADER_SIZE;
3024      s->sequence = SEQ_STREAM_FOOTER;
3025
3026    case SEQ_STREAM_FOOTER:
3027      if (!fill_temp(s, b))
3028        return XZ_OK;
3029
3030      return dec_stream_footer(s);
3031    }
3032  }
3033
3034  /* Never reached */
3035}
3036
3037/*
3038 * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
3039 * multi-call and single-call decoding.
3040 *
3041 * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
3042 * are not going to make any progress anymore. This is to prevent the caller
3043 * from calling us infinitely when the input file is truncated or otherwise
3044 * corrupt. Since zlib-style API allows that the caller fills the input buffer
3045 * only when the decoder doesn't produce any new output, we have to be careful
3046 * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
3047 * after the second consecutive call to xz_dec_run() that makes no progress.
3048 *
3049 * In single-call mode, if we couldn't decode everything and no error
3050 * occurred, either the input is truncated or the output buffer is too small.
3051 * Since we know that the last input byte never produces any output, we know
3052 * that if all the input was consumed and decoding wasn't finished, the file
3053 * must be corrupt. Otherwise the output buffer has to be too small or the
3054 * file is corrupt in a way that decoding it produces too big output.
3055 *
3056 * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
3057 * their original values. This is because with some filter chains there won't
3058 * be any valid uncompressed data in the output buffer unless the decoding
3059 * actually succeeds (that's the price to pay of using the output buffer as
3060 * the workspace).
3061 */
3062enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
3063{
3064  size_t in_start;
3065  size_t out_start;
3066  enum xz_ret ret;
3067
3068  in_start = b->in_pos;
3069  out_start = b->out_pos;
3070  ret = dec_main(s, b);
3071
3072  if (ret == XZ_OK && in_start == b->in_pos && out_start == b->out_pos) {
3073    if (s->allow_buf_error)
3074      ret = XZ_BUF_ERROR;
3075
3076    s->allow_buf_error = 1;
3077  } else {
3078    s->allow_buf_error = 0;
3079  }
3080
3081  return ret;
3082}
3083
3084struct xz_dec *xz_dec_init(uint32_t dict_max)
3085{
3086  struct xz_dec *s = malloc(sizeof(*s));
3087  if (!s)
3088    return NULL;
3089
3090#ifdef XZ_DEC_BCJ
3091  s->bcj = malloc(sizeof(*s->bcj));
3092  if (!s->bcj)
3093    goto error_bcj;
3094#endif
3095
3096  s->lzma2 = xz_dec_lzma2_create(dict_max);
3097  if (s->lzma2 == NULL)
3098    goto error_lzma2;
3099
3100  xz_dec_reset(s);
3101  return s;
3102
3103error_lzma2:
3104#ifdef XZ_DEC_BCJ
3105  free(s->bcj);
3106error_bcj:
3107#endif
3108  free(s);
3109  return NULL;
3110}
3111
3112void xz_dec_reset(struct xz_dec *s)
3113{
3114  s->sequence = SEQ_STREAM_HEADER;
3115  s->allow_buf_error = 0;
3116  s->pos = 0;
3117  s->crc = 0;
3118  memset(&s->block, 0, sizeof(s->block));
3119  memset(&s->index, 0, sizeof(s->index));
3120  s->temp.pos = 0;
3121  s->temp.size = STREAM_HEADER_SIZE;
3122}
3123
3124void xz_dec_end(struct xz_dec *s)
3125{
3126  if (s != NULL) {
3127    free((s->lzma2)->dict.buf);
3128    free(s->lzma2);
3129
3130#ifdef XZ_DEC_BCJ
3131    free(s->bcj);
3132#endif
3133    free(s);
3134  }
3135}
3136