linux/lib/zlib_inflate/inffast.c
<<
>>
Prefs
   1/* inffast.c -- fast decoding
   2 * Copyright (C) 1995-2004 Mark Adler
   3 * For conditions of distribution and use, see copyright notice in zlib.h
   4 */
   5
   6#include <linux/zutil.h>
   7#include "inftrees.h"
   8#include "inflate.h"
   9#include "inffast.h"
  10
  11#ifndef ASMINF
  12
  13union uu {
  14        unsigned short us;
  15        unsigned char b[2];
  16};
  17
  18/* Endian independent version */
  19static inline unsigned short
  20get_unaligned16(const unsigned short *p)
  21{
  22        union uu  mm;
  23        unsigned char *b = (unsigned char *)p;
  24
  25        mm.b[0] = b[0];
  26        mm.b[1] = b[1];
  27        return mm.us;
  28}
  29
  30/*
  31   Decode literal, length, and distance codes and write out the resulting
  32   literal and match bytes until either not enough input or output is
  33   available, an end-of-block is encountered, or a data error is encountered.
  34   When large enough input and output buffers are supplied to inflate(), for
  35   example, a 16K input buffer and a 64K output buffer, more than 95% of the
  36   inflate execution time is spent in this routine.
  37
  38   Entry assumptions:
  39
  40        state->mode == LEN
  41        strm->avail_in >= 6
  42        strm->avail_out >= 258
  43        start >= strm->avail_out
  44        state->bits < 8
  45
  46   On return, state->mode is one of:
  47
  48        LEN -- ran out of enough output space or enough available input
  49        TYPE -- reached end of block code, inflate() to interpret next block
  50        BAD -- error in block data
  51
  52   Notes:
  53
  54    - The maximum input bits used by a length/distance pair is 15 bits for the
  55      length code, 5 bits for the length extra, 15 bits for the distance code,
  56      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
  57      Therefore if strm->avail_in >= 6, then there is enough input to avoid
  58      checking for available input while decoding.
  59
  60    - The maximum bytes that a single length/distance pair can output is 258
  61      bytes, which is the maximum length that can be coded.  inflate_fast()
  62      requires strm->avail_out >= 258 for each loop to avoid checking for
  63      output space.
  64
  65    - @start:   inflate()'s starting value for strm->avail_out
  66 */
  67void inflate_fast(z_streamp strm, unsigned start)
  68{
  69    struct inflate_state *state;
  70    const unsigned char *in;    /* local strm->next_in */
  71    const unsigned char *last;  /* while in < last, enough input available */
  72    unsigned char *out;         /* local strm->next_out */
  73    unsigned char *beg;         /* inflate()'s initial strm->next_out */
  74    unsigned char *end;         /* while out < end, enough space available */
  75#ifdef INFLATE_STRICT
  76    unsigned dmax;              /* maximum distance from zlib header */
  77#endif
  78    unsigned wsize;             /* window size or zero if not using window */
  79    unsigned whave;             /* valid bytes in the window */
  80    unsigned write;             /* window write index */
  81    unsigned char *window;      /* allocated sliding window, if wsize != 0 */
  82    unsigned long hold;         /* local strm->hold */
  83    unsigned bits;              /* local strm->bits */
  84    code const *lcode;          /* local strm->lencode */
  85    code const *dcode;          /* local strm->distcode */
  86    unsigned lmask;             /* mask for first level of length codes */
  87    unsigned dmask;             /* mask for first level of distance codes */
  88    code this;                  /* retrieved table entry */
  89    unsigned op;                /* code bits, operation, extra bits, or */
  90                                /*  window position, window bytes to copy */
  91    unsigned len;               /* match length, unused bytes */
  92    unsigned dist;              /* match distance */
  93    unsigned char *from;        /* where to copy match from */
  94
  95    /* copy state to local variables */
  96    state = (struct inflate_state *)strm->state;
  97    in = strm->next_in;
  98    last = in + (strm->avail_in - 5);
  99    out = strm->next_out;
 100    beg = out - (start - strm->avail_out);
 101    end = out + (strm->avail_out - 257);
 102#ifdef INFLATE_STRICT
 103    dmax = state->dmax;
 104#endif
 105    wsize = state->wsize;
 106    whave = state->whave;
 107    write = state->write;
 108    window = state->window;
 109    hold = state->hold;
 110    bits = state->bits;
 111    lcode = state->lencode;
 112    dcode = state->distcode;
 113    lmask = (1U << state->lenbits) - 1;
 114    dmask = (1U << state->distbits) - 1;
 115
 116    /* decode literals and length/distances until end-of-block or not enough
 117       input data or output space */
 118    do {
 119        if (bits < 15) {
 120            hold += (unsigned long)(*in++) << bits;
 121            bits += 8;
 122            hold += (unsigned long)(*in++) << bits;
 123            bits += 8;
 124        }
 125        this = lcode[hold & lmask];
 126      dolen:
 127        op = (unsigned)(this.bits);
 128        hold >>= op;
 129        bits -= op;
 130        op = (unsigned)(this.op);
 131        if (op == 0) {                          /* literal */
 132            *out++ = (unsigned char)(this.val);
 133        }
 134        else if (op & 16) {                     /* length base */
 135            len = (unsigned)(this.val);
 136            op &= 15;                           /* number of extra bits */
 137            if (op) {
 138                if (bits < op) {
 139                    hold += (unsigned long)(*in++) << bits;
 140                    bits += 8;
 141                }
 142                len += (unsigned)hold & ((1U << op) - 1);
 143                hold >>= op;
 144                bits -= op;
 145            }
 146            if (bits < 15) {
 147                hold += (unsigned long)(*in++) << bits;
 148                bits += 8;
 149                hold += (unsigned long)(*in++) << bits;
 150                bits += 8;
 151            }
 152            this = dcode[hold & dmask];
 153          dodist:
 154            op = (unsigned)(this.bits);
 155            hold >>= op;
 156            bits -= op;
 157            op = (unsigned)(this.op);
 158            if (op & 16) {                      /* distance base */
 159                dist = (unsigned)(this.val);
 160                op &= 15;                       /* number of extra bits */
 161                if (bits < op) {
 162                    hold += (unsigned long)(*in++) << bits;
 163                    bits += 8;
 164                    if (bits < op) {
 165                        hold += (unsigned long)(*in++) << bits;
 166                        bits += 8;
 167                    }
 168                }
 169                dist += (unsigned)hold & ((1U << op) - 1);
 170#ifdef INFLATE_STRICT
 171                if (dist > dmax) {
 172                    strm->msg = (char *)"invalid distance too far back";
 173                    state->mode = BAD;
 174                    break;
 175                }
 176#endif
 177                hold >>= op;
 178                bits -= op;
 179                op = (unsigned)(out - beg);     /* max distance in output */
 180                if (dist > op) {                /* see if copy from window */
 181                    op = dist - op;             /* distance back in window */
 182                    if (op > whave) {
 183                        strm->msg = (char *)"invalid distance too far back";
 184                        state->mode = BAD;
 185                        break;
 186                    }
 187                    from = window;
 188                    if (write == 0) {           /* very common case */
 189                        from += wsize - op;
 190                        if (op < len) {         /* some from window */
 191                            len -= op;
 192                            do {
 193                                *out++ = *from++;
 194                            } while (--op);
 195                            from = out - dist;  /* rest from output */
 196                        }
 197                    }
 198                    else if (write < op) {      /* wrap around window */
 199                        from += wsize + write - op;
 200                        op -= write;
 201                        if (op < len) {         /* some from end of window */
 202                            len -= op;
 203                            do {
 204                                *out++ = *from++;
 205                            } while (--op);
 206                            from = window;
 207                            if (write < len) {  /* some from start of window */
 208                                op = write;
 209                                len -= op;
 210                                do {
 211                                    *out++ = *from++;
 212                                } while (--op);
 213                                from = out - dist;      /* rest from output */
 214                            }
 215                        }
 216                    }
 217                    else {                      /* contiguous in window */
 218                        from += write - op;
 219                        if (op < len) {         /* some from window */
 220                            len -= op;
 221                            do {
 222                                *out++ = *from++;
 223                            } while (--op);
 224                            from = out - dist;  /* rest from output */
 225                        }
 226                    }
 227                    while (len > 2) {
 228                        *out++ = *from++;
 229                        *out++ = *from++;
 230                        *out++ = *from++;
 231                        len -= 3;
 232                    }
 233                    if (len) {
 234                        *out++ = *from++;
 235                        if (len > 1)
 236                            *out++ = *from++;
 237                    }
 238                }
 239                else {
 240                    unsigned short *sout;
 241                    unsigned long loops;
 242
 243                    from = out - dist;          /* copy direct from output */
 244                    /* minimum length is three */
 245                    /* Align out addr */
 246                    if (!((long)(out - 1) & 1)) {
 247                        *out++ = *from++;
 248                        len--;
 249                    }
 250                    sout = (unsigned short *)(out);
 251                    if (dist > 2) {
 252                        unsigned short *sfrom;
 253
 254                        sfrom = (unsigned short *)(from);
 255                        loops = len >> 1;
 256                        do {
 257                            if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
 258                                *sout++ = *sfrom++;
 259                            else
 260                                *sout++ = get_unaligned16(sfrom++);
 261                        } while (--loops);
 262                        out = (unsigned char *)sout;
 263                        from = (unsigned char *)sfrom;
 264                    } else { /* dist == 1 or dist == 2 */
 265                        unsigned short pat16;
 266
 267                        pat16 = *(sout-1);
 268                        if (dist == 1) {
 269                                union uu mm;
 270                                /* copy one char pattern to both bytes */
 271                                mm.us = pat16;
 272                                mm.b[0] = mm.b[1];
 273                                pat16 = mm.us;
 274                        }
 275                        loops = len >> 1;
 276                        do
 277                            *sout++ = pat16;
 278                        while (--loops);
 279                        out = (unsigned char *)sout;
 280                    }
 281                    if (len & 1)
 282                        *out++ = *from++;
 283                }
 284            }
 285            else if ((op & 64) == 0) {          /* 2nd level distance code */
 286                this = dcode[this.val + (hold & ((1U << op) - 1))];
 287                goto dodist;
 288            }
 289            else {
 290                strm->msg = (char *)"invalid distance code";
 291                state->mode = BAD;
 292                break;
 293            }
 294        }
 295        else if ((op & 64) == 0) {              /* 2nd level length code */
 296            this = lcode[this.val + (hold & ((1U << op) - 1))];
 297            goto dolen;
 298        }
 299        else if (op & 32) {                     /* end-of-block */
 300            state->mode = TYPE;
 301            break;
 302        }
 303        else {
 304            strm->msg = (char *)"invalid literal/length code";
 305            state->mode = BAD;
 306            break;
 307        }
 308    } while (in < last && out < end);
 309
 310    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
 311    len = bits >> 3;
 312    in -= len;
 313    bits -= len << 3;
 314    hold &= (1U << bits) - 1;
 315
 316    /* update state and return */
 317    strm->next_in = in;
 318    strm->next_out = out;
 319    strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
 320    strm->avail_out = (unsigned)(out < end ?
 321                                 257 + (end - out) : 257 - (out - end));
 322    state->hold = hold;
 323    state->bits = bits;
 324    return;
 325}
 326
 327/*
 328   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
 329   - Using bit fields for code structure
 330   - Different op definition to avoid & for extra bits (do & for table bits)
 331   - Three separate decoding do-loops for direct, window, and write == 0
 332   - Special case for distance > 1 copies to do overlapped load and store copy
 333   - Explicit branch predictions (based on measured branch probabilities)
 334   - Deferring match copy and interspersed it with decoding subsequent codes
 335   - Swapping literal/length else
 336   - Swapping window/direct else
 337   - Larger unrolled copy loops (three is about right)
 338   - Moving len -= 3 statement into middle of loop
 339 */
 340
 341#endif /* !ASMINF */
 342