linux/fs/ntfs3/lib/decompress_common.h
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * decompress_common.h - Code shared by the XPRESS and LZX decompressors
 *
 * Copyright (C) 2015 Eric Biggers
 */

#ifndef _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H
#define _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H

#include <linux/string.h>
#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <asm/unaligned.h>


/* "Force inline" macro (not required, but helpful for performance)  */
#define forceinline __always_inline

/* Enable whole-word match copying on selected architectures  */
#if defined(__i386__) || defined(__x86_64__) || defined(__ARM_FEATURE_UNALIGNED)
#  define FAST_UNALIGNED_ACCESS
#endif

/* Size of a machine word  */
#define WORDBYTES (sizeof(size_t))

static forceinline void
copy_unaligned_word(const void *src, void *dst)
{
	put_unaligned(get_unaligned((const size_t *)src), (size_t *)dst);
}


/* Generate a "word" with platform-dependent size whose bytes all contain the
 * value 'b'.
 */
static forceinline size_t repeat_byte(u8 b)
{
	size_t v;

	v = b;
	v |= v << 8;
	v |= v << 16;
	v |= v << ((WORDBYTES == 8) ? 32 : 0);
	return v;
}
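
/*
 * Worked example (editorial addition, not from the original header): on a
 * 64-bit machine repeat_byte(0xAB) evaluates to 0xABABABABABABABAB, and on a
 * 32-bit machine to 0xABABABAB.  lz_copy() below relies on this to expand an
 * offset-1 match (a run-length-encoded repeat of the previous byte) into
 * whole-word stores.
 */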

/* Structure that encapsulates a block of in-memory data being interpreted as a
 * stream of bits, optionally with interwoven literal bytes.  Bits are assumed
 * to be stored in little endian 16-bit coding units, with the bits ordered high
 * to low.
 */
struct input_bitstream {

	/* Bits that have been read from the input buffer.  The bits are
	 * left-justified; the next bit is always bit 31.
	 */
	u32 bitbuf;

	/* Number of bits currently held in @bitbuf.  */
	u32 bitsleft;

	/* Pointer to the next byte to be retrieved from the input buffer.  */
	const u8 *next;

	/* Pointer to just past the end of the input buffer.  */
	const u8 *end;
};

/* Initialize a bitstream to read from the specified input buffer.  */
static forceinline void init_input_bitstream(struct input_bitstream *is,
					     const void *buffer, u32 size)
{
	is->bitbuf = 0;
	is->bitsleft = 0;
	is->next = buffer;
	is->end = is->next + size;
}

/* Ensure the bit buffer variable for the bitstream contains at least @num_bits
 * bits.  Following this, bitstream_peek_bits() and/or bitstream_remove_bits()
 * may be called on the bitstream to peek or remove up to @num_bits bits.  Note
 * that @num_bits must be <= 16.
 */
static forceinline void bitstream_ensure_bits(struct input_bitstream *is,
					      u32 num_bits)
{
	if (is->bitsleft < num_bits) {
		if (is->end - is->next >= 2) {
			is->bitbuf |= (u32)get_unaligned_le16(is->next)
					<< (16 - is->bitsleft);
			is->next += 2;
		}
		is->bitsleft += 16;
	}
}
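
/*
 * Note (editorial addition): when fewer than two input bytes remain, the
 * refill above still credits 16 bits without supplying any data, so a
 * truncated stream decodes as if it were padded with zero bits.  The
 * byte-oriented readers below behave consistently, returning 0 at end of
 * input.
 */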

/* Return the next @num_bits bits from the bitstream, without removing them.
 * There must be at least @num_bits remaining in the buffer variable, from a
 * previous call to bitstream_ensure_bits().
 */
static forceinline u32
bitstream_peek_bits(const struct input_bitstream *is, const u32 num_bits)
{
	return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1);
}

/* Remove @num_bits from the bitstream.  There must be at least @num_bits
 * remaining in the buffer variable, from a previous call to
 * bitstream_ensure_bits().
 */
static forceinline void
bitstream_remove_bits(struct input_bitstream *is, u32 num_bits)
{
	is->bitbuf <<= num_bits;
	is->bitsleft -= num_bits;
}

/* Remove and return @num_bits bits from the bitstream.  There must be at least
 * @num_bits remaining in the buffer variable, from a previous call to
 * bitstream_ensure_bits().
 */
static forceinline u32
bitstream_pop_bits(struct input_bitstream *is, u32 num_bits)
{
	u32 bits = bitstream_peek_bits(is, num_bits);

	bitstream_remove_bits(is, num_bits);
	return bits;
}

/* Read and return the next @num_bits bits from the bitstream.  */
static forceinline u32
bitstream_read_bits(struct input_bitstream *is, u32 num_bits)
{
	bitstream_ensure_bits(is, num_bits);
	return bitstream_pop_bits(is, num_bits);
}
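
/*
 * Usage sketch (editorial addition; the 3-bit/5-bit layout and the
 * 'example_' name are hypothetical, not a real XPRESS or LZX field): the
 * one-shot bitstream_read_bits() refills per call, while the explicit
 * ensure/peek/remove sequence lets a single refill cover several short reads.
 */
static forceinline u32 example_read_tag_and_value(struct input_bitstream *is)
{
	u32 tag, value;

	/* One-shot form: refill and consume in a single call. */
	tag = bitstream_read_bits(is, 3);

	/* Explicit form: refill once, then peek and remove separately. */
	bitstream_ensure_bits(is, 5);
	value = bitstream_peek_bits(is, 5);
	bitstream_remove_bits(is, 5);

	return (tag << 5) | value;
}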

/* Read and return the next literal byte embedded in the bitstream.  */
static forceinline u8
bitstream_read_byte(struct input_bitstream *is)
{
	if (unlikely(is->end == is->next))
		return 0;
	return *is->next++;
}

/* Read and return the next 16-bit integer embedded in the bitstream.  */
static forceinline u16
bitstream_read_u16(struct input_bitstream *is)
{
	u16 v;

	if (unlikely(is->end - is->next < 2))
		return 0;
	v = get_unaligned_le16(is->next);
	is->next += 2;
	return v;
}

/* Read and return the next 32-bit integer embedded in the bitstream.  */
static forceinline u32
bitstream_read_u32(struct input_bitstream *is)
{
	u32 v;

	if (unlikely(is->end - is->next < 4))
		return 0;
	v = get_unaligned_le32(is->next);
	is->next += 4;
	return v;
}

/* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
 * Return a pointer just past the last byte written, or NULL if the read would
 * overflow the input buffer.
 */
static forceinline void *bitstream_read_bytes(struct input_bitstream *is,
					      void *dst_buffer, size_t count)
{
	if ((size_t)(is->end - is->next) < count)
		return NULL;
	memcpy(dst_buffer, is->next, count);
	is->next += count;
	return (u8 *)dst_buffer + count;
}
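
/*
 * Usage sketch (editorial addition; 'example_copy_literals' is hypothetical):
 * the NULL return is the only truncation signal, so callers should check it
 * before advancing.
 */
static forceinline bool example_copy_literals(struct input_bitstream *is,
					      u8 *out, size_t count)
{
	/* NULL means the requested run extends past the input buffer. */
	return bitstream_read_bytes(is, out, count) != NULL;
}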

/* Align the input bitstream on a coding-unit boundary.  Since input is
 * consumed in aligned 16-bit units, discarding the contents of the bit
 * buffer is sufficient.
 */
static forceinline void bitstream_align(struct input_bitstream *is)
{
	is->bitsleft = 0;
	is->bitbuf = 0;
}

extern int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
				     const u32 num_bits, const u8 lens[],
				     const u32 max_codeword_len,
				     u16 working_space[]);


/* Reads and returns the next Huffman-encoded symbol from a bitstream.  If the
 * input data is exhausted, the Huffman symbol is decoded as if the missing bits
 * are all zeroes.
 */
static forceinline u32 read_huffsym(struct input_bitstream *istream,
					 const u16 decode_table[],
					 u32 table_bits,
					 u32 max_codeword_len)
{
	u32 entry;
	u32 key_bits;

	bitstream_ensure_bits(istream, max_codeword_len);

	/* Index the decode table by the next table_bits bits of the input.  */
	key_bits = bitstream_peek_bits(istream, table_bits);
	entry = decode_table[key_bits];
	if (entry < 0xC000) {
		/* Fast case: The decode table directly provided the
		 * symbol and codeword length.  The low 11 bits are the
		 * symbol, and the high 5 bits are the codeword length.
		 */
		bitstream_remove_bits(istream, entry >> 11);
		return entry & 0x7FF;
	}
	/* Slow case: The codeword for the symbol is longer than
	 * table_bits, so the symbol does not have an entry
	 * directly in the first (1 << table_bits) entries of the
	 * decode table.  Traverse the appropriate binary tree
	 * bit-by-bit to decode the symbol.
	 */
	bitstream_remove_bits(istream, table_bits);
	do {
		key_bits = (entry & 0x3FFF) + bitstream_pop_bits(istream, 1);
	} while ((entry = decode_table[key_bits]) >= 0xC000);
	return entry;
}
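
/*
 * Usage sketch (editorial addition): build a decode table from per-symbol
 * codeword lengths, then decode a symbol.  The EXAMPLE_* sizes and
 * 'example_' names are illustrative assumptions patterned on how this
 * decoder family sizes its tables, not constants defined by this header.
 * In real use the table is built once per block, then read_huffsym() is
 * called once per symbol.
 */
enum {
	EXAMPLE_NUM_SYMS		= 512,
	EXAMPLE_TABLEBITS		= 12,
	EXAMPLE_MAX_CODEWORD_LEN	= 15,
};

struct example_huffman_tables {
	u16 decode_table[(1 << EXAMPLE_TABLEBITS) + 2 * EXAMPLE_NUM_SYMS];
	u16 working_space[2 * (EXAMPLE_MAX_CODEWORD_LEN + 1) +
			  EXAMPLE_NUM_SYMS];
};

static forceinline int example_decode_one_sym(struct example_huffman_tables *t,
					      const u8 lens[],
					      struct input_bitstream *is,
					      u32 *sym)
{
	/* A nonzero return means @lens does not describe a valid code. */
	if (make_huffman_decode_table(t->decode_table, EXAMPLE_NUM_SYMS,
				      EXAMPLE_TABLEBITS, lens,
				      EXAMPLE_MAX_CODEWORD_LEN,
				      t->working_space))
		return -1;

	*sym = read_huffsym(is, t->decode_table, EXAMPLE_TABLEBITS,
			    EXAMPLE_MAX_CODEWORD_LEN);
	return 0;
}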

/*
 * Copy an LZ77 match at (dst - offset) to dst.
 *
 * The length and offset must already have been validated --- that is,
 * (dst - offset) can't underrun the output buffer, and (dst + length) can't
 * overrun the output buffer.  Also, the length cannot be 0.
 *
 * @bufend points to the byte past the end of the output buffer.  This function
 * won't write any data beyond this position.
 *
 * @min_length is the minimum match length guaranteed by the compression
 * format, which lets the bytewise fallback below copy the first one or two
 * bytes unconditionally before entering its loop.
 *
 * Returns dst + length.
 */
static forceinline u8 *lz_copy(u8 *dst, u32 length, u32 offset, const u8 *bufend,
			       u32 min_length)
{
	const u8 *src = dst - offset;

	/*
	 * Try to copy one machine word at a time.  On i386 and x86_64 this is
	 * faster than copying one byte at a time, unless the data is
	 * near-random and all the matches have very short lengths.  Note that
	 * since this requires unaligned memory accesses, it won't necessarily
	 * be faster on every architecture.
	 *
	 * Also note that we might copy more than the length of the match.  For
	 * example, if a word is 8 bytes and the match is of length 5, then
	 * we'll simply copy 8 bytes.  This is okay as long as we don't write
	 * beyond the end of the output buffer, hence the check for (bufend -
	 * end >= WORDBYTES - 1).
	 */
#ifdef FAST_UNALIGNED_ACCESS
	u8 * const end = dst + length;

	if (bufend - end >= (ptrdiff_t)(WORDBYTES - 1)) {

		if (offset >= WORDBYTES) {
			/* The source and destination words don't overlap.  */

			/* To improve branch prediction, one iteration of this
			 * loop is unrolled.  Most matches are short and will
			 * fail the first check.  But if that check passes, then
			 * it becomes increasingly likely that the match is long
			 * and we'll need to continue copying.
			 */

			copy_unaligned_word(src, dst);
			src += WORDBYTES;
			dst += WORDBYTES;

			if (dst < end) {
				do {
					copy_unaligned_word(src, dst);
					src += WORDBYTES;
					dst += WORDBYTES;
				} while (dst < end);
			}
			return end;
		} else if (offset == 1) {

			/* Offset 1 matches are equivalent to run-length
			 * encoding of the previous byte.  This case is common
			 * if the data contains many repeated bytes.
			 */
			size_t v = repeat_byte(*(dst - 1));

			do {
				put_unaligned(v, (size_t *)dst);
				src += WORDBYTES;
				dst += WORDBYTES;
			} while (dst < end);
			return end;
		}
		/*
		 * We don't bother with special cases for other 'offset <
		 * WORDBYTES', which are usually rarer than 'offset == 1'.
		 * Extra checks will just slow things down.  Actually, it's
		 * possible to handle all the 'offset < WORDBYTES' cases using
		 * the same code, but it still becomes more complicated and
		 * doesn't seem any faster overall; it definitely slows down
		 * the more common 'offset == 1' case.
		 */
	}
#endif /* FAST_UNALIGNED_ACCESS */

	/* Fall back to a bytewise copy.  */

	if (min_length >= 2) {
		*dst++ = *src++;
		length--;
	}
	if (min_length >= 3) {
		*dst++ = *src++;
		length--;
	}
	do {
		*dst++ = *src++;
	} while (--length);

	return dst;
}
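
/*
 * Usage sketch (editorial addition; 'example_emit_match' is hypothetical):
 * emit an already-validated match during decompression.  The caller must
 * have verified the length and offset against the output buffer bounds, as
 * documented above; lz_copy() itself only guards its word-at-a-time overcopy
 * against @bufend.
 */
static forceinline u8 *example_emit_match(u8 *out_next, const u8 *out_end,
					  u32 length, u32 offset)
{
	/* 3 here stands for a format whose shortest match is 3 bytes. */
	return lz_copy(out_next, length, offset, out_end, 3);
}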

#endif /* _LINUX_NTFS3_LIB_DECOMPRESS_COMMON_H */