uboot/lib/zstd/huf_decompress.c
<<
>>
Prefs
   1// SPDX-License-Identifier: (GPL-2.0 or BSD-2-Clause)
   2/*
   3 * Huffman decoder, part of New Generation Entropy library
   4 * Copyright (C) 2013-2016, Yann Collet.
   5 *
   6 * You can contact the author at :
   7 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
   8 */
   9
  10/* **************************************************************
  11*  Compiler specifics
  12****************************************************************/
  13#define FORCE_INLINE static __always_inline
  14
  15/* **************************************************************
  16*  Dependencies
  17****************************************************************/
  18#include "bitstream.h" /* BIT_* */
  19#include "fse.h"       /* header compression */
  20#include "huf.h"
  21#include <linux/compiler.h>
  22#include <linux/kernel.h>
  23#include <linux/string.h> /* memcpy, memset */
  24
  25/* **************************************************************
  26*  Error Management
  27****************************************************************/
  28#define HUF_STATIC_ASSERT(c)                                   \
  29        {                                                      \
  30                enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
  31        } /* use only *after* variable declarations */
  32
  33/*-***************************/
  34/*  generic DTableDesc       */
  35/*-***************************/
  36
  37typedef struct {
  38        BYTE maxTableLog;
  39        BYTE tableType;
  40        BYTE tableLog;
  41        BYTE reserved;
  42} DTableDesc;
  43
  44static DTableDesc HUF_getDTableDesc(const HUF_DTable *table)
  45{
  46        DTableDesc dtd;
  47        memcpy(&dtd, table, sizeof(dtd));
  48        return dtd;
  49}
  50
  51/*-***************************/
  52/*  single-symbol decoding   */
  53/*-***************************/
  54
  55typedef struct {
  56        BYTE byte;
  57        BYTE nbBits;
  58} HUF_DEltX2; /* single-symbol decoding */
  59
  60size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
  61{
  62        U32 tableLog = 0;
  63        U32 nbSymbols = 0;
  64        size_t iSize;
  65        void *const dtPtr = DTable + 1;
  66        HUF_DEltX2 *const dt = (HUF_DEltX2 *)dtPtr;
  67
  68        U32 *rankVal;
  69        BYTE *huffWeight;
  70        size_t spaceUsed32 = 0;
  71
  72        rankVal = (U32 *)workspace + spaceUsed32;
  73        spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
  74        huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
  75        spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
  76
  77        if ((spaceUsed32 << 2) > workspaceSize)
  78                return ERROR(tableLog_tooLarge);
  79        workspace = (U32 *)workspace + spaceUsed32;
  80        workspaceSize -= (spaceUsed32 << 2);
  81
  82        HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
  83        /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
  84
  85        iSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
  86        if (HUF_isError(iSize))
  87                return iSize;
  88
  89        /* Table header */
  90        {
  91                DTableDesc dtd = HUF_getDTableDesc(DTable);
  92                if (tableLog > (U32)(dtd.maxTableLog + 1))
  93                        return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
  94                dtd.tableType = 0;
  95                dtd.tableLog = (BYTE)tableLog;
  96                memcpy(DTable, &dtd, sizeof(dtd));
  97        }
  98
  99        /* Calculate starting value for each rank */
 100        {
 101                U32 n, nextRankStart = 0;
 102                for (n = 1; n < tableLog + 1; n++) {
 103                        U32 const curr = nextRankStart;
 104                        nextRankStart += (rankVal[n] << (n - 1));
 105                        rankVal[n] = curr;
 106                }
 107        }
 108
 109        /* fill DTable */
 110        {
 111                U32 n;
 112                for (n = 0; n < nbSymbols; n++) {
 113                        U32 const w = huffWeight[n];
 114                        U32 const length = (1 << w) >> 1;
 115                        U32 u;
 116                        HUF_DEltX2 D;
 117                        D.byte = (BYTE)n;
 118                        D.nbBits = (BYTE)(tableLog + 1 - w);
 119                        for (u = rankVal[w]; u < rankVal[w] + length; u++)
 120                                dt[u] = D;
 121                        rankVal[w] += length;
 122                }
 123        }
 124
 125        return iSize;
 126}
 127
 128static BYTE HUF_decodeSymbolX2(BIT_DStream_t *Dstream, const HUF_DEltX2 *dt, const U32 dtLog)
 129{
 130        size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
 131        BYTE const c = dt[val].byte;
 132        BIT_skipBits(Dstream, dt[val].nbBits);
 133        return c;
 134}
 135
 136#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
 137
 138#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)         \
 139        if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
 140        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
 141
 142#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
 143        if (ZSTD_64bits())                     \
 144        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
 145
 146FORCE_INLINE size_t HUF_decodeStreamX2(BYTE *p, BIT_DStream_t *const bitDPtr, BYTE *const pEnd, const HUF_DEltX2 *const dt, const U32 dtLog)
 147{
 148        BYTE *const pStart = p;
 149
 150        /* up to 4 symbols at a time */
 151        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd - 4)) {
 152                HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
 153                HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
 154                HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
 155                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
 156        }
 157
 158        /* closer to the end */
 159        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
 160                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
 161
 162        /* no more data to retrieve from bitstream, hence no need to reload */
 163        while (p < pEnd)
 164                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
 165
 166        return pEnd - pStart;
 167}
 168
 169static size_t HUF_decompress1X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
 170{
 171        BYTE *op = (BYTE *)dst;
 172        BYTE *const oend = op + dstSize;
 173        const void *dtPtr = DTable + 1;
 174        const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;
 175        BIT_DStream_t bitD;
 176        DTableDesc const dtd = HUF_getDTableDesc(DTable);
 177        U32 const dtLog = dtd.tableLog;
 178
 179        {
 180                size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
 181                if (HUF_isError(errorCode))
 182                        return errorCode;
 183        }
 184
 185        HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
 186
 187        /* check */
 188        if (!BIT_endOfDStream(&bitD))
 189                return ERROR(corruption_detected);
 190
 191        return dstSize;
 192}
 193
 194size_t HUF_decompress1X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
 195{
 196        DTableDesc dtd = HUF_getDTableDesc(DTable);
 197        if (dtd.tableType != 0)
 198                return ERROR(GENERIC);
 199        return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
 200}
 201
 202size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
 203{
 204        const BYTE *ip = (const BYTE *)cSrc;
 205
 206        size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
 207        if (HUF_isError(hSize))
 208                return hSize;
 209        if (hSize >= cSrcSize)
 210                return ERROR(srcSize_wrong);
 211        ip += hSize;
 212        cSrcSize -= hSize;
 213
 214        return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
 215}
 216
 217static size_t HUF_decompress4X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
 218{
 219        /* Check */
 220        if (cSrcSize < 10)
 221                return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
 222
 223        {
 224                const BYTE *const istart = (const BYTE *)cSrc;
 225                BYTE *const ostart = (BYTE *)dst;
 226                BYTE *const oend = ostart + dstSize;
 227                const void *const dtPtr = DTable + 1;
 228                const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;
 229
 230                /* Init */
 231                BIT_DStream_t bitD1;
 232                BIT_DStream_t bitD2;
 233                BIT_DStream_t bitD3;
 234                BIT_DStream_t bitD4;
 235                size_t const length1 = ZSTD_readLE16(istart);
 236                size_t const length2 = ZSTD_readLE16(istart + 2);
 237                size_t const length3 = ZSTD_readLE16(istart + 4);
 238                size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
 239                const BYTE *const istart1 = istart + 6; /* jumpTable */
 240                const BYTE *const istart2 = istart1 + length1;
 241                const BYTE *const istart3 = istart2 + length2;
 242                const BYTE *const istart4 = istart3 + length3;
 243                const size_t segmentSize = (dstSize + 3) / 4;
 244                BYTE *const opStart2 = ostart + segmentSize;
 245                BYTE *const opStart3 = opStart2 + segmentSize;
 246                BYTE *const opStart4 = opStart3 + segmentSize;
 247                BYTE *op1 = ostart;
 248                BYTE *op2 = opStart2;
 249                BYTE *op3 = opStart3;
 250                BYTE *op4 = opStart4;
 251                U32 endSignal;
 252                DTableDesc const dtd = HUF_getDTableDesc(DTable);
 253                U32 const dtLog = dtd.tableLog;
 254
 255                if (length4 > cSrcSize)
 256                        return ERROR(corruption_detected); /* overflow */
 257                {
 258                        size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
 259                        if (HUF_isError(errorCode))
 260                                return errorCode;
 261                }
 262                {
 263                        size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
 264                        if (HUF_isError(errorCode))
 265                                return errorCode;
 266                }
 267                {
 268                        size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
 269                        if (HUF_isError(errorCode))
 270                                return errorCode;
 271                }
 272                {
 273                        size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
 274                        if (HUF_isError(errorCode))
 275                                return errorCode;
 276                }
 277
 278                /* 16-32 symbols per loop (4-8 symbols per stream) */
 279                endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
 280                for (; (endSignal == BIT_DStream_unfinished) && (op4 < (oend - 7));) {
 281                        HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
 282                        HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
 283                        HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
 284                        HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
 285                        HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
 286                        HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
 287                        HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
 288                        HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
 289                        HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
 290                        HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
 291                        HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
 292                        HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
 293                        HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
 294                        HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
 295                        HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
 296                        HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
 297                        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
 298                }
 299
 300                /* check corruption */
 301                if (op1 > opStart2)
 302                        return ERROR(corruption_detected);
 303                if (op2 > opStart3)
 304                        return ERROR(corruption_detected);
 305                if (op3 > opStart4)
 306                        return ERROR(corruption_detected);
 307                /* note : op4 supposed already verified within main loop */
 308
 309                /* finish bitStreams one by one */
 310                HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
 311                HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
 312                HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
 313                HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
 314
 315                /* check */
 316                endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
 317                if (!endSignal)
 318                        return ERROR(corruption_detected);
 319
 320                /* decoded size */
 321                return dstSize;
 322        }
 323}
 324
 325size_t HUF_decompress4X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
 326{
 327        DTableDesc dtd = HUF_getDTableDesc(DTable);
 328        if (dtd.tableType != 0)
 329                return ERROR(GENERIC);
 330        return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
 331}
 332
 333size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
 334{
 335        const BYTE *ip = (const BYTE *)cSrc;
 336
 337        size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
 338        if (HUF_isError(hSize))
 339                return hSize;
 340        if (hSize >= cSrcSize)
 341                return ERROR(srcSize_wrong);
 342        ip += hSize;
 343        cSrcSize -= hSize;
 344
 345        return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
 346}
 347
 348/* *************************/
 349/* double-symbols decoding */
 350/* *************************/
 351typedef struct {
 352        U16 sequence;
 353        BYTE nbBits;
 354        BYTE length;
 355} HUF_DEltX4; /* double-symbols decoding */
 356
 357typedef struct {
 358        BYTE symbol;
 359        BYTE weight;
 360} sortedSymbol_t;
 361
 362/* HUF_fillDTableX4Level2() :
 363 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
 364static void HUF_fillDTableX4Level2(HUF_DEltX4 *DTable, U32 sizeLog, const U32 consumed, const U32 *rankValOrigin, const int minWeight,
 365                                   const sortedSymbol_t *sortedSymbols, const U32 sortedListSize, U32 nbBitsBaseline, U16 baseSeq)
 366{
 367        HUF_DEltX4 DElt;
 368        U32 rankVal[HUF_TABLELOG_MAX + 1];
 369
 370        /* get pre-calculated rankVal */
 371        memcpy(rankVal, rankValOrigin, sizeof(rankVal));
 372
 373        /* fill skipped values */
 374        if (minWeight > 1) {
 375                U32 i, skipSize = rankVal[minWeight];
 376                ZSTD_writeLE16(&(DElt.sequence), baseSeq);
 377                DElt.nbBits = (BYTE)(consumed);
 378                DElt.length = 1;
 379                for (i = 0; i < skipSize; i++)
 380                        DTable[i] = DElt;
 381        }
 382
 383        /* fill DTable */
 384        {
 385                U32 s;
 386                for (s = 0; s < sortedListSize; s++) { /* note : sortedSymbols already skipped */
 387                        const U32 symbol = sortedSymbols[s].symbol;
 388                        const U32 weight = sortedSymbols[s].weight;
 389                        const U32 nbBits = nbBitsBaseline - weight;
 390                        const U32 length = 1 << (sizeLog - nbBits);
 391                        const U32 start = rankVal[weight];
 392                        U32 i = start;
 393                        const U32 end = start + length;
 394
 395                        ZSTD_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
 396                        DElt.nbBits = (BYTE)(nbBits + consumed);
 397                        DElt.length = 2;
 398                        do {
 399                                DTable[i++] = DElt;
 400                        } while (i < end); /* since length >= 1 */
 401
 402                        rankVal[weight] += length;
 403                }
 404        }
 405}
 406
 407typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1];
 408typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
 409
 410static void HUF_fillDTableX4(HUF_DEltX4 *DTable, const U32 targetLog, const sortedSymbol_t *sortedList, const U32 sortedListSize, const U32 *rankStart,
 411                             rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline)
 412{
 413        U32 rankVal[HUF_TABLELOG_MAX + 1];
 414        const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
 415        const U32 minBits = nbBitsBaseline - maxWeight;
 416        U32 s;
 417
 418        memcpy(rankVal, rankValOrigin, sizeof(rankVal));
 419
 420        /* fill DTable */
 421        for (s = 0; s < sortedListSize; s++) {
 422                const U16 symbol = sortedList[s].symbol;
 423                const U32 weight = sortedList[s].weight;
 424                const U32 nbBits = nbBitsBaseline - weight;
 425                const U32 start = rankVal[weight];
 426                const U32 length = 1 << (targetLog - nbBits);
 427
 428                if (targetLog - nbBits >= minBits) { /* enough room for a second symbol */
 429                        U32 sortedRank;
 430                        int minWeight = nbBits + scaleLog;
 431                        if (minWeight < 1)
 432                                minWeight = 1;
 433                        sortedRank = rankStart[minWeight];
 434                        HUF_fillDTableX4Level2(DTable + start, targetLog - nbBits, nbBits, rankValOrigin[nbBits], minWeight, sortedList + sortedRank,
 435                                               sortedListSize - sortedRank, nbBitsBaseline, symbol);
 436                } else {
 437                        HUF_DEltX4 DElt;
 438                        ZSTD_writeLE16(&(DElt.sequence), symbol);
 439                        DElt.nbBits = (BYTE)(nbBits);
 440                        DElt.length = 1;
 441                        {
 442                                U32 const end = start + length;
 443                                U32 u;
 444                                for (u = start; u < end; u++)
 445                                        DTable[u] = DElt;
 446                        }
 447                }
 448                rankVal[weight] += length;
 449        }
 450}
 451
 452size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
 453{
 454        U32 tableLog, maxW, sizeOfSort, nbSymbols;
 455        DTableDesc dtd = HUF_getDTableDesc(DTable);
 456        U32 const maxTableLog = dtd.maxTableLog;
 457        size_t iSize;
 458        void *dtPtr = DTable + 1; /* force compiler to avoid strict-aliasing */
 459        HUF_DEltX4 *const dt = (HUF_DEltX4 *)dtPtr;
 460        U32 *rankStart;
 461
 462        rankValCol_t *rankVal;
 463        U32 *rankStats;
 464        U32 *rankStart0;
 465        sortedSymbol_t *sortedSymbol;
 466        BYTE *weightList;
 467        size_t spaceUsed32 = 0;
 468
 469        HUF_STATIC_ASSERT((sizeof(rankValCol_t) & 3) == 0);
 470
 471        rankVal = (rankValCol_t *)((U32 *)workspace + spaceUsed32);
 472        spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
 473        rankStats = (U32 *)workspace + spaceUsed32;
 474        spaceUsed32 += HUF_TABLELOG_MAX + 1;
 475        rankStart0 = (U32 *)workspace + spaceUsed32;
 476        spaceUsed32 += HUF_TABLELOG_MAX + 2;
 477        sortedSymbol = (sortedSymbol_t *)((U32 *)workspace + spaceUsed32);
 478        spaceUsed32 += ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
 479        weightList = (BYTE *)((U32 *)workspace + spaceUsed32);
 480        spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
 481
 482        if ((spaceUsed32 << 2) > workspaceSize)
 483                return ERROR(tableLog_tooLarge);
 484        workspace = (U32 *)workspace + spaceUsed32;
 485        workspaceSize -= (spaceUsed32 << 2);
 486
 487        rankStart = rankStart0 + 1;
 488        memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
 489
 490        HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
 491        if (maxTableLog > HUF_TABLELOG_MAX)
 492                return ERROR(tableLog_tooLarge);
 493        /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
 494
 495        iSize = HUF_readStats_wksp(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
 496        if (HUF_isError(iSize))
 497                return iSize;
 498
 499        /* check result */
 500        if (tableLog > maxTableLog)
 501                return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
 502
 503        /* find maxWeight */
 504        for (maxW = tableLog; rankStats[maxW] == 0; maxW--) {
 505        } /* necessarily finds a solution before 0 */
 506
 507        /* Get start index of each weight */
 508        {
 509                U32 w, nextRankStart = 0;
 510                for (w = 1; w < maxW + 1; w++) {
 511                        U32 curr = nextRankStart;
 512                        nextRankStart += rankStats[w];
 513                        rankStart[w] = curr;
 514                }
 515                rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
 516                sizeOfSort = nextRankStart;
 517        }
 518
 519        /* sort symbols by weight */
 520        {
 521                U32 s;
 522                for (s = 0; s < nbSymbols; s++) {
 523                        U32 const w = weightList[s];
 524                        U32 const r = rankStart[w]++;
 525                        sortedSymbol[r].symbol = (BYTE)s;
 526                        sortedSymbol[r].weight = (BYTE)w;
 527                }
 528                rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
 529        }
 530
 531        /* Build rankVal */
 532        {
 533                U32 *const rankVal0 = rankVal[0];
 534                {
 535                        int const rescale = (maxTableLog - tableLog) - 1; /* tableLog <= maxTableLog */
 536                        U32 nextRankVal = 0;
 537                        U32 w;
 538                        for (w = 1; w < maxW + 1; w++) {
 539                                U32 curr = nextRankVal;
 540                                nextRankVal += rankStats[w] << (w + rescale);
 541                                rankVal0[w] = curr;
 542                        }
 543                }
 544                {
 545                        U32 const minBits = tableLog + 1 - maxW;
 546                        U32 consumed;
 547                        for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
 548                                U32 *const rankValPtr = rankVal[consumed];
 549                                U32 w;
 550                                for (w = 1; w < maxW + 1; w++) {
 551                                        rankValPtr[w] = rankVal0[w] >> consumed;
 552                                }
 553                        }
 554                }
 555        }
 556
 557        HUF_fillDTableX4(dt, maxTableLog, sortedSymbol, sizeOfSort, rankStart0, rankVal, maxW, tableLog + 1);
 558
 559        dtd.tableLog = (BYTE)maxTableLog;
 560        dtd.tableType = 1;
 561        memcpy(DTable, &dtd, sizeof(dtd));
 562        return iSize;
 563}
 564
 565static U32 HUF_decodeSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
 566{
 567        size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
 568        memcpy(op, dt + val, 2);
 569        BIT_skipBits(DStream, dt[val].nbBits);
 570        return dt[val].length;
 571}
 572
 573static U32 HUF_decodeLastSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
 574{
 575        size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
 576        memcpy(op, dt + val, 1);
 577        if (dt[val].length == 1)
 578                BIT_skipBits(DStream, dt[val].nbBits);
 579        else {
 580                if (DStream->bitsConsumed < (sizeof(DStream->bitContainer) * 8)) {
 581                        BIT_skipBits(DStream, dt[val].nbBits);
 582                        if (DStream->bitsConsumed > (sizeof(DStream->bitContainer) * 8))
 583                                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
 584                                DStream->bitsConsumed = (sizeof(DStream->bitContainer) * 8);
 585                }
 586        }
 587        return 1;
 588}
 589
 590#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
 591
 592#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr)         \
 593        if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
 594        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
 595
 596#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
 597        if (ZSTD_64bits())                     \
 598        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
 599
 600FORCE_INLINE size_t HUF_decodeStreamX4(BYTE *p, BIT_DStream_t *bitDPtr, BYTE *const pEnd, const HUF_DEltX4 *const dt, const U32 dtLog)
 601{
 602        BYTE *const pStart = p;
 603
 604        /* up to 8 symbols at a time */
 605        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd - (sizeof(bitDPtr->bitContainer) - 1))) {
 606                HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
 607                HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
 608                HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
 609                HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
 610        }
 611
 612        /* closer to end : up to 2 symbols at a time */
 613        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd - 2))
 614                HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
 615
 616        while (p <= pEnd - 2)
 617                HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
 618
 619        if (p < pEnd)
 620                p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
 621
 622        return p - pStart;
 623}
 624
 625static size_t HUF_decompress1X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
 626{
 627        BIT_DStream_t bitD;
 628
 629        /* Init */
 630        {
 631                size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
 632                if (HUF_isError(errorCode))
 633                        return errorCode;
 634        }
 635
 636        /* decode */
 637        {
 638                BYTE *const ostart = (BYTE *)dst;
 639                BYTE *const oend = ostart + dstSize;
 640                const void *const dtPtr = DTable + 1; /* force compiler to not use strict-aliasing */
 641                const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
 642                DTableDesc const dtd = HUF_getDTableDesc(DTable);
 643                HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
 644        }
 645
 646        /* check */
 647        if (!BIT_endOfDStream(&bitD))
 648                return ERROR(corruption_detected);
 649
 650        /* decoded size */
 651        return dstSize;
 652}
 653
 654size_t HUF_decompress1X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
 655{
 656        DTableDesc dtd = HUF_getDTableDesc(DTable);
 657        if (dtd.tableType != 1)
 658                return ERROR(GENERIC);
 659        return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
 660}
 661
 662size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
 663{
 664        const BYTE *ip = (const BYTE *)cSrc;
 665
 666        size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
 667        if (HUF_isError(hSize))
 668                return hSize;
 669        if (hSize >= cSrcSize)
 670                return ERROR(srcSize_wrong);
 671        ip += hSize;
 672        cSrcSize -= hSize;
 673
 674        return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
 675}
 676
 677static size_t HUF_decompress4X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
 678{
 679        if (cSrcSize < 10)
 680                return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
 681
 682        {
 683                const BYTE *const istart = (const BYTE *)cSrc;
 684                BYTE *const ostart = (BYTE *)dst;
 685                BYTE *const oend = ostart + dstSize;
 686                const void *const dtPtr = DTable + 1;
 687                const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
 688
 689                /* Init */
 690                BIT_DStream_t bitD1;
 691                BIT_DStream_t bitD2;
 692                BIT_DStream_t bitD3;
 693                BIT_DStream_t bitD4;
 694                size_t const length1 = ZSTD_readLE16(istart);
 695                size_t const length2 = ZSTD_readLE16(istart + 2);
 696                size_t const length3 = ZSTD_readLE16(istart + 4);
 697                size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
 698                const BYTE *const istart1 = istart + 6; /* jumpTable */
 699                const BYTE *const istart2 = istart1 + length1;
 700                const BYTE *const istart3 = istart2 + length2;
 701                const BYTE *const istart4 = istart3 + length3;
 702                size_t const segmentSize = (dstSize + 3) / 4;
 703                BYTE *const opStart2 = ostart + segmentSize;
 704                BYTE *const opStart3 = opStart2 + segmentSize;
 705                BYTE *const opStart4 = opStart3 + segmentSize;
 706                BYTE *op1 = ostart;
 707                BYTE *op2 = opStart2;
 708                BYTE *op3 = opStart3;
 709                BYTE *op4 = opStart4;
 710                U32 endSignal;
 711                DTableDesc const dtd = HUF_getDTableDesc(DTable);
 712                U32 const dtLog = dtd.tableLog;
 713
 714                if (length4 > cSrcSize)
 715                        return ERROR(corruption_detected); /* overflow */
 716                {
 717                        size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
 718                        if (HUF_isError(errorCode))
 719                                return errorCode;
 720                }
 721                {
 722                        size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
 723                        if (HUF_isError(errorCode))
 724                                return errorCode;
 725                }
 726                {
 727                        size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
 728                        if (HUF_isError(errorCode))
 729                                return errorCode;
 730                }
 731                {
 732                        size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
 733                        if (HUF_isError(errorCode))
 734                                return errorCode;
 735                }
 736
 737                /* 16-32 symbols per loop (4-8 symbols per stream) */
 738                endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
 739                for (; (endSignal == BIT_DStream_unfinished) & (op4 < (oend - (sizeof(bitD4.bitContainer) - 1)));) {
 740                        HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
 741                        HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
 742                        HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
 743                        HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
 744                        HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
 745                        HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
 746                        HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
 747                        HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
 748                        HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
 749                        HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
 750                        HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
 751                        HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
 752                        HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
 753                        HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
 754                        HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
 755                        HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
 756
 757                        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
 758                }
 759
 760                /* check corruption */
 761                if (op1 > opStart2)
 762                        return ERROR(corruption_detected);
 763                if (op2 > opStart3)
 764                        return ERROR(corruption_detected);
 765                if (op3 > opStart4)
 766                        return ERROR(corruption_detected);
 767                /* note : op4 already verified within main loop */
 768
 769                /* finish bitStreams one by one */
 770                HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
 771                HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
 772                HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
 773                HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
 774
 775                /* check */
 776                {
 777                        U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
 778                        if (!endCheck)
 779                                return ERROR(corruption_detected);
 780                }
 781
 782                /* decoded size */
 783                return dstSize;
 784        }
 785}
 786
 787size_t HUF_decompress4X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
 788{
 789        DTableDesc dtd = HUF_getDTableDesc(DTable);
 790        if (dtd.tableType != 1)
 791                return ERROR(GENERIC);
 792        return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
 793}
 794
 795size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
 796{
 797        const BYTE *ip = (const BYTE *)cSrc;
 798
 799        size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
 800        if (HUF_isError(hSize))
 801                return hSize;
 802        if (hSize >= cSrcSize)
 803                return ERROR(srcSize_wrong);
 804        ip += hSize;
 805        cSrcSize -= hSize;
 806
 807        return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
 808}
 809
 810/* ********************************/
 811/* Generic decompression selector */
 812/* ********************************/
 813
 814size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
 815{
 816        DTableDesc const dtd = HUF_getDTableDesc(DTable);
 817        return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
 818                             : HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
 819}
 820
 821size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
 822{
 823        DTableDesc const dtd = HUF_getDTableDesc(DTable);
 824        return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
 825                             : HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
 826}
 827
 828typedef struct {
 829        U32 tableTime;
 830        U32 decode256Time;
 831} algo_time_t;
 832static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = {
 833    /* single, double, quad */
 834    {{0, 0}, {1, 1}, {2, 2}},                /* Q==0 : impossible */
 835    {{0, 0}, {1, 1}, {2, 2}},                /* Q==1 : impossible */
 836    {{38, 130}, {1313, 74}, {2151, 38}},     /* Q == 2 : 12-18% */
 837    {{448, 128}, {1353, 74}, {2238, 41}},    /* Q == 3 : 18-25% */
 838    {{556, 128}, {1353, 74}, {2238, 47}},    /* Q == 4 : 25-32% */
 839    {{714, 128}, {1418, 74}, {2436, 53}},    /* Q == 5 : 32-38% */
 840    {{883, 128}, {1437, 74}, {2464, 61}},    /* Q == 6 : 38-44% */
 841    {{897, 128}, {1515, 75}, {2622, 68}},    /* Q == 7 : 44-50% */
 842    {{926, 128}, {1613, 75}, {2730, 75}},    /* Q == 8 : 50-56% */
 843    {{947, 128}, {1729, 77}, {3359, 77}},    /* Q == 9 : 56-62% */
 844    {{1107, 128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
 845    {{1177, 128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
 846    {{1242, 128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
 847    {{1349, 128}, {2644, 106}, {5260, 106}}, /* Q ==13 : 81-87% */
 848    {{1455, 128}, {2422, 124}, {4174, 124}}, /* Q ==14 : 87-93% */
 849    {{722, 128}, {1891, 145}, {1936, 146}},  /* Q ==15 : 93-99% */
 850};
 851
 852/** HUF_selectDecoder() :
 853*   Tells which decoder is likely to decode faster,
 854*   based on a set of pre-determined metrics.
 855*   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
 856*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
 857U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize)
 858{
 859        /* decoder timing evaluation */
 860        U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
 861        U32 const D256 = (U32)(dstSize >> 8);
 862        U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
 863        U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
 864        DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */
 865
 866        return DTime1 < DTime0;
 867}
 868
 869typedef size_t (*decompressionAlgo)(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize);
 870
 871size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
 872{
 873        /* validation checks */
 874        if (dstSize == 0)
 875                return ERROR(dstSize_tooSmall);
 876        if (cSrcSize > dstSize)
 877                return ERROR(corruption_detected); /* invalid */
 878        if (cSrcSize == dstSize) {
 879                memcpy(dst, cSrc, dstSize);
 880                return dstSize;
 881        } /* not compressed */
 882        if (cSrcSize == 1) {
 883                memset(dst, *(const BYTE *)cSrc, dstSize);
 884                return dstSize;
 885        } /* RLE */
 886
 887        {
 888                U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
 889                return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
 890                              : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
 891        }
 892}
 893
 894size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
 895{
 896        /* validation checks */
 897        if (dstSize == 0)
 898                return ERROR(dstSize_tooSmall);
 899        if ((cSrcSize >= dstSize) || (cSrcSize <= 1))
 900                return ERROR(corruption_detected); /* invalid */
 901
 902        {
 903                U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
 904                return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
 905                              : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
 906        }
 907}
 908
 909size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
 910{
 911        /* validation checks */
 912        if (dstSize == 0)
 913                return ERROR(dstSize_tooSmall);
 914        if (cSrcSize > dstSize)
 915                return ERROR(corruption_detected); /* invalid */
 916        if (cSrcSize == dstSize) {
 917                memcpy(dst, cSrc, dstSize);
 918                return dstSize;
 919        } /* not compressed */
 920        if (cSrcSize == 1) {
 921                memset(dst, *(const BYTE *)cSrc, dstSize);
 922                return dstSize;
 923        } /* RLE */
 924
 925        {
 926                U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
 927                return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
 928                              : HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
 929        }
 930}
 931