linux/lib/zstd/huf_compress.c
<<
>>
Prefs
   1/*
   2 * Huffman encoder, part of New Generation Entropy library
   3 * Copyright (C) 2013-2016, Yann Collet.
   4 *
   5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
   6 *
   7 * Redistribution and use in source and binary forms, with or without
   8 * modification, are permitted provided that the following conditions are
   9 * met:
  10 *
  11 *   * Redistributions of source code must retain the above copyright
  12 * notice, this list of conditions and the following disclaimer.
  13 *   * Redistributions in binary form must reproduce the above
  14 * copyright notice, this list of conditions and the following disclaimer
  15 * in the documentation and/or other materials provided with the
  16 * distribution.
  17 *
  18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29 *
  30 * This program is free software; you can redistribute it and/or modify it under
  31 * the terms of the GNU General Public License version 2 as published by the
  32 * Free Software Foundation. This program is dual-licensed; you may select
  33 * either version 2 of the GNU General Public License ("GPL") or BSD license
  34 * ("BSD").
  35 *
  36 * You can contact the author at :
  37 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
  38 */
  39
  40/* **************************************************************
  41*  Includes
  42****************************************************************/
  43#include "bitstream.h"
  44#include "fse.h" /* header compression */
  45#include "huf.h"
  46#include <linux/kernel.h>
  47#include <linux/string.h> /* memcpy, memset */
  48
  49/* **************************************************************
  50*  Error Management
  51****************************************************************/
  52#define HUF_STATIC_ASSERT(c)                                   \
  53        {                                                      \
  54                enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
  55        } /* use only *after* variable declarations */
  56#define CHECK_V_F(e, f)     \
  57        size_t const e = f; \
  58        if (ERR_isError(e)) \
  59        return f
  60#define CHECK_F(f)                        \
  61        {                                 \
  62                CHECK_V_F(_var_err__, f); \
  63        }
  64
  65/* **************************************************************
  66*  Utils
  67****************************************************************/
  68unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
  69{
  70        return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
  71}
  72
  73/* *******************************************************
  74*  HUF : Huffman block compression
  75*********************************************************/
  76/* HUF_compressWeights() :
  77 * Same as FSE_compress(), but dedicated to huff0's weights compression.
  78 * The use case needs much less stack memory.
  79 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
  80 */
  81#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
  82size_t HUF_compressWeights_wksp(void *dst, size_t dstSize, const void *weightTable, size_t wtSize, void *workspace, size_t workspaceSize)
  83{
  84        BYTE *const ostart = (BYTE *)dst;
  85        BYTE *op = ostart;
  86        BYTE *const oend = ostart + dstSize;
  87
  88        U32 maxSymbolValue = HUF_TABLELOG_MAX;
  89        U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
  90
  91        FSE_CTable *CTable;
  92        U32 *count;
  93        S16 *norm;
  94        size_t spaceUsed32 = 0;
  95
  96        HUF_STATIC_ASSERT(sizeof(FSE_CTable) == sizeof(U32));
  97
  98        CTable = (FSE_CTable *)((U32 *)workspace + spaceUsed32);
  99        spaceUsed32 += FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX);
 100        count = (U32 *)workspace + spaceUsed32;
 101        spaceUsed32 += HUF_TABLELOG_MAX + 1;
 102        norm = (S16 *)((U32 *)workspace + spaceUsed32);
 103        spaceUsed32 += ALIGN(sizeof(S16) * (HUF_TABLELOG_MAX + 1), sizeof(U32)) >> 2;
 104
 105        if ((spaceUsed32 << 2) > workspaceSize)
 106                return ERROR(tableLog_tooLarge);
 107        workspace = (U32 *)workspace + spaceUsed32;
 108        workspaceSize -= (spaceUsed32 << 2);
 109
 110        /* init conditions */
 111        if (wtSize <= 1)
 112                return 0; /* Not compressible */
 113
 114        /* Scan input and build symbol stats */
 115        {
 116                CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize));
 117                if (maxCount == wtSize)
 118                        return 1; /* only a single symbol in src : rle */
 119                if (maxCount == 1)
 120                        return 0; /* each symbol present maximum once => not compressible */
 121        }
 122
 123        tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
 124        CHECK_F(FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue));
 125
 126        /* Write table description header */
 127        {
 128                CHECK_V_F(hSize, FSE_writeNCount(op, oend - op, norm, maxSymbolValue, tableLog));
 129                op += hSize;
 130        }
 131
 132        /* Compress */
 133        CHECK_F(FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, workspace, workspaceSize));
 134        {
 135                CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable));
 136                if (cSize == 0)
 137                        return 0; /* not enough space for compressed data */
 138                op += cSize;
 139        }
 140
 141        return op - ostart;
 142}
 143
 144struct HUF_CElt_s {
 145        U16 val;
 146        BYTE nbBits;
 147}; /* typedef'd to HUF_CElt within "huf.h" */
 148
 149/*! HUF_writeCTable_wksp() :
 150        `CTable` : Huffman tree to save, using huf representation.
 151        @return : size of saved CTable */
 152size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, U32 maxSymbolValue, U32 huffLog, void *workspace, size_t workspaceSize)
 153{
 154        BYTE *op = (BYTE *)dst;
 155        U32 n;
 156
 157        BYTE *bitsToWeight;
 158        BYTE *huffWeight;
 159        size_t spaceUsed32 = 0;
 160
 161        bitsToWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
 162        spaceUsed32 += ALIGN(HUF_TABLELOG_MAX + 1, sizeof(U32)) >> 2;
 163        huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
 164        spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX, sizeof(U32)) >> 2;
 165
 166        if ((spaceUsed32 << 2) > workspaceSize)
 167                return ERROR(tableLog_tooLarge);
 168        workspace = (U32 *)workspace + spaceUsed32;
 169        workspaceSize -= (spaceUsed32 << 2);
 170
 171        /* check conditions */
 172        if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
 173                return ERROR(maxSymbolValue_tooLarge);
 174
 175        /* convert to weight */
 176        bitsToWeight[0] = 0;
 177        for (n = 1; n < huffLog + 1; n++)
 178                bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
 179        for (n = 0; n < maxSymbolValue; n++)
 180                huffWeight[n] = bitsToWeight[CTable[n].nbBits];
 181
 182        /* attempt weights compression by FSE */
 183        {
 184                CHECK_V_F(hSize, HUF_compressWeights_wksp(op + 1, maxDstSize - 1, huffWeight, maxSymbolValue, workspace, workspaceSize));
 185                if ((hSize > 1) & (hSize < maxSymbolValue / 2)) { /* FSE compressed */
 186                        op[0] = (BYTE)hSize;
 187                        return hSize + 1;
 188                }
 189        }
 190
 191        /* write raw values as 4-bits (max : 15) */
 192        if (maxSymbolValue > (256 - 128))
 193                return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
 194        if (((maxSymbolValue + 1) / 2) + 1 > maxDstSize)
 195                return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
 196        op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue - 1));
 197        huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
 198        for (n = 0; n < maxSymbolValue; n += 2)
 199                op[(n / 2) + 1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n + 1]);
 200        return ((maxSymbolValue + 1) / 2) + 1;
 201}
 202
 203size_t HUF_readCTable_wksp(HUF_CElt *CTable, U32 maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
 204{
 205        U32 *rankVal;
 206        BYTE *huffWeight;
 207        U32 tableLog = 0;
 208        U32 nbSymbols = 0;
 209        size_t readSize;
 210        size_t spaceUsed32 = 0;
 211
 212        rankVal = (U32 *)workspace + spaceUsed32;
 213        spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
 214        huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
 215        spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
 216
 217        if ((spaceUsed32 << 2) > workspaceSize)
 218                return ERROR(tableLog_tooLarge);
 219        workspace = (U32 *)workspace + spaceUsed32;
 220        workspaceSize -= (spaceUsed32 << 2);
 221
 222        /* get symbol weights */
 223        readSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
 224        if (ERR_isError(readSize))
 225                return readSize;
 226
 227        /* check result */
 228        if (tableLog > HUF_TABLELOG_MAX)
 229                return ERROR(tableLog_tooLarge);
 230        if (nbSymbols > maxSymbolValue + 1)
 231                return ERROR(maxSymbolValue_tooSmall);
 232
 233        /* Prepare base value per rank */
 234        {
 235                U32 n, nextRankStart = 0;
 236                for (n = 1; n <= tableLog; n++) {
 237                        U32 curr = nextRankStart;
 238                        nextRankStart += (rankVal[n] << (n - 1));
 239                        rankVal[n] = curr;
 240                }
 241        }
 242
 243        /* fill nbBits */
 244        {
 245                U32 n;
 246                for (n = 0; n < nbSymbols; n++) {
 247                        const U32 w = huffWeight[n];
 248                        CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
 249                }
 250        }
 251
 252        /* fill val */
 253        {
 254                U16 nbPerRank[HUF_TABLELOG_MAX + 2] = {0}; /* support w=0=>n=tableLog+1 */
 255                U16 valPerRank[HUF_TABLELOG_MAX + 2] = {0};
 256                {
 257                        U32 n;
 258                        for (n = 0; n < nbSymbols; n++)
 259                                nbPerRank[CTable[n].nbBits]++;
 260                }
 261                /* determine stating value per rank */
 262                valPerRank[tableLog + 1] = 0; /* for w==0 */
 263                {
 264                        U16 min = 0;
 265                        U32 n;
 266                        for (n = tableLog; n > 0; n--) { /* start at n=tablelog <-> w=1 */
 267                                valPerRank[n] = min;     /* get starting value within each rank */
 268                                min += nbPerRank[n];
 269                                min >>= 1;
 270                        }
 271                }
 272                /* assign value within rank, symbol order */
 273                {
 274                        U32 n;
 275                        for (n = 0; n <= maxSymbolValue; n++)
 276                                CTable[n].val = valPerRank[CTable[n].nbBits]++;
 277                }
 278        }
 279
 280        return readSize;
 281}
 282
 283typedef struct nodeElt_s {
 284        U32 count;
 285        U16 parent;
 286        BYTE byte;
 287        BYTE nbBits;
 288} nodeElt;
 289
 290static U32 HUF_setMaxHeight(nodeElt *huffNode, U32 lastNonNull, U32 maxNbBits)
 291{
 292        const U32 largestBits = huffNode[lastNonNull].nbBits;
 293        if (largestBits <= maxNbBits)
 294                return largestBits; /* early exit : no elt > maxNbBits */
 295
 296        /* there are several too large elements (at least >= 2) */
 297        {
 298                int totalCost = 0;
 299                const U32 baseCost = 1 << (largestBits - maxNbBits);
 300                U32 n = lastNonNull;
 301
 302                while (huffNode[n].nbBits > maxNbBits) {
 303                        totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
 304                        huffNode[n].nbBits = (BYTE)maxNbBits;
 305                        n--;
 306                } /* n stops at huffNode[n].nbBits <= maxNbBits */
 307                while (huffNode[n].nbBits == maxNbBits)
 308                        n--; /* n end at index of smallest symbol using < maxNbBits */
 309
 310                /* renorm totalCost */
 311                totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
 312
 313                /* repay normalized cost */
 314                {
 315                        U32 const noSymbol = 0xF0F0F0F0;
 316                        U32 rankLast[HUF_TABLELOG_MAX + 2];
 317                        int pos;
 318
 319                        /* Get pos of last (smallest) symbol per rank */
 320                        memset(rankLast, 0xF0, sizeof(rankLast));
 321                        {
 322                                U32 currNbBits = maxNbBits;
 323                                for (pos = n; pos >= 0; pos--) {
 324                                        if (huffNode[pos].nbBits >= currNbBits)
 325                                                continue;
 326                                        currNbBits = huffNode[pos].nbBits; /* < maxNbBits */
 327                                        rankLast[maxNbBits - currNbBits] = pos;
 328                                }
 329                        }
 330
 331                        while (totalCost > 0) {
 332                                U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
 333                                for (; nBitsToDecrease > 1; nBitsToDecrease--) {
 334                                        U32 highPos = rankLast[nBitsToDecrease];
 335                                        U32 lowPos = rankLast[nBitsToDecrease - 1];
 336                                        if (highPos == noSymbol)
 337                                                continue;
 338                                        if (lowPos == noSymbol)
 339                                                break;
 340                                        {
 341                                                U32 const highTotal = huffNode[highPos].count;
 342                                                U32 const lowTotal = 2 * huffNode[lowPos].count;
 343                                                if (highTotal <= lowTotal)
 344                                                        break;
 345                                        }
 346                                }
 347                                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
 348                                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
 349                                while ((nBitsToDecrease <= HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
 350                                        nBitsToDecrease++;
 351                                totalCost -= 1 << (nBitsToDecrease - 1);
 352                                if (rankLast[nBitsToDecrease - 1] == noSymbol)
 353                                        rankLast[nBitsToDecrease - 1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
 354                                huffNode[rankLast[nBitsToDecrease]].nbBits++;
 355                                if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
 356                                        rankLast[nBitsToDecrease] = noSymbol;
 357                                else {
 358                                        rankLast[nBitsToDecrease]--;
 359                                        if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits - nBitsToDecrease)
 360                                                rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
 361                                }
 362                        } /* while (totalCost > 0) */
 363
 364                        while (totalCost < 0) {                /* Sometimes, cost correction overshoot */
 365                                if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0
 366                                                                  (using maxNbBits) */
 367                                        while (huffNode[n].nbBits == maxNbBits)
 368                                                n--;
 369                                        huffNode[n + 1].nbBits--;
 370                                        rankLast[1] = n + 1;
 371                                        totalCost++;
 372                                        continue;
 373                                }
 374                                huffNode[rankLast[1] + 1].nbBits--;
 375                                rankLast[1]++;
 376                                totalCost++;
 377                        }
 378                }
 379        } /* there are several too large elements (at least >= 2) */
 380
 381        return maxNbBits;
 382}
 383
 384typedef struct {
 385        U32 base;
 386        U32 curr;
 387} rankPos;
 388
 389static void HUF_sort(nodeElt *huffNode, const U32 *count, U32 maxSymbolValue)
 390{
 391        rankPos rank[32];
 392        U32 n;
 393
 394        memset(rank, 0, sizeof(rank));
 395        for (n = 0; n <= maxSymbolValue; n++) {
 396                U32 r = BIT_highbit32(count[n] + 1);
 397                rank[r].base++;
 398        }
 399        for (n = 30; n > 0; n--)
 400                rank[n - 1].base += rank[n].base;
 401        for (n = 0; n < 32; n++)
 402                rank[n].curr = rank[n].base;
 403        for (n = 0; n <= maxSymbolValue; n++) {
 404                U32 const c = count[n];
 405                U32 const r = BIT_highbit32(c + 1) + 1;
 406                U32 pos = rank[r].curr++;
 407                while ((pos > rank[r].base) && (c > huffNode[pos - 1].count))
 408                        huffNode[pos] = huffNode[pos - 1], pos--;
 409                huffNode[pos].count = c;
 410                huffNode[pos].byte = (BYTE)n;
 411        }
 412}
 413
 414/** HUF_buildCTable_wksp() :
 415 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 416 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
 417 */
 418#define STARTNODE (HUF_SYMBOLVALUE_MAX + 1)
 419typedef nodeElt huffNodeTable[2 * HUF_SYMBOLVALUE_MAX + 1 + 1];
 420size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize)
 421{
 422        nodeElt *const huffNode0 = (nodeElt *)workSpace;
 423        nodeElt *const huffNode = huffNode0 + 1;
 424        U32 n, nonNullRank;
 425        int lowS, lowN;
 426        U16 nodeNb = STARTNODE;
 427        U32 nodeRoot;
 428
 429        /* safety checks */
 430        if (wkspSize < sizeof(huffNodeTable))
 431                return ERROR(GENERIC); /* workSpace is not large enough */
 432        if (maxNbBits == 0)
 433                maxNbBits = HUF_TABLELOG_DEFAULT;
 434        if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
 435                return ERROR(GENERIC);
 436        memset(huffNode0, 0, sizeof(huffNodeTable));
 437
 438        /* sort, decreasing order */
 439        HUF_sort(huffNode, count, maxSymbolValue);
 440
 441        /* init for parents */
 442        nonNullRank = maxSymbolValue;
 443        while (huffNode[nonNullRank].count == 0)
 444                nonNullRank--;
 445        lowS = nonNullRank;
 446        nodeRoot = nodeNb + lowS - 1;
 447        lowN = nodeNb;
 448        huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS - 1].count;
 449        huffNode[lowS].parent = huffNode[lowS - 1].parent = nodeNb;
 450        nodeNb++;
 451        lowS -= 2;
 452        for (n = nodeNb; n <= nodeRoot; n++)
 453                huffNode[n].count = (U32)(1U << 30);
 454        huffNode0[0].count = (U32)(1U << 31); /* fake entry, strong barrier */
 455
 456        /* create parents */
 457        while (nodeNb <= nodeRoot) {
 458                U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
 459                U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
 460                huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
 461                huffNode[n1].parent = huffNode[n2].parent = nodeNb;
 462                nodeNb++;
 463        }
 464
 465        /* distribute weights (unlimited tree height) */
 466        huffNode[nodeRoot].nbBits = 0;
 467        for (n = nodeRoot - 1; n >= STARTNODE; n--)
 468                huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;
 469        for (n = 0; n <= nonNullRank; n++)
 470                huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;
 471
 472        /* enforce maxTableLog */
 473        maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
 474
 475        /* fill result into tree (val, nbBits) */
 476        {
 477                U16 nbPerRank[HUF_TABLELOG_MAX + 1] = {0};
 478                U16 valPerRank[HUF_TABLELOG_MAX + 1] = {0};
 479                if (maxNbBits > HUF_TABLELOG_MAX)
 480                        return ERROR(GENERIC); /* check fit into table */
 481                for (n = 0; n <= nonNullRank; n++)
 482                        nbPerRank[huffNode[n].nbBits]++;
 483                /* determine stating value per rank */
 484                {
 485                        U16 min = 0;
 486                        for (n = maxNbBits; n > 0; n--) {
 487                                valPerRank[n] = min; /* get starting value within each rank */
 488                                min += nbPerRank[n];
 489                                min >>= 1;
 490                        }
 491                }
 492                for (n = 0; n <= maxSymbolValue; n++)
 493                        tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
 494                for (n = 0; n <= maxSymbolValue; n++)
 495                        tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
 496        }
 497
 498        return maxNbBits;
 499}
 500
 501static size_t HUF_estimateCompressedSize(HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
 502{
 503        size_t nbBits = 0;
 504        int s;
 505        for (s = 0; s <= (int)maxSymbolValue; ++s) {
 506                nbBits += CTable[s].nbBits * count[s];
 507        }
 508        return nbBits >> 3;
 509}
 510
 511static int HUF_validateCTable(const HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
 512{
 513        int bad = 0;
 514        int s;
 515        for (s = 0; s <= (int)maxSymbolValue; ++s) {
 516                bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
 517        }
 518        return !bad;
 519}
 520
 521static void HUF_encodeSymbol(BIT_CStream_t *bitCPtr, U32 symbol, const HUF_CElt *CTable)
 522{
 523        BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
 524}
 525
 526size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
 527
 528#define HUF_FLUSHBITS(s)  BIT_flushBits(s)
 529
 530#define HUF_FLUSHBITS_1(stream)                                            \
 531        if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 2 + 7) \
 532        HUF_FLUSHBITS(stream)
 533
 534#define HUF_FLUSHBITS_2(stream)                                            \
 535        if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 4 + 7) \
 536        HUF_FLUSHBITS(stream)
 537
 538size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
 539{
 540        const BYTE *ip = (const BYTE *)src;
 541        BYTE *const ostart = (BYTE *)dst;
 542        BYTE *const oend = ostart + dstSize;
 543        BYTE *op = ostart;
 544        size_t n;
 545        BIT_CStream_t bitC;
 546
 547        /* init */
 548        if (dstSize < 8)
 549                return 0; /* not enough space to compress */
 550        {
 551                size_t const initErr = BIT_initCStream(&bitC, op, oend - op);
 552                if (HUF_isError(initErr))
 553                        return 0;
 554        }
 555
 556        n = srcSize & ~3; /* join to mod 4 */
 557        switch (srcSize & 3) {
 558        case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC);
 559                fallthrough;
 560        case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC);
 561                fallthrough;
 562        case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC);
 563                fallthrough;
 564        case 0:
 565        default:;
 566        }
 567
 568        for (; n > 0; n -= 4) { /* note : n&3==0 at this stage */
 569                HUF_encodeSymbol(&bitC, ip[n - 1], CTable);
 570                HUF_FLUSHBITS_1(&bitC);
 571                HUF_encodeSymbol(&bitC, ip[n - 2], CTable);
 572                HUF_FLUSHBITS_2(&bitC);
 573                HUF_encodeSymbol(&bitC, ip[n - 3], CTable);
 574                HUF_FLUSHBITS_1(&bitC);
 575                HUF_encodeSymbol(&bitC, ip[n - 4], CTable);
 576                HUF_FLUSHBITS(&bitC);
 577        }
 578
 579        return BIT_closeCStream(&bitC);
 580}
 581
 582size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
 583{
 584        size_t const segmentSize = (srcSize + 3) / 4; /* first 3 segments */
 585        const BYTE *ip = (const BYTE *)src;
 586        const BYTE *const iend = ip + srcSize;
 587        BYTE *const ostart = (BYTE *)dst;
 588        BYTE *const oend = ostart + dstSize;
 589        BYTE *op = ostart;
 590
 591        if (dstSize < 6 + 1 + 1 + 1 + 8)
 592                return 0; /* minimum space to compress successfully */
 593        if (srcSize < 12)
 594                return 0; /* no saving possible : too small input */
 595        op += 6;          /* jumpTable */
 596
 597        {
 598                CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
 599                if (cSize == 0)
 600                        return 0;
 601                ZSTD_writeLE16(ostart, (U16)cSize);
 602                op += cSize;
 603        }
 604
 605        ip += segmentSize;
 606        {
 607                CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
 608                if (cSize == 0)
 609                        return 0;
 610                ZSTD_writeLE16(ostart + 2, (U16)cSize);
 611                op += cSize;
 612        }
 613
 614        ip += segmentSize;
 615        {
 616                CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
 617                if (cSize == 0)
 618                        return 0;
 619                ZSTD_writeLE16(ostart + 4, (U16)cSize);
 620                op += cSize;
 621        }
 622
 623        ip += segmentSize;
 624        {
 625                CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, iend - ip, CTable));
 626                if (cSize == 0)
 627                        return 0;
 628                op += cSize;
 629        }
 630
 631        return op - ostart;
 632}
 633
 634static size_t HUF_compressCTable_internal(BYTE *const ostart, BYTE *op, BYTE *const oend, const void *src, size_t srcSize, unsigned singleStream,
 635                                          const HUF_CElt *CTable)
 636{
 637        size_t const cSize =
 638            singleStream ? HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
 639        if (HUF_isError(cSize)) {
 640                return cSize;
 641        }
 642        if (cSize == 0) {
 643                return 0;
 644        } /* uncompressible */
 645        op += cSize;
 646        /* check compressibility */
 647        if ((size_t)(op - ostart) >= srcSize - 1) {
 648                return 0;
 649        }
 650        return op - ostart;
 651}
 652
 653/* `workSpace` must a table of at least 1024 unsigned */
 654static size_t HUF_compress_internal(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog,
 655                                    unsigned singleStream, void *workSpace, size_t wkspSize, HUF_CElt *oldHufTable, HUF_repeat *repeat, int preferRepeat)
 656{
 657        BYTE *const ostart = (BYTE *)dst;
 658        BYTE *const oend = ostart + dstSize;
 659        BYTE *op = ostart;
 660
 661        U32 *count;
 662        size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
 663        HUF_CElt *CTable;
 664        size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
 665
 666        /* checks & inits */
 667        if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize)
 668                return ERROR(GENERIC);
 669        if (!srcSize)
 670                return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
 671        if (!dstSize)
 672                return 0; /* cannot fit within dst budget */
 673        if (srcSize > HUF_BLOCKSIZE_MAX)
 674                return ERROR(srcSize_wrong); /* curr block size limit */
 675        if (huffLog > HUF_TABLELOG_MAX)
 676                return ERROR(tableLog_tooLarge);
 677        if (!maxSymbolValue)
 678                maxSymbolValue = HUF_SYMBOLVALUE_MAX;
 679        if (!huffLog)
 680                huffLog = HUF_TABLELOG_DEFAULT;
 681
 682        count = (U32 *)workSpace;
 683        workSpace = (BYTE *)workSpace + countSize;
 684        wkspSize -= countSize;
 685        CTable = (HUF_CElt *)workSpace;
 686        workSpace = (BYTE *)workSpace + CTableSize;
 687        wkspSize -= CTableSize;
 688
 689        /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
 690        if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
 691                return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
 692        }
 693
 694        /* Scan input and build symbol stats */
 695        {
 696                CHECK_V_F(largest, FSE_count_wksp(count, &maxSymbolValue, (const BYTE *)src, srcSize, (U32 *)workSpace));
 697                if (largest == srcSize) {
 698                        *ostart = ((const BYTE *)src)[0];
 699                        return 1;
 700                } /* single symbol, rle */
 701                if (largest <= (srcSize >> 7) + 1)
 702                        return 0; /* Fast heuristic : not compressible enough */
 703        }
 704
 705        /* Check validity of previous table */
 706        if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
 707                *repeat = HUF_repeat_none;
 708        }
 709        /* Heuristic : use existing table for small inputs */
 710        if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
 711                return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
 712        }
 713
 714        /* Build Huffman Tree */
 715        huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
 716        {
 717                CHECK_V_F(maxBits, HUF_buildCTable_wksp(CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize));
 718                huffLog = (U32)maxBits;
 719                /* Zero the unused symbols so we can check it for validity */
 720                memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
 721        }
 722
 723        /* Write table description header */
 724        {
 725                CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, CTable, maxSymbolValue, huffLog, workSpace, wkspSize));
 726                /* Check if using the previous table will be beneficial */
 727                if (repeat && *repeat != HUF_repeat_none) {
 728                        size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
 729                        size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
 730                        if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
 731                                return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
 732                        }
 733                }
 734                /* Use the new table */
 735                if (hSize + 12ul >= srcSize) {
 736                        return 0;
 737                }
 738                op += hSize;
 739                if (repeat) {
 740                        *repeat = HUF_repeat_none;
 741                }
 742                if (oldHufTable) {
 743                        memcpy(oldHufTable, CTable, CTableSize);
 744                } /* Save the new table */
 745        }
 746        return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
 747}
 748
 749size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
 750                           size_t wkspSize)
 751{
 752        return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
 753}
 754
 755size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
 756                             size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
 757{
 758        return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat,
 759                                     preferRepeat);
 760}
 761
 762size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
 763                           size_t wkspSize)
 764{
 765        return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
 766}
 767
 768size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
 769                             size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
 770{
 771        return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat,
 772                                     preferRepeat);
 773}
 774