linux/lib/zstd/fse_compress.c
<<
>>
Prefs
   1/*
   2 * FSE : Finite State Entropy encoder
   3 * Copyright (C) 2013-2015, Yann Collet.
   4 *
   5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
   6 *
   7 * Redistribution and use in source and binary forms, with or without
   8 * modification, are permitted provided that the following conditions are
   9 * met:
  10 *
  11 *   * Redistributions of source code must retain the above copyright
  12 * notice, this list of conditions and the following disclaimer.
  13 *   * Redistributions in binary form must reproduce the above
  14 * copyright notice, this list of conditions and the following disclaimer
  15 * in the documentation and/or other materials provided with the
  16 * distribution.
  17 *
  18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29 *
  30 * This program is free software; you can redistribute it and/or modify it under
  31 * the terms of the GNU General Public License version 2 as published by the
  32 * Free Software Foundation. This program is dual-licensed; you may select
  33 * either version 2 of the GNU General Public License ("GPL") or BSD license
  34 * ("BSD").
  35 *
  36 * You can contact the author at :
  37 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
  38 */
  39
  40/* **************************************************************
  41*  Compiler specifics
  42****************************************************************/
  43#define FORCE_INLINE static __always_inline
  44
  45/* **************************************************************
  46*  Includes
  47****************************************************************/
  48#include "bitstream.h"
  49#include "fse.h"
  50#include <linux/compiler.h>
  51#include <linux/kernel.h>
  52#include <linux/math64.h>
  53#include <linux/string.h> /* memcpy, memset */
  54
  55/* **************************************************************
  56*  Error Management
  57****************************************************************/
  58#define FSE_STATIC_ASSERT(c)                                   \
  59        {                                                      \
  60                enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
  61        } /* use only *after* variable declarations */
  62
  63/* **************************************************************
  64*  Templates
  65****************************************************************/
  66/*
  67  designed to be included
  68  for type-specific functions (template emulation in C)
  69  Objective is to write these functions only once, for improved maintenance
  70*/
  71
  72/* safety checks */
  73#ifndef FSE_FUNCTION_EXTENSION
  74#error "FSE_FUNCTION_EXTENSION must be defined"
  75#endif
  76#ifndef FSE_FUNCTION_TYPE
  77#error "FSE_FUNCTION_TYPE must be defined"
  78#endif
  79
  80/* Function names */
  81#define FSE_CAT(X, Y) X##Y
  82#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y)
  83#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y)
  84
  85/* Function templates */
  86
  87/* FSE_buildCTable_wksp() :
  88 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
  89 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
  90 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
  91 */
  92size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize)
  93{
  94        U32 const tableSize = 1 << tableLog;
  95        U32 const tableMask = tableSize - 1;
  96        void *const ptr = ct;
  97        U16 *const tableU16 = ((U16 *)ptr) + 2;
  98        void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1);
  99        FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
 100        U32 const step = FSE_TABLESTEP(tableSize);
 101        U32 highThreshold = tableSize - 1;
 102
 103        U32 *cumul;
 104        FSE_FUNCTION_TYPE *tableSymbol;
 105        size_t spaceUsed32 = 0;
 106
 107        cumul = (U32 *)workspace + spaceUsed32;
 108        spaceUsed32 += FSE_MAX_SYMBOL_VALUE + 2;
 109        tableSymbol = (FSE_FUNCTION_TYPE *)((U32 *)workspace + spaceUsed32);
 110        spaceUsed32 += ALIGN(sizeof(FSE_FUNCTION_TYPE) * ((size_t)1 << tableLog), sizeof(U32)) >> 2;
 111
 112        if ((spaceUsed32 << 2) > workspaceSize)
 113                return ERROR(tableLog_tooLarge);
 114        workspace = (U32 *)workspace + spaceUsed32;
 115        workspaceSize -= (spaceUsed32 << 2);
 116
 117        /* CTable header */
 118        tableU16[-2] = (U16)tableLog;
 119        tableU16[-1] = (U16)maxSymbolValue;
 120
 121        /* For explanations on how to distribute symbol values over the table :
 122        *  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
 123
 124        /* symbol start positions */
 125        {
 126                U32 u;
 127                cumul[0] = 0;
 128                for (u = 1; u <= maxSymbolValue + 1; u++) {
 129                        if (normalizedCounter[u - 1] == -1) { /* Low proba symbol */
 130                                cumul[u] = cumul[u - 1] + 1;
 131                                tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u - 1);
 132                        } else {
 133                                cumul[u] = cumul[u - 1] + normalizedCounter[u - 1];
 134                        }
 135                }
 136                cumul[maxSymbolValue + 1] = tableSize + 1;
 137        }
 138
 139        /* Spread symbols */
 140        {
 141                U32 position = 0;
 142                U32 symbol;
 143                for (symbol = 0; symbol <= maxSymbolValue; symbol++) {
 144                        int nbOccurences;
 145                        for (nbOccurences = 0; nbOccurences < normalizedCounter[symbol]; nbOccurences++) {
 146                                tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
 147                                position = (position + step) & tableMask;
 148                                while (position > highThreshold)
 149                                        position = (position + step) & tableMask; /* Low proba area */
 150                        }
 151                }
 152
 153                if (position != 0)
 154                        return ERROR(GENERIC); /* Must have gone through all positions */
 155        }
 156
 157        /* Build table */
 158        {
 159                U32 u;
 160                for (u = 0; u < tableSize; u++) {
 161                        FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
 162                        tableU16[cumul[s]++] = (U16)(tableSize + u); /* TableU16 : sorted by symbol order; gives next state value */
 163                }
 164        }
 165
 166        /* Build Symbol Transformation Table */
 167        {
 168                unsigned total = 0;
 169                unsigned s;
 170                for (s = 0; s <= maxSymbolValue; s++) {
 171                        switch (normalizedCounter[s]) {
 172                        case 0: break;
 173
 174                        case -1:
 175                        case 1:
 176                                symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog);
 177                                symbolTT[s].deltaFindState = total - 1;
 178                                total++;
 179                                break;
 180                        default: {
 181                                U32 const maxBitsOut = tableLog - BIT_highbit32(normalizedCounter[s] - 1);
 182                                U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
 183                                symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
 184                                symbolTT[s].deltaFindState = total - normalizedCounter[s];
 185                                total += normalizedCounter[s];
 186                        }
 187                        }
 188                }
 189        }
 190
 191        return 0;
 192}
 193
 194/*-**************************************************************
 195*  FSE NCount encoding-decoding
 196****************************************************************/
 197size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
 198{
 199        size_t const maxHeaderSize = (((maxSymbolValue + 1) * tableLog) >> 3) + 3;
 200        return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
 201}
 202
 203static size_t FSE_writeNCount_generic(void *header, size_t headerBufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
 204                                      unsigned writeIsSafe)
 205{
 206        BYTE *const ostart = (BYTE *)header;
 207        BYTE *out = ostart;
 208        BYTE *const oend = ostart + headerBufferSize;
 209        int nbBits;
 210        const int tableSize = 1 << tableLog;
 211        int remaining;
 212        int threshold;
 213        U32 bitStream;
 214        int bitCount;
 215        unsigned charnum = 0;
 216        int previous0 = 0;
 217
 218        bitStream = 0;
 219        bitCount = 0;
 220        /* Table Size */
 221        bitStream += (tableLog - FSE_MIN_TABLELOG) << bitCount;
 222        bitCount += 4;
 223
 224        /* Init */
 225        remaining = tableSize + 1; /* +1 for extra accuracy */
 226        threshold = tableSize;
 227        nbBits = tableLog + 1;
 228
 229        while (remaining > 1) { /* stops at 1 */
 230                if (previous0) {
 231                        unsigned start = charnum;
 232                        while (!normalizedCounter[charnum])
 233                                charnum++;
 234                        while (charnum >= start + 24) {
 235                                start += 24;
 236                                bitStream += 0xFFFFU << bitCount;
 237                                if ((!writeIsSafe) && (out > oend - 2))
 238                                        return ERROR(dstSize_tooSmall); /* Buffer overflow */
 239                                out[0] = (BYTE)bitStream;
 240                                out[1] = (BYTE)(bitStream >> 8);
 241                                out += 2;
 242                                bitStream >>= 16;
 243                        }
 244                        while (charnum >= start + 3) {
 245                                start += 3;
 246                                bitStream += 3 << bitCount;
 247                                bitCount += 2;
 248                        }
 249                        bitStream += (charnum - start) << bitCount;
 250                        bitCount += 2;
 251                        if (bitCount > 16) {
 252                                if ((!writeIsSafe) && (out > oend - 2))
 253                                        return ERROR(dstSize_tooSmall); /* Buffer overflow */
 254                                out[0] = (BYTE)bitStream;
 255                                out[1] = (BYTE)(bitStream >> 8);
 256                                out += 2;
 257                                bitStream >>= 16;
 258                                bitCount -= 16;
 259                        }
 260                }
 261                {
 262                        int count = normalizedCounter[charnum++];
 263                        int const max = (2 * threshold - 1) - remaining;
 264                        remaining -= count < 0 ? -count : count;
 265                        count++; /* +1 for extra accuracy */
 266                        if (count >= threshold)
 267                                count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
 268                        bitStream += count << bitCount;
 269                        bitCount += nbBits;
 270                        bitCount -= (count < max);
 271                        previous0 = (count == 1);
 272                        if (remaining < 1)
 273                                return ERROR(GENERIC);
 274                        while (remaining < threshold)
 275                                nbBits--, threshold >>= 1;
 276                }
 277                if (bitCount > 16) {
 278                        if ((!writeIsSafe) && (out > oend - 2))
 279                                return ERROR(dstSize_tooSmall); /* Buffer overflow */
 280                        out[0] = (BYTE)bitStream;
 281                        out[1] = (BYTE)(bitStream >> 8);
 282                        out += 2;
 283                        bitStream >>= 16;
 284                        bitCount -= 16;
 285                }
 286        }
 287
 288        /* flush remaining bitStream */
 289        if ((!writeIsSafe) && (out > oend - 2))
 290                return ERROR(dstSize_tooSmall); /* Buffer overflow */
 291        out[0] = (BYTE)bitStream;
 292        out[1] = (BYTE)(bitStream >> 8);
 293        out += (bitCount + 7) / 8;
 294
 295        if (charnum > maxSymbolValue + 1)
 296                return ERROR(GENERIC);
 297
 298        return (out - ostart);
 299}
 300
 301size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
 302{
 303        if (tableLog > FSE_MAX_TABLELOG)
 304                return ERROR(tableLog_tooLarge); /* Unsupported */
 305        if (tableLog < FSE_MIN_TABLELOG)
 306                return ERROR(GENERIC); /* Unsupported */
 307
 308        if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
 309                return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
 310
 311        return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
 312}
 313
 314/*-**************************************************************
 315*  Counting histogram
 316****************************************************************/
 317/*! FSE_count_simple
 318        This function counts byte values within `src`, and store the histogram into table `count`.
 319        It doesn't use any additional memory.
 320        But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
 321        For this reason, prefer using a table `count` with 256 elements.
 322        @return : count of most numerous element
 323*/
 324size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize)
 325{
 326        const BYTE *ip = (const BYTE *)src;
 327        const BYTE *const end = ip + srcSize;
 328        unsigned maxSymbolValue = *maxSymbolValuePtr;
 329        unsigned max = 0;
 330
 331        memset(count, 0, (maxSymbolValue + 1) * sizeof(*count));
 332        if (srcSize == 0) {
 333                *maxSymbolValuePtr = 0;
 334                return 0;
 335        }
 336
 337        while (ip < end)
 338                count[*ip++]++;
 339
 340        while (!count[maxSymbolValue])
 341                maxSymbolValue--;
 342        *maxSymbolValuePtr = maxSymbolValue;
 343
 344        {
 345                U32 s;
 346                for (s = 0; s <= maxSymbolValue; s++)
 347                        if (count[s] > max)
 348                                max = count[s];
 349        }
 350
 351        return (size_t)max;
 352}
 353
 354/* FSE_count_parallel_wksp() :
 355 * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
 356 * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
 357static size_t FSE_count_parallel_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned checkMax,
 358                                      unsigned *const workSpace)
 359{
 360        const BYTE *ip = (const BYTE *)source;
 361        const BYTE *const iend = ip + sourceSize;
 362        unsigned maxSymbolValue = *maxSymbolValuePtr;
 363        unsigned max = 0;
 364        U32 *const Counting1 = workSpace;
 365        U32 *const Counting2 = Counting1 + 256;
 366        U32 *const Counting3 = Counting2 + 256;
 367        U32 *const Counting4 = Counting3 + 256;
 368
 369        memset(Counting1, 0, 4 * 256 * sizeof(unsigned));
 370
 371        /* safety checks */
 372        if (!sourceSize) {
 373                memset(count, 0, maxSymbolValue + 1);
 374                *maxSymbolValuePtr = 0;
 375                return 0;
 376        }
 377        if (!maxSymbolValue)
 378                maxSymbolValue = 255; /* 0 == default */
 379
 380        /* by stripes of 16 bytes */
 381        {
 382                U32 cached = ZSTD_read32(ip);
 383                ip += 4;
 384                while (ip < iend - 15) {
 385                        U32 c = cached;
 386                        cached = ZSTD_read32(ip);
 387                        ip += 4;
 388                        Counting1[(BYTE)c]++;
 389                        Counting2[(BYTE)(c >> 8)]++;
 390                        Counting3[(BYTE)(c >> 16)]++;
 391                        Counting4[c >> 24]++;
 392                        c = cached;
 393                        cached = ZSTD_read32(ip);
 394                        ip += 4;
 395                        Counting1[(BYTE)c]++;
 396                        Counting2[(BYTE)(c >> 8)]++;
 397                        Counting3[(BYTE)(c >> 16)]++;
 398                        Counting4[c >> 24]++;
 399                        c = cached;
 400                        cached = ZSTD_read32(ip);
 401                        ip += 4;
 402                        Counting1[(BYTE)c]++;
 403                        Counting2[(BYTE)(c >> 8)]++;
 404                        Counting3[(BYTE)(c >> 16)]++;
 405                        Counting4[c >> 24]++;
 406                        c = cached;
 407                        cached = ZSTD_read32(ip);
 408                        ip += 4;
 409                        Counting1[(BYTE)c]++;
 410                        Counting2[(BYTE)(c >> 8)]++;
 411                        Counting3[(BYTE)(c >> 16)]++;
 412                        Counting4[c >> 24]++;
 413                }
 414                ip -= 4;
 415        }
 416
 417        /* finish last symbols */
 418        while (ip < iend)
 419                Counting1[*ip++]++;
 420
 421        if (checkMax) { /* verify stats will fit into destination table */
 422                U32 s;
 423                for (s = 255; s > maxSymbolValue; s--) {
 424                        Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
 425                        if (Counting1[s])
 426                                return ERROR(maxSymbolValue_tooSmall);
 427                }
 428        }
 429
 430        {
 431                U32 s;
 432                for (s = 0; s <= maxSymbolValue; s++) {
 433                        count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
 434                        if (count[s] > max)
 435                                max = count[s];
 436                }
 437        }
 438
 439        while (!count[maxSymbolValue])
 440                maxSymbolValue--;
 441        *maxSymbolValuePtr = maxSymbolValue;
 442        return (size_t)max;
 443}
 444
 445/* FSE_countFast_wksp() :
 446 * Same as FSE_countFast(), but using an externally provided scratch buffer.
 447 * `workSpace` size must be table of >= `1024` unsigned */
 448size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
 449{
 450        if (sourceSize < 1500)
 451                return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
 452        return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
 453}
 454
 455/* FSE_count_wksp() :
 456 * Same as FSE_count(), but using an externally provided scratch buffer.
 457 * `workSpace` size must be table of >= `1024` unsigned */
 458size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
 459{
 460        if (*maxSymbolValuePtr < 255)
 461                return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
 462        *maxSymbolValuePtr = 255;
 463        return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
 464}
 465
 466/*-**************************************************************
 467*  FSE Compression Code
 468****************************************************************/
 469/*! FSE_sizeof_CTable() :
 470        FSE_CTable is a variable size structure which contains :
 471        `U16 tableLog;`
 472        `U16 maxSymbolValue;`
 473        `U16 nextStateNumber[1 << tableLog];`                         // This size is variable
 474        `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable
 475Allocation is manual (C standard does not support variable-size structures).
 476*/
 477size_t FSE_sizeof_CTable(unsigned maxSymbolValue, unsigned tableLog)
 478{
 479        if (tableLog > FSE_MAX_TABLELOG)
 480                return ERROR(tableLog_tooLarge);
 481        return FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) * sizeof(U32);
 482}
 483
 484/* provides the minimum logSize to safely represent a distribution */
 485static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
 486{
 487        U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
 488        U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
 489        U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
 490        return minBits;
 491}
 492
 493unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
 494{
 495        U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
 496        U32 tableLog = maxTableLog;
 497        U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
 498        if (tableLog == 0)
 499                tableLog = FSE_DEFAULT_TABLELOG;
 500        if (maxBitsSrc < tableLog)
 501                tableLog = maxBitsSrc; /* Accuracy can be reduced */
 502        if (minBits > tableLog)
 503                tableLog = minBits; /* Need a minimum to safely represent all symbol values */
 504        if (tableLog < FSE_MIN_TABLELOG)
 505                tableLog = FSE_MIN_TABLELOG;
 506        if (tableLog > FSE_MAX_TABLELOG)
 507                tableLog = FSE_MAX_TABLELOG;
 508        return tableLog;
 509}
 510
 511unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
 512{
 513        return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
 514}
 515
 516/* Secondary normalization method.
 517   To be used when primary method fails. */
 518
 519static size_t FSE_normalizeM2(short *norm, U32 tableLog, const unsigned *count, size_t total, U32 maxSymbolValue)
 520{
 521        short const NOT_YET_ASSIGNED = -2;
 522        U32 s;
 523        U32 distributed = 0;
 524        U32 ToDistribute;
 525
 526        /* Init */
 527        U32 const lowThreshold = (U32)(total >> tableLog);
 528        U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
 529
 530        for (s = 0; s <= maxSymbolValue; s++) {
 531                if (count[s] == 0) {
 532                        norm[s] = 0;
 533                        continue;
 534                }
 535                if (count[s] <= lowThreshold) {
 536                        norm[s] = -1;
 537                        distributed++;
 538                        total -= count[s];
 539                        continue;
 540                }
 541                if (count[s] <= lowOne) {
 542                        norm[s] = 1;
 543                        distributed++;
 544                        total -= count[s];
 545                        continue;
 546                }
 547
 548                norm[s] = NOT_YET_ASSIGNED;
 549        }
 550        ToDistribute = (1 << tableLog) - distributed;
 551
 552        if ((total / ToDistribute) > lowOne) {
 553                /* risk of rounding to zero */
 554                lowOne = (U32)((total * 3) / (ToDistribute * 2));
 555                for (s = 0; s <= maxSymbolValue; s++) {
 556                        if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
 557                                norm[s] = 1;
 558                                distributed++;
 559                                total -= count[s];
 560                                continue;
 561                        }
 562                }
 563                ToDistribute = (1 << tableLog) - distributed;
 564        }
 565
 566        if (distributed == maxSymbolValue + 1) {
 567                /* all values are pretty poor;
 568                   probably incompressible data (should have already been detected);
 569                   find max, then give all remaining points to max */
 570                U32 maxV = 0, maxC = 0;
 571                for (s = 0; s <= maxSymbolValue; s++)
 572                        if (count[s] > maxC)
 573                                maxV = s, maxC = count[s];
 574                norm[maxV] += (short)ToDistribute;
 575                return 0;
 576        }
 577
 578        if (total == 0) {
 579                /* all of the symbols were low enough for the lowOne or lowThreshold */
 580                for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1))
 581                        if (norm[s] > 0)
 582                                ToDistribute--, norm[s]++;
 583                return 0;
 584        }
 585
 586        {
 587                U64 const vStepLog = 62 - tableLog;
 588                U64 const mid = (1ULL << (vStepLog - 1)) - 1;
 589                U64 const rStep = div_u64((((U64)1 << vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
 590                U64 tmpTotal = mid;
 591                for (s = 0; s <= maxSymbolValue; s++) {
 592                        if (norm[s] == NOT_YET_ASSIGNED) {
 593                                U64 const end = tmpTotal + (count[s] * rStep);
 594                                U32 const sStart = (U32)(tmpTotal >> vStepLog);
 595                                U32 const sEnd = (U32)(end >> vStepLog);
 596                                U32 const weight = sEnd - sStart;
 597                                if (weight < 1)
 598                                        return ERROR(GENERIC);
 599                                norm[s] = (short)weight;
 600                                tmpTotal = end;
 601                        }
 602                }
 603        }
 604
 605        return 0;
 606}
 607
 608size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t total, unsigned maxSymbolValue)
 609{
 610        /* Sanity checks */
 611        if (tableLog == 0)
 612                tableLog = FSE_DEFAULT_TABLELOG;
 613        if (tableLog < FSE_MIN_TABLELOG)
 614                return ERROR(GENERIC); /* Unsupported size */
 615        if (tableLog > FSE_MAX_TABLELOG)
 616                return ERROR(tableLog_tooLarge); /* Unsupported size */
 617        if (tableLog < FSE_minTableLog(total, maxSymbolValue))
 618                return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
 619
 620        {
 621                U32 const rtbTable[] = {0, 473195, 504333, 520860, 550000, 700000, 750000, 830000};
 622                U64 const scale = 62 - tableLog;
 623                U64 const step = div_u64((U64)1 << 62, (U32)total); /* <== here, one division ! */
 624                U64 const vStep = 1ULL << (scale - 20);
 625                int stillToDistribute = 1 << tableLog;
 626                unsigned s;
 627                unsigned largest = 0;
 628                short largestP = 0;
 629                U32 lowThreshold = (U32)(total >> tableLog);
 630
 631                for (s = 0; s <= maxSymbolValue; s++) {
 632                        if (count[s] == total)
 633                                return 0; /* rle special case */
 634                        if (count[s] == 0) {
 635                                normalizedCounter[s] = 0;
 636                                continue;
 637                        }
 638                        if (count[s] <= lowThreshold) {
 639                                normalizedCounter[s] = -1;
 640                                stillToDistribute--;
 641                        } else {
 642                                short proba = (short)((count[s] * step) >> scale);
 643                                if (proba < 8) {
 644                                        U64 restToBeat = vStep * rtbTable[proba];
 645                                        proba += (count[s] * step) - ((U64)proba << scale) > restToBeat;
 646                                }
 647                                if (proba > largestP)
 648                                        largestP = proba, largest = s;
 649                                normalizedCounter[s] = proba;
 650                                stillToDistribute -= proba;
 651                        }
 652                }
 653                if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
 654                        /* corner case, need another normalization method */
 655                        size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
 656                        if (FSE_isError(errorCode))
 657                                return errorCode;
 658                } else
 659                        normalizedCounter[largest] += (short)stillToDistribute;
 660        }
 661
 662        return tableLog;
 663}
 664
 665/* fake FSE_CTable, for raw (uncompressed) input */
 666size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits)
 667{
 668        const unsigned tableSize = 1 << nbBits;
 669        const unsigned tableMask = tableSize - 1;
 670        const unsigned maxSymbolValue = tableMask;
 671        void *const ptr = ct;
 672        U16 *const tableU16 = ((U16 *)ptr) + 2;
 673        void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableSize >> 1); /* assumption : tableLog >= 1 */
 674        FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
 675        unsigned s;
 676
 677        /* Sanity checks */
 678        if (nbBits < 1)
 679                return ERROR(GENERIC); /* min size */
 680
 681        /* header */
 682        tableU16[-2] = (U16)nbBits;
 683        tableU16[-1] = (U16)maxSymbolValue;
 684
 685        /* Build table */
 686        for (s = 0; s < tableSize; s++)
 687                tableU16[s] = (U16)(tableSize + s);
 688
 689        /* Build Symbol Transformation Table */
 690        {
 691                const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
 692                for (s = 0; s <= maxSymbolValue; s++) {
 693                        symbolTT[s].deltaNbBits = deltaNbBits;
 694                        symbolTT[s].deltaFindState = s - 1;
 695                }
 696        }
 697
 698        return 0;
 699}
 700
 701/* fake FSE_CTable, for rle input (always same symbol) */
 702size_t FSE_buildCTable_rle(FSE_CTable *ct, BYTE symbolValue)
 703{
 704        void *ptr = ct;
 705        U16 *tableU16 = ((U16 *)ptr) + 2;
 706        void *FSCTptr = (U32 *)ptr + 2;
 707        FSE_symbolCompressionTransform *symbolTT = (FSE_symbolCompressionTransform *)FSCTptr;
 708
 709        /* header */
 710        tableU16[-2] = (U16)0;
 711        tableU16[-1] = (U16)symbolValue;
 712
 713        /* Build table */
 714        tableU16[0] = 0;
 715        tableU16[1] = 0; /* just in case */
 716
 717        /* Build Symbol Transformation Table */
 718        symbolTT[symbolValue].deltaNbBits = 0;
 719        symbolTT[symbolValue].deltaFindState = 0;
 720
 721        return 0;
 722}
 723
 724static size_t FSE_compress_usingCTable_generic(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct, const unsigned fast)
 725{
 726        const BYTE *const istart = (const BYTE *)src;
 727        const BYTE *const iend = istart + srcSize;
 728        const BYTE *ip = iend;
 729
 730        BIT_CStream_t bitC;
 731        FSE_CState_t CState1, CState2;
 732
 733        /* init */
 734        if (srcSize <= 2)
 735                return 0;
 736        {
 737                size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
 738                if (FSE_isError(initError))
 739                        return 0; /* not enough space available to write a bitstream */
 740        }
 741
 742#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
 743
 744        if (srcSize & 1) {
 745                FSE_initCState2(&CState1, ct, *--ip);
 746                FSE_initCState2(&CState2, ct, *--ip);
 747                FSE_encodeSymbol(&bitC, &CState1, *--ip);
 748                FSE_FLUSHBITS(&bitC);
 749        } else {
 750                FSE_initCState2(&CState2, ct, *--ip);
 751                FSE_initCState2(&CState1, ct, *--ip);
 752        }
 753
 754        /* join to mod 4 */
 755        srcSize -= 2;
 756        if ((sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) && (srcSize & 2)) { /* test bit 2 */
 757                FSE_encodeSymbol(&bitC, &CState2, *--ip);
 758                FSE_encodeSymbol(&bitC, &CState1, *--ip);
 759                FSE_FLUSHBITS(&bitC);
 760        }
 761
 762        /* 2 or 4 encoding per loop */
 763        while (ip > istart) {
 764
 765                FSE_encodeSymbol(&bitC, &CState2, *--ip);
 766
 767                if (sizeof(bitC.bitContainer) * 8 < FSE_MAX_TABLELOG * 2 + 7) /* this test must be static */
 768                        FSE_FLUSHBITS(&bitC);
 769
 770                FSE_encodeSymbol(&bitC, &CState1, *--ip);
 771
 772                if (sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) { /* this test must be static */
 773                        FSE_encodeSymbol(&bitC, &CState2, *--ip);
 774                        FSE_encodeSymbol(&bitC, &CState1, *--ip);
 775                }
 776
 777                FSE_FLUSHBITS(&bitC);
 778        }
 779
 780        FSE_flushCState(&bitC, &CState2);
 781        FSE_flushCState(&bitC, &CState1);
 782        return BIT_closeCStream(&bitC);
 783}
 784
 785size_t FSE_compress_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct)
 786{
 787        unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
 788
 789        if (fast)
 790                return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
 791        else
 792                return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
 793}
 794
 795size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
 796