linux/lib/zstd/compress/zstd_ldm.c
/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_ldm.h"

#include "../common/debug.h"
#include <linux/xxhash.h>
#include "zstd_fast.h"          /* ZSTD_fillHashTable() */
#include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
#include "zstd_ldm_geartab.h"

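/* Default long-distance-matching parameters. ZSTD_ldm_adjustParameters()
 * applies them to any ldmParams_t field left at 0 (hashLog defaults to
 * windowLog - LDM_HASH_RLOG). */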
#define LDM_BUCKET_SIZE_LOG 3
#define LDM_MIN_MATCH_LENGTH 64
#define LDM_HASH_RLOG 7

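/* State of the gear rolling hash: `rolling` holds the current hash value,
 * `stopMask` selects which hash values are treated as split points. */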
typedef struct {
    U64 rolling;
    U64 stopMask;
} ldmRollingHashState_t;

/* ZSTD_ldm_gear_init():
 *
 * Initializes the rolling hash state such that it will honor the
 * settings in params. */
static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
{
    unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
    unsigned hashRateLog = params->hashRateLog;

    state->rolling = ~(U32)0;

    /* The choice of the splitting criterion is subject to two conditions:
     *   1. it has to trigger on average every 2^(hashRateLog) bytes;
     *   2. ideally, it has to depend on a window of minMatchLength bytes.
     *
     * In the gear hash algorithm, bit n depends on the last n bytes;
     * so in order to obtain a good quality splitting criterion it is
     * preferable to use bits with high weight.
     *
     * To match condition 1 we use a mask with hashRateLog bits set
     * and, because of the previous remark, we make sure these bits
     * have the highest possible weight while still respecting
     * condition 2.
     */
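    /* For example, with the defaults minMatchLength == 64 and hashRateLog == 7,
     * maxBitsInMask == 64 and stopMask has only bits 57..63 set, so a split
     * triggers when the top 7 bits of the hash are all zero, i.e. (assuming
     * roughly uniform hash values) on average once every 2^7 == 128 bytes. */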
    if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
        state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
    } else {
        /* In this degenerate case we simply honor the hash rate. */
        state->stopMask = ((U64)1 << hashRateLog) - 1;
    }
}

/* ZSTD_ldm_gear_feed():
 *
 * Registers in the splits array all the split points found in the first
 * size bytes following the data pointer. This function terminates when
 * either all the data has been processed or LDM_BATCH_SIZE splits are
 * present in the splits array.
 *
 * Precondition: The splits array must not be full.
 * Returns: The number of bytes processed. */
static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
                                 BYTE const* data, size_t size,
                                 size_t* splits, unsigned* numSplits)
{
    size_t n;
    U64 hash, mask;

    hash = state->rolling;
    mask = state->stopMask;
    n = 0;

#define GEAR_ITER_ONCE() do { \
        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
        n += 1; \
        if (UNLIKELY((hash & mask) == 0)) { \
            splits[*numSplits] = n; \
            *numSplits += 1; \
            if (*numSplits == LDM_BATCH_SIZE) \
                goto done; \
        } \
    } while (0)

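    /* The main loop is unrolled 4x and stops 3 bytes early so that every
     * GEAR_ITER_ONCE() stays within bounds; the tail loop handles the
     * remaining bytes. GEAR_ITER_ONCE() jumps to `done` as soon as the
     * batch of LDM_BATCH_SIZE splits is full. */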
    while (n + 3 < size) {
        GEAR_ITER_ONCE();
        GEAR_ITER_ONCE();
        GEAR_ITER_ONCE();
        GEAR_ITER_ONCE();
    }
    while (n < size) {
        GEAR_ITER_ONCE();
    }

#undef GEAR_ITER_ONCE

done:
    state->rolling = hash;
    return n;
}

void ZSTD_ldm_adjustParameters(ldmParams_t* params,
                               ZSTD_compressionParameters const* cParams)
{
    params->windowLog = cParams->windowLog;
    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
    if (params->hashLog == 0) {
        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
        assert(params->hashLog <= ZSTD_HASHLOG_MAX);
    }
    if (params->hashRateLog == 0) {
        params->hashRateLog = params->windowLog < params->hashLog
                                   ? 0
                                   : params->windowLog - params->hashLog;
    }
    params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
}

size_t ZSTD_ldm_getTableSize(ldmParams_t params)
{
    size_t const ldmHSize = ((size_t)1) << params.hashLog;
    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
    size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
    return params.enableLdm ? totalSize : 0;
}

size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
{
    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
}

/* ZSTD_ldm_getBucket() :
 *  Returns a pointer to the start of the bucket associated with hash. */
static ldmEntry_t* ZSTD_ldm_getBucket(
        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
{
    return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
}

/* ZSTD_ldm_insertEntry() :
 *  Insert the entry with corresponding hash into the hash table */
static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
                                 size_t const hash, const ldmEntry_t entry,
                                 ldmParams_t const ldmParams)
{
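    /* Entries within a bucket are overwritten in round-robin order:
     * bucketOffsets[hash] tracks the next slot to fill. */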
    BYTE* const pOffset = ldmState->bucketOffsets + hash;
    unsigned const offset = *pOffset;

    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
    *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
}

/* ZSTD_ldm_countBackwardsMatch() :
 *  Returns the number of bytes that match backwards before pIn and pMatch.
 *
 *  We count only bytes where pMatch >= pMatchBase and pIn >= pAnchor. */
static size_t ZSTD_ldm_countBackwardsMatch(
            const BYTE* pIn, const BYTE* pAnchor,
            const BYTE* pMatch, const BYTE* pMatchBase)
{
    size_t matchLength = 0;
    while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
        pIn--;
        pMatch--;
        matchLength++;
    }
    return matchLength;
}

/* ZSTD_ldm_countBackwardsMatch_2segments() :
 *  Returns the number of bytes that match backwards from pMatch,
 *  even with the backwards match spanning 2 different segments.
 *
 *  On reaching `pMatchBase`, continue counting backwards from `pExtDictEnd`. */
static size_t ZSTD_ldm_countBackwardsMatch_2segments(
                    const BYTE* pIn, const BYTE* pAnchor,
                    const BYTE* pMatch, const BYTE* pMatchBase,
                    const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
{
    size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
    if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
        /* If backwards match is entirely in the extDict or prefix, immediately return */
        return matchLength;
    }
    DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
    matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart);
    DEBUGLOG(7, "final backwards match length = %zu", matchLength);
    return matchLength;
}

/* ZSTD_ldm_fillFastTables() :
 *
 *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
 *  This is similar to ZSTD_loadDictionaryContent.
 *
 *  The tables for the other strategies are filled within their
 *  block compressors. */
static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
                                      void const* end)
{
    const BYTE* const iend = (const BYTE*)end;

    switch(ms->cParams.strategy)
    {
    case ZSTD_fast:
        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
        break;

    case ZSTD_dfast:
        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
        break;

    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
    case ZSTD_btlazy2:
    case ZSTD_btopt:
    case ZSTD_btultra:
    case ZSTD_btultra2:
        break;
    default:
        assert(0);  /* not possible: not a valid strategy id */
    }

    return 0;
}

void ZSTD_ldm_fillHashTable(
            ldmState_t* ldmState, const BYTE* ip,
            const BYTE* iend, ldmParams_t const* params)
{
    U32 const minMatchLength = params->minMatchLength;
    U32 const hBits = params->hashLog - params->bucketSizeLog;
    BYTE const* const base = ldmState->window.base;
    BYTE const* const istart = ip;
    ldmRollingHashState_t hashState;
    size_t* const splits = ldmState->splitIndices;
    unsigned numSplits;

    DEBUGLOG(5, "ZSTD_ldm_fillHashTable");

    ZSTD_ldm_gear_init(&hashState, params);
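    /* Feed the input to the gear hash in batches. For every split point
     * that lies at least minMatchLength bytes into the input, hash the
     * preceding minMatchLength bytes with xxh64 and record an entry for
     * that position in the LDM hash table. */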
    while (ip < iend) {
        size_t hashed;
        unsigned n;

        numSplits = 0;
        hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);

        for (n = 0; n < numSplits; n++) {
            if (ip + splits[n] >= istart + minMatchLength) {
                BYTE const* const split = ip + splits[n] - minMatchLength;
                U64 const xxhash = xxh64(split, minMatchLength, 0);
                U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
                ldmEntry_t entry;

                entry.offset = (U32)(split - base);
                entry.checksum = (U32)(xxhash >> 32);
                ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
            }
        }

        ip += hashed;
    }
}


/* ZSTD_ldm_limitTableUpdate() :
 *
 *  Sets ms->nextToUpdate to a position closer to anchor if it has fallen
 *  far behind (after a long match, only update tables a limited amount). */
static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
{
    U32 const curr = (U32)(anchor - ms->window.base);
    if (curr > ms->nextToUpdate + 1024) {
        ms->nextToUpdate =
            curr - MIN(512, curr - ms->nextToUpdate - 1024);
    }
}

static size_t ZSTD_ldm_generateSequences_internal(
        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
        ldmParams_t const* params, void const* src, size_t srcSize)
{
    /* LDM parameters */
    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
    U32 const minMatchLength = params->minMatchLength;
    U32 const entsPerBucket = 1U << params->bucketSizeLog;
    U32 const hBits = params->hashLog - params->bucketSizeLog;
    /* Prefix and extDict parameters */
    U32 const dictLimit = ldmState->window.dictLimit;
    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
    BYTE const* const base = ldmState->window.base;
    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
    BYTE const* const lowPrefixPtr = base + dictLimit;
    /* Input bounds */
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    BYTE const* const ilimit = iend - HASH_READ_SIZE;
    /* Input positions */
    BYTE const* anchor = istart;
    BYTE const* ip = istart;
    /* Rolling hash state */
    ldmRollingHashState_t hashState;
    /* Arrays for staged-processing */
    size_t* const splits = ldmState->splitIndices;
    ldmMatchCandidate_t* const candidates = ldmState->matchCandidates;
    unsigned numSplits;

    if (srcSize < minMatchLength)
        return iend - anchor;

    /* Initialize the rolling hash state with the first minMatchLength bytes */
    ZSTD_ldm_gear_init(&hashState, params);
    {
        size_t n = 0;

        while (n < minMatchLength) {
            numSplits = 0;
            n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n,
                                    splits, &numSplits);
        }
        ip += minMatchLength;
    }

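    /* Main loop: each batch of split points is processed in two passes.
     * The first pass computes the xxh64 checksum for each candidate and
     * prefetches its hash bucket; the second pass searches the bucket for
     * the best match and emits a raw sequence when one is found. */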
    while (ip < ilimit) {
        size_t hashed;
        unsigned n;

        numSplits = 0;
        hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip,
                                    splits, &numSplits);

        for (n = 0; n < numSplits; n++) {
            BYTE const* const split = ip + splits[n] - minMatchLength;
            U64 const xxhash = xxh64(split, minMatchLength, 0);
            U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));

            candidates[n].split = split;
            candidates[n].hash = hash;
            candidates[n].checksum = (U32)(xxhash >> 32);
            candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
            PREFETCH_L1(candidates[n].bucket);
        }

        for (n = 0; n < numSplits; n++) {
            size_t forwardMatchLength = 0, backwardMatchLength = 0,
                   bestMatchLength = 0, mLength;
            BYTE const* const split = candidates[n].split;
            U32 const checksum = candidates[n].checksum;
            U32 const hash = candidates[n].hash;
            ldmEntry_t* const bucket = candidates[n].bucket;
            ldmEntry_t const* cur;
            ldmEntry_t const* bestEntry = NULL;
            ldmEntry_t newEntry;

            newEntry.offset = (U32)(split - base);
            newEntry.checksum = checksum;

            /* If a split point would generate a sequence overlapping with
             * the previous one, we merely register it in the hash table and
             * move on */
            if (split < anchor) {
                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
                continue;
            }

            for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
                size_t curForwardMatchLength, curBackwardMatchLength,
                       curTotalMatchLength;
                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                    continue;
                }
                if (extDict) {
                    BYTE const* const curMatchBase =
                        cur->offset < dictLimit ? dictBase : base;
                    BYTE const* const pMatch = curMatchBase + cur->offset;
                    BYTE const* const matchEnd =
                        cur->offset < dictLimit ? dictEnd : iend;
                    BYTE const* const lowMatchPtr =
                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;
                    curForwardMatchLength =
                        ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(
                            split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
                } else { /* !extDict */
                    BYTE const* const pMatch = base + cur->offset;
                    curForwardMatchLength = ZSTD_count(split, pMatch, iend);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength =
                        ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
                }
                curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;

                if (curTotalMatchLength > bestMatchLength) {
                    bestMatchLength = curTotalMatchLength;
                    forwardMatchLength = curForwardMatchLength;
                    backwardMatchLength = curBackwardMatchLength;
                    bestEntry = cur;
                }
            }

            /* No match found -- insert an entry into the hash table
             * and process the next candidate match */
            if (bestEntry == NULL) {
                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
                continue;
            }

            /* Match found */
            mLength = forwardMatchLength + backwardMatchLength;
            {
                U32 const offset = (U32)(split - base) - bestEntry->offset;
                rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;

                /* Out of sequence storage */
                if (rawSeqStore->size == rawSeqStore->capacity)
                    return ERROR(dstSize_tooSmall);
                seq->litLength = (U32)(split - backwardMatchLength - anchor);
                seq->matchLength = (U32)mLength;
                seq->offset = offset;
                rawSeqStore->size++;
            }

            /* Insert the current entry into the hash table --- it must be
             * done after the previous block to avoid clobbering bestEntry */
            ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);

            anchor = split + forwardMatchLength;
        }

        ip += hashed;
    }

    return iend - anchor;
}

/*! ZSTD_ldm_reduceTable() :
 *  reduce table indexes by `reducerValue` */
static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
                                 U32 const reducerValue)
{
    U32 u;
    for (u = 0; u < size; u++) {
        if (table[u].offset < reducerValue) table[u].offset = 0;
        else table[u].offset -= reducerValue;
    }
}

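/* ZSTD_ldm_generateSequences() :
 *  Breaks the input into chunks of at most kMaxChunkSize (1 MB). For each
 *  chunk it performs window overflow correction and max-distance enforcement
 *  before generating sequences, and leftover literals from previous chunks
 *  are prepended to the first sequence generated in the current chunk. */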
size_t ZSTD_ldm_generateSequences(
        ldmState_t* ldmState, rawSeqStore_t* sequences,
        ldmParams_t const* params, void const* src, size_t srcSize)
{
    U32 const maxDist = 1U << params->windowLog;
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    size_t const kMaxChunkSize = 1 << 20;
    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
    size_t chunk;
    size_t leftoverSize = 0;

    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
    /* Check that ZSTD_window_update() has been called for this chunk prior
     * to passing it to this function.
     */
    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
    /* The input could be very large (in zstdmt), so it must be broken up into
     * chunks to enforce the maximum distance and handle overflow correction.
     */
    assert(sequences->pos <= sequences->size);
    assert(sequences->size <= sequences->capacity);
    for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
        BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
        size_t const remaining = (size_t)(iend - chunkStart);
        BYTE const *const chunkEnd =
            (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
        size_t const chunkSize = chunkEnd - chunkStart;
        size_t newLeftoverSize;
        size_t const prevSize = sequences->size;

        assert(chunkStart < iend);
        /* 1. Perform overflow correction if necessary. */
        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
            U32 const ldmHSize = 1U << params->hashLog;
            U32 const correction = ZSTD_window_correctOverflow(
                &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
            /* invalidate dictionaries on overflow correction */
            ldmState->loadedDictEnd = 0;
        }
        /* 2. We enforce the maximum offset allowed.
         *
         * kMaxChunkSize should be small enough that we don't lose too much of
         * the window through early invalidation.
         * TODO: * Test the chunk size.
         *       * Try invalidation after the sequence generation and test the
         *         offset against maxDist directly.
         *
         * NOTE: Because of dictionaries + sequence splitting we MUST make sure
         * that any offset used is valid at the END of the sequence, since it may
         * be split into two sequences. This condition holds when using
         * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
         * against maxDist directly, we'll have to carefully handle that case.
         */
        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
            ldmState, sequences, params, chunkStart, chunkSize);
        if (ZSTD_isError(newLeftoverSize))
            return newLeftoverSize;
        /* 4. We add the leftover literals from previous iterations to the first
         *    newly generated sequence, or add the `newLeftoverSize` if none are
         *    generated.
         */
        /* Prepend the leftover literals from the last call */
        if (prevSize < sequences->size) {
            sequences->seq[prevSize].litLength += (U32)leftoverSize;
            leftoverSize = newLeftoverSize;
        } else {
            assert(newLeftoverSize == chunkSize);
            leftoverSize += chunkSize;
        }
    }
    return 0;
}

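/* ZSTD_ldm_skipSequences() :
 *  Advances the raw sequence store past srcSize bytes, trimming the literal
 *  and match lengths of any sequence the skipped region cuts through. A
 *  partially skipped match that becomes shorter than minMatch is dropped and
 *  its remainder is folded into the next sequence's literals. */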
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
        if (srcSize <= seq->litLength) {
            /* Skip past srcSize literals */
            seq->litLength -= (U32)srcSize;
            return;
        }
        srcSize -= seq->litLength;
        seq->litLength = 0;
        if (srcSize < seq->matchLength) {
            /* Skip past the first srcSize of the match */
            seq->matchLength -= (U32)srcSize;
            if (seq->matchLength < minMatch) {
                /* The match is too short, omit it */
                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
                    seq[1].litLength += seq[0].matchLength;
                }
                rawSeqStore->pos++;
            }
            return;
        }
        srcSize -= seq->matchLength;
        seq->matchLength = 0;
        rawSeqStore->pos++;
    }
}

/*
 * If the sequence length is longer than remaining then the sequence is split
 * between this block and the next.
 *
 * Returns the current sequence to handle, or if the rest of the block should
 * be literals, it returns a sequence with offset == 0.
 */
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
                                 U32 const remaining, U32 const minMatch)
{
    rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
    assert(sequence.offset > 0);
    /* Likely: No partial sequence */
    if (remaining >= sequence.litLength + sequence.matchLength) {
        rawSeqStore->pos++;
        return sequence;
    }
    /* Cut the sequence short (offset == 0 ==> rest is literals). */
    if (remaining <= sequence.litLength) {
        sequence.offset = 0;
    } else if (remaining < sequence.litLength + sequence.matchLength) {
        sequence.matchLength = remaining - sequence.litLength;
        if (sequence.matchLength < minMatch) {
            sequence.offset = 0;
        }
    }
    /* Skip past `remaining` bytes for the future sequences. */
    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
    return sequence;
}

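/* ZSTD_ldm_skipRawSeqStoreBytes() :
 *  Moves the raw sequence store forward by nbBytes, which may span several
 *  sequences. Unlike ZSTD_ldm_skipSequences(), sequences are not modified:
 *  the position inside the current sequence is recorded in posInSequence. */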
void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
            currPos -= currSeq.litLength + currSeq.matchLength;
            rawSeqStore->pos++;
        } else {
            rawSeqStore->posInSequence = currPos;
            break;
        }
    }
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
        rawSeqStore->posInSequence = 0;
    }
}

size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
    void const* src, size_t srcSize)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    unsigned const minMatch = cParams->minMatch;
    ZSTD_blockCompressor const blockCompressor =
        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
    /* Input bounds */
    BYTE const* const istart = (BYTE const*)src;
    BYTE const* const iend = istart + srcSize;
    /* Input positions */
    BYTE const* ip = istart;

    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
    /* If using opt parser, use LDMs only as candidates rather than always accepting them */
    if (cParams->strategy >= ZSTD_btopt) {
        size_t lastLLSize;
        ms->ldmSeqStore = rawSeqStore;
        lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
        ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
        return lastLLSize;
    }

    assert(rawSeqStore->pos <= rawSeqStore->size);
    assert(rawSeqStore->size <= rawSeqStore->capacity);
    /* Loop through each sequence and apply the block compressor to the literals */
    while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
        /* maybeSplitSequence updates rawSeqStore->pos */
        rawSeq const sequence = maybeSplitSequence(rawSeqStore,
                                                   (U32)(iend - ip), minMatch);
        int i;
        /* End signal */
        if (sequence.offset == 0)
            break;

        assert(ip + sequence.litLength + sequence.matchLength <= iend);

        /* Fill tables for block compressor */
        ZSTD_ldm_limitTableUpdate(ms, ip);
        ZSTD_ldm_fillFastTables(ms, ip);
        /* Run the block compressor */
        DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
        {
            size_t const newLitLength =
                blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
            ip += sequence.litLength;
            /* Update the repcodes */
            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                rep[i] = rep[i-1];
            rep[0] = sequence.offset;
            /* Store the sequence */
            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
                          sequence.offset + ZSTD_REP_MOVE,
                          sequence.matchLength - MINMATCH);
            ip += sequence.matchLength;
        }
    }
    /* Fill the tables for the block compressor */
    ZSTD_ldm_limitTableUpdate(ms, ip);
    ZSTD_ldm_fillFastTables(ms, ip);
    /* Compress the last literals */
    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
}
