blob: a4e916008b3a7192ea387bf4cff06f7d79314df7 [file] [log] [blame]
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*-*************************************
* Dependencies
***************************************/
#include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
#include "../common/cpu.h"
#include "../common/mem.h"
#include "hist.h" /* HIST_countFast_wksp */
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
#include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "zstd_compress_internal.h"
#include "zstd_compress_sequences.h"
#include "zstd_compress_literals.h"
#include "zstd_fast.h"
#include "zstd_double_fast.h"
#include "zstd_lazy.h"
#include "zstd_opt.h"
#include "zstd_ldm.h"
#include "zstd_compress_superblock.h"
/* ***************************************************************
* Tuning parameters
*****************************************************************/
/*!
* COMPRESS_HEAPMODE :
* Select how default decompression function ZSTD_compress() allocates its context,
* on stack (0, default), or into heap (1).
* Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected.
*/
/*-*************************************
* Helper functions
***************************************/
/* ZSTD_compressBound()
* Note that the result from this function is only compatible with the "normal"
* full-block strategy.
* When there are a lot of small blocks due to frequent flush in streaming mode
* the overhead of headers can make the compressed data to be larger than the
* return value of ZSTD_compressBound().
*/
size_t ZSTD_compressBound(size_t srcSize) {
return ZSTD_COMPRESSBOUND(srcSize);
}
/*-*************************************
* Context memory management
***************************************/
struct ZSTD_CDict_s {
const void* dictContent;
size_t dictContentSize;
ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */
U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
ZSTD_cwksp workspace;
ZSTD_matchState_t matchState;
ZSTD_compressedBlockState_t cBlockState;
ZSTD_customMem customMem;
U32 dictID;
int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
}; /* typedef'd to ZSTD_CDict within "zstd.h" */
ZSTD_CCtx* ZSTD_createCCtx(void)
{
return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
}
static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
{
assert(cctx != NULL);
ZSTD_memset(cctx, 0, sizeof(*cctx));
cctx->customMem = memManager;
cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
{ size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
assert(!ZSTD_isError(err));
(void)err;
}
}
ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
{
ZSTD_STATIC_ASSERT(zcss_init==0);
ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
{ ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem);
if (!cctx) return NULL;
ZSTD_initCCtx(cctx, customMem);
return cctx;
}
}
ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
{
ZSTD_cwksp ws;
ZSTD_CCtx* cctx;
if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */
if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */
ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
if (cctx == NULL) return NULL;
ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx));
ZSTD_cwksp_move(&cctx->workspace, &ws);
cctx->staticSize = workspaceSize;
/* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);
cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
return cctx;
}
/*
* Clears and frees all of the dictionaries in the CCtx.
*/
static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
{
ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem);
ZSTD_freeCDict(cctx->localDict.cdict);
ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict));
ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
cctx->cdict = NULL;
}
static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
{
size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0;
size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);
return bufferSize + cdictSize;
}
static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
{
assert(cctx != NULL);
assert(cctx->staticSize == 0);
ZSTD_clearAllDicts(cctx);
ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
}
size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{
if (cctx==NULL) return 0; /* support free on NULL */
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
"not compatible with static CCtx");
{
int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
ZSTD_freeCCtxContent(cctx);
if (!cctxInWorkspace) {
ZSTD_customFree(cctx, cctx->customMem);
}
}
return 0;
}
static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
{
(void)cctx;
return 0;
}
size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
{
if (cctx==NULL) return 0; /* support sizeof on NULL */
/* cctx may be in the workspace */
return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
+ ZSTD_cwksp_sizeof(&cctx->workspace)
+ ZSTD_sizeof_localDict(cctx->localDict)
+ ZSTD_sizeof_mtctx(cctx);
}
size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
{
return ZSTD_sizeof_CCtx(zcs); /* same object */
}
/* private API call, for dictBuilder only */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
/* Returns 1 if compression parameters are such that we should
* enable long distance matching (wlog >= 27, strategy >= btopt).
* Returns 0 otherwise.
*/
static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) {
return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27;
}
static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
ZSTD_compressionParameters cParams)
{
ZSTD_CCtx_params cctxParams;
/* should not matter, as all cParams are presumed properly defined */
ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
cctxParams.cParams = cParams;
if (ZSTD_CParams_shouldEnableLdm(&cParams)) {
DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params");
cctxParams.ldmParams.enableLdm = 1;
/* LDM is enabled by default for optimal parser and window size >= 128MB */
ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);
assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
assert(cctxParams.ldmParams.hashRateLog < 32);
}
assert(!ZSTD_checkCParams(cParams));
return cctxParams;
}
static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
ZSTD_customMem customMem)
{
ZSTD_CCtx_params* params;
if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
params = (ZSTD_CCtx_params*)ZSTD_customCalloc(
sizeof(ZSTD_CCtx_params), customMem);
if (!params) { return NULL; }
ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
params->customMem = customMem;
return params;
}
ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
{
return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
}
size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
{
if (params == NULL) { return 0; }
ZSTD_customFree(params, params->customMem);
return 0;
}
size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
{
return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
}
size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
cctxParams->compressionLevel = compressionLevel;
cctxParams->fParams.contentSizeFlag = 1;
return 0;
}
#define ZSTD_NO_CLEVEL 0
/*
* Initializes the cctxParams from params and compressionLevel.
* @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
*/
static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel)
{
assert(!ZSTD_checkCParams(params->cParams));
ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
cctxParams->cParams = params->cParams;
cctxParams->fParams = params->fParams;
/* Should not matter, as all cParams are presumed properly defined.
* But, set it for tracing anyway.
*/
cctxParams->compressionLevel = compressionLevel;
}
size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
{
RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL);
return 0;
}
/*
* Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
* @param param Validated zstd parameters.
*/
static void ZSTD_CCtxParams_setZstdParams(
ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
{
assert(!ZSTD_checkCParams(params->cParams));
cctxParams->cParams = params->cParams;
cctxParams->fParams = params->fParams;
/* Should not matter, as all cParams are presumed properly defined.
* But, set it for tracing anyway.
*/
cctxParams->compressionLevel = ZSTD_NO_CLEVEL;
}
ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
{
ZSTD_bounds bounds = { 0, 0, 0 };
switch(param)
{
case ZSTD_c_compressionLevel:
bounds.lowerBound = ZSTD_minCLevel();
bounds.upperBound = ZSTD_maxCLevel();
return bounds;
case ZSTD_c_windowLog:
bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
bounds.upperBound = ZSTD_WINDOWLOG_MAX;
return bounds;
case ZSTD_c_hashLog:
bounds.lowerBound = ZSTD_HASHLOG_MIN;
bounds.upperBound = ZSTD_HASHLOG_MAX;
return bounds;
case ZSTD_c_chainLog:
bounds.lowerBound = ZSTD_CHAINLOG_MIN;
bounds.upperBound = ZSTD_CHAINLOG_MAX;
return bounds;
case ZSTD_c_searchLog:
bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
bounds.upperBound = ZSTD_SEARCHLOG_MAX;
return bounds;
case ZSTD_c_minMatch:
bounds.lowerBound = ZSTD_MINMATCH_MIN;
bounds.upperBound = ZSTD_MINMATCH_MAX;
return bounds;
case ZSTD_c_targetLength:
bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
return bounds;
case ZSTD_c_strategy:
bounds.lowerBound = ZSTD_STRATEGY_MIN;
bounds.upperBound = ZSTD_STRATEGY_MAX;
return bounds;
case ZSTD_c_contentSizeFlag:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_c_checksumFlag:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_c_dictIDFlag:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_c_nbWorkers:
bounds.lowerBound = 0;
bounds.upperBound = 0;
return bounds;
case ZSTD_c_jobSize:
bounds.lowerBound = 0;
bounds.upperBound = 0;
return bounds;
case ZSTD_c_overlapLog:
bounds.lowerBound = 0;
bounds.upperBound = 0;
return bounds;
case ZSTD_c_enableDedicatedDictSearch:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_c_enableLongDistanceMatching:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_c_ldmHashLog:
bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
return bounds;
case ZSTD_c_ldmMinMatch:
bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
return bounds;
case ZSTD_c_ldmBucketSizeLog:
bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
return bounds;
case ZSTD_c_ldmHashRateLog:
bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
return bounds;
/* experimental parameters */
case ZSTD_c_rsyncable:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_c_forceMaxWindow :
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
case ZSTD_c_format:
ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
bounds.lowerBound = ZSTD_f_zstd1;
bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */
return bounds;
case ZSTD_c_forceAttachDict:
ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad);
bounds.lowerBound = ZSTD_dictDefaultAttach;
bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */
return bounds;
case ZSTD_c_literalCompressionMode:
ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed);
bounds.lowerBound = ZSTD_lcm_auto;
bounds.upperBound = ZSTD_lcm_uncompressed;
return bounds;
case ZSTD_c_targetCBlockSize:
bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
return bounds;
case ZSTD_c_srcSizeHint:
bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
return bounds;
case ZSTD_c_stableInBuffer:
case ZSTD_c_stableOutBuffer:
bounds.lowerBound = (int)ZSTD_bm_buffered;
bounds.upperBound = (int)ZSTD_bm_stable;
return bounds;
case ZSTD_c_blockDelimiters:
bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
return bounds;
case ZSTD_c_validateSequences:
bounds.lowerBound = 0;
bounds.upperBound = 1;
return bounds;
default:
bounds.error = ERROR(parameter_unsupported);
return bounds;
}
}
/* ZSTD_cParam_clampBounds:
* Clamps the value into the bounded range.
*/
static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
{
ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
if (ZSTD_isError(bounds.error)) return bounds.error;
if (*value < bounds.lowerBound) *value = bounds.lowerBound;
if (*value > bounds.upperBound) *value = bounds.upperBound;
return 0;
}
#define BOUNDCHECK(cParam, val) { \
RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
parameter_outOfBound, "Param out of bounds"); \
}
static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
{
switch(param)
{
case ZSTD_c_compressionLevel:
case ZSTD_c_hashLog:
case ZSTD_c_chainLog:
case ZSTD_c_searchLog:
case ZSTD_c_minMatch:
case ZSTD_c_targetLength:
case ZSTD_c_strategy:
return 1;
case ZSTD_c_format:
case ZSTD_c_windowLog:
case ZSTD_c_contentSizeFlag:
case ZSTD_c_checksumFlag:
case ZSTD_c_dictIDFlag:
case ZSTD_c_forceMaxWindow :
case ZSTD_c_nbWorkers:
case ZSTD_c_jobSize:
case ZSTD_c_overlapLog:
case ZSTD_c_rsyncable:
case ZSTD_c_enableDedicatedDictSearch:
case ZSTD_c_enableLongDistanceMatching:
case ZSTD_c_ldmHashLog:
case ZSTD_c_ldmMinMatch:
case ZSTD_c_ldmBucketSizeLog:
case ZSTD_c_ldmHashRateLog:
case ZSTD_c_forceAttachDict:
case ZSTD_c_literalCompressionMode:
case ZSTD_c_targetCBlockSize:
case ZSTD_c_srcSizeHint:
case ZSTD_c_stableInBuffer:
case ZSTD_c_stableOutBuffer:
case ZSTD_c_blockDelimiters:
case ZSTD_c_validateSequences:
default:
return 0;
}
}
size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
{
DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
if (cctx->streamStage != zcss_init) {
if (ZSTD_isUpdateAuthorized(param)) {
cctx->cParamsChanged = 1;
} else {
RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");
} }
switch(param)
{
case ZSTD_c_nbWorkers:
RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
"MT not compatible with static alloc");
break;
case ZSTD_c_compressionLevel:
case ZSTD_c_windowLog:
case ZSTD_c_hashLog:
case ZSTD_c_chainLog:
case ZSTD_c_searchLog:
case ZSTD_c_minMatch:
case ZSTD_c_targetLength:
case ZSTD_c_strategy:
case ZSTD_c_ldmHashRateLog:
case ZSTD_c_format:
case ZSTD_c_contentSizeFlag:
case ZSTD_c_checksumFlag:
case ZSTD_c_dictIDFlag:
case ZSTD_c_forceMaxWindow:
case ZSTD_c_forceAttachDict:
case ZSTD_c_literalCompressionMode:
case ZSTD_c_jobSize:
case ZSTD_c_overlapLog:
case ZSTD_c_rsyncable:
case ZSTD_c_enableDedicatedDictSearch:
case ZSTD_c_enableLongDistanceMatching:
case ZSTD_c_ldmHashLog:
case ZSTD_c_ldmMinMatch:
case ZSTD_c_ldmBucketSizeLog:
case ZSTD_c_targetCBlockSize:
case ZSTD_c_srcSizeHint:
case ZSTD_c_stableInBuffer:
case ZSTD_c_stableOutBuffer:
case ZSTD_c_blockDelimiters:
case ZSTD_c_validateSequences:
break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
}
size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
ZSTD_cParameter param, int value)
{
DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
switch(param)
{
case ZSTD_c_format :
BOUNDCHECK(ZSTD_c_format, value);
CCtxParams->format = (ZSTD_format_e)value;
return (size_t)CCtxParams->format;
case ZSTD_c_compressionLevel : {
FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
if (value == 0)
CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
else
CCtxParams->compressionLevel = value;
if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
return 0; /* return type (size_t) cannot represent negative values */
}
case ZSTD_c_windowLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_windowLog, value);
CCtxParams->cParams.windowLog = (U32)value;
return CCtxParams->cParams.windowLog;
case ZSTD_c_hashLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_hashLog, value);
CCtxParams->cParams.hashLog = (U32)value;
return CCtxParams->cParams.hashLog;
case ZSTD_c_chainLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_chainLog, value);
CCtxParams->cParams.chainLog = (U32)value;
return CCtxParams->cParams.chainLog;
case ZSTD_c_searchLog :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_searchLog, value);
CCtxParams->cParams.searchLog = (U32)value;
return (size_t)value;
case ZSTD_c_minMatch :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_minMatch, value);
CCtxParams->cParams.minMatch = value;
return CCtxParams->cParams.minMatch;
case ZSTD_c_targetLength :
BOUNDCHECK(ZSTD_c_targetLength, value);
CCtxParams->cParams.targetLength = value;
return CCtxParams->cParams.targetLength;
case ZSTD_c_strategy :
if (value!=0) /* 0 => use default */
BOUNDCHECK(ZSTD_c_strategy, value);
CCtxParams->cParams.strategy = (ZSTD_strategy)value;
return (size_t)CCtxParams->cParams.strategy;
case ZSTD_c_contentSizeFlag :
/* Content size written in frame header _when known_ (default:1) */
DEBUGLOG(4, "set content size flag = %u", (value!=0));
CCtxParams->fParams.contentSizeFlag = value != 0;
return CCtxParams->fParams.contentSizeFlag;
case ZSTD_c_checksumFlag :
/* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
CCtxParams->fParams.checksumFlag = value != 0;
return CCtxParams->fParams.checksumFlag;
case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
CCtxParams->fParams.noDictIDFlag = !value;
return !CCtxParams->fParams.noDictIDFlag;
case ZSTD_c_forceMaxWindow :
CCtxParams->forceWindow = (value != 0);
return CCtxParams->forceWindow;
case ZSTD_c_forceAttachDict : {
const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
CCtxParams->attachDictPref = pref;
return CCtxParams->attachDictPref;
}
case ZSTD_c_literalCompressionMode : {
const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value;
BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
CCtxParams->literalCompressionMode = lcm;
return CCtxParams->literalCompressionMode;
}
case ZSTD_c_nbWorkers :
RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
return 0;
case ZSTD_c_jobSize :
RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
return 0;
case ZSTD_c_overlapLog :
RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
return 0;
case ZSTD_c_rsyncable :
RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
return 0;
case ZSTD_c_enableDedicatedDictSearch :
CCtxParams->enableDedicatedDictSearch = (value!=0);
return CCtxParams->enableDedicatedDictSearch;
case ZSTD_c_enableLongDistanceMatching :
CCtxParams->ldmParams.enableLdm = (value!=0);
return CCtxParams->ldmParams.enableLdm;
case ZSTD_c_ldmHashLog :
if (value!=0) /* 0 ==> auto */
BOUNDCHECK(ZSTD_c_ldmHashLog, value);
CCtxParams->ldmParams.hashLog = value;
return CCtxParams->ldmParams.hashLog;
case ZSTD_c_ldmMinMatch :
if (value!=0) /* 0 ==> default */
BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
CCtxParams->ldmParams.minMatchLength = value;
return CCtxParams->ldmParams.minMatchLength;
case ZSTD_c_ldmBucketSizeLog :
if (value!=0) /* 0 ==> default */
BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
CCtxParams->ldmParams.bucketSizeLog = value;
return CCtxParams->ldmParams.bucketSizeLog;
case ZSTD_c_ldmHashRateLog :
if (value!=0) /* 0 ==> default */
BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
CCtxParams->ldmParams.hashRateLog = value;
return CCtxParams->ldmParams.hashRateLog;
case ZSTD_c_targetCBlockSize :
if (value!=0) /* 0 ==> default */
BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
CCtxParams->targetCBlockSize = value;
return CCtxParams->targetCBlockSize;
case ZSTD_c_srcSizeHint :
if (value!=0) /* 0 ==> default */
BOUNDCHECK(ZSTD_c_srcSizeHint, value);
CCtxParams->srcSizeHint = value;
return CCtxParams->srcSizeHint;
case ZSTD_c_stableInBuffer:
BOUNDCHECK(ZSTD_c_stableInBuffer, value);
CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value;
return CCtxParams->inBufferMode;
case ZSTD_c_stableOutBuffer:
BOUNDCHECK(ZSTD_c_stableOutBuffer, value);
CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value;
return CCtxParams->outBufferMode;
case ZSTD_c_blockDelimiters:
BOUNDCHECK(ZSTD_c_blockDelimiters, value);
CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
return CCtxParams->blockDelimiters;
case ZSTD_c_validateSequences:
BOUNDCHECK(ZSTD_c_validateSequences, value);
CCtxParams->validateSequences = value;
return CCtxParams->validateSequences;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
}
size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value)
{
return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);
}
size_t ZSTD_CCtxParams_getParameter(
ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value)
{
switch(param)
{
case ZSTD_c_format :
*value = CCtxParams->format;
break;
case ZSTD_c_compressionLevel :
*value = CCtxParams->compressionLevel;
break;
case ZSTD_c_windowLog :
*value = (int)CCtxParams->cParams.windowLog;
break;
case ZSTD_c_hashLog :
*value = (int)CCtxParams->cParams.hashLog;
break;
case ZSTD_c_chainLog :
*value = (int)CCtxParams->cParams.chainLog;
break;
case ZSTD_c_searchLog :
*value = CCtxParams->cParams.searchLog;
break;
case ZSTD_c_minMatch :
*value = CCtxParams->cParams.minMatch;
break;
case ZSTD_c_targetLength :
*value = CCtxParams->cParams.targetLength;
break;
case ZSTD_c_strategy :
*value = (unsigned)CCtxParams->cParams.strategy;
break;
case ZSTD_c_contentSizeFlag :
*value = CCtxParams->fParams.contentSizeFlag;
break;
case ZSTD_c_checksumFlag :
*value = CCtxParams->fParams.checksumFlag;
break;
case ZSTD_c_dictIDFlag :
*value = !CCtxParams->fParams.noDictIDFlag;
break;
case ZSTD_c_forceMaxWindow :
*value = CCtxParams->forceWindow;
break;
case ZSTD_c_forceAttachDict :
*value = CCtxParams->attachDictPref;
break;
case ZSTD_c_literalCompressionMode :
*value = CCtxParams->literalCompressionMode;
break;
case ZSTD_c_nbWorkers :
assert(CCtxParams->nbWorkers == 0);
*value = CCtxParams->nbWorkers;
break;
case ZSTD_c_jobSize :
RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
case ZSTD_c_overlapLog :
RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
case ZSTD_c_rsyncable :
RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
case ZSTD_c_enableDedicatedDictSearch :
*value = CCtxParams->enableDedicatedDictSearch;
break;
case ZSTD_c_enableLongDistanceMatching :
*value = CCtxParams->ldmParams.enableLdm;
break;
case ZSTD_c_ldmHashLog :
*value = CCtxParams->ldmParams.hashLog;
break;
case ZSTD_c_ldmMinMatch :
*value = CCtxParams->ldmParams.minMatchLength;
break;
case ZSTD_c_ldmBucketSizeLog :
*value = CCtxParams->ldmParams.bucketSizeLog;
break;
case ZSTD_c_ldmHashRateLog :
*value = CCtxParams->ldmParams.hashRateLog;
break;
case ZSTD_c_targetCBlockSize :
*value = (int)CCtxParams->targetCBlockSize;
break;
case ZSTD_c_srcSizeHint :
*value = (int)CCtxParams->srcSizeHint;
break;
case ZSTD_c_stableInBuffer :
*value = (int)CCtxParams->inBufferMode;
break;
case ZSTD_c_stableOutBuffer :
*value = (int)CCtxParams->outBufferMode;
break;
case ZSTD_c_blockDelimiters :
*value = (int)CCtxParams->blockDelimiters;
break;
case ZSTD_c_validateSequences :
*value = (int)CCtxParams->validateSequences;
break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
return 0;
}
/* ZSTD_CCtx_setParametersUsingCCtxParams() :
* just applies `params` into `cctx`
* no action is performed, parameters are merely stored.
* If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
* This is possible even if a compression is ongoing.
* In which case, new parameters will be applied on the fly, starting with next compression job.
*/
size_t ZSTD_CCtx_setParametersUsingCCtxParams(
ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
{
DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
"The context is in the wrong stage!");
RETURN_ERROR_IF(cctx->cdict, stage_wrong,
"Can't override parameters with cdict attached (some must "
"be inherited from the cdict).");
cctx->requestedParams = *params;
return 0;
}
ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
{
DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
"Can't set pledgedSrcSize when not in init stage.");
cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
return 0;
}
static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(
int const compressionLevel,
size_t const dictSize);
static int ZSTD_dedicatedDictSearch_isSupported(
const ZSTD_compressionParameters* cParams);
static void ZSTD_dedicatedDictSearch_revertCParams(
ZSTD_compressionParameters* cParams);
/*
* Initializes the local dict using the requested parameters.
* NOTE: This does not use the pledged src size, because it may be used for more
* than one compression.
*/
static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
{
ZSTD_localDict* const dl = &cctx->localDict;
if (dl->dict == NULL) {
/* No local dictionary. */
assert(dl->dictBuffer == NULL);
assert(dl->cdict == NULL);
assert(dl->dictSize == 0);
return 0;
}
if (dl->cdict != NULL) {
assert(cctx->cdict == dl->cdict);
/* Local dictionary already initialized. */
return 0;
}
assert(dl->dictSize > 0);
assert(cctx->cdict == NULL);
assert(cctx->prefixDict.dict == NULL);
dl->cdict = ZSTD_createCDict_advanced2(
dl->dict,
dl->dictSize,
ZSTD_dlm_byRef,
dl->dictContentType,
&cctx->requestedParams,
cctx->customMem);
RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");
cctx->cdict = dl->cdict;
return 0;
}
size_t ZSTD_CCtx_loadDictionary_advanced(
ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
{
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
"Can't load a dictionary when ctx is not in init stage.");
DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
ZSTD_clearAllDicts(cctx); /* in case one already exists */
if (dict == NULL || dictSize == 0) /* no dictionary mode */
return 0;
if (dictLoadMethod == ZSTD_dlm_byRef) {
cctx->localDict.dict = dict;
} else {
void* dictBuffer;
RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
"no malloc for static CCtx");
dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
ZSTD_memcpy(dictBuffer, dict, dictSize);
cctx->localDict.dictBuffer = dictBuffer;
cctx->localDict.dict = dictBuffer;
}
cctx->localDict.dictSize = dictSize;
cctx->localDict.dictContentType = dictContentType;
return 0;
}
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(
ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
return ZSTD_CCtx_loadDictionary_advanced(
cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
}
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
return ZSTD_CCtx_loadDictionary_advanced(
cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
}
size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
"Can't ref a dict when ctx not in init stage.");
/* Free the existing local cdict (if any) to save memory. */
ZSTD_clearAllDicts(cctx);
cctx->cdict = cdict;
return 0;
}
size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool)
{
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
"Can't ref a pool when ctx not in init stage.");
cctx->pool = pool;
return 0;
}
size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
{
return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
}
size_t ZSTD_CCtx_refPrefix_advanced(
ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
"Can't ref a prefix when ctx not in init stage.");
ZSTD_clearAllDicts(cctx);
if (prefix != NULL && prefixSize > 0) {
cctx->prefixDict.dict = prefix;
cctx->prefixDict.dictSize = prefixSize;
cctx->prefixDict.dictContentType = dictContentType;
}
return 0;
}
/*! ZSTD_CCtx_reset() :
* Also dumps dictionary */
size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
{
if ( (reset == ZSTD_reset_session_only)
|| (reset == ZSTD_reset_session_and_parameters) ) {
cctx->streamStage = zcss_init;
cctx->pledgedSrcSizePlusOne = 0;
}
if ( (reset == ZSTD_reset_parameters)
|| (reset == ZSTD_reset_session_and_parameters) ) {
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
"Can't reset parameters only when not in init stage.");
ZSTD_clearAllDicts(cctx);
return ZSTD_CCtxParams_reset(&cctx->requestedParams);
}
return 0;
}
/* ZSTD_checkCParams() :
control CParam values remain within authorized range.
@return : 0, or an error code if one value is beyond authorized range */
size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
{
BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog);
BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog);
BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch);
BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
BOUNDCHECK(ZSTD_c_strategy, cParams.strategy);
return 0;
}
/* ZSTD_clampCParams() :
* make CParam values within valid range.
* @return : valid CParams */
static ZSTD_compressionParameters
ZSTD_clampCParams(ZSTD_compressionParameters cParams)
{
# define CLAMP_TYPE(cParam, val, type) { \
ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \
if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \
else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
}
# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
CLAMP(ZSTD_c_windowLog, cParams.windowLog);
CLAMP(ZSTD_c_chainLog, cParams.chainLog);
CLAMP(ZSTD_c_hashLog, cParams.hashLog);
CLAMP(ZSTD_c_searchLog, cParams.searchLog);
CLAMP(ZSTD_c_minMatch, cParams.minMatch);
CLAMP(ZSTD_c_targetLength,cParams.targetLength);
CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
return cParams;
}
/* ZSTD_cycleLog() :
* condition for correct operation : hashLog > 1 */
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
{
U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
return hashLog - btScale;
}
/* ZSTD_dictAndWindowLog() :
* Returns an adjusted window log that is large enough to fit the source and the dictionary.
* The zstd format says that the entire dictionary is valid if one byte of the dictionary
* is within the window. So the hashLog and chainLog should be large enough to reference both
* the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
* the hashLog and windowLog.
* NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
*/
static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)
{
const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;
/* No dictionary ==> No change */
if (dictSize == 0) {
return windowLog;
}
assert(windowLog <= ZSTD_WINDOWLOG_MAX);
assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */
{
U64 const windowSize = 1ULL << windowLog;
U64 const dictAndWindowSize = dictSize + windowSize;
/* If the window size is already large enough to fit both the source and the dictionary
* then just use the window size. Otherwise adjust so that it fits the dictionary and
* the window.
*/
if (windowSize >= dictSize + srcSize) {
return windowLog; /* Window size large enough already */
} else if (dictAndWindowSize >= maxWindowSize) {
return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */
} else {
return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1;
}
}
}
/* ZSTD_adjustCParams_internal() :
* optimize `cPar` for a specified input (`srcSize` and `dictSize`).
* mostly downsize to reduce memory consumption and initialization latency.
* `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
* `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.
* note : `srcSize==0` means 0!
* condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
static ZSTD_compressionParameters
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
unsigned long long srcSize,
size_t dictSize,
ZSTD_cParamMode_e mode)
{
const U64 minSrcSize = 513; /* (1<<9) + 1 */
const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
assert(ZSTD_checkCParams(cPar)==0);
switch (mode) {
case ZSTD_cpm_unknown:
case ZSTD_cpm_noAttachDict:
/* If we don't know the source size, don't make any
* assumptions about it. We will already have selected
* smaller parameters if a dictionary is in use.
*/
break;
case ZSTD_cpm_createCDict:
/* Assume a small source size when creating a dictionary
* with an unkown source size.
*/
if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
srcSize = minSrcSize;
break;
case ZSTD_cpm_attachDict:
/* Dictionary has its own dedicated parameters which have
* already been selected. We are selecting parameters
* for only the source.
*/
dictSize = 0;
break;
default:
assert(0);
break;
}
/* resize windowLog if input is small enough, to use less memory */
if ( (srcSize < maxWindowResize)
&& (dictSize < maxWindowResize) ) {
U32 const tSize = (U32)(srcSize + dictSize);
static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
ZSTD_highbit32(tSize-1) + 1;
if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
}
if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
if (cycleLog > dictAndWindowLog)
cPar.chainLog -= (cycleLog - dictAndWindowLog);
}
if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */
return cPar;
}
ZSTD_compressionParameters
ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
unsigned long long srcSize,
size_t dictSize)
{
cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */
if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
}
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
static void ZSTD_overrideCParams(
ZSTD_compressionParameters* cParams,
const ZSTD_compressionParameters* overrides)
{
if (overrides->windowLog) cParams->windowLog = overrides->windowLog;
if (overrides->hashLog) cParams->hashLog = overrides->hashLog;
if (overrides->chainLog) cParams->chainLog = overrides->chainLog;
if (overrides->searchLog) cParams->searchLog = overrides->searchLog;
if (overrides->minMatch) cParams->minMatch = overrides->minMatch;
if (overrides->targetLength) cParams->targetLength = overrides->targetLength;
if (overrides->strategy) cParams->strategy = overrides->strategy;
}
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
{
ZSTD_compressionParameters cParams;
if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
srcSizeHint = CCtxParams->srcSizeHint;
}
cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);
if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
assert(!ZSTD_checkCParams(cParams));
/* srcSizeHint == 0 means 0 */
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
}
static size_t
ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
const U32 forCCtx)
{
size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
size_t const hSize = ((size_t)1) << cParams->hashLog;
U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
/* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
* surrounded by redzones in ASAN. */
size_t const tableSpace = chainSize * sizeof(U32)
+ hSize * sizeof(U32)
+ h3Size * sizeof(U32);
size_t const optPotentialSpace =
ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
+ ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
+ ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
+ ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
+ ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+ ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
? optPotentialSpace
: 0;
DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
(U32)chainSize, (U32)hSize, (U32)h3Size);
return tableSpace + optSpace;
}
static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
const ZSTD_compressionParameters* cParams,
const ldmParams_t* ldmParams,
const int isStatic,
const size_t buffInSize,
const size_t buffOutSize,
const U64 pledgedSrcSize)
{
size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize));
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
U32 const divider = (cParams->minMatch==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+ ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1);
size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
size_t const ldmSeqSpace = ldmParams->enableLdm ?
ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
+ ZSTD_cwksp_alloc_size(buffOutSize);
size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
size_t const neededSpace =
cctxSpace +
entropySpace +
blockStateSpace +
ldmSpace +
ldmSeqSpace +
matchStateSize +
tokenSpace +
bufferSpace;
DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
return neededSpace;
}
size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
ZSTD_compressionParameters const cParams =
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
/* estimateCCtxSize is for one-shot compression. So no buffers should
* be needed. However, we still allocate two 0-sized buffers, which can
* take space under ASAN. */
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
&cParams, &params->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
}
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
{
ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
return ZSTD_estimateCCtxSize_usingCCtxParams(&params);
}
static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
{
int tier = 0;
size_t largestSize = 0;
static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};
for (; tier < 4; ++tier) {
/* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
}
return largestSize;
}
size_t ZSTD_estimateCCtxSize(int compressionLevel)
{
int level;
size_t memBudget = 0;
for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
/* Ensure monotonically increasing memory usage as compression level increases */
size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
if (newMB > memBudget) memBudget = newMB;
}
return memBudget;
}
size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
{ ZSTD_compressionParameters const cParams =
ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
? ((size_t)1 << cParams.windowLog) + blockSize
: 0;
size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
? ZSTD_compressBound(blockSize) + 1
: 0;
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
&cParams, &params->ldmParams, 1, inBuffSize, outBuffSize,
ZSTD_CONTENTSIZE_UNKNOWN);
}
}
size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
{
ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
}
static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
{
ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
return ZSTD_estimateCStreamSize_usingCParams(cParams);
}
size_t ZSTD_estimateCStreamSize(int compressionLevel)
{
int level;
size_t memBudget = 0;
for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
if (newMB > memBudget) memBudget = newMB;
}
return memBudget;
}
/* ZSTD_getFrameProgression():
* tells how much data has been consumed (input) and produced (output) for current frame.
* able to count progression inside worker threads (non-blocking mode).
*/
ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
{
{ ZSTD_frameProgression fp;
size_t const buffered = (cctx->inBuff == NULL) ? 0 :
cctx->inBuffPos - cctx->inToCompress;
if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
assert(buffered <= ZSTD_BLOCKSIZE_MAX);
fp.ingested = cctx->consumedSrcSize + buffered;
fp.consumed = cctx->consumedSrcSize;
fp.produced = cctx->producedCSize;
fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */
fp.currentJobID = 0;
fp.nbActiveWorkers = 0;
return fp;
} }
/*! ZSTD_toFlushNow()
* Only useful for multithreading scenarios currently (nbWorkers >= 1).
*/
size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
{
(void)cctx;
return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
}
static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
ZSTD_compressionParameters cParams2)
{
(void)cParams1;
(void)cParams2;
assert(cParams1.windowLog == cParams2.windowLog);
assert(cParams1.chainLog == cParams2.chainLog);
assert(cParams1.hashLog == cParams2.hashLog);
assert(cParams1.searchLog == cParams2.searchLog);
assert(cParams1.minMatch == cParams2.minMatch);
assert(cParams1.targetLength == cParams2.targetLength);
assert(cParams1.strategy == cParams2.strategy);
}
void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
{
int i;
for (i = 0; i < ZSTD_REP_NUM; ++i)
bs->rep[i] = repStartValue[i];
bs->entropy.huf.repeatMode = HUF_repeat_none;
bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
}
/*! ZSTD_invalidateMatchState()
* Invalidate all the matches in the match finder tables.
* Requires nextSrc and base to be set (can be NULL).
*/
static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
{
ZSTD_window_clear(&ms->window);
ms->nextToUpdate = ms->window.dictLimit;
ms->loadedDictEnd = 0;
ms->opt.litLengthSum = 0; /* force reset of btopt stats */
ms->dictMatchState = NULL;
}
/*
* Controls, for this matchState reset, whether the tables need to be cleared /
* prepared for the coming compression (ZSTDcrp_makeClean), or whether the
* tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
* subsequent operation will overwrite the table space anyways (e.g., copying
* the matchState contents in from a CDict).
*/
typedef enum {
ZSTDcrp_makeClean,
ZSTDcrp_leaveDirty
} ZSTD_compResetPolicy_e;
/*
* Controls, for this matchState reset, whether indexing can continue where it
* left off (ZSTDirp_continue), or whether it needs to be restarted from zero
* (ZSTDirp_reset).
*/
typedef enum {
ZSTDirp_continue,
ZSTDirp_reset
} ZSTD_indexResetPolicy_e;
typedef enum {
ZSTD_resetTarget_CDict,
ZSTD_resetTarget_CCtx
} ZSTD_resetTarget_e;
static size_t
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
ZSTD_cwksp* ws,
const ZSTD_compressionParameters* cParams,
const ZSTD_compResetPolicy_e crp,
const ZSTD_indexResetPolicy_e forceResetIndex,
const ZSTD_resetTarget_e forWho)
{
size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
size_t const hSize = ((size_t)1) << cParams->hashLog;
U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
if (forceResetIndex == ZSTDirp_reset) {
ZSTD_window_init(&ms->window);
ZSTD_cwksp_mark_tables_dirty(ws);
}
ms->hashLog3 = hashLog3;
ZSTD_invalidateMatchState(ms);
assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */
ZSTD_cwksp_clear_tables(ws);
DEBUGLOG(5, "reserving table space");
/* table Space */
ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
"failed a workspace allocation in ZSTD_reset_matchState");
DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
if (crp!=ZSTDcrp_leaveDirty) {
/* reset tables only */
ZSTD_cwksp_clean_tables(ws);
}
/* opt parser space */
if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
DEBUGLOG(4, "reserving optimal parser space");
ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
}
ms->cParams = *cParams;
RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
"failed a workspace allocation in ZSTD_reset_matchState");
return 0;
}
/* ZSTD_indexTooCloseToMax() :
* minor optimization : prefer memset() rather than reduceIndex()
* which is measurably slow in some circumstances (reported for Visual Studio).
* Works when re-using a context for a lot of smallish inputs :
* if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
* memset() will be triggered before reduceIndex().
*/
#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
{
return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
}
/*! ZSTD_resetCCtx_internal() :
note : `params` are assumed fully validated at this stage */
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
ZSTD_CCtx_params params,
U64 const pledgedSrcSize,
ZSTD_compResetPolicy_e const crp,
ZSTD_buffered_policy_e const zbuff)
{
ZSTD_cwksp* const ws = &zc->workspace;
DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
(U32)pledgedSrcSize, params.cParams.windowLog);
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
zc->isFirstBlock = 1;
if (params.ldmParams.enableLdm) {
/* Adjust long distance matching parameters */
ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
assert(params.ldmParams.hashRateLog < 32);
}
{ size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
U32 const divider = (params.cParams.minMatch==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered)
? ZSTD_compressBound(blockSize) + 1
: 0;
size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered)
? windowSize + blockSize
: 0;
size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
ZSTD_indexResetPolicy_e needsIndexReset =
(!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset;
size_t const neededSpace =
ZSTD_estimateCCtxSize_usingCCtxParams_internal(
&params.cParams, &params.ldmParams, zc->staticSize != 0,
buffInSize, buffOutSize, pledgedSrcSize);
FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
/* Check if workspace is large enough, alloc a new one if needed */
{
int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
DEBUGLOG(4, "Need %zu B workspace", neededSpace);
DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
if (workspaceTooSmall || workspaceWasteful) {
DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
ZSTD_cwksp_sizeof(ws) >> 10,
neededSpace >> 10);
RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
needsIndexReset = ZSTDirp_reset;
ZSTD_cwksp_free(ws, zc->customMem);
FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");
DEBUGLOG(5, "reserving object space");
/* Statically sized space.
* entropyWorkspace never moves,
* though prev/next block swap places */
assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
} }
ZSTD_cwksp_clear(ws);
/* init params */
zc->appliedParams = params;
zc->blockState.matchState.cParams = params.cParams;
zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
zc->consumedSrcSize = 0;
zc->producedCSize = 0;
if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
zc->appliedParams.fParams.contentSizeFlag = 0;
DEBUGLOG(4, "pledged content size : %u ; flag : %u",
(unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
zc->blockSize = blockSize;
xxh64_reset(&zc->xxhState, 0);
zc->stage = ZSTDcs_init;
zc->dictID = 0;
zc->dictContentSize = 0;
ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
/* ZSTD_wildcopy() is used to copy into the literals buffer,
* so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
*/
zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
zc->seqStore.maxNbLit = blockSize;
/* buffers */
zc->bufferedPolicy = zbuff;
zc->inBuffSize = buffInSize;
zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
zc->outBuffSize = buffOutSize;
zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
/* ldm bucketOffsets table */
if (params.ldmParams.enableLdm) {
/* TODO: avoid memset? */
size_t const numBuckets =
((size_t)1) << (params.ldmParams.hashLog -
params.ldmParams.bucketSizeLog);
zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
}
/* sequences storage */
ZSTD_referenceExternalSequences(zc, NULL, 0);
zc->seqStore.maxNbSeq = maxNbSeq;
zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
FORWARD_IF_ERROR(ZSTD_reset_matchState(
&zc->blockState.matchState,
ws,
&params.cParams,
crp,
needsIndexReset,
ZSTD_resetTarget_CCtx), "");
/* ldm hash table */
if (params.ldmParams.enableLdm) {
/* TODO: avoid memset? */
size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
zc->maxNbLdmSequences = maxNbLdmSeq;
ZSTD_window_init(&zc->ldmState.window);
ZSTD_window_clear(&zc->ldmState.window);
zc->ldmState.loadedDictEnd = 0;
}
/* Due to alignment, when reusing a workspace, we can actually consume
* up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h
*/
assert(ZSTD_cwksp_used(ws) >= neededSpace &&
ZSTD_cwksp_used(ws) <= neededSpace + 3);
DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
zc->initialized = 1;
return 0;
}
}
/* ZSTD_invalidateRepCodes() :
* ensures next compression will not use repcodes from previous block.
* Note : only works with regular variant;
* do not use with extDict variant ! */
void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
int i;
for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
}
/* These are the approximate sizes for each strategy past which copying the
* dictionary tables into the working context is faster than using them
* in-place.
*/
static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
8 KB, /* unused */
8 KB, /* ZSTD_fast */
16 KB, /* ZSTD_dfast */
32 KB, /* ZSTD_greedy */
32 KB, /* ZSTD_lazy */
32 KB, /* ZSTD_lazy2 */
32 KB, /* ZSTD_btlazy2 */
32 KB, /* ZSTD_btopt */
8 KB, /* ZSTD_btultra */
8 KB /* ZSTD_btultra2 */
};
static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
const ZSTD_CCtx_params* params,
U64 pledgedSrcSize)
{
size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch;
return dedicatedDictSearch
|| ( ( pledgedSrcSize <= cutoff
|| pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
|| params->attachDictPref == ZSTD_dictForceAttach )
&& params->attachDictPref != ZSTD_dictForceCopy
&& !params->forceWindow ); /* dictMatchState isn't correctly
* handled in _enforceMaxDist */
}
static size_t
ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params,
U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
{
{
ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
unsigned const windowLog = params.cParams.windowLog;
assert(windowLog != 0);
/* Resize working context table params for input only, since the dict
* has its own tables. */
/* pledgedSrcSize == 0 means 0! */
if (cdict->matchState.dedicatedDictSearch) {
ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);
}
params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
cdict->dictContentSize, ZSTD_cpm_attachDict);
params.cParams.windowLog = windowLog;
FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
ZSTDcrp_makeClean, zbuff), "");
assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
}
{ const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
- cdict->matchState.window.base);
const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
if (cdictLen == 0) {
/* don't even attach dictionaries with no contents */
DEBUGLOG(4, "skipping attaching empty dictionary");
} else {
DEBUGLOG(4, "attaching dictionary into context");
cctx->blockState.matchState.dictMatchState = &cdict->matchState;
/* prep working match state so dict matches never have negative indices
* when they are translated to the working context's index space. */
if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
cctx->blockState.matchState.window.nextSrc =
cctx->blockState.matchState.window.base + cdictEnd;
ZSTD_window_clear(&cctx->blockState.matchState.window);
}
/* loadedDictEnd is expressed within the referential of the active context */
cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
} }
cctx->dictID = cdict->dictID;
cctx->dictContentSize = cdict->dictContentSize;
/* copy block state */
ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
return 0;
}
static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params,
U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
{
const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
assert(!cdict->matchState.dedicatedDictSearch);
DEBUGLOG(4, "copying dictionary into context");
{ unsigned const windowLog = params.cParams.windowLog;
assert(windowLog != 0);
/* Copy only compression parameters related to tables. */
params.cParams = *cdict_cParams;
params.cParams.windowLog = windowLog;
FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
ZSTDcrp_leaveDirty, zbuff), "");
assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
}
ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
/* copy tables */
{ size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
size_t const hSize = (size_t)1 << cdict_cParams->hashLog;
ZSTD_memcpy(cctx->blockState.matchState.hashTable,
cdict->matchState.hashTable,
hSize * sizeof(U32));
ZSTD_memcpy(cctx->blockState.matchState.chainTable,
cdict->matchState.chainTable,
chainSize * sizeof(U32));
}
/* Zero the hashTable3, since the cdict never fills it */
{ int const h3log = cctx->blockState.matchState.hashLog3;
size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
assert(cdict->matchState.hashLog3 == 0);
ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
}
ZSTD_cwksp_mark_tables_clean(&cctx->workspace);
/* copy dictionary offsets */
{ ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
dstMatchState->window = srcMatchState->window;
dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
}
cctx->dictID = cdict->dictID;
cctx->dictContentSize = cdict->dictContentSize;
/* copy block state */
ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
return 0;
}
/* We have a choice between copying the dictionary context into the working
* context, or referencing the dictionary context from the working context
* in-place. We decide here which strategy to use. */
static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
const ZSTD_CDict* cdict,
const ZSTD_CCtx_params* params,
U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
{
DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
(unsigned)pledgedSrcSize);
if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
return ZSTD_resetCCtx_byAttachingCDict(
cctx, cdict, *params, pledgedSrcSize, zbuff);
} else {
return ZSTD_resetCCtx_byCopyingCDict(
cctx, cdict, *params, pledgedSrcSize, zbuff);
}
}
/*! ZSTD_copyCCtx_internal() :
* Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
* Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
* The "context", in this case, refers to the hash and chain tables,
* entropy tables, and dictionary references.
* `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.
* @return : 0, or an error code */
static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
const ZSTD_CCtx* srcCCtx,
ZSTD_frameParameters fParams,
U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
{
DEBUGLOG(5, "ZSTD_copyCCtx_internal");
RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
"Can't copy a ctx that's not in init stage.");
ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
{ ZSTD_CCtx_params params = dstCCtx->requestedParams;
/* Copy only compression parameters related to tables. */
params.cParams = srcCCtx->appliedParams.cParams;
params.fParams = fParams;
ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
ZSTDcrp_leaveDirty, zbuff);
assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
}
ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
/* copy tables */
{ size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
int const h3log = srcCCtx->blockState.matchState.hashLog3;
size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable,
srcCCtx->blockState.matchState.hashTable,
hSize * sizeof(U32));
ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable,
srcCCtx->blockState.matchState.chainTable,
chainSize * sizeof(U32));
ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3,
srcCCtx->blockState.matchState.hashTable3,
h3Size * sizeof(U32));
}
ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);
/* copy dictionary offsets */
{
const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
dstMatchState->window = srcMatchState->window;
dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
}
dstCCtx->dictID = srcCCtx->dictID;
dstCCtx->dictContentSize = srcCCtx->dictContentSize;
/* copy block state */
ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));
return 0;
}
/*! ZSTD_copyCCtx() :
* Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
* Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
* pledgedSrcSize==0 means "unknown".
* @return : 0, or an error code */
size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
{
ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy;
ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);
return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,
fParams, pledgedSrcSize,
zbuff);
}
#define ZSTD_ROWSIZE 16
/*! ZSTD_reduceTable() :
* reduce table indexes by `reducerValue`, or squash to zero.
* PreserveMark preserves "unsorted mark" for btlazy2 strategy.
* It must be set to a clear 0/1 value, to remove branch during inlining.
* Presume table size is a multiple of ZSTD_ROWSIZE
* to help auto-vectorization */
FORCE_INLINE_TEMPLATE void
ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
{
int const nbRows = (int)size / ZSTD_ROWSIZE;
int cellNb = 0;
int rowNb;
assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */
assert(size < (1U<<31)); /* can be casted to int */
for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
int column;
for (column=0; column<ZSTD_ROWSIZE; column++) {
if (preserveMark) {
U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
table[cellNb] += adder;
}
if (table[cellNb] < reducerValue) table[cellNb] = 0;
else table[cellNb] -= reducerValue;
cellNb++;
} }
}
static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
{
ZSTD_reduceTable_internal(table, size, reducerValue, 0);
}
static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
{
ZSTD_reduceTable_internal(table, size, reducerValue, 1);
}
/*! ZSTD_reduceIndex() :
* rescale all indexes to avoid future overflow (indexes are U32) */
static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
{
{ U32 const hSize = (U32)1 << params->cParams.hashLog;
ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
}
if (params->cParams.strategy != ZSTD_fast) {
U32 const chainSize = (U32)1 << params->cParams.chainLog;
if (params->cParams.strategy == ZSTD_btlazy2)
ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
else
ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
}
if (ms->hashLog3) {
U32 const h3Size = (U32)1 << ms->hashLog3;
ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
}
}
/*-*******************************************************
* Block entropic compression
*********************************************************/
/* See doc/zstd_compression_format.md for detailed format description */
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
{
const seqDef* const sequences = seqStorePtr->sequencesStart;
BYTE* const llCodeTable = seqStorePtr->llCode;
BYTE* const ofCodeTable = seqStorePtr->ofCode;
BYTE* const mlCodeTable = seqStorePtr->mlCode;
U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
U32 u;
assert(nbSeq <= seqStorePtr->maxNbSeq);
for (u=0; u<nbSeq; u++) {
U32 const llv = sequences[u].litLength;
U32 const mlv = sequences[u].matchLength;
llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
}
if (seqStorePtr->longLengthID==1)
llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
if (seqStorePtr->longLengthID==2)
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}
/* ZSTD_useTargetCBlockSize():
* Returns if target compressed block size param is being used.
* If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
* Returns 1 if true, 0 otherwise. */
static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
{
DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
return (cctxParams->targetCBlockSize != 0);
}
/* ZSTD_entropyCompressSequences_internal():
* actually compresses both literals and sequences */
MEM_STATIC size_t
ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
const ZSTD_entropyCTables_t* prevEntropy,
ZSTD_entropyCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
void* entropyWorkspace, size_t entropyWkspSize,
const int bmi2)
{
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
unsigned* count = (unsigned*)entropyWorkspace;
FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
const seqDef* const sequences = seqStorePtr->sequencesStart;
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
const BYTE* const llCodeTable = seqStorePtr->llCode;
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstCapacity;
BYTE* op = ostart;
size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
BYTE* seqHead;
BYTE* lastNCount = NULL;
entropyWorkspace = count + (MaxSeq + 1);
entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq);
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
/* Compress literals */
{ const BYTE* const literals = seqStorePtr->litStart;
size_t const litSize = (size_t)(seqStorePtr->lit - literals);
size_t const cSize = ZSTD_compressLiterals(
&prevEntropy->huf, &nextEntropy->huf,
cctxParams->cParams.strategy,
ZSTD_disableLiteralsCompression(cctxParams),
op, dstCapacity,
literals, litSize,
entropyWorkspace, entropyWkspSize,
bmi2);
FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
assert(cSize <= dstCapacity);
op += cSize;
}
/* Sequences Header */
RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
dstSize_tooSmall, "Can't fit seq hdr in output buf!");
if (nbSeq < 128) {
*op++ = (BYTE)nbSeq;
} else if (nbSeq < LONGNBSEQ) {
op[0] = (BYTE)((nbSeq>>8) + 0x80);
op[1] = (BYTE)nbSeq;
op+=2;
} else {
op[0]=0xFF;
MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
op+=3;
}
assert(op <= oend);
if (nbSeq==0) {
/* Copy the old tables over as if we repeated them */
ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
return (size_t)(op - ostart);
}
/* seqHead : flags for FSE encoding type */
seqHead = op++;
assert(op <= oend);
/* convert length/distances into codes */
ZSTD_seqToCodes(seqStorePtr);
/* build CTable for Literal Lengths */
{ unsigned max = MaxLL;
size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
DEBUGLOG(5, "Building LL table");
nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
count, max, mostFrequent, nbSeq,
LLFSELog, prevEntropy->fse.litlengthCTable,
LL_defaultNorm, LL_defaultNormLog,
ZSTD_defaultAllowed, strategy);
assert(set_basic < set_compressed && set_rle < set_compressed);
assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(
op, (size_t)(oend - op),
CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
count, max, llCodeTable, nbSeq,
LL_defaultNorm, LL_defaultNormLog, MaxLL,
prevEntropy->fse.litlengthCTable,
sizeof(prevEntropy->fse.litlengthCTable),
entropyWorkspace, entropyWkspSize);
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
if (LLtype == set_compressed)
lastNCount = op;
op += countSize;
assert(op <= oend);
} }
/* build CTable for Offsets */
{ unsigned max = MaxOff;
size_t const mostFrequent = HIST_countFast_wksp(
count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
DEBUGLOG(5, "Building OF table");
nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
count, max, mostFrequent, nbSeq,
OffFSELog, prevEntropy->fse.offcodeCTable,
OF_defaultNorm, OF_defaultNormLog,
defaultPolicy, strategy);
assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(
op, (size_t)(oend - op),
CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
count, max, ofCodeTable, nbSeq,
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
prevEntropy->fse.offcodeCTable,
sizeof(prevEntropy->fse.offcodeCTable),
entropyWorkspace, entropyWkspSize);
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
if (Offtype == set_compressed)
lastNCount = op;
op += countSize;
assert(op <= oend);
} }
/* build CTable for MatchLengths */
{ unsigned max = MaxML;
size_t const mostFrequent = HIST_countFast_wksp(
count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
count, max, mostFrequent, nbSeq,
MLFSELog, prevEntropy->fse.matchlengthCTable,
ML_defaultNorm, ML_defaultNormLog,
ZSTD_defaultAllowed, strategy);
assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
{ size_t const countSize = ZSTD_buildCTable(
op, (size_t)(oend - op),
CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
count, max, mlCodeTable, nbSeq,
ML_defaultNorm, ML_defaultNormLog, MaxML,
prevEntropy->fse.matchlengthCTable,
sizeof(prevEntropy->fse.matchlengthCTable),
entropyWorkspace, entropyWkspSize);
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
if (MLtype == set_compressed)
lastNCount = op;
op += countSize;
assert(op <= oend);
} }
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
{ size_t const bitstreamSize = ZSTD_encodeSequences(
op, (size_t)(oend - op),
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq,
longOffsets, bmi2);
FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
op += bitstreamSize;
assert(op <= oend);
/* zstd versions <= 1.3.4 mistakenly report corruption when
* FSE_readNCount() receives a buffer < 4 bytes.
* Fixed by https://github.com/facebook/zstd/pull/1146.
* This can happen when the last set_compressed table present is 2
* bytes and the bitstream is only one byte.
* In this exceedingly rare case, we will simply emit an uncompressed
* block, since it isn't worth optimizing.
*/
if (lastNCount && (op - lastNCount) < 4) {
/* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
assert(op - lastNCount == 3);
DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
"emitting an uncompressed block.");
return 0;
}
}
DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
return (size_t)(op - ostart);
}
MEM_STATIC size_t
ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
const ZSTD_entropyCTables_t* prevEntropy,
ZSTD_entropyCTables_t* nextEntropy,
const ZSTD_CCtx_params* cctxParams,
void* dst, size_t dstCapacity,
size_t srcSize,
void* entropyWorkspace, size_t entropyWkspSize,
int bmi2)
{
size_t const cSize = ZSTD_entropyCompressSequences_internal(
seqStorePtr, prevEntropy, nextEntropy, cctxParams,
dst, dstCapacity,
entropyWorkspace, entropyWkspSize, bmi2);
if (cSize == 0) return 0;
/* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
* Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
*/
if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
return 0; /* block not compressed */
FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed");
/* Check compressibility */
{ size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
if (cSize >= maxCSize) return 0; /* block not compressed */
}
DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize);
return cSize;
}
/* ZSTD_selectBlockCompressor() :
* Not static, but internal use only (used by long distance matcher)
* assumption : strat is a valid strategy */
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
{
static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
{ ZSTD_compressBlock_fast /* default for 0 */,
ZSTD_compressBlock_fast,
ZSTD_compressBlock_doubleFast,
ZSTD_compressBlock_greedy,
ZSTD_compressBlock_lazy,
ZSTD_compressBlock_lazy2,
ZSTD_compressBlock_btlazy2,
ZSTD_compressBlock_btopt,
ZSTD_compressBlock_btultra,
ZSTD_compressBlock_btultra2 },
{ ZSTD_compressBlock_fast_extDict /* default for 0 */,
ZSTD_compressBlock_fast_extDict,
ZSTD_compressBlock_doubleFast_extDict,
ZSTD_compressBlock_greedy_extDict,
ZSTD_compressBlock_lazy_extDict,
ZSTD_compressBlock_lazy2_extDict,
ZSTD_compressBlock_btlazy2_extDict,
ZSTD_compressBlock_btopt_extDict,
ZSTD_compressBlock_btultra_extDict,
ZSTD_compressBlock_btultra_extDict },
{ ZSTD_compressBlock_fast_dictMatchState /* default for 0 */,
ZSTD_compressBlock_fast_dictMatchState,
ZSTD_compressBlock_doubleFast_dictMatchState,
ZSTD_compressBlock_greedy_dictMatchState,
ZSTD_compressBlock_lazy_dictMatchState,
ZSTD_compressBlock_lazy2_dictMatchState,
ZSTD_compressBlock_btlazy2_dictMatchState,
ZSTD_compressBlock_btopt_dictMatchState,
ZSTD_compressBlock_btultra_dictMatchState,
ZSTD_compressBlock_btultra_dictMatchState },
{ NULL /* default for 0 */,
NULL,
NULL,
ZSTD_compressBlock_greedy_dedicatedDictSearch,
ZSTD_compressBlock_lazy_dedicatedDictSearch,
ZSTD_compressBlock_lazy2_dedicatedDictSearch,
NULL,
NULL,
NULL,
NULL }
};
ZSTD_blockCompressor selectedCompressor;
ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
assert(selectedCompressor != NULL);
return selectedCompressor;
}
static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
const BYTE* anchor, size_t lastLLSize)
{
ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
}
void ZSTD_resetSeqStore(seqStore_t* ssPtr)
{
ssPtr->lit = ssPtr->litStart;
ssPtr->sequences = ssPtr->sequencesStart;
ssPtr->longLengthID = 0;
}
typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
ZSTD_matchState_t* const ms = &zc->blockState.matchState;
DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
/* Assert that we have correctly flushed the ctx params into the ms's copy */
ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
} else {
ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
}
return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
}
ZSTD_resetSeqStore(&(zc->seqStore));
/* required for optimal parser to read stats from dictionary */
ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
/* tell the optimal parser how we expect to compress literals */
ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
/* a gap between an attached dict and the current window is not safe,
* they must remain adjacent,
* and when that stops being the case, the dict must be unset */
assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);
/* limited update after a very long match */
{ const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const U32 curr = (U32)(istart-base);
if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */
if (curr > ms->nextToUpdate + 384)
ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384));
}
/* select and store sequences */
{ ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
size_t lastLLSize;
{ int i;
for (i = 0; i < ZSTD_REP_NUM; ++i)
zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
}
if (zc->externSeqStore.pos < zc->externSeqStore.size) {
assert(!zc->appliedParams.ldmParams.enableLdm);
/* Updates ldmSeqStore.pos */
lastLLSize =
ZSTD_ldm_blockCompress(&zc->externSeqStore,
ms, &zc->seqStore,
zc->blockState.nextCBlock->rep,
src, srcSize);
assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
} else if (zc->appliedParams.ldmParams.enableLdm) {
rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
ldmSeqStore.seq = zc->ldmSequences;
ldmSeqStore.capacity = zc->maxNbLdmSequences;
/* Updates ldmSeqStore.size */
FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
&zc->appliedParams.ldmParams,
src, srcSize), "");
/* Updates ldmSeqStore.pos */
lastLLSize =
ZSTD_ldm_blockCompress(&ldmSeqStore,
ms, &zc->seqStore,
zc->blockState.nextCBlock->rep,
src, srcSize);
assert(ldmSeqStore.pos == ldmSeqStore.size);
} else { /* not long range mode */
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
ms->ldmSeqStore = NULL;
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
}
{ const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
} }
return ZSTDbss_compress;
}
static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
{
const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
const seqDef* seqStoreSeqs = seqStore->sequencesStart;
size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
size_t literalsRead = 0;
size_t lastLLSize;
ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
size_t i;
repcodes_t updatedRepcodes;
assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
/* Ensure we have enough space for last literals "sequence" */
assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
for (i = 0; i < seqStoreSeqSize; ++i) {
U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM;
outSeqs[i].litLength = seqStoreSeqs[i].litLength;
outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
outSeqs[i].rep = 0;
if (i == seqStore->longLengthPos) {
if (seqStore->longLengthID == 1) {
outSeqs[i].litLength += 0x10000;
} else if (seqStore->longLengthID == 2) {
outSeqs[i].matchLength += 0x10000;
}
}
if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) {
/* Derive the correct offset corresponding to a repcode */
outSeqs[i].rep = seqStoreSeqs[i].offset;
if (outSeqs[i].litLength != 0) {
rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
} else {
if (outSeqs[i].rep == 3) {
rawOffset = updatedRepcodes.rep[0] - 1;
} else {
rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
}
}
}
outSeqs[i].offset = rawOffset;
/* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode