1
0
mirror of https://github.com/facebook/zstd.git synced 2025-07-29 11:21:22 +03:00

Merge pull request #3486 from daniellerozenblit/patch-from-low-memory-mode

Mmap large dictionaries in patch-from mode
This commit is contained in:
daniellerozenblit
2023-03-09 15:30:09 -05:00
committed by GitHub
4 changed files with 149 additions and 26 deletions

View File

@ -485,6 +485,11 @@ void FIO_setPassThroughFlag(FIO_prefs_t* const prefs, int value) {
prefs->passThrough = (value != 0);
}
void FIO_setMMapDict(FIO_prefs_t* const prefs, ZSTD_paramSwitch_e value)
{
prefs->mmapDict = value;
}
/* FIO_ctx_t functions */
void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) {
@ -660,7 +665,23 @@ FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
}
}
/*! FIO_createDictBuffer() :
/* FIO_getDictFileStat() :
*/
static void FIO_getDictFileStat(const char* fileName, stat_t* dictFileStat) {
assert(dictFileStat != NULL);
if (fileName == NULL) return;
if (!UTIL_stat(fileName, dictFileStat)) {
EXM_THROW(31, "Stat failed on dictionary file %s: %s", fileName, strerror(errno));
}
if (!UTIL_isRegularFileStat(dictFileStat)) {
EXM_THROW(32, "Dictionary %s must be a regular file.", fileName);
}
}
/* FIO_createDictBuffer() :
* creates a buffer, pointed by `*bufferPtr`,
* loads `filename` content into it, up to DICTSIZE_MAX bytes.
* @return : loaded size
@ -678,14 +699,6 @@ static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_p
DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
if (!UTIL_stat(fileName, dictFileStat)) {
EXM_THROW(31, "Stat failed on dictionary file %s: %s", fileName, strerror(errno));
}
if (!UTIL_isRegularFileStat(dictFileStat)) {
EXM_THROW(32, "Dictionary %s must be a regular file.", fileName);
}
fileHandle = fopen(fileName, "rb");
if (fileHandle == NULL) {
@ -712,6 +725,70 @@ static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_p
return (size_t)fileSize;
}
#if (PLATFORM_POSIX_VERSION > 0)
#include <sys/mman.h>
static void* FIO_mmap(size_t fileSize, int fileHandle)
{
return mmap
(NULL, (size_t)fileSize, PROT_READ, MAP_PRIVATE, fileHandle, 0);
}
static int FIO_munmap(void* buffer, size_t bufferSize)
{
return munmap(buffer, bufferSize);
}
/* We might want to also do mapping for windows */
static size_t FIO_createDictBufferMMap(void** bufferPtr, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
{
int fileHandle;
U64 fileSize;
assert(bufferPtr != NULL);
assert(dictFileStat != NULL);
*bufferPtr = NULL;
if (fileName == NULL) return 0;
DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
fileHandle = open(fileName, O_RDONLY);
if (fileHandle == -1) {
EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
}
fileSize = UTIL_getFileSizeStat(dictFileStat);
{
size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
if (fileSize > dictSizeMax) {
EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
}
}
*bufferPtr = FIO_mmap((size_t)fileSize, fileHandle);
close(fileHandle);
return (size_t)fileSize;
}
#else
static size_t FIO_createDictBufferMMap(void** bufferPtr, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
{
return FIO_createDictBuffer(bufferPtr, fileName, prefs, dictFileStat);
}
static void FIO_munmap(void* buffer, size_t bufferSize) {
(void)bufferSize;
free(buffer);
}
#endif
static void FIO_freeDict(const FIO_Dict_t* dict) {
if (dict->dictBufferType == FIO_mallocDict) {
free(dict->dictBuffer);
} else if (dict->dictBufferType == FIO_mmapDict) {
FIO_munmap(dict->dictBuffer, dict->dictBufferSize);
} else {
assert(0); /* Should not reach this case */
}
}
/* FIO_checkFilenameCollisions() :
@ -914,8 +991,7 @@ static ZSTD_outBuffer setOutBuffer(void* buf, size_t s, size_t pos)
* Compression
************************************************************************/
typedef struct {
void* dictBuffer;
size_t dictBufferSize;
FIO_Dict_t dict;
const char* dictFileName;
stat_t dictFileStat;
ZSTD_CStream* cctx;
@ -961,6 +1037,8 @@ static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs,
static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
const char* dictFileName, unsigned long long const maxSrcFileSize,
int cLevel, ZSTD_compressionParameters comprParams) {
int useMMap = prefs->mmapDict == ZSTD_ps_enable;
int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable;
cRess_t ress;
memset(&ress, 0, sizeof(ress));
@ -970,19 +1048,30 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx",
strerror(errno));
FIO_getDictFileStat(dictFileName, &ress.dictFileStat);
/* need to update memLimit before calling createDictBuffer
* because of memLimit check inside it */
if (prefs->patchFromMode) {
U64 const dictSize = UTIL_getFileSizeStat(&ress.dictFileStat);
unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize;
FIO_adjustParamsForPatchFromMode(prefs, &comprParams, UTIL_getFileSize(dictFileName), ssSize > 0 ? ssSize : maxSrcFileSize, cLevel);
useMMap |= dictSize > prefs->memLimit;
FIO_adjustParamsForPatchFromMode(prefs, &comprParams, dictSize, ssSize > 0 ? ssSize : maxSrcFileSize, cLevel);
}
ress.dict.dictBufferType = (useMMap && !forceNoUseMMap) ? FIO_mmapDict : FIO_mallocDict;
if (ress.dict.dictBufferType == FIO_mallocDict) {
ress.dict.dictBufferSize = FIO_createDictBuffer(&ress.dict.dictBuffer, dictFileName, prefs, &ress.dictFileStat); /* works with dictFileName==NULL */
} else {
ress.dict.dictBufferSize = FIO_createDictBufferMMap(&ress.dict.dictBuffer, dictFileName, prefs, &ress.dictFileStat);
}
ress.dictBufferSize = FIO_createDictBuffer(&ress.dictBuffer, dictFileName, prefs, &ress.dictFileStat); /* works with dictFileName==NULL */
ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_CStreamOutSize());
ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_CStreamInSize());
/* Advanced parameters, including dictionary */
if (dictFileName && (ress.dictBuffer==NULL))
if (dictFileName && (ress.dict.dictBuffer==NULL))
EXM_THROW(32, "allocation error : can't create dictBuffer");
ress.dictFileName = dictFileName;
@ -1032,9 +1121,9 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
#endif
/* dictionary */
if (prefs->patchFromMode) {
CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dictBuffer, ress.dictBufferSize) );
CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) );
} else {
CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, ress.dictBuffer, ress.dictBufferSize) );
CHECK( ZSTD_CCtx_loadDictionary_byReference(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) );
}
return ress;
@ -1042,7 +1131,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
static void FIO_freeCResources(const cRess_t* const ress)
{
free(ress->dictBuffer);
FIO_freeDict(&(ress->dict));
AIO_WritePool_free(ress->writeCtx);
AIO_ReadPool_free(ress->readCtx);
ZSTD_freeCStream(ress->cctx); /* never fails */
@ -2051,6 +2140,7 @@ int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx,
* Decompression
***************************************************************************/
typedef struct {
FIO_Dict_t dict;
ZSTD_DStream* dctx;
WritePoolCtx_t *writeCtx;
ReadPoolCtx_t *readCtx;
@ -2058,13 +2148,22 @@ typedef struct {
static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName)
{
int useMMap = prefs->mmapDict == ZSTD_ps_enable;
int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable;
stat_t statbuf;
dRess_t ress;
memset(&ress, 0, sizeof(ress));
if (prefs->patchFromMode)
FIO_adjustMemLimitForPatchFromMode(prefs, UTIL_getFileSize(dictFileName), 0 /* just use the dict size */);
FIO_getDictFileStat(dictFileName, &statbuf);
if (prefs->patchFromMode){
U64 const dictSize = UTIL_getFileSizeStat(&statbuf);
useMMap |= dictSize > prefs->memLimit;
FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, 0 /* just use the dict size */);
}
/* Allocation */
ress.dict.dictBufferType = (useMMap && !forceNoUseMMap) ? FIO_mmapDict : FIO_mallocDict;
ress.dctx = ZSTD_createDStream();
if (ress.dctx==NULL)
EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno));
@ -2072,22 +2171,29 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi
CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag));
/* dictionary */
{ void* dictBuffer;
stat_t statbuf;
size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs, &statbuf);
CHECK( ZSTD_DCtx_reset(ress.dctx, ZSTD_reset_session_only) );
CHECK( ZSTD_DCtx_loadDictionary(ress.dctx, dictBuffer, dictBufferSize) );
free(dictBuffer);
{ if (ress.dict.dictBufferType == FIO_mallocDict) {
ress.dict.dictBufferSize = FIO_createDictBuffer(&ress.dict.dictBuffer, dictFileName, prefs, &statbuf);
} else {
ress.dict.dictBufferSize = FIO_createDictBufferMMap(&ress.dict.dictBuffer, dictFileName, prefs, &statbuf);
}
CHECK(ZSTD_DCtx_reset(ress.dctx, ZSTD_reset_session_only) );
if (prefs->patchFromMode){
CHECK(ZSTD_DCtx_refPrefix(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize));
} else {
CHECK(ZSTD_DCtx_loadDictionary_byReference(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize));
}
}
ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_DStreamOutSize());
ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_DStreamInSize());
return ress;
}
static void FIO_freeDResources(dRess_t ress)
{
FIO_freeDict(&(ress.dict));
CHECK( ZSTD_freeDStream(ress.dctx) );
AIO_WritePool_free(ress.writeCtx);
AIO_ReadPool_free(ress.readCtx);
@ -2670,6 +2776,8 @@ int FIO_decompressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs,
int const decodingError = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
FIO_freeDResources(ress);
return decodingError;
}

View File

@ -106,6 +106,7 @@ void FIO_setContentSize(FIO_prefs_t* const prefs, int value);
void FIO_displayCompressionParameters(const FIO_prefs_t* prefs);
void FIO_setAsyncIOFlag(FIO_prefs_t* const prefs, int value);
void FIO_setPassThroughFlag(FIO_prefs_t* const prefs, int value);
void FIO_setMMapDict(FIO_prefs_t* const prefs, ZSTD_paramSwitch_e value);
/* FIO_ctx_t functions */
void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value);

View File

@ -69,6 +69,15 @@ typedef struct FIO_prefs_s {
int contentSize;
int allowBlockDevices;
int passThrough;
ZSTD_paramSwitch_e mmapDict;
} FIO_prefs_t;
typedef enum {FIO_mallocDict, FIO_mmapDict} FIO_dictBufferType_t;
typedef struct {
void* dictBuffer;
size_t dictBufferSize;
FIO_dictBufferType_t dictBufferType;
} FIO_Dict_t;
#endif /* FILEIO_TYPES_HEADER */

View File

@ -254,6 +254,7 @@ static void usage_advanced(const char* programName)
DISPLAYOUT("\n");
DISPLAYOUT(" --format=zstd Compress files to the `.zst` format. [Default]\n");
DISPLAYOUT(" --mmap-dict Memory-map dictionary file rather than mallocing and loading all at once");
#ifdef ZSTD_GZCOMPRESS
DISPLAYOUT(" --format=gzip Compress files to the `.gz` format.\n");
#endif
@ -851,6 +852,7 @@ int main(int argCount, const char* argv[])
ultra=0,
contentSize=1,
removeSrcFile=0;
ZSTD_paramSwitch_e mmapDict=ZSTD_ps_auto;
ZSTD_paramSwitch_e useRowMatchFinder = ZSTD_ps_auto;
FIO_compressionType_t cType = FIO_zstdCompression;
unsigned nbWorkers = 0;
@ -984,6 +986,8 @@ int main(int argCount, const char* argv[])
if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) { badusage(programName); CLEAN_RETURN(1); } continue; }
if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; cType = FIO_zstdCompression; continue; }
if (!strcmp(argument, "--mmap-dict")) { mmapDict = ZSTD_ps_enable; continue; }
if (!strcmp(argument, "--no-mmap-dict")) { mmapDict = ZSTD_ps_disable; continue; }
#ifdef ZSTD_GZCOMPRESS
if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; cType = FIO_gzipCompression; continue; }
if (exeNameMatch(programName, ZSTD_GZ)) { /* behave like gzip */
@ -1526,6 +1530,7 @@ int main(int argCount, const char* argv[])
FIO_setNotificationLevel(g_displayLevel);
FIO_setAllowBlockDevices(prefs, allowBlockDevices);
FIO_setPatchFromMode(prefs, patchFromDictFileName != NULL);
FIO_setMMapDict(prefs, mmapDict);
if (memLimit == 0) {
if (compressionParams.windowLog == 0) {
memLimit = (U32)1 << g_defaultMaxWindowLog;