diff --git a/build/LICENSE b/build/LICENSE
new file mode 100644
index 000000000..e69de29bb
diff --git a/programs/fileio.c b/programs/fileio.c
index 09687c67a..4691003bb 100644
--- a/programs/fileio.c
+++ b/programs/fileio.c
@@ -628,6 +628,102 @@ static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName)
     return (size_t)fileSize;
 }
 
+
+
+/* FIO_checkFilenameCollisions() :
+ * Warns (through DISPLAY) about entries of filenameTable[0..nbFiles-1] that share the same
+ * base name, i.e. the text after the last path separator. Input strings are not modified.
+ * @return : 0 when the check was performed (collisions only produce warnings),
+ *           1 when the scratch table could not be allocated (check skipped).
+ */
+int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) {
+    const char **filenameTableSorted, *c, *filename;
+    unsigned u;
+
+    #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
+    c = "\\";
+    #else
+    c = "/";
+    #endif
+
+    if (nbFiles < 2) return 0;   /* nothing to compare; also avoids malloc(0) and reading an empty table */
+
+    filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles);
+    if (!filenameTableSorted) {
+        DISPLAY("Unable to malloc new str array, not checking for name collisions\n");
+        return 1;
+    }
+
+    for (u = 0; u < nbFiles; ++u) {   /* keep only the base name of each entry */
+        filename = strrchr(filenameTable[u], c[0]);
+        filenameTableSorted[u] = (filename == NULL) ? filenameTable[u] : filename+1;
+    }
+
+    /* once sorted, identical names are adjacent : a single linear pass reports them */
+    qsort((void*)filenameTableSorted, nbFiles, sizeof(char*), UTIL_compareStr);
+    for (u = 1; u < nbFiles; ++u) {
+        if (strcmp(filenameTableSorted[u-1], filenameTableSorted[u]) == 0) {
+            DISPLAY("WARNING: Two files have same filename: %s\n", filenameTableSorted[u]);
+        }
+    }
+
+    free((void*)filenameTableSorted);
+    return 0;
+}
+
+/* FIO_createFilename_fromOutDir() :
+ * Takes a source file name and specified output directory, and
+ * allocates memory for and returns a pointer to final path.
+ * This function never returns an error (it may abort() in case of pb)
+ * @param srcFilename : source path; only its last component (the text after
+ *                      the final separator) is kept.
+ * @param outDirName  : destination directory; a separator is inserted unless
+ *                      it already ends with one. An empty string yields the
+ *                      bare file name (no separator is prepended).
+ * @param suffixLen   : number of extra bytes reserved in the returned buffer,
+ *                      beyond the path itself.
+ * @return : heap-allocated, null-terminated path; the caller releases it with free().
+ */
+static char*
+FIO_createFilename_fromOutDir(const char* srcFilename, const char* outDirName, const size_t suffixLen)
+{
+    const char* c, *filenameBegin;
+    char* result;
+    size_t dirLen, nameLen, pos;
+
+    #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
+    c = "\\";
+    #else
+    c = "/";
+    #endif
+
+    /* base name : what follows the last separator, or the whole string if there is none */
+    filenameBegin = strrchr(srcFilename, c[0]);
+    if (filenameBegin == NULL) {
+        filenameBegin = srcFilename;
+    } else {
+        filenameBegin++;
+    }
+
+    dirLen = strlen(outDirName);
+    nameLen = strlen(filenameBegin);
+
+    result = (char*) malloc((dirLen+nameLen+suffixLen+30) * sizeof(char));
+    if (!result) {
+        EXM_THROW(30, "zstd: %s", strerror(errno));
+    }
+
+    memcpy(result, outDirName, dirLen);
+    pos = dirLen;
+    /* dirLen is tested first : indexing [dirLen-1] on an empty string would read out of bounds */
+    if (dirLen > 0 && outDirName[dirLen-1] != c[0]) {
+        result[pos++] = c[0];
+    }
+    memcpy(result+pos, filenameBegin, nameLen+1);   /* +1 : also copies the terminating null */
+
+    return result;
+}
+
 #ifndef ZSTD_NOCOMPRESS
 
 /* **********************************************************************
@@ -1276,9 +1372,7 @@ static int FIO_compressFilename_dstFile(FIO_prefs_t* const prefs,
     int result;
     stat_t statbuf;
     int transfer_permissions = 0;
-
     assert(ress.srcFile != NULL);
-
     if (ress.dstFile == NULL) {
         closeDstFile = 1;
         DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s", dstFileName);
@@ -1369,11 +1463,9 @@ FIO_compressFilename_srcFile(FIO_prefs_t* const prefs,
     return result;
 }
 
-
-int FIO_compressFilename(FIO_prefs_t* const prefs,
-                         const char* dstFileName, const char* srcFileName,
-                         const char* dictFileName, int compressionLevel,
-                         ZSTD_compressionParameters
comprParams)
+int FIO_compressFilename(FIO_prefs_t* const prefs, const char* dstFileName,
+                         const char* srcFileName, const char* dictFileName,
+                         int compressionLevel, ZSTD_compressionParameters comprParams)
 {
     cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
     int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
@@ -1383,21 +1475,25 @@ int FIO_compressFilename(FIO_prefs_t* const prefs,
     return result;
 }
 
-
 /* FIO_determineCompressedName() :
  * create a destination filename for compressed srcFileName.
  * @return a pointer to it.
  * This function never returns an error (it may abort() in case of pb)
  */
 static const char*
-FIO_determineCompressedName(const char* srcFileName, const char* suffix)
+FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix)
 {
     static size_t dfnbCapacity = 0;
     static char* dstFileNameBuffer = NULL;   /* using static allocation : this function cannot be multi-threaded */
-
-    size_t const sfnSize = strlen(srcFileName);
+    char* outDirFilename = NULL;
+    size_t sfnSize = strlen(srcFileName);
     size_t const suffixSize = strlen(suffix);
-
+    if (outDirName) {
+        outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, suffixSize);
+        assert(outDirFilename != NULL);   /* checked before first use, so the assert guards the strlen() below */
+        sfnSize = strlen(outDirFilename);
+    }
+
     if (dfnbCapacity <= sfnSize+suffixSize+1) {
         /* resize buffer for dstName */
         free(dstFileNameBuffer);
@@ -1405,23 +1501,30 @@ FIO_determineCompressedName(const char* srcFileName, const char* suffix)
         dstFileNameBuffer = (char*)malloc(dfnbCapacity);
         if (!dstFileNameBuffer) {
             EXM_THROW(30, "zstd: %s", strerror(errno));
-    }   }
+        }
+    }
     assert(dstFileNameBuffer != NULL);
-    memcpy(dstFileNameBuffer, srcFileName, sfnSize);
-    memcpy(dstFileNameBuffer+sfnSize, suffix, suffixSize+1 /* Include terminating null */);
+    if (outDirFilename) {
+        memcpy(dstFileNameBuffer, outDirFilename, sfnSize);
+        free(outDirFilename);
+    } else {
+        memcpy(dstFileNameBuffer, srcFileName, sfnSize);
+    }
+    memcpy(dstFileNameBuffer+sfnSize, suffix, suffixSize+1 /* Include terminating null */);
     return dstFileNameBuffer;
 }
 
 
 
 /* FIO_compressMultipleFilenames() :
  * compress nbFiles files
- * into one destination (outFileName)
- * or into one file each (outFileName == NULL, but suffix != NULL).
+ * into either one destination (outFileName),
+ * or into one file each (outFileName == NULL, but suffix != NULL),
+ * or into a destination folder (outDirName, set with --output-dir-flat)
  */
-int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
-                                  const char** inFileNamesTable, unsigned nbFiles,
-                                  const char* outFileName, const char* suffix,
+int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs, const char** inFileNamesTable,
+                                  const char* outDirName, unsigned nbFiles,
+                                  const char* outFileName, const char* suffix,
                                   const char* dictFileName, int compressionLevel,
                                   ZSTD_compressionParameters comprParams)
 {
@@ -1430,7 +1533,6 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
 
     /* init */
     assert(outFileName != NULL || suffix != NULL);
-
     if (outFileName != NULL) {  /* output into a single destination (stdout typically) */
         ress.dstFile = FIO_openDstFile(prefs, NULL, outFileName);
         if (ress.dstFile == NULL) {  /* could not open outFileName */
@@ -1448,9 +1550,12 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
         unsigned u;
         for (u=0; u #include
+#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__)
+#include /* needed for _mkdir in windows */
+#endif
 
 int UTIL_fileExist(const char* filename)
 {
@@ -98,6 +101,10 @@ U32 UTIL_isDirectory(const char* infilename)
     return 0;
 }
 
+int UTIL_compareStr(const void *p1, const void *p2) {
+    return strcmp(* (char * const *) p1, * (char * const *) p2);
+}
+
 int UTIL_isSameFile(const char* file1, const char* file2)
 {
 #if defined(_MSC_VER)
diff --git a/programs/util.h b/programs/util.h
index 0080b63c7..c73f7e9b0 100644
--- a/programs/util.h
+++ b/programs/util.h
@@ -134,6 +134,7
@@ int UTIL_setFileStat(const char* filename, stat_t* statbuf); U32 UTIL_isDirectory(const char* infilename); int UTIL_getFileStat(const char* infilename, stat_t* statbuf); int UTIL_isSameFile(const char* file1, const char* file2); +int UTIL_compareStr(const void *p1, const void *p2); U32 UTIL_isLink(const char* infilename); #define UTIL_FILESIZE_UNKNOWN ((U64)(-1)) diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 98df728a9..ae53d2c39 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -136,6 +136,7 @@ static int usage_advanced(const char* programName) DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n"); DISPLAY( " -c : force write to standard output, even if it is the console\n"); DISPLAY( " -l : print information about zstd compressed files \n"); + DISPLAY( " --output-dir-flat directory: results stored into `directory`. Filename collisions mean first file will be compressed. With -f, the last file will be compressed.\n"); #ifndef ZSTD_NOCOMPRESS DISPLAY( "--ultra : enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel()); DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); @@ -562,6 +563,7 @@ int main(int argCount, const char* argv[]) adaptMax = MAXCLEVEL, rsyncable = 0, nextArgumentIsOutFileName = 0, + nextArgumentIsOutDirName = 0, nextArgumentIsMaxDict = 0, nextArgumentIsDictID = 0, nextArgumentsAreFiles = 0, @@ -586,6 +588,7 @@ int main(int argCount, const char* argv[]) unsigned filenameIdx = 0; const char* programName = argv[0]; const char* outFileName = NULL; + const char* outDirName = NULL; const char* dictFileName = NULL; const char* suffix = ZSTD_EXTENSION; unsigned maxDictSize = g_defaultMaxDictSize; @@ -686,6 +689,7 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; } if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); 
continue; } if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; } + if (!strcmp(argument, "--output-dir-flat")) {nextArgumentIsOutDirName=1; lastCommand=1; continue; } if (!strcmp(argument, "--adapt")) { adapt = 1; continue; } if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) CLEAN_RETURN(badusage(programName)); continue; } if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; } @@ -852,7 +856,7 @@ int main(int argCount, const char* argv[]) /* destination file name */ case 'o': nextArgumentIsOutFileName=1; lastCommand=1; argument++; break; - + /* limit decompression memory */ case 'M': argument++; @@ -965,6 +969,13 @@ int main(int argCount, const char* argv[]) continue; } + if (nextArgumentIsOutDirName) { + nextArgumentIsOutDirName = 0; + lastCommand = 0; + outDirName = argument; + continue; + } + /* add filename to list */ filenameTable[filenameIdx++] = argument; } @@ -1166,7 +1177,7 @@ int main(int argCount, const char* argv[]) if ((filenameIdx==1) && outFileName) operationResult = FIO_compressFilename(prefs, outFileName, filenameTable[0], dictFileName, cLevel, compressionParams); else - operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams); + operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, outDirName, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams); #else (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */ DISPLAY("Compression not supported \n"); @@ -1184,7 +1195,7 @@ int main(int argCount, const char* argv[]) if (filenameIdx==1 && outFileName) operationResult = FIO_decompressFilename(prefs, outFileName, filenameTable[0], dictFileName); else 
- operationResult = FIO_decompressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, dictFileName); + operationResult = FIO_decompressMultipleFilenames(prefs, filenameTable, filenameIdx, outDirName, outFileName, dictFileName); #else DISPLAY("Decompression not supported \n"); #endif diff --git a/tests/playTests.sh b/tests/playTests.sh index 5a47ceb5e..06de4f8ac 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -269,6 +269,24 @@ if [ "$?" -eq 139 ]; then fi rm tmp* +println "test : compress multiple files into an output directory, --output-dir-flat" +println henlo > tmp1 +mkdir tmpInputTestDir +mkdir tmpInputTestDir/we +mkdir tmpInputTestDir/we/must +mkdir tmpInputTestDir/we/must/go +mkdir tmpInputTestDir/we/must/go/deeper +println cool > tmpInputTestDir/we/must/go/deeper/tmp2 +mkdir tmpOutDir +$ZSTD tmp1 tmpInputTestDir/we/must/go/deeper/tmp2 --output-dir-flat tmpOutDir +test -f tmpOutDir/tmp1.zst +test -f tmpOutDir/tmp2.zst +println "test : decompress multiple files into an output directory, --output-dir-flat" +mkdir tmpOutDirDecomp +$ZSTD tmpOutDir/ -r -d --output-dir-flat tmpOutDirDecomp +test -f tmpOutDirDecomp/tmp2 +test -f tmpOutDirDecomp/tmp1 +rm -rf tmp* println "\n===> Advanced compression parameters " println "Hello world!" | $ZSTD --zstd=windowLog=21, - -o tmp.zst && die "wrong parameters not detected!" @@ -412,7 +430,6 @@ ls -ls tmp* # check size of tmpdec (should be 2*(tmp1 + tmp2 + tmp3)) println "compress multiple files including a missing one (notHere) : " $ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!" - println "\n===> stream-size mode" ./datagen -g11000 > tmp