diff --git a/Makefile b/Makefile index 87d6a035c..a5e6cf8b0 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ # ################################################################ # Version number -export VERSION := 0.1.2 +export VERSION := 0.2.0 PRGDIR = programs ZSTDDIR = lib diff --git a/lib/bitstream.h b/lib/bitstream.h new file mode 100644 index 000000000..20e40ec66 --- /dev/null +++ b/lib/bitstream.h @@ -0,0 +1,384 @@ +/* ****************************************************************** + bitstream + Part of NewGen Entropy library + header file (to include) + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* +* This API consists of small unitary functions, which highly benefit from being inlined. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. +*/ + +/****************************************** +* Includes +******************************************/ +#include "mem.h" /* unaligned access routines */ +#include "error.h" /* error codes and messages */ + + +/******************************************** +* bitStream compression API (write forward) +********************************************/ +/* +* bitStream can mix input from multiple sources. +* A critical property of these streams is that they encode and decode in **reverse** direction. +* So the first bit sequence you add will be the last to be read, like a LIFO stack. +*/ +typedef struct +{ + size_t bitContainer; + int bitPos; + char* startPtr; + char* ptr; + char* endPtr; +} BIT_CStream_t; + +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t maxDstSize); +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); + +/* +* Start by initCStream, providing the maximum size of write buffer to write into. +* bitStream will never write outside of this buffer. +* buffer must be at least as large as a size_t, otherwise function result will be an error code. +* +* bits are first added to a local register. +* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. +* Writing data into memory is a manual operation, performed by the flushBits function. +* Hence keep track how many bits are potentially stored into local register to avoid register overflow. +* After a flushBits, a maximum of 7 bits might still be stored into local register. +* +* Avoid storing elements of more than 25 bits if you want compatibility with 32-bits bitstream readers. +* +* Last operation is to close the bitStream. +* The function returns the final size of CStream in bytes. +* If data couldn't fit into dstBuffer, it will return a 0 ( == not storable) +*/ + + +/********************************************** +* bitStream decompression API (read backward) +**********************************************/ +typedef struct +{ + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* +* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is manually filled from memory by the BIT_reloadDStream() method. +* A reload guarantee a minimum of ((8*sizeof(size_t))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream() +*/ + + +/****************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ + +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ + +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/**************************************************************** +* Helper functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (register U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r; + _BitScanReverse ( &r, val ); + return (unsigned) r; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return 31 - __builtin_clz (val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + unsigned r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + + +/**************************************************************** +* bitStream encoding +****************************************************************/ + +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t maxSize) +{ + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + maxSize - sizeof(bitC->ptr); + if (maxSize < sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall); + return 0; +} + +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits) +{ + static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF }; /* up to 25 bits */ + bitC->bitContainer |= (value & mask[nbBits]) << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_addBitsFast + * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits) +{ + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_flushBitsFast + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) +{ + size_t nbBytes = bitC->bitPos >> 3; + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) +{ + size_t nbBytes = bitC->bitPos >> 3; + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_closeCStream + * @result : size of CStream, in bytes, or 0 if it cannot fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) +{ + char* endPtr; + + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + + if (bitC->ptr >= bitC->endPtr) /* too close to buffer's end */ + return 0; /* not storable */ + + endPtr = bitC->ptr; + endPtr += bitC->bitPos > 0; /* remaining bits (incomplete byte) */ + + return (endPtr - bitC->startPtr); +} + + +/********************************************************** +* bitStream decoding +**********************************************************/ + +/*!BIT_initDStream +* Initialize a BIT_DStream_t. +* @bitD : a pointer to an already allocated BIT_DStream_t structure +* @srcBuffer must point at the beginning of a bitStream +* @srcSize must be the exact size of the bitStream +* @result : size of stream (== srcSize) or an errorCode if a problem is detected +*/ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + if (srcSize >= sizeof(size_t)) /* normal case */ + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + } + else + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16); + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24); + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32); + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; + default:; + } + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8; + } + + return srcSize; +} + +/*!BIT_lookBits + * Provides next n bits from local register + * local register is not modified (bits are still present for next read/look) + * On 32-bits, maxNbBits==25 + * On 64-bits, maxNbBits==57 + * @return : value extracted + */ +MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); +} + +/*! BIT_lookBitsFast : +* unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); +} + +MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*!BIT_readBits + * Read next n bits from local register. + * pay attention to not read more than nbBits contained into local register. + * @return : extracted value. + */ +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*!BIT_readBitsFast : +* unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t value = BIT_lookBitsFast(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) + { + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; + } + if (bitD->ptr == bitD->start) + { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + { + U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) + { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ + return result; + } +} + +/*! BIT_endOfDStream +* @return Tells if DStream has reached its exact end +*/ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ diff --git a/lib/error.h b/lib/error.h new file mode 100644 index 000000000..a6981dd3b --- /dev/null +++ b/lib/error.h @@ -0,0 +1,91 @@ +/* ****************************************************************** + Error codes and messages + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/****************************************** +* Compiler-specific +******************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#elif defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/****************************************** +* Error Management +******************************************/ +#define PREFIX(name) ZSTD_error_##name + +#define ERROR(name) (size_t)-PREFIX(name) + +#define ERROR_LIST(ITEM) \ + ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \ + ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \ + ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \ + ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \ + ITEM(PREFIX(maxCode)) + +#define ERROR_GENERATE_ENUM(ENUM) ENUM, +typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */ + +#define ERROR_CONVERTTOSTRING(STRING) #STRING, +#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR) +static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) }; + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + static const char* codeError = "Unspecified error code"; + if (ERR_isError(code)) return ERR_strings[-(int)(code)]; + return codeError; +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ diff --git a/lib/fse.c b/lib/fse.c index 4fc692f24..38e40a576 100644 --- a/lib/fse.c +++ b/lib/fse.c @@ -61,13 +61,6 @@ /**************************************************************** * Byte symbol type ****************************************************************/ -typedef struct -{ - unsigned short newState; - unsigned char symbol; - unsigned char nbBits; -} FSE_decode_t; /* size == U32 */ - #endif /* !FSE_COMMONDEFS_ONLY */ @@ -80,8 +73,8 @@ typedef struct # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ #else -# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) # ifdef __GNUC__ +# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) # define FORCE_INLINE static inline __attribute__((always_inline)) # else # define FORCE_INLINE static inline @@ -95,231 +88,10 @@ typedef struct #include /* malloc, free, qsort */ #include /* memcpy, memset */ #include /* printf (debug) */ +#include "bitstream.h" #include "fse_static.h" -#ifndef MEM_ACCESS_MODULE -#define MEM_ACCESS_MODULE -/**************************************************************** -* Basic Types -*****************************************************************/ -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# include -typedef uint8_t BYTE; -typedef uint16_t U16; -typedef int16_t S16; -typedef uint32_t U32; -typedef int32_t S32; -typedef uint64_t U64; -typedef int64_t S64; -#else -typedef unsigned char BYTE; -typedef unsigned short U16; -typedef signed short S16; -typedef unsigned int U32; -typedef signed int S32; -typedef unsigned long long U64; -typedef signed long long S64; -#endif - -#endif /* MEM_ACCESS_MODULE */ - -/**************************************************************** -* Memory I/O -*****************************************************************/ -/* FSE_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets generating assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define FSE_FORCE_MEMORY_ACCESS 2 -# elif defined(__INTEL_COMPILER) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) -# define FSE_FORCE_MEMORY_ACCESS 1 -# endif -#endif - - -static unsigned FSE_32bits(void) -{ - return sizeof(void*)==4; -} - -static unsigned FSE_isLittleEndian(void) -{ - const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ - return one.c[0]; -} - -#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2) - -static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; } -static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; } -static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; } - -static void FSE_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } -static void FSE_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } -static void FSE_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } - -#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; - -static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } - -static void FSE_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } -static void FSE_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } -static void FSE_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } - -#else - -static U16 FSE_read16(const void* memPtr) -{ - U16 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -static U32 FSE_read32(const void* memPtr) -{ - U32 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -static U64 FSE_read64(const void* memPtr) -{ - U64 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -static void FSE_write16(void* memPtr, U16 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -static void FSE_write32(void* memPtr, U32 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -static void FSE_write64(void* memPtr, U64 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -#endif // FSE_FORCE_MEMORY_ACCESS - -static U16 FSE_readLE16(const void* memPtr) -{ - if (FSE_isLittleEndian()) - return FSE_read16(memPtr); - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U16)(p[0] + (p[1]<<8)); - } -} - -static void FSE_writeLE16(void* memPtr, U16 val) -{ - if (FSE_isLittleEndian()) - { - FSE_write16(memPtr, val); - } - else - { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE)val; - p[1] = (BYTE)(val>>8); - } -} - -static U32 FSE_readLE32(const void* memPtr) -{ - if (FSE_isLittleEndian()) - return FSE_read32(memPtr); - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); - } -} - -static void FSE_writeLE32(void* memPtr, U32 val32) -{ - if (FSE_isLittleEndian()) - { - FSE_write32(memPtr, val32); - } - else - { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE)val32; - p[1] = (BYTE)(val32>>8); - p[2] = (BYTE)(val32>>16); - p[3] = (BYTE)(val32>>24); - } -} - -static U64 FSE_readLE64(const void* memPtr) -{ - if (FSE_isLittleEndian()) - return FSE_read64(memPtr); - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24) - + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56)); - } -} - -static void FSE_writeLE64(void* memPtr, U64 val64) -{ - if (FSE_isLittleEndian()) - { - FSE_write64(memPtr, val64); - } - else - { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE)val64; - p[1] = (BYTE)(val64>>8); - p[2] = (BYTE)(val64>>16); - p[3] = (BYTE)(val64>>24); - p[4] = (BYTE)(val64>>32); - p[5] = (BYTE)(val64>>40); - p[6] = (BYTE)(val64>>48); - p[7] = (BYTE)(val64>>56); - } -} - -static size_t FSE_readLEST(const void* memPtr) -{ - if (FSE_32bits()) - return (size_t)FSE_readLE32(memPtr); - else - return (size_t)FSE_readLE64(memPtr); -} - -static void FSE_writeLEST(void* memPtr, size_t val) -{ - if (FSE_32bits()) - FSE_writeLE32(memPtr, (U32)val); - else - FSE_writeLE64(memPtr, (U64)val); -} - - /**************************************************************** * Constants *****************************************************************/ @@ -344,40 +116,9 @@ static void FSE_writeLEST(void* memPtr, size_t val) /**************************************************************** * Complex types ****************************************************************/ -typedef struct -{ - int deltaFindState; - U32 deltaNbBits; -} FSE_symbolCompressionTransform; /* total 8 bytes */ - typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; -/**************************************************************** -* Internal functions -****************************************************************/ -FORCE_INLINE unsigned FSE_highbit32 (register U32 val) -{ -# if defined(_MSC_VER) /* Visual */ - unsigned long r; - _BitScanReverse ( &r, val ); - return (unsigned) r; -# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */ - return 31 - __builtin_clz (val); -# else /* Software version */ - static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - unsigned r; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; - return r; -# endif -} - /**************************************************************** * Templates @@ -424,7 +165,7 @@ size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) *maxSymbolValuePtr = 0; return 0; } - if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC; /* maxSymbolValue too large : unsupported */ + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(GENERIC); /* maxSymbolValue too large : unsupported */ if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; /* 0 == default */ if ((safe) || (sizeof(FSE_FUNCTION_TYPE)>1)) @@ -432,33 +173,33 @@ size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) /* check input values, to avoid count table overflow */ while (ip < iend-3) { - if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++; - if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting2[*ip++]++; - if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting3[*ip++]++; - if (*ip>maxSymbolValue) return (size_t)-FSE_ERROR_GENERIC; Counting4[*ip++]++; + if (*ip>maxSymbolValue) return ERROR(GENERIC); Counting1[*ip++]++; + if (*ip>maxSymbolValue) return ERROR(GENERIC); Counting2[*ip++]++; + if (*ip>maxSymbolValue) return ERROR(GENERIC); Counting3[*ip++]++; + if (*ip>maxSymbolValue) return ERROR(GENERIC); Counting4[*ip++]++; } } else { - U32 cached = FSE_read32(ip); ip += 4; + U32 cached = MEM_read32(ip); ip += 4; while (ip < iend-15) { - U32 c = cached; cached = FSE_read32(ip); ip += 4; + U32 c = cached; cached = MEM_read32(ip); ip += 4; Counting1[(BYTE) c ]++; Counting2[(BYTE)(c>>8) ]++; Counting3[(BYTE)(c>>16)]++; Counting4[ c>>24 ]++; - c = cached; cached = FSE_read32(ip); ip += 4; + c = cached; cached = MEM_read32(ip); ip += 4; Counting1[(BYTE) c ]++; Counting2[(BYTE)(c>>8) ]++; Counting3[(BYTE)(c>>16)]++; Counting4[ c>>24 ]++; - c = cached; cached = FSE_read32(ip); ip += 4; + c = cached; cached = MEM_read32(ip); ip += 4; Counting1[(BYTE) c ]++; Counting2[(BYTE)(c>>8) ]++; Counting3[(BYTE)(c>>16)]++; Counting4[ c>>24 ]++; - c = cached; cached = FSE_read32(ip); ip += 4; + c = cached; cached = MEM_read32(ip); ip += 4; Counting1[(BYTE) c ]++; Counting2[(BYTE)(c>>8) ]++; Counting3[(BYTE)(c>>16)]++; @@ -468,7 +209,7 @@ size_t FSE_FUNCTION_NAME(FSE_count_generic, FSE_FUNCTION_EXTENSION) } /* finish last symbols */ - while (ipmaxSymbolValue)) return (size_t)-FSE_ERROR_GENERIC; Counting1[*ip++]++; } + while (ipmaxSymbolValue)) return ERROR(GENERIC); Counting1[*ip++]++; } for (s=0; s<=(int)maxSymbolValue; s++) { @@ -512,7 +253,7 @@ size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION) const unsigned step = FSE_tableStep(tableSize); unsigned cumul[FSE_MAX_SYMBOL_VALUE+2]; U32 position = 0; - FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* init not necessary, but analyzer complain about it */ + FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* init isn't necessary, even if static analyzer complain about it */ U32 highThreshold = tableSize-1; unsigned symbol; unsigned i; @@ -550,7 +291,7 @@ size_t FSE_FUNCTION_NAME(FSE_buildCTable, FSE_FUNCTION_EXTENSION) } } - if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* Must have gone through all positions */ + if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ /* Build table */ for (i=0; i FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge; - if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge; + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Init, lay down lowprob symbols */ DTableH[0].tableLog = (U16)tableLog; @@ -656,7 +392,7 @@ size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION) } } - if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ /* Build Decoding table */ { @@ -665,7 +401,7 @@ size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION) { FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol); U16 nextState = symbolNext[symbol]++; - tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) ); + tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) ); tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize); } } @@ -675,31 +411,17 @@ size_t FSE_FUNCTION_NAME(FSE_buildDTable, FSE_FUNCTION_EXTENSION) } -/****************************************** -* FSE byte symbol -******************************************/ #ifndef FSE_COMMONDEFS_ONLY +/****************************************** +* FSE helper functions +******************************************/ +unsigned FSE_isError(size_t code) { return ERR_isError(code); } -unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); } - -#define FSE_GENERATE_STRING(STRING) #STRING, -static const char* FSE_errorStrings[] = { FSE_LIST_ERRORS(FSE_GENERATE_STRING) }; - -const char* FSE_getErrorName(size_t code) -{ - static const char* codeError = "Unspecified error code"; - if (FSE_isError(code)) return FSE_errorStrings[-(int)(code)]; - return codeError; -} - -static short FSE_abs(short a) -{ - return a<0? -a : a; -} +const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } /**************************************************************** -* Header bitstream management +* FSE NCount encoding-decoding ****************************************************************/ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) { @@ -707,6 +429,11 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ } +static short FSE_abs(short a) +{ + return a<0 ? -a : a; +} + static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, unsigned writeIsSafe) @@ -744,7 +471,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, { start+=24; bitStream += 0xFFFFU << bitCount; - if ((!writeIsSafe) && (out > oend-2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */ + if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ out[0] = (BYTE) bitStream; out[1] = (BYTE)(bitStream>>8); out+=2; @@ -760,7 +487,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, bitCount += 2; if (bitCount>16) { - if ((!writeIsSafe) && (out > oend - 2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */ + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ out[0] = (BYTE)bitStream; out[1] = (BYTE)(bitStream>>8); out += 2; @@ -772,7 +499,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, short count = normalizedCounter[charnum++]; const short max = (short)((2*threshold-1)-remaining); remaining -= FSE_abs(count); - if (remaining<1) return (size_t)-FSE_ERROR_GENERIC; + if (remaining<1) return ERROR(GENERIC); count++; /* +1 for extra accuracy */ if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ bitStream += count << bitCount; @@ -783,7 +510,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, } if (bitCount>16) { - if ((!writeIsSafe) && (out > oend - 2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */ + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ out[0] = (BYTE)bitStream; out[1] = (BYTE)(bitStream>>8); out += 2; @@ -793,12 +520,12 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, } /* flush remaining bitStream */ - if ((!writeIsSafe) && (out > oend - 2)) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* Buffer overflow */ + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ out[0] = (BYTE)bitStream; out[1] = (BYTE)(bitStream>>8); out+= (bitCount+7) /8; - if (charnum > maxSymbolValue + 1) return (size_t)-FSE_ERROR_GENERIC; + if (charnum > maxSymbolValue + 1) return ERROR(GENERIC); return (out-ostart); } @@ -806,8 +533,8 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { - if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */ - if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); @@ -830,10 +557,10 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t unsigned charnum = 0; int previous0 = 0; - if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong; - bitStream = FSE_readLE32(ip); + if (hbSize < 4) return ERROR(srcSize_wrong); + bitStream = MEM_readLE32(ip); nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ - if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge; + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); bitStream >>= 4; bitCount = 4; *tableLogPtr = nbBits; @@ -852,7 +579,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t if (ip < iend-5) { ip+=2; - bitStream = FSE_readLE32(ip) >> bitCount; + bitStream = MEM_readLE32(ip) >> bitCount; } else { @@ -868,13 +595,13 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t } n0 += bitStream & 3; bitCount += 2; - if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall; + if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); while (charnum < n0) normalizedCounter[charnum++] = 0; if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { ip += bitCount>>3; bitCount &= 7; - bitStream = FSE_readLE32(ip) >> bitCount; + bitStream = MEM_readLE32(ip) >> bitCount; } else bitStream >>= 2; @@ -914,17 +641,17 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t else { bitCount -= (int)(8 * (iend - 4 - ip)); - ip = iend - 4; - } - bitStream = FSE_readLE32(ip) >> (bitCount & 31); + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> (bitCount & 31); } } } - if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC; + if (remaining != 1) return ERROR(GENERIC); *maxSVPtr = charnum-1; ip += (bitCount+7)>>3; - if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong; + if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong); return ip-istart; } @@ -933,7 +660,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t * FSE Compression Code ****************************************************************/ /* -FSE_CTable[0] is a variable size structure which contains : +FSE_CTable is a variable size structure which contains : U16 tableLog; U16 maxSymbolValue; U16 nextStateNumber[1 << tableLog]; // This size is variable @@ -945,7 +672,7 @@ size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog) { size_t size; FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t)); /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */ - if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; + if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC); size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); return size; } @@ -967,20 +694,20 @@ void FSE_freeCTable (FSE_CTable* ct) /* provides the minimum logSize to safely represent a distribution */ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) { - U32 minBitsSrc = FSE_highbit32((U32)(srcSize - 1)) + 1; - U32 minBitsSymbols = FSE_highbit32(maxSymbolValue) + 2; - U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; - return minBits; + U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; + U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; + return minBits; } unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) { - U32 maxBitsSrc = FSE_highbit32((U32)(srcSize - 1)) - 2; + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - 2; U32 tableLog = maxTableLog; - U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; - if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ - if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ + if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ + if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; return tableLog; @@ -1068,7 +795,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, U32 sEnd = (U32)(end >> vStepLog); U32 weight = sEnd - sStart; if (weight < 1) - return (size_t)-FSE_ERROR_GENERIC; + return ERROR(GENERIC); norm[s] = (short)weight; tmpTotal = end; } @@ -1085,9 +812,9 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, { /* Sanity checks */ if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; - if (tableLog < FSE_MIN_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */ - if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; /* Unsupported size */ - if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return (size_t)-FSE_ERROR_GENERIC; /* Too small tableLog, compression potentially impossible */ + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ + if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ { U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; @@ -1168,7 +895,7 @@ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) unsigned s; /* Sanity checks */ - if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ /* header */ tableU16[-2] = (U16) nbBits; @@ -1188,7 +915,6 @@ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) return 0; } - /* fake FSE_CTable, for rle (100% always same symbol) input */ size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) { @@ -1213,91 +939,6 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) } -size_t FSE_initCStream(FSE_CStream_t* bitC, void* start, size_t maxSize) -{ - if (maxSize < sizeof(bitC->ptr)) return (size_t)-FSE_ERROR_dstSize_tooSmall; - bitC->bitContainer = 0; - bitC->bitPos = 0; - bitC->startPtr = (char*)start; - bitC->ptr = bitC->startPtr; - bitC->endPtr = bitC->startPtr + maxSize - sizeof(bitC->ptr); - return 0; -} - -void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) -{ - const U32 tableLog = ( (const U16*) ct) [0]; - statePtr->value = (ptrdiff_t)1<stateTable = ((const U16*) ct) + 2; - statePtr->symbolTT = (const FSE_symbolCompressionTransform*)((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1)); - statePtr->stateLog = tableLog; -} - -void FSE_addBitsFast(FSE_CStream_t* bitC, size_t value, unsigned nbBits) /* only use if upper bits are clean 0 */ -{ - bitC->bitContainer |= value << bitC->bitPos; - bitC->bitPos += nbBits; -} - -void FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits) -{ - static const unsigned mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF }; /* up to 25 bits */ - bitC->bitContainer |= (value & mask[nbBits]) << bitC->bitPos; - bitC->bitPos += nbBits; -} - -void FSE_encodeSymbol(FSE_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol) -{ - const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; - const U16* const stateTable = (const U16*)(statePtr->stateTable); - U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); - FSE_addBits(bitC, statePtr->value, nbBitsOut); - statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; -} - -void FSE_flushBitsFast(FSE_CStream_t* bitC) /* only if dst buffer is large enough ( >= FSE_compressBound()) */ -{ - size_t nbBytes = bitC->bitPos >> 3; - FSE_writeLEST(bitC->ptr, bitC->bitContainer); - bitC->ptr += nbBytes; - bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes*8; -} - -void FSE_flushBits(FSE_CStream_t* bitC) -{ - size_t nbBytes = bitC->bitPos >> 3; - FSE_writeLEST(bitC->ptr, bitC->bitContainer); - bitC->ptr += nbBytes; - if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; - bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes*8; -} - -void FSE_flushCState(FSE_CStream_t* bitC, const FSE_CState_t* statePtr) -{ - FSE_addBits(bitC, statePtr->value, statePtr->stateLog); - FSE_flushBits(bitC); -} - - -size_t FSE_closeCStream(FSE_CStream_t* bitC) -{ - char* endPtr; - - FSE_addBitsFast(bitC, 1, 1); - FSE_flushBits(bitC); - - if (bitC->ptr >= bitC->endPtr) /* too close to buffer's end */ - return 0; /* not compressible */ - - endPtr = bitC->ptr; - endPtr += bitC->bitPos > 0; - - return (endPtr - bitC->startPtr); -} - - static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, const void* src, size_t srcSize, const FSE_CTable* ct, const unsigned fast) @@ -1307,19 +948,19 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, const BYTE* const iend = istart + srcSize; size_t errorCode; - FSE_CStream_t bitC; + BIT_CStream_t bitC; FSE_CState_t CState1, CState2; /* init */ - errorCode = FSE_initCStream(&bitC, dst, dstSize); + errorCode = BIT_initCStream(&bitC, dst, dstSize); if (FSE_isError(errorCode)) return 0; FSE_initCState(&CState1, ct); CState2 = CState1; ip=iend; -#define FSE_FLUSHBITS(s) (fast ? FSE_flushBitsFast(s) : FSE_flushBits(s)) +#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) /* join to even */ if (srcSize & 1) @@ -1357,7 +998,7 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, FSE_flushCState(&bitC, &CState2); FSE_flushCState(&bitC, &CState1); - return FSE_closeCStream(&bitC); + return BIT_closeCStream(&bitC); } size_t FSE_compress_usingCTable (void* dst, size_t dstSize, @@ -1459,7 +1100,7 @@ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) unsigned s; /* Sanity checks */ - if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ /* Build Decoding Table */ DTableH->tableLog = (U16)nbBits; @@ -1474,176 +1115,6 @@ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) return 0; } - -/* FSE_initDStream - * Initialize a FSE_DStream_t. - * srcBuffer must point at the beginning of an FSE block. - * The function result is the size of the FSE_block (== srcSize). - * If srcSize is too small, the function will return an errorCode; - */ -size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize) -{ - if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong; - - if (srcSize >= sizeof(size_t)) - { - U32 contain32; - bitD->start = (const char*)srcBuffer; - bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); - bitD->bitContainer = FSE_readLEST(bitD->ptr); - contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; - if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ - bitD->bitsConsumed = 8 - FSE_highbit32(contain32); - } - else - { - U32 contain32; - bitD->start = (const char*)srcBuffer; - bitD->ptr = bitD->start; - bitD->bitContainer = *(const BYTE*)(bitD->start); - switch(srcSize) - { - case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16); - case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24); - case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32); - case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; - case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; - case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; - default:; - } - contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; - if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ - bitD->bitsConsumed = 8 - FSE_highbit32(contain32); - bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8; - } - - return srcSize; -} - - -/* FSE_lookBits - * Provides next n bits from the bitContainer. - * bitContainer is not modified (bits are still present for next read/look) - * On 32-bits, maxNbBits==25 - * On 64-bits, maxNbBits==57 - * return : value extracted. - */ -static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits) -{ - const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; - return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); -} - -static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */ -{ - const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; - return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); -} - -static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits) -{ - bitD->bitsConsumed += nbBits; -} - - -/* FSE_readBits - * Read next n bits from the bitContainer. - * On 32-bits, don't read more than maxNbBits==25 - * On 64-bits, don't read more than maxNbBits==57 - * Use the fast variant *only* if n >= 1. - * return : value extracted. - */ -size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits) -{ - size_t value = FSE_lookBits(bitD, nbBits); - FSE_skipBits(bitD, nbBits); - return value; -} - -size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */ -{ - size_t value = FSE_lookBitsFast(bitD, nbBits); - FSE_skipBits(bitD, nbBits); - return value; -} - -unsigned FSE_reloadDStream(FSE_DStream_t* bitD) -{ - if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ - return FSE_DStream_tooFar; - - if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) - { - bitD->ptr -= bitD->bitsConsumed >> 3; - bitD->bitsConsumed &= 7; - bitD->bitContainer = FSE_readLEST(bitD->ptr); - return FSE_DStream_unfinished; - } - if (bitD->ptr == bitD->start) - { - if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer; - return FSE_DStream_completed; - } - { - U32 nbBytes = bitD->bitsConsumed >> 3; - U32 result = FSE_DStream_unfinished; - if (bitD->ptr - nbBytes < bitD->start) - { - nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ - result = FSE_DStream_endOfBuffer; - } - bitD->ptr -= nbBytes; - bitD->bitsConsumed -= nbBytes*8; - bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ - return result; - } -} - - -void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt) -{ - const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)dt; - DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog); - FSE_reloadDStream(bitD); - DStatePtr->table = dt + 1; -} - -BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) -{ - const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - const U32 nbBits = DInfo.nbBits; - BYTE symbol = DInfo.symbol; - size_t lowBits = FSE_readBits(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) -{ - const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - const U32 nbBits = DInfo.nbBits; - BYTE symbol = DInfo.symbol; - size_t lowBits = FSE_readBitsFast(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -/* FSE_endOfDStream - Tells if bitD has reached end of bitStream or not */ - -unsigned FSE_endOfDStream(const FSE_DStream_t* bitD) -{ - return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8)); -} - -unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) -{ - return DStatePtr->state == 0; -} - - FORCE_INLINE size_t FSE_decompress_usingDTable_generic( void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, @@ -1654,13 +1125,13 @@ FORCE_INLINE size_t FSE_decompress_usingDTable_generic( BYTE* const omax = op + maxDstSize; BYTE* const olimit = omax-3; - FSE_DStream_t bitD; + BIT_DStream_t bitD; FSE_DState_t state1; FSE_DState_t state2; size_t errorCode; /* Init */ - errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */ + errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */ if (FSE_isError(errorCode)) return errorCode; FSE_initDState(&state1, &bitD, dt); @@ -1669,48 +1140,48 @@ FORCE_INLINE size_t FSE_decompress_usingDTable_generic( #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) /* 4 symbols per loop */ - for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op sizeof(bitD.bitContainer)*8) /* This test must be static */ - FSE_reloadDStream(&bitD); + BIT_reloadDStream(&bitD); op[1] = FSE_GETSYMBOL(&state2); if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ - { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } } + { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } op[2] = FSE_GETSYMBOL(&state1); if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ - FSE_reloadDStream(&bitD); + BIT_reloadDStream(&bitD); op[3] = FSE_GETSYMBOL(&state2); } /* tail */ - /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ while (1) { - if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) ) + if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) ) break; *op++ = FSE_GETSYMBOL(&state1); - if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) ) + if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) ) break; *op++ = FSE_GETSYMBOL(&state2); } /* end ? */ - if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2)) + if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2)) return op-ostart; - if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */ + if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */ - return (size_t)-FSE_ERROR_corruptionDetected; + return ERROR(corruption_detected); } @@ -1737,12 +1208,12 @@ size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSr unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; size_t errorCode; - if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */ + if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */ /* normal FSE decoding mode */ errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); if (FSE_isError(errorCode)) return errorCode; - if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */ + if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */ ip += errorCode; cSrcSize -= errorCode; @@ -1755,712 +1226,4 @@ size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSr -/********************************************************* -* Huff0 : Huffman block compression -*********************************************************/ -#define HUF_MAX_SYMBOL_VALUE 255 -#define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */ -#define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */ -#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ -#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG) -# error "HUF_MAX_TABLELOG is too large !" -#endif - -typedef struct HUF_CElt_s { - U16 val; - BYTE nbBits; -} HUF_CElt ; - -typedef struct nodeElt_s { - U32 count; - U16 parent; - BYTE byte; - BYTE nbBits; -} nodeElt; - -/* HUF_writeCTable() : - return : size of saved CTable */ -size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* tree, U32 maxSymbolValue, U32 huffLog) -{ - BYTE bitsToWeight[HUF_ABSOLUTEMAX_TABLELOG + 1]; - BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; - U32 n; - BYTE* op = (BYTE*)dst; - size_t size; - - /* check conditions */ - if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE + 1) - return (size_t)-FSE_ERROR_GENERIC; - - /* convert to weight */ - bitsToWeight[0] = 0; - for (n=1; n<=huffLog; n++) - bitsToWeight[n] = (BYTE)(huffLog + 1 - n); - for (n=0; n= 128) return (size_t)-FSE_ERROR_GENERIC; /* should never happen, since maxSymbolValue <= 255 */ - if ((size <= 1) || (size >= maxSymbolValue/2)) - { - if (size==1) /* RLE */ - { - /* only possible case : serie of 1 (because there are at least 2) */ - /* can only be 2^n or (2^n-1), otherwise not an huffman tree */ - BYTE code; - switch(maxSymbolValue) - { - case 1: code = 0; break; - case 2: code = 1; break; - case 3: code = 2; break; - case 4: code = 3; break; - case 7: code = 4; break; - case 8: code = 5; break; - case 15: code = 6; break; - case 16: code = 7; break; - case 31: code = 8; break; - case 32: code = 9; break; - case 63: code = 10; break; - case 64: code = 11; break; - case 127: code = 12; break; - case 128: code = 13; break; - default : return (size_t)-FSE_ERROR_corruptionDetected; - } - op[0] = (BYTE)(255-13 + code); - return 1; - } - /* Not compressible */ - if (maxSymbolValue > (241-128)) return (size_t)-FSE_ERROR_GENERIC; /* not implemented (not possible with current format) */ - if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* not enough space within dst buffer */ - op[0] = (BYTE)(128 /*special case*/ + 0 /* Not Compressible */ + (maxSymbolValue-1)); - huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause issue in final combination */ - for (n=0; n= 2) - { - const U32 baseCost = 1 << (largestBits - maxNbBits); - U32 n = lastNonNull; - - while (huffNode[n].nbBits > maxNbBits) - { - totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); - huffNode[n].nbBits = (BYTE)maxNbBits; - n --; - } - - /* renorm totalCost */ - totalCost >>= (largestBits - maxNbBits); /* note : totalCost necessarily multiple of baseCost */ - - // repay cost - while (huffNode[n].nbBits == maxNbBits) n--; // n at last of rank (maxNbBits-1) - - { - const U32 noOne = 0xF0F0F0F0; - // Get pos of last (smallest) symbol per rank - U32 rankLast[HUF_MAX_TABLELOG]; - U32 currentNbBits = maxNbBits; - int pos; - memset(rankLast, 0xF0, sizeof(rankLast)); - for (pos=n ; pos >= 0; pos--) - { - if (huffNode[pos].nbBits >= currentNbBits) continue; - currentNbBits = huffNode[pos].nbBits; - rankLast[maxNbBits-currentNbBits] = pos; - } - - while (totalCost > 0) - { - U32 nBitsToDecrease = FSE_highbit32(totalCost) + 1; - for ( ; nBitsToDecrease > 1; nBitsToDecrease--) - { - U32 highPos = rankLast[nBitsToDecrease]; - U32 lowPos = rankLast[nBitsToDecrease-1]; - if (highPos == noOne) continue; - if (lowPos == noOne) break; - { - U32 highTotal = huffNode[highPos].count; - U32 lowTotal = 2 * huffNode[lowPos].count; - if (highTotal <= lowTotal) break; - } - } - while (rankLast[nBitsToDecrease] == noOne) - nBitsToDecrease ++; // In some rare cases, no more rank 1 left => overshoot to closest - totalCost -= 1 << (nBitsToDecrease-1); - if (rankLast[nBitsToDecrease-1] == noOne) - rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; // now there is one elt - huffNode[rankLast[nBitsToDecrease]].nbBits ++; - if (rankLast[nBitsToDecrease] == 0) - rankLast[nBitsToDecrease] = noOne; - else - { - rankLast[nBitsToDecrease]--; - if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) - rankLast[nBitsToDecrease] = noOne; // rank list emptied - } - } - - while (totalCost < 0) /* Sometimes, cost correction overshoot */ - { - if (rankLast[1] == noOne) /* special case, no weight 1, let's find it back at n */ - { - while (huffNode[n].nbBits == maxNbBits) n--; - huffNode[n+1].nbBits--; - rankLast[1] = n+1; - totalCost++; - continue; - } - huffNode[ rankLast[1] + 1 ].nbBits--; - rankLast[1]++; - totalCost ++; - } - } - } - - return maxNbBits; -} - - -typedef struct { - U32 base; - U32 current; -} rankPos; - -static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) -{ - rankPos rank[32]; - U32 n; - - memset(rank, 0, sizeof(rank)); - for (n=0; n<=maxSymbolValue; n++) - { - U32 r = FSE_highbit32(count[n] + 1); - rank[r].base ++; - } - for (n=30; n>0; n--) rank[n-1].base += rank[n].base; - for (n=0; n<32; n++) rank[n].current = rank[n].base; - for (n=0; n<=maxSymbolValue; n++) - { - U32 c = count[n]; - U32 r = FSE_highbit32(c+1) + 1; - U32 pos = rank[r].current++; - while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--; - huffNode[pos].count = c; - huffNode[pos].byte = (BYTE)n; - } -} - - -#define STARTNODE (HUF_MAX_SYMBOL_VALUE+1) -size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) -{ - nodeElt huffNode0[2*HUF_MAX_SYMBOL_VALUE+1 +1]; - nodeElt* huffNode = huffNode0 + 1; - U32 n, nonNullRank; - int lowS, lowN; - U16 nodeNb = STARTNODE; - U32 nodeRoot; - - /* safety checks */ - if (maxNbBits == 0) maxNbBits = HUF_DEFAULT_TABLELOG; - if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_GENERIC; - memset(huffNode0, 0, sizeof(huffNode0)); - - // sort, decreasing order - HUF_sort(huffNode, count, maxSymbolValue); - - // init for parents - nonNullRank = maxSymbolValue; - while(huffNode[nonNullRank].count == 0) nonNullRank--; - lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; - huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; - huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; - nodeNb++; lowS-=2; - for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); - huffNode0[0].count = (U32)(1U<<31); - - // create parents - while (nodeNb <= nodeRoot) - { - U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; - U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; - huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; - huffNode[n1].parent = huffNode[n2].parent = nodeNb; - nodeNb++; - } - - // distribute weights (unlimited tree height) - huffNode[nodeRoot].nbBits = 0; - for (n=nodeRoot-1; n>=STARTNODE; n--) - huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; - for (n=0; n<=nonNullRank; n++) - huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; - - // enforce maxTableLog - maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); - - // fill result into tree (val, nbBits) - { - U16 nbPerRank[HUF_ABSOLUTEMAX_TABLELOG+1] = {0}; - U16 valPerRank[HUF_ABSOLUTEMAX_TABLELOG+1]; - if (maxNbBits > HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_GENERIC; // check - for (n=0; n<=nonNullRank; n++) - nbPerRank[huffNode[n].nbBits]++; - { - // determine stating value per rank - U16 min = 0; - for (n=maxNbBits; n>0; n--) - { - valPerRank[n] = min; // get starting value within each rank - min += nbPerRank[n]; - min >>= 1; - } - } - for (n=0; n<=maxSymbolValue; n++) - tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; // push nbBits per symbol, symbol order - for (n=0; n<=maxSymbolValue; n++) - tree[n].val = valPerRank[tree[n].nbBits]++; // assign value within rank, symbol order - } - - return maxNbBits; -} - -static void HUF_encodeSymbol(FSE_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) -{ - FSE_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); -} - -#define FSE_FLUSHBITS_1(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*2+7) FSE_FLUSHBITS(stream) - -#define FSE_FLUSHBITS_2(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*4+7) FSE_FLUSHBITS(stream) - -size_t HUF_compress_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, HUF_CElt* CTable) -{ - const BYTE* ip = (const BYTE*) src; - BYTE* const ostart = (BYTE*)dst; - BYTE* op = (BYTE*) ostart; - BYTE* const oend = ostart + dstSize; - U16* jumpTable = (U16*) dst; - size_t n, streamSize; - const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize)); - size_t errorCode; - FSE_CStream_t bitC; - - /* init */ - if (dstSize < 8) return 0; - op += 6; /* jump Table -- could be optimized by delta / deviation */ - errorCode = FSE_initCStream(&bitC, op, oend-op); - if (FSE_isError(errorCode)) return 0; - - n = srcSize & ~15; // mod 16 - switch (srcSize & 15) - { - case 15: HUF_encodeSymbol(&bitC, ip[n+14], CTable); - FSE_FLUSHBITS_1(&bitC); - case 14: HUF_encodeSymbol(&bitC, ip[n+13], CTable); - FSE_FLUSHBITS_2(&bitC); - case 13: HUF_encodeSymbol(&bitC, ip[n+12], CTable); - FSE_FLUSHBITS_1(&bitC); - case 12: HUF_encodeSymbol(&bitC, ip[n+11], CTable); - FSE_FLUSHBITS(&bitC); - case 11: HUF_encodeSymbol(&bitC, ip[n+10], CTable); - FSE_FLUSHBITS_1(&bitC); - case 10: HUF_encodeSymbol(&bitC, ip[n+ 9], CTable); - FSE_FLUSHBITS_2(&bitC); - case 9 : HUF_encodeSymbol(&bitC, ip[n+ 8], CTable); - FSE_FLUSHBITS_1(&bitC); - case 8 : HUF_encodeSymbol(&bitC, ip[n+ 7], CTable); - FSE_FLUSHBITS(&bitC); - case 7 : HUF_encodeSymbol(&bitC, ip[n+ 6], CTable); - FSE_FLUSHBITS_1(&bitC); - case 6 : HUF_encodeSymbol(&bitC, ip[n+ 5], CTable); - FSE_FLUSHBITS_2(&bitC); - case 5 : HUF_encodeSymbol(&bitC, ip[n+ 4], CTable); - FSE_FLUSHBITS_1(&bitC); - case 4 : HUF_encodeSymbol(&bitC, ip[n+ 3], CTable); - FSE_FLUSHBITS(&bitC); - case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); - FSE_FLUSHBITS_2(&bitC); - case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); - FSE_FLUSHBITS_1(&bitC); - case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); - FSE_FLUSHBITS(&bitC); - case 0 : - default: ; - } - - for (; n>0; n-=16) - { - HUF_encodeSymbol(&bitC, ip[n- 4], CTable); - FSE_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 8], CTable); - FSE_FLUSHBITS_2(&bitC); - HUF_encodeSymbol(&bitC, ip[n-12], CTable); - FSE_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n-16], CTable); - FSE_FLUSHBITS(&bitC); - } - streamSize = FSE_closeCStream(&bitC); - if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */ - FSE_writeLE16(jumpTable, (U16)streamSize); - op += streamSize; - - errorCode = FSE_initCStream(&bitC, op, oend-op); - if (FSE_isError(errorCode)) return 0; - n = srcSize & ~15; // mod 16 - for (; n>0; n-=16) - { - HUF_encodeSymbol(&bitC, ip[n- 3], CTable); - FSE_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 7], CTable); - FSE_FLUSHBITS_2(&bitC); - HUF_encodeSymbol(&bitC, ip[n-11], CTable); - FSE_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n-15], CTable); - FSE_FLUSHBITS(&bitC); - } - streamSize = FSE_closeCStream(&bitC); - if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */ - FSE_writeLE16(jumpTable+1, (U16)streamSize); - op += streamSize; - - errorCode = FSE_initCStream(&bitC, op, oend-op); - if (FSE_isError(errorCode)) return 0; - n = srcSize & ~15; // mod 16 - for (; n>0; n-=16) - { - HUF_encodeSymbol(&bitC, ip[n- 2], CTable); - FSE_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 6], CTable); - FSE_FLUSHBITS_2(&bitC); - HUF_encodeSymbol(&bitC, ip[n-10], CTable); - FSE_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n-14], CTable); - FSE_FLUSHBITS(&bitC); - } - streamSize = FSE_closeCStream(&bitC); - if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */ - FSE_writeLE16(jumpTable+2, (U16)streamSize); - op += streamSize; - - errorCode = FSE_initCStream(&bitC, op, oend-op); - if (FSE_isError(errorCode)) return 0; - n = srcSize & ~15; // mod 16 - for (; n>0; n-=16) - { - HUF_encodeSymbol(&bitC, ip[n- 1], CTable); - FSE_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 5], CTable); - FSE_FLUSHBITS_2(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 9], CTable); - FSE_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n-13], CTable); - FSE_FLUSHBITS(&bitC); - } - streamSize = FSE_closeCStream(&bitC); - if (streamSize==0) return 0; /* not enough space within dst buffer == uncompressible */ - op += streamSize; - - return op-ostart; -} - - -size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog) -{ - BYTE* const ostart = (BYTE*)dst; - BYTE* op = ostart; - BYTE* const oend = ostart + dstSize; - - U32 count[HUF_MAX_SYMBOL_VALUE+1]; - HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1]; - size_t errorCode; - - /* early out */ - if (srcSize <= 1) return srcSize; /* Uncompressed or RLE */ - if (!maxSymbolValue) maxSymbolValue = HUF_MAX_SYMBOL_VALUE; - if (!huffLog) huffLog = HUF_DEFAULT_TABLELOG; - - /* Scan input and build symbol stats */ - errorCode = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize); - if (FSE_isError(errorCode)) return errorCode; - if (errorCode == srcSize) return 1; - if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ - - /* Build Huffman Tree */ - errorCode = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog); - if (FSE_isError(errorCode)) return errorCode; - huffLog = (U32)errorCode; - - /* Write table description header */ - errorCode = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog); /* don't write last symbol, implied */ - if (FSE_isError(errorCode)) return errorCode; - op += errorCode; - - /* Compress */ - errorCode = HUF_compress_usingCTable(op, oend - op, src, srcSize, CTable); - if (FSE_isError(errorCode)) return errorCode; - if (errorCode==0) return 0; - op += errorCode; - - /* check compressibility */ - if ((size_t)(op-ostart) >= srcSize-1) - return op-ostart; - - return op-ostart; -} - -size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) -{ - return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_DEFAULT_TABLELOG); -} - - -/********************************************************* -* Huff0 : Huffman block decompression -*********************************************************/ -typedef struct { - BYTE byte; - BYTE nbBits; -} HUF_DElt; - -size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize) -{ - BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; - U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ - U32 weightTotal; - U32 maxBits; - const BYTE* ip = (const BYTE*) src; - size_t iSize = ip[0]; - size_t oSize; - U32 n; - U32 nextRankStart; - HUF_DElt* const dt = (HUF_DElt*)(DTable + 1); - - FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */ - //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzer complain ... */ - if (iSize >= 128) /* special header */ - { - if (iSize >= (242)) /* RLE */ - { - static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 }; - oSize = l[iSize-242]; - memset(huffWeight, 1, oSize); - iSize = 0; - } - else /* Incompressible */ - { - oSize = iSize - 127; - iSize = ((oSize+1)/2); - if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; - ip += 1; - for (n=0; n> 4; - huffWeight[n+1] = ip[n/2] & 15; - } - } - } - else /* header compressed with FSE (normal case) */ - { - if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; - oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */ - if (FSE_isError(oSize)) return oSize; - } - - /* collect weight stats */ - memset(rankVal, 0, sizeof(rankVal)); - weightTotal = 0; - for (n=0; n= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected; - rankVal[huffWeight[n]]++; - weightTotal += (1 << huffWeight[n]) >> 1; - } - - /* get last non-null symbol weight (implied, total must be 2^n) */ - maxBits = FSE_highbit32(weightTotal) + 1; - if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */ - DTable[0] = (U16)maxBits; - { - U32 total = 1 << maxBits; - U32 rest = total - weightTotal; - U32 verif = 1 << FSE_highbit32(rest); - U32 lastWeight = FSE_highbit32(rest) + 1; - if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */ - huffWeight[oSize] = (BYTE)lastWeight; - rankVal[lastWeight]++; - } - - /* check tree construction validity */ - if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */ - - /* Prepare ranks */ - nextRankStart = 0; - for (n=1; n<=maxBits; n++) - { - U32 current = nextRankStart; - nextRankStart += (rankVal[n] << (n-1)); - rankVal[n] = current; - } - - /* fill DTable */ - for (n=0; n<=oSize; n++) - { - const U32 w = huffWeight[n]; - const U32 length = (1 << w) >> 1; - U32 i; - HUF_DElt D; - D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w); - for (i = rankVal[w]; i < rankVal[w] + length; i++) - dt[i] = D; - rankVal[w] += length; - } - - return iSize+1; -} - - -static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog) -{ - const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ - const BYTE c = dt[val].byte; - FSE_skipBits(Dstream, dt[val].nbBits); - return c; -} - -static size_t HUF_decompress_usingDTable( /* -3% slower when non static */ - void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize, - const U16* DTable) -{ - BYTE* const ostart = (BYTE*) dst; - BYTE* op = ostart; - BYTE* const omax = op + maxDstSize; - BYTE* const olimit = omax-15; - - const HUF_DElt* const dt = (const HUF_DElt*)(DTable+1); - const U32 dtLog = DTable[0]; - size_t errorCode; - U32 reloadStatus; - - /* Init */ - - const U16* jumpTable = (const U16*)cSrc; - const size_t length1 = FSE_readLE16(jumpTable); - const size_t length2 = FSE_readLE16(jumpTable+1); - const size_t length3 = FSE_readLE16(jumpTable+2); - const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !! - const char* const start1 = (const char*)(cSrc) + 6; - const char* const start2 = start1 + length1; - const char* const start3 = start2 + length2; - const char* const start4 = start3 + length3; - FSE_DStream_t bitD1, bitD2, bitD3, bitD4; - - if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; - - errorCode = FSE_initDStream(&bitD1, start1, length1); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_initDStream(&bitD2, start2, length2); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_initDStream(&bitD3, start3, length3); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_initDStream(&bitD4, start4, length4); - if (FSE_isError(errorCode)) return errorCode; - - reloadStatus=FSE_reloadDStream(&bitD2); - - /* 16 symbols per loop */ - for ( ; (reloadStatus12)) FSE_reloadDStream(&Dstream) - -#define HUF_DECODE_SYMBOL_2(n, Dstream) \ - op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ - if (FSE_32bits()) FSE_reloadDStream(&Dstream) - - HUF_DECODE_SYMBOL_1( 0, bitD1); - HUF_DECODE_SYMBOL_1( 1, bitD2); - HUF_DECODE_SYMBOL_1( 2, bitD3); - HUF_DECODE_SYMBOL_1( 3, bitD4); - HUF_DECODE_SYMBOL_2( 4, bitD1); - HUF_DECODE_SYMBOL_2( 5, bitD2); - HUF_DECODE_SYMBOL_2( 6, bitD3); - HUF_DECODE_SYMBOL_2( 7, bitD4); - HUF_DECODE_SYMBOL_1( 8, bitD1); - HUF_DECODE_SYMBOL_1( 9, bitD2); - HUF_DECODE_SYMBOL_1(10, bitD3); - HUF_DECODE_SYMBOL_1(11, bitD4); - HUF_DECODE_SYMBOL_0(12, bitD1); - HUF_DECODE_SYMBOL_0(13, bitD2); - HUF_DECODE_SYMBOL_0(14, bitD3); - HUF_DECODE_SYMBOL_0(15, bitD4); - } - - if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */ - return (size_t)-FSE_ERROR_corruptionDetected; - - /* tail */ - { - // bitTail = bitD1; // *much* slower : -20% !??! - FSE_DStream_t bitTail; - bitTail.ptr = bitD1.ptr; - bitTail.bitsConsumed = bitD1.bitsConsumed; - bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer - bitTail.start = start1; - for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; - ip += errorCode; - cSrcSize -= errorCode; - - return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable); -} - - #endif /* FSE_COMMONDEFS_ONLY */ diff --git a/lib/fse.h b/lib/fse.h index 36d86dcdf..4b1ecb15b 100644 --- a/lib/fse.h +++ b/lib/fse.h @@ -73,32 +73,6 @@ FSE_decompress(): */ -/****************************************** -* Huff0 simple functions -******************************************/ -size_t HUF_compress(void* dst, size_t maxDstSize, - const void* src, size_t srcSize); -size_t HUF_decompress(void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize); -/* -HUF_compress(): - Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. - 'dst' buffer must be already allocated. Compression runs faster is maxDstSize >= HUF_compressBound(srcSize) - return : size of compressed data (<= maxDstSize) - Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! - if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. - if FSE_isError(return), compression failed (more details using FSE_getErrorName()) - -HUF_decompress(): - Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize', - into already allocated destination buffer 'dst', of size 'maxDstSize'. - return : size of regenerated data (<= maxDstSize) - or an error code, which can be tested using FSE_isError() - - ** Important ** : HUF_decompress() doesn't decompress non-compressible nor RLE data !!! -*/ - - /****************************************** * Tool functions ******************************************/ @@ -123,8 +97,6 @@ FSE_compress2(): */ size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); -size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); - /****************************************** * FSE detailed API diff --git a/lib/fse_static.h b/lib/fse_static.h index 84e704c65..e5ad53bb7 100644 --- a/lib/fse_static.h +++ b/lib/fse_static.h @@ -43,6 +43,7 @@ extern "C" { * FSE API compatible with DLL ******************************************/ #include "fse.h" +#include "bitstream.h" /****************************************** @@ -57,30 +58,6 @@ extern "C" { #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<>8) + 8) /* only true if pre-filtered with fast heuristic */ -#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ - -/* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */ -#define HUF_DTABLE_SIZE_U16(maxTableLog) (1 + (1<= FSE_DStream_completed + BIT_reloadDStream(&DStream) >= BIT_DStream_completed When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. Checking if DStream has reached its end is performed by : - FSE_endOfDStream(&DStream); + BIT_endOfDStream(&DStream); Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. FSE_endOfDState(&DState); */ /****************************************** -* FSE unsafe symbol API +* FSE unsafe API ******************************************/ -size_t FSE_readBitsFast(FSE_DStream_t* bitD, unsigned nbBits); -/* faster, but works only if nbBits >= 1 (otherwise, result will be corrupted) */ +static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); +/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ -unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD); -/* faster, but works only if allways nbBits >= 1 (otherwise, result will be corrupted) */ + +/****************************************** +* Implementation of inline functions +******************************************/ +typedef struct +{ + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ + +MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) +{ + const U32 tableLog = ( (const U16*) ct) [0]; + statePtr->value = (ptrdiff_t)1<stateTable = ((const U16*) ct) + 2; + statePtr->symbolTT = (const void*)((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1)); + statePtr->stateLog = tableLog; +} + +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol) +{ + const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* const stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, statePtr->value, nbBitsOut); + statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} + +MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +{ + BIT_addBits(bitC, statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} + +/* decompression */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)dt; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} #if defined (__cplusplus) diff --git a/lib/huff0.c b/lib/huff0.c new file mode 100644 index 000000000..7a0b1924b --- /dev/null +++ b/lib/huff0.c @@ -0,0 +1,1705 @@ +/* ****************************************************************** + Huff0 : Huffman coder, part of New Generation Entropy library + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/**************************************************************** +* Compiler specifics +****************************************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +/* inline is defined */ +#elif defined(_MSC_VER) +# define inline __inline +#else +# define inline /* disable inline */ +#endif + + +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#else +# ifdef __GNUC__ +# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +/**************************************************************** +* Includes +****************************************************************/ +#include /* malloc, free, qsort */ +#include /* memcpy, memset */ +#include /* printf (debug) */ +#include "huff0_static.h" +#include "bitstream.h" +#include "fse.h" /* header compression */ + + +/**************************************************************** +* Error Management +****************************************************************/ +#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/****************************************** +* Helper functions +******************************************/ +unsigned HUF_isError(size_t code) { return ERR_isError(code); } + +const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } + + +/********************************************************* +* Huff0 : Huffman block compression +*********************************************************/ +#define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#define HUF_MAX_TABLELOG 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_DEFAULT_TABLELOG HUF_MAX_TABLELOG /* tableLog by default, when not specified */ +#define HUF_MAX_SYMBOL_VALUE 255 +#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG) +# error "HUF_MAX_TABLELOG is too large !" +#endif + +typedef struct HUF_CElt_s { + U16 val; + BYTE nbBits; +} HUF_CElt ; + +typedef struct nodeElt_s { + U32 count; + U16 parent; + BYTE byte; + BYTE nbBits; +} nodeElt; + +/*! HUF_writeCTable() : + @dst : destination buffer + @CTable : huffman tree to save, using huff0 representation + @return : size of saved CTable */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) +{ + BYTE bitsToWeight[HUF_MAX_TABLELOG + 1]; + BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; + U32 n; + BYTE* op = (BYTE*)dst; + size_t size; + + /* check conditions */ + if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE + 1) + return ERROR(GENERIC); + + /* convert to weight */ + bitsToWeight[0] = 0; + for (n=1; n<=huffLog; n++) + bitsToWeight[n] = (BYTE)(huffLog + 1 - n); + for (n=0; n= 128) return ERROR(GENERIC); /* should never happen, since maxSymbolValue <= 255 */ + if ((size <= 1) || (size >= maxSymbolValue/2)) + { + if (size==1) /* RLE */ + { + /* only possible case : serie of 1 (because there are at least 2) */ + /* can only be 2^n or (2^n-1), otherwise not an huffman tree */ + BYTE code; + switch(maxSymbolValue) + { + case 1: code = 0; break; + case 2: code = 1; break; + case 3: code = 2; break; + case 4: code = 3; break; + case 7: code = 4; break; + case 8: code = 5; break; + case 15: code = 6; break; + case 16: code = 7; break; + case 31: code = 8; break; + case 32: code = 9; break; + case 63: code = 10; break; + case 64: code = 11; break; + case 127: code = 12; break; + case 128: code = 13; break; + default : return ERROR(corruption_detected); + } + op[0] = (BYTE)(255-13 + code); + return 1; + } + /* Not compressible */ + if (maxSymbolValue > (241-128)) return ERROR(GENERIC); /* not implemented (not possible with current format) */ + if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ + op[0] = (BYTE)(128 /*special case*/ + 0 /* Not Compressible */ + (maxSymbolValue-1)); + huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause issue in final combination */ + for (n=0; n= 2) + { + const U32 baseCost = 1 << (largestBits - maxNbBits); + U32 n = lastNonNull; + + while (huffNode[n].nbBits > maxNbBits) + { + totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); + huffNode[n].nbBits = (BYTE)maxNbBits; + n --; + } + + /* renorm totalCost */ + totalCost >>= (largestBits - maxNbBits); /* note : totalCost necessarily multiple of baseCost */ + + // repay cost + while (huffNode[n].nbBits == maxNbBits) n--; // n at last of rank (maxNbBits-1) + + { + const U32 noOne = 0xF0F0F0F0; + // Get pos of last (smallest) symbol per rank + U32 rankLast[HUF_MAX_TABLELOG]; + U32 currentNbBits = maxNbBits; + int pos; + memset(rankLast, 0xF0, sizeof(rankLast)); + for (pos=n ; pos >= 0; pos--) + { + if (huffNode[pos].nbBits >= currentNbBits) continue; + currentNbBits = huffNode[pos].nbBits; + rankLast[maxNbBits-currentNbBits] = pos; + } + + while (totalCost > 0) + { + U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; + for ( ; nBitsToDecrease > 1; nBitsToDecrease--) + { + U32 highPos = rankLast[nBitsToDecrease]; + U32 lowPos = rankLast[nBitsToDecrease-1]; + if (highPos == noOne) continue; + if (lowPos == noOne) break; + { + U32 highTotal = huffNode[highPos].count; + U32 lowTotal = 2 * huffNode[lowPos].count; + if (highTotal <= lowTotal) break; + } + } + while (rankLast[nBitsToDecrease] == noOne) + nBitsToDecrease ++; // In some rare cases, no more rank 1 left => overshoot to closest + totalCost -= 1 << (nBitsToDecrease-1); + if (rankLast[nBitsToDecrease-1] == noOne) + rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; // now there is one elt + huffNode[rankLast[nBitsToDecrease]].nbBits ++; + if (rankLast[nBitsToDecrease] == 0) + rankLast[nBitsToDecrease] = noOne; + else + { + rankLast[nBitsToDecrease]--; + if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) + rankLast[nBitsToDecrease] = noOne; // rank list emptied + } + } + + while (totalCost < 0) /* Sometimes, cost correction overshoot */ + { + if (rankLast[1] == noOne) /* special case, no weight 1, let's find it back at n */ + { + while (huffNode[n].nbBits == maxNbBits) n--; + huffNode[n+1].nbBits--; + rankLast[1] = n+1; + totalCost++; + continue; + } + huffNode[ rankLast[1] + 1 ].nbBits--; + rankLast[1]++; + totalCost ++; + } + } + } + + return maxNbBits; +} + + +typedef struct { + U32 base; + U32 current; +} rankPos; + +static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) +{ + rankPos rank[32]; + U32 n; + + memset(rank, 0, sizeof(rank)); + for (n=0; n<=maxSymbolValue; n++) + { + U32 r = BIT_highbit32(count[n] + 1); + rank[r].base ++; + } + for (n=30; n>0; n--) rank[n-1].base += rank[n].base; + for (n=0; n<32; n++) rank[n].current = rank[n].base; + for (n=0; n<=maxSymbolValue; n++) + { + U32 c = count[n]; + U32 r = BIT_highbit32(c+1) + 1; + U32 pos = rank[r].current++; + while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--; + huffNode[pos].count = c; + huffNode[pos].byte = (BYTE)n; + } +} + + +#define STARTNODE (HUF_MAX_SYMBOL_VALUE+1) +size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) +{ + nodeElt huffNode0[2*HUF_MAX_SYMBOL_VALUE+1 +1]; + nodeElt* huffNode = huffNode0 + 1; + U32 n, nonNullRank; + int lowS, lowN; + U16 nodeNb = STARTNODE; + U32 nodeRoot; + + /* safety checks */ + if (maxNbBits == 0) maxNbBits = HUF_DEFAULT_TABLELOG; + if (maxSymbolValue > HUF_MAX_SYMBOL_VALUE) return ERROR(GENERIC); + memset(huffNode0, 0, sizeof(huffNode0)); + + // sort, decreasing order + HUF_sort(huffNode, count, maxSymbolValue); + + // init for parents + nonNullRank = maxSymbolValue; + while(huffNode[nonNullRank].count == 0) nonNullRank--; + lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; + huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; + huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; + nodeNb++; lowS-=2; + for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); + huffNode0[0].count = (U32)(1U<<31); + + // create parents + while (nodeNb <= nodeRoot) + { + U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; + huffNode[n1].parent = huffNode[n2].parent = nodeNb; + nodeNb++; + } + + // distribute weights (unlimited tree height) + huffNode[nodeRoot].nbBits = 0; + for (n=nodeRoot-1; n>=STARTNODE; n--) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + for (n=0; n<=nonNullRank; n++) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + + /* enforce maxTableLog */ + maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); + + /* fill result into tree (val, nbBits) */ + { + U16 nbPerRank[HUF_MAX_TABLELOG+1] = {0}; + U16 valPerRank[HUF_MAX_TABLELOG+1] = {0}; + if (maxNbBits > HUF_MAX_TABLELOG) return ERROR(GENERIC); /* check fit into table */ + for (n=0; n<=nonNullRank; n++) + nbPerRank[huffNode[n].nbBits]++; + { + /* determine stating value per rank */ + U16 min = 0; + for (n=maxNbBits; n>0; n--) + { + valPerRank[n] = min; // get starting value within each rank + min += nbPerRank[n]; + min >>= 1; + } + } + for (n=0; n<=maxSymbolValue; n++) + tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; // push nbBits per symbol, symbol order + for (n=0; n<=maxSymbolValue; n++) + tree[n].val = valPerRank[tree[n].nbBits]++; // assign value within rank, symbol order + } + + return maxNbBits; +} + +static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +{ + BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); +} + +size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } + +#define HUF_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) + +#define HUF_FLUSHBITS_1(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*2+7) HUF_FLUSHBITS(stream) + +#define HUF_FLUSHBITS_2(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_MAX_TABLELOG*4+7) HUF_FLUSHBITS(stream) + +size_t HUF_compress_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + const BYTE* ip = (const BYTE*) src; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + size_t n; + const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize)); + size_t errorCode; + BIT_CStream_t bitC; + + /* init */ + if (dstSize < 8) return 0; /* not enough space to compress */ + errorCode = BIT_initCStream(&bitC, op, oend-op); + if (HUF_isError(errorCode)) return 0; + + n = srcSize & ~3; /* join to mod 4 */ + switch (srcSize & 3) + { + case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); + HUF_FLUSHBITS_2(&bitC); + case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); + HUF_FLUSHBITS_1(&bitC); + case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); + HUF_FLUSHBITS(&bitC); + case 0 : + default: ; + } + + for (; n>0; n-=4) /* note : n&3==0 at this stage */ + { + HUF_encodeSymbol(&bitC, ip[n- 1], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 2], CTable); + HUF_FLUSHBITS_2(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 3], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 4], CTable); + HUF_FLUSHBITS(&bitC); + } + + return BIT_closeCStream(&bitC); +} + + +static size_t HUF_compress_into4Segments(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + size_t segmentSize = (srcSize+3)/4; /* first 3 segments */ + size_t errorCode; + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + + if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ + if (srcSize < 12) return 0; /* no saving possible : too small input */ + op += 6; /* jumpTable */ + + errorCode = HUF_compress_usingCTable(op, oend-op, ip, segmentSize, CTable); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode==0) return 0; + MEM_writeLE16(ostart, (U16)errorCode); + + ip += segmentSize; + op += errorCode; + errorCode = HUF_compress_usingCTable(op, oend-op, ip, segmentSize, CTable); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode==0) return 0; + MEM_writeLE16(ostart+2, (U16)errorCode); + + ip += segmentSize; + op += errorCode; + errorCode = HUF_compress_usingCTable(op, oend-op, ip, segmentSize, CTable); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode==0) return 0; + MEM_writeLE16(ostart+4, (U16)errorCode); + + ip += segmentSize; + op += errorCode; + errorCode = HUF_compress_usingCTable(op, oend-op, ip, iend-ip, CTable); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode==0) return 0; + + op += errorCode; + return op-ostart; +} + + +size_t HUF_compress2 (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + + U32 count[HUF_MAX_SYMBOL_VALUE+1]; + HUF_CElt CTable[HUF_MAX_SYMBOL_VALUE+1]; + size_t errorCode; + + /* checks & inits */ + if (srcSize < 1) return 0; /* Uncompressed */ + if (dstSize < 1) return 0; /* not compressible within dst budget */ + if (srcSize > 128 * 1024) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_MAX_SYMBOL_VALUE; + if (!huffLog) huffLog = HUF_DEFAULT_TABLELOG; + + /* Scan input and build symbol stats */ + errorCode = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } + if (errorCode <= (srcSize >> 7)+1) return 0; /* Heuristic : not compressible enough */ + + /* Build Huffman Tree */ + errorCode = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog); + if (HUF_isError(errorCode)) return errorCode; + huffLog = (U32)errorCode; + + /* Write table description header */ + errorCode = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode + 12 >= srcSize) return 0; /* not useful to try compression */ + op += errorCode; + + /* Compress */ + //errorCode = HUF_compress_usingCTable(op, oend - op, src, srcSize, CTable); /* single segment */ + errorCode = HUF_compress_into4Segments(op, oend - op, src, srcSize, CTable); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode==0) return 0; + op += errorCode; + + /* check compressibility */ + if ((size_t)(op-ostart) >= srcSize-1) + return 0; + + return op-ostart; +} + +size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_DEFAULT_TABLELOG); +} + + +/********************************************************* +* Huff0 : Huffman block decompression +*********************************************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */ + +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */ + +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; + +/*! HUF_readStats + Read compact Huffman tree, saved by HUF_writeCTable + @huffWeight : destination buffer + @return : size read from `src` +*/ +static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize) +{ + U32 weightTotal; + U32 tableLog; + const BYTE* ip = (const BYTE*) src; + size_t iSize = ip[0]; + size_t oSize; + U32 n; + + //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */ + + if (iSize >= 128) /* special header */ + { + if (iSize >= (242)) /* RLE */ + { + static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 }; + oSize = l[iSize-242]; + memset(huffWeight, 1, hwSize); + iSize = 0; + } + else /* Incompressible */ + { + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + if (oSize >= hwSize) return ERROR(corruption_detected); + ip += 1; + for (n=0; n> 4; + huffWeight[n+1] = ip[n/2] & 15; + } + } + } + else /* header compressed with FSE (normal case) */ + { + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */ + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32)); + weightTotal = 0; + for (n=0; n= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected); + rankStats[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } + + /* get last non-null symbol weight (implied, total must be 2^n) */ + tableLog = BIT_highbit32(weightTotal) + 1; + { + U32 total = 1 << tableLog; + U32 rest = total - weightTotal; + U32 verif = 1 << BIT_highbit32(rest); + U32 lastWeight = BIT_highbit32(rest) + 1; + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankStats[lastWeight]++; + } + + /* check tree construction validity */ + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ + + /* results */ + *nbSymbolsPtr = (U32)(oSize+1); + *tableLogPtr = tableLog; + return iSize+1; +} + + +/**************************/ +/* single-symbol decoding */ +/**************************/ + +size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize) +{ + BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1]; + U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */ + U32 tableLog = 0; + const BYTE* ip = (const BYTE*) src; + size_t iSize = ip[0]; + U32 nbSymbols = 0; + U32 n; + U32 nextRankStart; + HUF_DEltX2* const dt = (HUF_DEltX2*)(DTable + 1); + + HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */ + //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */ + + iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + + /* check result */ + if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge); /* DTable is too small */ + DTable[0] = (U16)tableLog; /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */ + + /* Prepare ranks */ + nextRankStart = 0; + for (n=1; n<=tableLog; n++) + { + U32 current = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = current; + } + + /* fill DTable */ + for (n=0; n> 1; + U32 i; + HUF_DEltX2 D; + D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); + for (i = rankVal[w]; i < rankVal[w] + length; i++) + dt[i] = D; + rankVal[w] += length; + } + + return iSize; +} + +static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog) +{ + const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + const BYTE c = dt[val].byte; + BIT_skipBits(Dstream, dt[val].nbBits); + return c; +} + +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \ + HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) + +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) + +static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 4 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) + { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + + /* closer to the end */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + /* no more data to retrieve from bitstream, hence no need to reload */ + while (p < pEnd) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + return pEnd-pStart; +} + +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const U16* DTable) +{ + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + dstSize; + size_t errorCode; + const U32 dtLog = DTable[0]; + const HUF_DEltX2* const dt = ((const HUF_DEltX2*)DTable) +1; + BIT_DStream_t bitD; + errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); + if (HUF_isError(errorCode)) return errorCode; + + HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog); + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + return dstSize; +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG); + const BYTE* ip = (const BYTE*) cSrc; + size_t errorCode; + + errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); + ip += errorCode; + cSrcSize -= errorCode; + + return HUF_decompress1X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable); +} + + +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const U16* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { + const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + + const HUF_DEltX2* const dt = ((const HUF_DEltX2*)DTable) +1; + const U32 dtLog = DTable[0]; + size_t errorCode; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + const size_t length1 = MEM_readLE16(istart); + const size_t length2 = MEM_readLE16(istart+2); + const size_t length3 = MEM_readLE16(istart+4); + size_t length4; + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; + + length4 = cSrcSize - (length1 + length2 + length3 + 6); + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + errorCode = BIT_initDStream(&bitD1, istart1, length1); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD2, istart2, length2); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD3, istart3, length3); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD4, istart4, length4); + if (HUF_isError(errorCode)) return errorCode; + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) + { + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endSignal) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; + } +} + + +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG); + const BYTE* ip = (const BYTE*) cSrc; + size_t errorCode; + + errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize); + if (HUF_isError(errorCode)) return errorCode; + if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); + ip += errorCode; + cSrcSize -= errorCode; + + return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable); +} + + +/***************************/ +/* double-symbols decoding */ +/***************************/ + +static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed, + const U32* rankValOrigin, const int minWeight, + const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, + U32 nbBitsBaseline, U16 baseSeq) +{ + HUF_DEltX4 DElt; + U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; + U32 s; + + /* get pre-calculated rankVal */ + memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill skipped values */ + if (minWeight>1) + { + U32 i, skipSize = rankVal[minWeight]; + MEM_writeLE16(&(DElt.sequence), baseSeq); + DElt.nbBits = (BYTE)(consumed); + DElt.length = 1; + for (i = 0; i < skipSize; i++) + DTable[i] = DElt; + } + + /* fill DTable */ + for (s=0; s= 1 */ + + rankVal[weight] += length; + } +} + +typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1]; + +static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, + const sortedSymbol_t* sortedList, const U32 sortedListSize, + const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, + const U32 nbBitsBaseline) +{ + U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; + const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ + const U32 minBits = nbBitsBaseline - maxWeight; + U32 s; + + memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill DTable */ + for (s=0; s= minBits) /* enough room for a second symbol */ + { + U32 sortedRank; + int minWeight = nbBits + scaleLog; + if (minWeight < 1) minWeight = 1; + sortedRank = rankStart[minWeight]; + HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits, + rankValOrigin[nbBits], minWeight, + sortedList+sortedRank, sortedListSize-sortedRank, + nbBitsBaseline, symbol); + } + else + { + U32 i; + const U32 end = start + length; + HUF_DEltX4 DElt; + + MEM_writeLE16(&(DElt.sequence), symbol); + DElt.nbBits = (BYTE)(nbBits); + DElt.length = 1; + for (i = start; i < end; i++) + DTable[i] = DElt; + } + rankVal[weight] += length; + } +} + +size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize) +{ + BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1]; + sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1]; + U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 }; + U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 }; + U32* const rankStart = rankStart0+1; + rankVal_t rankVal; + U32 tableLog, maxW, sizeOfSort, nbSymbols; + const U32 memLog = DTable[0]; + const BYTE* ip = (const BYTE*) src; + size_t iSize = ip[0]; + HUF_DEltX4* const dt = ((HUF_DEltX4*)DTable) + 1; + + HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */ + if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge); + //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */ + + iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + + /* check result */ + if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ + + /* find maxWeight */ + for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ + + /* Get start index of each weight */ + { + U32 w, nextRankStart = 0; + for (w=1; w<=maxW; w++) + { + U32 current = nextRankStart; + nextRankStart += rankStats[w]; + rankStart[w] = current; + } + rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ + sizeOfSort = nextRankStart; + } + + /* sort symbols by weight */ + { + U32 s; + for (s=0; s> consumed; + } + + + HUF_fillDTableX4(dt, memLog, + sortedSymbol, sizeOfSort, + rankStart0, rankVal, maxW, + tableLog+1); + + return iSize; +} + + +static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) +{ + const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 2); + BIT_skipBits(DStream, dt[val].nbBits); + return dt[val].length; +} + +static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) +{ + const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 1); + if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); + else + { + if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) + { + BIT_skipBits(DStream, dt[val].nbBits); + if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ + } + } + return 1; +} + + +#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7)) + { + HUF_DECODE_SYMBOLX4_2(p, bitDPtr); + HUF_DECODE_SYMBOLX4_1(p, bitDPtr); + HUF_DECODE_SYMBOLX4_2(p, bitDPtr); + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + } + + /* closer to the end */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2)) + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + + +size_t HUF_decompress1X4_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const U32* DTable) +{ + const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + + const U32 dtLog = DTable[0]; + const HUF_DEltX4* const dt = ((const HUF_DEltX4*)DTable) +1; + size_t errorCode; + + /* Init */ + BIT_DStream_t bitD; + errorCode = BIT_initDStream(&bitD, istart, cSrcSize); + if (HUF_isError(errorCode)) return errorCode; + + /* finish bitStreams one by one */ + HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtLog); + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; +} + +size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG); + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; + cSrcSize -= hSize; + + return HUF_decompress1X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable); +} + +size_t HUF_decompress4X4_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const U32* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { + const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + + const HUF_DEltX4* const dt = ((const HUF_DEltX4*)DTable) +1; + const U32 dtLog = DTable[0]; + size_t errorCode; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + const size_t length1 = MEM_readLE16(istart); + const size_t length2 = MEM_readLE16(istart+2); + const size_t length3 = MEM_readLE16(istart+4); + size_t length4; + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; + + length4 = cSrcSize - (length1 + length2 + length3 + 6); + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + errorCode = BIT_initDStream(&bitD1, istart1, length1); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD2, istart2, length2); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD3, istart3, length3); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD4, istart4, length4); + if (HUF_isError(errorCode)) return errorCode; + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) + { + HUF_DECODE_SYMBOLX4_2(op1, &bitD1); + HUF_DECODE_SYMBOLX4_2(op2, &bitD2); + HUF_DECODE_SYMBOLX4_2(op3, &bitD3); + HUF_DECODE_SYMBOLX4_2(op4, &bitD4); + HUF_DECODE_SYMBOLX4_1(op1, &bitD1); + HUF_DECODE_SYMBOLX4_1(op2, &bitD2); + HUF_DECODE_SYMBOLX4_1(op3, &bitD3); + HUF_DECODE_SYMBOLX4_1(op4, &bitD4); + HUF_DECODE_SYMBOLX4_2(op1, &bitD1); + HUF_DECODE_SYMBOLX4_2(op2, &bitD2); + HUF_DECODE_SYMBOLX4_2(op3, &bitD3); + HUF_DECODE_SYMBOLX4_2(op4, &bitD4); + HUF_DECODE_SYMBOLX4_0(op1, &bitD1); + HUF_DECODE_SYMBOLX4_0(op2, &bitD2); + HUF_DECODE_SYMBOLX4_0(op3, &bitD3); + HUF_DECODE_SYMBOLX4_0(op4, &bitD4); + + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); + + /* check */ + endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endSignal) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; + } +} + + +size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG); + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; + cSrcSize -= hSize; + + return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable); +} + + +/**********************************/ +/* quad-symbol decoding */ +/**********************************/ +typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6; +typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6; + +/* recursive, up to level 3; may benefit from