From 9306feb8fabdbaa1e4c4ccee90eb3cf8848556e6 Mon Sep 17 00:00:00 2001 From: Stella Lau Date: Thu, 13 Jul 2017 13:44:48 -0700 Subject: [PATCH] [ldm] Switch to using lib/common/mem.h and move typedefs to ldm.h Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: Blame Revision: --- contrib/long_distance_matching/Makefile | 7 +- contrib/long_distance_matching/ldm.c | 82 +++++++++++------------ contrib/long_distance_matching/ldm.h | 15 +++-- contrib/long_distance_matching/main-ldm.c | 13 ++-- contrib/long_distance_matching/util.c | 68 ------------------- contrib/long_distance_matching/util.h | 27 -------- 6 files changed, 57 insertions(+), 155 deletions(-) delete mode 100644 contrib/long_distance_matching/util.c delete mode 100644 contrib/long_distance_matching/util.h diff --git a/contrib/long_distance_matching/Makefile b/contrib/long_distance_matching/Makefile index 5ffd4eafe..8ba16d03d 100644 --- a/contrib/long_distance_matching/Makefile +++ b/contrib/long_distance_matching/Makefile @@ -9,6 +9,7 @@ # This Makefile presumes libzstd is installed, using `sudo make install` +CPPFLAGS+= -I../../lib/common CFLAGS ?= -O3 DEBUGFLAGS = -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ @@ -26,11 +27,7 @@ default: all all: main-ldm - -#main : ldm.c main.c -# $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ - -main-ldm : util.c ldm.c main-ldm.c +main-ldm : ldm.c main-ldm.c $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ clean: diff --git a/contrib/long_distance_matching/ldm.c b/contrib/long_distance_matching/ldm.c index 7a5450732..00099fbeb 100644 --- a/contrib/long_distance_matching/ldm.c +++ b/contrib/long_distance_matching/ldm.c @@ -4,23 +4,22 @@ #include #include "ldm.h" -#include "util.h" // Insert every (HASH_ONLY_EVERY + 1) into the hash table. #define HASH_ONLY_EVERY 0 -#define LDM_MEMORY_USAGE 20 +#define LDM_MEMORY_USAGE 22 #define LDM_HASHLOG (LDM_MEMORY_USAGE-2) #define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE)) #define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2) #define LDM_OFFSET_SIZE 4 -#define WINDOW_SIZE (1 << 20) +#define WINDOW_SIZE (1 << 29) //These should be multiples of four. -#define LDM_HASH_LENGTH 4 -#define MINMATCH 4 +#define LDM_HASH_LENGTH 8 +#define MINMATCH 8 #define ML_BITS 4 #define ML_MASK ((1U<= 8; lengthLeft -= 8) { - if (LDM_read64(curP) != LDM_read64(curMatch)) { + if (MEM_read64(curP) != MEM_read64(curMatch)) { return 0; } curP += 8; curMatch += 8; } if (lengthLeft > 0) { - return (LDM_read32(curP) == LDM_read32(curMatch)); + return (MEM_read32(curP) == MEM_read32(curMatch)); } return 1; } @@ -184,10 +173,9 @@ static hash_t checksumToHash(U32 sum) { * b(k,l) = \sum_{i = k}^l ((l - i + 1) * x_i) (mod M) * checksum(k,l) = a(k,l) + 2^{16} * b(k,l) */ -static U32 getChecksum(const char *data, U32 len) { +static U32 getChecksum(const BYTE *buf, U32 len) { U32 i; U32 s1, s2; - const schar *buf = (const schar *)data; s1 = s2 = 0; for (i = 0; i < (len - 4); i += 4) { @@ -215,7 +203,7 @@ static U32 getChecksum(const char *data, U32 len) { * Thus toRemove should correspond to data[0]. */ static U32 updateChecksum(U32 sum, U32 len, - schar toRemove, schar toAdd) { + BYTE toRemove, BYTE toAdd) { U32 s1 = (sum & 0xffff) - toRemove + toAdd; U32 s2 = (sum >> 16) - ((toRemove + CHECKSUM_CHAR_OFFSET) * len) + s1; @@ -244,13 +232,13 @@ static void setNextHash(LDM_CCtx *cctx) { // cctx->nextSum = getChecksum((const char *)cctx->nextIp, LDM_HASH_LENGTH); cctx->nextSum = updateChecksum( cctx->lastSum, LDM_HASH_LENGTH, - (schar)((cctx->lastPosHashed)[0]), - (schar)((cctx->lastPosHashed)[LDM_HASH_LENGTH])); + (cctx->lastPosHashed)[0], + (cctx->lastPosHashed)[LDM_HASH_LENGTH]); cctx->nextPosHashed = cctx->nextIp; cctx->nextHash = checksumToHash(cctx->nextSum); #ifdef RUN_CHECKS - check = getChecksum((const char *)cctx->nextIp, LDM_HASH_LENGTH); + check = getChecksum(cctx->nextIp, LDM_HASH_LENGTH); if (check != cctx->nextSum) { printf("CHECK: setNextHash failed %u %u\n", check, cctx->nextSum); @@ -279,7 +267,8 @@ static void putHashOfCurrentPositionFromHash( // Hash only every HASH_ONLY_EVERY times, based on cctx->ip. // Note: this works only when cctx->step is 1. if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) { - (cctx->hashTable)[hash] = (hashEntry){ (offset_t)(cctx->ip - cctx->ibase) }; + (cctx->hashTable)[hash] = + (LDM_hashEntry){ (offset_t)(cctx->ip - cctx->ibase) }; } cctx->lastPosHashed = cctx->ip; @@ -307,7 +296,7 @@ static void LDM_updateLastHashFromNextHash(LDM_CCtx *cctx) { * Insert hash of the current position into the hash table. */ static void LDM_putHashOfCurrentPosition(LDM_CCtx *cctx) { - U32 sum = getChecksum((const char *)cctx->ip, LDM_HASH_LENGTH); + U32 sum = getChecksum(cctx->ip, LDM_HASH_LENGTH); hash_t hash = checksumToHash(sum); #ifdef RUN_CHECKS @@ -337,7 +326,7 @@ static unsigned countMatchLength(const BYTE *pIn, const BYTE *pMatch, const BYTE *pInLimit) { const BYTE * const pStart = pIn; while (pIn < pInLimit - 1) { - BYTE const diff = LDM_readByte(pMatch) ^ LDM_readByte(pIn); + BYTE const diff = (*pMatch) ^ *(pIn); if (!diff) { pIn++; pMatch++; @@ -427,9 +416,9 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) { * Update input pointer, inserting hashes into hash table along the way. */ static void outputBlock(LDM_CCtx *cctx, - unsigned const literalLength, - unsigned const offset, - unsigned const matchLength) { + const unsigned literalLength, + const unsigned offset, + const unsigned matchLength) { BYTE *token = cctx->op++; /* Encode the literal length. */ @@ -449,7 +438,7 @@ static void outputBlock(LDM_CCtx *cctx, cctx->op += literalLength; /* Encode the offset. */ - LDM_write32(cctx->op, offset); + MEM_write32(cctx->op, offset); cctx->op += LDM_OFFSET_SIZE; /* Encode the match length. */ @@ -457,10 +446,10 @@ static void outputBlock(LDM_CCtx *cctx, unsigned matchLengthRemaining = matchLength; *token += ML_MASK; matchLengthRemaining -= ML_MASK; - LDM_write32(cctx->op, 0xFFFFFFFF); + MEM_write32(cctx->op, 0xFFFFFFFF); while (matchLengthRemaining >= 4*0xFF) { cctx->op += 4; - LDM_write32(cctx->op, 0xffffffff); + MEM_write32(cctx->op, 0xffffffff); matchLengthRemaining -= 4*0xFF; } cctx->op += matchLengthRemaining / 255; @@ -514,9 +503,9 @@ size_t LDM_compress(const void *src, size_t srcSize, * length) and update pointers and hashes. */ { - unsigned const literalLength = (unsigned)(cctx.ip - cctx.anchor); - unsigned const offset = cctx.ip - match; - unsigned const matchLength = countMatchLength( + const unsigned literalLength = (unsigned)(cctx.ip - cctx.anchor); + const unsigned offset = cctx.ip - match; + const unsigned matchLength = countMatchLength( cctx.ip + MINMATCH, match + MINMATCH, cctx.ihashLimit); #ifdef COMPUTE_STATS @@ -605,7 +594,7 @@ size_t LDM_decompress(const void *src, size_t compressSize, size_t length, offset; /* Get the literal length. */ - unsigned const token = *(dctx.ip)++; + const unsigned token = *(dctx.ip)++; if ((length = (token >> ML_BITS)) == RUN_MASK) { unsigned s; do { @@ -621,7 +610,7 @@ size_t LDM_decompress(const void *src, size_t compressSize, dctx.op = cpy; //TODO : dynamic offset size - offset = LDM_read32(dctx.ip); + offset = MEM_read32(dctx.ip); dctx.ip += LDM_OFFSET_SIZE; match = dctx.op - offset; @@ -647,6 +636,11 @@ size_t LDM_decompress(const void *src, size_t compressSize, return dctx.op - (BYTE *)dst; } +// TODO: implement and test hash function +void LDM_test(void) { + +} + /* void LDM_test(const void *src, size_t srcSize, void *dst, size_t maxDstSize) { diff --git a/contrib/long_distance_matching/ldm.h b/contrib/long_distance_matching/ldm.h index f04b6e958..fd8c2ab88 100644 --- a/contrib/long_distance_matching/ldm.h +++ b/contrib/long_distance_matching/ldm.h @@ -3,10 +3,18 @@ #include /* size_t */ +#include "mem.h" // from /lib/common/mem.h + #define LDM_COMPRESS_SIZE 8 #define LDM_DECOMPRESS_SIZE 8 #define LDM_HEADER_SIZE ((LDM_COMPRESS_SIZE)+(LDM_DECOMPRESS_SIZE)) +typedef U32 offset_t; +typedef U32 hash_t; +typedef struct LDM_hashEntry LDM_hashEntry; +typedef struct LDM_compressStats LDM_compressStats; +typedef struct LDM_CCtx LDM_CCtx; + /** * Compresses src into dst. * @@ -46,10 +54,9 @@ size_t LDM_decompress(const void *src, size_t srcSize, * * NB: LDM_compress and LDM_decompress currently do not add/read headers. */ -void LDM_readHeader(const void *src, size_t *compressSize, - size_t *decompressSize); +void LDM_readHeader(const void *src, U64 *compressSize, + U64 *decompressSize); -void LDM_test(const void *src, size_t srcSize, - void *dst, size_t maxDstSize); +void LDM_test(void); #endif /* LDM_H */ diff --git a/contrib/long_distance_matching/main-ldm.c b/contrib/long_distance_matching/main-ldm.c index 9e7d45264..2017cf4ef 100644 --- a/contrib/long_distance_matching/main-ldm.c +++ b/contrib/long_distance_matching/main-ldm.c @@ -75,12 +75,6 @@ static int compress(const char *fname, const char *oname) { return 1; } -/* -#ifdef TEST - LDM_test(src, statbuf.st_size, - dst + LDM_HEADER_SIZE, statbuf.st_size); -#endif -*/ compressSize = LDM_HEADER_SIZE + LDM_compress(src, statbuf.st_size, dst + LDM_HEADER_SIZE, maxCompressSize); @@ -116,7 +110,8 @@ static int decompress(const char *fname, const char *oname) { int fdin, fdout; struct stat statbuf; char *src, *dst; - size_t compressSize, decompressSize, outSize; + U64 compressSize, decompressSize; + size_t outSize; /* Open the input file. */ if ((fdin = open(fname, O_RDONLY)) < 0) { @@ -267,5 +262,9 @@ int main(int argc, const char *argv[]) { } /* verify */ verify(inpFilename, decFilename); + +#ifdef TEST + LDM_test(); +#endif return 0; } diff --git a/contrib/long_distance_matching/util.c b/contrib/long_distance_matching/util.c deleted file mode 100644 index 47ac8a126..000000000 --- a/contrib/long_distance_matching/util.c +++ /dev/null @@ -1,68 +0,0 @@ -#include -#include -#include -#include - -#include "util.h" - -typedef uint8_t BYTE; -typedef uint16_t U16; -typedef uint32_t U32; -typedef int32_t S32; -typedef uint64_t U64; - -unsigned LDM_isLittleEndian(void) { - const union { U32 u; BYTE c[4]; } one = { 1 }; - return one.c[0]; -} - -U16 LDM_read16(const void *memPtr) { - U16 val; - memcpy(&val, memPtr, sizeof(val)); - return val; -} - -U16 LDM_readLE16(const void *memPtr) { - if (LDM_isLittleEndian()) { - return LDM_read16(memPtr); - } else { - const BYTE *p = (const BYTE *)memPtr; - return (U16)((U16)p[0] + (p[1] << 8)); - } -} - -void LDM_write16(void *memPtr, U16 value){ - memcpy(memPtr, &value, sizeof(value)); -} - -void LDM_write32(void *memPtr, U32 value) { - memcpy(memPtr, &value, sizeof(value)); -} - -void LDM_writeLE16(void *memPtr, U16 value) { - if (LDM_isLittleEndian()) { - LDM_write16(memPtr, value); - } else { - BYTE* p = (BYTE *)memPtr; - p[0] = (BYTE) value; - p[1] = (BYTE)(value>>8); - } -} - -U32 LDM_read32(const void *ptr) { - return *(const U32 *)ptr; -} - -U64 LDM_read64(const void *ptr) { - return *(const U64 *)ptr; -} - -void LDM_copy8(void *dst, const void *src) { - memcpy(dst, src, 8); -} - -BYTE LDM_readByte(const void *memPtr) { - BYTE val; - memcpy(&val, memPtr, 1); - return val; -} diff --git a/contrib/long_distance_matching/util.h b/contrib/long_distance_matching/util.h deleted file mode 100644 index dbf55cbc2..000000000 --- a/contrib/long_distance_matching/util.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef LDM_UTIL_H -#define LDM_UTIL_H - -unsigned LDM_isLittleEndian(void); - -uint16_t LDM_read16(const void *memPtr); - -uint16_t LDM_readLE16(const void *memPtr); - -void LDM_write16(void *memPtr, uint16_t value); - -void LDM_write32(void *memPtr, uint32_t value); - -void LDM_writeLE16(void *memPtr, uint16_t value); - -uint32_t LDM_read32(const void *ptr); - -uint64_t LDM_read64(const void *ptr); - -void LDM_copy8(void *dst, const void *src); - -uint8_t LDM_readByte(const void *ptr); - -void LDM_write64(void *memPtr, uint64_t value); - - -#endif /* LDM_UTIL_H */