1
0
mirror of https://github.com/facebook/zstd.git synced 2025-08-08 17:22:10 +03:00

[ldm] Switch to using lib/common/mem.h and move typedefs to ldm.h

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

Blame Revision:
This commit is contained in:
Stella Lau
2017-07-13 13:44:48 -07:00
parent 50421d9474
commit 9306feb8fa
6 changed files with 57 additions and 155 deletions

View File

@@ -9,6 +9,7 @@
# This Makefile presumes libzstd is installed, using `sudo make install` # This Makefile presumes libzstd is installed, using `sudo make install`
CPPFLAGS+= -I../../lib/common
CFLAGS ?= -O3 CFLAGS ?= -O3
DEBUGFLAGS = -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ DEBUGFLAGS = -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
@@ -26,11 +27,7 @@ default: all
all: main-ldm all: main-ldm
main-ldm : ldm.c main-ldm.c
#main : ldm.c main.c
# $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
main-ldm : util.c ldm.c main-ldm.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
clean: clean:

View File

@@ -4,23 +4,22 @@
#include <stdio.h> #include <stdio.h>
#include "ldm.h" #include "ldm.h"
#include "util.h"
// Insert every (HASH_ONLY_EVERY + 1) into the hash table. // Insert every (HASH_ONLY_EVERY + 1) into the hash table.
#define HASH_ONLY_EVERY 0 #define HASH_ONLY_EVERY 0
#define LDM_MEMORY_USAGE 20 #define LDM_MEMORY_USAGE 22
#define LDM_HASHLOG (LDM_MEMORY_USAGE-2) #define LDM_HASHLOG (LDM_MEMORY_USAGE-2)
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE)) #define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
#define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2) #define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2)
#define LDM_OFFSET_SIZE 4 #define LDM_OFFSET_SIZE 4
#define WINDOW_SIZE (1 << 20) #define WINDOW_SIZE (1 << 29)
//These should be multiples of four. //These should be multiples of four.
#define LDM_HASH_LENGTH 4 #define LDM_HASH_LENGTH 8
#define MINMATCH 4 #define MINMATCH 8
#define ML_BITS 4 #define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1) #define ML_MASK ((1U<<ML_BITS)-1)
@@ -32,21 +31,11 @@
//#define RUN_CHECKS //#define RUN_CHECKS
//#define LDM_DEBUG //#define LDM_DEBUG
typedef uint8_t BYTE; struct LDM_hashEntry {
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
typedef uint32_t offset_t;
typedef uint32_t hash_t;
typedef signed char schar;
typedef struct hashEntry {
offset_t offset; offset_t offset;
} hashEntry; };
typedef struct LDM_compressStats { struct LDM_compressStats {
U32 numMatches; U32 numMatches;
U32 totalMatchLength; U32 totalMatchLength;
U32 totalLiteralLength; U32 totalLiteralLength;
@@ -54,9 +43,9 @@ typedef struct LDM_compressStats {
U32 numCollisions; U32 numCollisions;
U32 numHashInserts; U32 numHashInserts;
} LDM_compressStats; };
typedef struct LDM_CCtx { struct LDM_CCtx {
size_t isize; /* Input size */ size_t isize; /* Input size */
size_t maxOSize; /* Maximum output size */ size_t maxOSize; /* Maximum output size */
@@ -79,7 +68,7 @@ typedef struct LDM_CCtx {
LDM_compressStats stats; /* Compression statistics */ LDM_compressStats stats; /* Compression statistics */
hashEntry hashTable[LDM_HASHTABLESIZE_U32]; LDM_hashEntry hashTable[LDM_HASHTABLESIZE_U32];
const BYTE *lastPosHashed; /* Last position hashed */ const BYTE *lastPosHashed; /* Last position hashed */
hash_t lastHash; /* Hash corresponding to lastPosHashed */ hash_t lastHash; /* Hash corresponding to lastPosHashed */
@@ -94,7 +83,7 @@ typedef struct LDM_CCtx {
// DEBUG // DEBUG
const BYTE *DEBUG_setNextHash; const BYTE *DEBUG_setNextHash;
} LDM_CCtx; };
/** /**
* Outputs compression statistics. * Outputs compression statistics.
@@ -157,14 +146,14 @@ static int LDM_isValidMatch(const BYTE *p, const BYTE *match) {
const BYTE *curMatch = match; const BYTE *curMatch = match;
for (; lengthLeft >= 8; lengthLeft -= 8) { for (; lengthLeft >= 8; lengthLeft -= 8) {
if (LDM_read64(curP) != LDM_read64(curMatch)) { if (MEM_read64(curP) != MEM_read64(curMatch)) {
return 0; return 0;
} }
curP += 8; curP += 8;
curMatch += 8; curMatch += 8;
} }
if (lengthLeft > 0) { if (lengthLeft > 0) {
return (LDM_read32(curP) == LDM_read32(curMatch)); return (MEM_read32(curP) == MEM_read32(curMatch));
} }
return 1; return 1;
} }
@@ -184,10 +173,9 @@ static hash_t checksumToHash(U32 sum) {
* b(k,l) = \sum_{i = k}^l ((l - i + 1) * x_i) (mod M) * b(k,l) = \sum_{i = k}^l ((l - i + 1) * x_i) (mod M)
* checksum(k,l) = a(k,l) + 2^{16} * b(k,l) * checksum(k,l) = a(k,l) + 2^{16} * b(k,l)
*/ */
static U32 getChecksum(const char *data, U32 len) { static U32 getChecksum(const BYTE *buf, U32 len) {
U32 i; U32 i;
U32 s1, s2; U32 s1, s2;
const schar *buf = (const schar *)data;
s1 = s2 = 0; s1 = s2 = 0;
for (i = 0; i < (len - 4); i += 4) { for (i = 0; i < (len - 4); i += 4) {
@@ -215,7 +203,7 @@ static U32 getChecksum(const char *data, U32 len) {
* Thus toRemove should correspond to data[0]. * Thus toRemove should correspond to data[0].
*/ */
static U32 updateChecksum(U32 sum, U32 len, static U32 updateChecksum(U32 sum, U32 len,
schar toRemove, schar toAdd) { BYTE toRemove, BYTE toAdd) {
U32 s1 = (sum & 0xffff) - toRemove + toAdd; U32 s1 = (sum & 0xffff) - toRemove + toAdd;
U32 s2 = (sum >> 16) - ((toRemove + CHECKSUM_CHAR_OFFSET) * len) + s1; U32 s2 = (sum >> 16) - ((toRemove + CHECKSUM_CHAR_OFFSET) * len) + s1;
@@ -244,13 +232,13 @@ static void setNextHash(LDM_CCtx *cctx) {
// cctx->nextSum = getChecksum((const char *)cctx->nextIp, LDM_HASH_LENGTH); // cctx->nextSum = getChecksum((const char *)cctx->nextIp, LDM_HASH_LENGTH);
cctx->nextSum = updateChecksum( cctx->nextSum = updateChecksum(
cctx->lastSum, LDM_HASH_LENGTH, cctx->lastSum, LDM_HASH_LENGTH,
(schar)((cctx->lastPosHashed)[0]), (cctx->lastPosHashed)[0],
(schar)((cctx->lastPosHashed)[LDM_HASH_LENGTH])); (cctx->lastPosHashed)[LDM_HASH_LENGTH]);
cctx->nextPosHashed = cctx->nextIp; cctx->nextPosHashed = cctx->nextIp;
cctx->nextHash = checksumToHash(cctx->nextSum); cctx->nextHash = checksumToHash(cctx->nextSum);
#ifdef RUN_CHECKS #ifdef RUN_CHECKS
check = getChecksum((const char *)cctx->nextIp, LDM_HASH_LENGTH); check = getChecksum(cctx->nextIp, LDM_HASH_LENGTH);
if (check != cctx->nextSum) { if (check != cctx->nextSum) {
printf("CHECK: setNextHash failed %u %u\n", check, cctx->nextSum); printf("CHECK: setNextHash failed %u %u\n", check, cctx->nextSum);
@@ -279,7 +267,8 @@ static void putHashOfCurrentPositionFromHash(
// Hash only every HASH_ONLY_EVERY times, based on cctx->ip. // Hash only every HASH_ONLY_EVERY times, based on cctx->ip.
// Note: this works only when cctx->step is 1. // Note: this works only when cctx->step is 1.
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) { if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
(cctx->hashTable)[hash] = (hashEntry){ (offset_t)(cctx->ip - cctx->ibase) }; (cctx->hashTable)[hash] =
(LDM_hashEntry){ (offset_t)(cctx->ip - cctx->ibase) };
} }
cctx->lastPosHashed = cctx->ip; cctx->lastPosHashed = cctx->ip;
@@ -307,7 +296,7 @@ static void LDM_updateLastHashFromNextHash(LDM_CCtx *cctx) {
* Insert hash of the current position into the hash table. * Insert hash of the current position into the hash table.
*/ */
static void LDM_putHashOfCurrentPosition(LDM_CCtx *cctx) { static void LDM_putHashOfCurrentPosition(LDM_CCtx *cctx) {
U32 sum = getChecksum((const char *)cctx->ip, LDM_HASH_LENGTH); U32 sum = getChecksum(cctx->ip, LDM_HASH_LENGTH);
hash_t hash = checksumToHash(sum); hash_t hash = checksumToHash(sum);
#ifdef RUN_CHECKS #ifdef RUN_CHECKS
@@ -337,7 +326,7 @@ static unsigned countMatchLength(const BYTE *pIn, const BYTE *pMatch,
const BYTE *pInLimit) { const BYTE *pInLimit) {
const BYTE * const pStart = pIn; const BYTE * const pStart = pIn;
while (pIn < pInLimit - 1) { while (pIn < pInLimit - 1) {
BYTE const diff = LDM_readByte(pMatch) ^ LDM_readByte(pIn); BYTE const diff = (*pMatch) ^ *(pIn);
if (!diff) { if (!diff) {
pIn++; pIn++;
pMatch++; pMatch++;
@@ -427,9 +416,9 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
* Update input pointer, inserting hashes into hash table along the way. * Update input pointer, inserting hashes into hash table along the way.
*/ */
static void outputBlock(LDM_CCtx *cctx, static void outputBlock(LDM_CCtx *cctx,
unsigned const literalLength, const unsigned literalLength,
unsigned const offset, const unsigned offset,
unsigned const matchLength) { const unsigned matchLength) {
BYTE *token = cctx->op++; BYTE *token = cctx->op++;
/* Encode the literal length. */ /* Encode the literal length. */
@@ -449,7 +438,7 @@ static void outputBlock(LDM_CCtx *cctx,
cctx->op += literalLength; cctx->op += literalLength;
/* Encode the offset. */ /* Encode the offset. */
LDM_write32(cctx->op, offset); MEM_write32(cctx->op, offset);
cctx->op += LDM_OFFSET_SIZE; cctx->op += LDM_OFFSET_SIZE;
/* Encode the match length. */ /* Encode the match length. */
@@ -457,10 +446,10 @@ static void outputBlock(LDM_CCtx *cctx,
unsigned matchLengthRemaining = matchLength; unsigned matchLengthRemaining = matchLength;
*token += ML_MASK; *token += ML_MASK;
matchLengthRemaining -= ML_MASK; matchLengthRemaining -= ML_MASK;
LDM_write32(cctx->op, 0xFFFFFFFF); MEM_write32(cctx->op, 0xFFFFFFFF);
while (matchLengthRemaining >= 4*0xFF) { while (matchLengthRemaining >= 4*0xFF) {
cctx->op += 4; cctx->op += 4;
LDM_write32(cctx->op, 0xffffffff); MEM_write32(cctx->op, 0xffffffff);
matchLengthRemaining -= 4*0xFF; matchLengthRemaining -= 4*0xFF;
} }
cctx->op += matchLengthRemaining / 255; cctx->op += matchLengthRemaining / 255;
@@ -514,9 +503,9 @@ size_t LDM_compress(const void *src, size_t srcSize,
* length) and update pointers and hashes. * length) and update pointers and hashes.
*/ */
{ {
unsigned const literalLength = (unsigned)(cctx.ip - cctx.anchor); const unsigned literalLength = (unsigned)(cctx.ip - cctx.anchor);
unsigned const offset = cctx.ip - match; const unsigned offset = cctx.ip - match;
unsigned const matchLength = countMatchLength( const unsigned matchLength = countMatchLength(
cctx.ip + MINMATCH, match + MINMATCH, cctx.ihashLimit); cctx.ip + MINMATCH, match + MINMATCH, cctx.ihashLimit);
#ifdef COMPUTE_STATS #ifdef COMPUTE_STATS
@@ -605,7 +594,7 @@ size_t LDM_decompress(const void *src, size_t compressSize,
size_t length, offset; size_t length, offset;
/* Get the literal length. */ /* Get the literal length. */
unsigned const token = *(dctx.ip)++; const unsigned token = *(dctx.ip)++;
if ((length = (token >> ML_BITS)) == RUN_MASK) { if ((length = (token >> ML_BITS)) == RUN_MASK) {
unsigned s; unsigned s;
do { do {
@@ -621,7 +610,7 @@ size_t LDM_decompress(const void *src, size_t compressSize,
dctx.op = cpy; dctx.op = cpy;
//TODO : dynamic offset size //TODO : dynamic offset size
offset = LDM_read32(dctx.ip); offset = MEM_read32(dctx.ip);
dctx.ip += LDM_OFFSET_SIZE; dctx.ip += LDM_OFFSET_SIZE;
match = dctx.op - offset; match = dctx.op - offset;
@@ -647,6 +636,11 @@ size_t LDM_decompress(const void *src, size_t compressSize,
return dctx.op - (BYTE *)dst; return dctx.op - (BYTE *)dst;
} }
// Test hook. The body is currently empty, so calling it has no effect.
// TODO: implement and test hash function
void LDM_test(void) {
}
/* /*
void LDM_test(const void *src, size_t srcSize, void LDM_test(const void *src, size_t srcSize,
void *dst, size_t maxDstSize) { void *dst, size_t maxDstSize) {

View File

@@ -3,10 +3,18 @@
#include <stddef.h> /* size_t */ #include <stddef.h> /* size_t */
#include "mem.h" // from /lib/common/mem.h
#define LDM_COMPRESS_SIZE 8 #define LDM_COMPRESS_SIZE 8
#define LDM_DECOMPRESS_SIZE 8 #define LDM_DECOMPRESS_SIZE 8
#define LDM_HEADER_SIZE ((LDM_COMPRESS_SIZE)+(LDM_DECOMPRESS_SIZE)) #define LDM_HEADER_SIZE ((LDM_COMPRESS_SIZE)+(LDM_DECOMPRESS_SIZE))
typedef U32 offset_t;
typedef U32 hash_t;
typedef struct LDM_hashEntry LDM_hashEntry;
typedef struct LDM_compressStats LDM_compressStats;
typedef struct LDM_CCtx LDM_CCtx;
/** /**
* Compresses src into dst. * Compresses src into dst.
* *
@@ -46,10 +54,9 @@ size_t LDM_decompress(const void *src, size_t srcSize,
* *
* NB: LDM_compress and LDM_decompress currently do not add/read headers. * NB: LDM_compress and LDM_decompress currently do not add/read headers.
*/ */
void LDM_readHeader(const void *src, size_t *compressSize, void LDM_readHeader(const void *src, U64 *compressSize,
size_t *decompressSize); U64 *decompressSize);
void LDM_test(const void *src, size_t srcSize, void LDM_test(void);
void *dst, size_t maxDstSize);
#endif /* LDM_H */ #endif /* LDM_H */

View File

@@ -75,12 +75,6 @@ static int compress(const char *fname, const char *oname) {
return 1; return 1;
} }
/*
#ifdef TEST
LDM_test(src, statbuf.st_size,
dst + LDM_HEADER_SIZE, statbuf.st_size);
#endif
*/
compressSize = LDM_HEADER_SIZE + compressSize = LDM_HEADER_SIZE +
LDM_compress(src, statbuf.st_size, LDM_compress(src, statbuf.st_size,
dst + LDM_HEADER_SIZE, maxCompressSize); dst + LDM_HEADER_SIZE, maxCompressSize);
@@ -116,7 +110,8 @@ static int decompress(const char *fname, const char *oname) {
int fdin, fdout; int fdin, fdout;
struct stat statbuf; struct stat statbuf;
char *src, *dst; char *src, *dst;
size_t compressSize, decompressSize, outSize; U64 compressSize, decompressSize;
size_t outSize;
/* Open the input file. */ /* Open the input file. */
if ((fdin = open(fname, O_RDONLY)) < 0) { if ((fdin = open(fname, O_RDONLY)) < 0) {
@@ -267,5 +262,9 @@ int main(int argc, const char *argv[]) {
} }
/* verify */ /* verify */
verify(inpFilename, decFilename); verify(inpFilename, decFilename);
#ifdef TEST
LDM_test();
#endif
return 0; return 0;
} }

View File

@@ -1,68 +0,0 @@
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#include "util.h"
/* Short aliases for the <stdint.h> fixed-width integer types used below. */
typedef uint8_t BYTE;   /* unsigned 8-bit */
typedef uint16_t U16;   /* unsigned 16-bit */
typedef uint32_t U32;   /* unsigned 32-bit */
typedef int32_t S32;    /* signed 32-bit */
typedef uint64_t U64;   /* unsigned 64-bit */
/**
 * Reports the host byte order.
 *
 * Stores the 32-bit value 1 and inspects the byte at the lowest address:
 * it is 1 on a little-endian host and 0 otherwise.
 */
unsigned LDM_isLittleEndian(void) {
  const U32 probe = 1;
  BYTE lowestAddressByte;
  memcpy(&lowestAddressByte, &probe, 1);
  return lowestAddressByte;
}
U16 LDM_read16(const void *memPtr) {
U16 val;
memcpy(&val, memPtr, sizeof(val));
return val;
}
U16 LDM_readLE16(const void *memPtr) {
if (LDM_isLittleEndian()) {
return LDM_read16(memPtr);
} else {
const BYTE *p = (const BYTE *)memPtr;
return (U16)((U16)p[0] + (p[1] << 8));
}
}
/**
 * Stores the 16-bit value at memPtr in host byte order.
 *
 * The bytes are copied with memcpy, so memPtr does not need to be aligned.
 */
void LDM_write16(void *memPtr, U16 value) {
  memcpy(memPtr, &value, sizeof(U16));
}
/**
 * Stores the 32-bit value at memPtr in host byte order.
 *
 * The bytes are copied with memcpy, so memPtr does not need to be aligned.
 */
void LDM_write32(void *memPtr, U32 value) {
  memcpy(memPtr, &value, sizeof(U32));
}
/**
 * Stores the 16-bit value at memPtr in little-endian byte order.
 *
 * On a little-endian host this is a plain host-order write; otherwise the
 * low byte is written first, followed by the high byte.
 */
void LDM_writeLE16(void *memPtr, U16 value) {
  BYTE *out;
  if (LDM_isLittleEndian()) {
    LDM_write16(memPtr, value);
    return;
  }
  out = (BYTE *)memPtr;
  out[0] = (BYTE)(value & 0xFF);
  out[1] = (BYTE)(value >> 8);
}
/**
 * Reads a 32-bit value from ptr in host byte order.
 *
 * ptr may point at any byte position. The value is copied out with memcpy
 * instead of being loaded through a casted U32 pointer: such a load is
 * undefined behavior when ptr is not suitably aligned and breaks the
 * strict-aliasing rules. This matches how LDM_read16 reads its value.
 */
U32 LDM_read32(const void *ptr) {
  U32 val;
  memcpy(&val, ptr, sizeof(val));
  return val;
}
/**
 * Reads a 64-bit value from ptr in host byte order.
 *
 * ptr may point at any byte position. The value is copied out with memcpy
 * instead of being loaded through a casted U64 pointer: such a load is
 * undefined behavior when ptr is not suitably aligned and breaks the
 * strict-aliasing rules. This matches how LDM_read16 reads its value.
 */
U64 LDM_read64(const void *ptr) {
  U64 val;
  memcpy(&val, ptr, sizeof(val));
  return val;
}
/**
 * Copies exactly 8 bytes from src to dst.
 *
 * Uses memcpy, so the two regions must not overlap.
 */
void LDM_copy8(void *dst, const void *src) {
  const size_t nbBytes = 8;
  memcpy(dst, src, nbBytes);
}
/**
 * Returns the single byte stored at memPtr.
 */
BYTE LDM_readByte(const void *memPtr) {
  return *(const BYTE *)memPtr;
}

View File

@@ -1,27 +0,0 @@
#ifndef LDM_UTIL_H
#define LDM_UTIL_H

/* Needed for the fixed-width types in the prototypes below, so that this
 * header can be included on its own. */
#include <stdint.h>

/* Returns non-zero when the host is little-endian, 0 otherwise. */
unsigned LDM_isLittleEndian(void);

/* Reads a 16-bit value from memPtr in host byte order. */
uint16_t LDM_read16(const void *memPtr);

/* Reads a 16-bit little-endian value from memPtr. */
uint16_t LDM_readLE16(const void *memPtr);

/* Writes a 16-bit value to memPtr in host byte order. */
void LDM_write16(void *memPtr, uint16_t value);

/* Writes a 32-bit value to memPtr in host byte order. */
void LDM_write32(void *memPtr, uint32_t value);

/* Writes a 16-bit value to memPtr in little-endian byte order. */
void LDM_writeLE16(void *memPtr, uint16_t value);

/* Reads a 32-bit value from ptr in host byte order. */
uint32_t LDM_read32(const void *ptr);

/* Reads a 64-bit value from ptr in host byte order. */
uint64_t LDM_read64(const void *ptr);

/* Copies 8 bytes from src to dst. */
void LDM_copy8(void *dst, const void *src);

/* Returns the byte stored at ptr. */
uint8_t LDM_readByte(const void *ptr);

/* NOTE(review): no definition of LDM_write64 is visible in util.c;
 * confirm one exists before calling it. */
void LDM_write64(void *memPtr, uint64_t value);

#endif /* LDM_UTIL_H */