mirror of
https://github.com/facebook/zstd.git
synced 2025-08-07 06:23:00 +03:00
Merge pull request #4170 from facebook/dict_cSpeed
Improve dictionary compression speed
This commit is contained in:
@@ -140,7 +140,6 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
|
|||||||
U32 idxl1; /* the long match index for ip1 */
|
U32 idxl1; /* the long match index for ip1 */
|
||||||
|
|
||||||
const BYTE* matchl0; /* the long match for ip */
|
const BYTE* matchl0; /* the long match for ip */
|
||||||
const BYTE* matchl0_safe; /* matchl0 or safe address */
|
|
||||||
const BYTE* matchs0; /* the short match for ip */
|
const BYTE* matchs0; /* the short match for ip */
|
||||||
const BYTE* matchl1; /* the long match for ip1 */
|
const BYTE* matchl1; /* the long match for ip1 */
|
||||||
const BYTE* matchs0_safe; /* matchs0 or safe address */
|
const BYTE* matchs0_safe; /* matchs0 or safe address */
|
||||||
@@ -201,7 +200,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
|
|||||||
* However, the expression below compiles into a conditional move. Since
|
* However, the expression below compiles into a conditional move. Since
|
||||||
* match is unlikely and we only *branch* on idxl0 > prefixLowestIndex
|
* match is unlikely and we only *branch* on idxl0 > prefixLowestIndex
|
||||||
* if there is a match, all branches become predictable. */
|
* if there is a match, all branches become predictable. */
|
||||||
matchl0_safe = ZSTD_selectAddr(idxl0, prefixLowestIndex, matchl0, &dummy[0]);
|
{ const BYTE* const matchl0_safe = ZSTD_selectAddr(idxl0, prefixLowestIndex, matchl0, &dummy[0]);
|
||||||
|
|
||||||
/* check prefix long match */
|
/* check prefix long match */
|
||||||
if (MEM_read64(matchl0_safe) == MEM_read64(ip) && matchl0_safe == matchl0) {
|
if (MEM_read64(matchl0_safe) == MEM_read64(ip) && matchl0_safe == matchl0) {
|
||||||
@@ -209,7 +208,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
|
|||||||
offset = (U32)(ip-matchl0);
|
offset = (U32)(ip-matchl0);
|
||||||
while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
|
while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
|
||||||
goto _match_found;
|
goto _match_found;
|
||||||
}
|
} }
|
||||||
|
|
||||||
idxl1 = hashLong[hl1];
|
idxl1 = hashLong[hl1];
|
||||||
matchl1 = base + idxl1;
|
matchl1 = base + idxl1;
|
||||||
@@ -412,14 +411,12 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
|||||||
goto _match_stored;
|
goto _match_stored;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (matchIndexL > prefixLowestIndex) {
|
if ((matchIndexL >= prefixLowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
|
||||||
/* check prefix long match */
|
/* check prefix long match */
|
||||||
if (MEM_read64(matchLong) == MEM_read64(ip)) {
|
|
||||||
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
|
mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
|
||||||
offset = (U32)(ip-matchLong);
|
offset = (U32)(ip-matchLong);
|
||||||
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
|
||||||
goto _match_found;
|
goto _match_found;
|
||||||
}
|
|
||||||
} else if (dictTagsMatchL) {
|
} else if (dictTagsMatchL) {
|
||||||
/* check dictMatchState long match */
|
/* check dictMatchState long match */
|
||||||
U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
|
U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
|
||||||
@@ -434,7 +431,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
|
|||||||
} }
|
} }
|
||||||
|
|
||||||
if (matchIndexS > prefixLowestIndex) {
|
if (matchIndexS > prefixLowestIndex) {
|
||||||
/* check prefix short match */
|
/* short match candidate */
|
||||||
if (MEM_read32(match) == MEM_read32(ip)) {
|
if (MEM_read32(match) == MEM_read32(ip)) {
|
||||||
goto _search_next_long;
|
goto _search_next_long;
|
||||||
}
|
}
|
||||||
@@ -464,14 +461,12 @@ _search_next_long:
|
|||||||
hashLong[hl3] = curr + 1;
|
hashLong[hl3] = curr + 1;
|
||||||
|
|
||||||
/* check prefix long +1 match */
|
/* check prefix long +1 match */
|
||||||
if (matchIndexL3 > prefixLowestIndex) {
|
if ((matchIndexL3 >= prefixLowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1))) {
|
||||||
if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
|
|
||||||
mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
|
mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
|
||||||
ip++;
|
ip++;
|
||||||
offset = (U32)(ip-matchL3);
|
offset = (U32)(ip-matchL3);
|
||||||
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
|
while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
|
||||||
goto _match_found;
|
goto _match_found;
|
||||||
}
|
|
||||||
} else if (dictTagsMatchL3) {
|
} else if (dictTagsMatchL3) {
|
||||||
/* check dict long +1 match */
|
/* check dict long +1 match */
|
||||||
U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
|
U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
|
||||||
|
@@ -597,8 +597,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (matchIndex > prefixStartIndex && MEM_read32(match) == MEM_read32(ip0)) {
|
if (ZSTD_match4Found_cmov(ip0, match, matchIndex, prefixStartIndex)) {
|
||||||
/* found a regular match */
|
/* found a regular match of size >= 4 */
|
||||||
U32 const offset = (U32) (ip0 - match);
|
U32 const offset = (U32) (ip0 - match);
|
||||||
mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4;
|
mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4;
|
||||||
while (((ip0 > anchor) & (match > prefixStart))
|
while (((ip0 > anchor) & (match > prefixStart))
|
||||||
|
@@ -2450,7 +2450,7 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
|||||||
3742, 3675, 3674, 3665, 3664,
|
3742, 3675, 3674, 3665, 3664,
|
||||||
3663, 3662, 3661, 3660, 3660,
|
3663, 3662, 3661, 3660, 3660,
|
||||||
3660, 3660, 3660 };
|
3660, 3660, 3660 };
|
||||||
size_t const target_wdict_cSize[22+1] = { 2830, 2896, 2893, 2820, 2940,
|
size_t const target_wdict_cSize[22+1] = { 2830, 2896, 2893, 2840, 2950,
|
||||||
2950, 2950, 2925, 2900, 2892,
|
2950, 2950, 2925, 2900, 2892,
|
||||||
2910, 2910, 2910, 2780, 2775,
|
2910, 2910, 2910, 2780, 2775,
|
||||||
2765, 2760, 2755, 2754, 2753,
|
2765, 2760, 2755, 2754, 2753,
|
||||||
|
Reference in New Issue
Block a user