mirror of
https://github.com/facebook/zstd.git
synced 2025-07-29 11:21:22 +03:00
increase vocabulary size
Makes compression a bit less effective, hence a bit more comparable with real text (though still too easy to compress). Level 6 is now stronger than level 4, by a hair. However, there is still a ratio dip at level 5.
This commit is contained in:
7
.gitignore
vendored
7
.gitignore
vendored
@ -39,12 +39,15 @@ buck-out/
|
||||
build-*
|
||||
*.gcda
|
||||
|
||||
# IDE
|
||||
.clang_complete
|
||||
compile_flags.txt
|
||||
.clang-format
|
||||
|
||||
# Other files
|
||||
.directory
|
||||
_codelite/
|
||||
_zstdbench/
|
||||
.clang_complete
|
||||
compile_flags.txt
|
||||
*.idea
|
||||
*.swp
|
||||
.DS_Store
|
||||
|
@ -8,7 +8,6 @@
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
/* Implementation notes:
|
||||
*
|
||||
* This is a very simple lorem ipsum generator
|
||||
@ -32,9 +31,9 @@
|
||||
*/
|
||||
|
||||
#include "lorem.h"
|
||||
#include <string.h> /* memcpy */
|
||||
#include <limits.h> /* INT_MAX */
|
||||
#include <assert.h>
|
||||
#include <limits.h> /* INT_MAX */
|
||||
#include <string.h> /* memcpy */
|
||||
|
||||
#define WORD_MAX_SIZE 20
|
||||
|
||||
@ -54,24 +53,36 @@ static const char *words[] = {
|
||||
"dui", "faucibus", "ornare", "at", "elementum",
|
||||
"eu", "facilisis", "odio", "morbi", "quis",
|
||||
"eros", "donec", "ac", "orci", "purus",
|
||||
"turpis", "cursus", "leo", "vel", "porta"};
|
||||
"turpis", "cursus", "leo", "vel", "porta",
|
||||
"consequat", "interdum", "varius", "vulputate", "aliquet",
|
||||
"pharetra", "nunc", "auctor", "urna", "id",
|
||||
"metus", "viverra", "nibh", "cras", "mi",
|
||||
"unde", "omnis", "iste", "natus", "error",
|
||||
"perspiciatis", "voluptatem", "accusantium", "doloremque", "laudantium",
|
||||
"totam", "rem", "aperiam", "eaque", "ipsa",
|
||||
"quae", "ab", "illo", "inventore", "veritatis",
|
||||
"quasi", "architecto", "beatae", "vitae", "dicta",
|
||||
"sunt", "explicabo", "nemo", "ipsam", "quia",
|
||||
"voluptas", "aspernatur", "aut", "odit", "fugit"
|
||||
};
|
||||
|
||||
/* simple distribution that favors small words :
|
||||
/* simple 1-dimension distribution that favors small words :
|
||||
* 1 letter : weight 3
|
||||
* 2-3 letters : weight 2
|
||||
* 4+ letters : weight 1
|
||||
* This is expected to be a bit more difficult to compress */
|
||||
*/
|
||||
static const int distrib[] = {
|
||||
0, 1, 2, 3, 3, 4, 5, 6, 7, 8,
|
||||
8,9, 9, 10, 11, 12, 13, 13, 14, 15,
|
||||
15, 16, 17, 18, 19, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 26, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 34, 35, 36, 37, 38, 39, 40,
|
||||
41, 41, 42, 43, 43, 44, 45, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55, 55, 56,
|
||||
57, 58, 58, 59, 60, 60, 61, 62, 63, 64,
|
||||
65, 66, 67, 67, 68, 69, 70, 71, 72, 72,
|
||||
73, 73, 74 };
|
||||
0, 1, 2, 3, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 11,
|
||||
12, 13, 13, 14, 15, 15, 16, 17, 18, 19, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 26, 26, 27, 28, 29, 30, 31, 32, 33, 34, 34, 35,
|
||||
36, 37, 38, 39, 40, 41, 41, 42, 43, 43, 44, 45, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55, 55, 56, 57, 58, 58, 59, 60,
|
||||
60, 61, 62, 63, 64, 65, 66, 67, 67, 68, 69, 70, 71, 72, 72,
|
||||
73, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 84, 85,
|
||||
86, 87, 88, 89, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
|
||||
100, 101, 101, 102, 103, 104, 105, 106, 106, 107, 108, 109, 110, 111, 112,
|
||||
113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124,
|
||||
};
|
||||
static const unsigned distribCount = sizeof(distrib) / sizeof(distrib[0]);
|
||||
|
||||
/* Note: this unit only works when invoked sequentially.
|
||||
@ -82,7 +93,8 @@ static size_t g_maxChars = 10000000;
|
||||
static unsigned g_randRoot = 0;
|
||||
|
||||
/* Rotate `x` left by `r` bits; intended for 32-bit unsigned operands.
 * Every argument use is parenthesized so that expression arguments
 * (e.g. `a | b`, `n - 1`) expand with the intended precedence.
 * `r` must be in [1, 31]: with r == 0 the right shift count becomes 32,
 * which is undefined for a 32-bit operand.
 * Both arguments are expanded twice, so they must have no side effects. */
#define RDG_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
|
||||
static unsigned LOREM_rand(unsigned range) {
|
||||
static unsigned LOREM_rand(unsigned range)
|
||||
{
|
||||
static const unsigned prime1 = 2654435761U;
|
||||
static const unsigned prime2 = 2246822519U;
|
||||
unsigned rand32 = g_randRoot;
|
||||
@ -93,7 +105,8 @@ static unsigned LOREM_rand(unsigned range) {
|
||||
return (unsigned)(((unsigned long long)rand32 * range) >> 32);
|
||||
}
|
||||
|
||||
static void writeLastCharacters(void) {
|
||||
static void writeLastCharacters(void)
|
||||
{
|
||||
size_t lastChars = g_maxChars - g_nbChars;
|
||||
assert(g_maxChars >= g_nbChars);
|
||||
if (lastChars == 0)
|
||||
@ -125,12 +138,14 @@ static void generateWord(const char *word, const char *separator, int upCase)
|
||||
g_nbChars += strlen(separator);
|
||||
}
|
||||
|
||||
static int about(unsigned target) {
|
||||
static int about(unsigned target)
|
||||
{
|
||||
return (int)(LOREM_rand(target) + LOREM_rand(target) + 1);
|
||||
}
|
||||
|
||||
/* Function to generate a random sentence */
|
||||
static void generateSentence(int nbWords) {
|
||||
static void generateSentence(int nbWords)
|
||||
{
|
||||
int commaPos = about(9);
|
||||
int comma2 = commaPos + about(7);
|
||||
int i;
|
||||
@ -148,7 +163,8 @@ static void generateSentence(int nbWords) {
|
||||
}
|
||||
}
|
||||
|
||||
static void generateParagraph(int nbSentences) {
|
||||
static void generateParagraph(int nbSentences)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < nbSentences; i++) {
|
||||
int wordsPerSentence = about(8);
|
||||
@ -164,7 +180,8 @@ static void generateParagraph(int nbSentences) {
|
||||
|
||||
/* It's "common" for lorem ipsum generators to start with the same first
|
||||
* pre-defined sentence */
|
||||
static void generateFirstSentence(void) {
|
||||
static void generateFirstSentence(void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 18; i++) {
|
||||
const char* word = words[i];
|
||||
@ -178,9 +195,8 @@ static void generateFirstSentence(void) {
|
||||
generateWord(words[18], ". ", 0);
|
||||
}
|
||||
|
||||
size_t LOREM_genBlock(void* buffer, size_t size,
|
||||
unsigned seed,
|
||||
int first, int fill)
|
||||
size_t
|
||||
LOREM_genBlock(void* buffer, size_t size, unsigned seed, int first, int fill)
|
||||
{
|
||||
g_ptr = (char*)buffer;
|
||||
assert(size < INT_MAX);
|
||||
@ -204,4 +220,3 @@ void LOREM_genBuffer(void* buffer, size_t size, unsigned seed)
|
||||
{
|
||||
LOREM_genBlock(buffer, size, seed, 1, 1);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user