1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-08-13 03:22:30 +03:00

Optimize hash table generation in makedb

This commit is contained in:
Ulrich Drepper
2011-06-15 10:20:21 -04:00
parent 2666d441c2
commit a9e836b040
2 changed files with 106 additions and 22 deletions

View File

@@ -1,3 +1,8 @@
2011-06-15 Ulrich Drepper <drepper@gmail.com>
* nss/makedb.c (compute_tables): Check result of multiple hash table
sizes to minimize maximum chain length.
2011-06-14 Ulrich Drepper <drepper@gmail.com> 2011-06-14 Ulrich Drepper <drepper@gmail.com>
* Versions.def: Add entry for libnss_db. * Versions.def: Add entry for libnss_db.

View File

@@ -63,7 +63,7 @@ struct database
char *keystrtab; char *keystrtab;
} *databases; } *databases;
static size_t ndatabases; static size_t ndatabases;
static size_t nhashentries; static size_t nhashentries_total;
static size_t valstrlen; static size_t valstrlen;
static void *valstrtree; static void *valstrtree;
static char *valstrtab; static char *valstrtab;
@@ -542,6 +542,37 @@ copy_valstr (const void *nodep, const VISIT which, const int depth)
} }
/* Return nonzero if CANDIDATE is a prime number and zero otherwise.

   The answer is correct for every input, including even values and
   values below 10, so the result does not depend on an unchecked
   precondition.  */
static int
is_prime (size_t candidate)
{
  /* 0 and 1 are not prime; 2 and 3 are.  */
  if (candidate < 4)
    return candidate >= 2;

  /* Every remaining even number is composite.  */
  if (candidate % 2 == 0)
    return 0;

  /* Trial division by the odd numbers up to the square root.  Comparing
     DIVN against the quotient, instead of maintaining DIVN squared,
     keeps the bound test free of overflow for candidates close to
     SIZE_MAX.  */
  for (size_t divn = 3; divn <= candidate / divn; divn += 2)
    if (candidate % divn == 0)
      return 0;

  return 1;
}
/* Return the smallest odd value not below SEED that is_prime accepts.  */
static size_t
next_prime (size_t seed)
{
  /* Setting the low bit rounds an even seed up to the next odd number
     and leaves an odd seed untouched.  */
  size_t candidate = seed | 1;

  /* Step through the odd numbers only.  */
  for (; !is_prime (candidate); candidate += 2)
    continue;

  return candidate;
}
static void static void
compute_tables (void) compute_tables (void)
{ {
@@ -558,15 +589,23 @@ compute_tables (void)
/* We simply use an odd number large than twice the number of /* We simply use an odd number large than twice the number of
elements to store in the hash table for the size. This gives elements to store in the hash table for the size. This gives
enough efficiency. */ enough efficiency. */
db->nhashentries = db->nentries * 2 + 1; #define TEST_RANGE 30
db->hashtable = xmalloc (db->nhashentries * sizeof (stridx_t)); size_t nhashentries_min = next_prime (MAX (db->nentries,
memset (db->hashtable, '\xff', db->nhashentries * sizeof (stridx_t)); db->nentries
db->keyidxtab = xmalloc (db->nhashentries * sizeof (stridx_t)); * 2 - TEST_RANGE));
memset (db->keyidxtab, '\xff', db->nhashentries * sizeof (stridx_t)); size_t nhashentries_max = MAX (nhashentries_min, db->nentries * 4);
db->keystrtab = xmalloc (db->keystrlen); size_t nhashentries_best = nhashentries_min;
size_t chainlength_best = db->nentries;
size_t max_chainlength = 0; db->hashtable = xmalloc (2 * nhashentries_max * sizeof (stridx_t)
char *wp = db->keystrtab; + db->keystrlen);
db->keyidxtab = db->hashtable + nhashentries_max;
db->keystrtab = (char *) (db->keyidxtab + nhashentries_max);
size_t max_chainlength;
char *wp;
size_t nhashentries;
bool copy_string = false;
void add_key(const void *nodep, const VISIT which, const int depth) void add_key(const void *nodep, const VISIT which, const int depth)
{ {
@@ -575,18 +614,24 @@ compute_tables (void)
const struct dbentry *dbe = *(const struct dbentry **) nodep; const struct dbentry *dbe = *(const struct dbentry **) nodep;
ptrdiff_t stridx = wp - db->keystrtab; ptrdiff_t stridx;
if (copy_string)
{
stridx = wp - db->keystrtab;
wp = stpcpy (wp, dbe->str) + 1; wp = stpcpy (wp, dbe->str) + 1;
}
else
stridx = 0;
size_t hidx = dbe->hashval % db->nhashentries; size_t hidx = dbe->hashval % nhashentries;
size_t hval2 = 1 + dbe->hashval % (db->nhashentries - 2); size_t hval2 = 1 + dbe->hashval % (nhashentries - 2);
size_t chainlength = 0; size_t chainlength = 0;
while (db->hashtable[hidx] != ~((stridx_t) 0)) while (db->hashtable[hidx] != ~((stridx_t) 0))
{ {
++chainlength; ++chainlength;
if ((hidx += hval2) >= db->nhashentries) if ((hidx += hval2) >= nhashentries)
hidx -= db->nhashentries; hidx -= nhashentries;
} }
db->hashtable[hidx] = dbe->validx; db->hashtable[hidx] = dbe->validx;
@@ -595,11 +640,45 @@ compute_tables (void)
max_chainlength = MAX (max_chainlength, chainlength); max_chainlength = MAX (max_chainlength, chainlength);
} }
nhashentries = nhashentries_min;
for (size_t cnt = 0; cnt < TEST_RANGE; ++cnt)
{
memset (db->hashtable, '\xff', nhashentries * sizeof (stridx_t));
max_chainlength = 0;
wp = db->keystrtab;
twalk (db->entries, add_key); twalk (db->entries, add_key);
// XXX if hash length is too long resize table and start again if (max_chainlength == 0)
{
/* No need to look further, this is as good as it gets. */
nhashentries_best = nhashentries;
break;
}
nhashentries += db->nhashentries; if (max_chainlength < chainlength_best)
{
chainlength_best = max_chainlength;
nhashentries_best = nhashentries;
}
nhashentries = next_prime (nhashentries + 1);
if (nhashentries > nhashentries_max)
break;
}
/* Recompute the best table again, this time fill in the strings. */
nhashentries = nhashentries_best;
memset (db->hashtable, '\xff',
2 * nhashentries_max * sizeof (stridx_t));
copy_string = true;
wp = db->keystrtab;
twalk (db->entries, add_key);
db->nhashentries = nhashentries_best;
nhashentries_total += nhashentries_best;
} }
} }
@@ -626,7 +705,7 @@ write_output (int fd)
iov[1].iov_len = valstrlen; iov[1].iov_len = valstrlen;
file_offset += valstrlen; file_offset += valstrlen;
size_t keydataoffset = file_offset + nhashentries * sizeof (stridx_t); size_t keydataoffset = file_offset + nhashentries_total * sizeof (stridx_t);
for (struct database *db = databases; db != NULL; db = db->next) for (struct database *db = databases; db != NULL; db = db->next)
if (db->entries != NULL) if (db->entries != NULL)
{ {
@@ -659,7 +738,7 @@ write_output (int fd)
assert (filled_dbs == ndatabases); assert (filled_dbs == ndatabases);
assert (file_offset == (iov[0].iov_len + iov[1].iov_len assert (file_offset == (iov[0].iov_len + iov[1].iov_len
+ nhashentries * sizeof (stridx_t))); + nhashentries_total * sizeof (stridx_t)));
header->allocate = file_offset; header->allocate = file_offset;
if (writev (fd, iov, 2 + ndatabases * 3) != keydataoffset) if (writev (fd, iov, 2 + ndatabases * 3) != keydataoffset)