Optimize hash table generation in makedb

2025-08-13 03:22:30 +03:00 · 2011-06-15 10:20:21 -04:00
parent 2666d441c2
commit a9e836b040
2 changed files with 106 additions and 22 deletions
--- a/5
+++ b/5
@@ -1,3 +1,8 @@
 2011-06-15  Ulrich Drepper  <drepper@gmail.com>
 	* nss/makedb.c (compute_tables): Check result of multiple hash table
 	sizes to minimize maximum chain length.
 2011-06-14  Ulrich Drepper  <drepper@gmail.com>
 	* Versions.def: Add entry for libnss_db.
--- a/nss/makedb.c
+++ b/nss/makedb.c
@@ -63,7 +63,7 @@ struct database
  char *keystrtab;
 } *databases;
 static size_t ndatabases;
-static size_t nhashentries;
+static size_t nhashentries_total;
 static size_t valstrlen;
 static void *valstrtree;
 static char *valstrtab;
@@ -542,6 +542,37 @@ copy_valstr (const void *nodep, const VISIT which, const int depth)
 }
 static int
 is_prime (size_t candidate)
 {
   /* Trial division by the odd numbers 3, 5, 7, ...  Callers are expected
      to pass only odd values of at least 10, so neither 2 nor the small
      primes need special handling here.  */
   size_t odd = 3;

   /* SQUARE always holds ODD * ODD.  It is advanced incrementally using
      (d + 2)^2 = d^2 + 4 * (d + 1) instead of being recomputed.  */
   for (size_t square = 9; square < candidate; square += 4 * (odd - 1))
     {
       if (candidate % odd == 0)
 	return 0;
       odd += 2;
     }

   /* ODD * ODD >= CANDIDATE at this point; the only divisor still worth
      checking is ODD itself (covers perfect squares such as 25 or 49).  */
   return candidate % odd != 0;
 }
 static size_t
 next_prime (size_t seed)
 {
   /* Return the first value accepted by is_prime, starting the search at
      SEED with its lowest bit forced on and stepping by two so that only
      odd numbers are ever tested.  */
   size_t candidate = seed | 1;

   for (; !is_prime (candidate); candidate += 2)
     continue;

   return candidate;
 }
 static void
 compute_tables (void)
 {
@@ -558,15 +589,23 @@ compute_tables (void)
 	/* We simply use an odd number larger than twice the number of
 	   elements to store in the hash table for the size.  This gives
 	   enough efficiency.  */
-	db->nhashentries = db->nentries * 2 + 1;
+#define TEST_RANGE 30
-	db->hashtable = xmalloc (db->nhashentries * sizeof (stridx_t));
+	size_t nhashentries_min = next_prime (MAX (db->nentries,
-	memset (db->hashtable, '\xff', db->nhashentries * sizeof (stridx_t));
+						   db->nentries
-	db->keyidxtab = xmalloc (db->nhashentries * sizeof (stridx_t));
+						   * 2 - TEST_RANGE));
-	memset (db->keyidxtab, '\xff', db->nhashentries * sizeof (stridx_t));
+	size_t nhashentries_max = MAX (nhashentries_min, db->nentries * 4);
-	db->keystrtab = xmalloc (db->keystrlen);
+	size_t nhashentries_best = nhashentries_min;
 	size_t chainlength_best = db->nentries;
-	size_t max_chainlength = 0;
+	db->hashtable = xmalloc (2 * nhashentries_max * sizeof (stridx_t)
-	char *wp = db->keystrtab;
+				 + db->keystrlen);
 	db->keyidxtab = db->hashtable + nhashentries_max;
 	db->keystrtab = (char *) (db->keyidxtab + nhashentries_max);
 	size_t max_chainlength;
 	char *wp;
 	size_t nhashentries;
 	bool copy_string = false;
 	void add_key(const void *nodep, const VISIT which, const int depth)
 	{
@@ -575,18 +614,24 @@ compute_tables (void)
 	  const struct dbentry *dbe = *(const struct dbentry **) nodep;
-	  ptrdiff_t stridx = wp - db->keystrtab;
+	  ptrdiff_t stridx;
 	  if (copy_string)
 	    {
 	      stridx = wp - db->keystrtab;
 	      wp = stpcpy (wp, dbe->str) + 1;
 	    }
 	  else
 	    stridx = 0;
-	  size_t hidx = dbe->hashval % db->nhashentries;
+	  size_t hidx = dbe->hashval % nhashentries;
-	  size_t hval2 = 1 + dbe->hashval % (db->nhashentries - 2);
+	  size_t hval2 = 1 + dbe->hashval % (nhashentries - 2);
 	  size_t chainlength = 0;
 	  while (db->hashtable[hidx] != ~((stridx_t) 0))
 	    {
 	      ++chainlength;
-	      if ((hidx += hval2) >= db->nhashentries)
+	      if ((hidx += hval2) >= nhashentries)
-		hidx -= db->nhashentries;
+		hidx -= nhashentries;
 	    }
 	  db->hashtable[hidx] = dbe->validx;
@@ -595,11 +640,45 @@ compute_tables (void)
 	  max_chainlength = MAX (max_chainlength, chainlength);
 	}
 	nhashentries = nhashentries_min;
 	for (size_t cnt = 0; cnt < TEST_RANGE; ++cnt)
 	  {
 	    memset (db->hashtable, '\xff', nhashentries * sizeof (stridx_t));
 	    max_chainlength = 0;
 	    wp = db->keystrtab;
 	    twalk (db->entries, add_key);
-	// XXX if hash length is too long resize table and start again
+	    if (max_chainlength == 0)
 	      {
 		/* No need to look further, this is as good as it gets.  */
 		nhashentries_best = nhashentries;
 		break;
 	      }
-	nhashentries += db->nhashentries;
+	    if (max_chainlength < chainlength_best)
 	      {
 		chainlength_best = max_chainlength;
 		nhashentries_best = nhashentries;
 	      }
 	    nhashentries = next_prime (nhashentries + 1);
 	    if (nhashentries > nhashentries_max)
 	      break;
 	  }
 	/* Recompute the best table again, this time fill in the strings.  */
 	nhashentries = nhashentries_best;
 	memset (db->hashtable, '\xff',
 		2 * nhashentries_max * sizeof (stridx_t));
 	copy_string = true;
 	wp = db->keystrtab;
 	twalk (db->entries, add_key);
 	db->nhashentries = nhashentries_best;
 	nhashentries_total += nhashentries_best;
    }
 }
@@ -626,7 +705,7 @@ write_output (int fd)
  iov[1].iov_len = valstrlen;
  file_offset += valstrlen;
-  size_t keydataoffset = file_offset + nhashentries * sizeof (stridx_t);
+  size_t keydataoffset = file_offset + nhashentries_total * sizeof (stridx_t);
  for (struct database *db = databases; db != NULL; db = db->next)
    if (db->entries != NULL)
      {
@@ -659,7 +738,7 @@ write_output (int fd)
  assert (filled_dbs == ndatabases);
  assert (file_offset == (iov[0].iov_len + iov[1].iov_len
-			  + nhashentries * sizeof (stridx_t)));
+			  + nhashentries_total * sizeof (stridx_t)));
  header->allocate = file_offset;
  if (writev (fd, iov, 2 + ndatabases * 3) != keydataoffset)