tools/hashtab.c

Go to the documentation of this file.
00001 /* An expandable hash tables datatype.  
00002    Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
00003    Contributed by Vladimir Makarov (vmakarov@cygnus.com).
00004 
00005 This file is part of the libiberty library.
00006 Libiberty is free software; you can redistribute it and/or
00007 modify it under the terms of the GNU Library General Public
00008 License as published by the Free Software Foundation; either
00009 version 2 of the License, or (at your option) any later version.
00010 
00011 Libiberty is distributed in the hope that it will be useful,
00012 but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 Library General Public License for more details.
00015 
00016 You should have received a copy of the GNU Library General Public
00017 License along with libiberty; see the file COPYING.LIB.  If
00018 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00019 Boston, MA 02111-1307, USA.  */
00020 
00021 /* This package implements basic hash table functionality.  It is possible
00022    to search for an entry, create an entry and destroy an entry.
00023 
00024    Elements in the table are generic pointers.
00025 
00026    The size of the table is not fixed; if the occupancy of the table
00027    grows too high the hash table will be expanded.
00028 
00029    The abstract data implementation is based on generalized Algorithm D
00030    from Knuth's book "The art of computer programming".  Hash table is
00031    expanded by creation of new hash table and transferring elements from
00032    the old table to the new table. */
00033 
00034 #include <sys/types.h>
00035 #include <stdlib.h>
00036 #include <string.h>
00037 #include <stdio.h>
00038 #include "hashtab.h"
00039 
00040 /* This macro defines reserved value for empty table entry. */
00041 
00042 #define EMPTY_ENTRY    ((void *) 0)
00043 
00044 /* This macro defines reserved value for table entry which contained
00045    a deleted element. */
00046 
00047 #define DELETED_ENTRY  ((void *) 1)
00048 
00049 static unsigned long higher_prime_number (unsigned long);
00050 static hashval_t hash_pointer (const void *);
00051 static int eq_pointer (const void *, const void *);
00052 static int htab_expand (htab_t);
00053 static void **find_empty_slot_for_expand  (htab_t, hashval_t);
00054 
00055 /* At some point, we could make these be NULL, and modify the
00056    hash-table routines to handle NULL specially; that would avoid
00057    function-call overhead for the common case of hashing pointers.  */
00058 htab_hash htab_hash_pointer = hash_pointer;
00059 htab_eq htab_eq_pointer = eq_pointer;
00060 
00061 /* The following function returns a nearest prime number which is
00062    greater than N, and near a power of two. */
00063 
00064 static unsigned long
00065 higher_prime_number (n)
00066      unsigned long n;
00067 {
00068   /* These are primes that are near, but slightly smaller than, a
00069      power of two.  */
00070   static unsigned long primes[] = {
00071     (unsigned long) 2,
00072     (unsigned long) 7,
00073     (unsigned long) 13,
00074     (unsigned long) 31,
00075     (unsigned long) 61,
00076     (unsigned long) 127,
00077     (unsigned long) 251,
00078     (unsigned long) 509,
00079     (unsigned long) 1021,
00080     (unsigned long) 2039,
00081     (unsigned long) 4093,
00082     (unsigned long) 8191,
00083     (unsigned long) 16381,
00084     (unsigned long) 32749,
00085     (unsigned long) 65521,
00086     (unsigned long) 131071,
00087     (unsigned long) 262139,
00088     (unsigned long) 524287,
00089     (unsigned long) 1048573,
00090     (unsigned long) 2097143,
00091     (unsigned long) 4194301,
00092     (unsigned long) 8388593,
00093     (unsigned long) 16777213,
00094     (unsigned long) 33554393,
00095     (unsigned long) 67108859,
00096     (unsigned long) 134217689,
00097     (unsigned long) 268435399,
00098     (unsigned long) 536870909,
00099     (unsigned long) 1073741789,
00100     (unsigned long) 2147483647,
00101                                         /* 4294967291L */
00102     ((unsigned long) 2147483647) + ((unsigned long) 2147483644),
00103   };
00104 
00105   unsigned long* low = &primes[0];
00106   unsigned long* high = &primes[sizeof(primes) / sizeof(primes[0])];
00107 
00108   while (low != high)
00109     {
00110       unsigned long* mid = low + (high - low) / 2;
00111       if (n > *mid)
00112         low = mid + 1;
00113       else
00114         high = mid;
00115     }
00116 
00117   /* If we've run out of primes, abort.  */
00118   if (n > *low)
00119     {
00120       fprintf (stderr, "Cannot find prime bigger than %lu\n", n);
00121       abort ();
00122     }
00123 
00124   return *low;
00125 }
00126 
00127 /* Returns a hash code for P.  */
00128 
00129 static hashval_t
00130 hash_pointer (p)
00131      const void * p;
00132 {
00133   return (hashval_t) ((long)p >> 3);
00134 }
00135 
00136 /* Returns non-zero if P1 and P2 are equal.  */
00137 
00138 static int
00139 eq_pointer (p1, p2)
00140      const void * p1;
00141      const void * p2;
00142 {
00143   return p1 == p2;
00144 }
00145 
00146 /* This function creates table with length slightly longer than given
00147    source length.  The created hash table is initiated as empty (all the
00148    hash table entries are EMPTY_ENTRY).  The function returns the created
00149    hash table.  Memory allocation may fail; it may return NULL.  */
00150 
00151 htab_t
00152 htab_try_create (size, hash_f, eq_f, del_f)
00153      size_t size;
00154      htab_hash hash_f;
00155      htab_eq eq_f;
00156      htab_del del_f;
00157 {
00158   htab_t result;
00159 
00160   size = higher_prime_number (size);
00161   result = (htab_t) calloc (1, sizeof (struct htab));
00162   if (result == NULL)
00163     return NULL;
00164 
00165   result->entries = (void **) calloc (size, sizeof (void *));
00166   if (result->entries == NULL)
00167     {
00168       free (result);
00169       return NULL;
00170     }
00171 
00172   result->size = size;
00173   result->hash_f = hash_f;
00174   result->eq_f = eq_f;
00175   result->del_f = del_f;
00176   result->return_allocation_failure = 1;
00177   return result;
00178 }
00179 
00180 /* This function frees all memory allocated for given hash table.
00181    Naturally the hash table must already exist. */
00182 
00183 void
00184 htab_delete (htab)
00185      htab_t htab;
00186 {
00187   int i;
00188 
00189   if (htab->del_f)
00190     for (i = htab->size - 1; i >= 0; i--)
00191       if (htab->entries[i] != EMPTY_ENTRY
00192           && htab->entries[i] != DELETED_ENTRY)
00193         (*htab->del_f) (htab->entries[i]);
00194 
00195   free (htab->entries);
00196   free (htab);
00197 }
00198 
00199 /* This function clears all entries in the given hash table.  */
00200 
00201 void
00202 htab_empty (htab)
00203      htab_t htab;
00204 {
00205   int i;
00206 
00207   if (htab->del_f)
00208     for (i = htab->size - 1; i >= 0; i--)
00209       if (htab->entries[i] != EMPTY_ENTRY
00210           && htab->entries[i] != DELETED_ENTRY)
00211         (*htab->del_f) (htab->entries[i]);
00212 
00213   memset (htab->entries, 0, htab->size * sizeof (void *));
00214 }
00215 
00216 /* Similar to htab_find_slot, but without several unwanted side effects:
00217     - Does not call htab->eq_f when it finds an existing entry.
00218     - Does not change the count of elements/searches/collisions in the
00219       hash table.
00220    This function also assumes there are no deleted entries in the table.
00221    HASH is the hash value for the element to be inserted.  */
00222 
00223 static void **
00224 find_empty_slot_for_expand (htab, hash)
00225      htab_t htab;
00226      hashval_t hash;
00227 {
00228   size_t size = htab->size;
00229   hashval_t hash2 = 1 + hash % (size - 2);
00230   unsigned int index = hash % size;
00231 
00232   for (;;)
00233     {
00234       void **slot = htab->entries + index;
00235 
00236       if (*slot == EMPTY_ENTRY)
00237         return slot;
00238       else if (*slot == DELETED_ENTRY)
00239         abort ();
00240 
00241       index += hash2;
00242       if (index >= size)
00243         index -= size;
00244     }
00245 }
00246 
00247 /* The following function changes size of memory allocated for the
00248    entries and repeatedly inserts the table elements.  The occupancy
00249    of the table after the call will be about 50%.  Naturally the hash
00250    table must already exist.  Remember also that the place of the
00251    table entries is changed.  If memory allocation failures are allowed,
00252    this function will return zero, indicating that the table could not be
00253    expanded.  If all goes well, it will return a non-zero value.  */
00254 
00255 static int
00256 htab_expand (htab)
00257      htab_t htab;
00258 {
00259   void **oentries;
00260   void **olimit;
00261   void **p;
00262 
00263   oentries = htab->entries;
00264   olimit = oentries + htab->size;
00265 
00266   htab->size = higher_prime_number (htab->size * 2);
00267 
00268   if (htab->return_allocation_failure)
00269     {
00270       void **nentries = (void **) calloc (htab->size, sizeof (void **));
00271       if (nentries == NULL)
00272         return 0;
00273       htab->entries = nentries;
00274     }
00275 
00276   htab->n_elements -= htab->n_deleted;
00277   htab->n_deleted = 0;
00278 
00279   p = oentries;
00280   do
00281     {
00282       void * x = *p;
00283 
00284       if (x != EMPTY_ENTRY && x != DELETED_ENTRY)
00285         {
00286           void **q = find_empty_slot_for_expand (htab, (*htab->hash_f) (x));
00287 
00288           *q = x;
00289         }
00290 
00291       p++;
00292     }
00293   while (p < olimit);
00294 
00295   free (oentries);
00296   return 1;
00297 }
00298 
00299 /* This function searches for a hash table entry equal to the given
00300    element.  It cannot be used to insert or delete an element.  */
00301 
00302 void *
00303 htab_find_with_hash (htab, element, hash)
00304      htab_t htab;
00305      const void * element;
00306      hashval_t hash;
00307 {
00308   unsigned int index;
00309   hashval_t hash2;
00310   size_t size;
00311   void * entry;
00312 
00313   htab->searches++;
00314   size = htab->size;
00315   index = hash % size;
00316 
00317   entry = htab->entries[index];
00318   if (entry == EMPTY_ENTRY
00319       || (entry != DELETED_ENTRY && (*htab->eq_f) (entry, element)))
00320     return entry;
00321 
00322   hash2 = 1 + hash % (size - 2);
00323 
00324   for (;;)
00325     {
00326       htab->collisions++;
00327       index += hash2;
00328       if (index >= size)
00329         index -= size;
00330 
00331       entry = htab->entries[index];
00332       if (entry == EMPTY_ENTRY
00333           || (entry != DELETED_ENTRY && (*htab->eq_f) (entry, element)))
00334         return entry;
00335     }
00336 }
00337 
00338 /* Like htab_find_slot_with_hash, but compute the hash value from the
00339    element.  */
00340 
00341 void *
00342 htab_find (htab, element)
00343      htab_t htab;
00344      const void * element;
00345 {
00346   return htab_find_with_hash (htab, element, (*htab->hash_f) (element));
00347 }
00348 
00349 /* This function searches for a hash table slot containing an entry
00350    equal to the given element.  To delete an entry, call this with
00351    INSERT = 0, then call htab_clear_slot on the slot returned (possibly
00352    after doing some checks).  To insert an entry, call this with
00353    INSERT = 1, then write the value you want into the returned slot.
00354    When inserting an entry, NULL may be returned if memory allocation
00355    fails.  */
00356 
00357 void **
00358 htab_find_slot_with_hash (htab, element, hash, insert)
00359      htab_t htab;
00360      const void * element;
00361      hashval_t hash;
00362      enum insert_option insert;
00363 {
00364   void **first_deleted_slot;
00365   unsigned int index;
00366   hashval_t hash2;
00367   size_t size;
00368 
00369   if (insert == INSERT && htab->size * 3 <= htab->n_elements * 4
00370       && htab_expand (htab) == 0)
00371     return NULL;
00372 
00373   size = htab->size;
00374   hash2 = 1 + hash % (size - 2);
00375   index = hash % size;
00376 
00377   htab->searches++;
00378   first_deleted_slot = NULL;
00379 
00380   for (;;)
00381     {
00382       void * entry = htab->entries[index];
00383       if (entry == EMPTY_ENTRY)
00384         {
00385           if (insert == NO_INSERT)
00386             return NULL;
00387 
00388           htab->n_elements++;
00389 
00390           if (first_deleted_slot)
00391             {
00392               *first_deleted_slot = EMPTY_ENTRY;
00393               return first_deleted_slot;
00394             }
00395 
00396           return &htab->entries[index];
00397         }
00398 
00399       if (entry == DELETED_ENTRY)
00400         {
00401           if (!first_deleted_slot)
00402             first_deleted_slot = &htab->entries[index];
00403         }
00404       else  if ((*htab->eq_f) (entry, element))
00405         return &htab->entries[index];
00406       
00407       htab->collisions++;
00408       index += hash2;
00409       if (index >= size)
00410         index -= size;
00411     }
00412 }
00413 
00414 /* Like htab_find_slot_with_hash, but compute the hash value from the
00415    element.  */
00416 
00417 void **
00418 htab_find_slot (htab, element, insert)
00419      htab_t htab;
00420      const void * element;
00421      enum insert_option insert;
00422 {
00423   return htab_find_slot_with_hash (htab, element, (*htab->hash_f) (element),
00424                                    insert);
00425 }
00426 
00427 /* This function deletes an element with the given value from hash
00428    table.  If there is no matching element in the hash table, this
00429    function does nothing.  */
00430 
00431 void
00432 htab_remove_elt (htab, element)
00433      htab_t htab;
00434      void * element;
00435 {
00436   void **slot;
00437 
00438   slot = htab_find_slot (htab, element, NO_INSERT);
00439   if (*slot == EMPTY_ENTRY)
00440     return;
00441 
00442   if (htab->del_f)
00443     (*htab->del_f) (*slot);
00444 
00445   *slot = DELETED_ENTRY;
00446   htab->n_deleted++;
00447 }
00448 
00449 /* This function clears a specified slot in a hash table.  It is
00450    useful when you've already done the lookup and don't want to do it
00451    again.  */
00452 
00453 void
00454 htab_clear_slot (htab, slot)
00455      htab_t htab;
00456      void **slot;
00457 {
00458   if (slot < htab->entries || slot >= htab->entries + htab->size
00459       || *slot == EMPTY_ENTRY || *slot == DELETED_ENTRY)
00460     abort ();
00461 
00462   if (htab->del_f)
00463     (*htab->del_f) (*slot);
00464 
00465   *slot = DELETED_ENTRY;
00466   htab->n_deleted++;
00467 }
00468 
00469 /* This function scans over the entire hash table calling
00470    CALLBACK for each live entry.  If CALLBACK returns false,
00471    the iteration stops.  INFO is passed as CALLBACK's second
00472    argument.  */
00473 
00474 void
00475 htab_traverse (htab, callback, info)
00476      htab_t htab;
00477      htab_trav callback;
00478      void * info;
00479 {
00480   void **slot = htab->entries;
00481   void **limit = slot + htab->size;
00482 
00483   do
00484     {
00485       void * x = *slot;
00486 
00487       if (x != EMPTY_ENTRY && x != DELETED_ENTRY)
00488         if (!(*callback) (slot, info))
00489           break;
00490     }
00491   while (++slot < limit);
00492 }
00493 
00494 /* Return the current size of given hash table. */
00495 
00496 size_t
00497 htab_size (htab)
00498      htab_t htab;
00499 {
00500   return htab->size;
00501 }
00502 
00503 /* Return the current number of elements in given hash table. */
00504 
00505 size_t
00506 htab_elements (htab)
00507      htab_t htab;
00508 {
00509   return htab->n_elements - htab->n_deleted;
00510 }
00511 
00512 /* Return the fraction of fixed collisions during all work with given
00513    hash table. */
00514 
00515 double
00516 htab_collisions (htab)
00517      htab_t htab;
00518 {
00519   if (htab->searches == 0)
00520     return 0.0;
00521 
00522   return (double) htab->collisions / (double) htab->searches;
00523 }

Generated on Mon Mar 5 13:44:07 2007 for rpm by  doxygen 1.5.1