• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

rpmio/rpmhash.c

Go to the documentation of this file.
00001 
00006 #include "system.h"
00007 #include <rpmhash.h>
00008 #include "debug.h"
00009 
00010 typedef /*@owned@*/ const void * voidptr;
00011 
00012 typedef struct hashBucket_s * hashBucket;
00013 
00016 struct hashBucket_s {
00017     voidptr key;                        
00018 /*@owned@*/ voidptr * data;             
00019     int dataCount;                      
00020 /*@dependent@*/hashBucket next;         
00021 };
00022 
00025 struct hashTable_s {
00026     int numBuckets;                     
00027     size_t keySize;                     
00028     int freeData;       
00029     hashBucket * buckets;               
00030     hashFunctionType fn;                
00031     hashEqualityType eq;                
00032 };
00033 
00039 /*@unused@*/ static inline /*@null@*/ void *
00040 _free(/*@only@*/ /*@null@*/ const void * p) /*@modifies p@*/
00041 {
00042     if (p != NULL)      free((void *)p);
00043     return NULL;
00044 }
00045 
00052 static /*@shared@*/ /*@null@*/
00053 hashBucket findEntry(hashTable ht, const void * key)
00054         /*@*/
00055 {
00056     uint32_t hash = 0;
00057     hashBucket b;
00058 
00059     /*@-modunconnomods@*/
00060     hash = ht->fn(hash, key, 0) % ht->numBuckets;
00061     b = ht->buckets[hash];
00062 
00063     while (b && b->key && ht->eq(b->key, key))
00064         b = b->next;
00065     /*@=modunconnomods@*/
00066 
00067     return b;
00068 }
00069 
00077 static uint32_t hashFunctionString(uint32_t h, const void * data, size_t size)
00078         /*@*/
00079 {
00080     const char *key = data;
00081 
00082     if (size == 0)
00083         size = strlen(key);
00084 
00085     /*
00086      * DJBX33A (Daniel J. Bernstein, Times 33 with Addition)
00087      *
00088      * This is Daniel J. Bernstein's popular `times 33' hash function as
00089      * posted by him years ago on comp.lang.c. It basically uses a  function
00090      * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best
00091      * known hash functions for strings. Because it is both computed very
00092      * fast and distributes very well.
00093      *
00094      * The magic of number 33, i.e. why it works better than many other
00095      * constants, prime or not, has never been adequately explained by
00096      * anyone. So I try an explanation: if one experimentally tests all
00097      * multipliers between 1 and 256 (as RSE did now) one detects that even
00098      * numbers are not useable at all. The remaining 128 odd numbers
00099      * (except for the number 1) work more or less all equally well. They
00100      * all distribute in an acceptable way and this way fill a hash table
00101      * with an average percent of approx. 86%.
00102      *
00103      * If one compares the Chi^2 values of the variants, the number 33 not
00104      * even has the best value. But the number 33 and a few other equally
00105      * good numbers like 17, 31, 63, 127 and 129 have nevertheless a great
00106      * advantage to the remaining numbers in the large set of possible
00107      * multipliers: their multiply operation can be replaced by a faster
00108      * operation based on just one shift plus either a single addition
00109      * or subtraction operation. And because a hash function has to both
00110      * distribute good _and_ has to be very fast to compute, those few
00111      * numbers should be preferred and seems to be the reason why Daniel J.
00112      * Bernstein also preferred it.
00113      *
00114      * Below you can find the variant implemented with the
00115      * multiplication optimized via bit shifts and hash unrolled eight
00116      * times for speed.
00117      *                     -- Ralf S. Engelschall <rse@engelschall.com>
00118      */
00119     if (h == 0)
00120         h = 5381;
00121     for (; size >= 8; size -= 8) {
00122         h = ((h << 5) + h) + (uint32_t)*key++;
00123         h = ((h << 5) + h) + (uint32_t)*key++;
00124         h = ((h << 5) + h) + (uint32_t)*key++;
00125         h = ((h << 5) + h) + (uint32_t)*key++;
00126         h = ((h << 5) + h) + (uint32_t)*key++;
00127         h = ((h << 5) + h) + (uint32_t)*key++;
00128         h = ((h << 5) + h) + (uint32_t)*key++;
00129         h = ((h << 5) + h) + (uint32_t)*key++;
00130     }
00131     switch (size) {
00132         case 7: h = ((h << 5) + h) + (uint32_t)*key++; /*@fallthrough@*/
00133         case 6: h = ((h << 5) + h) + (uint32_t)*key++; /*@fallthrough@*/
00134         case 5: h = ((h << 5) + h) + (uint32_t)*key++; /*@fallthrough@*/
00135         case 4: h = ((h << 5) + h) + (uint32_t)*key++; /*@fallthrough@*/
00136         case 3: h = ((h << 5) + h) + (uint32_t)*key++; /*@fallthrough@*/
00137         case 2: h = ((h << 5) + h) + (uint32_t)*key++; /*@fallthrough@*/
00138         case 1: h = ((h << 5) + h) + (uint32_t)*key++; break;
00139         default: /* case 0: */ break;
00140     }
00141 
00142     return h;
00143 }
00144 
00151 static int hashEqualityString(const void * key1, const void * key2)
00152         /*@*/
00153 {
00154     const char *k1 = (const char *)key1;
00155     const char *k2 = (const char *)key2;
00156     return strcmp(k1, k2);
00157 }
00158 
00159 hashTable htCreate(int numBuckets, size_t keySize, int freeData,
00160                 hashFunctionType fn, hashEqualityType eq)
00161 {
00162     hashTable ht;
00163 
00164     ht = xmalloc(sizeof(*ht));
00165     ht->numBuckets = numBuckets;
00166     ht->buckets = xcalloc(numBuckets, sizeof(*ht->buckets));
00167     ht->keySize = keySize;
00168     ht->freeData = freeData;
00169     /*@-assignexpose@*/
00170     ht->fn = (fn != NULL ? fn : hashFunctionString);
00171     ht->eq = (eq != NULL ? eq : hashEqualityString);
00172     /*@=assignexpose@*/
00173 
00174     return ht;
00175 }
00176 
00177 void htAddEntry(hashTable ht, const void * key, const void * data)
00178 {
00179     uint32_t hash = 0;
00180     hashBucket b;
00181 
00182     hash = ht->fn(hash, key, 0) % ht->numBuckets;
00183     b = ht->buckets[hash];
00184 
00185     while (b && b->key && ht->eq(b->key, key))
00186         b = b->next;
00187 
00188     if (b == NULL) {
00189         b = xmalloc(sizeof(*b));
00190         if (ht->keySize) {
00191             char *k = xmalloc(ht->keySize);
00192             memcpy(k, key, ht->keySize);
00193             b->key = k;
00194         } else {
00195             b->key = key;
00196         }
00197         b->dataCount = 0;
00198         b->next = ht->buckets[hash];
00199         b->data = NULL;
00200         ht->buckets[hash] = b;
00201     }
00202 
00203     b->data = xrealloc(b->data, sizeof(*b->data) * (b->dataCount + 1));
00204     b->data[b->dataCount++] = data;
00205 }
00206 
00207 hashTable htFree(hashTable ht)
00208 {
00209     hashBucket b, n;
00210     int i;
00211 
00212     for (i = 0; i < ht->numBuckets; i++) {
00213         b = ht->buckets[i];
00214         if (b == NULL)
00215             continue;
00216         ht->buckets[i] = NULL;
00217         if (ht->keySize > 0)
00218             b->key = _free(b->key);
00219         do {
00220             n = b->next;
00221             if (b->data) {
00222                 if (ht->freeData)
00223                     *b->data = _free(*b->data);
00224                 b->data = _free(b->data);
00225             }
00226             b = _free(b);
00227         } while ((b = n) != NULL);
00228     }
00229 
00230     ht->buckets = _free(ht->buckets);
00231     ht = _free(ht);
00232     return NULL;
00233 }
00234 
00235 int htHasEntry(hashTable ht, const void * key)
00236 {
00237     hashBucket b;
00238 
00239     if (!(b = findEntry(ht, key))) return 0; else return 1;
00240 }
00241 
00242 int htGetEntry(hashTable ht, const void * key, const void * data,
00243                int * dataCount, const void * tableKey)
00244 {
00245     hashBucket b;
00246 
00247     if ((b = findEntry(ht, key)) == NULL)
00248         return 1;
00249 
00250     if (data)
00251         *(const void ***)data = (const void **) b->data;
00252     if (dataCount)
00253         *dataCount = b->dataCount;
00254     if (tableKey)
00255         *(const void **)tableKey = b->key;
00256 
00257     return 0;
00258 }

Generated on Mon Nov 29 2010 05:18:47 for rpm by  doxygen 1.7.2