rpm
5.2.1
|
00001 00006 #include "system.h" 00007 #include <rpmiotypes.h> 00008 #include <rpmio.h> 00009 #include <rpmhash.h> 00010 #include "debug.h" 00011 00012 /*@unchecked@*/ 00013 int _ht_debug = 0; 00014 00015 typedef /*@owned@*/ const void * voidptr; 00016 00017 typedef struct hashBucket_s * hashBucket; 00018 00021 struct hashBucket_s { 00022 voidptr key; 00023 /*@owned@*/ voidptr * data; 00024 int dataCount; 00025 /*@dependent@*/hashBucket next; 00026 }; 00027 00030 struct hashTable_s { 00031 struct rpmioItem_s _item; 00032 int numBuckets; 00033 size_t keySize; 00034 int freeData; 00035 hashBucket * buckets; 00036 /*@relnull@*/ 00037 hashFunctionType fn; 00038 /*@relnull@*/ 00039 hashEqualityType eq; 00040 #if defined(__LCLINT__) 00041 /*@refs@*/ 00042 int nrefs; 00043 #endif 00044 }; 00045 00052 static /*@shared@*/ /*@null@*/ 00053 hashBucket findEntry(hashTable ht, const void * key) 00054 /*@*/ 00055 { 00056 rpmuint32_t hash = 0; 00057 hashBucket b; 00058 00059 /*@-modunconnomods@*/ 00060 hash = ht->fn(hash, key, 0) % ht->numBuckets; 00061 b = ht->buckets[hash]; 00062 00063 while (b && b->key && ht->eq(b->key, key)) 00064 b = b->next; 00065 /*@=modunconnomods@*/ 00066 00067 return b; 00068 } 00069 00070 int hashEqualityString(const void * key1, const void * key2) 00071 { 00072 const char *k1 = (const char *)key1; 00073 const char *k2 = (const char *)key2; 00074 return strcmp(k1, k2); 00075 } 00076 00077 rpmuint32_t hashFunctionString(rpmuint32_t h, const void * data, size_t size) 00078 { 00079 const char *key = data; 00080 00081 if (size == 0) 00082 size = strlen(key); 00083 00084 /* 00085 * DJBX33A (Daniel J. Bernstein, Times 33 with Addition) 00086 * 00087 * This is Daniel J. Bernstein's popular `times 33' hash function as 00088 * posted by him years ago on comp.lang.c. It basically uses a function 00089 * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best 00090 * known hash functions for strings. Because it is both computed very 00091 * fast and distributes very well. 00092 * 00093 * The magic of number 33, i.e. why it works better than many other 00094 * constants, prime or not, has never been adequately explained by 00095 * anyone. So I try an explanation: if one experimentally tests all 00096 * multipliers between 1 and 256 (as RSE did now) one detects that even 00097 * numbers are not useable at all. The remaining 128 odd numbers 00098 * (except for the number 1) work more or less all equally well. They 00099 * all distribute in an acceptable way and this way fill a hash table 00100 * with an average percent of approx. 86%. 00101 * 00102 * If one compares the Chi^2 values of the variants, the number 33 not 00103 * even has the best value. But the number 33 and a few other equally 00104 * good numbers like 17, 31, 63, 127 and 129 have nevertheless a great 00105 * advantage to the remaining numbers in the large set of possible 00106 * multipliers: their multiply operation can be replaced by a faster 00107 * operation based on just one shift plus either a single addition 00108 * or subtraction operation. And because a hash function has to both 00109 * distribute good _and_ has to be very fast to compute, those few 00110 * numbers should be preferred and seems to be the reason why Daniel J. 00111 * Bernstein also preferred it. 00112 * 00113 * Below you can find the variant implemented with the 00114 * multiplication optimized via bit shifts and hash unrolled eight 00115 * times for speed. 00116 * -- Ralf S. Engelschall <rse@engelschall.com> 00117 */ 00118 if (h == 0) 00119 h = 5381; 00120 for (; size >= 8; size -= 8) { 00121 h = ((h << 5) + h) + (rpmuint32_t)*key++; 00122 h = ((h << 5) + h) + (rpmuint32_t)*key++; 00123 h = ((h << 5) + h) + (rpmuint32_t)*key++; 00124 h = ((h << 5) + h) + (rpmuint32_t)*key++; 00125 h = ((h << 5) + h) + (rpmuint32_t)*key++; 00126 h = ((h << 5) + h) + (rpmuint32_t)*key++; 00127 h = ((h << 5) + h) + (rpmuint32_t)*key++; 00128 h = ((h << 5) + h) + (rpmuint32_t)*key++; 00129 } 00130 switch (size) { 00131 case 7: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/ 00132 case 6: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/ 00133 case 5: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/ 00134 case 4: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/ 00135 case 3: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/ 00136 case 2: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/ 00137 case 1: h = ((h << 5) + h) + (rpmuint32_t)*key++; break; 00138 default: /* case 0: */ break; 00139 } 00140 00141 return h; 00142 } 00143 00144 void htAddEntry(hashTable ht, const void * key, const void * data) 00145 { 00146 rpmuint32_t hash = 0; 00147 hashBucket b; 00148 00149 hash = ht->fn(hash, key, 0) % ht->numBuckets; 00150 b = ht->buckets[hash]; 00151 00152 while (b && b->key && ht->eq(b->key, key)) 00153 b = b->next; 00154 00155 if (b == NULL) { 00156 b = xmalloc(sizeof(*b)); 00157 if (ht->keySize) { 00158 char *k = xmalloc(ht->keySize); 00159 memcpy(k, key, ht->keySize); 00160 b->key = k; 00161 } else { 00162 b->key = key; 00163 } 00164 b->dataCount = 0; 00165 b->next = ht->buckets[hash]; 00166 b->data = NULL; 00167 ht->buckets[hash] = b; 00168 } 00169 00170 b->data = xrealloc(b->data, sizeof(*b->data) * (b->dataCount + 1)); 00171 b->data[b->dataCount++] = data; 00172 } 00173 00174 int htHasEntry(hashTable ht, const void * key) 00175 { 00176 hashBucket b; 00177 00178 if (!(b = findEntry(ht, key))) return 0; else return 1; 00179 } 00180 00181 int htGetEntry(hashTable ht, const void * key, const void * data, 00182 int * dataCount, const void * tableKey) 00183 { 00184 hashBucket b; 00185 00186 if ((b = findEntry(ht, key)) == NULL) 00187 return 1; 00188 00189 if (data) 00190 *(const void ***)data = (const void **) b->data; 00191 if (dataCount) 00192 *dataCount = b->dataCount; 00193 if (tableKey) 00194 *(const void **)tableKey = b->key; 00195 00196 return 0; 00197 } 00198 00199 const void ** htGetKeys(hashTable ht) 00200 { 00201 const void ** keys = xcalloc(ht->numBuckets+1, sizeof(const void*)); 00202 const void ** keypointer = keys; 00203 hashBucket b, n; 00204 int i; 00205 00206 for (i = 0; i < ht->numBuckets; i++) { 00207 b = ht->buckets[i]; 00208 if (b == NULL) 00209 continue; 00210 if (b->data) 00211 *(keys++) = b->key; 00212 do { 00213 n = b->next; 00214 if(n != NULL) 00215 *(keys++) = n->key; 00216 } while ((b = n) != NULL); 00217 } 00218 00219 return keypointer; 00220 } 00221 00222 /*@-mustmod@*/ /* XXX splint on crack */ 00223 static void htFini(void * _ht) 00224 /*@modifies _ht @*/ 00225 { 00226 hashTable ht = _ht; 00227 hashBucket b, n; 00228 int i; 00229 00230 for (i = 0; i < ht->numBuckets; i++) { 00231 b = ht->buckets[i]; 00232 if (b == NULL) 00233 continue; 00234 ht->buckets[i] = NULL; 00235 if (ht->keySize > 0) 00236 b->key = _free(b->key); 00237 do { 00238 n = b->next; 00239 if (b->data) { 00240 if (ht->freeData) 00241 *b->data = _free(*b->data); 00242 b->data = _free(b->data); 00243 } 00244 b = _free(b); 00245 } while ((b = n) != NULL); 00246 } 00247 00248 ht->buckets = _free(ht->buckets); 00249 } 00250 /*@=mustmod@*/ 00251 00252 /*@unchecked@*/ /*@only@*/ /*@null@*/ 00253 rpmioPool _htPool; 00254 00255 static hashTable htGetPool(/*@null@*/ rpmioPool pool) 00256 /*@globals _htPool, fileSystem @*/ 00257 /*@modifies pool, _htPool, fileSystem @*/ 00258 { 00259 hashTable ht; 00260 00261 if (_htPool == NULL) { 00262 _htPool = rpmioNewPool("ht", sizeof(*ht), -1, _ht_debug, 00263 NULL, NULL, htFini); 00264 pool = _htPool; 00265 } 00266 return (hashTable) rpmioGetPool(pool, sizeof(*ht)); 00267 } 00268 00269 hashTable htCreate(int numBuckets, size_t keySize, int freeData, 00270 hashFunctionType fn, hashEqualityType eq) 00271 { 00272 hashTable ht = htGetPool(_htPool); 00273 00274 ht->numBuckets = numBuckets; 00275 ht->buckets = xcalloc(numBuckets, sizeof(*ht->buckets)); 00276 ht->keySize = keySize; 00277 ht->freeData = freeData; 00278 /*@-assignexpose@*/ 00279 ht->fn = (fn != NULL ? fn : hashFunctionString); 00280 ht->eq = (eq != NULL ? eq : hashEqualityString); 00281 /*@=assignexpose@*/ 00282 00283 return htLink(ht); 00284 }