2 static const char *rcsid =
3 "$FreeBSD: src/lib/libc/stdlib/strhash.c,v 1.8 1999/09/05 17:42:45 peter Exp $";
9 * Terry Jones & Jordan Hubbard
11 * PCS Computer Systeme, GmbH.
12 * Munich, West Germany
15 * All rights reserved.
17 * This is unsupported software and is subject to change without notice.
18 * the author makes no representations about the suitability of this software
19 * for any purpose. It is supplied "as is" without express or implied
22 * Permission to use, copy, modify, and distribute this software and its
23 * documentation for any purpose and without fee is hereby granted, provided
24 * that the above copyright notice appear in all copies and that both that
25 * copyright notice and this permission notice appear in supporting
26 * documentation, and that the name of the author not be used in
27 * advertising or publicity pertaining to distribution of the software
28 * without specific, written prior permission.
33 * This is a fairly simple open addressing hash scheme.
34 * Terry did all the code, I just did the spec.
35 * Thanks again, you crazy Aussie..
41 * Revision 2.0 90/03/26 01:44:26 jkh
44 * Revision 1.8 90/03/09 19:22:35 jkh
45 * Fixed bogus comment.
47 * Revision 1.7 90/03/09 19:01:08 jkh
48 * Added comments, GPL.
50 * Revision 1.6 90/03/08 17:55:58 terry
51 * Rearranged hash_purge to be a tiny bit more efficient.
52 * Added verbose option to hash_stats.
54 * Revision 1.5 90/03/08 17:19:54 terry
55 * Added hash_purge. Added arg to hash_traverse. Changed all
58 * Revision 1.4 90/03/08 12:02:35 terry
59 * Fixed problems with allocation that I screwed up last night.
60 * Changed bucket lists to be singly linked. Thanks to JKH, my hero.
62 * Revision 1.3 90/03/07 21:33:33 terry
63 * Cleaned up a few decls to keep gcc -Wall quiet.
65 * Revision 1.2 90/03/07 21:14:53 terry
66 * Comments. Added HASH_STATS define. Removed hash_find()
69 * Revision 1.1 90/03/07 20:49:45 terry
79 #include <sys/types.h>
82 #define HASH_NULL (hash_table *)0
83 #define NODE_NULL (hash_node *)0
84 #define GENERIC_NULL (void *)0
89 static int _hash(int size, char *key);
90 static hash_node *list_find(caddr_t key, hash_node *head);
96 * Malloc room for a new hash table and then room for its
97 * bucket pointers. Then set all the buckets to
98 * point to 0. Return the address of the new table.
101 hash_create(int size)
104 hash_table *new = (hash_table *)malloc(sizeof(hash_table));
106 if (!new || size < 0){
114 if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){
118 for (i = 0; i < size; i++){
119 new->buckets[i] = NODE_NULL;
130 * Find the key in the linked list pointed to by head.
133 list_find(caddr_t key, hash_node *head)
136 if (!strcmp(head->key, key)){
148 * Compute the hash value for the given key.
151 _hash(int size, char *key)
153 unsigned int h = 0x0;
156 h = (h << 1) ^ (h ^ (unsigned char) *key++);
166 * Find the key and (if it's there) remove it entirely.
167 * The function (*nukefunc)() is in charge of disposing
168 * of the storage help by the data associated with the node.
171 hash_destroy(hash_table *table, char *key, void (*nukefunc)())
173 int bucket = _hash(table->size, key);
174 hash_node *found = table->buckets[bucket];
175 hash_node *to_free = NODE_NULL;
181 if (!strcmp(found->key, key)) {
183 * It was the head of the list.
185 table->buckets[bucket] = found->next;
190 * Walk the list, looking one ahead.
192 while (found->next) {
193 if (!strcmp(found->next->key, key)) {
194 to_free = found->next;
195 found->next = found->next->next;
207 (*nukefunc)(to_free->key, to_free->data);
216 * Search the table for the given key. Then:
218 * 1) If you find it and there is no replacement function, just
219 * return what you found. (This is a simple search).
220 * 2) If you find it and there is a replacement function, run
221 * the function on the data you found, and replace the old
222 * data with whatever is passed in datum. Return 0.
223 * 3) If you don't find it and there is some datum, insert a
224 * new item into the table. Insertions go at the front of
225 * the bucket. Return 0.
226 * 4) Otherwise just return 0.
230 hash_search(hash_table *table, caddr_t key, void *datum,
231 void (*replace_func)())
233 int bucket = _hash(table->size, key);
234 hash_node *found = list_find(key, table->buckets[bucket]);
241 (*replace_func)(found->data);
248 static int assign_key();
250 hash_node *new = (hash_node *)malloc(sizeof(hash_node));
252 if (!new || !assign_key(key, new)){
256 new->next = table->buckets[bucket];
257 table->buckets[bucket] = new;
268 * Set the key value of a node to be 'key'. Get some space from
269 * malloc and copy it in etc. Return 1 if all is well, 0 otherwise.
272 assign_key(char *key, hash_node *node)
278 if (!(node->key = (char *)malloc(strlen(key) + 1))){
283 strcat(node->key, key);
290 * Traverse the hash table and run the function func on the
291 * data found at each node and the argument we're passed for it.
294 hash_traverse(hash_table *table, int (*func)(), void *arg)
297 register int size = table->size;
302 for (i = 0; i < size; i++) {
303 hash_node *n = table->buckets[i];
305 if ((*func)(n->key, n->data, arg) == 0)
316 * Run through the entire hash table. Call purge_func
317 * on the data found at each node, and then free the node.
318 * Set all the bucket pointers to 0.
321 hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
324 register int size = table->size;
326 for (i = 0; i < size; i++) {
327 hash_node *n = table->buckets[i];
330 hash_node *to_free = n;
332 (*purge_func)(n->key, n->data);
337 table->buckets[i] = NODE_NULL;
343 #define min(a, b) (a) < (b) ? (a) : (b)
348 * Print statistics about the current table allocation to stdout.
351 hash_stats(hash_table *table, int verbose)
354 int total_elements = 0;
355 int non_empty_buckets = 0;
359 int size = table->size;
361 if (!(counts = (int *)malloc(size * sizeof(int)))){
362 fprintf(stderr, "malloc returns 0\n");
366 for (i = 0; i < size; i++){
368 hash_node *n = table->buckets[i];
375 printf("bucket %2d: ", i);
379 printf(" %s", n->key);
385 total_elements += counts[i];
386 if (counts[i] > max_count){
387 max_count = counts[i];
390 else if (counts[i] == max_count){
394 if (counts[i] && verbose){
395 printf(" (%d)\n", counts[i]);
400 printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");
403 printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
404 (double)100 * (double)non_empty_buckets / (double)(size));
405 printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
406 printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
407 printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));