1 /* hash.c - hash table lookup strings -
2 Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
4 This file is part of GAS, the GNU Assembler.
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
21 * BUGS, GRIPES, APOLOGIA etc.
23 * A typical user doesn't need ALL this: I intend to make a library out
24 * of it one day - Dean Elsner.
25 * Also, I want to change the definition of a symbol to (address,length)
26 * so I can put arbitrary binary in the names stored. [see hsh.c for that]
28 * This slime is common coupled inside the module. Com-coupling (and other
29 * vandalism) was done to speed running time. The interfaces at the
30 * module's edges are adequately clean.
32 * There is no way to (a) run a test script through this heap and (b)
33 * compare results with previous scripts, to see if we have broken any
34 * code. Use GNU (f)utilities to do this. A few commands assist test.
35 * The testing is awkward: it tries to be both batch & interactive.
36 * For now, interactive rules!
38 * $FreeBSD: src/gnu/usr.bin/as/hash.c,v 1.7 1999/08/27 23:34:17 peter Exp $
39 * $DragonFly: src/gnu/usr.bin/as/Attic/hash.c,v 1.2 2003/06/17 04:25:44 dillon Exp $
43 * The idea is to implement a symbol table. A test jig is here.
44 * Symbols are arbitrary strings; they can't contain '\0'.
45 * [See hsh.c for a more general symbol flavour.]
46 * Each symbol is associated with a char*, which can point to anything
47 * you want, allowing an arbitrary property list for each symbol.
49 * The basic operations are:
51 * new creates symbol table, returns handle
52 * find (symbol) returns char*
53 * insert (symbol,char*) error if symbol already in table
54 * delete (symbol) returns char* if symbol was in table
55 * apply so you can delete all symbols before die()
56 * die destroy symbol table (free up memory)
58 * Supplementary functions include:
60 * say how big? what % full?
61 * replace (symbol,newval) report previous value
62 * jam (symbol,value) assert symbol:=value
64 * You, the caller, have control over errors: this just reports them.
66 * This package requires malloc(), free().
67 * Malloc(size) returns NULL or address of char[size].
68 * Free(address) frees same.
72 * The code and its structures are re-entrant.
73 * Before you do anything else, you must call hash_new() which will
74 * return the address of a hash-table-control-block (or NULL if there
75 * is not enough memory). You then use this address as a handle of the
76 * symbol table by passing it to all the other hash_...() functions.
77 * The only approved way to recover the memory used by the symbol table
78 * is to call hash_die() with the handle of the symbol table.
80 * Before you call hash_die() you normally delete anything pointed to
81 * by individual symbols. After hash_die() you can't use that symbol
84 * The char* you associate with a symbol may not be NULL (0) because
85 * NULL is returned whenever a symbol is not in the table. Any other
86 * value is OK, except DELETED, #defined below.
88 * When you supply a symbol string for insertion, YOU MUST PRESERVE THE
89 * STRING until that symbol is deleted from the table. The reason is that
90 * only the address you supply, NOT the symbol string itself, is stored
91 * in the symbol table.
93 * You may delete and add symbols arbitrarily.
94 * Any or all symbols may have the same 'value' (char *). In fact, these
95 * routines don't do anything with your symbol values.
97 * You have no right to know where the symbol:char* mapping is stored,
98 * because it moves around in memory; also because we may change how it
99 * works and we don't want to break your code do we? However the handle
100 * (address of struct hash_control) is never changed in
101 * the life of the symbol table.
103 * What you CAN find out about a symbol table is:
104 * how many slots are in the hash table?
105 * how many slots are filled with symbols?
106 * (total hashes,collisions) for (reads,writes) (*)
107 * All of the above values vary in time.
108 * (*) some of these numbers will not be meaningful if we change the
115 * Hash table is an array of hash_entries; each entry is a pointer to a
116 * string and a user-supplied value 1 char* wide.
118 * The array always has 2 ** n elements, n>0, n integer.
119 * There is also a 'wall' entry after the array, which is always empty
120 * and acts as a sentinel to stop running off the end of the array.
121 * When the array gets too full, we create a new array twice as large
122 * and re-hash the symbols into the new array, then forget the old array.
123 * (Of course, we copy the values into the new array before we junk the
132 #define TRUE (!FALSE)
133 #endif /* no FALSE yet */
136 #define min(a, b) ((a) < (b) ? (a) : (b)) /* NOTE: one argument is evaluated twice; pass side-effect-free expressions */
140 #define error as_fatal /* error() calls in this file are routed to as_fatal */
142 #define DELETED ((char *)1) /* guaranteed invalid address; stored in hash_string to mark a deleted slot */
143 #define START_POWER (11) /* power of two: size of new hash table *//* JF was 6 */
144 /* JF These next two aren't used any more. */
145 /* #define START_SIZE (64) / * 2 ** START_POWER */
146 /* #define START_FULL (32) / * number of entries before table expands */
147 #define islive(ptr) (ptr->hash_string && ptr->hash_string != DELETED) /* NOTE(review): ptr is not parenthesized in the expansion; pass a plain identifier */
148 /* above TRUE if a symbol is in entry @ ptr */
150 #define STAT_SIZE (0) /* number of slots in hash table */
151 /* the wall does not count here */
152 /* we expect this is always a power of 2 */
153 #define STAT_ACCESS (1) /* number of hash_ask()s; base index, STAT__READ or STAT__WRITE is added to it */
154 #define STAT__READ (0) /* reading */
155 #define STAT__WRITE (1) /* writing */
156 #define STAT_COLLIDE (3) /* number of collisions (total); base index, offset by access type like STAT_ACCESS */
157 /* this may exceed STAT_ACCESS if we have */
158 /* lots of collisions/access */
159 #define STAT_USED (5) /* slots used right now */
160 #define STATLENGTH (6) /* size of statistics block */
161 #if STATLENGTH != HASH_STATLENGTH
162 Panic! Please make #include "stat.h" agree with previous definitions!
165 /* #define SUSPECT to do runtime checks */
166 /* #define TEST to be a test jig for hash...() */
168 #ifdef TEST /* TEST: use smaller hash table */
170 #define START_POWER (3)
172 #define START_SIZE (8)
174 #define START_FULL (4)
177 /*------------------ plan ---------------------------------- i = internal
179 struct hash_control * c;
180 struct hash_entry * e; i
181 int b[z]; buffer for statistics
183 char * s; symbol string (address) [ key ]
184 char * v; value string (address) [datum]
185 boolean f; TRUE if we found s in hash table i
186 char * t; error string; "" means OK
187 int a; access type [0...n) i
189 c=hash_new () create new hash_control
191 hash_die (c) destroy hash_control (and hash table)
192 table should be empty.
193 doesn't check if table is empty.
194 c has no meaning after this.
196 hash_say (c,b,z) report statistics of hash_control.
197 also report number of available statistics.
199 v=hash_delete (c,s) delete symbol, return old value if any.
200 ask() NULL means no old value.
203 v=hash_replace (c,s,v) replace old value of s with v.
204 ask() NULL means no old value: no table change.
207 t=hash_insert (c,s,v) insert (s,v) in c.
208 ask() return error string.
209 f it is an error to insert if s is already
211 if any error, c is unchanged.
213 t=hash_jam (c,s,v) assert that new value of s will be v. i
214 ask() it may decide to GROW the table. i
217 t=hash_grow (c) grow the hash table. i
218 jam() will invoke JAM. i
220 ?=hash_apply (c,y) apply y() to every symbol in c.
221 y entries visited in 'unspecified' order.
223 v=hash_find (c,s) return value of s, or NULL if s not in c.
227 f,e=hash_ask() (c,s,a) return slot where s SHOULD live. i
228 code() maintain collision stats in c. i
230 .=hash_code (c,s) compute hash-code for s, i
231 from parameters of c. i
235 static char hash_found; /* returned by hash_ask() to stop extra */
236 /* testing. hash_ask() wants to return both */
237 /* a slot and a status. This is the status. */
238 /* TRUE: found symbol */
239 /* FALSE: absent: empty or deleted slot */
240 /* Also returned by hash_jam(). */
241 /* TRUE: we replaced a value */
242 /* FALSE: we inserted a value */
244 static struct hash_entry * hash_ask(); /* find the slot for a symbol; status goes in hash_found */
245 static int hash_code (); /* hash a symbol string to a table index */
246 static char * hash_grow(); /* double the table; returns an error string, "" if OK */
249 * h a s h _ n e w ( )
252 struct hash_control *
253 hash_new() /* create a new hash table */
254 /* return NULL if failed */
255 /* return handle (address of struct hash) */
257 register struct hash_control * retval;
258 register struct hash_entry * room; /* points to hash table */
259 register struct hash_entry * wall; /* sentinel slot just past the last real entry */
260 register struct hash_entry * entry; /* scans the table while clearing it */
261 register int * ip; /* scan stats block of struct hash_control */
262 register int * nd; /* limit of stats block */
264 if (( room = (struct hash_entry *) malloc( sizeof(struct
265 hash_entry)*((1<<START_POWER) + 1) ) ) != NULL)
266 /* +1 for the wall entry */
268 if (( retval = (struct hash_control *) malloc(sizeof(struct
269 hash_control)) ) != NULL)
271 nd = retval->hash_stat + STATLENGTH;
272 for (ip=retval->hash_stat; ip<nd; ip++) /* visit every counter in the statistics block */
277 retval->hash_stat[STAT_SIZE] = 1<<START_POWER;
278 retval->hash_mask = (1<<START_POWER) - 1; /* low-bit mask; hash_code() ANDs its result with it */
279 retval->hash_sizelog = START_POWER; /* log2 of the table size; hash_code() derives its shift from it */
280 /* works for 1's compl ok */
281 retval->hash_where = room;
283 wall = room + (1<<START_POWER);
284 retval->hash_full = (1<<START_POWER)/2; /* grow threshold: insert/jam compare STAT_USED against it */
285 for (entry=room; entry <= wall; entry++) /* <= so the wall entry is cleared too */
287 entry->hash_string = NULL; /* NULL string marks a vacant slot */
293 retval = NULL; /* no room for table: fake a failure */
295 return(retval); /* return NULL or set-up structs */
299 * h a s h _ d i e ( )
301 * Table should be empty, but this is not checked.
302 * To empty the table, try hash_apply()ing a symbol deleter.
303 * Return to free memory both the hash table and its control
305 * 'handle' has no meaning after this function.
306 * No errors are recoverable.
310 struct hash_control * handle; /* table to destroy; must not be used afterwards */
312 free((char *)handle->hash_where); /* release the entry array while handle is still valid */
313 free((char *)handle); /* then release the control block itself */
317 * h a s h _ s a y ( )
319 * Return the size of the statistics table, and as many statistics as
320 * we can until either (a) we have run out of statistics or (b) caller
321 * has run out of buffer.
322 * NOTE: hash_say treats all statistics alike.
323 * These numbers may change with time, due to insertions, deletions
324 * and expansions of the table.
325 * The first "statistic" returned is the length of hash_stat[].
326 * Then contents of hash_stat[] are read out (in ascending order)
327 * until your buffer or hash_stat[] is exhausted.
330 hash_say(handle,buffer,bufsiz) /* copy statistics into the caller's buffer */
331 register struct hash_control * handle;
332 register int buffer[/*bufsiz*/];
335 register int * nd; /* limit of statistics block */
336 register int * ip; /* scan statistics */
338 ip = handle->hash_stat;
339 nd = ip + min(bufsiz-1,STATLENGTH); /* bufsiz-1: one buffer slot is taken by the STATLENGTH header */
340 if (bufsiz>0) /* trust nothing! bufsiz <= 0 is dangerous */
342 *buffer++ = STATLENGTH; /* first element reported is the statistics count */
343 for (; ip<nd; ip++,buffer++) /* step through hash_stat[] and the buffer together */
351 * h a s h _ d e l e t e ( )
353 * Try to delete a symbol from the table.
354 * If it was there, return its value (and adjust STAT_USED).
355 * Otherwise, return NULL.
356 * Anyway, the symbol is not present after this function.
359 char * /* NULL if string not in table, else */
360 /* returns value of deleted symbol */
361 hash_delete(handle,string)
362 register struct hash_control * handle;
363 register char * string;
365 register char * retval; /* NULL if string not in table */
366 register struct hash_entry * entry; /* NULL or entry of this symbol */
368 entry = hash_ask(handle,string,STAT__WRITE); /* locate the slot; counted as a write access */
371 retval = entry->hash_value; /* save the value to hand back to the caller */
372 entry->hash_string = DELETED; /* mark as deleted */
373 handle->hash_stat[STAT_USED] -= 1; /* slots-in-use count */
375 if (handle->hash_stat[STAT_USED]<0) /* sanity check: the in-use count went negative */
377 error("hash_delete");
379 #endif /* def SUSPECT */
389 * h a s h _ r e p l a c e ( )
391 * Try to replace the old value of a symbol with a new value.
392 * Normally return the old value.
393 * Return NULL and don't change the table if the symbol is not already
397 hash_replace(handle,string,value) /* swap in a new value, report the old one */
398 register struct hash_control * handle;
399 register char * string;
400 register char * value;
402 register struct hash_entry * entry;
403 register char * retval;
405 entry = hash_ask(handle,string,STAT__WRITE); /* locate the slot; counted as a write access */
408 retval = entry->hash_value; /* remember the previous value before overwriting */
409 entry->hash_value = value;
420 * h a s h _ i n s e r t ( )
422 * Insert a (symbol-string, value) into the hash table.
423 * Return an error string, "" means OK.
424 * It is an 'error' to insert an existing symbol.
427 char * /* return error string */
428 hash_insert(handle,string,value)
429 register struct hash_control * handle;
430 register char * string;
431 register char * value;
433 register struct hash_entry * entry;
434 register char * retval;
437 if (handle->hash_stat[STAT_USED] > handle->hash_full) /* past the fill threshold */
439 retval = hash_grow(handle); /* enlarge the table; retval receives its error string */
443 entry = hash_ask(handle,string,STAT__WRITE); /* locate the slot; counted as a write access */
450 entry->hash_value = value;
451 entry->hash_string = string; /* only the pointer is stored; the string is not copied */
452 handle->hash_stat[STAT_USED] += 1; /* one more slot in use */
459 * h a s h _ j a m ( )
461 * Regardless of what was in the symbol table before, after hash_jam()
462 * the named symbol has the given value. The symbol is either inserted or
463 * (its value is) replaced.
464 * An error message string is returned, "" means OK.
466 * WARNING: this may decide to grow the hashed symbol table.
467 * To do this, we call hash_grow(), WHICH WILL recursively CALL US.
469 * We report status internally: hash_found is TRUE if we replaced, but
470 * false if we inserted.
473 hash_jam(handle,string,value) /* force symbol := value, inserting or replacing */
474 register struct hash_control * handle;
475 register char * string;
476 register char * value;
478 register char * retval;
479 register struct hash_entry * entry;
482 if (handle->hash_stat[STAT_USED] > handle->hash_full) /* past the fill threshold */
484 retval = hash_grow(handle); /* enlarge the table; retval receives its error string */
488 entry = hash_ask(handle,string,STAT__WRITE); /* locate the slot; counted as a write access */
491 entry->hash_string = string; /* only the pointer is stored; the string is not copied */
492 handle->hash_stat[STAT_USED] += 1; /* count the newly occupied slot */
494 entry->hash_value = value; /* install the caller's value */
500 * h a s h _ g r o w ( )
502 * Grow a new (bigger) hash table from the old one.
503 * We choose to double the hash table's size.
504 * Return a human-scrutable error string: "" if OK.
505 * Warning! This uses hash_jam(), which had better not recurse
506 * back here! Hash_jam() conditionally calls us, but we ALWAYS
511 hash_grow(handle) /* make a hash table grow */
512 struct hash_control * handle;
514 register struct hash_entry * newwall; /* sentinel slot of the new table */
515 register struct hash_entry * newwhere; /* start of the new table */
516 struct hash_entry * newtrack; /* scans the new table while clearing it */
517 register struct hash_entry * oldtrack; /* scans the old table while re-inserting */
518 register struct hash_entry * oldwhere; /* start of the old table */
519 register struct hash_entry * oldwall; /* sentinel slot of the old table */
529 * capture info about old hash table
531 oldwhere = handle->hash_where;
532 oldwall = handle->hash_wall;
534 oldused = handle->hash_stat[STAT_USED]; /* expected symbol count, checked after re-insertion */
537 * attempt to get enough room for a hash table twice as big
539 temp = handle->hash_stat[STAT_SIZE];
540 if (( newwhere = (struct hash_entry *)
541 xmalloc((long)((temp+temp+1)*sizeof(struct hash_entry)))) != NULL) /* NOTE(review): xmalloc here, malloc in hash_new() -- confirm xmalloc can actually return NULL */
542 /* +1 for wall slot */
544 retval = ""; /* assume success until proven otherwise */
546 * have enough room: now we do all the work.
547 * double the size of everything in handle,
548 * note: hash_mask frob works for 1's & for 2's complement machines
550 handle->hash_mask = handle->hash_mask + handle->hash_mask + 1; /* 2*mask+1: one more low bit set */
551 handle->hash_stat[STAT_SIZE] <<= 1;
552 newsize = handle->hash_stat[STAT_SIZE];
553 handle->hash_where = newwhere;
554 handle->hash_full <<= 1; /* the fill threshold doubles with the table */
555 handle->hash_sizelog += 1;
556 handle->hash_stat[STAT_USED] = 0;
558 newwall = newwhere + newsize;
560 * set all those pesky new slots to vacant.
562 for (newtrack=newwhere; newtrack <= newwall; newtrack++) /* <= so the wall slot is cleared too */
564 newtrack->hash_string = NULL;
567 * we will do a scan of the old table, the hard way, using the
568 * new control block to re-insert the data into new hash table.
570 handle->hash_stat[STAT_USED] = 0; /* inserts will bump it up to correct; it was already zeroed above */
571 for (oldtrack=oldwhere; oldtrack < oldwall; oldtrack++) /* the old wall slot itself is not visited */
573 if (((string = oldtrack->hash_string) != NULL) && string != DELETED) /* skip vacant and deleted slots */
575 if ( * (retval = hash_jam(handle,string,oldtrack->hash_value) ) ) /* a non-empty string means hash_jam() reported an error */
582 if ( !*retval && handle->hash_stat[STAT_USED] != oldused) /* re-inserted count must match the old count */
584 retval = "hash_used";
590 * we have a completely faked up control block.
591 * return the old hash table.
593 free((char *)oldwhere); /* handle->hash_where already points at the new array */
595 * Here with success. retval is already "".
607 * h a s h _ a p p l y ( )
609 * Use this to scan each entry in symbol table.
610 * For each symbol, this calls (applies) a nominated function supplying the
611 * symbol's value (and the symbol's name).
612 * The idea is you use this to destroy whatever is associated with
613 * any values in the table BEFORE you destroy the table with hash_die.
614 * Of course, you can use it for other jobs; whenever you need to
615 * visit all extant symbols in the table.
617 * We choose to have a call-you-back idea for two reasons:
618 * aesthetic: it is a neater idea to use apply than an explicit loop
619 * sensible: if we ever had to grow the symbol table (due to insertions)
620 * then we would lose our place in the table when we re-hashed
621 * symbols into the new table in a different order.
623 * The order symbols are visited depends entirely on the hashing function.
624 * Whenever you insert a (symbol, value) you risk expanding the table. If
625 * you do expand the table, then the hashing function WILL change, so you
626 * MIGHT get a different order of symbols visited. In other words, if you
627 * want the same order of visiting symbols as the last time you used
628 * hash_apply() then you better not have done any hash_insert()s or
629 * hash_jam()s since the last time you used hash_apply().
631 * In future we may use the value returned by your nominated function.
632 * One idea is to abort the scan if, after applying the function to a
633 * certain node, the function returns a certain code.
634 * To be safe, please make your functions of type char *. If you always
635 * return NULL, then the scan will complete, visiting every symbol in
636 * the table exactly once. ALL OTHER RETURNED VALUES have no meaning yet!
639 * The function you supply should be of the form:
640 * char * myfunct(string,value)
641 * char * string; |* the symbol's name *|
642 * char * value; |* the symbol's value *|
648 * The returned value of hash_apply() is (char*)NULL. In future it may return
649 * other values. NULL means "completed scan OK". Other values have no meaning
650 * yet. (The function has no graceful failures.)
653 hash_apply(handle,function) /* call function(name, value) for every live symbol */
654 struct hash_control * handle;
657 register struct hash_entry * entry;
658 register struct hash_entry * wall;
660 wall = handle->hash_wall;
661 for (entry = handle->hash_where; entry < wall; entry++) /* every slot up to, but not including, the wall */
663 if (islive(entry)) /* silly code: tests entry->string twice! */
665 (*function)(entry->hash_string,entry->hash_value); /* the callback's return value is discarded */
672 * h a s h _ f i n d ( )
674 * Given symbol string, find value (if any).
675 * Return found value or NULL.
678 hash_find(handle,string) /* return char* or NULL */
679 struct hash_control * handle;
682 register struct hash_entry * entry;
683 register char * retval;
685 entry = hash_ask(handle,string,STAT__READ); /* locate the slot; counted as a read access */
688 retval = entry->hash_value;
698 * h a s h _ a s k ( )
700 * Searches for given symbol string.
701 * Return the slot where it OUGHT to live. It may be there.
702 * Return hash_found: TRUE only if symbol is in that slot.
703 * Access argument is to help keep statistics in control block.
706 static struct hash_entry * /* string slot, may be empty or deleted */
707 hash_ask(handle,string,access)
708 struct hash_control * handle;
710 int access; /* access type: STAT__READ or STAT__WRITE; offsets the statistics index */
712 register char *string1; /* JF avoid strcmp calls */
715 register struct hash_entry * slot;
716 register int collision; /* count collisions */
718 slot = handle->hash_where + hash_code(handle,string); /* start looking here */
719 handle->hash_stat[STAT_ACCESS+access] += 1; /* bump the read or write access counter */
722 while (((s = slot->hash_string) != NULL) && s != DELETED) /* probe until a vacant (NULL) or DELETED slot */
724 for (string1=string;;) {
725 if ((c= *s++) == 0) {
740 * in use: we found string slot
742 * at wall: we fell off: wrap round ????
743 * in table: dig here slot
744 * at DELETED: dig here slot
746 if (slot == handle->hash_wall) /* the first scan ended on the sentinel slot */
748 slot = handle->hash_where; /* now look again */
749 while (((s = slot->hash_string) != NULL) && s != DELETED) /* second scan, restarted from the first slot */
751 for (string1=string;*s;string1++,s++) {
755 if (*s == *string1) {
764 * in use: we found it slot
765 * empty: wall: ERROR IMPOSSIBLE !!!!
766 * in table: dig here slot
767 * DELETED:dig here slot
770 /* fprintf(stderr,"hash_ask(%s)->%d(%d)\n",string,hash_code(handle,string),collision); */
771 handle->hash_stat[STAT_COLLIDE+access] += collision; /* add to the read or write collision counter */
772 return(slot); /* also return hash_found */
778 * Does hashing of symbol string to hash number.
782 hash_code(handle,string) /* map a symbol string to an index into the table */
783 struct hash_control * handle;
784 register char * string;
786 register long h; /* hash code built here */
787 register long c; /* each character lands here */
788 register int n; /* Amount to shift h by */
790 n = (handle->hash_sizelog - 3); /* right-shift count, derived from log2 of the table size */
792 while ((c = *string++) != 0) /* consume characters up to the terminating NUL */
795 h = (h<<3) + (h>>n) + c; /* shift left 3, fold in h>>n, add the character; NOTE(review): h is a signed long, so h<<3 may overflow on long strings */
797 return (h & handle->hash_mask); /* mask down to a table index */
801 * Here is a test program to exercise above.
805 #define TABLES (6) /* number of hash tables to maintain */
806 /* (at once) in any testing */
807 #define STATBUFSIZE (12) /* we can have 12 statistics */
809 int statbuf[STATBUFSIZE]; /* display statistics here */
810 char answer[100]; /* line of user input, filled by fgets() */
811 char * hashtable[TABLES]; /* we test many hash tables at once */
812 char * h; /* points to current hash_control */
820 int number; /* number 0:TABLES-1 of current hashed */
825 char (*applicatee());
829 struct hash_control * hash_new();
830 char * hash_replace();
835 printf("type h <RETURN> for help\n");
838 printf("hash_test command: ");
839 fgets(answer, 100, stdin); /* NOTE(review): the fgets() result is not checked */
841 if (isupper(command)) command = tolower(command); /* ecch! */
845 printf("old hash table #=%d.\n",number);
849 for (pp=hashtable; pp<hashtable+TABLES; pp++) /* one pass over every table handle */
851 printf("address of hash table #%d control block is %xx\n" /* NOTE(review): %xx prints hex followed by a literal x */
856 hash_apply(h,applicatee); /* display every symbol in the current table */
859 hash_apply(h,destroy); /* run destroy() on every symbol in the current table */
863 p = hash_find(h,name=what("symbol")); /* what() returns a heap copy of the typed line */
864 printf("value of \"%s\" is \"%s\"\n",name,p?p:"NOT-PRESENT");
867 printf("# show old, select new default hash table number\n");
868 printf("? display all hashtable control block addresses\n");
869 printf("a apply a simple display-er to each symbol in table\n");
870 printf("d die: destroy hashtable\n");
871 printf("f find value of nominated symbol\n");
872 printf("h this help\n");
873 printf("i insert value into symbol\n");
874 printf("j jam value into symbol\n");
875 printf("n new hashtable\n");
876 printf("r replace a value with another\n");
877 printf("s say what %% of table is used\n");
878 printf("q exit this program\n");
879 printf("x delete a symbol from table, report its value\n");
882 p = hash_insert(h,name=what("symbol"),value=what("value")); /* NOTE(review): argument evaluation order is unspecified, so the two prompts may appear in either order */
885 printf("symbol=\"%s\" value=\"%s\" error=%s\n",name,value,p);
889 p = hash_jam(h,name=what("symbol"),value=what("value")); /* same unspecified prompt order as the insert command */
892 printf("symbol=\"%s\" value=\"%s\" error=%s\n",name,value,p);
896 h = hashtable[number] = (char *) hash_new(); /* handle is kept as a plain char* in the test jig */
901 p = hash_replace(h,name=what("symbol"),value=what("value")); /* same unspecified prompt order as the insert command */
902 printf("old value was \"%s\"\n",p?p:"{}");
905 hash_say(h,statbuf,STATBUFSIZE);
906 for (ip=statbuf; ip<statbuf+STATBUFSIZE; ip++) /* walks all STATBUFSIZE slots of statbuf */
913 p = hash_delete(h,name=what("symbol"));
914 printf("old value was \"%s\"\n",p?p:"{}");
917 printf("I can't understand command \"%c\"\n",command);
930 printf(" %s : ",description); /* prompt with the caller's description */
931 fgets(answer, 100, stdin); /* NOTE(review): result unchecked; any trailing newline stays in answer */
932 /* will one day clean up answer here */
933 retval = malloc(strlen(answer)+1); /* +1 for the terminating NUL */
938 (void)strcpy(retval,answer); /* copy the line out of the shared answer buffer */
943 destroy(string,value) /* callback the test driver passes to hash_apply() */
954 applicatee(string,value) /* hash_apply() callback: display one symbol and its value */
958 printf("%.20s-%.20s\n",string,value); /* at most 20 characters of each are printed */
962 whattable() /* determine number: what hash table to use */
963 /* also determine h: points to hash_control */
968 printf(" what hash table (%d:%d) ? ",0,TABLES-1);
969 fgets(answer, 100, stdin); /* NOTE(review): the fgets() result is not checked */
970 sscanf(answer,"%d",&number); /* NOTE(review): result unchecked; number keeps its old value if no integer is parsed */
971 if (number >= 0 && number<TABLES) /* accept only a valid index into hashtable[] */
973 h = hashtable[number];
976 printf("warning: current hash-table-#%d. has no hash-control\n",number);
982 printf("invalid hash table number: %d\n",number);
989 #endif /* #ifdef TEST */