2 * STRTABLE.C - String table routines
4 * (c)Copyright 1993-2014, Matthew Dillon, All Rights Reserved. See the
5 * COPYRIGHT file at the base of the distribution.
10 #define STR_MAGIC ((int)0xA5F3B211)
11 #define STR_HSIZE 16384
12 #define STR_HMASK (STR_HSIZE - 1)
14 typedef struct StrTable {
15 struct StrTable *st_Next;
17 int st_DataLen; /* does not include terminator */
20 int st_Enumerate; /* Enumeration helper for code gen */
22 char st_Data[0]; /* embedded binary or C string */
25 StrTable *StrTableHash[STR_HSIZE];
28 strhash(const char *str, int bytes)
32 hv = (hv << 5) ^ *str ^ (hv >> 23);
36 return((hv ^ (hv >> 16)) & STR_HMASK);
40 * Obtain a persistent shared string representing the passed binary data.
41 * Multiple references to the same string will return the same shared pointer,
42 * a feature we depend on heavily in the rest of the codebase in order to
43 * avoid comparing strings over and over again.
45 * This code is content-agnostic and may be used for binary data.
47 * The caller has the choice of including the terminating zero for strings
48 * in the official length or not. This code always adds a terminating zero
49 * 'out of band' to make string handling easier.
52 StrTableAlloc(const char *ptr, int len, int special)
56 pst = &StrTableHash[strhash(ptr, len)];
57 while ((st = *pst) != NULL) {
59 * Look for match, bump ref count and return if found. Note
60 * that we can reuse a previously dereferenced but
61 * not-yet-freed string.
63 if (len == st->st_DataLen &&
64 bcmp(ptr, st->st_Data, len) == 0) {
66 st->st_Special = special;
73 if (st->st_Refs == 0) {
76 st->st_Magic = 0xDEADDEED;
77 zfree(st, sizeof(StrTable) + st->st_DataLen + 1);
82 st = zalloc(sizeof(StrTable) + len + 1);
84 st->st_Magic = STR_MAGIC;
87 st->st_Special = special;
88 bcopy(ptr, (char *)&st->st_Data[0], len);
89 ((char *)st->st_Data)[len] = 0; /* out of band terminator */
94 * Convert the string represented by the token into a string table string.
97 StrTableToken(token_t *tok)
99 return(StrTableAlloc(tok->t_Ptr, tok->t_Len, 0));
103 StrCmpToken(string_t id, token_t *tok)
105 int len = strlen(id);
109 n = (len > tok->t_Len) ? tok->t_Len : len;
110 r = strncmp(id, tok->t_Ptr, n);
112 if (len > tok->t_Len)
114 else if (len < tok->t_Len)
125 snprintf(buf, sizeof(buf), "%d", v);
126 return(StrTableAlloc(buf, strlen(buf), 0));
130 * Bump the ref count on a string that is already in the string table.
133 StrTableDup(const char *str)
135 StrTable *st = (StrTable *)(__DECONST(char *,str) -
136 offsetof(StrTable, st_Data[0]));
138 dassert(st->st_Magic == STR_MAGIC);
139 dassert(st->st_Refs > 0);
145 ReplaceStrTable(string_t *pstr, string_t id)
153 * Release a string in the string table. We do not free the structure
154 * immediately, allowing it to be potentially reused. The structure will
155 * be freed when a later StrTableAlloc() encounters it during a hash scan.
158 RelStrTable(string_t *pstr)
162 if ((str = *pstr) != NULL) {
166 st = (StrTable *)(__DECONST(char *, str) -
167 offsetof(StrTable, st_Data[0]));
168 dassert(st->st_Magic == STR_MAGIC);
169 dassert(st->st_Refs > 0);
171 /* do in-line free in later allocation */
176 StrTableSpecial(string_t id)
181 st = (StrTable *)(__DECONST(char *, id) -
182 offsetof(StrTable, st_Data[0]));
183 dassert(st->st_Magic == STR_MAGIC);
184 return(st->st_Special);
190 * Generate a unique id for this string table entry. Typically stored
191 * in the d_BackendId field of a declaration.
194 StrTableEnumerate(string_t id)
198 st = (StrTable *)(__DECONST(char *, id) -
199 offsetof(StrTable, st_Data[0]));
202 return st->st_Enumerate;
206 * Return the length, in bytes, of an identifier. The length does not
207 * include the terminating zero unless the original allocation included
211 StrTableLen(string_t id)
215 st = (StrTable *)(__DECONST(char *, id) -
216 offsetof(StrTable, st_Data[0]));
217 dassert(st->st_Magic == STR_MAGIC);
218 return(st->st_DataLen);
222 * Strip the identifier from the trailing element of the path. This routine
223 * takes a nul-terminated string and returns a pointer and length within
227 StripPathId(const char *path, int *plen)
229 int len = strlen(path);
232 * strip any trailing slashes
234 while (len > 0 && path[len - 1] == '/')
238 * Locate the last element
240 while (len > 0 && path[len - 1] != '/')
245 * Parse the identifier until we hit a '/' or a '.' (extension)
248 while (path[len] && path[len] != '/' && path[len] != '.')
255 SimpleIntToken(token_t *tok, int *rv)
260 if (t != TOK_INTEGER) {
261 t = LexError(tok, TOK_ERR_EXPECTED_SIMPLE_INTEGER);
266 for (i = 0; i < tok->t_Len; ++i) {
267 char c = tok->t_Ptr[i];
270 } else if (c >= '0' && c <= '9') {
271 *rv = *rv * 10 + (c - '0');
274 TOK_ERR_EXPECTED_SIMPLE_INTEGER);
283 SimpleRunesizeToken(token_t *tok, runesize_t *rv)
288 if (t != TOK_INTEGER) {
289 t = LexError(tok, TOK_ERR_EXPECTED_SIMPLE_INTEGER);
294 for (i = 0; i < tok->t_Len; ++i) {
295 char c = tok->t_Ptr[i];
298 } else if (c >= '0' && c <= '9') {
299 *rv = *rv * 10 + (c - '0');
302 TOK_ERR_EXPECTED_SIMPLE_INTEGER);
312 * StrTableEscapeQuotedString() - convert quoted string into actual string.
314 * NOTE: The length of the string in the string table includes the
315 * terminator. The string may also be concatenated.
317 * NOTE: the lexical analyzer does a more robust format check.
322 if (c >= '0' && c <= '9')
324 if (c >= 'a' && c <= 'f')
325 return(c + (10 - 'a'));
326 if (c >= 'A' && c <= 'F')
327 return(c + (10 - 'A'));
332 StrTableEscapeQuotedString(const char *str, int len, int term)
347 for (i = 0; i < len; ++i) {
351 searching = 1 - searching;
353 } else if (searching) {
360 switch((c = str[i])) {
373 c = (hexDigit(str[i-1]) << 4) |
379 dassert(c >= '0' && c <= '7');
386 c = (c << 3) | (str[i] & 7);
392 c = (c << 3) | (str[i] & 7);
403 dbuf = malloc(rlen + 1);
407 dassert(term == 0 || term == 1);
408 id = StrTableAlloc(dbuf, rlen + term, 0);