/* * STRTABLE.C - String table routines * * (c)Copyright 1993-2014, Matthew Dillon, All Rights Reserved. See the * COPYRIGHT file at the base of the distribution. */ #include "defs.h" #define STR_MAGIC ((int)0xA5F3B211) #define STR_HSIZE 16384 #define STR_HMASK (STR_HSIZE - 1) typedef struct StrTable { struct StrTable *st_Next; int st_Magic; int st_DataLen; /* does not including terminator */ int st_Refs; int st_Special; int st_Enumerate; /* Enumeration helper for code gen */ int st_Unused01; void *st_Info; /* (used for run-time) */ char st_Data[0]; /* embedded binary or C string */ } StrTable; static StrTable *StrTableHash[STR_HSIZE]; static __inline int strhash(const char *str, int bytes) { int hv = 0xA3F2B364; while (bytes) { hv = (hv << 5) ^ *str ^ (hv >> 23); --bytes; ++str; } return((hv ^ (hv >> 16)) & STR_HMASK); } /* * Obtain a persistent shared string representing the passed binary data. * Multple references to the same string will return the same shared pointer, * a feature we depend on heavily in the rest of the codebase in order to * avoid comparing strings over and over again. * * This code is content-agnostic and may be used for binary data. * * The caller has the choice of including the terminating zero for strings * in the official length or not. This code always adds a terminating zero * 'out of band' to make string handling easier. */ string_t StrTableAlloc(const char *ptr, int len, int special) { StrTable **pst, *st; pst = &StrTableHash[strhash(ptr, len)]; while ((st = *pst) != NULL) { /* * Look for match, bump ref count and return if found. Note * that we can reuse a previously dereferenced but * not-yet-freed string. */ if (len == st->st_DataLen && bcmp(ptr, st->st_Data, len) == 0) { if (special) st->st_Special = special; ++st->st_Refs; return(st->st_Data); } /* * in-scan free */ if (st->st_Refs == 0) { *pst = st->st_Next; st->st_Next = NULL; st->st_Magic = 0xDEADDEED; zfree(st, sizeof(StrTable) + st->st_DataLen + 1); } else { pst = &st->st_Next; } } st = zalloc(sizeof(StrTable) + len + 1); *pst = st; st->st_Magic = STR_MAGIC; st->st_DataLen = len; st->st_Refs = 1; st->st_Special = special; bcopy(ptr, (char *)&st->st_Data[0], len); ((char *)st->st_Data)[len] = 0; /* out of band terminator */ return(st->st_Data); } /* * Convert the string represented by the token into a string table string. */ string_t StrTableToken(token_t *tok) { return(StrTableAlloc(tok->t_Ptr, tok->t_Len, 0)); } int StrCmpToken(string_t id, token_t *tok) { int len = strlen(id); int n; int r; n = (len > tok->t_Len) ? tok->t_Len : len; r = strncmp(id, tok->t_Ptr, n); if (r == 0) { if (len > tok->t_Len) r = 1; else if (len < tok->t_Len) r = -1; } return(r); } string_t StrTableInt(int v) { char buf[32]; snprintf(buf, sizeof(buf), "%d", v); return(StrTableAlloc(buf, strlen(buf), 0)); } /* * Bump the ref count on a string that is already in the string table. */ string_t StrTableDup(const char *str) { StrTable *st = (StrTable *)(__DECONST(char *,str) - offsetof(StrTable, st_Data[0])); dassert(st->st_Magic == STR_MAGIC); dassert(st->st_Refs > 0); ++st->st_Refs; return(str); } void ReplaceStrTable(string_t *pstr, string_t id) { if (*pstr) RelStrTable(pstr); *pstr = id; } /* * Release a string in the string table. We do not free the structure * immediately, allowing it to be potentially reused. The structure will * be freed when a later StrTableRef() encounters it during a hash scan. */ void RelStrTable(string_t *pstr) { string_t str; if ((str = *pstr) != NULL) { StrTable *st; *pstr = NULL; st = (StrTable *)(__DECONST(char *, str) - offsetof(StrTable, st_Data[0])); dassert(st->st_Magic == STR_MAGIC); dassert(st->st_Refs > 0); --st->st_Refs; /* do in-line free in later allocation */ } } int StrTableSpecial(string_t id) { if (id) { StrTable *st; st = (StrTable *)(__DECONST(char *, id) - offsetof(StrTable, st_Data[0])); dassert(st->st_Magic == STR_MAGIC); return(st->st_Special); } return(0); } /* * Generate a unique id for this string table entry. Typically stored * in the d_BackendId field of a declaration. */ int StrTableEnumerate(string_t id) { StrTable *st; st = (StrTable *)(__DECONST(char *, id) - offsetof(StrTable, st_Data[0])); ++st->st_Enumerate; return st->st_Enumerate; } /* * Return the length, in bytes, of an identifier. The length does not * include the terminating zero unless the original allocation included * it in its length. */ int StrTableLen(string_t id) { StrTable *st; st = (StrTable *)(__DECONST(char *, id) - offsetof(StrTable, st_Data[0])); dassert(st->st_Magic == STR_MAGIC); return(st->st_DataLen); } /* * Strip the identifier from the trailing element of the path. This routine * takes a nul-terminated string and returns a pointer and length within * the same string. */ const char * StripPathId(const char *path, int *plen) { int len = strlen(path); /* * strip any trailing slashes */ while (len > 0 && path[len - 1] == '/') --len; /* * Locate the last element */ while (len > 0 && path[len - 1] != '/') --len; path += len; /* * Parse the identifier until we hit a '/' or a '.' (extension) */ len = 0; while (path[len] && path[len] != '/' && path[len] != '.') ++len; *plen = len; return(path); } int SimpleIntToken(token_t *tok, int *rv) { int t = tok->t_Type; *rv = 0; if (t != TOK_INTEGER) { t = LexError(tok, TOK_ERR_EXPECTED_SIMPLE_INTEGER); } else { int i; int neg = 1; for (i = 0; i < tok->t_Len; ++i) { char c = tok->t_Ptr[i]; if (c == '-') { neg = -neg; } else if (c >= '0' && c <= '9') { *rv = *rv * 10 + (c - '0'); } else { t = LexError(tok, TOK_ERR_EXPECTED_SIMPLE_INTEGER); } } *rv = *rv * neg; } return(t); } int SimpleRunesizeToken(token_t *tok, runesize_t *rv) { int t = tok->t_Type; *rv = 0; if (t != TOK_INTEGER) { t = LexError(tok, TOK_ERR_EXPECTED_SIMPLE_INTEGER); } else { int i; int neg = 1; for (i = 0; i < tok->t_Len; ++i) { char c = tok->t_Ptr[i]; if (c == '-') { neg = -neg; } else if (c >= '0' && c <= '9') { *rv = *rv * 10 + (c - '0'); } else { t = LexError(tok, TOK_ERR_EXPECTED_SIMPLE_INTEGER); } } if (neg < 0) *rv = -*rv; } return(t); } /* * StrTableEscapeQuotedString() - convert quoted string into actual string. * * NOTE: The length of the string in the string table includes the * terminator. The string may also be concatenated. * * NOTE: the lexical analyzer does a more robust format check. */ static __inline int hexDigit(char c) { if (c >= '0' && c <= '9') return(c - '0'); if (c >= 'a' && c <= 'f') return(c + (10 - 'a')); if (c >= 'A' && c <= 'F') return(c + (10 - 'A')); return(-1); } string_t StrTableEscapeQuotedString(const char *str, int len, int term) { int rlen; int i; int searching; char *dbuf = NULL; char btype; char etype; string_t id; dassert(len >= 2); btype = len ? str[0] : 0; etype = btype; if (btype == '<') etype = '>'; loop: searching = 1; rlen = 0; for (i = 0; i < len; ++i) { char c = str[i]; if (searching) { if (c == btype) searching = 1 - searching; continue; } if (c == etype) { searching = 1 - searching; continue; } if (btype != TOK_BSTRING && c == '\\') { ++i; dassert(i < len); switch((c = str[i])) { case 'n': c = 10; break; case 't': c = 9; break; case 'r': c = 13; break; case 'x': i += 2; dassert(i < len); c = (hexDigit(str[i-1]) << 4) | hexDigit(str[i]); break; case '\\': break; default: dassert(c >= '0' && c <= '7'); c &= 7; if (i + 1 < len && str[i+1] >= '0' && str[i+1] <= '7') { ++i; c = (c << 3) | (str[i] & 7); if (i + 1 < len && str[i+1] >= '0' && str[i+1] <= '7') { ++i; c = (c << 3) | (str[i] & 7); } } break; } } if (dbuf) dbuf[rlen] = c; ++rlen; } if (dbuf == NULL) { dbuf = malloc(rlen + 1); goto loop; } dbuf[rlen] = 0; dassert(term == 0 || term == 1); id = StrTableAlloc(dbuf, rlen + term, 0); free(dbuf); return(id); }