/* * STRTABLE.C - String table routines * * (c)Copyright 1993-2014, Matthew Dillon, All Rights Reserved. See the * COPYRIGHT file at the base of the distribution. */ #include "defs.h" #define STR_MAGIC ((int)0xA5F3B211) #define STR_HSIZE 16384 #define STR_HMASK (STR_HSIZE - 1) typedef struct StrTable { struct StrTable *st_Next; int st_Magic; int st_DataLen; /* does not including terminator */ int st_Refs; int st_Special; int st_Enumerate; /* Enumeration helper for code gen */ int st_Unused01; void *st_Info; /* (used for run-time) */ char st_Data[0]; /* embedded binary or C string */ } StrTable; static StrTable * StrTableHash[STR_HSIZE]; static __inline int strhash(const char *str, int bytes) { int hv = 0xA3F2B364; while (bytes) { hv = (hv << 5) ^ *str ^ (hv >> 23); --bytes; ++str; } return ((hv ^ (hv >> 16)) & STR_HMASK); } /* * Obtain a persistent shared string representing the passed binary data. * Multple references to the same string will return the same shared pointer, * a feature we depend on heavily in the rest of the codebase in order to * avoid comparing strings over and over again. * * This code is content-agnostic and may be used for binary data. * * The caller has the choice of including the terminating zero for strings in * the official length or not. This code always adds a terminating zero 'out * of band' to make string handling easier. */ string_t StrTableAlloc(const char *ptr, int len, int special) { StrTable **pst, *st; pst = &StrTableHash[strhash(ptr, len)]; while ((st = *pst) != NULL) { /* * Look for match, bump ref count and return if found. Note that we * can reuse a previously dereferenced but not-yet-freed string. */ if (len == st->st_DataLen && bcmp(ptr, st->st_Data, len) == 0) { if (special) st->st_Special = special; ++st->st_Refs; return (st->st_Data); } /* * in-scan free */ if (st->st_Refs == 0) { *pst = st->st_Next; st->st_Next = NULL; st->st_Magic = 0xDEADDEED; zfree(st, sizeof(StrTable) + st->st_DataLen + 1); } else { pst = &st->st_Next; } } st = zalloc(sizeof(StrTable) + len + 1); *pst = st; st->st_Magic = STR_MAGIC; st->st_DataLen = len; st->st_Refs = 1; st->st_Special = special; bcopy(ptr, (char *)&st->st_Data[0], len); ((char *)st->st_Data)[len] = 0; /* out of band terminator */ return (st->st_Data); } /* * Convert the string represented by the token into a string table string. */ string_t StrTableToken(token_t *tok) { return (StrTableAlloc(tok->t_Ptr, tok->t_Len, 0)); } /* * Bump the ref count on a string that is already in the string table. */ string_t StrTableDup(const char *str) { StrTable *st = (StrTable *) (__DECONST(char *, str)- offsetof(StrTable, st_Data[0])); dassert(st->st_Magic == STR_MAGIC); dassert(st->st_Refs > 0); ++st->st_Refs; return (str); } void ReplaceStrTable(string_t *pstr, string_t id) { if (*pstr) RelStrTable(pstr); *pstr = id; } /* * Release a string in the string table. We do not free the structure * immediately, allowing it to be potentially reused. The structure will be * freed when a later StrTableRef() encounters it during a hash scan. */ void RelStrTable(string_t *pstr) { string_t str; if ((str = *pstr) != NULL) { StrTable *st; *pstr = NULL; st = (StrTable *) (__DECONST(char *, str)- offsetof(StrTable, st_Data[0])); dassert(st->st_Magic == STR_MAGIC); dassert(st->st_Refs > 0); --st->st_Refs; /* do in-line free in later allocation */ } } int StrTableSpecial(string_t id) { if (id) { StrTable *st; st = (StrTable *) (__DECONST(char *, id)- offsetof(StrTable, st_Data[0])); dassert(st->st_Magic == STR_MAGIC); return (st->st_Special); } return (0); } /* * Generate a unique id for this string table entry. Typically stored in the * d_BackendId field of a declaration. */ int StrTableEnumerate(string_t id) { StrTable *st; st = (StrTable *) (__DECONST(char *, id)- offsetof(StrTable, st_Data[0])); ++st->st_Enumerate; return st->st_Enumerate; } /* * Return the length, in bytes, of an identifier. The length does not * include the terminating zero unless the original allocation included it in * its length. */ int StrTableLen(string_t id) { StrTable *st; st = (StrTable *) (__DECONST(char *, id)- offsetof(StrTable, st_Data[0])); dassert(st->st_Magic == STR_MAGIC); return (st->st_DataLen); } /* * Strip the identifier from the trailing element of the path. This routine * takes a nul-terminated string and returns a pointer and length within the * same string. */ const char * StripPathId(const char *path, int *plen) { int len = strlen(path); /* * strip any trailing slashes */ while (len > 0 && path[len - 1] == '/') --len; /* * Locate the last element */ while (len > 0 && path[len - 1] != '/') --len; path += len; /* * Parse the identifier until we hit a '/' or a '.' (extension) */ len = 0; while (path[len] && path[len] != '/' && path[len] != '.') ++len; *plen = len; return (path); } int SimpleIntToken(token_t *tok, int *rv) { int t = tok->t_Type; *rv = 0; if (t != TOK_INTEGER) { t = LexError(tok, TOK_ERR_EXPECTED_SIMPLE_INTEGER); } else { int i; int neg = 1; for (i = 0; i < tok->t_Len; ++i) { char c = tok->t_Ptr[i]; if (c == '-') { neg = -neg; } else if (c >= '0' && c <= '9') { *rv = *rv * 10 + (c - '0'); } else { t = LexError(tok, TOK_ERR_EXPECTED_SIMPLE_INTEGER); } } *rv = *rv * neg; } return (t); } int SimpleRunesizeToken(token_t *tok, urunesize_t *rv) { int t = tok->t_Type; *rv = 0; if (t != TOK_INTEGER) { t = LexError(tok, TOK_ERR_EXPECTED_SIMPLE_INTEGER); } else { int i; int neg = 1; for (i = 0; i < tok->t_Len; ++i) { char c = tok->t_Ptr[i]; if (c == '-') { neg = -neg; } else if (c >= '0' && c <= '9') { *rv = *rv * 10 + (c - '0'); } else { t = LexError(tok, TOK_ERR_EXPECTED_SIMPLE_INTEGER); } } if (neg < 0) *rv = -*rv; } return (t); } /* * StrTableEscapeQuotedString() - convert quoted string into actual string. * * NOTE: The length of the string in the string table includes the * terminator. The string may also be concatenated. * * NOTE: the lexical analyzer does a more robust format check. */ static __inline int hexDigit(char c) { if (c >= '0' && c <= '9') return (c - '0'); if (c >= 'a' && c <= 'f') return (c + (10 - 'a')); if (c >= 'A' && c <= 'F') return (c + (10 - 'A')); return (-1); } static char * BufEscapeQuotedString(const char *str, int *lenp) { int rlen; int i; int searching; int len = *lenp; char *dbuf = NULL; char btype; char etype; dassert(len >= 2); btype = len ? str[0] : 0; etype = btype; if (btype == '<') etype = '>'; loop: searching = 1; rlen = 0; for (i = 0; i < len; ++i) { char c = str[i]; if (searching) { if (c == btype) searching = 1 - searching; continue; } if (c == etype) { searching = 1 - searching; continue; } if (btype != TOK_BSTRING && c == '\\') { ++i; dassert(i < len); switch ((c = str[i])) { case 'n': c = 10; break; case 't': c = 9; break; case 'r': c = 13; break; case 'x': i += 2; dassert(i < len); c = (hexDigit(str[i - 1]) << 4) | hexDigit(str[i]); break; case '\\': break; default: dassert(c >= '0' && c <= '7'); c &= 7; if (i + 1 < len && str[i + 1] >= '0' && str[i + 1] <= '7') { ++i; c = (c << 3) | (str[i] & 7); if (i + 1 < len && str[i + 1] >= '0' && str[i + 1] <= '7') { ++i; c = (c << 3) | (str[i] & 7); } } break; } } if (dbuf) dbuf[rlen] = c; ++rlen; } if (dbuf == NULL) { dbuf = malloc(rlen + 1); goto loop; } dbuf[rlen] = 0; *lenp = rlen; return (dbuf); } string_t StrTableEscapeQuotedString(const char *str, int len, int term) { string_t id; char *dbuf; dbuf = BufEscapeQuotedString(str, &len); dassert(term == 0 || term == 1); id = StrTableAlloc(dbuf, len + term, 0); free(dbuf); return (id); } uint64_t StrTableSingleQuotedString(const char *str, int len, Type **typep) { uint8_t *dbuf; uint64_t key; int i; dbuf = (uint8_t *)BufEscapeQuotedString(str, &len); key = 0; for (i = 0; i < len; ++i) key = (key << 8) | dbuf[i]; switch (len) { case 0: *typep = &UInt8Type; break; case 1: *typep = &UInt8Type; break; case 2: *typep = &UInt16Type; break; case 3: case 4: *typep = &UInt32Type; break; case 5: case 6: case 7: case 8: *typep = &UInt64Type; break; default: key = iscsi_crc32(dbuf, len) | 0x80000000U; *typep = &UInt32Type; break; } free(dbuf); return key; }