/*
 * LEX.C
 *
 * (c)Copyright 2016, Matthew Dillon, All Rights Reserved.  See the
 *    COPYRIGHT file at the base of the distribution.
 *
 * RAS Lexer
 */

#include "defs.h"

/*
 * Character classification flags stored in RasLexAry[].  RasToken()
 * switches on the table entry directly, so each character carries
 * exactly one class (or 0 for "not a legal token start").
 */
#define S_WS	0x01	/* whitespace */
#define S_AL	0x02	/* alpha */
#define S_NU	0x04	/* number */
#define S_OP	0x08	/* single-character token */
#define S_SY	0x10	/* symbol prefix */
#define S_CM	0x20	/* comment */
#define S_DQ	0x40	/* string */

/*
 * Per-byte classification table, indexed by the raw (unsigned) character.
 * The table is only ever read, hence const.
 */
static const uint8_t RasLexAry[256] = {
	/*00*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*08*/	0x00, S_WS, S_WS, 0x00, 0x00, S_WS, 0x00, 0x00,
	/*10*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*18*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*20*/	S_WS, 0x00, S_DQ, S_CM, S_OP, S_SY, 0x00, S_OP,
	/*28*/	S_OP, S_OP, 0x00, S_OP, S_OP, S_OP, S_OP, 0x00,
	/*30*/	S_NU, S_NU, S_NU, S_NU, S_NU, S_NU, S_NU, S_NU,
	/*38*/	S_NU, S_NU, S_OP, S_CM, 0x00, 0x00, 0x00, 0x00,
	/*40*/	S_SY, S_AL, S_AL, S_AL, S_AL, S_AL, S_AL, S_AL,
	/*48*/	S_AL, S_AL, S_AL, S_AL, S_AL, S_AL, S_AL, S_AL,
	/*50*/	S_AL, S_AL, S_AL, S_AL, S_AL, S_AL, S_AL, S_AL,
	/*58*/	S_AL, S_AL, S_AL, 0x00, 0x00, 0x00, 0x00, S_AL,
	/*60*/	0x00, S_AL, S_AL, S_AL, S_AL, S_AL, S_AL, S_AL,
	/*68*/	S_AL, S_AL, S_AL, S_AL, S_AL, S_AL, S_AL, S_AL,
	/*70*/	S_AL, S_AL, S_AL, S_AL, S_AL, S_AL, S_AL, S_AL,
	/*78*/	S_AL, S_AL, S_AL, S_OP, 0x00, S_OP, 0x00, 0x00,
	/*80*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*88*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*90*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*98*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*A0*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*A8*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*B0*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*B8*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*C0*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*C8*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*D0*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*D8*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*E0*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*E8*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*F0*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	/*F8*/	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

static void rasGetValue(const uint8_t *pbase, const uint8_t *pend,
			int opsize, int negate,
			int64_t *immlo, int64_t *immhi);
static uint32_t rasGetRegister(RASParser *p,
			const uint8_t *pbase, const uint8_t *pend);

/*
 * Open assembly file
 *
 * A NULL path selects stdin.  Returns the new parser state, or NULL
 * (after printing a message to stderr) if the file cannot be opened or
 * the path cannot be duplicated.
 */
RASParser *
RasOpen(const char *path)
{
	RASParser *p = zalloc(sizeof(*p));

	if (path == NULL) {
		p->fi = stdin;
		return p;
	}

	p->fi = fopen(path, "r");
	if (p->fi == NULL) {
		fprintf(stderr, "RAS: Unable to open %s\n", path);
		zfree(p, sizeof(*p));
		return NULL;
	}

	/*
	 * RasClose() keys the fclose() of p->fi off of p->ipath, so a
	 * failed strdup() must not be allowed to leave ipath NULL with
	 * a real file open.
	 */
	p->ipath = strdup(path);
	if (p->ipath == NULL) {
		fprintf(stderr, "RAS: Out of memory opening %s\n", path);
		fclose(p->fi);
		zfree(p, sizeof(*p));
		return NULL;
	}
	return p;
}

/*
 * Close assembly file and cleanup
 *
 * Streams are only closed when the matching path is set; a stream
 * without a path is stdin/stdout and is left open.
 */
void
RasClose(RASParser *p)
{
	if (p->ipath) {
		free(p->ipath);
		fclose(p->fi);
	} else {
		p->fi = NULL;		/* was stdin */
	}
	if (p->opath) {
		free(p->opath);
		fclose(p->fo);
	} else {
		p->fo = NULL;		/* was stdout */
	}
	zfree(p, sizeof(*p));
}

/*
 * Obtain the first/next line of the assembly file.  Token parsing
 * will occur only within the line.
 *
 * This function always parses any initial label or symbol on the line
 * and returns it (or NULL) in *symp.  *symp is also NULL when 0 is
 * returned.
 *
 * Returns 0 on EOF (and after reporting a left-justified token that is
 * not a label), else non-zero.
 */
int
RasLine(RASParser *p, rsym_t **symp)
{
	size_t size;

	*symp = NULL;

	++p->lineno;
	p->line = (uint8_t *)fgetln(p->fi, &size);
	if (p->line == NULL)
		return 0;
	dassert(size > 0);

	p->lptr = p->line;
	p->lend = p->line + size;
	p->tbase = p->line;
	p->tend = p->lend;

	/*
	 * A line starting with whitespace or a comment character has no
	 * label; anything else in column 0 must lex as an id or symbol.
	 */
	if (RasLexAry[*p->line] & (S_WS | S_CM))
		return 1;

	switch(RasToken(p)) {
	case TOK_ID:
	case TOK_SYMBOL:
		*symp = p->sym;
		break;
	default:
		RasError(p, "Left-justified token not label");
		return 0;
	}
	return 1;
}

/*
 * Obtain the next token.  Sets (tbase, tend), lptr and tok as a
 * side-effect.  The line terminates on a comment character (';' or '#').
 * Returns 0 on line EOF.
 *
 * Registers leave their number in p->valuelo, values are left in
 * p->valuelo/p->valuehi, and ids/symbols are left in p->sym.
 */
rastoken_t
RasToken(RASParser *p)
{
	const uint8_t *ptr;
	const uint8_t *lend = p->lend;
	rastoken_t t = 0;
	int negate;

	p->flags &= ~RASPF_DIDPLMI;
	negate = 0;

	for (ptr = p->lptr; ptr < lend; ++ptr) {
		switch(RasLexAry[*ptr]) {
		case S_WS:
			continue;
		case S_SY:
			switch(*ptr) {
			case '@':
				t = TOK_SYMBOL;
				break;
			case '%':
				t = TOK_REG;
				break;
			default:
				dpanic("Unknown S_SY token '%c'", *ptr);
			}
			/* fall through */
		case S_AL:
			/*
			 * Label, symbol, or register
			 */
			p->tbase = ptr;
			++ptr;
			while (ptr < lend && (RasLexAry[*ptr] & (S_AL|S_NU)))
				++ptr;
			if (t == 0)
				t = TOK_ID;
			if (t == TOK_REG) {
				p->valuelo = 0;
				if (p->tbase + 1 >= lend) {
					p->tend = ptr;
					RasError(p, "Illegal Register Spec");
				} else {
					p->tend = ptr;	/* if error */
					p->valuelo = rasGetRegister(p,
								p->tbase, ptr);
				}
			} else {
				p->sym = RasGetSymbol(p->tbase,
						      ptr - p->tbase);
			}
			break;
		case S_OP:
			/*
			 * Single character operator
			 */
			p->tbase = ptr;
			t = (rastoken_t)*ptr;
			++ptr;

			/*
			 * Handle + and - specially, combine with the value
			 * that follows, handle negation, and flag that the
			 * value was prefixed with the + or -.
			 */
			if (t != '+' && t != '-')
				break;
			if (t == '-')
				negate = 1;
			p->flags |= RASPF_DIDPLMI;
			/* fall through */
		case S_NU:
			/*
			 * Value (will lex FP values too).
			 *
			 * NOTE: Some leeway is given on the 'number' in terms
			 *	 of alpha characters to handle 0x*, hex
			 *	 exponents, size postfix chars, and so forth.
			 */
			p->tbase = ptr;
			while (ptr < lend && (RasLexAry[*ptr] & (S_AL|S_NU)))
				++ptr;

			/*
			 * Fractional part.  Both the '.' and the digit after
			 * it must lie inside the line, so bound the lookahead
			 * on ptr + 1 rather than reading past lend.
			 */
			if (ptr + 1 < lend && *ptr == '.' &&
			    (RasLexAry[ptr[1]] & S_NU)) {
				++ptr;
				while (ptr < lend &&
				       (RasLexAry[*ptr] & (S_AL|S_NU))) {
					++ptr;
				}
				/*
				 * The scan above swallows a trailing 'e';
				 * back up so the exponent code sees it.
				 */
				if (ptr[-1] == 'e' || ptr[-1] == 'E')
					--ptr;
			}

			/*
			 * Exponent with optional sign
			 */
			if (ptr < lend && (*ptr == 'e' || *ptr == 'E')) {
				++ptr;
				if (ptr < lend && (*ptr == '+' || *ptr == '-')) {
					++ptr;
				}
				while (ptr < lend &&
				       (RasLexAry[*ptr] & (S_AL|S_NU))) {
					++ptr;
				}
			}

			/* parser may re-call in EA parsing */
			if (negate)
				p->flags |= RASPF_NEGATIVE;
			else
				p->flags &= ~RASPF_NEGATIVE;

			rasGetValue(p->tbase, ptr, REXT_I64, negate,
				    &p->valuelo, &p->valuehi);
			t = TOK_VALUE;
			break;
		case S_CM:
			/*
			 * Comment (leave token 0)
			 */
			p->tbase = ptr;
			ptr = lend;
			break;
		case S_DQ:
			/*
			 * Quoted string.  The token includes the quotes; an
			 * unterminated string runs to the end of the line.
			 */
			p->tbase = ptr;
			++ptr;
			while (ptr < lend && *ptr != '\"')
				++ptr;
			if (ptr < lend)
				++ptr;
			t = TOK_STRING;
			break;
		default:
			p->tbase = ptr;
			p->tend = ptr + 1;
			RasFatal(p, "Unknown lexical token (%c)", *ptr);
		}
		break;
	}
	p->tend = ptr;
	p->lptr = ptr;
	p->tok = t;

	return t;
}

/*
 * Re-evaluate the current token (tbase, tend) as an immediate using the
 * given opsize, applying the negation recorded by RasToken() in
 * RASPF_NEGATIVE.  Results are stored in *lowp and *highp.
 */
void
RasGetValue(RASParser *p, uint8_t opsize, int64_t *lowp, int64_t *highp)
{
	rasGetValue(p->tbase, p->tend, opsize,
		    (p->flags & RASPF_NEGATIVE), lowp, highp);
}

/*
 * Copy the token [pbase, pend) into buf as a NUL-terminated C string,
 * truncating to bufsize - 1 characters.
 */
static void
rasCopyToken(char *buf, size_t bufsize,
	     const uint8_t *pbase, const uint8_t *pend)
{
	size_t n = 0;

	while (pbase < pend && n + 1 < bufsize)
		buf[n++] = (char)*pbase++;
	buf[n] = '\0';
}

/*
 * Extract an immediate value, with instruction opsize hinting.  If the
 * opsize is unspecified a runesize_t sized signed integer (i.e. an offset)
 * is assumed.
 *
 * pbase/pend	- token text, NOT NUL-terminated, any sign already stripped
 * opsize	- REXT_* size hint; selects the FP format for FP literals
 * negate	- non-zero to negate the result
 * immlo/immhi	- low and high 64 bits of the result
 */
static void
rasGetValue(const uint8_t *pbase, const uint8_t *pend,
	    int opsize, int negate,
	    int64_t *immlo, int64_t *immhi)
{
	int base = 10;
	int hasdot;
	const uint8_t *ptr;
	uint64_t lo = 0;
	uint64_t hi = 0;
	char fpbuf[256];
	union {
		float	flt;
		int32_t	val;
	} size32;
	union {
		double	flt;
		int64_t	val;
	} size64;
	union {
		long double flt;
		int64_t	val[2];
	} size128;

	*immlo = 0;
	*immhi = 0;

	/*
	 * Allow numbers to be interpreted as floating point.  For bitstuffing
	 * raw floating point use 0x
	 */
	hasdot = 0;
	for (ptr = pbase; ptr < pend; ++ptr) {
		if (*ptr == '.')
			hasdot = 1;
	}

	if (pbase + 1 < pend &&
	    pbase[0] == '0' && (pbase[1] == 'x' || pbase[1] == 'X')) {
		pbase += 2;
		base = 16;
	}

	if (base == 16 && pbase < pend &&		/* FP format */
	    (*pbase == 'K' || *pbase == 'M')) {
		/*
		 * FP value defined in hex IEEE layout.  The prefix letter
		 * is skipped and the digits are parsed as hex below.
		 */
		++pbase;
	} else if (base == 10 && hasdot) {
		/*
		 * Otherwise if FP use strtod().  The token lives inside the
		 * fgetln() line buffer which is not NUL-terminated, so
		 * convert from a bounded, terminated copy.
		 */
		rasCopyToken(fpbuf, sizeof(fpbuf), pbase, pend);

		switch(opsize) {
		case REXT_I32:
			size32.flt = strtof(fpbuf, NULL);
			if (negate)
				size32.flt = -size32.flt;
			*immlo = size32.val;
			break;
		case REXT_I64:
			size64.flt = strtod(fpbuf, NULL);
			if (negate)
				size64.flt = -size64.flt;
			*immlo = size64.val;
			break;
		case REXT_I128:
			/*
			 * Pre-zero the union so bytes beyond the stored
			 * long double value are not stack garbage (in
			 * practice the store leaves them untouched).
			 */
			size128.val[0] = 0;
			size128.val[1] = 0;
			size128.flt = strtold(fpbuf, NULL);
			if (negate)
				size128.flt = -size128.flt;
#if _BYTE_ORDER == _LITTLE_ENDIAN
			*immlo = size128.val[0];
			*immhi = size128.val[1];
#else
			*immlo = size128.val[1];
			*immhi = size128.val[0];
#endif
			break;
		default:
			dpanic("Unknown REXT %d", opsize);
		}
		return;
	}

	/*
	 * Otherwise interpret as an integer XXX 16-byte format
	 *
	 * Accumulate in unsigned arithmetic so overflow wraps instead of
	 * being undefined.  Characters that are not hex digits count as 0.
	 * Only hex input carries bits into the high word.
	 */
	while (pbase < pend) {
		int v;

		if (*pbase >= '0' && *pbase <= '9')
			v = *pbase - '0';
		else if (*pbase >= 'a' && *pbase <= 'f')
			v = *pbase - 'a' + 10;
		else if (*pbase >= 'A' && *pbase <= 'F')
			v = *pbase - 'A' + 10;
		else
			v = 0;
		if (base == 16)
			hi = (hi << 4) | (lo >> 60);
		lo = lo * (uint64_t)base + (uint64_t)v;
		++pbase;
	}

	/*
	 * Fixup the high word when the low word is negative for
	 * opsizes != 128 bits.
	 *
	 * Fixup the high word for negation by doing it manually
	 * (-n == ~n + 1), carrying from the low word into the high word.
	 */
	if ((int64_t)lo < 0 && opsize != REXT_I128)
		hi = ~(uint64_t)0;
	if (negate) {
		lo = ~lo;
		hi = ~hi;
		if (lo == ~(uint64_t)0)
			++hi;
		++lo;
	}
	*immlo = (int64_t)lo;
	*immhi = (int64_t)hi;
}

/*
 * Convert a register spec (including its leading '%') into a register
 * number.  Two-character names are looked up as named registers;
 * otherwise 'p<n>' yields n | REGF_PTR and 'r<n>' yields n.  Reports
 * "Illegal Register Spec" and returns 0 for anything else.
 *
 * NOTE(review): every two-character spec is routed to the named-register
 * table, so single-digit forms such as %r1 or %p5 are rejected rather
 * than parsed numerically.  Confirm whether that is intended.
 */
static uint32_t
rasGetRegister(RASParser *p, const uint8_t *pbase, const uint8_t *pend)
{
	uint32_t regno = 0;
	int64_t valuelo = 0;
	int64_t valuehi = 0;

	++pbase;		/* skip '%' */

	if (pbase >= pend) {
		RasError(p, "Illegal Register Spec");
	} else if (pend - pbase == 2) {
		if (pbase[0] == 's' && pbase[1] == 'g') {
			regno = REG_SG;
		} else if (pbase[0] == 'r' && pbase[1] == 'p') {
			regno = REG_RP;
		} else if (pbase[0] == 'd' && pbase[1] == 'b') {
			regno = REG_DB;
		} else if (pbase[0] == 't' && pbase[1] == 'p') {
			regno = REG_TP;
		} else if (pbase[0] == 'a' && pbase[1] == 'p') {
			regno = REG_AP;
		} else if (pbase[0] == 'f' && pbase[1] == 'p') {
			regno = REG_FP;
		} else if (pbase[0] == 'p' && pbase[1] == 'c') {
			regno = REG_PC;
		} else {
			RasError(p, "Illegal Register Spec");
		}
	} else if (pbase[0] == 'p') {
		rasGetValue(pbase + 1, pend, REXT_I64, 0, &valuelo, &valuehi);
		regno = valuelo | REGF_PTR;
	} else if (pbase[0] == 'r') {
		rasGetValue(pbase + 1, pend, REXT_I64, 0, &valuelo, &valuehi);
		regno = valuelo;
	} else {
		RasError(p, "Illegal Register Spec");
	}
	return regno;
}

/*
 * Common reporting code for RasError() and RasFatal().  Marks the parser
 * as having an error, prints the formatted message with path and line
 * number, then echoes the current line with the current token
 * (tbase, tend) wrapped in the reverse-video escape sequences.
 */
static void
rasReport(RASParser *p, const char *ctl, va_list va)
{
	const char *highlight_start = "\033[7m";
	const char *highlight_stop = "\033[m";
	int chars_before;
	int chars_in;
	int chars_after;

	p->error = 1;

	fprintf(stderr, "RAS: %s line %d: ",
		(p->ipath ? p->ipath : ""), p->lineno);
	vfprintf(stderr, ctl, va);
	fprintf(stderr, "\n");

	/*
	 * The line is not NUL-terminated, so each piece is printed with
	 * an explicit width and precision.
	 */
	chars_before = p->tbase - p->line;
	chars_in = p->tend - p->tbase;
	chars_after = p->lend - p->tend;

	fprintf(stderr, "%*.*s%s%*.*s%s%*.*s",
		chars_before, chars_before, p->line,
		highlight_start,
		chars_in, chars_in, p->tbase,
		highlight_stop,
		chars_after, chars_after, p->tend);
	fflush(stderr);
}

/*
 * Report a recoverable error at the current token.  Sets p->error and
 * returns to the caller.
 */
void
RasError(RASParser *p, const char *ctl, ...)
{
	va_list va;

	va_start(va, ctl);
	rasReport(p, ctl, va);
	va_end(va);
}

/*
 * Report an unrecoverable error at the current token and exit(1).
 */
void
RasFatal(RASParser *p, const char *ctl, ...)
{
	va_list va;

	va_start(va, ctl);
	rasReport(p, ctl, va);
	va_end(va);
	exit(1);
}