1 /* This is the Assembler Pre-Processor
2 Copyright (C) 1987, 1990, 1991, 1992, 1994 Free Software Foundation, Inc.
4 This file is part of GAS, the GNU Assembler.
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
20 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
21 /* App, the assembler pre-processor. This pre-processor strips out excess
22 spaces, turns single-quoted characters into a decimal constant, and turns
23 # <number> <filename> <garbage> into a .line <number>\n.file <filename>
24 pair. This needs better error-handling. */
27 * $FreeBSD: src/gnu/usr.bin/as/app.c,v 1.7 1999/08/27 23:34:10 peter Exp $
28 * $DragonFly: src/gnu/usr.bin/as/Attic/app.c,v 1.2 2003/06/17 04:25:44 dillon Exp $
31 #include "as.h" /* For BAD_CASE() only */
35 #define const /* empty */
/* Characters that may form part of a symbol name.  The lex[] setup code
   walks this NUL-terminated string and marks the lex[] entry of every
   character in it as LEX_IS_SYMBOL_COMPONENT.  Later setup loops
   (comment, line-comment and line-separator characters) run afterwards
   and override these entries for any character they share.  */
40 static const char symbol_chars[] =
41 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
/* Lexical class codes.  Each lex[] entry holds one of these values for
   the character used as its index; the setup code assigns them, e.g.
   ' ' and '\t' get LEX_IS_WHITESPACE, '\n' gets LEX_IS_NEWLINE and ':'
   gets LEX_IS_COLON.  */
43 #define LEX_IS_SYMBOL_COMPONENT 1
44 #define LEX_IS_WHITESPACE 2
45 #define LEX_IS_LINE_SEPARATOR 3
46 #define LEX_IS_COMMENT_START 4
47 #define LEX_IS_LINE_COMMENT_START 5
/* The setup code assigns the next two classes to '/' and '*'
   respectively, i.e. the two characters of a C-style comment opener.  */
48 #define LEX_IS_TWOCHAR_COMMENT_1ST 6
49 #define LEX_IS_TWOCHAR_COMMENT_2ND 7
50 #define LEX_IS_STRINGQUOTE 8
51 #define LEX_IS_COLON 9
52 #define LEX_IS_NEWLINE 10
/* Assigned to '\'' by default; when SINGLE_QUOTE_STRINGS is defined the
   setup code reclassifies '\'' as LEX_IS_STRINGQUOTE instead.  */
53 #define LEX_IS_ONECHAR_QUOTE 11
/* Class predicates.  Each one indexes lex[] directly with its argument,
   so C must be a valid index into lex[]: it must not be EOF or any other
   negative value.  The visible call sites test `ch != EOF' before
   applying them.  Classes without a predicate here are tested by
   comparing lex[ch] with the class code directly.  */
54 #define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
55 #define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
56 #define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
57 #define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
58 #define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
59 #define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
/* Forward declaration of the file-local escape translator.  It takes a
   character code as an int and returns an int; the one-character-quote
   handling replaces the character it read with the returned value
   (`ch = process_escape (ch)').
   NOTE(review): PARAMS is not defined in this file; presumably it is the
   prototype wrapper macro from as.h for pre-ANSI compilers -- confirm.  */
61 static int process_escape PARAMS ((int));
63 /* FIXME-soon: The entire lexer/parser thingy should be
64 built statically at compile time rather than dynamically
65 each and every time the assembler is run. xoxorich. */
72 lex[' '] = LEX_IS_WHITESPACE;
73 lex['\t'] = LEX_IS_WHITESPACE;
74 lex['\n'] = LEX_IS_NEWLINE;
75 lex[';'] = LEX_IS_LINE_SEPARATOR;
76 lex['"'] = LEX_IS_STRINGQUOTE;
78 lex['\''] = LEX_IS_ONECHAR_QUOTE;
80 lex[':'] = LEX_IS_COLON;
84 #ifdef SINGLE_QUOTE_STRINGS
85 lex['\''] = LEX_IS_STRINGQUOTE;
88 /* Note that these override the previous defaults, e.g. if ';' is a
89 comment char, then it isn't a line separator. */
90 for (p = symbol_chars; *p; ++p)
92 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
93 } /* declare symbol characters */
95 for (p = comment_chars; *p; p++)
97 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
98 } /* declare comment chars */
100 for (p = line_comment_chars; *p; p++)
102 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
103 } /* declare line comment chars */
105 for (p = line_separator_chars; *p; p++)
107 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
108 } /* declare line separators */
110 /* Only allow slash-star comments if slash is not in use */
113 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
115 /* FIXME-soon. This is a bad hack but otherwise, we can't do
116 c-style comments when '/' is a line comment char. xoxorich. */
119 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
121 } /* do_scrub_begin() */
128 return getc (scrub_file);
135 ungetc (ch, scrub_file);
136 } /* scrub_to_file() */
139 char *scrub_last_string;
144 return scrub_string == scrub_last_string ? EOF : *scrub_string++;
145 } /* scrub_from_string() */
151 *--scrub_string = ch;
152 } /* scrub_to_string() */
154 /* Saved state of the scrubber */
/* All four variables below are copied into a struct app_save when the
   scrubber state is pushed and copied back when it is popped.  */

/* State to return to once a temporary state finishes; the state table
   describes states 5, -1 and -2 as going back to old_state.  */
156 static int old_state;
/* Pending output text.  It is handed out one character per call via
   `*out_string++'; state -1 is documented as "output string in
   out_string".  It points either at a string literal or at out_buf.  */
157 static char *out_string;
/* Scratch buffer.  The one-character-quote handling formats the
   character's value into it as decimal text with sprintf and then
   points out_string at it.  */
158 static char out_buf[20];
/* NOTE(review): the code that updates this counter is not visible here.
   It appears to count newlines swallowed inside comments that still
   have to be emitted ("Roll out a bunch of newlines from inside
   comments") -- confirm against the full function.  */
159 static int add_newlines = 0;
161 /* Data structure for saving the state of app across #include's. Note that
162 app is called asynchronously to the parsing of the .include's, so our
163 state at the time .include is interpreted is completely unrelated.
164 That's why we have to save it all. */
171 char out_buf[sizeof (out_buf)];
174 char *scrub_last_string;
181 register struct app_save *saved;
183 saved = (struct app_save *) xmalloc (sizeof (*saved));
184 saved->state = state;
185 saved->old_state = old_state;
186 saved->out_string = out_string;
187 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
188 saved->add_newlines = add_newlines;
189 saved->scrub_string = scrub_string;
190 saved->scrub_last_string = scrub_last_string;
191 saved->scrub_file = scrub_file;
193 /* do_scrub_begin() is not useful, just wastes time. */
194 return (char *) saved;
201 register struct app_save *saved = (struct app_save *) arg;
203 /* There is no do_scrub_end (). */
204 state = saved->state;
205 old_state = saved->old_state;
206 out_string = saved->out_string;
207 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
208 add_newlines = saved->add_newlines;
209 scrub_string = saved->scrub_string;
210 scrub_last_string = saved->scrub_last_string;
211 scrub_file = saved->scrub_file;
216 /* @@ This assumes that \n &c are the same on host and target. This is not
243 do_scrub_next_char (get, unget)
247 /*State 0: beginning of normal line
248 1: After first whitespace on line (flush more white)
249 2: After first non-white (opcode) on line (keep 1white)
250 3: after second white on line (into operands) (flush white)
251 4: after putting out a .line, put out digits
252 5: parsing a string, then go to old-state
253 6: putting out \ escape in a quoted string.
254 7: After putting out a .appfile, put out string.
255 8: After putting out a .appfile string, flush until newline.
256 9: After seeing symbol char in state 3 (keep 1white after symchar)
257 10: After seeing whitespace in state 9 (keep white before symchar)
258 11: After seeing a symbol character in state 0 (eg a label definition)
259 -1: output string in out_string and go to the state in old_state
260 -2: flush text until a '*' '/' is seen, then go to state old_state
263 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
264 constructs like ``.loc 1 20''. This was turning into ``.loc
265 120''. States 9 and 10 ensure that a space is never dropped in
266 between characters which could appear in an identifier. Ian
267 Taylor, ian@cygnus.com.
269 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
270 correctly on the PA (and any other target where colons are optional).
271 Jeff Law, law@cs.utah.edu. */
273 register int ch, ch2 = 0;
274 int not_cpp_line = 0;
280 if (*out_string == 0)
294 while (ch != EOF && ch != '\n' && ch != '*');
295 if (ch == '\n' || ch == EOF)
298 /* At this point, ch must be a '*' */
299 while ((ch = (*get) ()) == '*')
303 if (ch == EOF || ch == '/')
312 if (ch == EOF || (ch >= '0' && ch <= '9'))
316 while (ch != EOF && IS_WHITESPACE (ch))
321 out_string = "\n\t.appfile ";
324 return *out_string++;
328 while (ch != EOF && ch != '\n')
337 if (lex[ch] == LEX_IS_STRINGQUOTE)
342 #ifndef NO_STRING_ESCAPES
351 as_warn ("End of file in string: inserted '\"'");
366 /* Handle strings broken across lines, by turning '\n' into
382 #endif /* BACKSLASH_V */
394 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
396 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
398 #else /* ONLY_STANDARD_ESCAPES */
400 /* Accept \x as x for any x */
402 #endif /* ONLY_STANDARD_ESCAPES */
405 as_warn ("End of file in string: '\"' inserted");
424 /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
432 as_warn ("End of file not at end of a line: Newline inserted.");
438 case LEX_IS_WHITESPACE:
440 /* Preserve a single whitespace character at the beginning of
449 while (ch != EOF && IS_WHITESPACE (ch));
454 || (state == 0 && IS_LINE_COMMENT (ch))
456 || IS_LINE_SEPARATOR (ch))
458 /* cpp never outputs a leading space before the #, so try to
459 avoid being confused. */
464 (*unget) (ch); /* Put back */
465 return ' '; /* Always return one space at start of line */
468 /* If we're in state 2 or 11, we've seen a non-white character
469 followed by whitespace. If the next character is ':', this
470 is whitespace after a label name which we *must* ignore. */
471 if ((state == 2 || state == 11) && lex[ch] == LEX_IS_COLON)
481 goto recycle; /* Punted leading sp */
483 /* We can arrive here if we leave a leading whitespace character
484 at the beginning of a line. */
489 return ' '; /* Sp after opco */
491 goto recycle; /* Sp in operands */
494 state = 10; /* Sp after symbol char */
499 return ' '; /* Sp after label definition. */
505 case LEX_IS_TWOCHAR_COMMENT_1ST:
507 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
514 if (ch2 != EOF && IS_NEWLINE (ch2))
518 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
521 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
527 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
532 as_warn ("End of file in multiline comment");
541 if (state == 9 || state == 10)
547 case LEX_IS_STRINGQUOTE:
548 if (state == 9 || state == 10)
556 case LEX_IS_ONECHAR_QUOTE:
560 as_warn ("End-of-file after a one-character quote; \\000 inserted");
566 ch = process_escape (ch);
568 sprintf (out_buf, "%d", (int) (unsigned char) ch);
571 /* None of these 'x constants for us. We want 'x'. */
572 if ((ch = (*get) ()) != '\'')
574 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
575 as_warn ("Missing close quote: (assumed)");
580 if (strlen (out_buf) == 1)
584 if (state == 9 || state == 10)
589 out_string = out_buf;
590 return *out_string++;
594 if (state == 9 || state == 10)
601 /* Roll out a bunch of newlines from inside comments, etc. */
607 /* fall thru into... */
609 case LEX_IS_LINE_SEPARATOR:
613 case LEX_IS_LINE_COMMENT_START:
614 if (state == 0) /* Only comment at start of line. */
616 /* FIXME-someday: The two character comment stuff was badly
617 thought out. On i386, we want '/' as line comment start
618 AND we want C style comments. hence this hack. The
619 whole lexical process should be reworked. xoxorich. */
626 return (do_scrub_next_char (get, unget));
639 while (ch != EOF && IS_WHITESPACE (ch));
642 as_warn ("EOF in comment: Newline inserted");
645 if (ch < '0' || ch > '9' || not_cpp_line)
647 /* Non-numerics: Eat whole comment line */
648 while (ch != EOF && !IS_NEWLINE (ch))
651 as_warn ("EOF in Comment: Newline inserted");
655 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
659 out_string = "\t.appline ";
660 return *out_string++;
663 /* We have a line comment character which is not at the start of
664 a line. If this is also a normal comment character, fall
665 through. Otherwise treat it as a default character. */
666 if (strchr (comment_chars, ch) == NULL)
669 case LEX_IS_COMMENT_START:
672 while (ch != EOF && !IS_NEWLINE (ch));
674 as_warn ("EOF in comment: Newline inserted");
678 case LEX_IS_SYMBOL_COMPONENT:
681 /* This is a symbol character following another symbol
682 character, with whitespace in between. We skipped the
683 whitespace earlier, so output it now. */
693 /* Some relatively `normal' character. */
696 state = 11; /* Now seeing label definition */
701 state = 2; /* Ditto */
706 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
710 else if (state == 10)
717 return ch; /* Opcode or operands already */
725 const char comment_chars[] = "|";
726 const char line_comment_chars[] = "#";
733 while ((ch = do_scrub_next_char (stdin)) != EOF)