2 * main.c -- Expression tree constructors and main program for gawk.
6 * Copyright (C) 1986, 1988, 1989, 1991-2000 the Free Software Foundation, Inc.
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
25 * $FreeBSD: src/contrib/awk/main.c,v 1.4.2.1 2001/01/23 22:08:31 asmodai Exp $
26 * $DragonFly: src/contrib/awk/Attic/main.c,v 1.2 2003/06/17 04:23:58 dillon Exp $
31 #include "patchlevel.h"
33 static void usage P((int exitval, FILE *fp));
34 static void copyleft P((void));
35 static void cmdline_fs P((char *str));
36 static void init_args P((int argc0, int argc, char *argv0, char **argv));
37 static void init_vars P((void));
38 static void pre_assign P((char *v));
39 RETSIGTYPE catchsig P((int sig, int code));
40 static void nostalgia P((void));
41 static void version P((void));
43 /* These nodes store all the special variables AWK uses */
44 NODE *ARGC_node, *ARGIND_node, *ARGV_node, *CONVFMT_node, *ENVIRON_node;
45 NODE *ERRNO_node, *FIELDWIDTHS_node, *FILENAME_node, *FNR_node, *FS_node;
46 NODE *IGNORECASE_node, *NF_node, *NR_node, *OFMT_node, *OFS_node;
47 NODE *ORS_node, *RLENGTH_node, *RSTART_node, *RS_node, *RT_node, *SUBSEP_node;
58 * CONVFMT is a convenience pointer for the current number to string format.
59 * We must supply an initial value to avoid recursion problems of
60 * set_CONVFMT -> fmt_index -> r_force_string: gets NULL CONVFMT
63 char *CONVFMT = "%.6g";
65 int errcount = 0; /* error counter, used by yyerror() */
67 NODE *Nnull_string; /* The global null string */
69 /* The name the program was invoked under, for error messages */
72 /* A block of AWK code to be run before running the program */
73 NODE *begin_block = NULL;
75 /* A block of AWK code to be run after the last input file */
76 NODE *end_block = NULL;
78 int exiting = FALSE; /* Was an "exit" statement executed? */
79 int exit_val = 0; /* optional exit value */
81 #if defined(YYDEBUG) || defined(DEBUG)
85 struct src *srcfiles = NULL; /* source file name(s) */
86 long numfiles = -1; /* how many source files */
88 int do_traditional = FALSE; /* no gnu extensions, add traditional weirdnesses */
89 int do_posix = FALSE; /* turn off gnu and unix extensions */
90 int do_lint = FALSE; /* provide warnings about questionable stuff */
91 int do_lint_old = FALSE; /* warn about stuff not in V7 awk */
92 int do_nostalgia = FALSE; /* provide a blast from the past */
93 int do_intervals = FALSE; /* allow {...,...} in regexps */
95 int in_begin_rule = FALSE; /* we're in a BEGIN rule */
96 int in_end_rule = FALSE; /* we're in an END rule */
98 int output_is_tty = FALSE; /* control flushing of output */
100 extern char *version_string; /* current version, for printing */
102 /* The parse tree is stored here. */
103 NODE *expression_value;
105 static struct option optab[] = {
106 { "compat", no_argument, & do_traditional, 1 },
107 { "traditional", no_argument, & do_traditional, 1 },
108 { "lint", no_argument, & do_lint, 1 },
109 { "lint-old", no_argument, & do_lint_old, 1 },
110 { "posix", no_argument, & do_posix, 1 },
111 { "nostalgia", no_argument, & do_nostalgia, 1 },
112 { "copyleft", no_argument, NULL, 'C' },
113 { "copyright", no_argument, NULL, 'C' },
114 { "field-separator", required_argument, NULL, 'F' },
115 { "file", required_argument, NULL, 'f' },
116 { "re-interval", no_argument, & do_intervals, 1 },
117 { "source", required_argument, NULL, 's' },
118 { "assign", required_argument, NULL, 'v' },
119 { "version", no_argument, NULL, 'V' },
120 { "usage", no_argument, NULL, 'u' },
121 { "help", no_argument, NULL, 'u' },
123 { "parsedebug", no_argument, NULL, 'D' },
125 { NULL, 0, NULL, '\0' }
128 /* main --- process args, parse program, run it, clean up */
137 /* the + on the front tells GNU getopt not to rearrange argv */
138 const char *optlist = "+F:f:v:W;m:";
139 int stopped_early = FALSE;
145 setlocale(LC_CTYPE, "");
146 setlocale(LC_COLLATE, "");
148 (void) signal(SIGFPE, (RETSIGTYPE (*) P((int))) catchsig);
149 (void) signal(SIGSEGV, (RETSIGTYPE (*) P((int))) catchsig);
151 (void) signal(SIGBUS, (RETSIGTYPE (*) P((int))) catchsig);
154 myname = gawk_name(argv[0]);
155 argv[0] = (char *) myname;
156 os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */
158 /* remove sccs gunk */
159 if (strncmp(version_string, "@(#)", 4) == 0)
165 /* initialize the null string */
166 Nnull_string = make_string("", 0);
167 Nnull_string->numbr = 0.0;
168 Nnull_string->type = Node_val;
169 Nnull_string->flags = (PERM|STR|STRING|NUM|NUMBER);
172 * Tell the regex routines how they should work.
173 * Do this before initializing variables, since
174 * they could want to do a regexp compile.
178 /* Set up the special variables */
180 * Note that this must be done BEFORE arg parsing else -F
185 /* Set up the field variables */
187 * Do this before arg parsing so that `-v NF=blah' won't
193 emalloc(srcfiles, struct src *, argc * sizeof(struct src), "main");
194 memset(srcfiles, '\0', argc * sizeof(struct src));
196 /* we do error messages ourselves on invalid options */
199 /* option processing. ready, set, go! */
200 for (optopt = 0, old_optind = 1;
201 (c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF;
202 optopt = 0, old_optind = optind) {
213 * a la MKS awk, allow multiple -f options.
214 * this makes function libraries real easy.
215 * most of the magic is in the scanner.
217 * The following is to allow for whitespace at the end
218 * of a #! /bin/gawk line in an executable file
221 while (ISSPACE(*scan))
225 srcfiles[numfiles].stype = SOURCEFILE;
227 srcfiles[numfiles].val = argv[optind++];
229 srcfiles[numfiles].val = optarg;
238 * Research awk extension.
239 * -mf nnn set # fields, gawk ignores
240 * -mr nnn set record length, ditto
243 warning("-m[fr] option irrelevant in gawk");
244 if (optarg[0] != 'r' && optarg[0] != 'f')
245 warning("-m option usage: `-m[fr] nnn'");
246 if (optarg[1] == '\0')
250 case 'W': /* gawk specific options - now in getopt_long */
251 fprintf(stderr, "%s: option `-W %s' unrecognized, ignored\n",
255 /* These can only come from long form options */
261 if (optarg[0] == '\0')
262 warning("empty argument to --source ignored");
264 srcfiles[++numfiles].stype = CMDLINE;
265 srcfiles[numfiles].val = optarg;
270 usage(0, stdout); /* per coding stds */
285 * getopt_long found an option that sets a variable
286 * instead of returning a letter. Do nothing, just
287 * cycle around for the next one.
294 * New behavior. If not posix, an unrecognized
295 * option stops argument processing so that it can
296 * go into ARGV for the awk program to see. This
297 * makes use of ``#! /bin/gawk -f'' easier.
299 * However, it's never simple. If optopt is set,
300 * an option that requires an argument didn't get the
301 * argument. We care because if opterr is 0, then
302 * getopt_long won't print the error message for us.
305 && (optopt == '\0' || strchr(optlist, optopt) == NULL)) {
307 * can't just do optind--. In case of an
308 * option with >= 2 letters, getopt_long
309 * won't have incremented optind.
312 stopped_early = TRUE;
314 } else if (optopt != '\0')
315 /* Use 1003.2 required message format */
317 "%s: option requires an argument -- %c\n",
320 let getopt print error message for us */
329 /* check for POSIXLY_CORRECT environment variable */
330 if (! do_posix && getenv("POSIXLY_CORRECT") != NULL) {
334 "environment variable `POSIXLY_CORRECT' set: turning on --posix");
338 if (do_traditional) /* both on command line */
339 warning("--posix overrides --traditional");
341 do_traditional = TRUE;
343 * POSIX compliance also implies
344 * no GNU extensions either.
349 * Tell the regex routines how they should work.
350 * Do this again, after argument processing, since do_posix
351 * and do_traditional are now paid attention to by resetup().
353 if (do_traditional || do_posix || do_intervals) {
356 /* now handle RS and FS. have to be careful with FS */
358 if (using_fieldwidths()) {
366 setbuf(stdout, (char *) NULL); /* make debugging easier */
368 if (isatty(fileno(stdout)))
369 output_is_tty = TRUE;
370 /* No -f or --source options, use next arg */
371 if (numfiles == -1) {
372 if (optind > argc - 1 || stopped_early) /* no args left or no program */
374 srcfiles[++numfiles].stype = CMDLINE;
375 srcfiles[numfiles].val = argv[optind];
379 init_args(optind, argc, (char *) myname, argv);
382 /* Read in the program */
383 if (yyparse() != 0 || errcount != 0)
385 /* recover any space from C based alloca */
390 if (do_lint && begin_block == NULL && expression_value == NULL
391 && end_block == NULL)
392 warning("no program");
394 if (begin_block != NULL) {
395 in_begin_rule = TRUE;
396 (void) interpret(begin_block);
398 in_begin_rule = FALSE;
399 if (! exiting && (expression_value != NULL || end_block != NULL))
401 if (end_block != NULL) {
403 (void) interpret(end_block);
406 if (close_io() != 0 && exit_val == 0)
408 exit(exit_val); /* more portable */
409 return exit_val; /* to suppress warnings */
412 /* usage --- print usage information and exit */
419 char *opt1 = " -f progfile [--]";
420 char *regops = " [POSIX or GNU style options]";
422 fprintf(fp, "Usage: %s%s%s file ...\n\t%s%s [--] %cprogram%c file ...\n",
423 myname, regops, opt1, myname, regops, quote, quote);
425 /* GNU long options info. Gack. */
426 fputs("POSIX options:\t\tGNU long options:\n", fp);
427 fputs("\t-f progfile\t\t--file=progfile\n", fp);
428 fputs("\t-F fs\t\t\t--field-separator=fs\n", fp);
429 fputs("\t-v var=val\t\t--assign=var=val\n", fp);
430 fputs("\t-m[fr] val\n", fp);
431 fputs("\t-W compat\t\t--compat\n", fp);
432 fputs("\t-W copyleft\t\t--copyleft\n", fp);
433 fputs("\t-W copyright\t\t--copyright\n", fp);
434 fputs("\t-W help\t\t\t--help\n", fp);
435 fputs("\t-W lint\t\t\t--lint\n", fp);
436 fputs("\t-W lint-old\t\t--lint-old\n", fp);
438 fputs("\t-W nostalgia\t\t--nostalgia\n", fp);
441 fputs("\t-W parsedebug\t\t--parsedebug\n", fp);
443 fputs("\t-W posix\t\t--posix\n", fp);
444 fputs("\t-W re-interval\t\t--re-interval\n", fp);
445 fputs("\t-W source=program-text\t--source=program-text\n", fp);
446 fputs("\t-W traditional\t\t--traditional\n", fp);
447 fputs("\t-W usage\t\t--usage\n", fp);
448 fputs("\t-W version\t\t--version\n", fp);
449 fputs("\nTo report bugs, see node `Bugs' in `gawk.info', which\n", fp);
450 fputs("is section `Reporting Problems and Bugs' in the\n", fp);
451 fputs("printed version.\n", fp);
455 /* copyleft --- print out the short GNU copyright information */
460 static char blurb_part1[] =
461 "Copyright (C) 1989, 1991-2000 Free Software Foundation.\n\
463 This program is free software; you can redistribute it and/or modify\n\
464 it under the terms of the GNU General Public License as published by\n\
465 the Free Software Foundation; either version 2 of the License, or\n\
466 (at your option) any later version.\n\
468 static char blurb_part2[] =
469 "This program is distributed in the hope that it will be useful,\n\
470 but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
471 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
472 GNU General Public License for more details.\n\
474 static char blurb_part3[] =
475 "You should have received a copy of the GNU General Public License\n\
476 along with this program; if not, write to the Free Software\n\
477 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\n";
479 /* multiple blurbs are needed for some brain dead compilers. */
480 fputs(blurb_part1, stdout);
481 fputs(blurb_part2, stdout);
482 fputs(blurb_part3, stdout);
487 /* cmdline_fs --- set FS from the command line */
495 tmp = get_lhs(FS_node, (Func_ptr *) 0);
498 * Only if in full compatibility mode check for the stupid special
499 * case so -F\t works as documented in awk book even though the shell
500 * hands us -Ft. Bleah!
502 * Thankfully, POSIX didn't propagate this "feature".
504 if (str[0] == 't' && str[1] == '\0') {
506 warning("-Ft does not set FS to tab in POSIX awk");
507 if (do_traditional && ! do_posix)
510 *tmp = make_str_node(str, strlen(str), SCAN); /* do process escapes */
514 /* init_args --- set up ARGV from stuff on the command line */
517 init_args(argc0, argc, argv0, argv)
525 ARGV_node = install("ARGV", node(Nnull_string, Node_var_array, (NODE *) NULL));
526 aptr = assoc_lookup(ARGV_node, tmp_number(0.0));
527 *aptr = make_string(argv0, strlen(argv0));
528 (*aptr)->flags |= MAYBE_NUM;
529 for (i = argc0, j = 1; i < argc; i++) {
530 aptr = assoc_lookup(ARGV_node, tmp_number((AWKNUM) j));
531 *aptr = make_string(argv[i], strlen(argv[i]));
532 (*aptr)->flags |= MAYBE_NUM;
535 ARGC_node = install("ARGC",
536 node(make_number((AWKNUM) j), Node_var, (NODE *) NULL));
540 * Set all the special variables to their initial values.
541 * Note that some of the variables that have set_FOO routines should
542 * *N*O*T* have those routines called upon initialization, and thus
543 * they have NULL entries in that field. This is notably true of FS
554 static struct varinit varinit[] = {
555 {&CONVFMT_node, "CONVFMT", Node_CONVFMT, "%.6g", 0, set_CONVFMT },
556 {&NF_node, "NF", Node_NF, NULL, -1, set_NF },
557 {&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS, "", 0, NULL },
558 {&NR_node, "NR", Node_NR, NULL, 0, set_NR },
559 {&FNR_node, "FNR", Node_FNR, NULL, 0, set_FNR },
560 {&FS_node, "FS", Node_FS, " ", 0, NULL },
561 {&RS_node, "RS", Node_RS, "\n", 0, set_RS },
562 {&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE, NULL, 0, NULL },
563 {&FILENAME_node, "FILENAME", Node_var, "", 0, NULL },
564 {&OFS_node, "OFS", Node_OFS, " ", 0, set_OFS },
565 {&ORS_node, "ORS", Node_ORS, "\n", 0, set_ORS },
566 {&OFMT_node, "OFMT", Node_OFMT, "%.6g", 0, set_OFMT },
567 {&RLENGTH_node, "RLENGTH", Node_var, NULL, 0, NULL },
568 {&RSTART_node, "RSTART", Node_var, NULL, 0, NULL },
569 {&SUBSEP_node, "SUBSEP", Node_var, "\034", 0, NULL },
570 {&ARGIND_node, "ARGIND", Node_var, NULL, 0, NULL },
571 {&ERRNO_node, "ERRNO", Node_var, NULL, 0, NULL },
572 {&RT_node, "RT", Node_var, "", 0, NULL },
573 {0, NULL, Node_illegal, NULL, 0, NULL },
576 /* init_vars --- actually initialize everything in the symbol table */
581 register struct varinit *vp;
583 for (vp = varinit; vp->name; vp++) {
584 *(vp->spec) = install((char *) vp->name,
585 node(vp->strval == NULL ? make_number(vp->numval)
586 : make_string((char *) vp->strval,
588 vp->type, (NODE *) NULL));
589 (*(vp->spec))->flags |= SCALAR;
595 /* load_environ --- populate the ENVIRON array */
600 #if ! (defined(MSDOS) && !defined(DJGPP)) && ! defined(OS2) && ! (defined(VMS) && defined(__DECC))
601 extern char **environ;
603 register char *var, *val, *cp;
607 ENVIRON_node = install("ENVIRON",
608 node(Nnull_string, Node_var, (NODE *) NULL));
609 for (i = 0; environ[i] != NULL; i++) {
610 static char nullstr[] = "";
613 val = strchr(var, '=');
618 aptr = assoc_lookup(ENVIRON_node, tmp_string(var, strlen(var)));
619 *aptr = make_string(val, strlen(val));
620 (*aptr)->flags |= (MAYBE_NUM|SCALAR);
622 /* restore '=' so that system() gets a valid environment */
627 * Put AWKPATH into ENVIRON if it's not there.
628 * This allows querying it from outside gawk.
630 if ((cp = getenv("AWKPATH")) == NULL) {
631 aptr = assoc_lookup(ENVIRON_node, tmp_string("AWKPATH", 7));
632 *aptr = make_string(defpath, strlen(defpath));
633 (*aptr)->flags |= SCALAR;
637 /* arg_assign --- process a command-line assignment */
645 Func_ptr after_assign = NULL;
650 cp = strchr(arg, '=');
653 /* first check that the variable name has valid syntax */
655 if (! isalpha(arg[0]) && arg[0] != '_')
658 for (cp2 = arg+1; *cp2; cp2++)
659 if (! isalnum(*cp2) && *cp2 != '_') {
666 warning("illegal name `%s' in variable assignment", arg);
667 *--cp = '='; /* restore original text of ARGV */
672 * Recent versions of nawk expand escapes inside assignments.
673 * This makes sense, so we do it too.
675 it = make_str_node(cp, strlen(cp), SCAN);
676 it->flags |= (MAYBE_NUM|SCALAR);
677 var = variable(arg, FALSE, Node_var);
678 lhs = get_lhs(var, &after_assign);
681 if (after_assign != NULL)
683 *--cp = '='; /* restore original text of ARGV */
688 /* pre_assign --- handle -v, print a message and die if a problem */
694 if (arg_assign(v) == NULL) {
696 "%s: `%s' argument to `-v' not in `var=value' form\n",
702 /* catchsig --- catch signals */
709 code = 0; sig = code; code = sig;
712 fatal("floating point exception");
713 } else if (sig == SIGSEGV
718 set_loc(__FILE__, __LINE__);
719 msg("fatal error: internal error");
720 /* fatal won't abort() if not compiled for debugging */
727 /* nostalgia --- print the famous error message and die */
732 fprintf(stderr, "awk: bailing out near line 1\n");
736 /* version --- print version message */
741 printf("%s.%d\n", version_string, PATCHLEVEL);
743 * Per GNU coding standards, print copyright info,
744 * then exit successfully, do nothing else.