2 * main.c -- Expression tree constructors and main program for gawk.
6 * Copyright (C) 1986, 1988, 1989, 1991-2000 the Free Software Foundation, Inc.
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
25 * $FreeBSD: src/contrib/awk/main.c,v 1.4.2.1 2001/01/23 22:08:31 asmodai Exp $
30 #include "patchlevel.h"
32 static void usage P((int exitval, FILE *fp)); /* print usage information on fp and exit */
33 static void copyleft P((void)); /* print the short GNU copyright information */
34 static void cmdline_fs P((char *str)); /* set FS from the command line */
35 static void init_args P((int argc0, int argc, char *argv0, char **argv)); /* set up ARGV and ARGC from the command line */
36 static void init_vars P((void)); /* install the special variables in the symbol table */
37 static void pre_assign P((char *v)); /* handle -v; print a message and die if a problem */
38 RETSIGTYPE catchsig P((int sig, int code)); /* handler installed with signal() for SIGFPE, SIGSEGV and SIGBUS */
39 static void nostalgia P((void)); /* print the famous error message and die */
40 static void version P((void)); /* print version message */
42 /* These nodes store all the special variables AWK uses */
43 NODE *ARGC_node, *ARGIND_node, *ARGV_node, *CONVFMT_node, *ENVIRON_node;
44 NODE *ERRNO_node, *FIELDWIDTHS_node, *FILENAME_node, *FNR_node, *FS_node;
45 NODE *IGNORECASE_node, *NF_node, *NR_node, *OFMT_node, *OFS_node;
46 NODE *ORS_node, *RLENGTH_node, *RSTART_node, *RS_node, *RT_node, *SUBSEP_node;
57 * CONVFMT is a convenience pointer for the current number to string format.
58 * We must supply an initial value to avoid recursion problems of
59 * set_CONVFMT -> fmt_index -> r_force_string: gets NULL CONVFMT
62 char *CONVFMT = "%.6g";
64 int errcount = 0; /* error counter, used by yyerror() */
66 NODE *Nnull_string; /* The global null string */
68 /* The name the program was invoked under, for error messages */
71 /* A block of AWK code to be run before running the program */
72 NODE *begin_block = NULL;
74 /* A block of AWK code to be run after the last input file */
75 NODE *end_block = NULL;
77 int exiting = FALSE; /* Was an "exit" statement executed? */
78 int exit_val = 0; /* optional exit value */
80 #if defined(YYDEBUG) || defined(DEBUG)
84 struct src *srcfiles = NULL; /* source file name(s) */
85 long numfiles = -1; /* how many source files */
87 int do_traditional = FALSE; /* no GNU extensions, add traditional weirdnesses (--traditional, --compat) */
88 int do_posix = FALSE; /* turn off GNU and Unix extensions (--posix) */
89 int do_lint = FALSE; /* provide warnings about questionable stuff (--lint) */
90 int do_lint_old = FALSE; /* warn about stuff not in V7 awk (--lint-old) */
91 int do_nostalgia = FALSE; /* provide a blast from the past (--nostalgia) */
92 int do_intervals = FALSE; /* allow interval expressions {...,...} in regexps (--re-interval) */
94 int in_begin_rule = FALSE; /* we're in a BEGIN rule */
95 int in_end_rule = FALSE; /* we're in an END rule */
97 int output_is_tty = FALSE; /* set when stdout is a terminal; controls flushing of output */
99 extern char *version_string; /* current version, for printing */
101 /* The parse tree is stored here. */
102 NODE *expression_value;
104 static struct option optab[] = { /* long options for getopt_long(); columns: name, has_arg, flag, val */
105 { "compat", no_argument, & do_traditional, 1 }, /* non-NULL flag: getopt_long stores val (1) in the variable and returns 0 */
106 { "traditional", no_argument, & do_traditional, 1 }, /* same flag as --compat */
107 { "lint", no_argument, & do_lint, 1 },
108 { "lint-old", no_argument, & do_lint_old, 1 },
109 { "posix", no_argument, & do_posix, 1 },
110 { "nostalgia", no_argument, & do_nostalgia, 1 },
111 { "copyleft", no_argument, NULL, 'C' }, /* NULL flag: getopt_long returns val ('C') to the option loop */
112 { "copyright", no_argument, NULL, 'C' }, /* same letter as --copyleft */
113 { "field-separator", required_argument, NULL, 'F' }, /* long form of -F */
114 { "file", required_argument, NULL, 'f' }, /* long form of -f */
115 { "re-interval", no_argument, & do_intervals, 1 },
116 { "source", required_argument, NULL, 's' }, /* 's' is not in optlist, so it is reachable only as a long option */
117 { "assign", required_argument, NULL, 'v' }, /* long form of -v */
118 { "version", no_argument, NULL, 'V' },
119 { "usage", no_argument, NULL, 'u' },
120 { "help", no_argument, NULL, 'u' }, /* same letter as --usage */
122 { "parsedebug", no_argument, NULL, 'D' },
124 { NULL, 0, NULL, '\0' } /* all-zero entry terminates the table for getopt_long */
127 /* main --- process args, parse program, run it, clean up */
136 /* the + on the front tells GNU getopt not to rearrange argv */
137 const char *optlist = "+F:f:v:W;m:";
138 int stopped_early = FALSE;
144 setlocale(LC_CTYPE, "");
145 setlocale(LC_COLLATE, "");
147 (void) signal(SIGFPE, (RETSIGTYPE (*) P((int))) catchsig);
148 (void) signal(SIGSEGV, (RETSIGTYPE (*) P((int))) catchsig);
150 (void) signal(SIGBUS, (RETSIGTYPE (*) P((int))) catchsig);
153 myname = gawk_name(argv[0]);
154 argv[0] = (char *) myname;
155 os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */
157 /* remove sccs gunk */
158 if (strncmp(version_string, "@(#)", 4) == 0)
164 /* initialize the null string */
165 Nnull_string = make_string("", 0);
166 Nnull_string->numbr = 0.0;
167 Nnull_string->type = Node_val;
168 Nnull_string->flags = (PERM|STR|STRING|NUM|NUMBER);
171 * Tell the regex routines how they should work.
172 * Do this before initializing variables, since
173 * they could want to do a regexp compile.
177 /* Set up the special variables */
179 * Note that this must be done BEFORE arg parsing else -F
184 /* Set up the field variables */
186 * Do this before arg parsing so that `-v NF=blah' won't
192 emalloc(srcfiles, struct src *, argc * sizeof(struct src), "main");
193 memset(srcfiles, '\0', argc * sizeof(struct src));
195 /* we do error messages ourselves on invalid options */
198 /* option processing. ready, set, go! */
199 for (optopt = 0, old_optind = 1;
200 (c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF;
201 optopt = 0, old_optind = optind) {
212 * a la MKS awk, allow multiple -f options.
213 * this makes function libraries real easy.
214 * most of the magic is in the scanner.
216 * The following is to allow for whitespace at the end
217 * of a #! /bin/gawk line in an executable file
220 while (ISSPACE(*scan))
224 srcfiles[numfiles].stype = SOURCEFILE;
226 srcfiles[numfiles].val = argv[optind++];
228 srcfiles[numfiles].val = optarg;
237 * Research awk extension.
238 * -mf nnn set # fields, gawk ignores
239 * -mr nnn set record length, ditto
242 warning("-m[fr] option irrelevant in gawk");
243 if (optarg[0] != 'r' && optarg[0] != 'f')
244 warning("-m option usage: `-m[fr] nnn'");
245 if (optarg[1] == '\0')
249 case 'W': /* gawk specific options - now in getopt_long */
250 fprintf(stderr, "%s: option `-W %s' unrecognized, ignored\n",
254 /* These can only come from long form options */
260 if (optarg[0] == '\0')
261 warning("empty argument to --source ignored");
263 srcfiles[++numfiles].stype = CMDLINE;
264 srcfiles[numfiles].val = optarg;
269 usage(0, stdout); /* per coding stds */
284 * getopt_long found an option that sets a variable
285 * instead of returning a letter. Do nothing, just
286 * cycle around for the next one.
293 * New behavior. If not posix, an unrecognized
294 * option stops argument processing so that it can
295 * go into ARGV for the awk program to see. This
296 * makes use of ``#! /bin/gawk -f'' easier.
298 * However, it's never simple. If optopt is set,
299 * an option that requires an argument didn't get the
300 * argument. We care because if opterr is 0, then
301 * getopt_long won't print the error message for us.
304 && (optopt == '\0' || strchr(optlist, optopt) == NULL)) {
306 * can't just do optind--. In case of an
307 * option with >= 2 letters, getopt_long
308 * won't have incremented optind.
311 stopped_early = TRUE;
313 } else if (optopt != '\0')
314 /* Use 1003.2 required message format */
316 "%s: option requires an argument -- %c\n",
319 let getopt print error message for us */
328 /* check for POSIXLY_CORRECT environment variable */
329 if (! do_posix && getenv("POSIXLY_CORRECT") != NULL) {
333 "environment variable `POSIXLY_CORRECT' set: turning on --posix");
337 if (do_traditional) /* both on command line */
338 warning("--posix overrides --traditional");
340 do_traditional = TRUE;
342 * POSIX compliance also implies
343 * no GNU extensions either.
348 * Tell the regex routines how they should work.
349 * Do this again, after argument processing, since do_posix
350 * and do_traditional are now paid attention to by resetup().
352 if (do_traditional || do_posix || do_intervals) {
355 /* now handle RS and FS. have to be careful with FS */
357 if (using_fieldwidths()) {
365 setbuf(stdout, (char *) NULL); /* make debugging easier */
367 if (isatty(fileno(stdout)))
368 output_is_tty = TRUE;
369 /* No -f or --source options, use next arg */
370 if (numfiles == -1) {
371 if (optind > argc - 1 || stopped_early) /* no args left or no program */
373 srcfiles[++numfiles].stype = CMDLINE;
374 srcfiles[numfiles].val = argv[optind];
378 init_args(optind, argc, (char *) myname, argv);
381 /* Read in the program */
382 if (yyparse() != 0 || errcount != 0)
384 /* recover any space from C based alloca */
389 if (do_lint && begin_block == NULL && expression_value == NULL
390 && end_block == NULL)
391 warning("no program");
393 if (begin_block != NULL) {
394 in_begin_rule = TRUE;
395 (void) interpret(begin_block);
397 in_begin_rule = FALSE;
398 if (! exiting && (expression_value != NULL || end_block != NULL))
400 if (end_block != NULL) {
402 (void) interpret(end_block);
405 if (close_io() != 0 && exit_val == 0)
407 exit(exit_val); /* more portable */
408 return exit_val; /* to suppress warnings */
411 /* usage --- print usage information and exit */
418 char *opt1 = " -f progfile [--]";
419 char *regops = " [POSIX or GNU style options]";
421 fprintf(fp, "Usage: %s%s%s file ...\n\t%s%s [--] %cprogram%c file ...\n",
422 myname, regops, opt1, myname, regops, quote, quote);
424 /* GNU long options info. Gack. */
425 fputs("POSIX options:\t\tGNU long options:\n", fp);
426 fputs("\t-f progfile\t\t--file=progfile\n", fp);
427 fputs("\t-F fs\t\t\t--field-separator=fs\n", fp);
428 fputs("\t-v var=val\t\t--assign=var=val\n", fp);
429 fputs("\t-m[fr] val\n", fp);
430 fputs("\t-W compat\t\t--compat\n", fp);
431 fputs("\t-W copyleft\t\t--copyleft\n", fp);
432 fputs("\t-W copyright\t\t--copyright\n", fp);
433 fputs("\t-W help\t\t\t--help\n", fp);
434 fputs("\t-W lint\t\t\t--lint\n", fp);
435 fputs("\t-W lint-old\t\t--lint-old\n", fp);
437 fputs("\t-W nostalgia\t\t--nostalgia\n", fp);
440 fputs("\t-W parsedebug\t\t--parsedebug\n", fp);
442 fputs("\t-W posix\t\t--posix\n", fp);
443 fputs("\t-W re-interval\t\t--re-interval\n", fp);
444 fputs("\t-W source=program-text\t--source=program-text\n", fp);
445 fputs("\t-W traditional\t\t--traditional\n", fp);
446 fputs("\t-W usage\t\t--usage\n", fp);
447 fputs("\t-W version\t\t--version\n", fp);
448 fputs("\nTo report bugs, see node `Bugs' in `gawk.info', which\n", fp);
449 fputs("is section `Reporting Problems and Bugs' in the\n", fp);
450 fputs("printed version.\n", fp);
454 /* copyleft --- print out the short GNU copyright information */
459 static char blurb_part1[] =
460 "Copyright (C) 1989, 1991-2000 Free Software Foundation.\n\
462 This program is free software; you can redistribute it and/or modify\n\
463 it under the terms of the GNU General Public License as published by\n\
464 the Free Software Foundation; either version 2 of the License, or\n\
465 (at your option) any later version.\n\
467 static char blurb_part2[] =
468 "This program is distributed in the hope that it will be useful,\n\
469 but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
470 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
471 GNU General Public License for more details.\n\
473 static char blurb_part3[] =
474 "You should have received a copy of the GNU General Public License\n\
475 along with this program; if not, write to the Free Software\n\
476 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\n";
478 /* multiple blurbs are needed for some brain dead compilers. */
479 fputs(blurb_part1, stdout);
480 fputs(blurb_part2, stdout);
481 fputs(blurb_part3, stdout);
486 /* cmdline_fs --- set FS from the command line */
494 tmp = get_lhs(FS_node, (Func_ptr *) 0);
497 * Only if in full compatibility mode check for the stupid special
498 * case so -F\t works as documented in awk book even though the shell
499 * hands us -Ft. Bleah!
501 * Thankfully, Posix didn't propagate this "feature".
503 if (str[0] == 't' && str[1] == '\0') {
505 warning("-Ft does not set FS to tab in POSIX awk");
506 if (do_traditional && ! do_posix)
509 *tmp = make_str_node(str, strlen(str), SCAN); /* do process escapes */
513 /* init_args --- set up ARGV from stuff on the command line */
516 init_args(argc0, argc, argv0, argv)
524 ARGV_node = install("ARGV", node(Nnull_string, Node_var_array, (NODE *) NULL));
525 aptr = assoc_lookup(ARGV_node, tmp_number(0.0));
526 *aptr = make_string(argv0, strlen(argv0));
527 (*aptr)->flags |= MAYBE_NUM;
528 for (i = argc0, j = 1; i < argc; i++) {
529 aptr = assoc_lookup(ARGV_node, tmp_number((AWKNUM) j));
530 *aptr = make_string(argv[i], strlen(argv[i]));
531 (*aptr)->flags |= MAYBE_NUM;
534 ARGC_node = install("ARGC",
535 node(make_number((AWKNUM) j), Node_var, (NODE *) NULL));
539 * Set all the special variables to their initial values.
540 * Note that some of the variables that have set_FOO routines should
541 * *N*O*T* have those routines called upon initialization, and thus
542 * they have NULL entries in that field. This is notably true of FS
553 static struct varinit varinit[] = { /* columns: spec (where to store the node), name, type, strval (NULL means use numval), numval, set_FOO routine or NULL */
554 {&CONVFMT_node, "CONVFMT", Node_CONVFMT, "%.6g", 0, set_CONVFMT },
555 {&NF_node, "NF", Node_NF, NULL, -1, set_NF }, /* numeric initial value of -1 */
556 {&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS, "", 0, NULL },
557 {&NR_node, "NR", Node_NR, NULL, 0, set_NR },
558 {&FNR_node, "FNR", Node_FNR, NULL, 0, set_FNR },
559 {&FS_node, "FS", Node_FS, " ", 0, NULL }, /* deliberately no set routine at initialization */
560 {&RS_node, "RS", Node_RS, "\n", 0, set_RS },
561 {&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE, NULL, 0, NULL },
562 {&FILENAME_node, "FILENAME", Node_var, "", 0, NULL },
563 {&OFS_node, "OFS", Node_OFS, " ", 0, set_OFS },
564 {&ORS_node, "ORS", Node_ORS, "\n", 0, set_ORS },
565 {&OFMT_node, "OFMT", Node_OFMT, "%.6g", 0, set_OFMT },
566 {&RLENGTH_node, "RLENGTH", Node_var, NULL, 0, NULL },
567 {&RSTART_node, "RSTART", Node_var, NULL, 0, NULL },
568 {&SUBSEP_node, "SUBSEP", Node_var, "\034", 0, NULL },
569 {&ARGIND_node, "ARGIND", Node_var, NULL, 0, NULL },
570 {&ERRNO_node, "ERRNO", Node_var, NULL, 0, NULL },
571 {&RT_node, "RT", Node_var, "", 0, NULL },
572 {0, NULL, Node_illegal, NULL, 0, NULL }, /* sentinel: the NULL name ends the init_vars loop */
575 /* init_vars --- actually initialize everything in the symbol table */
580 register struct varinit *vp;
582 for (vp = varinit; vp->name; vp++) {
583 *(vp->spec) = install((char *) vp->name,
584 node(vp->strval == NULL ? make_number(vp->numval)
585 : make_string((char *) vp->strval,
587 vp->type, (NODE *) NULL));
588 (*(vp->spec))->flags |= SCALAR;
594 /* load_environ --- populate the ENVIRON array */
599 #if ! (defined(MSDOS) && !defined(DJGPP)) && ! defined(OS2) && ! (defined(VMS) && defined(__DECC))
600 extern char **environ;
602 register char *var, *val, *cp;
606 ENVIRON_node = install("ENVIRON",
607 node(Nnull_string, Node_var, (NODE *) NULL));
608 for (i = 0; environ[i] != NULL; i++) {
609 static char nullstr[] = "";
612 val = strchr(var, '=');
617 aptr = assoc_lookup(ENVIRON_node, tmp_string(var, strlen(var)));
618 *aptr = make_string(val, strlen(val));
619 (*aptr)->flags |= (MAYBE_NUM|SCALAR);
621 /* restore '=' so that system() gets a valid environment */
626 * Put AWKPATH into ENVIRON if it's not there.
627 * This allows querying it from outside gawk.
629 if ((cp = getenv("AWKPATH")) == NULL) {
630 aptr = assoc_lookup(ENVIRON_node, tmp_string("AWKPATH", 7));
631 *aptr = make_string(defpath, strlen(defpath));
632 (*aptr)->flags |= SCALAR;
636 /* arg_assign --- process a command-line assignment */
644 Func_ptr after_assign = NULL;
649 cp = strchr(arg, '=');
652 /* first check that the variable name has valid syntax */
654 if (! isalpha(arg[0]) && arg[0] != '_')
657 for (cp2 = arg+1; *cp2; cp2++)
658 if (! isalnum(*cp2) && *cp2 != '_') {
665 warning("illegal name `%s' in variable assignment", arg);
666 *--cp = '='; /* restore original text of ARGV */
671 * Recent versions of nawk expand escapes inside assignments.
672 * This makes sense, so we do it too.
674 it = make_str_node(cp, strlen(cp), SCAN);
675 it->flags |= (MAYBE_NUM|SCALAR);
676 var = variable(arg, FALSE, Node_var);
677 lhs = get_lhs(var, &after_assign);
680 if (after_assign != NULL)
682 *--cp = '='; /* restore original text of ARGV */
687 /* pre_assign --- handle -v, print a message and die if a problem */
693 if (arg_assign(v) == NULL) {
695 "%s: `%s' argument to `-v' not in `var=value' form\n",
701 /* catchsig --- catch signals */
708 code = 0; sig = code; code = sig;
711 fatal("floating point exception");
712 } else if (sig == SIGSEGV
717 set_loc(__FILE__, __LINE__);
718 msg("fatal error: internal error");
719 /* fatal won't abort() if not compiled for debugging */
726 /* nostalgia --- print the famous error message and die */
731 fprintf(stderr, "awk: bailing out near line 1\n");
735 /* version --- print version message */
740 printf("%s.%d\n", version_string, PATCHLEVEL);
742 * Per GNU coding standards, print copyright info,
743 * then exit successfully, do nothing else.