1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 ****************************************************************/
38 #include <sys/types.h>
/* Forward declarations for file-local helpers that are defined further down. */
43 static void stdinit(void);
44 static void flush_all(void);
/*
 * Macro form of tempfree: passes x to tfree() only when istemp(x) is true.
 * The do/while(0) wrapper makes it usable as a single statement. Note that
 * x is expanded twice, so arguments with side effects should be avoided.
 */
47 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
/*
 * Function form of tempfree. Emits a WARNING when an OCELL cell carries a
 * csub value outside the CUNK..CFREE range.
 * NOTE(review): this shares its name with the macro just before it;
 * presumably a preprocessor conditional selects one of the two -- confirm.
 */
49 void tempfree(Cell *p) {
50 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
51 WARNING("bad csub %d in Cell %d %s",
52 p->csub, p->ctype, p->sval);
59 /* do we really need these? */
61 /* #ifndef FOPEN_MAX */
62 /* #define FOPEN_MAX _NFILE */
66 /* #ifndef FOPEN_MAX */
67 /* #define FOPEN_MAX 40 */ /* max number of open files */
70 /* #ifndef RAND_MAX */
71 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */
/* Objects defined in other translation units and referenced from this file. */
75 extern int pairstack[];
76 extern Awkfloat srand_seed;
/*
 * Interpreter-wide state plus a set of statically allocated cells.
 * Eight of the cells are exported through a global pointer: two OBOOL
 * cells (True, False) and six OJUMP cells, one per jump kind (break,
 * continue, next, nextfile, exit, return). All of them are initialised
 * with the NUM type flag. tempcell is an OCELL/CTEMP cell flagged
 * NUM|STR|DONTFREE.
 */
78 Node *winner = NULL; /* root of parse tree */
79 Cell *tmps; /* free temporary cells for execution */
81 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
82 Cell *True = &truecell;
83 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
84 Cell *False = &falsecell;
85 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
86 Cell *jbreak = &breakcell;
87 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
88 Cell *jcont = &contcell;
89 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
90 Cell *jnext = &nextcell;
91 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
92 Cell *jnextfile = &nextfilecell;
93 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
94 Cell *jexit = &exitcell;
95 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
96 Cell *jret = &retcell;
97 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
99 Node *curnode = NULL; /* the node being executed, for debugging */
/*
 * adjbuf: make sure the managed buffer can hold at least minlen bytes.
 * Nothing is reallocated unless minlen exceeds *psiz. When it does, the
 * request is rounded up to a multiple of quantum (a zero quantum yields a
 * zero remainder) and handed to realloc(). If pbptr is supplied, its
 * offset from the old buffer start is recorded beforehand and reapplied
 * to the new buffer, so the caller's cursor stays valid after a move.
 * An out-of-memory condition can be reported through FATAL using whatrtn.
 */
101 /* buffer memory management */
102 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
104 /* pbuf: address of pointer to buffer being managed
105 * psiz: address of buffer size variable
106 * minlen: minimum length of buffer needed
107 * quantum: buffer size quantum
108 * pbptr: address of movable pointer into buffer, or 0 if none
109 * whatrtn: name of the calling routine if failure should cause fatal error
111 * return 0 for realloc failure, !=0 for success
114 if (minlen > *psiz) {
116 int rminlen = quantum ? minlen % quantum : 0;
117 int boff = pbptr ? *pbptr - *pbuf : 0;
118 /* round up to next multiple of quantum */
120 minlen += quantum - rminlen;
121 tbuf = realloc(*pbuf, minlen);
122 dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf) );
125 FATAL("out of memory in %s", whatrtn);
131 *pbptr = tbuf + boff;
136 void run(Node *a) /* execution of parse tree starts here */
/*
 * execute: run the node list that starts at u.
 * Nodes are visited in nnext order and the walk ends at the node whose
 * nnext is NULL. One path takes the Cell directly from narg[0]; the other
 * rejects nodes for which notlegal(nobj) holds and otherwise dispatches
 * through proctab[nobj - FIRSTTOKEN], passing the node's narg array and
 * nobj. On both paths a field or record cell is checked against donefld /
 * donerec before it is used.
 */
144 Cell *execute(Node *u) /* execute a node of the parse tree */
146 Cell *(*proc)(Node **, int);
152 for (a = u; ; a = a->nnext) {
155 x = (Cell *) (a->narg[0]);
156 if (isfld(x) && !donefld)
158 else if (isrec(x) && !donerec)
162 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */
163 FATAL("illegal statement");
164 proc = proctab[a->nobj-FIRSTTOKEN];
165 x = (*proc)(a->narg, a->nobj);
166 if (isfld(x) && !donefld)
168 else if (isrec(x) && !donerec)
174 if (a->nnext == NULL)
/*
 * program: run a whole awk program in three phases.
 *   a[0] = BEGIN actions, a[1] = main body, a[2] = END actions.
 * setjmp(env) is armed twice, once at the start and once before the END
 * phase (the latter handles exit inside END). Records are read with
 * getrec() into record/recsize until it stops returning a positive value.
 * A break/continue/next that escapes from BEGIN or END is a FATAL error.
 */
181 Cell *program(Node **a, int n) /* execute an awk program */
182 { /* a[0] = BEGIN, a[1] = body, a[2] = END */
185 if (setjmp(env) != 0)
187 if (a[0]) { /* BEGIN */
192 FATAL("illegal break, continue, next or nextfile from BEGIN");
196 while (getrec(&record, &recsize, true) > 0) {
203 if (setjmp(env) != 0) /* handles exit within END */
205 if (a[2]) { /* END */
207 if (isbreak(x) || isnext(x) || iscont(x))
208 FATAL("illegal break, continue, next or nextfile from END");
/*
 * Bookkeeping for calls to user-defined awk functions: the per-call frame
 * record, the per-call argument limit, and the dynamically allocated
 * frame array together with its size and the current frame pointer.
 */
215 struct Frame { /* stack frame for awk function calls */
216 int nargs; /* number of arguments in this call */
217 Cell *fcncell; /* pointer to Cell for function */
218 Cell **args; /* pointer to array of arguments after execute */
219 Cell *retval; /* return value */
222 #define NARGS 50 /* max args in a call */
224 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
225 int nframe = 0; /* number of frames allocated */
226 struct Frame *frp = NULL; /* frame pointer. bottom level unused */
/*
 * call: invoke a user-defined awk function.
 *   a[0] evaluates to the function's Cell (fcn); a[1] is the list of
 *   actual-argument expressions.
 * Behaviour visible in this routine:
 *   - Calling an undefined function is FATAL.
 *   - The frame array is created with calloc and later grown with realloc,
 *     100 frames at a time; running out of space is FATAL. The old frame
 *     index is saved before realloc because the array may move.
 *   - ncall counts the arguments supplied and ndef (taken from fcn->fval)
 *     the ones declared. Surplus arguments produce a WARNING and
 *     ncall + ndef may not exceed NARGS.
 *   - Each argument expression is executed. A function used as an
 *     argument is FATAL, arrays are passed by reference, and other values
 *     are duplicated with copycell(). Declared parameters that were not
 *     supplied are initialised from newcopycell.
 *   - frp->retval receives a fresh temporary from gettemp(), the body
 *     (fcn->sval viewed as a Node pointer) is executed, and frp->retval
 *     is then read back as the result z.
 *   - After the body returns, the argument cells are walked; for CCOPY
 *     cells the tval and sval of the callee's cell are copied into the
 *     matching oargs[] entry with STR, NUM and DONTFREE cleared.
 */
228 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
230 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
232 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
234 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */
238 fcn = execute(a[0]); /* the function itself */
241 FATAL("calling undefined function %s", s);
243 frp = frame = calloc(nframe += 100, sizeof(*frame));
245 FATAL("out of space for stack frames calling %s", s);
247 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
249 ndef = (int) fcn->fval; /* args in defn */
250 dprintf( ("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)) );
252 WARNING("function %s called with %d args, uses only %d",
254 if (ncall + ndef > NARGS)
255 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
256 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
257 dprintf( ("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)) );
260 dprintf( ("args[%d]: %s %f <%s>, t=%o\n",
261 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval) );
263 FATAL("can't use function %s as argument in %s", y->nval, s);
265 args[i] = y; /* arrays by ref */
267 args[i] = copycell(y);
270 for ( ; i < ndef; i++) { /* add null args for ones not provided */
272 *args[i] = newcopycell;
274 frp++; /* now ok to up frame */
275 if (frp >= frame + nframe) {
276 int dfp = frp - frame; /* old index */
277 frame = realloc(frame, (nframe += 100) * sizeof(*frame));
279 FATAL("out of space for stack frames in %s", s);
284 frp->nargs = ndef; /* number defined with (excess are locals) */
285 frp->retval = gettemp();
287 dprintf( ("start exec of %s, frp=%d\n", s, (int) (frp-frame)) );
288 y = execute((Node *)(fcn->sval)); /* execute body */
289 dprintf( ("finished exec of %s, frp=%d\n", s, (int) (frp-frame)) );
291 for (i = 0; i < ndef; i++) {
292 Cell *t = frp->args[i];
294 if (t->csub == CCOPY) {
300 oargs[i]->tval = t->tval;
301 oargs[i]->tval &= ~(STR|NUM|DONTFREE);
302 oargs[i]->sval = t->sval;
306 } else if (t != y) { /* kludge to prevent freeing twice */
309 } else if (t == y && t->csub == CCOPY) {
316 if (isexit(y) || isnext(y))
319 tempfree(y); /* don't free twice! */
321 z = frp->retval; /* return value */
322 dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) );
/*
 * copycell: duplicate cell x into a new cell y.
 * The copy takes x's type flags with CON, FLD and REC cleared, is marked
 * CCOPY, and shares x's nval pointer. When x is a string cell its sval is
 * duplicated with tostring() and DONTFREE is cleared on the copy.
 */
327 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */
331 /* copy is not constant or field */
334 y->tval = x->tval & ~(CON|FLD|REC);
335 y->csub = CCOPY; /* prevents freeing until call is over */
336 y->nval = x->nval; /* BUG? */
337 if (isstr(x) /* || x->ctype == OCELL */) {
338 y->sval = tostring(x->sval);
339 y->tval &= ~DONTFREE;
/*
 * arg: fetch an argument of the function currently executing.
 * The incoming n is overwritten with ptoi(a[0]), the zero-based argument
 * index. Asking for an index at or beyond frp->nargs is a FATAL error
 * that names the function through frp->fcncell->nval.
 */
346 Cell *arg(Node **a, int n) /* nth argument of a function */
349 n = ptoi(a[0]); /* argument number, counting from 0 */
350 dprintf( ("arg(%d), frp->nargs=%d\n", n, frp->nargs) );
351 if (n+1 > frp->nargs)
352 FATAL("argument #%d of function %s was not supplied",
353 n+1, frp->fcncell->nval);
/*
 * jump: implements the control-transfer statements; n selects the kind.
 * Visible behaviour:
 *   - one path stores the numeric value of y, truncated to int, in
 *     errorflag;
 *   - one path copies y into frp->retval while preserving its typing: a
 *     cell that is both STR and NUM has its string set with setsval(),
 *     then fval assigned directly and NUM or-ed in; a STR-only cell goes
 *     through setsval(); a NUM-only cell through setfval(); any other
 *     typing is FATAL;
 *   - an unrecognised n is FATAL.
 */
357 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
365 errorflag = (int) getfval(y);
372 if ((y->tval & (STR|NUM)) == (STR|NUM)) {
373 setsval(frp->retval, getsval(y));
374 frp->retval->fval = getfval(y);
375 frp->retval->tval |= NUM;
377 else if (y->tval & STR)
378 setsval(frp->retval, getsval(y));
379 else if (y->tval & NUM)
380 setfval(frp->retval, getfval(y));
381 else /* can't happen */
382 FATAL("bad type variable %d", y->tval);
395 default: /* can't happen */
396 FATAL("illegal jump type %d", n);
398 return 0; /* not reached */
/*
 * awkgetline: implements the getline forms.
 *   a[0] = variable to fill (or NULL), a[1] = redirection operator (or
 *   NULL), a[2] = file name expression.
 * A scratch buffer of recsize bytes is allocated (FATAL on failure) and
 * stdout is flushed first.
 *   - With a redirection (a[1] != NULL) the file name is evaluated, a
 *     '|' mode is mapped to LE, the stream is obtained from openfile()
 *     and one record is read with readrec(). The text goes to the
 *     variable when a[0] is given, otherwise to fldtab[0]. Text accepted
 *     by is_number() also gets its numeric value through atof().
 *   - Without a redirection the current input is used via getrec():
 *     straight into record for a bare getline, or into the scratch
 *     buffer for "getline var".
 * The status n is stored into the result cell with setfval().
 */
401 Cell *awkgetline(Node **a, int n) /* get next line from specific input */
402 { /* a[0] is variable, a[1] is operator, a[2] is filename */
404 extern Cell **fldtab;
407 int bufsize = recsize;
411 if ((buf = malloc(bufsize)) == NULL)
412 FATAL("out of memory in getline");
414 fflush(stdout); /* in case someone is waiting for a prompt */
416 if (a[1] != NULL) { /* getline < file */
417 x = execute(a[2]); /* filename */
419 if (mode == '|') /* input pipe */
420 mode = LE; /* arbitrary flag */
421 fp = openfile(mode, getsval(x), &newflag);
426 n = readrec(&buf, &bufsize, fp, newflag);
429 } else if (a[0] != NULL) { /* getline var <file */
432 if (is_number(x->sval)) {
433 x->fval = atof(x->sval);
437 } else { /* getline <file */
438 setsval(fldtab[0], buf);
439 if (is_number(fldtab[0]->sval)) {
440 fldtab[0]->fval = atof(fldtab[0]->sval);
441 fldtab[0]->tval |= NUM;
444 } else { /* bare getline; use current input */
445 if (a[0] == NULL) /* getline */
446 n = getrec(&record, &recsize, true);
447 else { /* getline var */
448 n = getrec(&buf, &bufsize, false);
451 if (is_number(x->sval)) {
452 x->fval = atof(x->sval);
458 setfval(r, (Awkfloat) n);
/* getnf: yields a[0] reinterpreted as a Cell pointer (the NF cell). */
463 Cell *getnf(Node **a, int n) /* get NF */
467 return (Cell *) a[0];
/*
 * makearraystring: flatten a subscript list into a single key string.
 * Every expression in the list p is executed and its string value is
 * appended to buf. The separator (SUBSEP) is copied after an element only
 * when another element follows (nsub is 0 for the last one). The buffer
 * is grown through adjbuf() in recsize steps, and memory exhaustion is
 * FATAL. func is the caller's name and is used in the error messages.
 */
471 makearraystring(Node *p, const char *func)
477 if ((buf = malloc(bufsz)) == NULL) {
478 FATAL("%s: out of memory", func);
483 seplen = strlen(getsval(subseploc));
485 for (; p; p = p->nnext) {
486 Cell *x = execute(p); /* expr */
487 char *s = getsval(x);
488 size_t nsub = p->nnext ? seplen : 0;
489 size_t slen = strlen(s);
490 size_t tlen = blen + slen + nsub;
492 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
493 FATAL("%s: out of memory %s[%s...]",
496 memcpy(buf + blen, s, slen);
498 memcpy(buf + blen + slen, *SUBSEP, nsub);
/*
 * array: look up (creating if necessary) an array element.
 * a[0] evaluates to the cell holding the symbol table and a[1] is the
 * subscript list, which makearraystring() turns into the key. There is a
 * path that converts the cell into an array: STR, NUM and DONTFREE are
 * cleared and sval is pointed at a new table from makesymtab(NSYMTAB).
 * The element is obtained with setsymtab() using an empty string, 0.0 and
 * STR|NUM as the initial contents.
 */
507 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
512 x = execute(a[0]); /* Cell* for symbol table */
513 buf = makearraystring(a[1], __func__);
515 dprintf( ("making %s into an array\n", NN(x->nval)) );
518 x->tval &= ~(STR|NUM|DONTFREE);
520 x->sval = (char *) makesymtab(NSYMTAB);
522 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
/*
 * awkdelete: implements the delete statement.
 * Deleting SYMTAB (x == symtabloc) is refused with FATAL. With no
 * subscript list (a[1] == NULL) the whole array is emptied and sval is
 * pointed at a fresh table from makesymtab(NSYMTAB). Otherwise the
 * subscripts are flattened with makearraystring() to identify a single
 * element.
 */
530 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
534 x = execute(a[0]); /* Cell* for symbol table */
535 if (x == symtabloc) {
536 FATAL("cannot delete SYMTAB or its elements");
540 if (a[1] == NULL) { /* delete the elements, not the table */
544 x->sval = (char *) makesymtab(NSYMTAB);
546 char *buf = makearraystring(a[1], __func__);
/*
 * intest: implements the "subscript in array" membership test.
 * a[1] evaluates to the array cell. As in array(), there is a path that
 * converts the cell into an array by clearing STR, NUM and DONTFREE and
 * attaching a new table from makesymtab(NSYMTAB). The key is built from
 * a[0] with makearraystring() and searched for with lookup().
 */
554 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
559 ap = execute(a[1]); /* array name */
561 dprintf( ("making %s into an array\n", ap->nval) );
564 ap->tval &= ~(STR|NUM|DONTFREE);
566 ap->sval = (char *) makesymtab(NSYMTAB);
568 buf = makearraystring(a[0], __func__);
569 k = lookup(buf, (Array *) ap->sval);
/*
 * matchop: implements the ~ and !~ operators and the match() builtin.
 * The matcher function pointer mf defaults to match. a[1] is the target
 * text. When a[0] is NULL, a[2] already is a compiled automaton;
 * otherwise a[2] is evaluated and compiled with makedfa(). One path sets
 * RSTART to the 1-based offset of patbeg within s and RLENGTH to patlen.
 * For the operators the outcome depends on n: MATCH succeeds when i is 1
 * and NOTMATCH succeeds when i is 0.
 */
579 Cell *matchop(Node **a, int n) /* ~ and match() */
585 int (*mf)(fa *, const char *) = match, mode = 0;
591 x = execute(a[1]); /* a[1] = target text */
593 if (a[0] == NULL) /* a[0] == 0: a[2] is an already-compiled reg expr */
594 i = (*mf)((fa *) a[2], s);
596 y = execute(a[2]); /* a[2] = regular expr */
598 pfa = makedfa(t, mode);
604 int start = patbeg - s + 1;
607 setfval(rstartloc, (Awkfloat) start);
608 setfval(rlengthloc, (Awkfloat) patlen);
613 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
/*
 * boolop: implements the logical operators; n selects which one.
 * Two of the visible paths return False early, one when i is zero and one
 * when i is non-zero. An unknown operator code is FATAL.
 */
620 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
637 if ( !i ) return(False);
644 if (i) return(False);
646 default: /* can't happen */
647 FATAL("unknown boolean operator %d", n);
649 return 0; /*NOTREACHED*/
/*
 * relop: implements the six relational operators; n selects which one.
 * When both operands carry the NUM flag they are compared numerically and
 * i becomes the sign (-1, 0 or 1) of the difference. The other visible
 * path compares the string values with strcmp(). Each operator then
 * returns True when i satisfies its relation. An unknown operator code is
 * FATAL.
 */
652 Cell *relop(Node **a, int n) /* a[0] < a[1], etc. */
660 if (x->tval&NUM && y->tval&NUM) {
661 j = x->fval - y->fval;
662 i = j<0? -1: (j>0? 1: 0);
664 i = strcmp(getsval(x), getsval(y));
669 case LT: if (i<0) return(True);
671 case LE: if (i<=0) return(True);
673 case NE: if (i!=0) return(True);
675 case EQ: if (i == 0) return(True);
677 case GE: if (i>=0) return(True);
679 case GT: if (i>0) return(True);
681 default: /* can't happen */
682 FATAL("unknown relational operator %d", n);
684 return 0; /*NOTREACHED*/
/*
 * tfree: release a temporary cell.
 * The cell's name, string value and type flags are traced through
 * dprintf. The routine contains a sanity check that aborts with FATAL
 * when the temporary-cell list is found to be corrupted.
 */
687 void tfree(Cell *a) /* free a tempcell */
690 dprintf( ("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval) );
694 FATAL("tempcell list is curdled");
/*
 * gettemp: hand out a temporary cell.
 * The visible refill path allocates a batch of 100 zeroed cells with
 * calloc (FATAL when that fails) and threads them into a singly linked
 * list through cnext; the last cell's cnext is NULL.
 */
699 Cell *gettemp(void) /* get a tempcell */
704 tmps = calloc(100, sizeof(*tmps));
706 FATAL("out of space for temporaries");
707 for (i = 1; i < 100; i++)
708 tmps[i-1].cnext = &tmps[i];
709 tmps[i-1].cnext = NULL;
/*
 * indirect: implements the field reference $(expr).
 * The numeric value of the expression is checked against INT_MAX first
 * and anything larger is FATAL. A field number m of 0 whose source text
 * is not accepted by is_number() is rejected as an illegal field. The
 * resulting cell has its ctype set to OCELL.
 */
717 Cell *indirect(Node **a, int n) /* $( a[0] ) */
725 val = getfval(x); /* freebsd: defend against super large field numbers */
726 if ((Awkfloat)INT_MAX < val)
727 FATAL("trying to access out of range field %s", x->nval);
729 if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */
730 FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
731 /* BUG: can x->nval ever be null??? */
734 x->ctype = OCELL; /* BUG? why are these needed? */
/*
 * substr: implements substr(s, m [, n]).
 * m and n are obtained with getfval() and truncated to int. The character
 * at s[n+m-1] is saved in temp and the result is stored with
 * setsval(y, s + m - 1), i.e. starting at the 1-based position m.
 * NOTE(review): presumably the saved character is replaced by a NUL
 * around the setsval() call and restored afterwards -- confirm.
 */
739 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
744 Cell *x, *y, *z = NULL;
762 m = (int) getfval(y);
769 n = (int) getfval(z);
777 dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) );
779 temp = s[n+m-1]; /* with thanks to John Linderman */
781 setsval(y, s + m - 1);
/*
 * sindex: implements index(s1, s2).
 * A straightforward scan: for every start position p1 in s1 the inner
 * loop advances q and p2 while the characters agree. The reported
 * position is 1-based (p1 - s1 + 1).
 */
787 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
790 char *s1, *s2, *p1, *p2, *q;
799 for (p1 = s1; *p1 != '\0'; p1++) {
800 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
803 v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */
/* Slack, in bytes, added to the adjbuf() requests made by format(). */
813 #define MAXNUMSIZE 50
/*
 * format: expand the printf-style format s with arguments from list a.
 *   pbuf/pbufsize: caller's output buffer and its size
 *   s: the format string; a: list of argument expressions
 * Behaviour visible in this routine:
 *   - FMTSZ and BUFSZ give the room left in fmt and buf from a cursor.
 *   - Support for the %a conversion is probed with snprintf and the
 *     answer is kept in the static have_a_format.
 *   - Each conversion spec is copied into the scratch string fmt, and
 *     both fmt and the output buffer are grown on demand with adjbuf().
 *   - The size modifiers h j L l q t z are ignored, '$' is rejected with
 *     FATAL, and a width is taken from the next argument with getfval()
 *     and written into fmt as a decimal number.
 *   - Conversions are classified by flag: f e g E G are printed from
 *     getfval(); d and i are printed from getfval() cast to intmax_t;
 *     o x X u from getfval() cast to uintmax_t. Strings are printed from
 *     t after growing the buffer. Character output is taken either from
 *     (int) getfval(x) or from the first byte of getsval(x), and there is
 *     a path that writes an explicit NUL byte. An unknown conversion
 *     draws a WARNING and the spec is emitted verbatim.
 *   - Running out of arguments is FATAL; leftover arguments are still
 *     walked after the format is exhausted.
 * NOTE(review): callers treat a result of -1 as failure and otherwise use
 * the result as the output length -- confirm against the return paths.
 */
815 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
822 int fmtwd; /* format width */
825 int bufsize = *pbufsize;
826 #define FMTSZ(a) (fmtsz - ((a) - fmt))
827 #define BUFSZ(a) (bufsize - ((a) - buf))
829 static bool first = true;
830 static bool have_a_format = false;
835 snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
836 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
842 if ((fmt = malloc(fmtsz)) == NULL)
843 FATAL("out of memory in format()");
845 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
855 /* have to be real careful in case this is a huge number, eg, %100000d */
859 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
860 for (t = fmt; (*t++ = *s) != '\0'; s++) {
861 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
862 FATAL("format item %.30s... ran format() out of memory", os);
863 /* Ignore size specifiers */
864 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */
868 if (isalpha((uschar)*s))
871 FATAL("'$' not permitted in awk formats");
875 FATAL("not enough args in printf(%s)", os);
879 snprintf(t - 1, FMTSZ(t - 1),
880 "%d", fmtwd=(int) getfval(x));
883 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
884 t = fmt + strlen(fmt);
891 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
899 case 'f': case 'e': case 'g': case 'E': case 'G':
902 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
903 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
915 WARNING("weird printf conversion %s", fmt);
920 FATAL("not enough args in printf(%s)", os);
926 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
928 case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
933 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
935 snprintf(p, BUFSZ(p), "%s", t);
939 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
940 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
941 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
947 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
948 FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
949 snprintf(p, BUFSZ(p), fmt, t);
954 snprintf(p, BUFSZ(p), fmt, (int) getfval(x));
956 *p++ = '\0'; /* explicit null byte */
957 *p = '\0'; /* next output will start here */
960 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
963 FATAL("can't happen: bad conversion %c in format()", flag);
971 for ( ; a; a = a->nnext) /* evaluate any remaining args */
/*
 * awksprintf: implements sprintf().
 * Allocates a work buffer (FATAL on failure) and lets format() expand the
 * string value of x with the argument list y. A result of -1 from
 * format() is FATAL.
 */
978 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
985 if ((buf = malloc(bufsz)) == NULL)
986 FATAL("out of memory in awksprintf");
989 if (format(&buf, &bufsz, getsval(x), y) == -1)
990 FATAL("sprintf string %.30s... too long. can't happen.", buf);
/*
 * awkprintf: implements the printf statement.
 * The work buffer starts at three times recsize. format() produces the
 * output and its length len; -1 is FATAL. The len bytes are written with
 * fwrite() rather than fputs(), either to stdout or to the stream that
 * redirect() returns for the operator a[1] and target a[2]. Write errors
 * are FATAL.
 */
998 Cell *awkprintf(Node **a, int n) /* printf */
999 { /* a[0] is list of args, starting with format string */
1000 /* a[1] is redirection operator, a[2] is redirection file */
1006 int bufsz=3*recsize;
1008 if ((buf = malloc(bufsz)) == NULL)
1009 FATAL("out of memory in awkprintf");
1012 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1013 FATAL("printf string %.30s... too long. can't happen.", buf);
1016 /* fputs(buf, stdout); */
1017 fwrite(buf, len, 1, stdout);
1019 FATAL("write error on stdout");
1021 fp = redirect(ptoi(a[1]), a[2]);
1022 /* fputs(buf, fp); */
1023 fwrite(buf, len, 1, fp);
1026 FATAL("write error on %s", filename(fp));
/*
 * arith: implements the arithmetic operators; n selects which one.
 * Every operator except UMINUS and UPLUS takes a second operand. Division
 * and modulus have a FATAL error path for a zero divisor. For
 * exponentiation, a non-negative exponent with no fractional part goes
 * through ipow(); everything else uses pow() wrapped in errcheck(). An
 * unknown operator code is FATAL.
 */
1032 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
1041 if (n != UMINUS && n != UPLUS) {
1059 FATAL("division by zero");
1064 FATAL("division by zero in mod");
1071 case UPLUS: /* handled by getfval(), above */
1074 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
1075 i = ipow(i, (int) j);
1078 i = errcheck(pow(i, j), "pow");
1081 default: /* can't happen */
1082 FATAL("illegal arithmetic operator %d", n);
1088 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */
/*
 * incrdecr: implements the ++ and -- operators.
 * k is +1 for PREINCR and POSTINCR and -1 otherwise. The prefix forms
 * (PREINCR, PREDECR) are handled on their own branch.
 */
1101 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */
1109 k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1110 if (n == PREINCR || n == PREDECR) {
/*
 * assign: implements plain and compound assignment; n selects the kind.
 * Plain assignment (ASSIGN):
 *   - self-assignment is a no-op unless the target is a field, the record
 *     or NF;
 *   - a source that is both STR and NUM has its string copied with
 *     setsval() and the numeric value stored directly into fval;
 *   - other sources are copied with setsval() or setfval(), and a source
 *     that cannot be read is reported through funnyvar().
 * Compound forms have FATAL error paths for division and modulus by
 * zero. The power form uses ipow() when the exponent is non-negative
 * with no fractional part, and pow() wrapped in errcheck() otherwise. An
 * unknown operator code is FATAL.
 */
1121 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
1122 { /* this is subtle; don't muck with it. */
1129 if (n == ASSIGN) { /* ordinary assignment */
1130 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1131 ; /* self-assignment: leave alone unless it's a field or NF */
1132 else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1133 setsval(x, getsval(y));
1134 x->fval = getfval(y);
1138 setsval(x, getsval(y));
1140 setfval(x, getfval(y));
1142 funnyvar(y, "read value of");
1160 FATAL("division by zero in /=");
1165 FATAL("division by zero in %%=");
1170 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
1171 xf = ipow(xf, (int) yf);
1174 xf = errcheck(pow(xf, yf), "pow");
1178 FATAL("illegal assignment operator %d", n);
/*
 * cat: implements string concatenation.
 * The lengths of both string values are measured, the buffer s is sized
 * to n1 + n2 + 1 with adjbuf(), and the two strings are copied back to
 * back with memcpy().
 */
1186 Cell *cat(Node **a, int q) /* a[0] cat a[1] */
1194 n1 = strlen(getsval(x));
1197 n2 = strlen(getsval(y));
1199 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat");
1200 memcpy(s, x->sval, n1);
1201 memcpy(s + n1, y->sval, n2);
1214 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
/*
 * dopa2: implements a range pattern.
 * pairstack[pair] is the per-pattern state: it is tested against 0 and
 * set to 1 on one branch, then tested against 1 and reset to 0 on the
 * other.
 */
1230 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
1236 if (pairstack[pair] == 0) {
1239 pairstack[pair] = 1;
1242 if (pairstack[pair] == 1) {
1245 pairstack[pair] = 0;
/*
 * split: implements split(string, array [, separator]).
 *   a[0]: source string, duplicated with strdup() before splitting
 *   a[1]: destination array; its sval is pointed at a new symbol table
 *   a[2]: optional separator; when NULL the value of fsloc is used
 *   a[3]: separator type, STRING or REGEXPR
 * An unsupported separator type is FATAL. An empty regular expression is
 * special-cased so that it behaves like an empty separator.
 * Elements are stored with setsymtab() under decimal keys produced with
 * snprintf. Each piece is entered either as STR|NUM with the atof() value
 * of its text or as a plain STR with 0.0.
 * Splitting strategies visible here:
 *   - regular expression, used for non-empty input when the separator is
 *     longer than one character or is a REGEXPR. The automaton's initstat
 *     is saved in tempstat and restored after matching. patbeg is
 *     temporarily overwritten with a NUL through setptr() while a piece
 *     is stored.
 *   - sep == ' ': fields are delimited by runs of blank, tab or newline.
 *   - sep == 0: one character per element.
 *   - otherwise: a single-character separator; a newline also ends a
 *     field.
 */
1253 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
1255 Cell *x = NULL, *y, *ap;
1256 const char *s, *origs, *t;
1257 const char *fs = NULL;
1258 char *origfs = NULL;
1261 int n, tempstat, arg3type;
1263 y = execute(a[0]); /* source string */
1264 origs = s = strdup(getsval(y));
1265 arg3type = ptoi(a[3]);
1266 if (a[2] == NULL) /* fs string */
1267 fs = getsval(fsloc);
1268 else if (arg3type == STRING) { /* split(str,arr,"string") */
1270 fs = origfs = strdup(getsval(x));
1272 } else if (arg3type == REGEXPR)
1273 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
1275 FATAL("illegal type of split");
1277 ap = execute(a[1]); /* array name */
1279 dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs) );
1282 ap->sval = (char *) makesymtab(NSYMTAB);
1285 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1286 /* split(s, a, //); have to arrange that it looks like empty sep */
1291 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */
1293 if (arg3type == REGEXPR) { /* it's ready already */
1296 pfa = makedfa(fs, 1);
1298 if (nematch(pfa,s)) {
1299 tempstat = pfa->initstat;
1303 snprintf(num, sizeof(num), "%d", n);
1305 setptr(patbeg, '\0');
1307 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1309 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1310 setptr(patbeg, temp);
1311 s = patbeg + patlen;
1312 if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1314 snprintf(num, sizeof(num), "%d", n);
1315 setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1316 pfa->initstat = tempstat;
1319 } while (nematch(pfa,s));
1320 pfa->initstat = tempstat; /* bwk: has to be here to reset */
1321 /* cf gsub and refldbld */
1324 snprintf(num, sizeof(num), "%d", n);
1326 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1328 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1331 } else if (sep == ' ') {
1333 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
1342 while (*s != '\0' && !ISWS(*s));
1345 snprintf(num, sizeof(num), "%d", n);
1347 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1349 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1354 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
1355 for (n = 0; *s != '\0'; s++) {
1358 snprintf(num, sizeof(num), "%d", n);
1361 if (isdigit((uschar)buf[0]))
1362 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1364 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1366 } else if (*s != '\0') {
1370 while (*s != sep && *s != '\n' && *s != '\0')
1374 snprintf(num, sizeof(num), "%d", n);
1376 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1378 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1394 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
/*
 * ifstat: implements the if statement. The else part a[2] is optional
 * and is only considered when it is non-NULL.
 */
1409 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
1417 } else if (a[2] != NULL) {
/*
 * whilestat: implements the while loop.
 * The result x is tested for next, exit and return.
 * NOTE(review): presumably those results end the loop and are handed
 * back to the caller -- confirm.
 */
1424 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
1438 if (isnext(x) || isexit(x) || isret(x))
/*
 * dostat: implements the do ... while loop.
 * The result x is tested for next, exit and return.
 * NOTE(review): presumably those results end the loop and are handed
 * back to the caller -- confirm.
 */
1444 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
1452 if (isnext(x) || isexit(x) || isret(x))
/*
 * forstat: implements the three-clause for loop.
 * A condition result that is not true is returned to the caller as is. A
 * break result is consumed here, while next, exit and return results are
 * tested for separately.
 */
1462 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
1471 if (!istrue(x)) return(x);
1475 if (isbreak(x)) /* turn off break */
1477 if (isnext(x) || isexit(x) || isret(x))
/*
 * instat: implements "for (var in array) body".
 * a[1] evaluates to the array cell, whose sval is the Array table tp. The
 * table is walked bucket by bucket (tp->size buckets, each a chain from
 * tp->tab[i]). The successor is kept in ncp and used to advance the loop
 * instead of reading it from cp afterwards. For every element the loop
 * variable vp is set to the element's name. A next, exit or return result
 * from the body is tested for inside the loop.
 */
1485 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
1487 Cell *x, *vp, *arrayp, *cp, *ncp;
1492 arrayp = execute(a[1]);
1493 if (!isarr(arrayp)) {
1496 tp = (Array *) arrayp->sval;
1498 for (i = 0; i < tp->size; i++) { /* this routine knows too much */
1499 for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1500 setsval(vp, cp->nval);
1507 if (isnext(x) || isexit(x) || isret(x)) {
/*
 * nawk_convert: return a case-converted copy of s.
 *   fun_c: per-byte conversion (applied to each byte cast to uschar)
 *   fun_wc: per-wide-character conversion
 * One path rewrites the copy in place byte by byte with fun_c. The other
 * path handles multibyte text: the output buffer is sized at
 * strlen(s) * MB_CUR_MAX + 1 because a converted character may need a
 * different number of bytes. Input is decoded with mbrtowc() and
 * re-encoded with wcrtomb(), each with its own zeroed mbstate_t.
 * Undecodable input or an unencodable result is FATAL.
 */
1517 static char *nawk_convert(const char *s, int (*fun_c)(int),
1518 wint_t (*fun_wc)(wint_t))
1522 const char *ps = NULL;
1524 mbstate_t mbs, mbs2;
1526 size_t sz = MB_CUR_MAX;
1531 for (pbuf = buf; *pbuf; pbuf++)
1532 *pbuf = fun_c((uschar)*pbuf);
1536 /* upper/lower character may be shorter/longer */
1537 buf = tostringN(s, strlen(s) * sz + 1);
1539 memset(&mbs, 0, sizeof(mbs));
1540 memset(&mbs2, 0, sizeof(mbs2));
1544 while (n = mbrtowc(&wc, ps, sz, &mbs),
1545 n > 0 && n != (size_t)-1 && n != (size_t)-2)
1549 n = wcrtomb(pbuf, fun_wc(wc), &mbs2);
1550 if (n == (size_t)-1)
1551 FATAL("illegal wide character %s", s);
1559 FATAL("illegal byte sequence %s", s);
/* nawk_toupper: upper-case s through nawk_convert() using toupper/towupper. */
1565 static char *nawk_toupper(const char *s)
1567 return nawk_convert(s, toupper, towupper);
/* nawk_tolower: lower-case s through nawk_convert() using tolower/towlower. */
1570 static char *nawk_tolower(const char *s)
1572 return nawk_convert(s, tolower, towlower);
/*
 * bltin: dispatcher for the built-in functions.
 *   a[0] is the function type and a[1] the argument list; nextarg tracks
 *   the first argument that has not been consumed yet.
 * Behaviour visible per function:
 *   - length: the element count (nelem) for an array, otherwise strlen()
 *     of the string value.
 *   - log, exp, sqrt: computed through errcheck(); sin and cos directly;
 *     the integer part is extracted with modf().
 *   - atan2: needs a second argument; when it is missing a WARNING is
 *     printed.
 *   - system: stdout is flushed, the command runs via system(), and the
 *     wait status is decoded: WEXITSTATUS for a normal exit, and
 *     WTERMSIG + 256 for death by signal, with a further WCOREDUMP check.
 *   - rand: random() divided by 2^31, giving a value in [0, 1).
 *   - srand: the seed u is set from time() when no argument is given;
 *     srandom() is called with u cast to unsigned long.
 *   - toupper / tolower: delegated to nawk_toupper() / nawk_tolower().
 *   - fflush: with no argument or an empty string everything is flushed
 *     through flush_all(); otherwise the stream is looked up with
 *     openfile(FFLUSH, ...).
 * An unknown function type is FATAL. Surplus arguments cause a WARNING
 * and the remaining list is still walked.
 */
1575 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
1588 nextarg = a[1]->nnext;
1592 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
1594 u = strlen(getsval(x));
1598 u = errcheck(log(getfval(x)), "log");
1601 modf(getfval(x), &u); break;
1604 u = errcheck(exp(getfval(x)), "exp");
1608 u = errcheck(sqrt(getfval(x)), "sqrt");
1611 u = sin(getfval(x)); break;
1613 u = cos(getfval(x)); break;
1615 if (nextarg == NULL) {
1616 WARNING("atan2 requires two arguments; returning 1.0");
1619 y = execute(a[1]->nnext);
1620 u = atan2(getfval(x), getfval(y));
1622 nextarg = nextarg->nnext;
1626 fflush(stdout); /* in case something is buffered already */
1627 status = system(getsval(x));
1630 if (WIFEXITED(status)) {
1631 u = WEXITSTATUS(status);
1632 } else if (WIFSIGNALED(status)) {
1633 u = WTERMSIG(status) + 256;
1635 if (WCOREDUMP(status))
1638 } else /* something else?!? */
1643 /* random() returns numbers in [0..2^31-1]
1644 * in order to get a number in [0, 1), divide it by 2^31
1646 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
1649 if (isrec(x)) /* no argument provided */
1650 u = time((time_t *)0);
1654 srandom((unsigned long) u);
1661 buf = nawk_toupper(getsval(x));
1663 buf = nawk_tolower(getsval(x));
1670 if (isrec(x) || strlen(getsval(x)) == 0) {
1671 flush_all(); /* fflush() or fflush("") -> all */
1673 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
1678 default: /* can't happen */
1679 FATAL("illegal function type %d", t);
1685 if (nextarg != NULL) {
1686 WARNING("warning: function has too many arguments");
1687 for ( ; nextarg; nextarg = nextarg->nnext)
/*
 * printstat: implements the print statement.
 * When a[1] is non-NULL the output stream comes from redirect(). The
 * expressions in list a[0] are written with fputs(); each one is followed
 * by the value of OFS, except the last, which is followed by the value of
 * ORS. A write error is FATAL and names the stream via filename().
 */
1693 Cell *printstat(Node **a, int n) /* print a[0] */
1699 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */
1702 fp = redirect(ptoi(a[1]), a[2]);
1703 for (x = a[0]; x != NULL; x = x->nnext) {
1705 fputs(getpssval(y), fp);
1707 if (x->nnext == NULL)
1708 fputs(getsval(orsloc), fp);
1710 fputs(getsval(ofsloc), fp);
1715 FATAL("write error on %s", filename(fp));
1719 Cell *nullproc(Node **a, int n)
/*
 * redirect: obtain the stream for an output redirection.
 * a is the redirection operator and b the target expression. The stream
 * is requested from openfile() and failure to open it is FATAL.
 */
1725 FILE *redirect(int a, Node *b) /* set up all i/o redirections */
1733 fp = openfile(a, fname, NULL);
1735 FATAL("can't open file %s", fname);
1743 int mode; /* '|', 'a', 'w' => LE/LT, GT */
/*
 * stdinit: create the files table and seed its first three slots.
 * The table is allocated with calloc for nfiles entries (FATAL on
 * failure). Slots 0, 1 and 2 are bound to stdin, stdout and stderr under
 * the names /dev/stdin, /dev/stdout and /dev/stderr. Those names are
 * string literals, not heap copies.
 */
1748 static void stdinit(void) /* in case stdin, etc., are not constants */
1751 files = calloc(nfiles, sizeof(*files));
1753 FATAL("can't allocate file memory for %zu files", nfiles);
1754 files[0].fp = stdin;
1755 files[0].fname = "/dev/stdin";
1757 files[1].fp = stdout;
1758 files[1].fname = "/dev/stdout";
1760 files[2].fp = stderr;
1761 files[2].fname = "/dev/stderr";
/*
 * openfile: find or open the stream for name us in mode a.
 *   a: redirection or mode code (e.g. APPEND, '|', LE, LT, FFLUSH)
 *   pnewflag: optional out-parameter supplied by some callers
 * Steps visible here:
 *   - A null file name is FATAL.
 *   - The table is searched for an entry with the same name and a
 *     compatible mode; APPEND is compatible with an entry opened as GT.
 *   - An FFLUSH request never creates a new entry.
 *   - Otherwise a free slot (fp == NULL) is looked for. When the table
 *     has to grow, it is extended by FOPEN_MAX entries with realloc and
 *     the new tail is zeroed; failure is FATAL.
 *   - stdout is flushed before opening, and the stream is opened
 *     according to a. APPEND is recorded as mode GT so that > and >> can
 *     be mixed, and for LT the name "-" means stdin. An unknown code is
 *     FATAL.
 *   - The name is stored as a copy made with tostring(), and streams
 *     other than the three standard ones get FD_CLOEXEC set.
 */
1765 FILE *openfile(int a, const char *us, bool *pnewflag)
1773 FATAL("null file name in print or getline");
1774 for (i = 0; i < nfiles; i++)
1775 if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
1776 (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
1782 if (a == FFLUSH) /* didn't find it, so don't create it! */
1785 for (i = 0; i < nfiles; i++)
1786 if (files[i].fp == NULL)
1790 size_t nnf = nfiles + FOPEN_MAX;
1791 nf = realloc(files, nnf * sizeof(*nf));
1793 FATAL("cannot grow files for %s and %zu files", s, nnf);
1794 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
1798 fflush(stdout); /* force a semblance of order */
1802 } else if (a == APPEND) {
1804 m = GT; /* so can mix > and >> */
1805 } else if (a == '|') { /* output pipe */
1807 } else if (a == LE) { /* input pipe */
1809 } else if (a == LT) { /* getline <file */
1810 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */
1811 } else /* can't happen */
1812 FATAL("illegal redirection %d", a);
1814 files[i].fname = tostring(s);
1819 if (fp != stdin && fp != stdout && fp != stderr)
1820 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
/*
 * filename: map a stream back to its recorded name by scanning the files
 * table and returning the fname of the first entry whose fp equals fp.
 */
1825 const char *filename(FILE *fp)
1829 for (i = 0; i < nfiles; i++)
1830 if (fp == files[i].fp)
1831 return files[i].fname;
/*
 * closefile: implements close(name).
 * Every table entry whose recorded name equals the string in x is
 * handled: a stream in error state is FATAL, pipe entries (mode '|' or
 * LE) are closed with pclose() and all others with fclose(), and there is
 * a FATAL path for an error while closing. The stored name is freed only
 * for slots above 2, because the first three names are not heap copies,
 * and then set to NULL. The result cell is set to -1 when stat is
 * non-zero and to 0 otherwise.
 */
1835 Cell *closefile(Node **a, int n)
1844 for (i = 0; i < nfiles; i++) {
1845 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
1847 if (ferror(files[i].fp))
1848 FATAL("i/o error occurred on %s", files[i].fname);
1849 if (files[i].mode == '|' || files[i].mode == LE)
1850 stat = pclose(files[i].fp) == -1;
1852 stat = fclose(files[i].fp) == EOF;
1854 FATAL("i/o error occurred closing %s", files[i].fname);
1855 if (i > 2) /* don't do /dev/std... */
1856 xfree(files[i].fname);
1857 files[i].fname = NULL; /* watch out for ref thru this */
1862 setfval(x, (Awkfloat) (stat ? -1 : 0));
1871 for (i = 0; i < nfiles; i++) {
1874 if (ferror(files[i].fp))
1875 FATAL( "i/o error occurred on %s", files[i].fname );
1876 if (files[i].mode == '|' || files[i].mode == LE)
1877 stat = pclose(files[i].fp) == -1;
1879 stat = fclose(files[i].fp) == EOF;
1881 FATAL( "i/o error occurred while closing %s", files[i].fname );
/* flush_all: walk the files table and fflush() the streams recorded in it. */
1885 static void flush_all(void)
1889 for (i = 0; i < nfiles; i++)
1891 fflush(files[i].fp);
/*
 * Prototype for the backslash handler that sub() and gsub() call while
 * copying replacement text. Both pointers are passed by address.
 */
1894 void backsub(char **pb_ptr, const char **sptr_ptr);
/*
 * sub: implements sub(regex, replacement, target), replacing one match.
 *   a[0] == NULL means a[1] already is a compiled regular expression;
 *   otherwise one is built with makedfa() from a string value.
 *   a[2] is the replacement and a[3] the target string.
 * The work buffer starts at recsize bytes (FATAL on failure) and grows
 * through adjbuf(). When pmatch() finds a match the result is assembled
 * in three parts:
 *   1. the target text that precedes patbeg;
 *   2. the replacement, where a backslash is handed to backsub() and '&'
 *      expands to the matched text (patlen bytes from patbeg);
 *   3. the rest of the target after the match, copied up to and including
 *      its terminating NUL.
 * The write cursor pb is checked against the buffer end twice, with a
 * FATAL on overrun, and the result is stored into the target with
 * setsval().
 */
1896 Cell *sub(Node **a, int nnn) /* substitute command */
1898 const char *sptr, *q;
1899 Cell *x, *y, *result;
1902 int bufsz = recsize;
1904 if ((buf = malloc(bufsz)) == NULL)
1905 FATAL("out of memory in sub");
1906 x = execute(a[3]); /* target string */
1908 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
1909 pfa = (fa *) a[1]; /* regular expression */
1912 pfa = makedfa(getsval(y), 1);
1915 y = execute(a[2]); /* replacement string */
1917 if (pmatch(pfa, t)) {
1919 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
1921 while (sptr < patbeg)
1924 while (*sptr != '\0') {
1925 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
1926 if (*sptr == '\\') {
1927 backsub(&pb, &sptr);
1928 } else if (*sptr == '&') {
1930 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
1931 for (q = patbeg; q < patbeg+patlen; )
1937 if (pb > buf + bufsz)
1938 FATAL("sub result1 %.30s too big; can't happen", buf);
1939 sptr = patbeg + patlen;
1940 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
1941 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
1942 while ((*pb++ = *sptr++) != '\0')
1945 if (pb > buf + bufsz)
1946 FATAL("sub result2 %.30s too big; can't happen", buf);
1947 setsval(x, buf); /* BUG: should be able to avoid copy */
/*
 * gsub: implements gsub(regex, replacement, target), replacing every
 * match.
 *   a[0] == NULL means a[1] already is a compiled regular expression;
 *   otherwise one is built with makedfa() from a string value.
 *   a[2] is the replacement and a[3] the target string.
 * The work buffer starts at recsize bytes and grows through adjbuf(). The
 * automaton's initstat is saved in tempstat after the first successful
 * pmatch() and written back at the end. Matching then repeats in a
 * do/while loop driven by pmatch(pfa, t):
 *   - An empty match that is not at the end of the text is replaced only
 *     while mflag is 0.
 *   - For a non-empty match the text before patbeg is copied first and t
 *     then moves to patbeg + patlen.
 *   - The replacement is expanded the same way in both cases: a backslash
 *     goes to backsub() and '&' expands to the matched text.
 * After the loop the remaining text is copied including its NUL. The
 * cursor pb is bounds-checked with FATAL on overrun, and the result is
 * stored into the target with setsval().
 */
1956 Cell *gsub(Node **a, int nnn) /* global substitute */
1960 const char *q, *t, *sptr;
1963 int mflag, tempstat, num;
1964 int bufsz = recsize;
1966 if ((buf = malloc(bufsz)) == NULL)
1967 FATAL("out of memory in gsub");
1968 mflag = 0; /* if mflag == 0, can replace empty string */
1970 x = execute(a[3]); /* target string */
1972 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */
1973 pfa = (fa *) a[1]; /* regular expression */
1976 pfa = makedfa(getsval(y), 1);
1979 y = execute(a[2]); /* replacement string */
1980 if (pmatch(pfa, t)) {
1981 tempstat = pfa->initstat;
1986 if (patlen == 0 && *patbeg != '\0') { /* matched empty string */
1987 if (mflag == 0) { /* can replace empty */
1990 while (*sptr != '\0') {
1991 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
1992 if (*sptr == '\\') {
1993 backsub(&pb, &sptr);
1994 } else if (*sptr == '&') {
1996 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
1997 for (q = patbeg; q < patbeg+patlen; )
2003 if (*t == '\0') /* at end */
2005 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
2007 if (pb > buf + bufsz) /* BUG: not sure of this test */
2008 FATAL("gsub result0 %.30s too big; can't happen", buf);
2011 else { /* matched nonempty string */
2014 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub");
2015 while (sptr < patbeg)
2018 while (*sptr != '\0') {
2019 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
2020 if (*sptr == '\\') {
2021 backsub(&pb, &sptr);
2022 } else if (*sptr == '&') {
2024 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
2025 for (q = patbeg; q < patbeg+patlen; )
2030 t = patbeg + patlen;
2031 if (patlen == 0 || *t == '\0' || *(t-1) == '\0')
2033 if (pb > buf + bufsz)
2034 FATAL("gsub result1 %.30s too big; can't happen", buf);
2037 } while (pmatch(pfa,t));
2039 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
2040 while ((*pb++ = *sptr++) != '\0')
2042 done: if (pb < buf + bufsz)
2044 else if (*(pb-1) != '\0')
2045 FATAL("gsub result2 %.30s truncated; can't happen", buf);
2046 setsval(x, buf); /* BUG: should be able to avoid copy + free */
2047 pfa->initstat = tempstat;
2058 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
2059 { /* sptr[0] == '\\' */
2061 const char *sptr = *sptr_ptr;
2062 static bool first = true;
2063 static bool do_posix = false;
2067 do_posix = (getenv("POSIXLY_CORRECT") != NULL);
2070 if (sptr[1] == '\\') {
2071 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
2075 } else if (sptr[2] == '&') { /* \\& -> \ + matched */
2078 } else if (do_posix) { /* \\x -> \x */
2081 } else { /* \\x -> \\x */
2085 } else if (sptr[1] == '&') { /* literal & */
2088 } else /* literal \ */