* Sync comment with code's reality.
[dragonfly.git] / contrib / awk / builtin.c
1 /*
2  * builtin.c - Builtin functions and various utility procedures 
3  */
4
5 /* 
6  * Copyright (C) 1986, 1988, 1989, 1991-2000 the Free Software Foundation, Inc.
7  * 
8  * This file is part of GAWK, the GNU implementation of the
9  * AWK Programming Language.
10  * 
11  * GAWK is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  * 
16  * GAWK is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  * 
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
24  *
25  * $FreeBSD: src/contrib/awk/builtin.c,v 1.7.2.1 2001/01/23 22:08:30 asmodai Exp $
26  * $DragonFly: src/contrib/awk/Attic/builtin.c,v 1.2 2003/06/17 04:23:58 dillon Exp $
27  */
28
29
30 #include "awk.h"
31 #undef HUGE
32 #undef CHARBITS
33 #undef INTBITS
34 #include <math.h>
35 #ifndef __FreeBSD__
36 #include "random.h"
37
38 /* can declare these, since we always use the random shipped with gawk */
39 extern char *initstate P((unsigned seed, char *state, int n));
40 extern char *setstate P((char *state));
41 extern long random P((void));
42 extern void srandom P((unsigned int seed));
43 #endif
44
45 extern NODE **fields_arr;
46 extern int output_is_tty;
47
48 static NODE *sub_common P((NODE *tree, int how_many, int backdigs));
49 NODE *format_tree P((const char *, int, NODE *));
50
51 #ifdef _CRAY
52 /* Work around a problem in conversion of doubles to exact integers. */
53 #include <float.h>
54 #define Floor(n) floor((n) * (1.0 + DBL_EPSILON))
55 #define Ceil(n) ceil((n) * (1.0 + DBL_EPSILON))
56
57 /* Force the standard C compiler to use the library math functions. */
58 extern double exp(double);
59 double (*Exp)() = exp;
60 #define exp(x) (*Exp)(x)
61 extern double log(double);
62 double (*Log)() = log;
63 #define log(x) (*Log)(x)
64 #else
65 #define Floor(n) floor(n)
66 #define Ceil(n) ceil(n)
67 #endif
68
69 #define DEFAULT_G_PRECISION 6
70
71 #ifdef GFMT_WORKAROUND
72 /* semi-temporary hack, mostly to gracefully handle VMS */
73 static void sgfmt P((char *buf, const char *format, int alt,
74                      int fwidth, int precision, double value));
75 #endif /* GFMT_WORKAROUND */
76
77 /*
78  * Since we supply the version of random(), we know what
79  * value to use here.
80  */
81 #define GAWK_RANDOM_MAX 0x7fffffffL
82
83 static void efwrite P((const void *ptr, size_t size, size_t count, FILE *fp,
84                        const char *from, struct redirect *rp, int flush));
85
86 /* efwrite --- like fwrite, but with error checking */
87
88 static void
89 efwrite(ptr, size, count, fp, from, rp, flush)
90 const void *ptr;
91 size_t size, count;
92 FILE *fp;
93 const char *from;
94 struct redirect *rp;
95 int flush;
96 {
97         errno = 0;
98         if (fwrite(ptr, size, count, fp) != count)
99                 goto wrerror;
100         if (flush
101           && ((fp == stdout && output_is_tty)
102            || (rp && (rp->flag & RED_NOBUF)))) {
103                 fflush(fp);
104                 if (ferror(fp))
105                         goto wrerror;
106         }
107         return;
108
109 wrerror:
110         fatal("%s to \"%s\" failed (%s)", from,
111                 rp ? rp->value : "standard output",
112                 errno ? strerror(errno) : "reason unknown");
113 }
114
115 /* do_exp --- exponential function */
116
117 NODE *
118 do_exp(tree)
119 NODE *tree;
120 {
121         NODE *tmp;
122         double d, res;
123
124         tmp = tree_eval(tree->lnode);
125         d = force_number(tmp);
126         free_temp(tmp);
127         errno = 0;
128         res = exp(d);
129         if (errno == ERANGE)
130                 warning("exp argument %g is out of range", d);
131         return tmp_number((AWKNUM) res);
132 }
133
134 /* stdfile --- return fp for a standard file */
135
136 /*
137  * This function allows `fflush("/dev/stdout")' to work.
138  * The other files will be available via getredirect().
139  * /dev/stdin is not included, since fflush is only for output.
140  */
141
142 static FILE *
143 stdfile(name, len)
144 char *name;
145 size_t len;
146 {
147         if (len == 11) {
148                 if (STREQN(name, "/dev/stderr", 11))
149                         return stderr;
150                 else if (STREQN(name, "/dev/stdout", 11))
151                         return stdout;
152         }
153
154         return NULL;
155 }
156
157 /* do_fflush --- flush output, either named file or pipe or everything */
158
159 NODE *
160 do_fflush(tree)
161 NODE *tree;
162 {
163         struct redirect *rp;
164         NODE *tmp;
165         FILE *fp;
166         int status = 0;
167         char *file;
168
169         /* fflush() --- flush stdout */
170         if (tree == NULL) {
171                 status = fflush(stdout);
172                 return tmp_number((AWKNUM) status);
173         }
174
175         tmp = tree_eval(tree->lnode);
176         tmp = force_string(tmp);
177         file = tmp->stptr;
178
179         /* fflush("") --- flush all */
180         if (tmp->stlen == 0) {
181                 status = flush_io();
182                 free_temp(tmp);
183                 return tmp_number((AWKNUM) status);
184         }
185
186         rp = getredirect(tmp->stptr, tmp->stlen);
187         status = 1;
188         if (rp != NULL) {
189                 if ((rp->flag & (RED_WRITE|RED_APPEND)) == 0) {
190                         /* if (do_lint) */
191                                 warning(
192                 "fflush: cannot flush: %s `%s' opened for reading, not writing",
193                                 (rp->flag & RED_PIPE) ? "pipe" : "file",
194                                 file);
195                         free_temp(tmp);
196                         return tmp_number((AWKNUM) status);
197                 }
198                 fp = rp->fp;
199                 if (fp != NULL)
200                         status = fflush(fp);
201         } else if ((fp = stdfile(tmp->stptr, tmp->stlen)) != NULL) {
202                 status = fflush(fp);
203         } else
204                 warning("fflush: `%s' is not an open file or pipe", file);
205         free_temp(tmp);
206         return tmp_number((AWKNUM) status);
207 }
208
209 /* do_index --- find index of a string */
210
211 NODE *
212 do_index(tree)
213 NODE *tree;
214 {
215         NODE *s1, *s2;
216         register char *p1, *p2;
217         register size_t l1, l2;
218         long ret;
219
220
221         s1 = tree_eval(tree->lnode);
222         s2 = tree_eval(tree->rnode->lnode);
223         force_string(s1);
224         force_string(s2);
225         p1 = s1->stptr;
226         p2 = s2->stptr;
227         l1 = s1->stlen;
228         l2 = s2->stlen;
229         ret = 0;
230
231         /* IGNORECASE will already be false if posix */
232         if (IGNORECASE) {
233                 while (l1 > 0) {
234                         if (l2 > l1)
235                                 break;
236                         if (casetable[(int)*p1] == casetable[(int)*p2]
237                             && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) {
238                                 ret = 1 + s1->stlen - l1;
239                                 break;
240                         }
241                         l1--;
242                         p1++;
243                 }
244         } else {
245                 while (l1 > 0) {
246                         if (l2 > l1)
247                                 break;
248                         if (*p1 == *p2
249                             && (l2 == 1 || STREQN(p1, p2, l2))) {
250                                 ret = 1 + s1->stlen - l1;
251                                 break;
252                         }
253                         l1--;
254                         p1++;
255                 }
256         }
257         free_temp(s1);
258         free_temp(s2);
259         return tmp_number((AWKNUM) ret);
260 }
261
262 /* double_to_int --- convert double to int, used several places */
263
264 double
265 double_to_int(d)
266 double d;
267 {
268         if (d >= 0)
269                 d = Floor(d);
270         else
271                 d = Ceil(d);
272         return d;
273 }
274
275 /* do_int --- convert double to int for awk */
276
277 NODE *
278 do_int(tree)
279 NODE *tree;
280 {
281         NODE *tmp;
282         double d;
283
284         tmp = tree_eval(tree->lnode);
285         d = force_number(tmp);
286         d = double_to_int(d);
287         free_temp(tmp);
288         return tmp_number((AWKNUM) d);
289 }
290
291 /* do_length --- length of a string or $0 */
292
293 NODE *
294 do_length(tree)
295 NODE *tree;
296 {
297         NODE *tmp;
298         size_t len;
299
300         tmp = tree_eval(tree->lnode);
301         len = force_string(tmp)->stlen;
302         free_temp(tmp);
303         return tmp_number((AWKNUM) len);
304 }
305
306 /* do_log --- the log function */
307
308 NODE *
309 do_log(tree)
310 NODE *tree;
311 {
312         NODE *tmp;
313         double d, arg;
314
315         tmp = tree_eval(tree->lnode);
316         arg = (double) force_number(tmp);
317         if (arg < 0.0)
318                 warning("log called with negative argument %g", arg);
319         d = log(arg);
320         free_temp(tmp);
321         return tmp_number((AWKNUM) d);
322 }
323
324 /*
325  * format_tree() formats nodes of a tree, starting with a left node,
326  * and accordingly to a fmt_string providing a format like in
327  * printf family from C library.  Returns a string node which value
328  * is a formatted string.  Called by  sprintf function.
329  *
330  * It is one of the uglier parts of gawk.  Thanks to Michal Jaegermann
331  * for taming this beast and making it compatible with ANSI C.
332  */
333
334 NODE *
335 format_tree(fmt_string, n0, carg)
336 const char *fmt_string;
337 int n0;
338 register NODE *carg;
339 {
340 /* copy 'l' bytes from 's' to 'obufout' checking for space in the process */
341 /* difference of pointers should be of ptrdiff_t type, but let us be kind */
342 #define bchunk(s, l) if (l) { \
343         while ((l) > ofre) { \
344                 long olen = obufout - obuf; \
345                 erealloc(obuf, char *, osiz * 2, "format_tree"); \
346                 ofre += osiz; \
347                 osiz *= 2; \
348                 obufout = obuf + olen; \
349         } \
350         memcpy(obufout, s, (size_t) (l)); \
351         obufout += (l); \
352         ofre -= (l); \
353 }
354
355 /* copy one byte from 's' to 'obufout' checking for space in the process */
356 #define bchunk_one(s) { \
357         if (ofre <= 0) { \
358                 long olen = obufout - obuf; \
359                 erealloc(obuf, char *, osiz * 2, "format_tree"); \
360                 ofre += osiz; \
361                 osiz *= 2; \
362                 obufout = obuf + olen; \
363         } \
364         *obufout++ = *s; \
365         --ofre; \
366 }
367
368 /* Is there space for something L big in the buffer? */
369 #define chksize(l)  if ((l) > ofre) { \
370         long olen = obufout - obuf; \
371         erealloc(obuf, char *, osiz * 2, "format_tree"); \
372         obufout = obuf + olen; \
373         ofre += osiz; \
374         osiz *= 2; \
375 }
376
377 /*
378  * Get the next arg to be formatted.  If we've run out of args,
379  * return "" (Null string) 
380  */
381 #define parse_next_arg() { \
382         if (carg == NULL) { \
383                 toofew = TRUE; \
384                 break; \
385         } else { \
386                 arg = tree_eval(carg->lnode); \
387                 carg = carg->rnode; \
388         } \
389 }
390
391         NODE *r;
392         int toofew = FALSE;
393         char *obuf, *obufout;
394         size_t osiz, ofre;
395         char *chbuf;
396         const char *s0, *s1;
397         int cs1;
398         NODE *arg;
399         long fw, prec;
400         int lj, alt, big, bigbig, small, have_prec, need_format;
401         long *cur = NULL;
402 #ifdef sun386           /* Can't cast unsigned (int/long) from ptr->value */
403         long tmp_uval;  /* on 386i 4.0.1 C compiler -- it just hangs */
404 #endif
405         unsigned long uval;
406         int sgn;
407         int base = 0;
408         char cpbuf[30];         /* if we have numbers bigger than 30 */
409         char *cend = &cpbuf[30];/* chars, we lose, but seems unlikely */
410         char *cp;
411         char *fill;
412         double tmpval;
413         char signchar = FALSE;
414         size_t len;
415         int zero_flag = FALSE;
416         static char sp[] = " ";
417         static char zero_string[] = "0";
418         static char lchbuf[] = "0123456789abcdef";
419         static char Uchbuf[] = "0123456789ABCDEF";
420
421 #define INITIAL_OUT_SIZE        512
422         emalloc(obuf, char *, INITIAL_OUT_SIZE, "format_tree");
423         obufout = obuf;
424         osiz = INITIAL_OUT_SIZE;
425         ofre = osiz - 1;
426
427         need_format = FALSE;
428
429         s0 = s1 = fmt_string;
430         while (n0-- > 0) {
431                 if (*s1 != '%') {
432                         s1++;
433                         continue;
434                 }
435                 need_format = TRUE;
436                 bchunk(s0, s1 - s0);
437                 s0 = s1;
438                 cur = &fw;
439                 fw = 0;
440                 prec = 0;
441                 have_prec = FALSE;
442                 signchar = FALSE;
443                 zero_flag = FALSE;
444                 lj = alt = big = bigbig = small = FALSE;
445                 fill = sp;
446                 cp = cend;
447                 chbuf = lchbuf;
448                 s1++;
449
450 retry:
451                 if (n0-- <= 0)  /* ran out early! */
452                         break;
453
454                 switch (cs1 = *s1++) {
455                 case (-1):      /* dummy case to allow for checking */
456 check_pos:
457                         if (cur != &fw)
458                                 break;          /* reject as a valid format */
459                         goto retry;
460                 case '%':
461                         need_format = FALSE;
462                         bchunk_one("%");
463                         s0 = s1;
464                         break;
465
466                 case '0':
467                         /*
468                          * Only turn on zero_flag if we haven't seen
469                          * the field width or precision yet.  Otherwise,
470                          * screws up floating point formatting.
471                          */
472                         if (cur == & fw)
473                                 zero_flag = TRUE;
474                         if (lj)
475                                 goto retry;
476                         /* FALL through */
477                 case '1':
478                 case '2':
479                 case '3':
480                 case '4':
481                 case '5':
482                 case '6':
483                 case '7':
484                 case '8':
485                 case '9':
486                         if (cur == NULL)
487                                 break;
488                         if (prec >= 0)
489                                 *cur = cs1 - '0';
490                         /*
491                          * with a negative precision *cur is already set
492                          * to -1, so it will remain negative, but we have
493                          * to "eat" precision digits in any case
494                          */
495                         while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
496                                 --n0;
497                                 *cur = *cur * 10 + *s1++ - '0';
498                         }
499                         if (prec < 0)   /* negative precision is discarded */
500                                 have_prec = FALSE;
501                         if (cur == &prec)
502                                 cur = NULL;
503                         if (n0 == 0)    /* badly formatted control string */
504                                 continue;
505                         goto retry;
506                 case '*':
507                         if (cur == NULL)
508                                 break;
509                         parse_next_arg();
510                         *cur = force_number(arg);
511                         free_temp(arg);
512                         if (*cur < 0 && cur == &fw) {
513                                 *cur = -*cur;
514                                 lj++;
515                         }
516                         if (cur == &prec) {
517                                 if (*cur >= 0)
518                                         have_prec = TRUE;
519                                 else
520                                         have_prec = FALSE;
521                                 cur = NULL;
522                         }
523                         goto retry;
524                 case ' ':               /* print ' ' or '-' */
525                                         /* 'space' flag is ignored */
526                                         /* if '+' already present  */
527                         if (signchar != FALSE) 
528                                 goto check_pos;
529                         /* FALL THROUGH */
530                 case '+':               /* print '+' or '-' */
531                         signchar = cs1;
532                         goto check_pos;
533                 case '-':
534                         if (prec < 0)
535                                 break;
536                         if (cur == &prec) {
537                                 prec = -1;
538                                 goto retry;
539                         }
540                         fill = sp;      /* if left justified then other */
541                         lj++;           /* filling is ignored */
542                         goto check_pos;
543                 case '.':
544                         if (cur != &fw)
545                                 break;
546                         cur = &prec;
547                         have_prec = TRUE;
548                         goto retry;
549                 case '#':
550                         alt = TRUE;
551                         goto check_pos;
552                 case 'l':
553                         if (big)
554                                 break;
555                         else {
556                                 static int warned = FALSE;
557                                 
558                                 if (do_lint && ! warned) {
559                                         warning("`l' is meaningless in awk formats; ignored");
560                                         warned = TRUE;
561                                 }
562                                 if (do_posix)
563                                         fatal("'l' is not permitted in POSIX awk formats");
564                         }
565                         big = TRUE;
566                         goto retry;
567                 case 'L':
568                         if (bigbig)
569                                 break;
570                         else {
571                                 static int warned = FALSE;
572                                 
573                                 if (do_lint && ! warned) {
574                                         warning("`L' is meaningless in awk formats; ignored");
575                                         warned = TRUE;
576                                 }
577                                 if (do_posix)
578                                         fatal("'L' is not permitted in POSIX awk formats");
579                         }
580                         bigbig = TRUE;
581                         goto retry;
582                 case 'h':
583                         if (small)
584                                 break;
585                         else {
586                                 static int warned = FALSE;
587                                 
588                                 if (do_lint && ! warned) {
589                                         warning("`h' is meaningless in awk formats; ignored");
590                                         warned = TRUE;
591                                 }
592                                 if (do_posix)
593                                         fatal("'h' is not permitted in POSIX awk formats");
594                         }
595                         small = TRUE;
596                         goto retry;
597                 case 'c':
598                         need_format = FALSE;
599                         if (zero_flag && ! lj)
600                                 fill = zero_string;
601                         parse_next_arg();
602                         /* user input that looks numeric is numeric */
603                         if ((arg->flags & (MAYBE_NUM|NUMBER)) == MAYBE_NUM)
604                                 (void) force_number(arg);
605                         if (arg->flags & NUMBER) {
606 #ifdef sun386
607                                 tmp_uval = arg->numbr; 
608                                 uval = (unsigned long) tmp_uval;
609 #else
610                                 uval = (unsigned long) arg->numbr;
611 #endif
612                                 cpbuf[0] = uval;
613                                 prec = 1;
614                                 cp = cpbuf;
615                                 goto pr_tail;
616                         }
617                         if (have_prec == FALSE)
618                                 prec = 1;
619                         else if (prec > arg->stlen)
620                                 prec = arg->stlen;
621                         cp = arg->stptr;
622                         goto pr_tail;
623                 case 's':
624                         need_format = FALSE;
625                         if (zero_flag && ! lj)
626                                 fill = zero_string;
627                         parse_next_arg();
628                         arg = force_string(arg);
629                         if (! have_prec || prec > arg->stlen)
630                                 prec = arg->stlen;
631                         cp = arg->stptr;
632                         goto pr_tail;
633                 case 'd':
634                 case 'i':
635                         need_format = FALSE;
636                         parse_next_arg();
637                         tmpval = force_number(arg);
638
639                         /*
640                          * ``The result of converting a zero value with a
641                          * precision of zero is no characters.''
642                          */
643                         if (have_prec && prec == 0 && tmpval == 0)
644                                 goto pr_tail;
645
646                         if (tmpval < 0) {
647                                 if (tmpval < LONG_MIN)
648                                         goto out_of_range;
649                                 sgn = TRUE;
650                                 uval = - (unsigned long) (long) tmpval;
651                         } else {
652                                 /* Use !, so that NaNs are out of range.
653                                    The cast avoids a SunOS 4.1.x cc bug.  */
654                                 if (! (tmpval <= (unsigned long) ULONG_MAX))
655                                         goto out_of_range;
656                                 sgn = FALSE;
657                                 uval = (unsigned long) tmpval;
658                         }
659                         do {
660                                 *--cp = (char) ('0' + uval % 10);
661                                 uval /= 10;
662                         } while (uval > 0);
663
664                         /* add more output digits to match the precision */
665                         if (have_prec) {
666                                 while (cend - cp < prec)
667                                         *--cp = '0';
668                         }
669
670                         if (sgn)
671                                 *--cp = '-';
672                         else if (signchar)
673                                 *--cp = signchar;
674                         /*
675                          * When to fill with zeroes is of course not simple.
676                          * First: No zero fill if left-justifying.
677                          * Next: There seem to be two cases:
678                          *      A '0' without a precision, e.g. %06d
679                          *      A precision with no field width, e.g. %.10d
680                          * Any other case, we don't want to fill with zeroes.
681                          */
682                         if (! lj
683                             && ((zero_flag && ! have_prec)
684                                  || (fw == 0 && have_prec)))
685                                 fill = zero_string;
686                         if (prec > fw)
687                                 fw = prec;
688                         prec = cend - cp;
689                         if (fw > prec && ! lj && fill != sp
690                             && (*cp == '-' || signchar)) {
691                                 bchunk_one(cp);
692                                 cp++;
693                                 prec--;
694                                 fw--;
695                         }
696                         goto pr_tail;
697                 case 'X':
698                         chbuf = Uchbuf; /* FALL THROUGH */
699                 case 'x':
700                         base += 6;      /* FALL THROUGH */
701                 case 'u':
702                         base += 2;      /* FALL THROUGH */
703                 case 'o':
704                         base += 8;
705                         need_format = FALSE;
706                         parse_next_arg();
707                         tmpval = force_number(arg);
708
709                         /*
710                          * ``The result of converting a zero value with a
711                          * precision of zero is no characters.''
712                          *
713                          * If I remember the ANSI C standard, though,
714                          * it says that for octal conversions
715                          * the precision is artificially increased
716                          * to add an extra 0 if # is supplied.
717                          * Indeed, in C,
718                          *      printf("%#.0o\n", 0);
719                          * prints a single 0.
720                          */
721                         if (! alt && have_prec && prec == 0 && tmpval == 0)
722                                 goto pr_tail;
723
724                         if (tmpval < 0) {
725                                 if (tmpval < LONG_MIN)
726                                         goto out_of_range;
727                                 uval = (unsigned long) (long) tmpval;
728                         } else {
729                                 /* Use !, so that NaNs are out of range.
730                                    The cast avoids a SunOS 4.1.x cc bug.  */
731                                 if (! (tmpval <= (unsigned long) ULONG_MAX))
732                                         goto out_of_range;
733                                 uval = (unsigned long) tmpval;
734                         }
735                         /*
736                          * When to fill with zeroes is of course not simple.
737                          * First: No zero fill if left-justifying.
738                          * Next: There seem to be two cases:
739                          *      A '0' without a precision, e.g. %06d
740                          *      A precision with no field width, e.g. %.10d
741                          * Any other case, we don't want to fill with zeroes.
742                          */
743                         if (! lj
744                             && ((zero_flag && ! have_prec)
745                                  || (fw == 0 && have_prec)))
746                                 fill = zero_string;
747                         do {
748                                 *--cp = chbuf[uval % base];
749                                 uval /= base;
750                         } while (uval > 0);
751
752                         /* add more output digits to match the precision */
753                         if (have_prec) {
754                                 while (cend - cp < prec)
755                                         *--cp = '0';
756                         }
757
758                         if (alt && tmpval != 0) {
759                                 if (base == 16) {
760                                         *--cp = cs1;
761                                         *--cp = '0';
762                                         if (fill != sp) {
763                                                 bchunk(cp, 2);
764                                                 cp += 2;
765                                                 fw -= 2;
766                                         }
767                                 } else if (base == 8)
768                                         *--cp = '0';
769                         }
770                         base = 0;
771                         if (prec > fw)
772                                 fw = prec;
773                         prec = cend - cp;
774         pr_tail:
775                         if (! lj) {
776                                 while (fw > prec) {
777                                         bchunk_one(fill);
778                                         fw--;
779                                 }
780                         }
781                         bchunk(cp, (int) prec);
782                         while (fw > prec) {
783                                 bchunk_one(fill);
784                                 fw--;
785                         }
786                         s0 = s1;
787                         free_temp(arg);
788                         break;
789
790      out_of_range:
791                         /* out of range - emergency use of %g format */
792                         cs1 = 'g';
793                         goto format_float;
794
795                 case 'g':
796                 case 'G':
797                 case 'e':
798                 case 'f':
799                 case 'E':
800                         need_format = FALSE;
801                         parse_next_arg();
802                         tmpval = force_number(arg);
803      format_float:
804                         free_temp(arg);
805                         if (! have_prec)
806                                 prec = DEFAULT_G_PRECISION;
807                         chksize(fw + prec + 9); /* 9 == slop */
808
809                         cp = cpbuf;
810                         *cp++ = '%';
811                         if (lj)
812                                 *cp++ = '-';
813                         if (signchar)
814                                 *cp++ = signchar;
815                         if (alt)
816                                 *cp++ = '#';
817                         if (zero_flag)
818                                 *cp++ = '0';
819                         strcpy(cp, "*.*");
820                         cp += 3;
821                         *cp++ = cs1;
822                         *cp = '\0';
823 #ifndef GFMT_WORKAROUND
824                         (void) sprintf(obufout, cpbuf,
825                                        (int) fw, (int) prec, (double) tmpval);
826 #else   /* GFMT_WORKAROUND */
827                         if (cs1 == 'g' || cs1 == 'G')
828                                 sgfmt(obufout, cpbuf, (int) alt,
829                                        (int) fw, (int) prec, (double) tmpval);
830                         else
831                                 (void) sprintf(obufout, cpbuf,
832                                        (int) fw, (int) prec, (double) tmpval);
833 #endif  /* GFMT_WORKAROUND */
834                         len = strlen(obufout);
835                         ofre -= len;
836                         obufout += len;
837                         s0 = s1;
838                         break;
839                 default:
840                         break;
841                 }
842                 if (toofew)
843                         fatal("%s\n\t`%s'\n\t%*s%s",
844                         "not enough arguments to satisfy format string",
845                         fmt_string, s1 - fmt_string - 2, "",
846                         "^ ran out for this one"
847                         );
848         }
849         if (do_lint) {
850                 if (need_format)
851                         warning(
852                         "printf format specifier does not have control letter");
853                 if (carg != NULL)
854                         warning(
855                         "too many arguments supplied for format string");
856         }
857         bchunk(s0, s1 - s0);
858         r = make_str_node(obuf, obufout - obuf, ALREADY_MALLOCED);
859         r->flags |= TEMP;
860         return r;
861 }
862
863 /* do_sprintf --- perform sprintf */
864
865 NODE *
866 do_sprintf(tree)
867 NODE *tree;
868 {
869         NODE *r;
870         NODE *sfmt = force_string(tree_eval(tree->lnode));
871
872         r = format_tree(sfmt->stptr, sfmt->stlen, tree->rnode);
873         free_temp(sfmt);
874         return r;
875 }
876
877 /* do_printf --- perform printf, including redirection */
878
879 void
880 do_printf(tree)
881 register NODE *tree;
882 {
883         struct redirect *rp = NULL;
884         register FILE *fp;
885
886         if (tree->lnode == NULL) {
887                 if (do_traditional) {
888                         if (do_lint)
889                                 warning("printf: no arguments");
890                         return; /* bwk accepts it silently */
891                 }
892                 fatal("printf: no arguments");
893         }
894
895         if (tree->rnode != NULL) {
896                 int errflg;     /* not used, sigh */
897
898                 rp = redirect(tree->rnode, &errflg);
899                 if (rp != NULL) {
900                         fp = rp->fp;
901                         if (fp == NULL)
902                                 return;
903                 } else
904                         return;
905         } else
906                 fp = stdout;
907         tree = do_sprintf(tree->lnode);
908         efwrite(tree->stptr, sizeof(char), tree->stlen, fp, "printf", rp, TRUE);
909         free_temp(tree);
910 }
911
912 /* do_sqrt --- do the sqrt function */
913
914 NODE *
915 do_sqrt(tree)
916 NODE *tree;
917 {
918         NODE *tmp;
919         double arg;
920
921         tmp = tree_eval(tree->lnode);
922         arg = (double) force_number(tmp);
923         free_temp(tmp);
924         if (arg < 0.0)
925                 warning("sqrt called with negative argument %g", arg);
926         return tmp_number((AWKNUM) sqrt(arg));
927 }
928
929 /* do_substr --- do the substr function */
930
931 NODE *
932 do_substr(tree)
933 NODE *tree;
934 {
935         NODE *t1, *t2, *t3;
936         NODE *r;
937         register size_t indx;
938         size_t length;
939         double d_index, d_length;
940
941         t1 = force_string(tree_eval(tree->lnode));
942         t2 = tree_eval(tree->rnode->lnode);
943         d_index = force_number(t2);
944         free_temp(t2);
945
946         if (d_index < 1.0) {
947                 if (do_lint)
948                         warning("substr: start index %g invalid, using 1",
949                                 d_index);
950                 d_index = 1;
951         }
952         if (do_lint && double_to_int(d_index) != d_index)
953                 warning("substr: non-integer start index %g will be truncated",
954                         d_index);
955
956         indx = d_index - 1;     /* awk indices are from 1, C's are from 0 */
957
958         if (tree->rnode->rnode == NULL) {       /* third arg. missing */
959                 /* use remainder of string */
960                 length = t1->stlen - indx;
961         } else {
962                 t3 = tree_eval(tree->rnode->rnode->lnode);
963                 d_length = force_number(t3);
964                 free_temp(t3);
965                 if (d_length <= 0.0) {
966                         if (do_lint)
967                                 warning("substr: length %g is <= 0", d_length);
968                         free_temp(t1);
969                         return Nnull_string;
970                 }
971                 if (do_lint && double_to_int(d_length) != d_length)
972                         warning(
973                 "substr: non-integer length %g will be truncated",
974                                 d_length);
975                 length = d_length;
976         }
977
978         if (t1->stlen == 0) {
979                 if (do_lint)
980                         warning("substr: source string is zero length");
981                 free_temp(t1);
982                 return Nnull_string;
983         }
984         if ((indx + length) > t1->stlen) {
985                 if (do_lint)
986                         warning(
987         "substr: length %d at position %d exceeds length of first argument (%d)",
988                         length, indx+1, t1->stlen);
989                 length = t1->stlen - indx;
990         }
991         if (indx >= t1->stlen) {
992                 if (do_lint)
993                         warning("substr: start index %d is past end of string",
994                                 indx+1);
995                 free_temp(t1);
996                 return Nnull_string;
997         }
998         r = tmp_string(t1->stptr + indx, length);
999         free_temp(t1);
1000         return r;
1001 }
1002
1003 /* do_strftime --- format a time stamp */
1004
1005 NODE *
1006 do_strftime(tree)
1007 NODE *tree;
1008 {
1009         NODE *t1, *t2, *ret;
1010         struct tm *tm;
1011         time_t fclock;
1012         char *bufp;
1013         size_t buflen, bufsize;
1014         char buf[BUFSIZ];
1015         static char def_format[] = "%a %b %d %H:%M:%S %Z %Y";
1016         char *format;
1017         int formatlen;
1018
1019         /* set defaults first */
1020         format = def_format;    /* traditional date format */
1021         formatlen = strlen(format);
1022         (void) time(&fclock);   /* current time of day */
1023
1024         t1 = t2 = NULL;
1025         if (tree != NULL) {     /* have args */
1026                 if (tree->lnode != NULL) {
1027                         t1 = force_string(tree_eval(tree->lnode));
1028                         format = t1->stptr;
1029                         formatlen = t1->stlen;
1030                         if (formatlen == 0) {
1031                                 if (do_lint)
1032                                         warning("strftime called with empty format string");
1033                                 free_temp(t1);
1034                                 return tmp_string("", 0);
1035                         }
1036                 }
1037         
1038                 if (tree->rnode != NULL) {
1039                         t2 = tree_eval(tree->rnode->lnode);
1040                         fclock = (time_t) force_number(t2);
1041                         free_temp(t2);
1042                 }
1043         }
1044
1045         tm = localtime(&fclock);
1046
1047         bufp = buf;
1048         bufsize = sizeof(buf);
1049         for (;;) {
1050                 *bufp = '\0';
1051                 buflen = strftime(bufp, bufsize, format, tm);
1052                 /*
1053                  * buflen can be zero EITHER because there's not enough
1054                  * room in the string, or because the control command
1055                  * goes to the empty string. Make a reasonable guess that
1056                  * if the buffer is 1024 times bigger than the length of the
1057                  * format string, it's not failing for lack of room.
1058                  * Thanks to Paul Eggert for pointing out this issue.
1059                  */
1060                 if (buflen > 0 || bufsize >= 1024 * formatlen)
1061                         break;
1062                 bufsize *= 2;
1063                 if (bufp == buf)
1064                         emalloc(bufp, char *, bufsize, "do_strftime");
1065                 else
1066                         erealloc(bufp, char *, bufsize, "do_strftime");
1067         }
1068         ret = tmp_string(bufp, buflen);
1069         if (bufp != buf)
1070                 free(bufp);
1071         if (t1)
1072                 free_temp(t1);
1073         return ret;
1074 }
1075
1076 /* do_systime --- get the time of day */
1077
1078 NODE *
1079 do_systime(tree)
1080 NODE *tree;
1081 {
1082         time_t lclock;
1083
1084         (void) time(&lclock);
1085         return tmp_number((AWKNUM) lclock);
1086 }
1087
1088
1089
1090 /* do_system --- run an external command */
1091
1092 NODE *
1093 do_system(tree)
1094 NODE *tree;
1095 {
1096         NODE *tmp;
1097         int ret = 0;
1098         char *cmd;
1099         char save;
1100
1101         (void) flush_io();     /* so output is synchronous with gawk's */
1102         tmp = tree_eval(tree->lnode);
1103         cmd = force_string(tmp)->stptr;
1104
1105         if (cmd && *cmd) {
1106                 /* insure arg to system is zero-terminated */
1107
1108                 /*
1109                  * From: David Trueman <david@cs.dal.ca>
1110                  * To: arnold@cc.gatech.edu (Arnold Robbins)
1111                  * Date: Wed, 3 Nov 1993 12:49:41 -0400
1112                  * 
1113                  * It may not be necessary to save the character, but
1114                  * I'm not sure.  It would normally be the field
1115                  * separator.  If the parse has not yet gone beyond
1116                  * that, it could mess up (although I doubt it).  If
1117                  * FIELDWIDTHS is being used, it might be the first
1118                  * character of the next field.  Unless someone wants
1119                  * to check it out exhaustively, I suggest saving it
1120                  * for now...
1121                  */
1122                 save = cmd[tmp->stlen];
1123                 cmd[tmp->stlen] = '\0';
1124
1125                 ret = system(cmd);
1126                 ret = (ret >> 8) & 0xff;
1127
1128                 cmd[tmp->stlen] = save;
1129         }
1130         free_temp(tmp);
1131         return tmp_number((AWKNUM) ret);
1132 }
1133
1134 extern NODE **fmt_list;  /* declared in eval.c */
1135
1136 /* do_print --- print items, separated by OFS, terminated with ORS */
1137
1138 void 
1139 do_print(tree)
1140 register NODE *tree;
1141 {
1142         register NODE **t;
1143         struct redirect *rp = NULL;
1144         register FILE *fp;
1145         int numnodes, i;
1146         NODE *save;
1147         NODE *tval;
1148
1149         if (tree->rnode) {
1150                 int errflg;             /* not used, sigh */
1151
1152                 rp = redirect(tree->rnode, &errflg);
1153                 if (rp != NULL) {
1154                         fp = rp->fp;
1155                         if (fp == NULL)
1156                                 return;
1157                 } else
1158                         return;
1159         } else
1160                 fp = stdout;
1161
1162         /*
1163          * General idea is to evaluate all the expressions first and
1164          * then print them, otherwise you get suprising behavior.
1165          * See test/prtoeval.awk for an example program.
1166          */
1167         save = tree = tree->lnode;
1168         for (numnodes = 0; tree != NULL; tree = tree->rnode)
1169                 numnodes++;
1170         emalloc(t, NODE **, numnodes * sizeof(NODE *), "do_print");
1171
1172         tree = save;
1173         for (i = 0; tree != NULL; i++, tree = tree->rnode) {
1174                 NODE *n;
1175
1176                 /* Here lies the wumpus. R.I.P. */
1177                 n = tree_eval(tree->lnode);
1178                 t[i] = dupnode(n);
1179                 free_temp(n);
1180
1181                 if ((t[i]->flags & (NUMBER|STRING)) == NUMBER) {
1182                         if (OFMTidx == CONVFMTidx)
1183                                 (void) force_string(t[i]);
1184                         else {
1185                                 tval = tmp_number(t[i]->numbr);
1186                                 unref(t[i]);
1187                                 t[i] = format_val(OFMT, OFMTidx, tval);
1188                         }
1189                 }
1190         }
1191
1192         for (i = 0; i < numnodes; i++) {
1193                 efwrite(t[i]->stptr, sizeof(char), t[i]->stlen, fp, "print", rp, FALSE);
1194                 unref(t[i]);
1195
1196                 if (i != numnodes - 1 && OFSlen > 0)
1197                         efwrite(OFS, sizeof(char), (size_t) OFSlen,
1198                                 fp, "print", rp, FALSE);
1199
1200         }
1201         if (ORSlen > 0)
1202                 efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, TRUE);
1203
1204         free(t);
1205 }
1206
1207 /* do_tolower --- lower case a string */
1208
1209 NODE *
1210 do_tolower(tree)
1211 NODE *tree;
1212 {
1213         NODE *t1, *t2;
1214         register unsigned char *cp, *cp2;
1215
1216         t1 = tree_eval(tree->lnode);
1217         t1 = force_string(t1);
1218         t2 = tmp_string(t1->stptr, t1->stlen);
1219         for (cp = (unsigned char *)t2->stptr,
1220              cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++)
1221                 if (ISUPPER(*cp))
1222                         *cp = tolower(*cp);
1223         free_temp(t1);
1224         return t2;
1225 }
1226
1227 /* do_toupper --- upper case a string */
1228
1229 NODE *
1230 do_toupper(tree)
1231 NODE *tree;
1232 {
1233         NODE *t1, *t2;
1234         register unsigned char *cp, *cp2;
1235
1236         t1 = tree_eval(tree->lnode);
1237         t1 = force_string(t1);
1238         t2 = tmp_string(t1->stptr, t1->stlen);
1239         for (cp = (unsigned char *)t2->stptr,
1240              cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++)
1241                 if (ISLOWER(*cp))
1242                         *cp = toupper(*cp);
1243         free_temp(t1);
1244         return t2;
1245 }
1246
1247 /* do_atan2 --- do the atan2 function */
1248
1249 NODE *
1250 do_atan2(tree)
1251 NODE *tree;
1252 {
1253         NODE *t1, *t2;
1254         double d1, d2;
1255
1256         t1 = tree_eval(tree->lnode);
1257         t2 = tree_eval(tree->rnode->lnode);
1258         d1 = force_number(t1);
1259         d2 = force_number(t2);
1260         free_temp(t1);
1261         free_temp(t2);
1262         return tmp_number((AWKNUM) atan2(d1, d2));
1263 }
1264
1265 /* do_sin --- do the sin function */
1266
1267 NODE *
1268 do_sin(tree)
1269 NODE *tree;
1270 {
1271         NODE *tmp;
1272         double d;
1273
1274         tmp = tree_eval(tree->lnode);
1275         d = sin((double) force_number(tmp));
1276         free_temp(tmp);
1277         return tmp_number((AWKNUM) d);
1278 }
1279
1280 /* do_cos --- do the cos function */
1281
1282 NODE *
1283 do_cos(tree)
1284 NODE *tree;
1285 {
1286         NODE *tmp;
1287         double d;
1288
1289         tmp = tree_eval(tree->lnode);
1290         d = cos((double) force_number(tmp));
1291         free_temp(tmp);
1292         return tmp_number((AWKNUM) d);
1293 }
1294
1295 /* do_rand --- do the rand function */
1296
1297 static int firstrand = TRUE;
1298 static char state[512];
1299
1300 /* ARGSUSED */
1301 NODE *
1302 do_rand(tree)
1303 NODE *tree;
1304 {
1305         if (firstrand) {
1306                 (void) initstate((unsigned) 1, state, sizeof state);
1307                 srandom(1);
1308                 firstrand = FALSE;
1309         }
1310         return tmp_number((AWKNUM) random() / GAWK_RANDOM_MAX);
1311 }
1312
1313 /* do_srand --- seed the random number generator */
1314
1315 NODE *
1316 do_srand(tree)
1317 NODE *tree;
1318 {
1319         NODE *tmp;
1320         static long save_seed = 1;
1321         long ret = save_seed;   /* SVR4 awk srand returns previous seed */
1322
1323         if (firstrand) {
1324                 (void) initstate((unsigned) 1, state, sizeof state);
1325                 /* don't need to srandom(1), we're changing the seed below */
1326                 firstrand = FALSE;
1327         } else
1328                 (void) setstate(state);
1329
1330         if (tree == NULL)
1331 #ifdef __FreeBSD__
1332                 srandom((unsigned int) (save_seed = (long) time((time_t *) 0) ^ getpid()));
1333 #else
1334                 srandom((unsigned int) (save_seed = (long) time((time_t *) 0)));
1335 #endif
1336         else {
1337                 tmp = tree_eval(tree->lnode);
1338                 srandom((unsigned int) (save_seed = (long) force_number(tmp)));
1339                 free_temp(tmp);
1340         }
1341         return tmp_number((AWKNUM) ret);
1342 }
1343
1344 /* do_match --- match a regexp, set RSTART and RLENGTH */
1345
1346 NODE *
1347 do_match(tree)
1348 NODE *tree;
1349 {
1350         NODE *t1;
1351         int rstart;
1352         AWKNUM rlength;
1353         Regexp *rp;
1354
1355         t1 = force_string(tree_eval(tree->lnode));
1356         tree = tree->rnode->lnode;
1357         rp = re_update(tree);
1358         rstart = research(rp, t1->stptr, 0, t1->stlen, TRUE);
1359         if (rstart >= 0) {      /* match succeded */
1360                 rstart++;       /* 1-based indexing */
1361                 rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr);
1362         } else {                /* match failed */
1363                 rstart = 0;
1364                 rlength = -1.0;
1365         }
1366         free_temp(t1);
1367         unref(RSTART_node->var_value);
1368         RSTART_node->var_value = make_number((AWKNUM) rstart);
1369         unref(RLENGTH_node->var_value);
1370         RLENGTH_node->var_value = make_number(rlength);
1371         return tmp_number((AWKNUM) rstart);
1372 }
1373
1374 /* sub_common --- the common code (does the work) for sub, gsub, and gensub */
1375
1376 /*
1377  * Gsub can be tricksy; particularly when handling the case of null strings.
1378  * The following awk code was useful in debugging problems.  It is too bad
1379  * that it does not readily translate directly into the C code, below.
1380  * 
1381  * #! /usr/local/bin/mawk -f
1382  * 
1383  * BEGIN {
1384  *      TRUE = 1; FALSE = 0
1385  *      print "--->", mygsub("abc", "b+", "FOO")
1386  *      print "--->", mygsub("abc", "x*", "X")
1387  *      print "--->", mygsub("abc", "b*", "X")
1388  *      print "--->", mygsub("abc", "c", "X")
1389  *      print "--->", mygsub("abc", "c+", "X")
1390  *      print "--->", mygsub("abc", "x*$", "X")
1391  * }
1392  * 
1393  * function mygsub(str, regex, replace, origstr, newstr, eosflag, nonzeroflag)
1394  * {
1395  *      origstr = str;
1396  *      eosflag = nonzeroflag = FALSE
1397  *      while (match(str, regex)) {
1398  *              if (RLENGTH > 0) {      # easy case
1399  *                      nonzeroflag = TRUE
1400  *                      if (RSTART == 1) {      # match at front of string
1401  *                              newstr = newstr replace
1402  *                      } else {
1403  *                              newstr = newstr substr(str, 1, RSTART-1) replace
1404  *                      }
1405  *                      str = substr(str, RSTART+RLENGTH)
1406  *              } else if (nonzeroflag) {
1407  *                      # last match was non-zero in length, and at the
1408  *                      # current character, we get a zero length match,
1409  *                      # which we don't really want, so skip over it
1410  *                      newstr = newstr substr(str, 1, 1)
1411  *                      str = substr(str, 2)
1412  *                      nonzeroflag = FALSE
1413  *              } else {
1414  *                      # 0-length match
1415  *                      if (RSTART == 1) {
1416  *                              newstr = newstr replace substr(str, 1, 1)
1417  *                              str = substr(str, 2)
1418  *                      } else {
1419  *                              return newstr str replace
1420  *                      }
1421  *              }
1422  *              if (length(str) == 0)
1423  *                      if (eosflag)
1424  *                              break;
1425  *                      else
1426  *                              eosflag = TRUE
1427  *      }
1428  *      if (length(str) > 0)
1429  *              newstr = newstr str     # rest of string
1430  * 
1431  *      return newstr
1432  * }
1433  */
1434
1435 /*
1436  * NB: `howmany' conflicts with a SunOS macro in <sys/param.h>.
1437  */
1438
1439 static NODE *
1440 sub_common(tree, how_many, backdigs)
1441 NODE *tree;
1442 int how_many, backdigs;
1443 {
1444         register char *scan;
1445         register char *bp, *cp;
1446         char *buf;
1447         size_t buflen;
1448         register char *matchend;
1449         register size_t len;
1450         char *matchstart;
1451         char *text;
1452         size_t textlen;
1453         char *repl;
1454         char *replend;
1455         size_t repllen;
1456         int sofar;
1457         int ampersands;
1458         int matches = 0;
1459         Regexp *rp;
1460         NODE *s;                /* subst. pattern */
1461         NODE *t;                /* string to make sub. in; $0 if none given */
1462         NODE *tmp;
1463         NODE **lhs = &tree;     /* value not used -- just different from NULL */
1464         int priv = FALSE;
1465         Func_ptr after_assign = NULL;
1466
1467         int global = (how_many == -1);
1468         long current;
1469         int lastmatchnonzero;
1470
1471         tmp = tree->lnode;
1472         rp = re_update(tmp);
1473
1474         tree = tree->rnode;
1475         s = tree->lnode;
1476
1477         tree = tree->rnode;
1478         tmp = tree->lnode;
1479         t = force_string(tree_eval(tmp));
1480
1481         /* do the search early to avoid work on non-match */
1482         if (research(rp, t->stptr, 0, t->stlen, TRUE) == -1 ||
1483             RESTART(rp, t->stptr) > t->stlen) {
1484                 free_temp(t);
1485                 return tmp_number((AWKNUM) 0.0);
1486         }
1487
1488         if (tmp->type == Node_val)
1489                 lhs = NULL;
1490         else
1491                 lhs = get_lhs(tmp, &after_assign);
1492         t->flags |= STRING;
1493         /*
1494          * create a private copy of the string
1495          */
1496         if (t->stref > 1 || (t->flags & (PERM|FIELD)) != 0) {
1497                 unsigned int saveflags;
1498
1499                 saveflags = t->flags;
1500                 t->flags &= ~MALLOC;
1501                 tmp = dupnode(t);
1502                 t->flags = saveflags;
1503                 t = tmp;
1504                 priv = TRUE;
1505         }
1506         text = t->stptr;
1507         textlen = t->stlen;
1508         buflen = textlen + 2;
1509
1510         s = force_string(tree_eval(s));
1511         repl = s->stptr;
1512         replend = repl + s->stlen;
1513         repllen = replend - repl;
1514         emalloc(buf, char *, buflen + 2, "sub_common");
1515         buf[buflen] = '\0';
1516         buf[buflen + 1] = '\0';
1517         ampersands = 0;
1518         for (scan = repl; scan < replend; scan++) {
1519                 if (*scan == '&') {
1520                         repllen--;
1521                         ampersands++;
1522                 } else if (*scan == '\\') {
1523                         if (backdigs) { /* gensub, behave sanely */
1524                                 if (ISDIGIT(scan[1])) {
1525                                         ampersands++;
1526                                         scan++;
1527                                 } else {        /* \q for any q --> q */
1528                                         repllen--;
1529                                         scan++;
1530                                 }
1531                         } else {        /* (proposed) posix '96 mode */
1532                                 if (strncmp(scan, "\\\\\\&", 4) == 0) {
1533                                         /* \\\& --> \& */
1534                                         repllen -= 2;
1535                                         scan += 3;
1536                                 } else if (strncmp(scan, "\\\\&", 3) == 0) {
1537                                         /* \\& --> \<string> */
1538                                         ampersands++;
1539                                         repllen--;
1540                                         scan += 2;
1541                                 } else if (scan[1] == '&') {
1542                                         /* \& --> & */
1543                                         repllen--;
1544                                         scan++;
1545                                 } /* else
1546                                         leave alone, it goes into the output */
1547                         }
1548                 }
1549         }
1550
1551         lastmatchnonzero = FALSE;
1552         bp = buf;
1553         for (current = 1;; current++) {
1554                 matches++;
1555                 matchstart = t->stptr + RESTART(rp, t->stptr);
1556                 matchend = t->stptr + REEND(rp, t->stptr);
1557
1558                 /*
1559                  * create the result, copying in parts of the original
1560                  * string 
1561                  */
1562                 len = matchstart - text + repllen
1563                       + ampersands * (matchend - matchstart);
1564                 sofar = bp - buf;
1565                 while (buflen < (sofar + len + 1)) {
1566                         buflen *= 2;
1567                         erealloc(buf, char *, buflen, "sub_common");
1568                         bp = buf + sofar;
1569                 }
1570                 for (scan = text; scan < matchstart; scan++)
1571                         *bp++ = *scan;
1572                 if (global || current == how_many) {
1573                         /*
1574                          * If the current match matched the null string,
1575                          * and the last match didn't and did a replacement,
1576                          * then skip this one.
1577                          */
1578                         if (lastmatchnonzero && matchstart == matchend) {
1579                                 lastmatchnonzero = FALSE;
1580                                 matches--;
1581                                 goto empty;
1582                         }
1583                         /*
1584                          * If replacing all occurrences, or this is the
1585                          * match we want, copy in the replacement text,
1586                          * making substitutions as we go.
1587                          */
1588                         for (scan = repl; scan < replend; scan++)
1589                                 if (*scan == '&')
1590                                         for (cp = matchstart; cp < matchend; cp++)
1591                                                 *bp++ = *cp;
1592                                 else if (*scan == '\\') {
1593                                         if (backdigs) { /* gensub, behave sanely */
1594                                                 if (ISDIGIT(scan[1])) {
1595                                                         int dig = scan[1] - '0';
1596                                                         char *start, *end;
1597                 
1598                                                         start = t->stptr
1599                                                               + SUBPATSTART(rp, t->stptr, dig);
1600                                                         end = t->stptr
1601                                                               + SUBPATEND(rp, t->stptr, dig);
1602                 
1603                                                         for (cp = start; cp < end; cp++)
1604                                                                 *bp++ = *cp;
1605                                                         scan++;
1606                                                 } else  /* \q for any q --> q */
1607                                                         *bp++ = *++scan;
1608                                         } else {        /* posix '96 mode, bleah */
1609                                                 if (strncmp(scan, "\\\\\\&", 4) == 0) {
1610                                                         /* \\\& --> \& */
1611                                                         *bp++ = '\\';
1612                                                         *bp++ = '&';
1613                                                         scan += 3;
1614                                                 } else if (strncmp(scan, "\\\\&", 3) == 0) {
1615                                                         /* \\& --> \<string> */
1616                                                         *bp++ = '\\';
1617                                                         for (cp = matchstart; cp < matchend; cp++)
1618                                                                 *bp++ = *cp;
1619                                                         scan += 2;
1620                                                 } else if (scan[1] == '&') {
1621                                                         /* \& --> & */
1622                                                         *bp++ = '&';
1623                                                         scan++;
1624                                                 } else
1625                                                         *bp++ = *scan;
1626                                         }
1627                                 } else
1628                                         *bp++ = *scan;
1629                         if (matchstart != matchend)
1630                                 lastmatchnonzero = TRUE;
1631                 } else {
1632                         /*
1633                          * don't want this match, skip over it by copying
1634                          * in current text.
1635                          */
1636                         for (cp = matchstart; cp < matchend; cp++)
1637                                 *bp++ = *cp;
1638                 }
1639         empty:
1640                 /* catch the case of gsub(//, "blah", whatever), i.e. empty regexp */
1641                 if (matchstart == matchend && matchend < text + textlen) {
1642                         *bp++ = *matchend;
1643                         matchend++;
1644                 }
1645                 textlen = text + textlen - matchend;
1646                 text = matchend;
1647
1648                 if ((current >= how_many && !global)
1649                     || ((long) textlen <= 0 && matchstart == matchend)
1650                     || research(rp, t->stptr, text - t->stptr, textlen, TRUE) == -1)
1651                         break;
1652
1653         }
1654         sofar = bp - buf;
1655         if (buflen - sofar - textlen - 1) {
1656                 buflen = sofar + textlen + 2;
1657                 erealloc(buf, char *, buflen, "sub_common");
1658                 bp = buf + sofar;
1659         }
1660         for (scan = matchend; scan < text + textlen; scan++)
1661                 *bp++ = *scan;
1662         *bp = '\0';
1663         textlen = bp - buf;
1664         free(t->stptr);
1665         t->stptr = buf;
1666         t->stlen = textlen;
1667
1668         free_temp(s);
1669         if (matches > 0 && lhs) {
1670                 if (priv) {
1671                         unref(*lhs);
1672                         *lhs = t;
1673                 }
1674                 if (after_assign != NULL)
1675                         (*after_assign)();
1676                 t->flags &= ~(NUM|NUMBER);
1677         }
1678         return tmp_number((AWKNUM) matches);
1679 }
1680
1681 /* do_gsub --- global substitution */
1682
1683 NODE *
1684 do_gsub(tree)
1685 NODE *tree;
1686 {
1687         return sub_common(tree, -1, FALSE);
1688 }
1689
1690 /* do_sub --- single substitution */
1691
1692 NODE *
1693 do_sub(tree)
1694 NODE *tree;
1695 {
1696         return sub_common(tree, 1, FALSE);
1697 }
1698
1699 /* do_gensub --- fix up the tree for sub_common for the gensub function */
1700
1701 NODE *
1702 do_gensub(tree)
1703 NODE *tree;
1704 {
1705         NODE n1, n2, n3, *t, *tmp, *target, *ret;
1706         long how_many = 1;      /* default is one substitution */
1707         double d;
1708
1709         /*
1710          * We have to pull out the value of the global flag, and
1711          * build up a tree without the flag in it, turning it into the
1712          * kind of tree that sub_common() expects.  It helps to draw
1713          * a picture of this ...
1714          */
1715         n1 = *tree;
1716         n2 = *(tree->rnode);
1717         n1.rnode = & n2;
1718
1719         t = tree_eval(n2.rnode->lnode); /* value of global flag */
1720
1721         tmp = force_string(tree_eval(n2.rnode->rnode->lnode));  /* target */
1722
1723         /*
1724          * We make copy of the original target string, and pass that
1725          * in to sub_common() as the target to make the substitution in.
1726          * We will then return the result string as the return value of
1727          * this function.
1728          */
1729         target = make_string(tmp->stptr, tmp->stlen);
1730         free_temp(tmp);
1731
1732         n3 = *(n2.rnode->rnode);
1733         n3.lnode = target;
1734         n2.rnode = & n3;
1735
1736         if ((t->flags & (STR|STRING)) != 0) {
1737                 if (t->stlen > 0 && (t->stptr[0] == 'g' || t->stptr[0] == 'G'))
1738                         how_many = -1;
1739                 else
1740                         how_many = 1;
1741         } else {
1742                 d = force_number(t);
1743                 if (d > 0)
1744                         how_many = d;
1745                 else
1746                         how_many = 1;
1747         }
1748
1749         free_temp(t);
1750
1751         ret = sub_common(&n1, how_many, TRUE);
1752         free_temp(ret);
1753
1754         /*
1755          * Note that we don't care what sub_common() returns, since the
1756          * easiest thing for the programmer is to return the string, even
1757          * if no substitutions were done.
1758          */
1759         target->flags |= TEMP;
1760         return target;
1761 }
1762
1763 #ifdef GFMT_WORKAROUND
1764 /*
1765  * printf's %g format [can't rely on gcvt()]
1766  *      caveat: don't use as argument to *printf()!
1767  * 'format' string HAS to be of "<flags>*.*g" kind, or we bomb!
1768  */
1769 static void
1770 sgfmt(buf, format, alt, fwidth, prec, g)
1771 char *buf;      /* return buffer; assumed big enough to hold result */
1772 const char *format;
1773 int alt;        /* use alternate form flag */
1774 int fwidth;     /* field width in a format */
1775 int prec;       /* indicates desired significant digits, not decimal places */
1776 double g;       /* value to format */
1777 {
1778         char dform[40];
1779         register char *gpos;
1780         register char *d, *e, *p;
1781         int again = FALSE;
1782
1783         strncpy(dform, format, sizeof dform - 1);
1784         dform[sizeof dform - 1] = '\0';
1785         gpos = strrchr(dform, '.');
1786
1787         if (g == 0.0 && ! alt) {        /* easy special case */
1788                 *gpos++ = 'd';
1789                 *gpos = '\0';
1790                 (void) sprintf(buf, dform, fwidth, 0);
1791                 return;
1792         }
1793
1794         /* advance to location of 'g' in the format */
1795         while (*gpos && *gpos != 'g' && *gpos != 'G')
1796                 gpos++;
1797
1798         if (prec <= 0)        /* negative precision is ignored */
1799                 prec = (prec < 0 ?  DEFAULT_G_PRECISION : 1);
1800
1801         if (*gpos == 'G')
1802                 again = TRUE;
1803         /* start with 'e' format (it'll provide nice exponent) */
1804         *gpos = 'e';
1805         prec--;
1806         (void) sprintf(buf, dform, fwidth, prec, g);
1807         if ((e = strrchr(buf, 'e')) != NULL) {  /* find exponent  */
1808                 int expn = atoi(e+1);           /* fetch exponent */
1809                 if (expn >= -4 && expn <= prec) {       /* per K&R2, B1.2 */
1810                         /* switch to 'f' format and re-do */
1811                         *gpos = 'f';
1812                         prec -= expn;           /* decimal precision */
1813                         (void) sprintf(buf, dform, fwidth, prec, g);
1814                         e = buf + strlen(buf);
1815                         while (*--e == ' ')
1816                                 continue;
1817                         e++;
1818                 }
1819                 else if (again)
1820                         *gpos = 'E';
1821
1822                 /* if 'alt' in force, then trailing zeros are not removed */
1823                 if (! alt && (d = strrchr(buf, '.')) != NULL) {
1824                         /* throw away an excess of precision */
1825                         for (p = e; p > d && *--p == '0'; )
1826                                 prec--;
1827                         if (d == p)
1828                                 prec--;
1829                         if (prec < 0)
1830                                 prec = 0;
1831                         /* and do that once again */
1832                         again = TRUE;
1833                 }
1834                 if (again)
1835                         (void) sprintf(buf, dform, fwidth, prec, g);
1836         }
1837 }
1838 #endif  /* GFMT_WORKAROUND */
1839
1840 #ifdef BITOPS
1841 #define BITS_PER_BYTE   8       /* if not true, you lose. too bad. */
1842
1843 /* do_lshift --- perform a << operation */
1844
1845 NODE *
1846 do_lshift(tree)
1847 NODE *tree;
1848 {
1849         NODE *s1, *s2;
1850         unsigned long uval, ushift, result;
1851         AWKNUM val, shift;
1852
1853         s1 = tree_eval(tree->lnode);
1854         s2 = tree_eval(tree->rnode->lnode);
1855         val = force_number(s1);
1856         shift = force_number(s2);
1857         free_temp(s1);
1858         free_temp(s2);
1859
1860         if (do_lint) {
1861                 if (val < 0 || shift < 0)
1862                         warning("lshift(%lf, %lf): negative values will give strange results", val, shift);
1863                 if (double_to_int(val) != val || double_to_int(shift) != shift)
1864                         warning("lshift(%lf, %lf): fractional values will be truncated", val, shift);
1865                 if (shift > (sizeof(unsigned long) * BITS_PER_BYTE))
1866                         warning("lshift(%lf, %lf): too large shift value will give strange results", val, shift);
1867         }
1868
1869         uval = (unsigned long) val;
1870         ushift = (unsigned long) shift;
1871
1872         result = uval << ushift;
1873         return tmp_number((AWKNUM) result);
1874 }
1875
1876 /* do_rshift --- perform a >> operation */
1877
1878 NODE *
1879 do_rshift(tree)
1880 NODE *tree;
1881 {
1882         NODE *s1, *s2;
1883         unsigned long uval, ushift, result;
1884         AWKNUM val, shift;
1885
1886         s1 = tree_eval(tree->lnode);
1887         s2 = tree_eval(tree->rnode->lnode);
1888         val = force_number(s1);
1889         shift = force_number(s2);
1890         free_temp(s1);
1891         free_temp(s2);
1892
1893         if (do_lint) {
1894                 if (val < 0 || shift < 0)
1895                         warning("rshift(%lf, %lf): negative values will give strange results", val, shift);
1896                 if (double_to_int(val) != val || double_to_int(shift) != shift)
1897                         warning("rshift(%lf, %lf): fractional values will be truncated", val, shift);
1898                 if (shift > (sizeof(unsigned long) * BITS_PER_BYTE))
1899                         warning("rshift(%lf, %lf): too large shift value will give strange results", val, shift);
1900         }
1901
1902         uval = (unsigned long) val;
1903         ushift = (unsigned long) shift;
1904
1905         result = uval >> ushift;
1906         return tmp_number((AWKNUM) result);
1907 }
1908
1909 /* do_and --- perform an & operation */
1910
1911 NODE *
1912 do_and(tree)
1913 NODE *tree;
1914 {
1915         NODE *s1, *s2;
1916         unsigned long uleft, uright, result;
1917         AWKNUM left, right;
1918
1919         s1 = tree_eval(tree->lnode);
1920         s2 = tree_eval(tree->rnode->lnode);
1921         left = force_number(s1);
1922         right = force_number(s2);
1923         free_temp(s1);
1924         free_temp(s2);
1925
1926         if (do_lint) {
1927                 if (left < 0 || right < 0)
1928                         warning("and(%lf, %lf): negative values will give strange results", left, right);
1929                 if (double_to_int(left) != left || double_to_int(right) != right)
1930                         warning("and(%lf, %lf): fractional values will be truncated", left, right);
1931         }
1932
1933         uleft = (unsigned long) left;
1934         uright = (unsigned long) right;
1935
1936         result = uleft & uright;
1937         return tmp_number((AWKNUM) result);
1938 }
1939
1940 /* do_or --- perform an | operation */
1941
1942 NODE *
1943 do_or(tree)
1944 NODE *tree;
1945 {
1946         NODE *s1, *s2;
1947         unsigned long uleft, uright, result;
1948         AWKNUM left, right;
1949
1950         s1 = tree_eval(tree->lnode);
1951         s2 = tree_eval(tree->rnode->lnode);
1952         left = force_number(s1);
1953         right = force_number(s2);
1954         free_temp(s1);
1955         free_temp(s2);
1956
1957         if (do_lint) {
1958                 if (left < 0 || right < 0)
1959                         warning("or(%lf, %lf): negative values will give strange results", left, right);
1960                 if (double_to_int(left) != left || double_to_int(right) != right)
1961                         warning("or(%lf, %lf): fractional values will be truncated", left, right);
1962         }
1963
1964         uleft = (unsigned long) left;
1965         uright = (unsigned long) right;
1966
1967         result = uleft | uright;
1968         return tmp_number((AWKNUM) result);
1969 }
1970
1971 /* do_xor --- perform an ^ operation */
1972
1973 NODE *
1974 do_xor(tree)
1975 NODE *tree;
1976 {
1977         NODE *s1, *s2;
1978         unsigned long uleft, uright, result;
1979         AWKNUM left, right;
1980
1981         s1 = tree_eval(tree->lnode);
1982         s2 = tree_eval(tree->rnode->lnode);
1983         left = force_number(s1);
1984         right = force_number(s2);
1985         free_temp(s1);
1986         free_temp(s2);
1987
1988         if (do_lint) {
1989                 if (left < 0 || right < 0)
1990                         warning("xor(%lf, %lf): negative values will give strange results", left, right);
1991                 if (double_to_int(left) != left || double_to_int(right) != right)
1992                         warning("xor(%lf, %lf): fractional values will be truncated", left, right);
1993         }
1994
1995         uleft = (unsigned long) left;
1996         uright = (unsigned long) right;
1997
1998         result = uleft ^ uright;
1999         return tmp_number((AWKNUM) result);
2000 }
2001
2002 /* do_compl --- perform a ~ operation */
2003
2004 NODE *
2005 do_compl(tree)
2006 NODE *tree;
2007 {
2008         NODE *tmp;
2009         double d;
2010         unsigned long uval;
2011
2012         tmp = tree_eval(tree->lnode);
2013         d = force_number(tmp);
2014         free_temp(tmp);
2015
2016         if (do_lint) {
2017                 if (d < 0)
2018                         warning("compl(%lf): negative value will give strange results", d);
2019                 if (double_to_int(d) != d)
2020                         warning("compl(%lf): fractional value will be truncated", d);
2021         }
2022
2023         uval = (unsigned long) d;
2024         uval = ~ uval;
2025         return tmp_number((AWKNUM) uval);
2026 }
2027
2028 /* do_strtonum --- the strtonum function */
2029
2030 NODE *
2031 do_strtonum(tree)
2032 NODE *tree;
2033 {
2034         NODE *tmp;
2035         double d, arg;
2036
2037         tmp = tree_eval(tree->lnode);
2038
2039         if ((tmp->flags & (NUM|NUMBER)) != 0)
2040                 d = (double) force_number(tmp);
2041         else if (isnondecimal(tmp->stptr))
2042                 d = nondec2awknum(tmp->stptr, tmp->stlen);
2043         else
2044                 d = (double) force_number(tmp);
2045
2046         free_temp(tmp);
2047         return tmp_number((AWKNUM) d);
2048 }
2049 #endif /* BITOPS */
2050
2051 #if defined(BITOPS) || defined(NONDECDATA)
2052 /* nondec2awknum --- convert octal or hex value to double */
2053
2054 /*
2055  * Because of awk's concatenation rules and the way awk.y:yylex()
2056  * collects a number, this routine has to be willing to stop on the
2057  * first invalid character.
2058  */
2059
2060 AWKNUM
2061 nondec2awknum(str, len)
2062 char *str;
2063 size_t len;
2064 {
2065         AWKNUM retval = 0.0;
2066         char save;
2067         short val;
2068
2069         if (*str == '0' && (str[1] == 'x' || str[1] == 'X')) {
2070                 assert(len > 2);
2071
2072                 for (str += 2, len -= 2; len > 0; len--, str++) {
2073                         switch (*str) {
2074                         case '0':
2075                         case '1':
2076                         case '2':
2077                         case '3':
2078                         case '4':
2079                         case '5':
2080                         case '6':
2081                         case '7':
2082                         case '8':
2083                         case '9':
2084                                 val = *str - '0';
2085                                 break;
2086                         case 'a':
2087                         case 'b':
2088                         case 'c':
2089                         case 'd':
2090                         case 'e':
2091                         case 'f':
2092                                 val = *str - 'a' + 10;
2093                                 break;
2094                         case 'A':
2095                         case 'B':
2096                         case 'C':
2097                         case 'D':
2098                         case 'E':
2099                         case 'F':
2100                                 val = *str - 'A' + 10;
2101                                 break;
2102                         default:
2103                                 goto done;
2104                         }
2105                         retval = (retval * 16) + val;
2106                 }
2107         } else if (*str == '0') {
2108                 if (strchr(str, '8') != NULL || strchr(str, '9') != NULL)
2109                         goto decimal;
2110                 for (; len > 0; len--) {
2111                         if (! isdigit(*str))
2112                                 goto done;
2113                         retval = (retval * 8) + (*str - '0');
2114                         str++;
2115                 }
2116         } else {
2117 decimal:
2118                 save = str[len];
2119                 retval = atof(str);
2120                 str[len] = save;
2121         }
2122 done:
2123         return retval;
2124 }
2125 #endif /* defined(BITOPS) || defined(NONDECDATA) */