2 * builtin.c - Builtin functions and various utility procedures
6 * Copyright (C) 1986, 1988, 1989, 1991-2000 the Free Software Foundation, Inc.
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
25 * $FreeBSD: src/contrib/awk/builtin.c,v 1.7.2.1 2001/01/23 22:08:30 asmodai Exp $
26 * $DragonFly: src/contrib/awk/Attic/builtin.c,v 1.2 2003/06/17 04:23:58 dillon Exp $
38 /* can declare these, since we always use the random shipped with gawk */
39 extern char *initstate P((unsigned seed, char *state, int n));
40 extern char *setstate P((char *state));
41 extern long random P((void));
42 extern void srandom P((unsigned int seed));
45 extern NODE **fields_arr;
46 extern int output_is_tty;
48 static NODE *sub_common P((NODE *tree, int how_many, int backdigs));
49 NODE *format_tree P((const char *, int, NODE *));
52 /* Work around a problem in conversion of doubles to exact integers. */
54 #define Floor(n) floor((n) * (1.0 + DBL_EPSILON))
55 #define Ceil(n) ceil((n) * (1.0 + DBL_EPSILON))
57 /* Force the standard C compiler to use the library math functions. */
58 extern double exp(double);
59 double (*Exp)() = exp;
60 #define exp(x) (*Exp)(x)
61 extern double log(double);
62 double (*Log)() = log;
63 #define log(x) (*Log)(x)
65 #define Floor(n) floor(n)
66 #define Ceil(n) ceil(n)
69 #define DEFAULT_G_PRECISION 6
71 #ifdef GFMT_WORKAROUND
72 /* semi-temporary hack, mostly to gracefully handle VMS */
73 static void sgfmt P((char *buf, const char *format, int alt,
74 int fwidth, int precision, double value));
75 #endif /* GFMT_WORKAROUND */
78 * Since we supply the version of random(), we know what
81 #define GAWK_RANDOM_MAX 0x7fffffffL
83 static void efwrite P((const void *ptr, size_t size, size_t count, FILE *fp,
84 const char *from, struct redirect *rp, int flush));
86 /* efwrite --- like fwrite, but with error checking */
89 efwrite(ptr, size, count, fp, from, rp, flush)
98 if (fwrite(ptr, size, count, fp) != count)
101 && ((fp == stdout && output_is_tty)
102 || (rp && (rp->flag & RED_NOBUF)))) {
110 fatal("%s to \"%s\" failed (%s)", from,
111 rp ? rp->value : "standard output",
112 errno ? strerror(errno) : "reason unknown");
115 /* do_exp --- exponential function */
124 tmp = tree_eval(tree->lnode);
125 d = force_number(tmp);
130 warning("exp argument %g is out of range", d);
131 return tmp_number((AWKNUM) res);
134 /* stdfile --- return fp for a standard file */
137 * This function allows `fflush("/dev/stdout")' to work.
138 * The other files will be available via getredirect().
139 * /dev/stdin is not included, since fflush is only for output.
148 if (STREQN(name, "/dev/stderr", 11))
150 else if (STREQN(name, "/dev/stdout", 11))
157 /* do_fflush --- flush output, either named file or pipe or everything */
169 /* fflush() --- flush stdout */
171 status = fflush(stdout);
172 return tmp_number((AWKNUM) status);
175 tmp = tree_eval(tree->lnode);
176 tmp = force_string(tmp);
179 /* fflush("") --- flush all */
180 if (tmp->stlen == 0) {
183 return tmp_number((AWKNUM) status);
186 rp = getredirect(tmp->stptr, tmp->stlen);
189 if ((rp->flag & (RED_WRITE|RED_APPEND)) == 0) {
192 "fflush: cannot flush: %s `%s' opened for reading, not writing",
193 (rp->flag & RED_PIPE) ? "pipe" : "file",
196 return tmp_number((AWKNUM) status);
201 } else if ((fp = stdfile(tmp->stptr, tmp->stlen)) != NULL) {
204 warning("fflush: `%s' is not an open file or pipe", file);
206 return tmp_number((AWKNUM) status);
209 /* do_index --- find index of a string */
216 register char *p1, *p2;
217 register size_t l1, l2;
221 s1 = tree_eval(tree->lnode);
222 s2 = tree_eval(tree->rnode->lnode);
231 /* IGNORECASE will already be false if posix */
236 if (casetable[(int)*p1] == casetable[(int)*p2]
237 && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) {
238 ret = 1 + s1->stlen - l1;
249 && (l2 == 1 || STREQN(p1, p2, l2))) {
250 ret = 1 + s1->stlen - l1;
259 return tmp_number((AWKNUM) ret);
262 /* double_to_int --- convert double to int, used several places */
275 /* do_int --- convert double to int for awk */
284 tmp = tree_eval(tree->lnode);
285 d = force_number(tmp);
286 d = double_to_int(d);
288 return tmp_number((AWKNUM) d);
291 /* do_length --- length of a string or $0 */
300 tmp = tree_eval(tree->lnode);
301 len = force_string(tmp)->stlen;
303 return tmp_number((AWKNUM) len);
306 /* do_log --- the log function */
315 tmp = tree_eval(tree->lnode);
316 arg = (double) force_number(tmp);
318 warning("log called with negative argument %g", arg);
321 return tmp_number((AWKNUM) d);
325 * format_tree() formats nodes of a tree, starting with a left node,
326 * according to a fmt_string providing a format like that of the
327 * printf family in the C library. Returns a string node whose value
328 * is the formatted string. Called by the sprintf function.
330 * It is one of the uglier parts of gawk. Thanks to Michal Jaegermann
331 * for taming this beast and making it compatible with ANSI C.
335 format_tree(fmt_string, n0, carg)
336 const char *fmt_string;
340 /* copy 'l' bytes from 's' to 'obufout' checking for space in the process */
341 /* difference of pointers should be of ptrdiff_t type, but let us be kind */
342 #define bchunk(s, l) if (l) { \
343 while ((l) > ofre) { \
344 long olen = obufout - obuf; \
345 erealloc(obuf, char *, osiz * 2, "format_tree"); \
348 obufout = obuf + olen; \
350 memcpy(obufout, s, (size_t) (l)); \
355 /* copy one byte from 's' to 'obufout' checking for space in the process */
356 #define bchunk_one(s) { \
358 long olen = obufout - obuf; \
359 erealloc(obuf, char *, osiz * 2, "format_tree"); \
362 obufout = obuf + olen; \
368 /* Is there space for something L big in the buffer? */
369 #define chksize(l) if ((l) > ofre) { \
370 long olen = obufout - obuf; \
371 erealloc(obuf, char *, osiz * 2, "format_tree"); \
372 obufout = obuf + olen; \
378 * Get the next arg to be formatted. If we've run out of args,
379 * return "" (Null string)
381 #define parse_next_arg() { \
382 if (carg == NULL) { \
386 arg = tree_eval(carg->lnode); \
387 carg = carg->rnode; \
393 char *obuf, *obufout;
400 int lj, alt, big, bigbig, small, have_prec, need_format;
402 #ifdef sun386 /* Can't cast unsigned (int/long) from ptr->value */
403 long tmp_uval; /* on 386i 4.0.1 C compiler -- it just hangs */
408 char cpbuf[30]; /* if we have numbers bigger than 30 */
409 char *cend = &cpbuf[30];/* chars, we lose, but seems unlikely */
413 char signchar = FALSE;
415 int zero_flag = FALSE;
416 static char sp[] = " ";
417 static char zero_string[] = "0";
418 static char lchbuf[] = "0123456789abcdef";
419 static char Uchbuf[] = "0123456789ABCDEF";
421 #define INITIAL_OUT_SIZE 512
422 emalloc(obuf, char *, INITIAL_OUT_SIZE, "format_tree");
424 osiz = INITIAL_OUT_SIZE;
429 s0 = s1 = fmt_string;
444 lj = alt = big = bigbig = small = FALSE;
451 if (n0-- <= 0) /* ran out early! */
454 switch (cs1 = *s1++) {
455 case (-1): /* dummy case to allow for checking */
458 break; /* reject as a valid format */
468 * Only turn on zero_flag if we haven't seen
469 * the field width or precision yet. Otherwise,
470 * screws up floating point formatting.
491 * with a negative precision *cur is already set
492 * to -1, so it will remain negative, but we have
493 * to "eat" precision digits in any case
495 while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
497 *cur = *cur * 10 + *s1++ - '0';
499 if (prec < 0) /* negative precision is discarded */
503 if (n0 == 0) /* badly formatted control string */
510 *cur = force_number(arg);
512 if (*cur < 0 && cur == &fw) {
524 case ' ': /* print ' ' or '-' */
525 /* 'space' flag is ignored */
526 /* if '+' already present */
527 if (signchar != FALSE)
530 case '+': /* print '+' or '-' */
540 fill = sp; /* if left justified then other */
541 lj++; /* filling is ignored */
556 static int warned = FALSE;
558 if (do_lint && ! warned) {
559 warning("`l' is meaningless in awk formats; ignored");
563 fatal("'l' is not permitted in POSIX awk formats");
571 static int warned = FALSE;
573 if (do_lint && ! warned) {
574 warning("`L' is meaningless in awk formats; ignored");
578 fatal("'L' is not permitted in POSIX awk formats");
586 static int warned = FALSE;
588 if (do_lint && ! warned) {
589 warning("`h' is meaningless in awk formats; ignored");
593 fatal("'h' is not permitted in POSIX awk formats");
599 if (zero_flag && ! lj)
602 /* user input that looks numeric is numeric */
603 if ((arg->flags & (MAYBE_NUM|NUMBER)) == MAYBE_NUM)
604 (void) force_number(arg);
605 if (arg->flags & NUMBER) {
607 tmp_uval = arg->numbr;
608 uval = (unsigned long) tmp_uval;
610 uval = (unsigned long) arg->numbr;
617 if (have_prec == FALSE)
619 else if (prec > arg->stlen)
625 if (zero_flag && ! lj)
628 arg = force_string(arg);
629 if (! have_prec || prec > arg->stlen)
637 tmpval = force_number(arg);
640 * ``The result of converting a zero value with a
641 * precision of zero is no characters.''
643 if (have_prec && prec == 0 && tmpval == 0)
647 if (tmpval < LONG_MIN)
650 uval = - (unsigned long) (long) tmpval;
652 /* Use !, so that NaNs are out of range.
653 The cast avoids a SunOS 4.1.x cc bug. */
654 if (! (tmpval <= (unsigned long) ULONG_MAX))
657 uval = (unsigned long) tmpval;
660 *--cp = (char) ('0' + uval % 10);
664 /* add more output digits to match the precision */
666 while (cend - cp < prec)
675 * When to fill with zeroes is of course not simple.
676 * First: No zero fill if left-justifying.
677 * Next: There seem to be two cases:
678 * A '0' without a precision, e.g. %06d
679 * A precision with no field width, e.g. %.10d
680 * Any other case, we don't want to fill with zeroes.
683 && ((zero_flag && ! have_prec)
684 || (fw == 0 && have_prec)))
689 if (fw > prec && ! lj && fill != sp
690 && (*cp == '-' || signchar)) {
698 chbuf = Uchbuf; /* FALL THROUGH */
700 base += 6; /* FALL THROUGH */
702 base += 2; /* FALL THROUGH */
707 tmpval = force_number(arg);
710 * ``The result of converting a zero value with a
711 * precision of zero is no characters.''
713 * If I remember the ANSI C standard, though,
714 * it says that for octal conversions
715 * the precision is artificially increased
716 * to add an extra 0 if # is supplied.
718 * printf("%#.0o\n", 0);
721 if (! alt && have_prec && prec == 0 && tmpval == 0)
725 if (tmpval < LONG_MIN)
727 uval = (unsigned long) (long) tmpval;
729 /* Use !, so that NaNs are out of range.
730 The cast avoids a SunOS 4.1.x cc bug. */
731 if (! (tmpval <= (unsigned long) ULONG_MAX))
733 uval = (unsigned long) tmpval;
736 * When to fill with zeroes is of course not simple.
737 * First: No zero fill if left-justifying.
738 * Next: There seem to be two cases:
739 * A '0' without a precision, e.g. %06d
740 * A precision with no field width, e.g. %.10d
741 * Any other case, we don't want to fill with zeroes.
744 && ((zero_flag && ! have_prec)
745 || (fw == 0 && have_prec)))
748 *--cp = chbuf[uval % base];
752 /* add more output digits to match the precision */
754 while (cend - cp < prec)
758 if (alt && tmpval != 0) {
767 } else if (base == 8)
781 bchunk(cp, (int) prec);
791 /* out of range - emergency use of %g format */
802 tmpval = force_number(arg);
806 prec = DEFAULT_G_PRECISION;
807 chksize(fw + prec + 9); /* 9 == slop */
823 #ifndef GFMT_WORKAROUND
824 (void) sprintf(obufout, cpbuf,
825 (int) fw, (int) prec, (double) tmpval);
826 #else /* GFMT_WORKAROUND */
827 if (cs1 == 'g' || cs1 == 'G')
828 sgfmt(obufout, cpbuf, (int) alt,
829 (int) fw, (int) prec, (double) tmpval);
831 (void) sprintf(obufout, cpbuf,
832 (int) fw, (int) prec, (double) tmpval);
833 #endif /* GFMT_WORKAROUND */
834 len = strlen(obufout);
843 fatal("%s\n\t`%s'\n\t%*s%s",
844 "not enough arguments to satisfy format string",
845 fmt_string, s1 - fmt_string - 2, "",
846 "^ ran out for this one"
852 "printf format specifier does not have control letter");
855 "too many arguments supplied for format string");
858 r = make_str_node(obuf, obufout - obuf, ALREADY_MALLOCED);
863 /* do_sprintf --- perform sprintf */
870 NODE *sfmt = force_string(tree_eval(tree->lnode));
872 r = format_tree(sfmt->stptr, sfmt->stlen, tree->rnode);
877 /* do_printf --- perform printf, including redirection */
883 struct redirect *rp = NULL;
886 if (tree->lnode == NULL) {
887 if (do_traditional) {
889 warning("printf: no arguments");
890 return; /* bwk accepts it silently */
892 fatal("printf: no arguments");
895 if (tree->rnode != NULL) {
896 int errflg; /* not used, sigh */
898 rp = redirect(tree->rnode, &errflg);
907 tree = do_sprintf(tree->lnode);
908 efwrite(tree->stptr, sizeof(char), tree->stlen, fp, "printf", rp, TRUE);
912 /* do_sqrt --- do the sqrt function */
921 tmp = tree_eval(tree->lnode);
922 arg = (double) force_number(tmp);
925 warning("sqrt called with negative argument %g", arg);
926 return tmp_number((AWKNUM) sqrt(arg));
929 /* do_substr --- do the substr function */
937 register size_t indx;
939 double d_index, d_length;
941 t1 = force_string(tree_eval(tree->lnode));
942 t2 = tree_eval(tree->rnode->lnode);
943 d_index = force_number(t2);
948 warning("substr: start index %g invalid, using 1",
952 if (do_lint && double_to_int(d_index) != d_index)
953 warning("substr: non-integer start index %g will be truncated",
956 indx = d_index - 1; /* awk indices are from 1, C's are from 0 */
958 if (tree->rnode->rnode == NULL) { /* third arg. missing */
959 /* use remainder of string */
960 length = t1->stlen - indx;
962 t3 = tree_eval(tree->rnode->rnode->lnode);
963 d_length = force_number(t3);
965 if (d_length <= 0.0) {
967 warning("substr: length %g is <= 0", d_length);
971 if (do_lint && double_to_int(d_length) != d_length)
973 "substr: non-integer length %g will be truncated",
978 if (t1->stlen == 0) {
980 warning("substr: source string is zero length");
984 if ((indx + length) > t1->stlen) {
987 "substr: length %d at position %d exceeds length of first argument (%d)",
988 length, indx+1, t1->stlen);
989 length = t1->stlen - indx;
991 if (indx >= t1->stlen) {
993 warning("substr: start index %d is past end of string",
998 r = tmp_string(t1->stptr + indx, length);
1003 /* do_strftime --- format a time stamp */
1009 NODE *t1, *t2, *ret;
1013 size_t buflen, bufsize;
1015 static char def_format[] = "%a %b %d %H:%M:%S %Z %Y";
1019 /* set defaults first */
1020 format = def_format; /* traditional date format */
1021 formatlen = strlen(format);
1022 (void) time(&fclock); /* current time of day */
1025 if (tree != NULL) { /* have args */
1026 if (tree->lnode != NULL) {
1027 t1 = force_string(tree_eval(tree->lnode));
1029 formatlen = t1->stlen;
1030 if (formatlen == 0) {
1032 warning("strftime called with empty format string");
1034 return tmp_string("", 0);
1038 if (tree->rnode != NULL) {
1039 t2 = tree_eval(tree->rnode->lnode);
1040 fclock = (time_t) force_number(t2);
1045 tm = localtime(&fclock);
1048 bufsize = sizeof(buf);
1051 buflen = strftime(bufp, bufsize, format, tm);
1053 * buflen can be zero EITHER because there's not enough
1054 * room in the string, or because the control command
1055 * goes to the empty string. Make a reasonable guess that
1056 * if the buffer is 1024 times bigger than the length of the
1057 * format string, it's not failing for lack of room.
1058 * Thanks to Paul Eggert for pointing out this issue.
1060 if (buflen > 0 || bufsize >= 1024 * formatlen)
1064 emalloc(bufp, char *, bufsize, "do_strftime");
1066 erealloc(bufp, char *, bufsize, "do_strftime");
1068 ret = tmp_string(bufp, buflen);
1076 /* do_systime --- get the time of day */
1084 (void) time(&lclock);
1085 return tmp_number((AWKNUM) lclock);
1090 /* do_system --- run an external command */
1101 (void) flush_io(); /* so output is synchronous with gawk's */
1102 tmp = tree_eval(tree->lnode);
1103 cmd = force_string(tmp)->stptr;
1106 /* ensure arg to system is zero-terminated */
1109 * From: David Trueman <david@cs.dal.ca>
1110 * To: arnold@cc.gatech.edu (Arnold Robbins)
1111 * Date: Wed, 3 Nov 1993 12:49:41 -0400
1113 * It may not be necessary to save the character, but
1114 * I'm not sure. It would normally be the field
1115 * separator. If the parse has not yet gone beyond
1116 * that, it could mess up (although I doubt it). If
1117 * FIELDWIDTHS is being used, it might be the first
1118 * character of the next field. Unless someone wants
1119 * to check it out exhaustively, I suggest saving it
1122 save = cmd[tmp->stlen];
1123 cmd[tmp->stlen] = '\0';
1126 ret = (ret >> 8) & 0xff;
1128 cmd[tmp->stlen] = save;
1131 return tmp_number((AWKNUM) ret);
1134 extern NODE **fmt_list; /* declared in eval.c */
1136 /* do_print --- print items, separated by OFS, terminated with ORS */
1140 register NODE *tree;
1143 struct redirect *rp = NULL;
1150 int errflg; /* not used, sigh */
1152 rp = redirect(tree->rnode, &errflg);
1163 * General idea is to evaluate all the expressions first and
1164 * then print them, otherwise you get surprising behavior.
1165 * See test/prtoeval.awk for an example program.
1167 save = tree = tree->lnode;
1168 for (numnodes = 0; tree != NULL; tree = tree->rnode)
1170 emalloc(t, NODE **, numnodes * sizeof(NODE *), "do_print");
1173 for (i = 0; tree != NULL; i++, tree = tree->rnode) {
1176 /* Here lies the wumpus. R.I.P. */
1177 n = tree_eval(tree->lnode);
1181 if ((t[i]->flags & (NUMBER|STRING)) == NUMBER) {
1182 if (OFMTidx == CONVFMTidx)
1183 (void) force_string(t[i]);
1185 tval = tmp_number(t[i]->numbr);
1187 t[i] = format_val(OFMT, OFMTidx, tval);
1192 for (i = 0; i < numnodes; i++) {
1193 efwrite(t[i]->stptr, sizeof(char), t[i]->stlen, fp, "print", rp, FALSE);
1196 if (i != numnodes - 1 && OFSlen > 0)
1197 efwrite(OFS, sizeof(char), (size_t) OFSlen,
1198 fp, "print", rp, FALSE);
1202 efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, TRUE);
1207 /* do_tolower --- lower case a string */
1214 register unsigned char *cp, *cp2;
1216 t1 = tree_eval(tree->lnode);
1217 t1 = force_string(t1);
1218 t2 = tmp_string(t1->stptr, t1->stlen);
1219 for (cp = (unsigned char *)t2->stptr,
1220 cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++)
1227 /* do_toupper --- upper case a string */
1234 register unsigned char *cp, *cp2;
1236 t1 = tree_eval(tree->lnode);
1237 t1 = force_string(t1);
1238 t2 = tmp_string(t1->stptr, t1->stlen);
1239 for (cp = (unsigned char *)t2->stptr,
1240 cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++)
1247 /* do_atan2 --- do the atan2 function */
1256 t1 = tree_eval(tree->lnode);
1257 t2 = tree_eval(tree->rnode->lnode);
1258 d1 = force_number(t1);
1259 d2 = force_number(t2);
1262 return tmp_number((AWKNUM) atan2(d1, d2));
1265 /* do_sin --- do the sin function */
1274 tmp = tree_eval(tree->lnode);
1275 d = sin((double) force_number(tmp));
1277 return tmp_number((AWKNUM) d);
1280 /* do_cos --- do the cos function */
1289 tmp = tree_eval(tree->lnode);
1290 d = cos((double) force_number(tmp));
1292 return tmp_number((AWKNUM) d);
1295 /* do_rand --- do the rand function */
1297 static int firstrand = TRUE;
1298 static char state[512];
1306 (void) initstate((unsigned) 1, state, sizeof state);
1310 return tmp_number((AWKNUM) random() / GAWK_RANDOM_MAX);
1313 /* do_srand --- seed the random number generator */
1320 static long save_seed = 1;
1321 long ret = save_seed; /* SVR4 awk srand returns previous seed */
1324 (void) initstate((unsigned) 1, state, sizeof state);
1325 /* don't need to srandom(1), we're changing the seed below */
1328 (void) setstate(state);
1332 srandom((unsigned int) (save_seed = (long) time((time_t *) 0) ^ getpid()));
1334 srandom((unsigned int) (save_seed = (long) time((time_t *) 0)));
1337 tmp = tree_eval(tree->lnode);
1338 srandom((unsigned int) (save_seed = (long) force_number(tmp)));
1341 return tmp_number((AWKNUM) ret);
1344 /* do_match --- match a regexp, set RSTART and RLENGTH */
1355 t1 = force_string(tree_eval(tree->lnode));
1356 tree = tree->rnode->lnode;
1357 rp = re_update(tree);
1358 rstart = research(rp, t1->stptr, 0, t1->stlen, TRUE);
1359 if (rstart >= 0) { /* match succeded */
1360 rstart++; /* 1-based indexing */
1361 rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr);
1362 } else { /* match failed */
1367 unref(RSTART_node->var_value);
1368 RSTART_node->var_value = make_number((AWKNUM) rstart);
1369 unref(RLENGTH_node->var_value);
1370 RLENGTH_node->var_value = make_number(rlength);
1371 return tmp_number((AWKNUM) rstart);
1374 /* sub_common --- the common code (does the work) for sub, gsub, and gensub */
1377 * Gsub can be tricksy; particularly when handling the case of null strings.
1378 * The following awk code was useful in debugging problems. It is too bad
1379 * that it does not readily translate directly into the C code, below.
1381 * #! /usr/local/bin/mawk -f
1384 * TRUE = 1; FALSE = 0
1385 * print "--->", mygsub("abc", "b+", "FOO")
1386 * print "--->", mygsub("abc", "x*", "X")
1387 * print "--->", mygsub("abc", "b*", "X")
1388 * print "--->", mygsub("abc", "c", "X")
1389 * print "--->", mygsub("abc", "c+", "X")
1390 * print "--->", mygsub("abc", "x*$", "X")
1393 * function mygsub(str, regex, replace, origstr, newstr, eosflag, nonzeroflag)
1396 * eosflag = nonzeroflag = FALSE
1397 * while (match(str, regex)) {
1398 * if (RLENGTH > 0) { # easy case
1399 * nonzeroflag = TRUE
1400 * if (RSTART == 1) { # match at front of string
1401 * newstr = newstr replace
1403 * newstr = newstr substr(str, 1, RSTART-1) replace
1405 * str = substr(str, RSTART+RLENGTH)
1406 * } else if (nonzeroflag) {
1407 * # last match was non-zero in length, and at the
1408 * # current character, we get a zero length match,
1409 * # which we don't really want, so skip over it
1410 * newstr = newstr substr(str, 1, 1)
1411 * str = substr(str, 2)
1412 * nonzeroflag = FALSE
1415 * if (RSTART == 1) {
1416 * newstr = newstr replace substr(str, 1, 1)
1417 * str = substr(str, 2)
1419 * return newstr str replace
1422 * if (length(str) == 0)
1428 * if (length(str) > 0)
1429 * newstr = newstr str # rest of string
1436 * NB: `howmany' conflicts with a SunOS macro in <sys/param.h>.
1440 sub_common(tree, how_many, backdigs)
1442 int how_many, backdigs;
1444 register char *scan;
1445 register char *bp, *cp;
1448 register char *matchend;
1449 register size_t len;
1460 NODE *s; /* subst. pattern */
1461 NODE *t; /* string to make sub. in; $0 if none given */
1463 NODE **lhs = &tree; /* value not used -- just different from NULL */
1465 Func_ptr after_assign = NULL;
1467 int global = (how_many == -1);
1469 int lastmatchnonzero;
1472 rp = re_update(tmp);
1479 t = force_string(tree_eval(tmp));
1481 /* do the search early to avoid work on non-match */
1482 if (research(rp, t->stptr, 0, t->stlen, TRUE) == -1 ||
1483 RESTART(rp, t->stptr) > t->stlen) {
1485 return tmp_number((AWKNUM) 0.0);
1488 if (tmp->type == Node_val)
1491 lhs = get_lhs(tmp, &after_assign);
1494 * create a private copy of the string
1496 if (t->stref > 1 || (t->flags & (PERM|FIELD)) != 0) {
1497 unsigned int saveflags;
1499 saveflags = t->flags;
1500 t->flags &= ~MALLOC;
1502 t->flags = saveflags;
1508 buflen = textlen + 2;
1510 s = force_string(tree_eval(s));
1512 replend = repl + s->stlen;
1513 repllen = replend - repl;
1514 emalloc(buf, char *, buflen + 2, "sub_common");
1516 buf[buflen + 1] = '\0';
1518 for (scan = repl; scan < replend; scan++) {
1522 } else if (*scan == '\\') {
1523 if (backdigs) { /* gensub, behave sanely */
1524 if (ISDIGIT(scan[1])) {
1527 } else { /* \q for any q --> q */
1531 } else { /* (proposed) posix '96 mode */
1532 if (strncmp(scan, "\\\\\\&", 4) == 0) {
1536 } else if (strncmp(scan, "\\\\&", 3) == 0) {
1537 /* \\& --> \<string> */
1541 } else if (scan[1] == '&') {
1546 leave alone, it goes into the output */
1551 lastmatchnonzero = FALSE;
1553 for (current = 1;; current++) {
1555 matchstart = t->stptr + RESTART(rp, t->stptr);
1556 matchend = t->stptr + REEND(rp, t->stptr);
1559 * create the result, copying in parts of the original
1562 len = matchstart - text + repllen
1563 + ampersands * (matchend - matchstart);
1565 while (buflen < (sofar + len + 1)) {
1567 erealloc(buf, char *, buflen, "sub_common");
1570 for (scan = text; scan < matchstart; scan++)
1572 if (global || current == how_many) {
1574 * If the current match matched the null string,
1575 * and the last match didn't and did a replacement,
1576 * then skip this one.
1578 if (lastmatchnonzero && matchstart == matchend) {
1579 lastmatchnonzero = FALSE;
1584 * If replacing all occurrences, or this is the
1585 * match we want, copy in the replacement text,
1586 * making substitutions as we go.
1588 for (scan = repl; scan < replend; scan++)
1590 for (cp = matchstart; cp < matchend; cp++)
1592 else if (*scan == '\\') {
1593 if (backdigs) { /* gensub, behave sanely */
1594 if (ISDIGIT(scan[1])) {
1595 int dig = scan[1] - '0';
1599 + SUBPATSTART(rp, t->stptr, dig);
1601 + SUBPATEND(rp, t->stptr, dig);
1603 for (cp = start; cp < end; cp++)
1606 } else /* \q for any q --> q */
1608 } else { /* posix '96 mode, bleah */
1609 if (strncmp(scan, "\\\\\\&", 4) == 0) {
1614 } else if (strncmp(scan, "\\\\&", 3) == 0) {
1615 /* \\& --> \<string> */
1617 for (cp = matchstart; cp < matchend; cp++)
1620 } else if (scan[1] == '&') {
1629 if (matchstart != matchend)
1630 lastmatchnonzero = TRUE;
1633 * don't want this match, skip over it by copying
1636 for (cp = matchstart; cp < matchend; cp++)
1640 /* catch the case of gsub(//, "blah", whatever), i.e. empty regexp */
1641 if (matchstart == matchend && matchend < text + textlen) {
1645 textlen = text + textlen - matchend;
1648 if ((current >= how_many && !global)
1649 || ((long) textlen <= 0 && matchstart == matchend)
1650 || research(rp, t->stptr, text - t->stptr, textlen, TRUE) == -1)
1655 if (buflen - sofar - textlen - 1) {
1656 buflen = sofar + textlen + 2;
1657 erealloc(buf, char *, buflen, "sub_common");
1660 for (scan = matchend; scan < text + textlen; scan++)
1669 if (matches > 0 && lhs) {
1674 if (after_assign != NULL)
1676 t->flags &= ~(NUM|NUMBER);
1678 return tmp_number((AWKNUM) matches);
1681 /* do_gsub --- global substitution */
1687 return sub_common(tree, -1, FALSE);
1690 /* do_sub --- single substitution */
1696 return sub_common(tree, 1, FALSE);
1699 /* do_gensub --- fix up the tree for sub_common for the gensub function */
1705 NODE n1, n2, n3, *t, *tmp, *target, *ret;
1706 long how_many = 1; /* default is one substitution */
1710 * We have to pull out the value of the global flag, and
1711 * build up a tree without the flag in it, turning it into the
1712 * kind of tree that sub_common() expects. It helps to draw
1713 * a picture of this ...
1716 n2 = *(tree->rnode);
1719 t = tree_eval(n2.rnode->lnode); /* value of global flag */
1721 tmp = force_string(tree_eval(n2.rnode->rnode->lnode)); /* target */
1724 * We make a copy of the original target string, and pass that
1725 * in to sub_common() as the target to make the substitution in.
1726 * We will then return the result string as the return value of
1729 target = make_string(tmp->stptr, tmp->stlen);
1732 n3 = *(n2.rnode->rnode);
1736 if ((t->flags & (STR|STRING)) != 0) {
1737 if (t->stlen > 0 && (t->stptr[0] == 'g' || t->stptr[0] == 'G'))
1742 d = force_number(t);
1751 ret = sub_common(&n1, how_many, TRUE);
1755 * Note that we don't care what sub_common() returns, since the
1756 * easiest thing for the programmer is to return the string, even
1757 * if no substitutions were done.
1759 target->flags |= TEMP;
1763 #ifdef GFMT_WORKAROUND
1765 * printf's %g format [can't rely on gcvt()]
1766 * caveat: don't use as argument to *printf()!
1767 * 'format' string HAS to be of "<flags>*.*g" kind, or we bomb!
1770 sgfmt(buf, format, alt, fwidth, prec, g)
1771 char *buf; /* return buffer; assumed big enough to hold result */
1773 int alt; /* use alternate form flag */
1774 int fwidth; /* field width in a format */
1775 int prec; /* indicates desired significant digits, not decimal places */
1776 double g; /* value to format */
1779 register char *gpos;
1780 register char *d, *e, *p;
1783 strncpy(dform, format, sizeof dform - 1);
1784 dform[sizeof dform - 1] = '\0';
1785 gpos = strrchr(dform, '.');
1787 if (g == 0.0 && ! alt) { /* easy special case */
1790 (void) sprintf(buf, dform, fwidth, 0);
1794 /* advance to location of 'g' in the format */
1795 while (*gpos && *gpos != 'g' && *gpos != 'G')
1798 if (prec <= 0) /* negative precision is ignored */
1799 prec = (prec < 0 ? DEFAULT_G_PRECISION : 1);
1803 /* start with 'e' format (it'll provide nice exponent) */
1806 (void) sprintf(buf, dform, fwidth, prec, g);
1807 if ((e = strrchr(buf, 'e')) != NULL) { /* find exponent */
1808 int expn = atoi(e+1); /* fetch exponent */
1809 if (expn >= -4 && expn <= prec) { /* per K&R2, B1.2 */
1810 /* switch to 'f' format and re-do */
1812 prec -= expn; /* decimal precision */
1813 (void) sprintf(buf, dform, fwidth, prec, g);
1814 e = buf + strlen(buf);
1822 /* if 'alt' in force, then trailing zeros are not removed */
1823 if (! alt && (d = strrchr(buf, '.')) != NULL) {
1824 /* throw away an excess of precision */
1825 for (p = e; p > d && *--p == '0'; )
1831 /* and do that once again */
1835 (void) sprintf(buf, dform, fwidth, prec, g);
1838 #endif /* GFMT_WORKAROUND */
1841 #define BITS_PER_BYTE 8 /* if not true, you lose. too bad. */
1843 /* do_lshift --- perform a << operation */
1850 unsigned long uval, ushift, result;
1853 s1 = tree_eval(tree->lnode);
1854 s2 = tree_eval(tree->rnode->lnode);
1855 val = force_number(s1);
1856 shift = force_number(s2);
1861 if (val < 0 || shift < 0)
1862 warning("lshift(%lf, %lf): negative values will give strange results", val, shift);
1863 if (double_to_int(val) != val || double_to_int(shift) != shift)
1864 warning("lshift(%lf, %lf): fractional values will be truncated", val, shift);
1865 if (shift > (sizeof(unsigned long) * BITS_PER_BYTE))
1866 warning("lshift(%lf, %lf): too large shift value will give strange results", val, shift);
1869 uval = (unsigned long) val;
1870 ushift = (unsigned long) shift;
1872 result = uval << ushift;
1873 return tmp_number((AWKNUM) result);
1876 /* do_rshift --- perform a >> operation */
1883 unsigned long uval, ushift, result;
1886 s1 = tree_eval(tree->lnode);
1887 s2 = tree_eval(tree->rnode->lnode);
1888 val = force_number(s1);
1889 shift = force_number(s2);
1894 if (val < 0 || shift < 0)
1895 warning("rshift(%lf, %lf): negative values will give strange results", val, shift);
1896 if (double_to_int(val) != val || double_to_int(shift) != shift)
1897 warning("rshift(%lf, %lf): fractional values will be truncated", val, shift);
1898 if (shift > (sizeof(unsigned long) * BITS_PER_BYTE))
1899 warning("rshift(%lf, %lf): too large shift value will give strange results", val, shift);
1902 uval = (unsigned long) val;
1903 ushift = (unsigned long) shift;
1905 result = uval >> ushift;
1906 return tmp_number((AWKNUM) result);
1909 /* do_and --- perform an & operation */
1916 unsigned long uleft, uright, result;
1919 s1 = tree_eval(tree->lnode);
1920 s2 = tree_eval(tree->rnode->lnode);
1921 left = force_number(s1);
1922 right = force_number(s2);
1927 if (left < 0 || right < 0)
1928 warning("and(%lf, %lf): negative values will give strange results", left, right);
1929 if (double_to_int(left) != left || double_to_int(right) != right)
1930 warning("and(%lf, %lf): fractional values will be truncated", left, right);
1933 uleft = (unsigned long) left;
1934 uright = (unsigned long) right;
1936 result = uleft & uright;
1937 return tmp_number((AWKNUM) result);
1940 /* do_or --- perform an | operation */
1947 unsigned long uleft, uright, result;
1950 s1 = tree_eval(tree->lnode);
1951 s2 = tree_eval(tree->rnode->lnode);
1952 left = force_number(s1);
1953 right = force_number(s2);
1958 if (left < 0 || right < 0)
1959 warning("or(%lf, %lf): negative values will give strange results", left, right);
1960 if (double_to_int(left) != left || double_to_int(right) != right)
1961 warning("or(%lf, %lf): fractional values will be truncated", left, right);
1964 uleft = (unsigned long) left;
1965 uright = (unsigned long) right;
1967 result = uleft | uright;
1968 return tmp_number((AWKNUM) result);
1971 /* do_xor --- perform an ^ operation */
1978 unsigned long uleft, uright, result;
1981 s1 = tree_eval(tree->lnode);
1982 s2 = tree_eval(tree->rnode->lnode);
1983 left = force_number(s1);
1984 right = force_number(s2);
1989 if (left < 0 || right < 0)
1990 warning("xor(%lf, %lf): negative values will give strange results", left, right);
1991 if (double_to_int(left) != left || double_to_int(right) != right)
1992 warning("xor(%lf, %lf): fractional values will be truncated", left, right);
1995 uleft = (unsigned long) left;
1996 uright = (unsigned long) right;
1998 result = uleft ^ uright;
1999 return tmp_number((AWKNUM) result);
2002 /* do_compl --- perform a ~ operation */
2012 tmp = tree_eval(tree->lnode);
2013 d = force_number(tmp);
2018 warning("compl(%lf): negative value will give strange results", d);
2019 if (double_to_int(d) != d)
2020 warning("compl(%lf): fractional value will be truncated", d);
2023 uval = (unsigned long) d;
2025 return tmp_number((AWKNUM) uval);
2028 /* do_strtonum --- the strtonum function */
2037 tmp = tree_eval(tree->lnode);
2039 if ((tmp->flags & (NUM|NUMBER)) != 0)
2040 d = (double) force_number(tmp);
2041 else if (isnondecimal(tmp->stptr))
2042 d = nondec2awknum(tmp->stptr, tmp->stlen);
2044 d = (double) force_number(tmp);
2047 return tmp_number((AWKNUM) d);
2051 #if defined(BITOPS) || defined(NONDECDATA)
2052 /* nondec2awknum --- convert octal or hex value to double */
2055 * Because of awk's concatenation rules and the way awk.y:yylex()
2056 * collects a number, this routine has to be willing to stop on the
2057 * first invalid character.
2061 nondec2awknum(str, len)
2065 AWKNUM retval = 0.0;
2069 if (*str == '0' && (str[1] == 'x' || str[1] == 'X')) {
2072 for (str += 2, len -= 2; len > 0; len--, str++) {
2092 val = *str - 'a' + 10;
2100 val = *str - 'A' + 10;
2105 retval = (retval * 16) + val;
2107 } else if (*str == '0') {
2108 if (strchr(str, '8') != NULL || strchr(str, '9') != NULL)
2110 for (; len > 0; len--) {
2111 if (! isdigit(*str))
2113 retval = (retval * 8) + (*str - '0');
2125 #endif /* defined(BITOPS) || defined(NONDECDATA) */