2 * io.c --- routines for dealing with input and output and records
6 * Copyright (C) 1976, 1988, 1989, 1991-2000 the Free Software Foundation, Inc.
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
25 * $FreeBSD: src/contrib/awk/io.c,v 1.4.2.1 2001/01/23 22:08:31 asmodai Exp $
29 #undef HAVE_MMAP /* for now, probably forever */
31 #ifdef HAVE_SYS_PARAM_H
32 #undef RE_DUP_MAX /* avoid spurious conflict w/regex.h */
33 #include <sys/param.h>
34 #endif /* HAVE_SYS_PARAM_H */
36 #ifdef HAVE_SYS_WAIT_H
38 #endif /* HAVE_SYS_WAIT_H */
43 #define MAP_FAILED ((caddr_t) -1)
44 #endif /* ! defined (MAP_FAILED) */
45 #endif /* HAVE_MMAP */
51 #define O_ACCMODE (O_RDONLY|O_WRONLY|O_RDWR)
54 #if ! defined(S_ISREG) && defined(S_IFREG)
55 #define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
58 #if ! defined(S_ISDIR) && defined(S_IFDIR)
59 #define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
70 #if defined(MSDOS) || defined(OS2) || defined(WIN32)
71 #define PIPES_SIMULATED
74 static IOBUF *nextfile P((int skipping));
75 static int inrec P((IOBUF *iop));
76 static int iop_close P((IOBUF *iop));
77 struct redirect *redirect P((NODE *tree, int *errflg));
78 static void close_one P((void));
79 static int close_redir P((struct redirect *rp, int exitwarn));
80 #ifndef PIPES_SIMULATED
81 static int wait_any P((int interesting));
83 static IOBUF *gawk_popen P((char *cmd, struct redirect *rp));
84 static IOBUF *iop_open P((const char *file, const char *how, IOBUF *buf));
85 static IOBUF *iop_alloc P((int fd, const char *name, IOBUF *buf));
86 static int gawk_pclose P((struct redirect *rp));
87 static int do_pathopen P((const char *file));
88 static int get_a_record P((char **out, IOBUF *iop, int rs, Regexp *RSre, int *errcode));
90 static int mmap_get_record P((char **out, IOBUF *iop, int rs, Regexp *RSre, int *errcode));
91 #endif /* HAVE_MMAP */
92 static int str2mode P((const char *mode));
93 static void spec_setup P((IOBUF *iop, int len, int allocate));
94 static int specfdopen P((IOBUF *iop, const char *name, const char *mode));
95 static int pidopen P((IOBUF *iop, const char *name, const char *mode));
96 static int useropen P((IOBUF *iop, const char *name, const char *mode));
98 #if defined (HAVE_POPEN_H)
102 static struct redirect *red_head = NULL;
104 static Regexp *RS_regexp;
108 extern int output_is_tty;
109 extern NODE *ARGC_node;
110 extern NODE *ARGV_node;
111 extern NODE *ARGIND_node;
112 extern NODE *ERRNO_node;
113 extern NODE **fields_arr;
115 static jmp_buf filebuf; /* for do_nextfile() */
118 /* File pointers have an extra level of indirection, and there are cases where
119 `stdin' can be null. That can crash gawk if fileno() is used as-is. */
120 static int vmsrtl_fileno P((FILE *));
121 static int vmsrtl_fileno(fp) FILE *fp; { return fileno(fp); }
123 #define fileno(FP) (((FP) && *(FP)) ? vmsrtl_fileno(FP) : -1)
126 /* do_nextfile --- implement gawk "nextfile" extension */
131 (void) nextfile(TRUE);
135 /* nextfile --- move to the next input data file */
142 static int files = 0;
144 static IOBUF *curfile = NULL;
154 if (curfile != NULL) {
155 if (curfile->cnt == EOF) {
156 (void) iop_close(curfile);
161 for (; i < (long) (ARGC_node->lnode->numbr); i++) {
162 arg = *assoc_lookup(ARGV_node, tmp_number((AWKNUM) i));
165 arg->stptr[arg->stlen] = '\0';
166 if (! do_traditional) {
167 unref(ARGIND_node->var_value);
168 ARGIND_node->var_value = make_number((AWKNUM) i);
170 if (! arg_assign(arg->stptr)) {
173 curfile = iop_open(fname, "r", &mybuf);
176 curfile->flag |= IOP_NOFREE_OBJ;
177 /* This is a kludge. */
178 unref(FILENAME_node->var_value);
179 FILENAME_node->var_value = dupnode(arg);
187 /* no args. -- use stdin */
188 /* FNR is init'ed to 0 */
189 FILENAME_node->var_value = make_string("-", 1);
191 curfile = iop_open(fname, "r", &mybuf);
194 curfile->flag |= IOP_NOFREE_OBJ;
199 fatal("cannot open file `%s' for reading (%s)",
200 fname, strerror(errno));
205 /* set_FNR --- update internal FNR from awk variable */
210 FNR = (long) FNR_node->var_value->numbr;
213 /* set_NR --- update internal NR from awk variable */
218 NR = (long) NR_node->var_value->numbr;
221 /* inrec --- This reads in a record from the input file */
231 if ((cnt = iop->cnt) != EOF)
232 cnt = (*(iop->getrec))
233 (&begin, iop, RS->stptr[0], RS_regexp, NULL);
240 set_record(begin, cnt, TRUE);
246 /* iop_close --- close an open IOP */
259 /* Work around bug in UNICOS popen */
264 /* save these for re-use; don't free the storage */
265 if ((iop->flag & IOP_IS_INTERNAL) != 0) {
267 iop->end = iop->buf + strlen(iop->buf);
273 /* Don't close standard files or else crufty code elsewhere will lose */
274 if (iop->fd == fileno(stdin)
275 || iop->fd == fileno(stdout)
276 || iop->fd == fileno(stderr)
277 || (iop->flag & IOP_MMAPPED) != 0)
280 ret = close(iop->fd);
283 warning("close of fd %d (`%s') failed (%s)", iop->fd,
284 iop->name, strerror(errno));
285 if ((iop->flag & IOP_NO_FREE) == 0) {
287 * Be careful -- $0 may still reference the buffer even though
288 * an explicit close is being done; in the future, maybe we
289 * can do this a bit better.
292 if ((fields_arr[0]->stptr >= iop->buf)
293 && (fields_arr[0]->stptr < (iop->buf + iop->secsiz + iop->size))) {
296 t = make_string(fields_arr[0]->stptr,
297 fields_arr[0]->stlen);
298 unref(fields_arr[0]);
302 if ((iop->flag & IOP_MMAPPED) == 0)
306 (void) munmap(iop->buf, iop->size);
309 if ((iop->flag & IOP_NOFREE_OBJ) == 0)
312 return ret == -1 ? 1 : 0;
315 /* do_input --- the main input processing loop */
323 (void) setjmp(filebuf); /* for `nextfile' */
325 while ((iop = nextfile(FALSE)) != NULL) {
327 while (interpret(expression_value) && inrec(iop) == 0)
330 /* recover any space from C based alloca */
338 /* redirect --- Redirection for printf and print commands */
341 redirect(tree, errflg)
346 register struct redirect *rp;
350 const char *direction = "to";
353 const char *what = NULL;
355 switch (tree->type) {
356 case Node_redirect_append:
359 case Node_redirect_output:
360 outflag = (RED_FILE|RED_WRITE);
362 if (tree->type == Node_redirect_output)
367 case Node_redirect_pipe:
368 tflag = (RED_PIPE|RED_WRITE);
371 case Node_redirect_pipein:
372 tflag = (RED_PIPE|RED_READ);
375 case Node_redirect_input:
376 tflag = (RED_FILE|RED_READ);
380 fatal("invalid tree type %d in redirect()", tree->type);
383 tmp = tree_eval(tree->subnode);
384 if (do_lint && (tmp->flags & STR) == 0)
385 warning("expression in `%s' redirection only has numeric value",
387 tmp = force_string(tmp);
390 if (str == NULL || *str == '\0')
391 fatal("expression for `%s' redirection has null string value",
395 && (STREQN(str, "0", tmp->stlen) || STREQN(str, "1", tmp->stlen)))
396 warning("filename `%s' for `%s' redirection may be result of logical expression", str, what);
397 for (rp = red_head; rp != NULL; rp = rp->next)
398 if (strlen(rp->value) == tmp->stlen
399 && STREQN(rp->value, str, tmp->stlen)
400 && ((rp->flag & ~(RED_NOBUF|RED_EOF)) == tflag
402 && (rp->flag & (RED_FILE|RED_WRITE)) == outflag)))
405 emalloc(rp, struct redirect *, sizeof(struct redirect),
407 emalloc(str, char *, tmp->stlen+1, "redirect");
408 memcpy(str, tmp->stptr, tmp->stlen);
409 str[tmp->stlen] = '\0';
414 rp->pid = 0; /* unlikely that we're worried about init */
416 /* maintain list in most-recently-used first order */
417 if (red_head != NULL)
423 str = rp->value; /* get \0 terminated string */
424 while (rp->fp == NULL && rp->iop == NULL) {
425 if (rp->flag & RED_EOF)
427 * encountered EOF on file or pipe -- must be cleared
428 * by explicit close() before reading more
433 switch (tree->type) {
434 case Node_redirect_output:
436 if ((rp->flag & RED_USED) != 0)
439 case Node_redirect_append:
442 case Node_redirect_pipe:
443 /* synchronize output before new pipe */
446 if ((rp->fp = popen(str, "w")) == NULL)
447 fatal("can't open pipe (\"%s\") for output (%s)",
448 str, strerror(errno));
449 rp->flag |= RED_NOBUF;
451 case Node_redirect_pipein:
453 if (gawk_popen(str, rp) == NULL)
454 fatal("can't open pipe (\"%s\") for input (%s)",
455 str, strerror(errno));
457 case Node_redirect_input:
459 rp->iop = iop_open(str, "r", NULL);
466 fd = devopen(str, mode);
467 if (fd > INVALID_HANDLE) {
468 if (fd == fileno(stdin))
470 else if (fd == fileno(stdout))
472 else if (fd == fileno(stderr))
475 rp->fp = fdopen(fd, (char *) mode);
476 /* don't leak file descriptors */
480 if (rp->fp != NULL && isatty(fd))
481 rp->flag |= RED_NOBUF;
482 /* Move rp to the head of the list. */
483 if (red_head != rp) {
484 if ((rp->prev->next = rp->next) != NULL)
485 rp->next->prev = rp->prev;
493 if (rp->fp == NULL && rp->iop == NULL) {
494 /* too many files open -- close one and try again */
495 if (errno == EMFILE || errno == ENFILE)
497 #if defined __MINGW32__ || defined HAVE_MMAP
498 /* this works for solaris 2.5, not sunos */
499 /* it is also needed for MINGW32 */
500 else if (errno == 0) /* HACK! */
504 /* Alpha/VMS V7.1's C RTL is returning this instead
505 of EMFILE (haven't tried other post-V6.2 systems) */
506 #define SS$_EXQUOTA 0x001C
507 else if (errno == EIO && vaxc$errno == SS$_EXQUOTA)
512 * Some other reason for failure.
514 * On redirection of input from a file,
515 * just return an error, so e.g. getline
516 * can return -1. For output to file,
517 * complain. The shell will complain on
518 * a bad command to a pipe.
522 if (tree->type == Node_redirect_output
523 || tree->type == Node_redirect_append)
524 fatal("can't redirect %s `%s' (%s)",
525 direction, str, strerror(errno));
537 /* getredirect --- find the struct redirect for this file or pipe */
540 getredirect(str, len)
546 for (rp = red_head; rp != NULL; rp = rp->next)
547 if (strlen(rp->value) == len && STREQN(rp->value, str, len))
553 /* close_one --- temporarily close an open file to re-use the fd */
558 register struct redirect *rp;
559 register struct redirect *rplast = NULL;
561 /* go to end of list first, to pick up least recently used entry */
562 for (rp = red_head; rp != NULL; rp = rp->next)
564 /* now work back up through the list */
565 for (rp = rplast; rp != NULL; rp = rp->prev)
566 if (rp->fp != NULL && (rp->flag & RED_FILE) != 0) {
567 rp->flag |= RED_USED;
569 if (/* do_lint && */ fclose(rp->fp) != 0)
570 warning("close of \"%s\" failed (%s).",
571 rp->value, strerror(errno));
576 /* surely this is the only reason ??? */
577 fatal("too many pipes or input files open");
580 /* do_close --- completely close an open file or pipe */
587 register struct redirect *rp;
589 tmp = force_string(tree_eval(tree->subnode));
591 for (rp = red_head; rp != NULL; rp = rp->next) {
592 if (strlen(rp->value) == tmp->stlen
593 && STREQN(rp->value, tmp->stptr, tmp->stlen))
597 if (rp == NULL) { /* no match */
598 /* icky special case: close(FILENAME) called. */
599 if (tree->subnode == FILENAME_node
600 || (tmp->stlen == FILENAME_node->var_value->stlen
601 && STREQN(tmp->stptr, FILENAME_node->var_value->stptr, tmp->stlen))) {
602 (void) nextfile(TRUE);
604 warning("close: `%.*s' is not an open file or pipe",
605 tmp->stlen, tmp->stptr);
608 return tmp_number((AWKNUM) 0.0);
611 fflush(stdout); /* synchronize regular output */
612 tmp = tmp_number((AWKNUM) close_redir(rp, FALSE));
617 /* close_redir --- close an open file or pipe */
620 close_redir(rp, exitwarn)
621 register struct redirect *rp;
629 if (rp->fp == stdout || rp->fp == stderr)
632 if ((rp->flag & (RED_PIPE|RED_WRITE)) == (RED_PIPE|RED_WRITE))
633 status = pclose(rp->fp);
634 else if (rp->fp != NULL)
635 status = fclose(rp->fp);
636 else if (rp->iop != NULL) {
637 if ((rp->flag & RED_PIPE) != 0)
638 status = gawk_pclose(rp);
640 status = iop_close(rp->iop);
645 what = ((rp->flag & RED_PIPE) != 0) ? "pipe" : "file";
647 /* SVR4 awk checks and warns about status of close */
649 char *s = strerror(errno);
652 * Too many people have complained about this.
653 * As of 2.15.6, it is now under lint control.
656 warning("failure status (%d) on %s close of \"%s\" (%s)",
657 status, what, rp->value, s);
659 if (! do_traditional) {
660 /* set ERRNO too so that program can get at it */
661 unref(ERRNO_node->var_value);
662 ERRNO_node->var_value = make_string(s, strlen(s));
667 warning("no explicit close of %s `%s' provided",
670 if (rp->next != NULL)
671 rp->next->prev = rp->prev;
672 if (rp->prev != NULL)
673 rp->prev->next = rp->next;
681 /* flush_io --- flush all open output files */
686 register struct redirect *rp;
690 if (fflush(stdout)) {
691 warning("error writing standard output (%s)", strerror(errno));
694 if (fflush(stderr)) {
695 warning("error writing standard error (%s)", strerror(errno));
698 for (rp = red_head; rp != NULL; rp = rp->next)
699 /* flush both files and pipes, what the heck */
700 if ((rp->flag & RED_WRITE) && rp->fp != NULL) {
701 if (fflush(rp->fp)) {
702 warning("%s flush of \"%s\" failed (%s).",
703 (rp->flag & RED_PIPE) ? "pipe" :
704 "file", rp->value, strerror(errno));
711 /* close_io --- close all open files, called when exiting */
716 register struct redirect *rp;
717 register struct redirect *next;
721 for (rp = red_head; rp != NULL; rp = next) {
724 * close_redir() will print a message if needed
725 * if do_lint, warn about lack of explicit close
727 if (close_redir(rp, do_lint))
732 * Some of the non-Unix os's have problems doing an fclose
733 * on stdout and stderr. Since we don't really need to close
734 * them, we just flush them, and do that across the board.
736 if (fflush(stdout)) {
737 warning("error writing standard output (%s)", strerror(errno));
740 if (fflush(stderr)) {
741 warning("error writing standard error (%s)", strerror(errno));
747 /* str2mode --- convert a string mode to an integer mode */
761 ret = O_WRONLY|O_CREAT|O_TRUNC;
765 ret = O_WRONLY|O_APPEND|O_CREAT;
775 /* devopen --- handle /dev/std{in,out,err}, /dev/fd/N, regular files */
778 * This separate version is still needed for output, since file and pipe
779 * output is done with stdio. iop_open() handles input with IOBUFs of
780 * more "special" files. Those files are not handled here since it makes
781 * no sense to use them for output.
786 const char *name, *mode;
793 extern double strtod();
795 flag = str2mode(mode);
797 if (STREQ(name, "-"))
798 openfd = fileno(stdin);
800 openfd = INVALID_HANDLE;
805 if ((openfd = os_devopen(name, flag)) >= 0)
808 if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) {
811 if (STREQ(cp, "stdin") && (flag & O_ACCMODE) == O_RDONLY)
812 openfd = fileno(stdin);
813 else if (STREQ(cp, "stdout") && (flag & O_ACCMODE) == O_WRONLY)
814 openfd = fileno(stdout);
815 else if (STREQ(cp, "stderr") && (flag & O_ACCMODE) == O_WRONLY)
816 openfd = fileno(stderr);
817 else if (STREQN(cp, "fd/", 3)) {
819 openfd = (int) strtod(cp, &ptr);
820 if (openfd <= INVALID_HANDLE || ptr == cp)
821 openfd = INVALID_HANDLE;
826 if (openfd == INVALID_HANDLE)
827 openfd = open(name, flag, 0666);
828 if (openfd != INVALID_HANDLE && fstat(openfd, &buf) > 0)
829 if (S_ISDIR(buf.st_mode))
830 fatal("file `%s' is a directory", name);
835 /* spec_setup --- setup an IOBUF for a special internal file */
838 spec_setup(iop, len, allocate)
846 emalloc(cp, char *, len+2, "spec_setup");
849 len = strlen(iop->buf);
850 iop->buf[len++] = '\n'; /* get_a_record clobbered it */
851 iop->buf[len] = '\0'; /* just in case */
857 iop->end = iop->buf + len;
859 iop->flag = IOP_IS_INTERNAL;
860 iop->getrec = get_a_record;
863 /* specfdopen --- open an fd special file */
866 specfdopen(iop, name, mode)
868 const char *name, *mode;
873 fd = devopen(name, mode);
874 if (fd == INVALID_HANDLE)
875 return INVALID_HANDLE;
876 tp = iop_alloc(fd, name, NULL);
878 /* don't leak fd's */
880 return INVALID_HANDLE;
883 iop->flag |= IOP_NO_FREE;
889 #define getpgrp_arg() /* nothing */
891 #define getpgrp_arg() getpid()
894 /* pidopen --- "open" /dev/pid, /dev/ppid, and /dev/pgrpid */
897 pidopen(iop, name, mode)
899 const char *name, *mode;
905 sprintf(tbuf, "%d\n", (int) getpgrp(getpgrp_arg()));
906 else if (name[6] == 'i')
907 sprintf(tbuf, "%d\n", (int) getpid());
909 sprintf(tbuf, "%d\n", (int) getppid());
911 spec_setup(iop, i, TRUE);
912 strcpy(iop->buf, tbuf);
916 /* useropen --- "open" /dev/user */
919 * /dev/user creates a record as follows:
924 * If multiple groups are supported, then $5 through $NF are the
925 * supplementary group set.
929 useropen(iop, name, mode)
931 const char *name, *mode;
933 char tbuf[BUFSIZ], *cp;
935 #if defined(NGROUPS_MAX) && NGROUPS_MAX > 0
936 GETGROUPS_T groupset[NGROUPS_MAX];
940 sprintf(tbuf, "%d %d %d %d", (int) getuid(), (int) geteuid(), (int) getgid(), (int) getegid());
942 cp = tbuf + strlen(tbuf);
943 #if defined(NGROUPS_MAX) && NGROUPS_MAX > 0
944 ngroups = getgroups(NGROUPS_MAX, groupset);
946 fatal("could not find groups: %s", strerror(errno));
948 for (i = 0; i < ngroups; i++) {
950 sprintf(cp, "%d", (int) groupset[i]);
958 spec_setup(iop, i, TRUE);
959 strcpy(iop->buf, tbuf);
963 /* iop_open --- handle special and regular files for input */
966 iop_open(name, mode, iop)
967 const char *name, *mode;
970 int openfd = INVALID_HANDLE;
973 static struct internal {
976 int (*fp) P((IOBUF *, const char *, const char *));
979 { "/dev/fd/", 8, specfdopen },
980 { "/dev/stdin", 10, specfdopen },
981 { "/dev/stdout", 11, specfdopen },
982 { "/dev/stderr", 11, specfdopen },
983 { "/dev/pid", 8, pidopen },
984 { "/dev/ppid", 9, pidopen },
985 { "/dev/pgrpid", 11, pidopen },
986 { "/dev/user", 9, useropen },
988 int devcount = sizeof(table) / sizeof(table[0]);
990 flag = str2mode(mode);
993 * FIXME: remove the stat call, and always process these files
996 if (STREQ(name, "-"))
997 openfd = fileno(stdin);
998 else if (do_traditional)
1000 else if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) {
1003 for (i = 0; i < devcount; i++) {
1004 if (STREQN(name, table[i].name, table[i].compare)) {
1005 iop = & table[i].iob;
1007 if (iop->buf != NULL) {
1008 spec_setup(iop, 0, FALSE);
1010 } else if ((*table[i].fp)(iop, name, mode) == 0)
1013 warning("could not open %s, mode `%s'",
1022 if (openfd == INVALID_HANDLE)
1023 openfd = open(name, flag, 0666);
1024 if (openfd != INVALID_HANDLE && fstat(openfd, &buf) > 0)
1025 if (S_ISDIR(buf.st_mode))
1026 fatal("file `%s' is a directory", name);
1027 return iop_alloc(openfd, name, iop);
1030 #ifndef PIPES_SIMULATED /* real pipes */
1032 /* wait_any --- wait for a child process, close associated pipe */
1035 wait_any(interesting)
1036 int interesting; /* pid of interest, if any */
1038 RETSIGTYPE (*hstat)(), (*istat)(), (*qstat)();
1041 struct redirect *redp;
1044 hstat = signal(SIGHUP, SIG_IGN);
1045 istat = signal(SIGINT, SIG_IGN);
1046 qstat = signal(SIGQUIT, SIG_IGN);
1048 #ifdef HAVE_SYS_WAIT_H /* Posix compatible sys/wait.h */
1049 pid = wait(&status);
1051 pid = wait((union wait *)&status);
1053 if (interesting && pid == interesting) {
1055 } else if (pid != -1) {
1056 for (redp = red_head; redp != NULL; redp = redp->next)
1057 if (pid == redp->pid) {
1059 redp->status = status;
1063 if (pid == -1 && errno == ECHILD)
1066 signal(SIGHUP, hstat);
1067 signal(SIGINT, istat);
1068 signal(SIGQUIT, qstat);
1072 /* gawk_popen --- open an IOBUF on a child process */
1077 struct redirect *rp;
1083 * used to wait for any children to synchronize input and output,
1084 * but this could cause gawk to hang when it is started in a pipeline
1085 * and thus has a child process feeding it input (shell dependent)
1087 /*(void) wait_any(0);*/ /* wait for outstanding processes */
1090 fatal("cannot open pipe \"%s\" (%s)", cmd, strerror(errno));
1091 if ((pid = fork()) == 0) {
1093 fatal("close of stdout in child failed (%s)",
1096 fatal("dup of pipe failed (%s)", strerror(errno));
1097 if (close(p[0]) == -1 || close(p[1]) == -1)
1098 fatal("close of pipe failed (%s)", strerror(errno));
1099 execl("/bin/sh", "sh", "-c", cmd, NULL);
1103 fatal("cannot fork for \"%s\" (%s)", cmd, strerror(errno));
1105 if (close(p[1]) == -1)
1106 fatal("close of pipe failed (%s)", strerror(errno));
1107 rp->iop = iop_alloc(p[0], cmd, NULL);
1108 if (rp->iop == NULL)
1113 /* gawk_pclose --- close an open child pipe */
1117 struct redirect *rp;
1119 (void) iop_close(rp->iop);
1122 /* process previously found, return stored status */
1124 return (rp->status >> 8) & 0xFF;
1125 rp->status = wait_any(rp->pid);
1127 return (rp->status >> 8) & 0xFF;
1130 #else /* PIPES_SIMULATED */
1133 * use temporary file rather than pipe
1134 * except if popen() provides real pipes too
1137 #if defined(VMS) || defined(OS2) || defined (MSDOS) || defined(WIN32)
1139 /* gawk_popen --- open an IOBUF on a child process */
1144 struct redirect *rp;
1148 if ((current = popen(cmd, "r")) == NULL)
1150 rp->iop = iop_alloc(fileno(current), cmd, NULL);
1151 if (rp->iop == NULL) {
1152 (void) pclose(current);
1159 /* gawk_pclose --- close an open child pipe */
1163 struct redirect *rp;
1165 int rval, aval, fd = rp->iop->fd;
1167 rp->iop->fd = dup(fd); /* kludge to allow close() + pclose() */
1168 rval = iop_close(rp->iop);
1170 aval = pclose(rp->ifp);
1172 return (rval < 0 ? rval : aval);
1174 #else /* not (VMS || OS2 || MSDOS) */
1176 static struct pipeinfo {
1181 /* gawk_popen --- open an IOBUF on a child process */
1186 struct redirect *rp;
1188 extern char *strdup P((const char *));
1191 static char cmdbuf[256];
1193 /* get a name to use */
1194 if ((name = tempnam(".", "pip")) == NULL)
1196 sprintf(cmdbuf, "%s > %s", cmd, name);
1198 if ((current = open(name, O_RDONLY)) == INVALID_HANDLE)
1200 pipes[current].name = name;
1201 pipes[current].command = strdup(cmd);
1202 rp->iop = iop_alloc(current, name, NULL);
1203 if (rp->iop == NULL)
1204 (void) close(current);
1208 /* gawk_pclose --- close an open child pipe */
1212 struct redirect *rp;
1214 int cur = rp->iop->fd;
1217 rval = iop_close(rp->iop);
1220 /* check for an open file */
1221 if (pipes[cur].name == NULL)
1223 unlink(pipes[cur].name);
1224 free(pipes[cur].name);
1225 pipes[cur].name = NULL;
1226 free(pipes[cur].command);
1229 #endif /* not (VMS || OS2 || MSDOS) */
1231 #endif /* PIPES_SIMULATED */
1233 /* do_getline --- read in a line, into var and with redirection, as needed */
1239 struct redirect *rp = NULL;
1245 while (cnt == EOF) {
1246 if (tree->rnode == NULL) { /* no redirection */
1247 iop = nextfile(FALSE);
1248 if (iop == NULL) /* end of input */
1249 return tmp_number((AWKNUM) 0.0);
1251 int redir_error = 0;
1253 rp = redirect(tree->rnode, &redir_error);
1254 if (rp == NULL && redir_error) { /* failed redirect */
1255 if (! do_traditional) {
1256 s = strerror(redir_error);
1258 unref(ERRNO_node->var_value);
1259 ERRNO_node->var_value =
1260 make_string(s, strlen(s));
1262 return tmp_number((AWKNUM) -1.0);
1265 if (iop == NULL) /* end of input */
1266 return tmp_number((AWKNUM) 0.0);
1269 cnt = (*(iop->getrec))(&s, iop, RS->stptr[0], RS_regexp, &errcode);
1271 if (! do_traditional) {
1272 s = strerror(errcode);
1274 unref(ERRNO_node->var_value);
1275 ERRNO_node->var_value = make_string(s, strlen(s));
1277 return tmp_number((AWKNUM) -1.0);
1282 * Don't do iop_close() here if we are
1283 * reading from a pipe; otherwise
1284 * gawk_pclose will not be called.
1286 if ((rp->flag & RED_PIPE) == 0) {
1287 (void) iop_close(iop);
1290 rp->flag |= RED_EOF; /* sticky EOF */
1291 return tmp_number((AWKNUM) 0.0);
1293 continue; /* try another file */
1299 if (tree->lnode == NULL) /* no optional var. */
1300 set_record(s, cnt, TRUE);
1301 else { /* assignment to variable */
1302 Func_ptr after_assign = NULL;
1305 lhs = get_lhs(tree->lnode, &after_assign);
1307 *lhs = make_string(s, cnt);
1308 (*lhs)->flags |= MAYBE_NUM;
1309 /* we may have to regenerate $0 here! */
1310 if (after_assign != NULL)
1314 return tmp_number((AWKNUM) 1.0);
1317 /* pathopen --- pathopen with default file extension handling */
1323 int fd = do_pathopen(file);
1325 #ifdef DEFAULT_FILETYPE
1326 if (! do_traditional && fd <= INVALID_HANDLE) {
1330 int vms_save = vaxc$errno;
1333 /* append ".awk" and try again */
1334 emalloc(file_awk, char *, strlen(file) +
1335 sizeof(DEFAULT_FILETYPE) + 1, "pathopen");
1336 sprintf(file_awk, "%s%s", file, DEFAULT_FILETYPE);
1337 fd = do_pathopen(file_awk);
1339 if (fd <= INVALID_HANDLE) {
1342 vaxc$errno = vms_save;
1346 #endif /*DEFAULT_FILETYPE*/
1351 /* do_pathopen --- search $AWKPATH for source file */
1357 static const char *savepath = NULL;
1358 static int first = TRUE;
1359 const char *awkpath;
1360 char *cp, trypath[BUFSIZ];
1363 if (STREQ(file, "-"))
1367 return (devopen(file, "r"));
1371 if ((awkpath = getenv("AWKPATH")) != NULL && *awkpath)
1372 savepath = awkpath; /* used for restarting */
1378 /* some kind of path name, no search */
1380 return (devopen(file, "r"));
1384 /* this should take into account limits on size of trypath */
1385 for (cp = trypath; *awkpath && *awkpath != envsep; )
1388 if (cp != trypath) { /* non-null element in path */
1389 /* add directory punctuation only if needed */
1390 if (! isdirpunct(*(cp-1)))
1392 /* append filename */
1395 strcpy(trypath, file);
1396 if ((fd = devopen(trypath, "r")) > INVALID_HANDLE)
1399 /* no luck, keep going */
1400 if(*awkpath == envsep && awkpath[1] != '\0')
1401 awkpath++; /* skip colon */
1402 } while (*awkpath != '\0');
1404 * You might have one of the awk paths defined, WITHOUT the current
1405 * working directory in it. Therefore try to open the file in the
1406 * current directory.
1408 return (devopen(file, "r"));
1423 /* iop_alloc --- allocate an IOBUF structure for an open fd */
1426 iop_alloc(fd, name, iop)
1433 if (fd == INVALID_HANDLE)
1436 emalloc(iop, IOBUF *, sizeof(IOBUF), "iop_alloc");
1439 iop->flag |= IOP_IS_TTY;
1440 iop->size = optimal_bufsize(fd, & sbuf);
1441 if (do_lint && S_ISREG(sbuf.st_mode) && sbuf.st_size == 0)
1442 warning("data file `%s' is empty", name);
1446 iop->off = iop->buf = NULL;
1449 iop->getrec = get_a_record;
1451 /* Use mmap only for regular files with positive sizes.
1452 The size must fit into size_t, so that mmap works correctly.
1453 Also, it must fit into int, so that iop->cnt won't overflow. */
1454 if (S_ISREG(sbuf.st_mode) && sbuf.st_size > 0
1455 && sbuf.st_size == (size_t) sbuf.st_size
1456 && sbuf.st_size == (int) sbuf.st_size) {
1459 iop->buf = iop->off = mmap((caddr_t) 0, sbuf.st_size,
1460 PROT_READ|PROT_WRITE, MAP_PRIVATE,
1462 /* cast is for buggy compilers (e.g. DEC OSF/1) */
1463 if (iop->buf == (caddr_t)MAP_FAILED) {
1464 iop->buf = iop->off = NULL;
1468 iop->flag |= IOP_MMAPPED;
1469 iop->size = sbuf.st_size;
1471 iop->end = iop->buf + iop->size;
1472 iop->cnt = sbuf.st_size;
1473 iop->getrec = mmap_get_record;
1475 iop->fd = INVALID_HANDLE;
1477 #if defined(HAVE_MADVISE) && defined(MADV_SEQUENTIAL)
1478 madvise(iop->buf, iop->size, MADV_SEQUENTIAL);
1481 * The following is a really gross hack.
1482 * We want to ensure that we have a copy of the input
1483 * data that won't go away, on the off chance that someone
1484 * will truncate the data file we've just mmap'ed.
1485 * So, we go through and touch each page, forcing the
1486 * system to give us a private copy. A page size of 512
1487 * guarantees this will work, even on the least common
1488 * denominator system (like, oh say, a VAX).
1490 for (cp = iop->buf; cp < iop->end; cp += 512)
1494 #endif /* HAVE_MMAP */
1498 /* These macros used by both record reading routines */
1499 #define set_RT_to_null() \
1500 (void)(! do_traditional && (unref(RT_node->var_value), \
1501 RT_node->var_value = Nnull_string))
1503 #define set_RT(str, len) \
1504 (void)(! do_traditional && (unref(RT_node->var_value), \
1505 RT_node->var_value = make_string(str, len)))
1509 * Get the next record. Uses a "split buffer" where the latter part is
1510 * the normal read buffer and the head part is an "overflow" area that is used
1511 * when a record spans the end of the normal buffer, in which case the first
1512 * part of the record is copied into the overflow area just before the
1513 * normal buffer. Thus, the eventual full record can be returned as a
1514 * contiguous area of memory with a minimum of copying. The overflow area
1515 * is expanded as needed, so that records are unlimited in length.
1516 * We also mark both the end of the buffer and the end of the read() with
1517 * a sentinel character (the current record separator) so that the inside
1518 * loop can run as a single test.
1520 * Note that since we know or can compute the end of the read and the end
1521 * of the buffer, the sentinel character does not get in the way of regexp
1522 * based searching, since we simply search up to that character, but not
1527 get_a_record(out, iop, grRS, RSre, errcode)
1528 char **out; /* pointer to pointer to data */
1529 IOBUF *iop; /* input IOP */
1530 register int grRS; /* first char in RS->stptr */
1531 Regexp *RSre; /* regexp for RS */
1532 int *errcode; /* pointer to error variable */
1534 register char *bp = iop->off;
1536 char *start = iop->off; /* beginning of record */
1538 static Regexp *RS_null_re = NULL;
1539 Regexp *rsre = NULL;
1540 int continuing = FALSE, continued = FALSE; /* used for re matching */
1543 /* first time through */
1544 if (RS_null_re == NULL) {
1545 RS_null_re = make_regexp("\n\n+", 3, TRUE, TRUE);
1546 if (RS_null_re == NULL)
1547 fatal("internal error: file `%s', line %d\n",
1548 __FILE__, __LINE__);
1551 if (iop->cnt == EOF) { /* previous read hit EOF */
1557 if (RS_is_null) /* special case: RS == "" */
1562 onecase = (IGNORECASE && isalpha(rs));
1566 /* set up sentinel */
1568 bufend = iop->buf + iop->size + iop->secsiz;
1569 *bufend = rs; /* add sentinel to buffer */
1573 for (;;) { /* break on end of record, read error or EOF */
1574 /* buffer mgmt, chunk #1 */
1576 * Following code is entered on the first call of this routine
1577 * for a new iop, or when we scan to the end of the buffer.
1578 * In the latter case, we copy the current partial record to
1579 * the space preceding the normal read buffer. If necessary,
1580 * we expand this space. This is done so that we can return
1581 * the record as a contiguous area of memory.
1583 if ((iop->flag & IOP_IS_INTERNAL) == 0 && bp >= bufend) {
1584 char *oldbuf = NULL;
1585 char *oldsplit = iop->buf + iop->secsiz;
1586 long len; /* record length so far */
1589 if (len > iop->secsiz) {
1590 /* expand secondary buffer */
1591 if (iop->secsiz == -2)
1593 while (len > iop->secsiz)
1596 emalloc(iop->buf, char *,
1597 iop->size+iop->secsiz+2, "get_a_record");
1598 bufend = iop->buf + iop->size + iop->secsiz;
1602 char *newsplit = iop->buf + iop->secsiz;
1604 if (start < oldsplit) {
1605 memcpy(newsplit - len, start,
1607 memcpy(newsplit - (bp - oldsplit),
1608 oldsplit, bp - oldsplit);
1610 memcpy(newsplit - len, start, len);
1612 bp = iop->end = iop->off = iop->buf + iop->secsiz;
1614 if (oldbuf != NULL) {
1619 /* buffer mgmt, chunk #2 */
1621 * Following code is entered whenever we have no more data to
1622 * scan. In most cases this will read into the beginning of
1623 * the main buffer, but in some cases (terminal, pipe etc.)
1624 * we may be doing smallish reads into more advanced positions.
1626 if (bp >= iop->end) {
1627 if ((iop->flag & IOP_IS_INTERNAL) != 0) {
1631 iop->cnt = read(iop->fd, iop->end, bufend - iop->end);
1632 if (iop->cnt == -1) {
1633 if (! do_traditional && errcode != NULL) {
1638 fatal("error reading input file `%s': %s",
1639 iop->name, strerror(errno));
1640 } else if (iop->cnt == 0) {
1642 * hit EOF before matching RS, so end
1643 * the record and set RT to ""
1646 /* see comments below about this test */
1652 if (iop->cnt != EOF) {
1653 iop->end += iop->cnt;
1654 *iop->end = rs; /* reset the sentinel */
1657 /* buffers are now setup and filled with data */
1658 /* search for RS, #1, regexp based, or RS = "" */
1660 * Attempt to simplify the code a bit. The case where
1661 * RS = "" can also be described by a regexp, RS = "\n\n+".
1662 * The buffer management and searching code can thus now
1663 * use a common case (the one for regexps) both when RS is
1664 * a regexp, and when RS = "". This particularly benefits
1665 * us for keeping track of how many newlines were matched
1666 * in order to set RT.
1668 if (! do_traditional && RSre != NULL) /* regexp */
1670 else if (RS_is_null) /* RS = "" */
1676 * Look for regexp match of RS. Non-match conditions are:
1677 * 1. No match at all
1678 * 2. Match of a null string
1679 * 3. Match ends at exact end of buffer
1680 * Number 3 is subtle; we have to add more to the buffer
1681 * in case the match would have extended further into the
1682 * file, since regexp match by definition always matches the
1683 * longest possible match.
1685 * It is even more subtle than you might think. Suppose
1686 * the re matches at exactly the end of file. We don't know
1687 * that until we try to add more to the buffer. Thus, we
1688 * set a flag to indicate, that if eof really does happen,
1689 * don't break early.
1694 /* cases 1 and 2 are simple, just keep going */
1695 if (research(rsre, start, 0, iop->end - start, TRUE) == -1
1696 || RESTART(rsre, start) == REEND(rsre, start)) {
1698 * Leading newlines at the beginning of the file
1699 * should be ignored. Whew!
1701 if (RS_is_null && *start == '\n') {
1703 * have to catch the case of a
1704 * single newline at the front of
1705 * the record, which the regex
1708 while (*start == '\n' && start < iop->end)
1715 /* case 3, regex match at exact end */
1716 if (start + REEND(rsre, start) >= iop->end) {
1717 if (iop->cnt != EOF) {
1719 continuing = continued = TRUE;
1725 * Leading newlines at the beginning of the file
1726 * should be ignored. Whew!
1728 * Is this code ever executed?
1730 if (RS_is_null && RESTART(rsre, start) == 0) {
1731 start += REEND(rsre, start);
1734 bp = start + RESTART(rsre, start);
1735 set_RT(bp, REEND(rsre, start) - RESTART(rsre, start));
1737 iop->off = start + REEND(rsre, start);
1740 /* search for RS, #2, RS = <single char> */
1742 while (casetable[(int) *bp++] != rs)
1755 if ((iop->flag & IOP_IS_INTERNAL) != 0)
1756 iop->cnt = bp - start;
1759 && (((iop->flag & IOP_IS_INTERNAL) != 0)
1760 || (start == bp && ! continued))) {
1766 if (do_traditional || rsre == NULL) {
1769 bstart = iop->off = bp;
1771 if (onecase ? casetable[(int) *bp] != rs : *bp != rs) {
1776 } else if (RS_is_null && iop->cnt == EOF) {
1778 * special case, delete trailing newlines,
1779 * should never be more than one.
1781 while (bp[-1] == '\n')
1803 bufsize = atoi(argv[1]);
1806 iop = iop_alloc(0, "stdin", NULL);
1807 while ((cnt = get_a_record(&out, iop, rs[0], NULL, NULL)) > 0) {
1808 fwrite(out, 1, cnt, stdout);
1809 fwrite(rs, 1, 1, stdout);
1816 /* mmap_get_record --- pull a record out of a memory-mapped file */
/*
 * Scans the mapped data from iop->off up to iop->end for the next record
 * terminator and advances iop->off for the following call.
 *
 * Three search strategies appear below:
 *   - RS is a regexp (RSre != NULL and not do_traditional),
 *   - RS == "" (RS_is_null), handled with the cached regexp "\n\n+",
 *   - RS is a single character, found with a bounded byte scan.
 *
 * When iop->off has already reached iop->end, iop->cnt is set to EOF.
 * On the regexp paths, when no (non-empty) match is found the record runs
 * to iop->end and its length is returned, less one trailing newline when
 * RS == "".  set_RT() is handed the text that actually matched RS.
 *
 * NOTE(review): several lines of this routine are not visible in this
 * excerpt (where *out is stored, how errcode is used, the remaining return
 * statements) --- confirm against the full file before relying on this.
 */
1819 mmap_get_record(out, iop, grRS, RSre, errcode)
1820 char **out; /* pointer to pointer to data */
1821 IOBUF *iop; /* input IOP */
1822 register int grRS; /* first char in RS->stptr */
1823 Regexp *RSre; /* regexp for RS */
1824 int *errcode; /* pointer to error variable */
1826 register char *bp = iop->off;
1827 char *start = iop->off; /* beginning of record */
/* static: compiled on the first call only, then reused by later calls */
1829 static Regexp *RS_null_re = NULL;
1830 Regexp *rsre = NULL;
1832 register char *end = iop->end;
1835 /* first time through */
1836 if (RS_null_re == NULL) {
1837 RS_null_re = make_regexp("\n\n+", 3, TRUE, TRUE);
1838 if (RS_null_re == NULL)
1839 fatal("internal error: file `%s', line %d\n",
1840 __FILE__, __LINE__);
1843 if (iop->off >= iop->end) { /* previous record was last */
1846 iop->cnt = EOF; /* tested by higher level code */
1850 if (RS_is_null) /* special case: RS == "" */
1855 onecase = (IGNORECASE && isalpha(rs));
1859 /* if RS = "", skip leading newlines at the front of the file */
1860 if (RS_is_null && iop->off == iop->buf) {
1861 for (bp = iop->off; *bp == '\n'; bp++)
1865 iop->off = start = bp;
1869 * Regexp based searching. Either RS = "" or RS = <regex>
1870 * See comments in get_a_record.
1872 if (! do_traditional && RSre != NULL) /* regexp */
1874 else if (RS_is_null) /* RS = "" */
1880 * Look for regexp match of RS. Non-match conditions are:
1881 * 1. No match at all
1882 * 2. Match of a null string
1883 * 3. Match ends at exact end of buffer
1885 * #1 means that the record ends the file
1886 * and there is no text that actually matched RS.
1888 * #2: is probably like #1.
1890 * #3 is simple; since we have the whole file mapped, it's
1891 * the last record in the file.
1894 if (research(rsre, start, 0, iop->end - start, TRUE) == -1
1895 || RESTART(rsre, start) == REEND(rsre, start)) {
1896 /* no matching text, we have the record */
1898 iop->off = iop->end; /* all done with the record */
1900 /* special case, don't allow trailing newlines */
1901 if (RS_is_null && *(iop->end - 1) == '\n')
1902 return iop->end - start - 1;
1904 return iop->end - start;
/* a non-empty match was found: bp marks where the matched text begins */
1909 bp = start + RESTART(rsre, start);
1910 set_RT(bp, REEND(rsre, start) - RESTART(rsre, start));
/* the next call resumes immediately after the matched text */
1912 iop->off = start + REEND(rsre, start);
1917 * RS = "?", i.e., one character based searching.
1919 * Alas, we can't just plug the sentinel character in at
1920 * the end of the mmapp'ed file ( *(iop->end) = rs; ). This
1921 * works if we're lucky enough to have a file that does not
1922 * take up all of its last disk block. But if we end up with
1923 * file whose size is an even multiple of the disk block size,
1924 * assigning past the end of it delivers a SIGBUS. So, we have to
1925 * add the extra test in the while loop at the front that looks
1926 * for going past the end of the mapped object. Sigh.
1928 /* search for RS, #2, RS = <single char> */
/*
 * NOTE(review): casetable is indexed with a plain char here; if char is
 * signed a byte >= 0x80 would give a negative index --- confirm how
 * casetable is declared.
 */
1930 while (bp < end && casetable[*bp++] != rs)
1933 while (bp < end && *bp++ != rs)
/* bp was post-incremented past the byte just tested, hence the - 1 */
1936 cnt = (bp - start) - 1;
1937 if (bp >= iop->end) {
1938 /* at end, may have actually seen rs, or may not */
1940 set_RT(bp - 1, 1); /* real RS seen */
1952 #endif /* HAVE_MMAP */
1954 /* set_RS --- update things as appropriate when RS is set */
/*
 * Re-reads RS_node->var_value into RS as a string.  When the new value is
 * longer than one character it is compiled into RS_regexp (honouring
 * IGNORECASE), and under do_lint a portability warning is issued once.
 * Finishes by calling set_FS_if_not_FIELDWIDTHS().
 *
 * NOTE(review): several lines of this routine are not visible in this
 * excerpt (the action taken when RS is unchanged, the handling of an
 * existing RS_regexp, any branch between those tests) --- confirm against
 * the full file.
 */
/* copy of the RS value seen on the previous call; static so it persists */
1959 static NODE *save_rs = NULL;
/* compare against the remembered value to detect an unchanged RS */
1961 if (save_rs && cmp_nodes(RS_node->var_value, save_rs) == 0)
1964 save_rs = dupnode(RS_node->var_value);
1966 RS = force_string(RS_node->var_value);
1967 if (RS_regexp != NULL) {
/* more than one character: RS is compiled and used as a regexp */
1973 else if (RS->stlen > 1) {
/* static flag so the lint warning below is printed at most once */
1974 static int warned = FALSE;
1976 RS_regexp = make_regexp(RS->stptr, RS->stlen, IGNORECASE, TRUE);
1978 if (do_lint && ! warned) {
1979 warning("multicharacter value of `RS' is not portable");
1984 set_FS_if_not_FIELDWIDTHS();