Add the DragonFly cvs id and perform general cleanups on cvs/rcs/sccs ids. Most
[dragonfly.git] / contrib / awk / io.c
CommitLineData
984263bc
MD
1/*
2 * io.c --- routines for dealing with input and output and records
3 */
4
5/*
6 * Copyright (C) 1976, 1988, 1989, 1991-2000 the Free Software Foundation, Inc.
7 *
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
10 *
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
24 *
25 * $FreeBSD: src/contrib/awk/io.c,v 1.4.2.1 2001/01/23 22:08:31 asmodai Exp $
1de703da 26 * $DragonFly: src/contrib/awk/Attic/io.c,v 1.2 2003/06/17 04:23:58 dillon Exp $
984263bc
MD
27 */
28
29#include "awk.h"
30#undef HAVE_MMAP /* for now, probably forever */
31
32#ifdef HAVE_SYS_PARAM_H
33#undef RE_DUP_MAX /* avoid spurious conflict w/regex.h */
34#include <sys/param.h>
35#endif /* HAVE_SYS_PARAM_H */
36
37#ifdef HAVE_SYS_WAIT_H
38#include <sys/wait.h>
39#endif /* HAVE_SYS_WAIT_H */
40
41#ifdef HAVE_MMAP
42#include <sys/mman.h>
43#ifndef MAP_FAILED
44#define MAP_FAILED ((caddr_t) -1)
45#endif /* ! defined (MAP_FAILED) */
46#endif /* HAVE_MMAP */
47
48#ifndef O_RDONLY
49#include <fcntl.h>
50#endif
51#ifndef O_ACCMODE
52#define O_ACCMODE (O_RDONLY|O_WRONLY|O_RDWR)
53#endif
54
55#if ! defined(S_ISREG) && defined(S_IFREG)
56#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
57#endif
58
59#if ! defined(S_ISDIR) && defined(S_IFDIR)
60#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
61#endif
62
63#ifndef ENFILE
64#define ENFILE EMFILE
65#endif
66
67#ifdef atarist
68#include <stddef.h>
69#endif
70
71#if defined(MSDOS) || defined(OS2) || defined(WIN32)
72#define PIPES_SIMULATED
73#endif
74
/*
 * Forward declarations for the routines defined later in this file.
 * All of them are file-local except redirect().  The parameter lists are
 * wrapped in P((...)), a macro not defined in this file (presumably a
 * prototype-compatibility wrapper from awk.h -- confirm).
 */
75static IOBUF *nextfile P((int skipping));
76static int inrec P((IOBUF *iop));
77static int iop_close P((IOBUF *iop));
78struct redirect *redirect P((NODE *tree, int *errflg));
79static void close_one P((void));
80static int close_redir P((struct redirect *rp, int exitwarn));
/* wait_any() exists only when real pipes (fork/wait) are in use */
81#ifndef PIPES_SIMULATED
82static int wait_any P((int interesting));
83#endif
84static IOBUF *gawk_popen P((char *cmd, struct redirect *rp));
85static IOBUF *iop_open P((const char *file, const char *how, IOBUF *buf));
86static IOBUF *iop_alloc P((int fd, const char *name, IOBUF *buf));
87static int gawk_pclose P((struct redirect *rp));
88static int do_pathopen P((const char *file));
89static int get_a_record P((char **out, IOBUF *iop, int rs, Regexp *RSre, int *errcode));
90#ifdef HAVE_MMAP
91static int mmap_get_record P((char **out, IOBUF *iop, int rs, Regexp *RSre, int *errcode));
92#endif /* HAVE_MMAP */
93static int str2mode P((const char *mode));
94static void spec_setup P((IOBUF *iop, int len, int allocate));
/* openers for the internally simulated /dev/... files; see the table in iop_open() */
95static int specfdopen P((IOBUF *iop, const char *name, const char *mode));
96static int pidopen P((IOBUF *iop, const char *name, const char *mode));
97static int useropen P((IOBUF *iop, const char *name, const char *mode));
98
99#if defined (HAVE_POPEN_H)
100#include "popen.h"
101#endif
102
/*
 * Head of the doubly linked list of active redirections; redirect() keeps
 * it in most-recently-used-first order and close_one() walks it from the tail.
 */
103static struct redirect *red_head = NULL;
/*
 * Record separator state handed to the getrec routines by inrec() and
 * do_getline() (RS->stptr[0] and RS_regexp).  They are assigned outside
 * the part of the file visible here.
 */
104static NODE *RS;
105static Regexp *RS_regexp;
106
107int RS_is_null;
108
/* objects defined in other translation units */
109extern int output_is_tty;
110extern NODE *ARGC_node;
111extern NODE *ARGV_node;
112extern NODE *ARGIND_node;
113extern NODE *ERRNO_node;
114extern NODE **fields_arr;
115
116static jmp_buf filebuf; /* set by setjmp() in do_input(), target of longjmp() in do_nextfile() */
117
118#ifdef VMS
119/* File pointers have an extra level of indirection, and there are cases where
120 `stdin' can be null. That can crash gawk if fileno() is used as-is. */
121static int vmsrtl_fileno P((FILE *));
/* call the run-time library's fileno() here, before the name is redefined below */
122static int vmsrtl_fileno(fp) FILE *fp; { return fileno(fp); }
123#undef fileno
/* guarded replacement: yields -1 when FP or *FP is null instead of calling through */
124#define fileno(FP) (((FP) && *(FP)) ? vmsrtl_fileno(FP) : -1)
125#endif /* VMS */
126
127/* do_nextfile --- implement gawk "nextfile" extension */
128
129void
130do_nextfile()
131{
132 (void) nextfile(TRUE);
133 longjmp(filebuf, 1);
134}
135
136/* nextfile --- move to the next input data file */
137
138static IOBUF *
139nextfile(skipping)
140int skipping;
141{
142 static long i = 1;
143 static int files = 0;
144 NODE *arg;
145 static IOBUF *curfile = NULL;
146 static IOBUF mybuf;
147 const char *fname;
148
149 if (skipping) {
150 if (curfile != NULL)
151 iop_close(curfile);
152 curfile = NULL;
153 return NULL;
154 }
155 if (curfile != NULL) {
156 if (curfile->cnt == EOF) {
157 (void) iop_close(curfile);
158 curfile = NULL;
159 } else
160 return curfile;
161 }
162 for (; i < (long) (ARGC_node->lnode->numbr); i++) {
163 arg = *assoc_lookup(ARGV_node, tmp_number((AWKNUM) i));
164 if (arg->stlen == 0)
165 continue;
166 arg->stptr[arg->stlen] = '\0';
167 if (! do_traditional) {
168 unref(ARGIND_node->var_value);
169 ARGIND_node->var_value = make_number((AWKNUM) i);
170 }
171 if (! arg_assign(arg->stptr)) {
172 files++;
173 fname = arg->stptr;
174 curfile = iop_open(fname, "r", &mybuf);
175 if (curfile == NULL)
176 goto give_up;
177 curfile->flag |= IOP_NOFREE_OBJ;
178 /* This is a kludge. */
179 unref(FILENAME_node->var_value);
180 FILENAME_node->var_value = dupnode(arg);
181 FNR = 0;
182 i++;
183 break;
184 }
185 }
186 if (files == 0) {
187 files++;
188 /* no args. -- use stdin */
189 /* FNR is init'ed to 0 */
190 FILENAME_node->var_value = make_string("-", 1);
191 fname = "-";
192 curfile = iop_open(fname, "r", &mybuf);
193 if (curfile == NULL)
194 goto give_up;
195 curfile->flag |= IOP_NOFREE_OBJ;
196 }
197 return curfile;
198
199 give_up:
200 fatal("cannot open file `%s' for reading (%s)",
201 fname, strerror(errno));
202 /* NOTREACHED */
203 return 0;
204}
205
206/* set_FNR --- update internal FNR from awk variable */
207
208void
209set_FNR()
210{
211 FNR = (long) FNR_node->var_value->numbr;
212}
213
214/* set_NR --- update internal NR from awk variable */
215
216void
217set_NR()
218{
219 NR = (long) NR_node->var_value->numbr;
220}
221
222/* inrec --- This reads in a record from the input file */
223
224static int
225inrec(iop)
226IOBUF *iop;
227{
228 char *begin;
229 register int cnt;
230 int retval = 0;
231
232 if ((cnt = iop->cnt) != EOF)
233 cnt = (*(iop->getrec))
234 (&begin, iop, RS->stptr[0], RS_regexp, NULL);
235 if (cnt == EOF) {
236 cnt = 0;
237 retval = 1;
238 } else {
239 NR += 1;
240 FNR += 1;
241 set_record(begin, cnt, TRUE);
242 }
243
244 return retval;
245}
246
247/* iop_close --- close an open IOP */
248
249static int
250iop_close(iop)
251IOBUF *iop;
252{
253 int ret;
254
255 if (iop == NULL)
256 return 0;
257 errno = 0;
258
259#ifdef _CRAY
260 /* Work around bug in UNICOS popen */
261 if (iop->fd < 3)
262 ret = 0;
263 else
264#endif
265 /* save these for re-use; don't free the storage */
266 if ((iop->flag & IOP_IS_INTERNAL) != 0) {
267 iop->off = iop->buf;
268 iop->end = iop->buf + strlen(iop->buf);
269 iop->cnt = 0;
270 iop->secsiz = 0;
271 return 0;
272 }
273
274 /* Don't close standard files or else crufty code elsewhere will lose */
275 if (iop->fd == fileno(stdin)
276 || iop->fd == fileno(stdout)
277 || iop->fd == fileno(stderr)
278 || (iop->flag & IOP_MMAPPED) != 0)
279 ret = 0;
280 else
281 ret = close(iop->fd);
282
283 if (ret == -1)
284 warning("close of fd %d (`%s') failed (%s)", iop->fd,
285 iop->name, strerror(errno));
286 if ((iop->flag & IOP_NO_FREE) == 0) {
287 /*
288 * Be careful -- $0 may still reference the buffer even though
289 * an explicit close is being done; in the future, maybe we
290 * can do this a bit better.
291 */
292 if (iop->buf) {
293 if ((fields_arr[0]->stptr >= iop->buf)
294 && (fields_arr[0]->stptr < (iop->buf + iop->secsiz + iop->size))) {
295 NODE *t;
296
297 t = make_string(fields_arr[0]->stptr,
298 fields_arr[0]->stlen);
299 unref(fields_arr[0]);
300 fields_arr[0] = t;
301 reset_record();
302 }
303 if ((iop->flag & IOP_MMAPPED) == 0)
304 free(iop->buf);
305#ifdef HAVE_MMAP
306 else
307 (void) munmap(iop->buf, iop->size);
308#endif
309 }
310 if ((iop->flag & IOP_NOFREE_OBJ) == 0)
311 free((char *) iop);
312 }
313 return ret == -1 ? 1 : 0;
314}
315
316/* do_input --- the main input processing loop */
317
318void
319do_input()
320{
321 IOBUF *iop;
322 extern int exiting;
323
324 (void) setjmp(filebuf); /* for `nextfile' */
325
326 while ((iop = nextfile(FALSE)) != NULL) {
327 if (inrec(iop) == 0)
328 while (interpret(expression_value) && inrec(iop) == 0)
329 continue;
330#ifdef C_ALLOCA
331 /* recover any space from C based alloca */
332 (void) alloca(0);
333#endif
334 if (exiting)
335 break;
336 }
337}
338
339/* redirect --- Redirection for printf and print commands */
340
/*
 * Find or create the redirection described by `tree' and make sure it is open.
 *
 * tree:   a Node_redirect_* node; tree->subnode is evaluated to get the
 *         file name or command string.
 * errflg: if non-NULL, receives errno when the open fails for a reason
 *         other than running out of file descriptors.
 *
 * Returns the struct redirect (with fp or iop set), or an entry flagged
 * RED_EOF without reopening it, or NULL when an input redirection cannot be
 * opened.  Failure to open an output file or a pipe is fatal.
 */
341struct redirect *
342redirect(tree, errflg)
343NODE *tree;
344int *errflg;
345{
346 register NODE *tmp;
347 register struct redirect *rp;
348 register char *str;
349 int tflag = 0;
350 int outflag = 0;
351 const char *direction = "to";
352 const char *mode;
353 int fd;
354 const char *what = NULL;
355
 /* translate the node type into redirect flags and the operator text used in messages */
356 switch (tree->type) {
357 case Node_redirect_append:
358 tflag = RED_APPEND;
359 /* FALL THROUGH */
360 case Node_redirect_output:
361 outflag = (RED_FILE|RED_WRITE);
362 tflag |= outflag;
363 if (tree->type == Node_redirect_output)
364 what = ">";
365 else
366 what = ">>";
367 break;
368 case Node_redirect_pipe:
369 tflag = (RED_PIPE|RED_WRITE);
370 what = "|";
371 break;
372 case Node_redirect_pipein:
373 tflag = (RED_PIPE|RED_READ);
374 what = "|";
375 break;
376 case Node_redirect_input:
377 tflag = (RED_FILE|RED_READ);
378 what = "<";
379 break;
380 default:
381 fatal("invalid tree type %d in redirect()", tree->type);
382 break;
383 }
384 tmp = tree_eval(tree->subnode);
385 if (do_lint && (tmp->flags & STR) == 0)
386 warning("expression in `%s' redirection only has numeric value",
387 what);
388 tmp = force_string(tmp);
389 str = tmp->stptr;
390
391 if (str == NULL || *str == '\0')
392 fatal("expression for `%s' redirection has null string value",
393 what);
394
395 if (do_lint
396 && (STREQN(str, "0", tmp->stlen) || STREQN(str, "1", tmp->stlen)))
397 warning("filename `%s' for `%s' redirection may be result of logical expression", str, what);
 /*
  * Look for an existing entry with the same name whose flags match, or,
  * for file output, any entry already open as an output file (so `>' and
  * `>>' to the same name share one entry).
  */
398 for (rp = red_head; rp != NULL; rp = rp->next)
399 if (strlen(rp->value) == tmp->stlen
400 && STREQN(rp->value, str, tmp->stlen)
401 && ((rp->flag & ~(RED_NOBUF|RED_EOF)) == tflag
402 || (outflag != 0
403 && (rp->flag & (RED_FILE|RED_WRITE)) == outflag)))
404 break;
405 if (rp == NULL) {
 /* not found: build a new, not-yet-open entry holding a private copy of the name */
406 emalloc(rp, struct redirect *, sizeof(struct redirect),
407 "redirect");
408 emalloc(str, char *, tmp->stlen+1, "redirect");
409 memcpy(str, tmp->stptr, tmp->stlen);
410 str[tmp->stlen] = '\0';
411 rp->value = str;
412 rp->flag = tflag;
413 rp->fp = NULL;
414 rp->iop = NULL;
415 rp->pid = 0; /* unlikely that we're worried about init */
416 rp->status = 0;
417 /* maintain list in most-recently-used first order */
418 if (red_head != NULL)
419 red_head->prev = rp;
420 rp->prev = NULL;
421 rp->next = red_head;
422 red_head = rp;
423 } else
424 str = rp->value; /* get \0 terminated string */
 /* keep trying until the entry has either a stdio stream or an IOBUF */
425 while (rp->fp == NULL && rp->iop == NULL) {
426 if (rp->flag & RED_EOF)
427 /*
428 * encountered EOF on file or pipe -- must be cleared
429 * by explicit close() before reading more
430 */
431 return rp;
432 mode = NULL;
433 errno = 0;
 /* file output only picks a mode here; pipes and input are opened directly */
434 switch (tree->type) {
435 case Node_redirect_output:
436 mode = "w";
 /* RED_USED is set by close_one(): reopen for append so earlier output survives */
437 if ((rp->flag & RED_USED) != 0)
438 mode = "a";
439 break;
440 case Node_redirect_append:
441 mode = "a";
442 break;
443 case Node_redirect_pipe:
444 /* synchronize output before new pipe */
445 (void) flush_io();
446
447 if ((rp->fp = popen(str, "w")) == NULL)
448 fatal("can't open pipe (\"%s\") for output (%s)",
449 str, strerror(errno));
450 rp->flag |= RED_NOBUF;
451 break;
452 case Node_redirect_pipein:
453 direction = "from";
454 if (gawk_popen(str, rp) == NULL)
455 fatal("can't open pipe (\"%s\") for input (%s)",
456 str, strerror(errno));
457 break;
458 case Node_redirect_input:
459 direction = "from";
460 rp->iop = iop_open(str, "r", NULL);
461 break;
462 default:
463 cant_happen();
464 }
465 if (mode != NULL) {
 /* file output: get a descriptor from devopen() and put a stdio stream on it */
466 errno = 0;
467 fd = devopen(str, mode);
468 if (fd > INVALID_HANDLE) {
469 if (fd == fileno(stdin))
470 rp->fp = stdin;
471 else if (fd == fileno(stdout))
472 rp->fp = stdout;
473 else if (fd == fileno(stderr))
474 rp->fp = stderr;
475 else {
476 rp->fp = fdopen(fd, (char *) mode);
477 /* don't leak file descriptors */
478 if (rp->fp == NULL)
479 close(fd);
480 }
481 if (rp->fp != NULL && isatty(fd))
482 rp->flag |= RED_NOBUF;
483 /* Move rp to the head of the list. */
484 if (red_head != rp) {
485 if ((rp->prev->next = rp->next) != NULL)
486 rp->next->prev = rp->prev;
487 red_head->prev = rp;
488 rp->prev = NULL;
489 rp->next = red_head;
490 red_head = rp;
491 }
492 }
493 }
494 if (rp->fp == NULL && rp->iop == NULL) {
495 /* too many files open -- close one and try again */
496 if (errno == EMFILE || errno == ENFILE)
497 close_one();
498#if defined __MINGW32__ || defined HAVE_MMAP
499 /* this works for solaris 2.5, not sunos */
500 /* it is also needed for MINGW32 */
501 else if (errno == 0) /* HACK! */
502 close_one();
503#endif
504#ifdef VMS
505 /* Alpha/VMS V7.1's C RTL is returning this instead
506 of EMFILE (haven't tried other post-V6.2 systems) */
507#define SS$_EXQUOTA 0x001C
508 else if (errno == EIO && vaxc$errno == SS$_EXQUOTA)
509 close_one();
510#endif
511 else {
512 /*
513 * Some other reason for failure.
514 *
515 * On redirection of input from a file,
516 * just return an error, so e.g. getline
517 * can return -1. For output to file,
518 * complain. The shell will complain on
519 * a bad command to a pipe.
520 */
521 if (errflg != NULL)
522 *errflg = errno;
523 if (tree->type == Node_redirect_output
524 || tree->type == Node_redirect_append)
525 fatal("can't redirect %s `%s' (%s)",
526 direction, str, strerror(errno));
527 else {
528 free_temp(tmp);
529 return NULL;
530 }
531 }
532 }
533 }
534 free_temp(tmp);
535 return rp;
536}
537
538/* getredirect --- find the struct redirect for this file or pipe */
539
540struct redirect *
541getredirect(str, len)
542char *str;
543int len;
544{
545 struct redirect *rp;
546
547 for (rp = red_head; rp != NULL; rp = rp->next)
548 if (strlen(rp->value) == len && STREQN(rp->value, str, len))
549 return rp;
550
551 return NULL;
552}
553
554/* close_one --- temporarily close an open file to re-use the fd */
555
556static void
557close_one()
558{
559 register struct redirect *rp;
560 register struct redirect *rplast = NULL;
561
562 /* go to end of list first, to pick up least recently used entry */
563 for (rp = red_head; rp != NULL; rp = rp->next)
564 rplast = rp;
565 /* now work back up through the list */
566 for (rp = rplast; rp != NULL; rp = rp->prev)
567 if (rp->fp != NULL && (rp->flag & RED_FILE) != 0) {
568 rp->flag |= RED_USED;
569 errno = 0;
570 if (/* do_lint && */ fclose(rp->fp) != 0)
571 warning("close of \"%s\" failed (%s).",
572 rp->value, strerror(errno));
573 rp->fp = NULL;
574 break;
575 }
576 if (rp == NULL)
577 /* surely this is the only reason ??? */
578 fatal("too many pipes or input files open");
579}
580
581/* do_close --- completely close an open file or pipe */
582
583NODE *
584do_close(tree)
585NODE *tree;
586{
587 NODE *tmp;
588 register struct redirect *rp;
589
590 tmp = force_string(tree_eval(tree->subnode));
591
592 for (rp = red_head; rp != NULL; rp = rp->next) {
593 if (strlen(rp->value) == tmp->stlen
594 && STREQN(rp->value, tmp->stptr, tmp->stlen))
595 break;
596 }
597
598 if (rp == NULL) { /* no match */
599 /* icky special case: close(FILENAME) called. */
600 if (tree->subnode == FILENAME_node
601 || (tmp->stlen == FILENAME_node->var_value->stlen
602 && STREQN(tmp->stptr, FILENAME_node->var_value->stptr, tmp->stlen))) {
603 (void) nextfile(TRUE);
604 } else if (do_lint)
605 warning("close: `%.*s' is not an open file or pipe",
606 tmp->stlen, tmp->stptr);
607
608 free_temp(tmp);
609 return tmp_number((AWKNUM) 0.0);
610 }
611 free_temp(tmp);
612 fflush(stdout); /* synchronize regular output */
613 tmp = tmp_number((AWKNUM) close_redir(rp, FALSE));
614 rp = NULL;
615 return tmp;
616}
617
618/* close_redir --- close an open file or pipe */
619
620static int
621close_redir(rp, exitwarn)
622register struct redirect *rp;
623int exitwarn;
624{
625 int status = 0;
626 char *what;
627
628 if (rp == NULL)
629 return 0;
630 if (rp->fp == stdout || rp->fp == stderr)
631 return 0;
632 errno = 0;
633 if ((rp->flag & (RED_PIPE|RED_WRITE)) == (RED_PIPE|RED_WRITE))
634 status = pclose(rp->fp);
635 else if (rp->fp != NULL)
636 status = fclose(rp->fp);
637 else if (rp->iop != NULL) {
638 if ((rp->flag & RED_PIPE) != 0)
639 status = gawk_pclose(rp);
640 else {
641 status = iop_close(rp->iop);
642 rp->iop = NULL;
643 }
644 }
645
646 what = ((rp->flag & RED_PIPE) != 0) ? "pipe" : "file";
647
648 /* SVR4 awk checks and warns about status of close */
649 if (status != 0) {
650 char *s = strerror(errno);
651
652 /*
653 * Too many people have complained about this.
654 * As of 2.15.6, it is now under lint control.
655 */
656 if (do_lint)
657 warning("failure status (%d) on %s close of \"%s\" (%s)",
658 status, what, rp->value, s);
659
660 if (! do_traditional) {
661 /* set ERRNO too so that program can get at it */
662 unref(ERRNO_node->var_value);
663 ERRNO_node->var_value = make_string(s, strlen(s));
664 }
665 }
666
667 if (exitwarn)
668 warning("no explicit close of %s `%s' provided",
669 what, rp->value);
670
671 if (rp->next != NULL)
672 rp->next->prev = rp->prev;
673 if (rp->prev != NULL)
674 rp->prev->next = rp->next;
675 else
676 red_head = rp->next;
677 free(rp->value);
678 free((char *) rp);
679 return status;
680}
681
682/* flush_io --- flush all open output files */
683
684int
685flush_io()
686{
687 register struct redirect *rp;
688 int status = 0;
689
690 errno = 0;
691 if (fflush(stdout)) {
692 warning("error writing standard output (%s)", strerror(errno));
693 status++;
694 }
695 if (fflush(stderr)) {
696 warning("error writing standard error (%s)", strerror(errno));
697 status++;
698 }
699 for (rp = red_head; rp != NULL; rp = rp->next)
700 /* flush both files and pipes, what the heck */
701 if ((rp->flag & RED_WRITE) && rp->fp != NULL) {
702 if (fflush(rp->fp)) {
703 warning("%s flush of \"%s\" failed (%s).",
704 (rp->flag & RED_PIPE) ? "pipe" :
705 "file", rp->value, strerror(errno));
706 status++;
707 }
708 }
709 return status;
710}
711
712/* close_io --- close all open files, called when exiting */
713
714int
715close_io()
716{
717 register struct redirect *rp;
718 register struct redirect *next;
719 int status = 0;
720
721 errno = 0;
722 for (rp = red_head; rp != NULL; rp = next) {
723 next = rp->next;
724 /*
725 * close_redir() will print a message if needed
726 * if do_lint, warn about lack of explicit close
727 */
728 if (close_redir(rp, do_lint))
729 status++;
730 rp = NULL;
731 }
732 /*
733 * Some of the non-Unix os's have problems doing an fclose
734 * on stdout and stderr. Since we don't really need to close
735 * them, we just flush them, and do that across the board.
736 */
737 if (fflush(stdout)) {
738 warning("error writing standard output (%s)", strerror(errno));
739 status++;
740 }
741 if (fflush(stderr)) {
742 warning("error writing standard error (%s)", strerror(errno));
743 status++;
744 }
745 return status;
746}
747
/*
 * str2mode --- convert a string mode to an integer mode
 *
 * Only the first character of `mode' is examined:
 *	'r' -> O_RDONLY
 *	'w' -> O_WRONLY|O_CREAT|O_TRUNC
 *	'a' -> O_WRONLY|O_APPEND|O_CREAT
 * Anything else calls cant_happen(); 0 is returned if that comes back.
 */

static int
str2mode(mode)
const char *mode;
{
	int ret = 0;	/* lint */

	if (mode[0] == 'r')
		ret = O_RDONLY;
	else if (mode[0] == 'w')
		ret = O_WRONLY|O_CREAT|O_TRUNC;
	else if (mode[0] == 'a')
		ret = O_WRONLY|O_APPEND|O_CREAT;
	else
		cant_happen();

	return ret;
}
775
776/* devopen --- handle /dev/std{in,out,err}, /dev/fd/N, regular files */
777
778/*
779 * This separate version is still needed for output, since file and pipe
780 * output is done with stdio. iop_open() handles input with IOBUFs of
781 * more "special" files. Those files are not handled here since it makes
782 * no sense to use them for output.
783 */
784
785int
786devopen(name, mode)
787const char *name, *mode;
788{
789 int openfd;
790 const char *cp;
791 char *ptr;
792 int flag = 0;
793 struct stat buf;
794 extern double strtod();
795
796 flag = str2mode(mode);
797
798 if (STREQ(name, "-"))
799 openfd = fileno(stdin);
800 else
801 openfd = INVALID_HANDLE;
802
803 if (do_traditional)
804 goto strictopen;
805
806 if ((openfd = os_devopen(name, flag)) >= 0)
807 return openfd;
808
809 if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) {
810 cp = name + 5;
811
812 if (STREQ(cp, "stdin") && (flag & O_ACCMODE) == O_RDONLY)
813 openfd = fileno(stdin);
814 else if (STREQ(cp, "stdout") && (flag & O_ACCMODE) == O_WRONLY)
815 openfd = fileno(stdout);
816 else if (STREQ(cp, "stderr") && (flag & O_ACCMODE) == O_WRONLY)
817 openfd = fileno(stderr);
818 else if (STREQN(cp, "fd/", 3)) {
819 cp += 3;
820 openfd = (int) strtod(cp, &ptr);
821 if (openfd <= INVALID_HANDLE || ptr == cp)
822 openfd = INVALID_HANDLE;
823 }
824 }
825
826strictopen:
827 if (openfd == INVALID_HANDLE)
828 openfd = open(name, flag, 0666);
829 if (openfd != INVALID_HANDLE && fstat(openfd, &buf) > 0)
830 if (S_ISDIR(buf.st_mode))
831 fatal("file `%s' is a directory", name);
832 return openfd;
833}
834
835
836/* spec_setup --- setup an IOBUF for a special internal file */
837
838static void
839spec_setup(iop, len, allocate)
840IOBUF *iop;
841int len;
842int allocate;
843{
844 char *cp;
845
846 if (allocate) {
847 emalloc(cp, char *, len+2, "spec_setup");
848 iop->buf = cp;
849 } else {
850 len = strlen(iop->buf);
851 iop->buf[len++] = '\n'; /* get_a_record clobbered it */
852 iop->buf[len] = '\0'; /* just in case */
853 }
854 iop->off = iop->buf;
855 iop->cnt = 0;
856 iop->secsiz = 0;
857 iop->size = len;
858 iop->end = iop->buf + len;
859 iop->fd = -1;
860 iop->flag = IOP_IS_INTERNAL;
861 iop->getrec = get_a_record;
862}
863
864/* specfdopen --- open an fd special file */
865
866static int
867specfdopen(iop, name, mode)
868IOBUF *iop;
869const char *name, *mode;
870{
871 int fd;
872 IOBUF *tp;
873
874 fd = devopen(name, mode);
875 if (fd == INVALID_HANDLE)
876 return INVALID_HANDLE;
877 tp = iop_alloc(fd, name, NULL);
878 if (tp == NULL) {
879 /* don't leak fd's */
880 close(fd);
881 return INVALID_HANDLE;
882 }
883 *iop = *tp;
884 iop->flag |= IOP_NO_FREE;
885 free(tp);
886 return 0;
887}
888
889#ifdef GETPGRP_VOID
890#define getpgrp_arg() /* nothing */
891#else
892#define getpgrp_arg() getpid()
893#endif
894
895/* pidopen --- "open" /dev/pid, /dev/ppid, and /dev/pgrpid */
896
897static int
898pidopen(iop, name, mode)
899IOBUF *iop;
900const char *name, *mode;
901{
902 char tbuf[BUFSIZ];
903 int i;
904
905 if (name[6] == 'g')
906 sprintf(tbuf, "%d\n", (int) getpgrp(getpgrp_arg()));
907 else if (name[6] == 'i')
908 sprintf(tbuf, "%d\n", (int) getpid());
909 else
910 sprintf(tbuf, "%d\n", (int) getppid());
911 i = strlen(tbuf);
912 spec_setup(iop, i, TRUE);
913 strcpy(iop->buf, tbuf);
914 return 0;
915}
916
917/* useropen --- "open" /dev/user */
918
919/*
920 * /dev/user creates a record as follows:
921 * $1 = getuid()
922 * $2 = geteuid()
923 * $3 = getgid()
924 * $4 = getegid()
925 * If multiple groups are supported, then $5 through $NF are the
926 * supplementary group set.
927 */
928
929static int
930useropen(iop, name, mode)
931IOBUF *iop;
932const char *name, *mode;
933{
934 char tbuf[BUFSIZ], *cp;
935 int i;
936#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0
937 GETGROUPS_T groupset[NGROUPS_MAX];
938 int ngroups;
939#endif
940
941 sprintf(tbuf, "%d %d %d %d", (int) getuid(), (int) geteuid(), (int) getgid(), (int) getegid());
942
943 cp = tbuf + strlen(tbuf);
944#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0
945 ngroups = getgroups(NGROUPS_MAX, groupset);
946 if (ngroups == -1)
947 fatal("could not find groups: %s", strerror(errno));
948
949 for (i = 0; i < ngroups; i++) {
950 *cp++ = ' ';
951 sprintf(cp, "%d", (int) groupset[i]);
952 cp += strlen(cp);
953 }
954#endif
955 *cp++ = '\n';
956 *cp++ = '\0';
957
958 i = strlen(tbuf);
959 spec_setup(iop, i, TRUE);
960 strcpy(iop->buf, tbuf);
961 return 0;
962}
963
964/* iop_open --- handle special and regular files for input */
965
966static IOBUF *
967iop_open(name, mode, iop)
968const char *name, *mode;
969IOBUF *iop;
970{
971 int openfd = INVALID_HANDLE;
972 int flag = 0;
973 struct stat buf;
974 static struct internal {
975 const char *name;
976 int compare;
977 int (*fp) P((IOBUF *, const char *, const char *));
978 IOBUF iob;
979 } table[] = {
980 { "/dev/fd/", 8, specfdopen },
981 { "/dev/stdin", 10, specfdopen },
982 { "/dev/stdout", 11, specfdopen },
983 { "/dev/stderr", 11, specfdopen },
984 { "/dev/pid", 8, pidopen },
985 { "/dev/ppid", 9, pidopen },
986 { "/dev/pgrpid", 11, pidopen },
987 { "/dev/user", 9, useropen },
988 };
989 int devcount = sizeof(table) / sizeof(table[0]);
990
991 flag = str2mode(mode);
992
993 /*
994 * FIXME: remove the stat call, and always process these files
995 * internally.
996 */
997 if (STREQ(name, "-"))
998 openfd = fileno(stdin);
999 else if (do_traditional)
1000 goto strictopen;
1001 else if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) {
1002 int i;
1003
1004 for (i = 0; i < devcount; i++) {
1005 if (STREQN(name, table[i].name, table[i].compare)) {
1006 iop = & table[i].iob;
1007
1008 if (iop->buf != NULL) {
1009 spec_setup(iop, 0, FALSE);
1010 return iop;
1011 } else if ((*table[i].fp)(iop, name, mode) == 0)
1012 return iop;
1013 else {
1014 warning("could not open %s, mode `%s'",
1015 name, mode);
1016 return NULL;
1017 }
1018 }
1019 }
1020 }
1021
1022strictopen:
1023 if (openfd == INVALID_HANDLE)
1024 openfd = open(name, flag, 0666);
1025 if (openfd != INVALID_HANDLE && fstat(openfd, &buf) > 0)
1026 if (S_ISDIR(buf.st_mode))
1027 fatal("file `%s' is a directory", name);
1028 return iop_alloc(openfd, name, iop);
1029}
1030
1031#ifndef PIPES_SIMULATED /* real pipes */
1032
1033/* wait_any --- wait for a child process, close associated pipe */
1034
1035static int
1036wait_any(interesting)
1037int interesting; /* pid of interest, if any */
1038{
1039 RETSIGTYPE (*hstat)(), (*istat)(), (*qstat)();
1040 int pid;
1041 int status = 0;
1042 struct redirect *redp;
1043 extern int errno;
1044
1045 hstat = signal(SIGHUP, SIG_IGN);
1046 istat = signal(SIGINT, SIG_IGN);
1047 qstat = signal(SIGQUIT, SIG_IGN);
1048 for (;;) {
1049#ifdef HAVE_SYS_WAIT_H /* Posix compatible sys/wait.h */
1050 pid = wait(&status);
1051#else
1052 pid = wait((union wait *)&status);
1053#endif /* NeXT */
1054 if (interesting && pid == interesting) {
1055 break;
1056 } else if (pid != -1) {
1057 for (redp = red_head; redp != NULL; redp = redp->next)
1058 if (pid == redp->pid) {
1059 redp->pid = -1;
1060 redp->status = status;
1061 break;
1062 }
1063 }
1064 if (pid == -1 && errno == ECHILD)
1065 break;
1066 }
1067 signal(SIGHUP, hstat);
1068 signal(SIGINT, istat);
1069 signal(SIGQUIT, qstat);
1070 return(status);
1071}
1072
1073/* gawk_popen --- open an IOBUF on a child process */
1074
1075static IOBUF *
1076gawk_popen(cmd, rp)
1077char *cmd;
1078struct redirect *rp;
1079{
1080 int p[2];
1081 register int pid;
1082
1083 /*
1084 * used to wait for any children to synchronize input and output,
1085 * but this could cause gawk to hang when it is started in a pipeline
1086 * and thus has a child process feeding it input (shell dependant)
1087 */
1088 /*(void) wait_any(0);*/ /* wait for outstanding processes */
1089
1090 if (pipe(p) < 0)
1091 fatal("cannot open pipe \"%s\" (%s)", cmd, strerror(errno));
1092 if ((pid = fork()) == 0) {
1093 if (close(1) == -1)
1094 fatal("close of stdout in child failed (%s)",
1095 strerror(errno));
1096 if (dup(p[1]) != 1)
1097 fatal("dup of pipe failed (%s)", strerror(errno));
1098 if (close(p[0]) == -1 || close(p[1]) == -1)
1099 fatal("close of pipe failed (%s)", strerror(errno));
1100 execl("/bin/sh", "sh", "-c", cmd, NULL);
1101 _exit(127);
1102 }
1103 if (pid == -1)
1104 fatal("cannot fork for \"%s\" (%s)", cmd, strerror(errno));
1105 rp->pid = pid;
1106 if (close(p[1]) == -1)
1107 fatal("close of pipe failed (%s)", strerror(errno));
1108 rp->iop = iop_alloc(p[0], cmd, NULL);
1109 if (rp->iop == NULL)
1110 (void) close(p[0]);
1111 return (rp->iop);
1112}
1113
1114/* gawk_pclose --- close an open child pipe */
1115
1116static int
1117gawk_pclose(rp)
1118struct redirect *rp;
1119{
1120 (void) iop_close(rp->iop);
1121 rp->iop = NULL;
1122
1123 /* process previously found, return stored status */
1124 if (rp->pid == -1)
1125 return (rp->status >> 8) & 0xFF;
1126 rp->status = wait_any(rp->pid);
1127 rp->pid = -1;
1128 return (rp->status >> 8) & 0xFF;
1129}
1130
1131#else /* PIPES_SIMULATED */
1132
1133/*
1134 * use temporary file rather than pipe
1135 * except if popen() provides real pipes too
1136 */
1137
1138#if defined(VMS) || defined(OS2) || defined (MSDOS) || defined(WIN32)
1139
1140/* gawk_popen --- open an IOBUF on a child process */
1141
1142static IOBUF *
1143gawk_popen(cmd, rp)
1144char *cmd;
1145struct redirect *rp;
1146{
1147 FILE *current;
1148
1149 if ((current = popen(cmd, "r")) == NULL)
1150 return NULL;
1151 rp->iop = iop_alloc(fileno(current), cmd, NULL);
1152 if (rp->iop == NULL) {
1153 (void) pclose(current);
1154 current = NULL;
1155 }
1156 rp->ifp = current;
1157 return (rp->iop);
1158}
1159
1160/* gawk_pclose --- close an open child pipe */
1161
1162static int
1163gawk_pclose(rp)
1164struct redirect *rp;
1165{
1166 int rval, aval, fd = rp->iop->fd;
1167
1168 rp->iop->fd = dup(fd); /* kludge to allow close() + pclose() */
1169 rval = iop_close(rp->iop);
1170 rp->iop = NULL;
1171 aval = pclose(rp->ifp);
1172 rp->ifp = NULL;
1173 return (rval < 0 ? rval : aval);
1174}
1175#else /* not (VMS || OS2 || MSDOS) */
1176
/*
 * Bookkeeping for pipes simulated with temporary files.  gawk_popen() and
 * gawk_pclose() index this table by the file descriptor of the open
 * temporary file.
 */
1177static struct pipeinfo {
 /* copy of the command string, made with strdup() */
1178 char *command;
 /* temporary file name from tempnam(); NULL means the slot is not in use */
1179 char *name;
1180} pipes[_NFILE];
1181
1182/* gawk_popen --- open an IOBUF on a child process */
1183
1184static IOBUF *
1185gawk_popen(cmd, rp)
1186char *cmd;
1187struct redirect *rp;
1188{
1189 extern char *strdup P((const char *));
1190 int current;
1191 char *name;
1192 static char cmdbuf[256];
1193
1194 /* get a name to use */
1195 if ((name = tempnam(".", "pip")) == NULL)
1196 return NULL;
1197 sprintf(cmdbuf, "%s > %s", cmd, name);
1198 system(cmdbuf);
1199 if ((current = open(name, O_RDONLY)) == INVALID_HANDLE)
1200 return NULL;
1201 pipes[current].name = name;
1202 pipes[current].command = strdup(cmd);
1203 rp->iop = iop_alloc(current, name, NULL);
1204 if (rp->iop == NULL)
1205 (void) close(current);
1206 return (rp->iop);
1207}
1208
1209/* gawk_pclose --- close an open child pipe */
1210
1211static int
1212gawk_pclose(rp)
1213struct redirect *rp;
1214{
1215 int cur = rp->iop->fd;
1216 int rval;
1217
1218 rval = iop_close(rp->iop);
1219 rp->iop = NULL;
1220
1221 /* check for an open file */
1222 if (pipes[cur].name == NULL)
1223 return -1;
1224 unlink(pipes[cur].name);
1225 free(pipes[cur].name);
1226 pipes[cur].name = NULL;
1227 free(pipes[cur].command);
1228 return rval;
1229}
1230#endif /* not (VMS || OS2 || MSDOS) */
1231
1232#endif /* PIPES_SIMULATED */
1233
1234/* do_getline --- read in a line, into var and with redirection, as needed */
1235
1236NODE *
1237do_getline(tree)
1238NODE *tree;
1239{
1240 struct redirect *rp = NULL;
1241 IOBUF *iop;
1242 int cnt = EOF;
1243 char *s = NULL;
1244 int errcode;
1245
1246 while (cnt == EOF) {
1247 if (tree->rnode == NULL) { /* no redirection */
1248 iop = nextfile(FALSE);
1249 if (iop == NULL) /* end of input */
1250 return tmp_number((AWKNUM) 0.0);
1251 } else {
1252 int redir_error = 0;
1253
1254 rp = redirect(tree->rnode, &redir_error);
1255 if (rp == NULL && redir_error) { /* failed redirect */
1256 if (! do_traditional) {
1257 s = strerror(redir_error);
1258
1259 unref(ERRNO_node->var_value);
1260 ERRNO_node->var_value =
1261 make_string(s, strlen(s));
1262 }
1263 return tmp_number((AWKNUM) -1.0);
1264 }
1265 iop = rp->iop;
1266 if (iop == NULL) /* end of input */
1267 return tmp_number((AWKNUM) 0.0);
1268 }
1269 errcode = 0;
1270 cnt = (*(iop->getrec))(&s, iop, RS->stptr[0], RS_regexp, &errcode);
1271 if (errcode != 0) {
1272 if (! do_traditional) {
1273 s = strerror(errcode);
1274
1275 unref(ERRNO_node->var_value);
1276 ERRNO_node->var_value = make_string(s, strlen(s));
1277 }
1278 return tmp_number((AWKNUM) -1.0);
1279 }
1280 if (cnt == EOF) {
1281 if (rp != NULL) {
1282 /*
1283 * Don't do iop_close() here if we are
1284 * reading from a pipe; otherwise
1285 * gawk_pclose will not be called.
1286 */
1287 if ((rp->flag & RED_PIPE) == 0) {
1288 (void) iop_close(iop);
1289 rp->iop = NULL;
1290 }
1291 rp->flag |= RED_EOF; /* sticky EOF */
1292 return tmp_number((AWKNUM) 0.0);
1293 } else
1294 continue; /* try another file */
1295 }
1296 if (rp == NULL) {
1297 NR++;
1298 FNR++;
1299 }
1300 if (tree->lnode == NULL) /* no optional var. */
1301 set_record(s, cnt, TRUE);
1302 else { /* assignment to variable */
1303 Func_ptr after_assign = NULL;
1304 NODE **lhs;
1305
1306 lhs = get_lhs(tree->lnode, &after_assign);
1307 unref(*lhs);
1308 *lhs = make_string(s, cnt);
1309 (*lhs)->flags |= MAYBE_NUM;
1310 /* we may have to regenerate $0 here! */
1311 if (after_assign != NULL)
1312 (*after_assign)();
1313 }
1314 }
1315 return tmp_number((AWKNUM) 1.0);
1316}
1317
/*
 * pathopen --- pathopen with default file extension handling
 *
 * Opens FILE through do_pathopen().  When DEFAULT_FILETYPE is defined
 * and we are not in traditional mode, a failed open is retried once with
 * DEFAULT_FILETYPE appended to the name.  If the retry fails as well,
 * errno (and vaxc$errno on VMS) is restored to the value left by the
 * first attempt.  Returns the descriptor from whichever attempt ran last.
 */

int
pathopen(file)
const char *file;
{
	int fd = do_pathopen(file);

#ifdef DEFAULT_FILETYPE
	if (fd <= INVALID_HANDLE && ! do_traditional) {
		int saved_errno = errno;
#ifdef VMS
		int saved_vms_errno = vaxc$errno;
#endif
		char *candidate;

		/* append ".awk" and try again */
		emalloc(candidate, char *, strlen(file) +
			sizeof(DEFAULT_FILETYPE) + 1, "pathopen");
		sprintf(candidate, "%s%s", file, DEFAULT_FILETYPE);
		fd = do_pathopen(candidate);
		free(candidate);

		if (fd > INVALID_HANDLE)
			return fd;

		/* report the failure of the name the user actually gave */
		errno = saved_errno;
#ifdef VMS
		vaxc$errno = saved_vms_errno;
#endif
	}
#endif	/*DEFAULT_FILETYPE*/

	return fd;
}
1351
1352/* do_pathopen --- search $AWKPATH for source file */
1353
1354static int
1355do_pathopen(file)
1356const char *file;
1357{
1358 static const char *savepath = NULL;
1359 static int first = TRUE;
1360 const char *awkpath;
1361 char *cp, trypath[BUFSIZ];
1362 int fd;
1363
1364 if (STREQ(file, "-"))
1365 return (0);
1366
1367 if (do_traditional)
1368 return (devopen(file, "r"));
1369
1370 if (first) {
1371 first = FALSE;
1372 if ((awkpath = getenv("AWKPATH")) != NULL && *awkpath)
1373 savepath = awkpath; /* used for restarting */
1374 else
1375 savepath = defpath;
1376 }
1377 awkpath = savepath;
1378
1379 /* some kind of path name, no search */
1380 if (ispath(file))
1381 return (devopen(file, "r"));
1382
1383 do {
1384 trypath[0] = '\0';
1385 /* this should take into account limits on size of trypath */
1386 for (cp = trypath; *awkpath && *awkpath != envsep; )
1387 *cp++ = *awkpath++;
1388
1389 if (cp != trypath) { /* nun-null element in path */
1390 /* add directory punctuation only if needed */
1391 if (! isdirpunct(*(cp-1)))
1392 *cp++ = '/';
1393 /* append filename */
1394 strcpy(cp, file);
1395 } else
1396 strcpy(trypath, file);
1397 if ((fd = devopen(trypath, "r")) > INVALID_HANDLE)
1398 return (fd);
1399
1400 /* no luck, keep going */
1401 if(*awkpath == envsep && awkpath[1] != '\0')
1402 awkpath++; /* skip colon */
1403 } while (*awkpath != '\0');
1404 /*
1405 * You might have one of the awk paths defined, WITHOUT the current
1406 * working directory in it. Therefore try to open the file in the
1407 * current directory.
1408 */
1409 return (devopen(file, "r"));
1410}
1411
1412#ifdef TEST
/* buffer size used by the stand-alone test build; main() may override it */
int bufsize = 8192;

/* fatal --- test-build stand-in: print the message on stdout and exit(1) */

void
fatal(s)
char *s;
{
	(void) printf("%s\n", s);
	exit(1);
}
1422#endif
1423
1424/* iop_alloc --- allocate an IOBUF structure for an open fd */
1425
1426static IOBUF *
1427iop_alloc(fd, name, iop)
1428int fd;
1429const char *name;
1430IOBUF *iop;
1431{
1432 struct stat sbuf;
1433
1434 if (fd == INVALID_HANDLE)
1435 return NULL;
1436 if (iop == NULL)
1437 emalloc(iop, IOBUF *, sizeof(IOBUF), "iop_alloc");
1438 iop->flag = 0;
1439 if (isatty(fd))
1440 iop->flag |= IOP_IS_TTY;
1441 iop->size = optimal_bufsize(fd, & sbuf);
1442 if (do_lint && S_ISREG(sbuf.st_mode) && sbuf.st_size == 0)
1443 warning("data file `%s' is empty", name);
1444 iop->secsiz = -2;
1445 errno = 0;
1446 iop->fd = fd;
1447 iop->off = iop->buf = NULL;
1448 iop->cnt = 0;
1449 iop->name = name;
1450 iop->getrec = get_a_record;
1451#ifdef HAVE_MMAP
1452 /* Use mmap only for regular files with positive sizes.
1453 The size must fit into size_t, so that mmap works correctly.
1454 Also, it must fit into int, so that iop->cnt won't overflow. */
1455 if (S_ISREG(sbuf.st_mode) && sbuf.st_size > 0
1456 && sbuf.st_size == (size_t) sbuf.st_size
1457 && sbuf.st_size == (int) sbuf.st_size) {
1458 register char *cp;
1459
1460 iop->buf = iop->off = mmap((caddr_t) 0, sbuf.st_size,
1461 PROT_READ|PROT_WRITE, MAP_PRIVATE,
1462 fd, 0L);
1463 /* cast is for buggy compilers (e.g. DEC OSF/1) */
1464 if (iop->buf == (caddr_t)MAP_FAILED) {
1465 iop->buf = iop->off = NULL;
1466 goto out;
1467 }
1468
1469 iop->flag |= IOP_MMAPPED;
1470 iop->size = sbuf.st_size;
1471 iop->secsiz = 0;
1472 iop->end = iop->buf + iop->size;
1473 iop->cnt = sbuf.st_size;
1474 iop->getrec = mmap_get_record;
1475 (void) close(fd);
1476 iop->fd = INVALID_HANDLE;
1477
1478#if defined(HAVE_MADVISE) && defined(MADV_SEQUENTIAL)
1479 madvise(iop->buf, iop->size, MADV_SEQUENTIAL);
1480#endif
1481 /*
1482 * The following is a really gross hack.
1483 * We want to ensure that we have a copy of the input
1484 * data that won't go away, on the off chance that someone
1485 * will truncate the data file we've just mmap'ed.
1486 * So, we go through and touch each page, forcing the
1487 * system to give us a private copy. A page size of 512
1488 * guarantees this will work, even on the least common
1489 * denominator system (like, oh say, a VAX).
1490 */
1491 for (cp = iop->buf; cp < iop->end; cp += 512)
1492 *cp = *cp;
1493 }
1494out:
1495#endif /* HAVE_MMAP */
1496 return iop;
1497}
1498
/* These macros are used by both record reading routines */

/*
 * set_RT_to_null --- unless in traditional mode, release the current
 * value of RT and make it the null string.
 */
#define set_RT_to_null() \
	((void) (do_traditional ? 0 : \
		(unref(RT_node->var_value), \
		 RT_node->var_value = Nnull_string, 0)))

/*
 * set_RT --- unless in traditional mode, release the current value of
 * RT and replace it with a string made from the LEN bytes at STR.
 */
#define set_RT(str, len) \
	((void) (do_traditional ? 0 : \
		(unref(RT_node->var_value), \
		 RT_node->var_value = make_string(str, len), 0)))
1507
1508/*
1509 * get_a_record:
1510 * Get the next record. Uses a "split buffer" where the latter part is
1511 * the normal read buffer and the head part is an "overflow" area that is used
1512 * when a record spans the end of the normal buffer, in which case the first
1513 * part of the record is copied into the overflow area just before the
1514 * normal buffer. Thus, the eventual full record can be returned as a
1515 * contiguous area of memory with a minimum of copying. The overflow area
1516 * is expanded as needed, so that records are unlimited in length.
1517 * We also mark both the end of the buffer and the end of the read() with
1518 * a sentinel character (the current record separator) so that the inside
1519 * loop can run as a single test.
1520 *
1521 * Note that since we know or can compute the end of the read and the end
1522 * of the buffer, the sentinel character does not get in the way of regexp
1523 * based searching, since we simply search up to that character, but not
1524 * including it.
1525 */
1526
1527static int
1528get_a_record(out, iop, grRS, RSre, errcode)
1529char **out; /* pointer to pointer to data */
1530IOBUF *iop; /* input IOP */
1531register int grRS; /* first char in RS->stptr */
1532Regexp *RSre; /* regexp for RS */
1533int *errcode; /* pointer to error variable */
1534{
1535 register char *bp = iop->off;
1536 char *bufend;
1537 char *start = iop->off; /* beginning of record */
1538 int rs;
1539 static Regexp *RS_null_re = NULL;
1540 Regexp *rsre = NULL;
1541 int continuing = FALSE, continued = FALSE; /* used for re matching */
1542 int onecase;
1543
1544 /* first time through */
1545 if (RS_null_re == NULL) {
1546 RS_null_re = make_regexp("\n\n+", 3, TRUE, TRUE);
1547 if (RS_null_re == NULL)
1548 fatal("internal error: file `%s', line %d\n",
1549 __FILE__, __LINE__);
1550 }
1551
1552 if (iop->cnt == EOF) { /* previous read hit EOF */
1553 *out = NULL;
1554 set_RT_to_null();
1555 return EOF;
1556 }
1557
1558 if (RS_is_null) /* special case: RS == "" */
1559 rs = '\n';
1560 else
1561 rs = (char) grRS;
1562
1563 onecase = (IGNORECASE && isalpha(rs));
1564 if (onecase)
1565 rs = casetable[rs];
1566
1567 /* set up sentinel */
1568 if (iop->buf) {
1569 bufend = iop->buf + iop->size + iop->secsiz;
1570 *bufend = rs; /* add sentinel to buffer */
1571 } else
1572 bufend = NULL;
1573
1574 for (;;) { /* break on end of record, read error or EOF */
1575/* buffer mgmt, chunk #1 */
1576 /*
1577 * Following code is entered on the first call of this routine
1578 * for a new iop, or when we scan to the end of the buffer.
1579 * In the latter case, we copy the current partial record to
1580 * the space preceding the normal read buffer. If necessary,
1581 * we expand this space. This is done so that we can return
1582 * the record as a contiguous area of memory.
1583 */
1584 if ((iop->flag & IOP_IS_INTERNAL) == 0 && bp >= bufend) {
1585 char *oldbuf = NULL;
1586 char *oldsplit = iop->buf + iop->secsiz;
1587 long len; /* record length so far */
1588
1589 len = bp - start;
1590 if (len > iop->secsiz) {
1591 /* expand secondary buffer */
1592 if (iop->secsiz == -2)
1593 iop->secsiz = 256;
1594 while (len > iop->secsiz)
1595 iop->secsiz *= 2;
1596 oldbuf = iop->buf;
1597 emalloc(iop->buf, char *,
1598 iop->size+iop->secsiz+2, "get_a_record");
1599 bufend = iop->buf + iop->size + iop->secsiz;
1600 *bufend = rs;
1601 }
1602 if (len > 0) {
1603 char *newsplit = iop->buf + iop->secsiz;
1604
1605 if (start < oldsplit) {
1606 memcpy(newsplit - len, start,
1607 oldsplit - start);
1608 memcpy(newsplit - (bp - oldsplit),
1609 oldsplit, bp - oldsplit);
1610 } else
1611 memcpy(newsplit - len, start, len);
1612 }
1613 bp = iop->end = iop->off = iop->buf + iop->secsiz;
1614 start = bp - len;
1615 if (oldbuf != NULL) {
1616 free(oldbuf);
1617 oldbuf = NULL;
1618 }
1619 }
1620/* buffer mgmt, chunk #2 */
1621 /*
1622 * Following code is entered whenever we have no more data to
1623 * scan. In most cases this will read into the beginning of
1624 * the main buffer, but in some cases (terminal, pipe etc.)
1625 * we may be doing smallish reads into more advanced positions.
1626 */
1627 if (bp >= iop->end) {
1628 if ((iop->flag & IOP_IS_INTERNAL) != 0) {
1629 iop->cnt = EOF;
1630 break;
1631 }
1632 iop->cnt = read(iop->fd, iop->end, bufend - iop->end);
1633 if (iop->cnt == -1) {
1634 if (! do_traditional && errcode != NULL) {
1635 *errcode = errno;
1636 iop->cnt = EOF;
1637 break;
1638 } else
1639 fatal("error reading input file `%s': %s",
1640 iop->name, strerror(errno));
1641 } else if (iop->cnt == 0) {
1642 /*
1643 * hit EOF before matching RS, so end
1644 * the record and set RT to ""
1645 */
1646 iop->cnt = EOF;
1647 /* see comments below about this test */
1648 if (! continuing) {
1649 set_RT_to_null();
1650 break;
1651 }
1652 }
1653 if (iop->cnt != EOF) {
1654 iop->end += iop->cnt;
1655 *iop->end = rs; /* reset the sentinel */
1656 }
1657 }
1658/* buffers are now setup and filled with data */
1659/* search for RS, #1, regexp based, or RS = "" */
1660 /*
1661 * Attempt to simplify the code a bit. The case where
1662 * RS = "" can also be described by a regexp, RS = "\n\n+".
1663 * The buffer managment and searching code can thus now
1664 * use a common case (the one for regexps) both when RS is
1665 * a regexp, and when RS = "". This particularly benefits
1666 * us for keeping track of how many newlines were matched
1667 * in order to set RT.
1668 */
1669 if (! do_traditional && RSre != NULL) /* regexp */
1670 rsre = RSre;
1671 else if (RS_is_null) /* RS = "" */
1672 rsre = RS_null_re;
1673 else
1674 rsre = NULL;
1675
1676 /*
1677 * Look for regexp match of RS. Non-match conditions are:
1678 * 1. No match at all
1679 * 2. Match of a null string
1680 * 3. Match ends at exact end of buffer
1681 * Number 3 is subtle; we have to add more to the buffer
1682 * in case the match would have extended further into the
1683 * file, since regexp match by definition always matches the
1684 * longest possible match.
1685 *
1686 * It is even more subtle than you might think. Suppose
1687 * the re matches at exactly the end of file. We don't know
1688 * that until we try to add more to the buffer. Thus, we
1689 * set a flag to indicate, that if eof really does happen,
1690 * don't break early.
1691 */
1692 continuing = FALSE;
1693 if (rsre != NULL) {
1694 again:
1695 /* cases 1 and 2 are simple, just keep going */
1696 if (research(rsre, start, 0, iop->end - start, TRUE) == -1
1697 || RESTART(rsre, start) == REEND(rsre, start)) {
1698 /*
1699 * Leading newlines at the beginning of the file
1700 * should be ignored. Whew!
1701 */
1702 if (RS_is_null && *start == '\n') {
1703 /*
1704 * have to catch the case of a
1705 * single newline at the front of
1706 * the record, which the regex
1707 * doesn't. gurr.
1708 */
1709 while (*start == '\n' && start < iop->end)
1710 start++;
1711 goto again;
1712 }
1713 bp = iop->end;
1714 continue;
1715 }
1716 /* case 3, regex match at exact end */
1717 if (start + REEND(rsre, start) >= iop->end) {
1718 if (iop->cnt != EOF) {
1719 bp = iop->end;
1720 continuing = continued = TRUE;
1721 continue;
1722 }
1723 }
1724 /* got a match! */
1725 /*
1726 * Leading newlines at the beginning of the file
1727 * should be ignored. Whew!
1728 *
1729 * Is this code ever executed?
1730 */
1731 if (RS_is_null && RESTART(rsre, start) == 0) {
1732 start += REEND(rsre, start);
1733 goto again;
1734 }
1735 bp = start + RESTART(rsre, start);
1736 set_RT(bp, REEND(rsre, start) - RESTART(rsre, start));
1737 *bp = '\0';
1738 iop->off = start + REEND(rsre, start);
1739 break;
1740 }
1741/* search for RS, #2, RS = <single char> */
1742 if (onecase) {
1743 while (casetable[(int) *bp++] != rs)
1744 continue;
1745 } else {
1746 while (*bp++ != rs)
1747 continue;
1748 }
1749 set_RT(bp - 1, 1);
1750
1751 if (bp <= iop->end)
1752 break;
1753 else
1754 bp--;
1755
1756 if ((iop->flag & IOP_IS_INTERNAL) != 0)
1757 iop->cnt = bp - start;
1758 }
1759 if (iop->cnt == EOF
1760 && (((iop->flag & IOP_IS_INTERNAL) != 0)
1761 || (start == bp && ! continued))) {
1762 *out = NULL;
1763 set_RT_to_null();
1764 return EOF;
1765 }
1766
1767 if (do_traditional || rsre == NULL) {
1768 char *bstart;
1769
1770 bstart = iop->off = bp;
1771 bp--;
1772 if (onecase ? casetable[(int) *bp] != rs : *bp != rs) {
1773 bp++;
1774 bstart = bp;
1775 }
1776 *bp = '\0';
1777 } else if (RS_is_null && iop->cnt == EOF) {
1778 /*
1779 * special case, delete trailing newlines,
1780 * should never be more than one.
1781 */
1782 while (bp[-1] == '\n')
1783 bp--;
1784 *bp = '\0';
1785 }
1786
1787 *out = start;
1788 return bp - start;
1789}
1790
1791#ifdef TEST
1792int
1793main(argc, argv)
1794int argc;
1795char *argv[];
1796{
1797 IOBUF *iop;
1798 char *out;
1799 int cnt;
1800 char rs[2];
1801
1802 rs[0] = '\0';
1803 if (argc > 1)
1804 bufsize = atoi(argv[1]);
1805 if (argc > 2)
1806 rs[0] = *argv[2];
1807 iop = iop_alloc(0, "stdin", NULL);
1808 while ((cnt = get_a_record(&out, iop, rs[0], NULL, NULL)) > 0) {
1809 fwrite(out, 1, cnt, stdout);
1810 fwrite(rs, 1, 1, stdout);
1811 }
1812 return 0;
1813}
1814#endif
1815
1816#ifdef HAVE_MMAP
1817/* mmap_get_record --- pull a record out of a memory-mapped file */
1818
1819static int
1820mmap_get_record(out, iop, grRS, RSre, errcode)
1821char **out; /* pointer to pointer to data */
1822IOBUF *iop; /* input IOP */
1823register int grRS; /* first char in RS->stptr */
1824Regexp *RSre; /* regexp for RS */
1825int *errcode; /* pointer to error variable */
1826{
1827 register char *bp = iop->off;
1828 char *start = iop->off; /* beginning of record */
1829 int rs;
1830 static Regexp *RS_null_re = NULL;
1831 Regexp *rsre = NULL;
1832 int onecase;
1833 register char *end = iop->end;
1834 int cnt;
1835
1836 /* first time through */
1837 if (RS_null_re == NULL) {
1838 RS_null_re = make_regexp("\n\n+", 3, TRUE, TRUE);
1839 if (RS_null_re == NULL)
1840 fatal("internal error: file `%s', line %d\n",
1841 __FILE__, __LINE__);
1842 }
1843
1844 if (iop->off >= iop->end) { /* previous record was last */
1845 *out = NULL;
1846 set_RT_to_null();
1847 iop->cnt = EOF; /* tested by higher level code */
1848 return EOF;
1849 }
1850
1851 if (RS_is_null) /* special case: RS == "" */
1852 rs = '\n';
1853 else
1854 rs = (char) grRS;
1855
1856 onecase = (IGNORECASE && isalpha(rs));
1857 if (onecase)
1858 rs = casetable[rs];
1859
1860 /* if RS = "", skip leading newlines at the front of the file */
1861 if (RS_is_null && iop->off == iop->buf) {
1862 for (bp = iop->off; *bp == '\n'; bp++)
1863 continue;
1864
1865 if (bp != iop->off)
1866 iop->off = start = bp;
1867 }
1868
1869 /*
1870 * Regexp based searching. Either RS = "" or RS = <regex>
1871 * See comments in get_a_record.
1872 */
1873 if (! do_traditional && RSre != NULL) /* regexp */
1874 rsre = RSre;
1875 else if (RS_is_null) /* RS = "" */
1876 rsre = RS_null_re;
1877 else
1878 rsre = NULL;
1879
1880 /*
1881 * Look for regexp match of RS. Non-match conditions are:
1882 * 1. No match at all
1883 * 2. Match of a null string
1884 * 3. Match ends at exact end of buffer
1885 *
1886 * #1 means that the record ends the file
1887 * and there is no text that actually matched RS.
1888 *
1889 * #2: is probably like #1.
1890 *
1891 * #3 is simple; since we have the whole file mapped, it's
1892 * the last record in the file.
1893 */
1894 if (rsre != NULL) {
1895 if (research(rsre, start, 0, iop->end - start, TRUE) == -1
1896 || RESTART(rsre, start) == REEND(rsre, start)) {
1897 /* no matching text, we have the record */
1898 *out = start;
1899 iop->off = iop->end; /* all done with the record */
1900 set_RT_to_null();
1901 /* special case, don't allow trailing newlines */
1902 if (RS_is_null && *(iop->end - 1) == '\n')
1903 return iop->end - start - 1;
1904 else
1905 return iop->end - start;
1906
1907 }
1908 /* have a match */
1909 *out = start;
1910 bp = start + RESTART(rsre, start);
1911 set_RT(bp, REEND(rsre, start) - RESTART(rsre, start));
1912 *bp = '\0';
1913 iop->off = start + REEND(rsre, start);
1914 return bp - start;
1915 }
1916
1917 /*
1918 * RS = "?", i.e., one character based searching.
1919 *
1920 * Alas, we can't just plug the sentinel character in at
1921 * the end of the mmapp'ed file ( *(iop->end) = rs; ). This
1922 * works if we're lucky enough to have a file that does not
1923 * take up all of its last disk block. But if we end up with
1924 * file whose size is an even multiple of the disk block size,
1925 * assigning past the end of it delivers a SIGBUS. So, we have to
1926 * add the extra test in the while loop at the front that looks
1927 * for going past the end of the mapped object. Sigh.
1928 */
1929 /* search for RS, #2, RS = <single char> */
1930 if (onecase) {
1931 while (bp < end && casetable[*bp++] != rs)
1932 continue;
1933 } else {
1934 while (bp < end && *bp++ != rs)
1935 continue;
1936 }
1937 cnt = (bp - start) - 1;
1938 if (bp >= iop->end) {
1939 /* at end, may have actually seen rs, or may not */
1940 if (*(bp-1) == rs)
1941 set_RT(bp - 1, 1); /* real RS seen */
1942 else {
1943 cnt++;
1944 set_RT_to_null();
1945 }
1946 } else
1947 set_RT(bp - 1, 1);
1948
1949 iop->off = bp;
1950 *out = start;
1951 return cnt;
1952}
1953#endif /* HAVE_MMAP */
1954
1955/* set_RS --- update things as appropriate when RS is set */
1956
1957void
1958set_RS()
1959{
1960 static NODE *save_rs = NULL;
1961
1962 if (save_rs && cmp_nodes(RS_node->var_value, save_rs) == 0)
1963 return;
1964 unref(save_rs);
1965 save_rs = dupnode(RS_node->var_value);
1966 RS_is_null = FALSE;
1967 RS = force_string(RS_node->var_value);
1968 if (RS_regexp != NULL) {
1969 refree(RS_regexp);
1970 RS_regexp = NULL;
1971 }
1972 if (RS->stlen == 0)
1973 RS_is_null = TRUE;
1974 else if (RS->stlen > 1) {
1975 static int warned = FALSE;
1976
1977 RS_regexp = make_regexp(RS->stptr, RS->stlen, IGNORECASE, TRUE);
1978
1979 if (do_lint && ! warned) {
1980 warning("multicharacter value of `RS' is not portable");
1981 warned = TRUE;
1982 }
1983 }
1984
1985 set_FS_if_not_FIELDWIDTHS();
1986}