sed(1): Sync with FreeBSD (adds -u switch)
author John Marino <draco@marino.st>
Thu, 6 Nov 2014 08:08:48 +0000 (09:08 +0100)
committer John Marino <draco@marino.st>
Thu, 6 Nov 2014 11:36:51 +0000 (12:36 +0100)
This is a minor sync with FreeBSD.  The -r option, an alias of -E, was
recently added.  This adds "-u" which makes the output unbuffered.  There
are minor tweaks to newline and file handling.

usr.bin/sed/compile.c
usr.bin/sed/defs.h
usr.bin/sed/extern.h
usr.bin/sed/main.c
usr.bin/sed/misc.c
usr.bin/sed/process.c
usr.bin/sed/sed.1

index 72c5cf5..6a11527 100644 (file)
@@ -62,7 +62,7 @@ static struct labhash {
 
 static char     *compile_addr(char *, struct s_addr *);
 static char     *compile_ccl(char **, char *);
-static char     *compile_delimited(char *, char *);
+static char     *compile_delimited(char *, char *, int);
 static char     *compile_flags(char *, struct s_subst *);
 static regex_t  *compile_re(char *, int);
 static char     *compile_subst(char *, struct s_subst *);
@@ -316,7 +316,7 @@ nonsel:             /* Now parse the command */
                                        linenum, fname);
                        if ((cmd->u.s = calloc(1, sizeof(struct s_subst))) == NULL)
                                err(1, "malloc");
-                       p = compile_delimited(p, re);
+                       p = compile_delimited(p, re, 0);
                        if (p == NULL)
                                errx(1,
                                "%lu: %s: unterminated substitute pattern", linenum, fname);
@@ -369,7 +369,7 @@ nonsel:             /* Now parse the command */
  * with the processed string.
  */
 static char *
-compile_delimited(char *p, char *d)
+compile_delimited(char *p, char *d, int is_tr)
 {
        char c;
 
@@ -383,7 +383,7 @@ compile_delimited(char *p, char *d)
                errx(1, "%lu: %s: newline can not be used as a string delimiter",
                                linenum, fname);
        while (*p) {
-               if (*p == '[') {
+               if (*p == '[' && *p != c) {
                        if ((d = compile_ccl(&p, d)) == NULL)
                                errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname);
                        continue;
@@ -395,9 +395,12 @@ compile_delimited(char *p, char *d)
                        *d++ = '\n';
                        p += 2;
                        continue;
-               } else if (*p == '\\' && p[1] == '\\')
-                       *d++ = *p++;
-               else if (*p == c) {
+               } else if (*p == '\\' && p[1] == '\\') {
+                       if (is_tr)
+                               p++;
+                       else
+                               *d++ = *p++;
+               } else if (*p == c) {
                        *d = '\0';
                        return (p + 1);
                }
@@ -425,8 +428,7 @@ compile_ccl(char **sp, char *t)
                        for (c = *s; (*t = *s) != ']' || c != d; s++, t++)
                                if ((c = *s) == '\0')
                                        return NULL;
-               } else if (*s == '\\' && s[1] == 'n')
-                           *t = '\n', s++;
+               }
        return (*s == ']') ? *sp = ++s, ++t : NULL;
 }
 
@@ -651,11 +653,11 @@ compile_tr(char *p, struct s_tr **py)
                errx(1,
        "%lu: %s: transform pattern can not be delimited by newline or backslash",
                        linenum, fname);
-       p = compile_delimited(p, old);
+       p = compile_delimited(p, old, 1);
        if (p == NULL)
                errx(1, "%lu: %s: unterminated transform source string",
                                linenum, fname);
-       p = compile_delimited(p - 1, new);
+       p = compile_delimited(p - 1, new, 1);
        if (p == NULL)
                errx(1, "%lu: %s: unterminated transform target string",
                                linenum, fname);
@@ -778,7 +780,7 @@ compile_addr(char *p, struct s_addr *a)
                ++p;
                /* FALLTHROUGH */
        case '/':                               /* Context address */
-               p = compile_delimited(p, re);
+               p = compile_delimited(p, re, 0);
                if (p == NULL)
                        errx(1, "%lu: %s: unterminated regular expression", linenum, fname);
                /* Check for case insensitive regexp flag */
index 73dff7e..86529af 100644 (file)
@@ -32,7 +32,6 @@
  *
  *     @(#)defs.h      8.1 (Berkeley) 6/6/93
  * $FreeBSD: src/usr.bin/sed/defs.h,v 1.8 2009/05/25 06:45:33 brian Exp $
- * $DragonFly: src/usr.bin/sed/defs.h,v 1.2 2008/04/08 13:23:38 swildner Exp $
  */
 
 /*
@@ -144,6 +143,7 @@ typedef struct {
        char *space;            /* Current space pointer. */
        size_t len;             /* Current length. */
        int deleted;            /* If deleted. */
+       int append_newline;     /* If originally terminated by \n. */
        char *back;             /* Backing memory. */
        size_t blen;            /* Backing memory length. */
 } SPACE;
index 1f33542..488468f 100644 (file)
@@ -32,7 +32,6 @@
  *
  *     @(#)extern.h    8.1 (Berkeley) 6/6/93
  * $FreeBSD: src/usr.bin/sed/extern.h,v 1.15 2007/06/12 12:05:23 yar Exp $
- * $DragonFly: src/usr.bin/sed/extern.h,v 1.3 2008/04/08 13:23:38 swildner Exp $
  */
 
 extern struct s_command *prog;
index 9b68dca..3937e30 100644 (file)
@@ -1,4 +1,5 @@
 /*-
+ * Copyright (c) 2013 Johann 'Myrkraverk' Oskarsson.
  * Copyright (c) 1992 Diomidis Spinellis.
  * Copyright (c) 1992, 1993
  *     The Regents of the University of California.  All rights reserved.
@@ -33,7 +34,6 @@
  * @(#) Copyright (c) 1992, 1993 The Regents of the University of California.  All rights reserved.
  * @(#)main.c  8.2 (Berkeley) 1/3/94
  * $FreeBSD: src/usr.bin/sed/main.c,v 1.41 2008/02/09 09:12:02 dwmalone Exp $
- * $DragonFly: src/usr.bin/sed/main.c,v 1.4 2008/04/08 13:23:38 swildner Exp $
  */
 
 #include <sys/types.h>
@@ -122,7 +122,7 @@ main(int argc, char *argv[])
        fflag = 0;
        inplace = NULL;
 
-       while ((c = getopt(argc, argv, "EI:ae:f:i:lnr")) != -1)
+       while ((c = getopt(argc, argv, "EI:ae:f:i:lnru")) != -1)
                switch (c) {
                case 'r':               /* GNU sed compat */
                case 'E':
@@ -152,12 +152,16 @@ main(int argc, char *argv[])
                        ispan = 0;      /* don't span across input files */
                        break;
                case 'l':
-                       if(setlinebuf(stdout) != 0)
-                               warnx("setlinebuf() failed");
+                       if(setvbuf(stdout, NULL, _IOLBF, 0) != 0)
+                               warnx("setting line buffered output failed");
                        break;
                case 'n':
                        nflag = 1;
                        break;
+               case 'u':
+                       if(setvbuf(stdout, NULL, _IONBF, 0) != 0)
+                               warnx("setting unbuffered output failed");
+                       break;
                default:
                case '?':
                        usage();
@@ -189,9 +193,10 @@ main(int argc, char *argv[])
 static void
 usage(void)
 {
-       (void)fprintf(stderr, "%s\n%s\n",
-               "usage: sed script [-Ealn] [-i extension] [file ...]",
-               "       sed [-Ealn] [-i extension] [-e script] ... [-f script_file] ... [file ...]");
+       (void)fprintf(stderr,
+           "usage: %s script [-Ealnru] [-i extension] [file ...]\n"
+           "\t%s [-Ealnu] [-i extension] [-e script] ... [-f script_file]"
+           " ... [file ...]\n", getprogname(), getprogname());
        exit(1);
 }
 
@@ -299,8 +304,9 @@ int
 mf_fgets(SPACE *sp, enum e_spflag spflag)
 {
        struct stat sb;
-       size_t len;
-       char *p;
+       ssize_t len;
+       static char *p = NULL;
+       static size_t plen = 0;
        int c;
        static int firstfile;
 
@@ -330,18 +336,35 @@ mf_fgets(SPACE *sp, enum e_spflag spflag)
                if (infile != NULL) {
                        fclose(infile);
                        if (*oldfname != '\0') {
-                               if (rename(fname, oldfname) != 0) {
+                               /* if there was a backup file, remove it */
+                               unlink(oldfname);
+                               /*
+                                * Backup the original.  Note that hard links
+                                * are not supported on all filesystems.
+                                */
+                               if ((link(fname, oldfname) != 0) &&
+                                  (rename(fname, oldfname) != 0)) {
                                        warn("rename()");
-                                       unlink(tmpfname);
+                                       if (*tmpfname)
+                                               unlink(tmpfname);
                                        exit(1);
                                }
                                *oldfname = '\0';
                        }
                        if (*tmpfname != '\0') {
                                if (outfile != NULL && outfile != stdout)
-                                       fclose(outfile);
+                                       if (fclose(outfile) != 0) {
+                                               warn("fclose()");
+                                               unlink(tmpfname);
+                                               exit(1);
+                                       }
                                outfile = NULL;
-                               rename(tmpfname, fname);
+                               if (rename(tmpfname, fname) != 0) {
+                                       /* this should not happen really! */
+                                       warn("rename()");
+                                       unlink(tmpfname);
+                                       exit(1);
+                               }
                                *tmpfname = '\0';
                        }
                        outfname = NULL;
@@ -367,13 +390,13 @@ mf_fgets(SPACE *sp, enum e_spflag spflag)
                                    sizeof(oldfname));
                                len = strlcat(oldfname, inplace,
                                    sizeof(oldfname));
-                               if (len > sizeof(oldfname))
+                               if (len > (ssize_t)sizeof(oldfname))
                                        errx(1, "%s: name too long", fname);
                        }
                        len = snprintf(tmpfname, sizeof(tmpfname),
                            "%s/.!%ld!%s", dirname(fname), (long)getpid(),
                            basename(fname));
-                       if (len >= sizeof(tmpfname))
+                       if (len >= (ssize_t)sizeof(tmpfname))
                                errx(1, "%s: name too long", fname);
                        unlink(tmpfname);
                        if ((outfile = fopen(tmpfname, "w")) == NULL)
@@ -399,15 +422,21 @@ mf_fgets(SPACE *sp, enum e_spflag spflag)
         * We are here only when infile is open and we still have something
         * to read from it.
         *
-        * Use fgetln so that we can handle essentially infinite input data.
-        * Can't use the pointer into the stdio buffer as the process space
-        * because the ungetc() can cause it to move.
+        * Use getline() so that we can handle essentially infinite input
+        * data.  The p and plen are static so each invocation gives
+        * getline() the same buffer which is expanded as needed.
         */
-       p = fgetln(infile, &len);
-       if (ferror(infile))
-               errx(1, "%s: %s", fname, strerror(errno ? errno : EIO));
-       if (len != 0 && p[len - 1] == '\n')
+       len = getline(&p, &plen, infile);
+       if (len == -1)
+               err(1, "%s", fname);
+       if (len != 0 && p[len - 1] == '\n') {
+               sp->append_newline = 1;
                len--;
+       } else if (!lastline()) {
+               sp->append_newline = 1;
+       } else {
+               sp->append_newline = 0;
+       }
        cspace(sp, p, len, spflag);
 
        linenum++;
@@ -448,15 +477,49 @@ add_file(char *s)
        fl_nextp = &fp->next;
 }
 
+static int
+next_files_have_lines(void)
+{
+       struct s_flist *file;
+       FILE *file_fd;
+       int ch;
+
+       file = files;
+       while ((file = file->next) != NULL) {
+               if ((file_fd = fopen(file->fname, "r")) == NULL)
+                       continue;
+
+               if ((ch = getc(file_fd)) != EOF) {
+                       /*
+                        * This next file has content, therefore the current
+                        * file doesn't contain the last line.
+                        */
+                       ungetc(ch, file_fd);
+                       fclose(file_fd);
+                       return (1);
+               }
+
+               fclose(file_fd);
+       }
+
+       return (0);
+}
+
 int
 lastline(void)
 {
        int ch;
 
-       if (files->next != NULL && (inplace == NULL || ispan))
-               return (0);
-       if ((ch = getc(infile)) == EOF)
-               return (1);
+       if (feof(infile)) {
+               return !(
+                   (inplace == NULL || ispan) &&
+                   next_files_have_lines());
+       }
+       if ((ch = getc(infile)) == EOF) {
+               return !(
+                   (inplace == NULL || ispan) &&
+                   next_files_have_lines());
+       }
        ungetc(ch, infile);
        return (0);
 }
index 149b306..10cc407 100644 (file)
@@ -32,7 +32,6 @@
  *
  * @(#)misc.c  8.1 (Berkeley) 6/6/93
  * $FreeBSD: src/usr.bin/sed/misc.c,v 1.10 2004/08/09 15:29:41 dds Exp $
- * $DragonFly: src/usr.bin/sed/misc.c,v 1.4 2008/04/08 13:23:38 swildner Exp $
  */
 
 #include <sys/types.h>
index e9e8c0a..5606f11 100644 (file)
@@ -32,7 +32,6 @@
  *
  * @(#)process.c       8.6 (Berkeley) 4/20/94
  * $FreeBSD: src/usr.bin/sed/process.c,v 1.50 2009/05/25 06:45:33 brian Exp $
- * $DragonFly: src/usr.bin/sed/process.c,v 1.6 2008/04/08 13:23:38 swildner Exp $
  */
 
 #include <sys/types.h>
@@ -60,6 +59,7 @@ static SPACE HS, PS, SS, YS;
 #define        pd              PS.deleted
 #define        ps              PS.space
 #define        psl             PS.len
+#define        psanl           PS.append_newline
 #define        hs              HS.space
 #define        hsl             HS.len
 
@@ -82,7 +82,10 @@ static regex_t *defpreg;
 size_t maxnsub;
 regmatch_t *match;
 
-#define OUT() do {fwrite(ps, 1, psl, outfile); fputc('\n', outfile);} while (0)
+#define OUT() do {                             \
+       fwrite(ps, 1, psl, outfile);            \
+       if (psanl) fputc('\n', outfile);        \
+} while (0)
 
 void
 process(void)
@@ -91,6 +94,7 @@ process(void)
        SPACE tspace;
        size_t oldpsl = 0;
        char *p;
+       int oldpsanl;
 
        p = NULL;
 
@@ -187,11 +191,15 @@ redirect:
                                        break;
                                if ((p = memchr(ps, '\n', psl)) != NULL) {
                                        oldpsl = psl;
+                                       oldpsanl = psanl;
                                        psl = p - ps;
+                                       psanl = 1;
                                }
                                OUT();
-                               if (p != NULL)
+                               if (p != NULL) {
                                        psl = oldpsl;
+                                       psanl = oldpsanl;
+                               }
                                break;
                        case 'q':
                                if (!nflag && !pd)
@@ -241,6 +249,7 @@ redirect:
                                        cspace(&HS, "", 0, REPLACE);
                                tspace = PS;
                                PS = HS;
+                               psanl = tspace.append_newline;
                                HS = tspace;
                                break;
                        case 'y':
@@ -285,24 +294,32 @@ applies(struct s_command *cp)
                r = 1;
        else if (cp->a2)
                if (cp->startline > 0) {
-                       if (MATCH(cp->a2)) {
-                               cp->startline = 0;
-                               lastaddr = 1;
-                               r = 1;
-                       } else if (linenum - cp->startline <= cp->a2->u.l)
-                               r = 1;
-                       else if ((cp->a2->type == AT_LINE &&
-                                  linenum > cp->a2->u.l) ||
-                                  (cp->a2->type == AT_RELLINE &&
-                                  linenum - cp->startline > cp->a2->u.l)) {
-                               /*
-                                * We missed the 2nd address due to a branch,
-                                * so just close the range and return false.
-                                */
-                               cp->startline = 0;
-                               r = 0;
-                       } else
-                               r = 1;
+                       switch (cp->a2->type) {
+                       case AT_RELLINE:
+                               if (linenum - cp->startline <= cp->a2->u.l)
+                                       r = 1;
+                               else {
+                                       cp->startline = 0;
+                                       r = 0;
+                               }
+                               break;
+                       default:
+                               if (MATCH(cp->a2)) {
+                                       cp->startline = 0;
+                                       lastaddr = 1;
+                                       r = 1;
+                               } else if (cp->a2->type == AT_LINE &&
+                                           linenum > cp->a2->u.l) {
+                                       /*
+                                        * We missed the 2nd address due to a
+                                        * branch, so just close the range and
+                                        * return false.
+                                        */
+                                       cp->startline = 0;
+                                       r = 0;
+                               } else
+                                       r = 1;
+                       }
                } else if (MATCH(cp->a1)) {
                        /*
                         * If the second address is a number less than or
@@ -441,6 +458,7 @@ substitute(struct s_command *cp)
         */
        tspace = PS;
        PS = SS;
+       psanl = tspace.append_newline;
        SS = tspace;
        SS.space = SS.back;
 
@@ -510,6 +528,7 @@ do_tr(struct s_tr *y)
                /* Swap the translation space and the pattern space. */
                tmp = PS;
                PS = YS;
+               psanl = tmp.append_newline;
                YS = tmp;
                YS.space = YS.back;
        }
index ad4d47b..aaa4668 100644 (file)
@@ -31,7 +31,7 @@
 .\"    @(#)sed.1       8.2 (Berkeley) 12/30/93
 .\" $FreeBSD: src/usr.bin/sed/sed.1,v 1.50 2009/05/25 21:29:06 brian Exp $
 .\"
-.Dd December 9, 2013
+.Dd June 20, 2014
 .Dt SED 1
 .Os
 .Sh NAME
@@ -39,7 +39,7 @@
 .Nd stream editor
 .Sh SYNOPSIS
 .Nm
-.Op Fl Ealnr
+.Op Fl Ealnru
 .Ar command
 .Op Ar
 .Nm
@@ -117,7 +117,7 @@ file boundaries, and the
 .Dq $
 address matches only the last line of the last file.
 (See
-.Sx SED ADDRESSES . )
+.Sx "Sed Addresses" . )
 That can lead to unexpected results in many cases of in-place editing,
 where using
 .Fl i
@@ -132,7 +132,7 @@ the
 address matches the last line of the current file,
 and address ranges are limited to the current file.
 (See
-.Sx SED ADDRESSES . )
+.Sx "Sed Addresses" . )
 The net result is as though each file were edited by a separate
 .Nm
 instance.
@@ -148,6 +148,8 @@ option suppresses this behavior.
 Same as
 .Fl E
 for compatibility with GNU sed.
+.It Fl u
+Make output unbuffered.
 .El
 .Pp
 The form of a
@@ -174,7 +176,7 @@ deletes the pattern space.
 Some of the functions use a
 .Em "hold space"
 to save all or part of the pattern space for subsequent retrieval.
-.Sh SED ADDRESSES
+.Sh "Sed Addresses"
 An address is not required, but if specified must have one of the
 following formats:
 .Bl -bullet -offset indent
@@ -195,8 +197,7 @@ option was specified);
 .It
 a context address
 that consists of a regular expression preceded and followed by a
-delimiter.
-The closing delimiter can also optionally be followed by the
+delimiter. The closing delimiter can also optionally be followed by the
 .Dq i
 character, to indicate that the regular expression is to be matched
 in a case-insensitive way.
@@ -233,7 +234,7 @@ Editing commands can be applied to non-selected pattern spaces by use
 of the exclamation character
 .Pq Dq \&!
 function.
-.Sh SED REGULAR EXPRESSIONS
+.Sh "Sed Regular Expressions"
 The regular expressions used in
 .Nm ,
 by default, are basic regular expressions (BREs, see
@@ -290,7 +291,7 @@ will substitute
 .Dq XXX
 for the pattern
 .Dq abc .
-.Sh SED FUNCTIONS
+.Sh "Sed Functions"
 In the following list of commands, the maximum number of permissible
 addresses for each command is indicated by [0addr], [1addr], or [2addr],
 representing zero, one, or two addresses.
@@ -344,7 +345,7 @@ can be preceded by white space and can be followed by white space.
 The function can be preceded by white space.
 The terminating
 .Dq }
-must be preceded by a newline or optional white space.
+must be preceded by a newline, and may also be preceded by white space.
 .Pp
 .Bl -tag -width "XXXXXX" -compact
 .It [2addr] function-list