From 8506af8a722b9ccbd07c3cef62b280e258a5eb14 Mon Sep 17 00:00:00 2001 From: John Marino Date: Fri, 31 Jul 2015 12:00:55 +0200 Subject: [PATCH] libc/regex: Sync with FreeBSD --- lib/libc/regex/engine.c | 21 ++++++++++----------- lib/libc/regex/re_format.7 | 12 +++++++++--- lib/libc/regex/regcomp.c | 25 ++++++++++++++++++++----- 3 files changed, 39 insertions(+), 19 deletions(-) diff --git a/lib/libc/regex/engine.c b/lib/libc/regex/engine.c index dcadf398a3..739541182e 100644 --- a/lib/libc/regex/engine.c +++ b/lib/libc/regex/engine.c @@ -30,11 +30,12 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * + * $FreeBSD: head/lib/libc/regex/engine.c 279104 2015-02-21 15:02:27Z pfg $ + * * @(#)engine.c 8.5 (Berkeley) 3/20/94 - * $FreeBSD: src/lib/libc/regex/engine.c,v 1.21 2007/05/25 12:44:58 delphij Exp $ - * $DragonFly: src/lib/libc/regex/engine.c,v 1.7 2005/11/20 09:18:37 swildner Exp $ */ + /* * The matching engine and friends. This file is #included by regexec.c * after suitable #defines of a variety of macros used herein, so that @@ -156,7 +157,7 @@ matcher(struct re_guts *g, int i; struct match mv; struct match *m = &mv; - const char *dp; + const char *dp = NULL; const sopno gf = g->firststate+1; /* +1 for OEND */ const sopno gl = g->laststate; const char *start; @@ -243,9 +244,11 @@ matcher(struct re_guts *g, ZAPSTATE(&m->mbs); /* Adjust start according to moffset, to speed things up */ - if (g->moffset > -1) + if (dp != NULL && g->moffset > -1) start = ((dp - g->moffset) < start) ? start : dp - g->moffset; + SP("mloop", m->st, *start); + /* this loop does only one repetition except for backrefs */ for (;;) { endp = fast(m, start, stop, gf, gl); @@ -683,19 +686,16 @@ backref(struct match *m, while (m->g->strip[ss] != SOP(O_BACK, i)) ss++; return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); - break; case OQUEST_: /* to null or not */ dp = backref(m, sp, stop, ss+1, stopst, lev, rec); if (dp != NULL) return(dp); /* not */ return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); - break; case OPLUS_: assert(m->lastpos != NULL); assert(lev+1 <= m->g->nplus); m->lastpos[lev+1] = sp; return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); - break; case O_PLUS: if (sp == m->lastpos[lev]) /* last pass matched null */ return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); @@ -706,7 +706,6 @@ backref(struct match *m, return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); else return(dp); - break; case OCH_: /* find the right one, if any */ ssub = ss + 1; esub = ss + OPND(s) - 1; @@ -727,6 +726,7 @@ backref(struct match *m, else assert(OP(m->g->strip[esub]) == O_CH); } + /* NOTREACHED */ break; case OLPAREN: /* must undo assignment if rest fails */ i = OPND(s); @@ -738,7 +738,6 @@ backref(struct match *m, return(dp); m->pmatch[i].rm_so = offsave; return(NULL); - break; case ORPAREN: /* must undo assignment if rest fails */ i = OPND(s); assert(0 < i && i <= m->g->nsub); @@ -749,7 +748,6 @@ backref(struct match *m, return(dp); m->pmatch[i].rm_eo = offsave; return(NULL); - break; default: /* uh oh */ assert(nope); break; @@ -786,6 +784,7 @@ fast( struct match *m, CLEAR(st); SET1(st, startst); + SP("fast", st, *p); st = step(m->g, startst, stopst, st, NOTHING, st); ASSIGN(fresh, st); SP("start", st, *p); @@ -1071,7 +1070,7 @@ step(struct re_guts *g, OP(s = g->strip[pc+look]) != O_CH; look += OPND(s)) assert(OP(s) == OOR2); - FWD(aft, aft, look); + FWD(aft, aft, look + 1); } break; case OOR2: /* propagate OCH_'s marking */ diff --git a/lib/libc/regex/re_format.7 b/lib/libc/regex/re_format.7 index d5e6fa4645..bcc6a4a3b3 100644 --- a/lib/libc/regex/re_format.7 +++ b/lib/libc/regex/re_format.7 @@ -30,10 +30,9 @@ .\" SUCH DAMAGE. .\" .\" @(#)re_format.7 8.3 (Berkeley) 3/20/94 -.\" $FreeBSD: src/lib/libc/regex/re_format.7,v 1.12 2008/09/05 17:41:20 keramida Exp $ -.\" $DragonFly: src/lib/libc/regex/re_format.7,v 1.3 2008/05/02 02:05:04 swildner Exp $ +.\" $FreeBSD: head/lib/libc/regex/re_format.7 282007 2015-04-26 10:55:39Z bapt $ .\" -.Dd March 20, 1994 +.Dd June 30, 2014 .Dt RE_FORMAT 7 .Os .Sh NAME @@ -311,6 +310,13 @@ compatible with but not specified by .St -p1003.2 , and should be used with caution in software intended to be portable to other systems. +The additional word delimiters +.Ql \e< +and +.Ql \e> +are provided to ease compatibility with traditional +.Xr svr4 4 +systems but are not portable and should be avoided. .Pp In the event that an RE could match more than one substring of a given string, diff --git a/lib/libc/regex/regcomp.c b/lib/libc/regex/regcomp.c index 18bc71b8ad..413d53e29d 100644 --- a/lib/libc/regex/regcomp.c +++ b/lib/libc/regex/regcomp.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 - * $FreeBSD: head/lib/libc/regex/regcomp.c 247596 2013-03-01 23:26:13Z delphij $ + * $FreeBSD: head/lib/libc/regex/regcomp.c 281858 2015-04-22 17:09:02Z pfg $ * * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 */ @@ -426,7 +426,17 @@ p_ere_exp(struct parse *p) case '\\': (void)REQUIRE(MORE(), REG_EESCAPE); wc = WGETNEXT(); - ordinary(p, wc); + switch (wc) { + case '<': + EMIT(OBOW, 0); + break; + case '>': + EMIT(OEOW, 0); + break; + default: + ordinary(p, wc); + break; + } break; case '{': /* okay as ordinary except if digit follows */ (void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); @@ -583,6 +593,12 @@ p_simp_re(struct parse *p, case '[': p_bracket(p); break; + case BACKSL|'<': + EMIT(OBOW, 0); + break; + case BACKSL|'>': + EMIT(OEOW, 0); + break; case BACKSL|'{': SETERROR(REG_BADRPT); break; @@ -760,7 +776,6 @@ p_b_term(struct parse *p, cset *cs) case '-': SETERROR(REG_ERANGE); return; /* NOTE RETURN */ - break; default: c = '\0'; break; @@ -1404,8 +1419,8 @@ static void findmust(struct parse *p, struct re_guts *g) { sop *scan; - sop *start; - sop *newstart; + sop *start = NULL; + sop *newstart = NULL; sopno newlen; sop s; char *cp; -- 2.41.0