/* Parse a string, yielding a struct partime that describes it. */ /* Copyright 1993, 1994, 1995, 1997 Paul Eggert Distributed under license by the Free Software Foundation, Inc. This file is part of RCS. RCS is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. RCS is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with RCS; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. Report problems and direct all questions to: rcs-bugs@cs.purdue.edu */ #if has_conf_h # include #else # if HAVE_CONFIG_H # include # else # ifndef __STDC__ # define const # endif # endif # if HAVE_LIMITS_H # include # endif # ifndef LONG_MIN # define LONG_MIN (-1-2147483647L) # endif # if STDC_HEADERS # include # endif # include # ifdef __STDC__ # define P(x) x # else # define P(x) () # endif #endif #include #if STDC_HEADERS # define CTYPE_DOMAIN(c) 1 #else # define CTYPE_DOMAIN(c) ((unsigned) (c) <= 0177) #endif #define ISALNUM(c) (CTYPE_DOMAIN (c) && isalnum (c)) #define ISALPHA(c) (CTYPE_DOMAIN (c) && isalpha (c)) #define ISSPACE(c) (CTYPE_DOMAIN (c) && isspace (c)) #define ISUPPER(c) (CTYPE_DOMAIN (c) && isupper (c)) #define ISDIGIT(c) ((unsigned) (c) - '0' <= 9) #include char const partimeId[] = "$Id: partime.c,v 5.16 1997/05/19 06:33:53 eggert Exp $"; /* Lookup tables for names of months, weekdays, time zones. */ #define NAME_LENGTH_MAXIMUM 4 struct name_val { char name[NAME_LENGTH_MAXIMUM]; int val; }; static char const *parse_decimal P ((char const *, int, int, int, int, int *, int *)); static char const *parse_fixed P ((char const *, int, int *)); static char const *parse_pattern_letter P ((char const *, int, struct partime *)); static char const *parse_prefix P ((char const *, struct partime *, int *)); static char const *parse_ranged P ((char const *, int, int, int, int *)); static int lookup P ((char const *, struct name_val const[])); static int merge_partime P ((struct partime *, struct partime const *)); static void undefine P ((struct partime *)); static struct name_val const month_names[] = { {"jan", 0}, {"feb", 1}, {"mar", 2}, {"apr", 3}, {"may", 4}, {"jun", 5}, {"jul", 6}, {"aug", 7}, {"sep", 8}, {"oct", 9}, {"nov", 10}, {"dec", 11}, {"", TM_UNDEFINED} }; static struct name_val const weekday_names[] = { {"sun", 0}, {"mon", 1}, {"tue", 2}, {"wed", 3}, {"thu", 4}, {"fri", 5}, {"sat", 6}, {"", TM_UNDEFINED} }; #define hr60nonnegative(t) ((t)/100 * 60 + (t)%100) #define hr60(t) ((t)<0 ? -hr60nonnegative(-(t)) : hr60nonnegative(t)) #define zs(t,s) {s, hr60(t)} #define zd(t,s,d) zs(t, s), zs((t)+100, d) static struct name_val const zone_names[] = { zs (-1000, "hst"), /* Hawaii */ zd (-1000, "hast", "hadt"), /* Hawaii-Aleutian */ zd (- 900, "akst", "akdt"), /* Alaska */ zd (- 800, "pst" , "pdt" ), /* Pacific */ zd (- 700, "mst" , "mdt" ), /* Mountain */ zd (- 600, "cst" , "cdt" ), /* Central */ zd (- 500, "est" , "edt" ), /* Eastern */ zd (- 400, "ast" , "adt" ), /* Atlantic */ zd (- 330, "nst" , "ndt" ), /* Newfoundland */ zs ( 000, "utc" ), /* Coordinated Universal */ zs ( 000, "uct" ), /* " */ zs ( 000, "cut" ), /* " */ zs ( 000, "ut"), /* Universal */ zs ( 000, "z"), /* Zulu (required by ISO 8601) */ zd ( 000, "gmt" , "bst" ), /* Greenwich Mean, British Summer */ zd ( 000, "wet" , "west"), /* Western European */ zd ( 100, "cet" , "cest"), /* Central European */ zd ( 100, "met" , "mest"), /* Middle European (bug in old tz versions) */ zd ( 100, "mez" , "mesz"), /* Mittel-Europaeische Zeit */ zd ( 200, "eet" , "eest"), /* Eastern European */ zs ( 530, "ist" ), /* India */ zd ( 900, "jst" , "jdt" ), /* Japan */ zd ( 900, "kst" , "kdt" ), /* Korea */ zd ( 1200, "nzst", "nzdt"), /* New Zealand */ {"lt", 1}, #if 0 /* The following names are duplicates or are not well attested. There are lots more where these came from. */ zs (-1100, "sst" ), /* Samoan */ zd (- 900, "yst" , "ydt" ), /* Yukon - name is no longer used */ zd (- 500, "ast" , "adt" ), /* Acre */ zd (- 400, "wst" , "wdt" ), /* Western Brazil */ zd (- 400, "cst" , "cdt" ), /* Chile */ zd (- 200, "fst" , "fdt" ), /* Fernando de Noronha */ zs ( 000, "wat" ), /* West African */ zs ( 100, "cat" ), /* Central African */ zs ( 200, "sat" ), /* South African */ zd ( 200, "ist" , "idt" ), /* Israel */ zs ( 300, "eat" ), /* East African */ zd ( 300, "msk" , "msd" ), /* Moscow */ zd ( 330, "ist" , "idt" ), /* Iran */ zs ( 800, "hkt" ), /* Hong Kong */ zs ( 800, "sgt" ), /* Singapore */ zd ( 800, "cst" , "cdt" ), /* China */ zd ( 800, "wst" , "wst" ), /* Western Australia */ zd ( 930, "cst" , "cst" ), /* Central Australia */ zs ( 1000, "gst" ), /* Guam */ zd ( 1000, "est" , "est" ), /* Eastern Australia */ #endif {"", -1} }; /* Look for a prefix of S in TABLE, returning val for first matching entry. */ static int lookup (s, table) char const *s; struct name_val const table[]; { int j; char buf[NAME_LENGTH_MAXIMUM]; for (j = 0; j < NAME_LENGTH_MAXIMUM; j++) { unsigned char c = *s++; if (! ISALPHA (c)) { buf[j] = '\0'; break; } buf[j] = ISUPPER (c) ? tolower (c) : c; } for (;; table++) for (j = 0; ; j++) if (j == NAME_LENGTH_MAXIMUM || ! table[0].name[j]) return table[0].val; else if (buf[j] != table[0].name[j]) break; } /* Set *T to ``undefined'' values. */ static void undefine (t) struct partime *t; { t->tm.tm_sec = t->tm.tm_min = t->tm.tm_hour = t->tm.tm_mday = t->tm.tm_mon = t->tm.tm_year = t->tm.tm_wday = t->tm.tm_yday = t->ymodulus = t->yweek = TM_UNDEFINED; t->zone = TM_UNDEFINED_ZONE; } /* Array of patterns to look for in a date string. Order is important: we look for the first matching pattern whose values do not contradict values that we already know about. See `parse_pattern_letter' below for the meaning of the pattern codes. */ static char const *const patterns[] = { /* These traditional patterns must come first, to prevent an ISO 8601 format from misinterpreting their prefixes. */ "E_n_y", "x", /* RFC 822 */ "E_n", "n_E", "n", "t:m:s_A", "t:m_A", "t_A", /* traditional */ "y/N/D$", /* traditional RCS */ /* ISO 8601:1988 formats, generalized a bit. */ "y-N-D$", "4ND$", "Y-N$", "RND$", "-R=N$", "-R$", "--N=D$", "N=DT", "--N$", "---D$", "DT", "Y-d$", "4d$", "R=d$", "-d$", "dT", "y-W-X", "yWX", "y=W", "-r-W-X", "r-W-XT", "-rWX", "rWXT", "-W=X", "W=XT", "-W", "-w-X", "w-XT", "---X$", "XT", "4$", "T", "h:m:s$", "hms$", "h:m$", "hm$", "h$", "-m:s$", "-ms$", "-m$", "--s$", "Y", "Z", 0 }; /* Parse an initial prefix of STR, setting *T accordingly. Return the first character after the prefix, or 0 if it couldn't be parsed. Start with pattern *PI; if success, set *PI to the next pattern to try. Set *PI to -1 if we know there are no more patterns to try; if *PI is initially negative, give up immediately. */ static char const * parse_prefix (str, t, pi) char const *str; struct partime *t; int *pi; { int i = *pi; char const *pat; unsigned char c; if (i < 0) return 0; /* Remove initial noise. */ while (! ISALNUM (c = *str) && c != '-' && c != '+') { if (! c) { undefine (t); *pi = -1; return str; } str++; } /* Try a pattern until one succeeds. */ while ((pat = patterns[i++]) != 0) { char const *s = str; undefine (t); do { if (! (c = *pat++)) { *pi = i; return s; } } while ((s = parse_pattern_letter (s, c, t)) != 0); } return 0; } /* Parse an initial prefix of S of length DIGITS; it must be a number. Store the parsed number into *RES. Return the first character after the prefix, or 0 if it wasn't parsed. */ static char const * parse_fixed (s, digits, res) char const *s; int digits, *res; { int n = 0; char const *lim = s + digits; while (s < lim) { unsigned d = *s++ - '0'; if (9 < d) return 0; n = 10 * n + d; } *res = n; return s; } /* Parse an initial prefix of S of length DIGITS; it must be a number in the range LO through HI. Store the parsed number into *RES. Return the first character after the prefix, or 0 if it wasn't parsed. */ static char const * parse_ranged (s, digits, lo, hi, res) char const *s; int digits, lo, hi, *res; { s = parse_fixed (s, digits, res); return s && lo <= *res && *res <= hi ? s : 0; } /* Parse an initial prefix of S of length DIGITS; it must be a number in the range LO through HI and it may be followed by a fraction to be computed using RESOLUTION. Store the parsed number into *RES; store the fraction times RESOLUTION, rounded to the nearest integer, into *FRES. Return the first character after the prefix, or 0 if it wasn't parsed. */ static char const * parse_decimal (s, digits, lo, hi, resolution, res, fres) char const *s; int digits, lo, hi, resolution, *res, *fres; { s = parse_fixed (s, digits, res); if (s && lo <= *res && *res <= hi) { int f = 0; if ((s[0] == ',' || s[0] == '.') && ISDIGIT (s[1])) { char const *s1 = ++s; int num10 = 0, denom10 = 10, product; while (ISDIGIT (*++s)) { int d = denom10 * 10; if (d / 10 != denom10) return 0; /* overflow */ denom10 = d; } s = parse_fixed (s1, (int) (s - s1), &num10); product = num10 * resolution; f = (product + (denom10 >> 1)) / denom10; f -= f & (product % denom10 == denom10 >> 1); /* round to even */ if (f < 0 || product/resolution != num10) return 0; /* overflow */ } *fres = f; return s; } return 0; } /* Parse an initial prefix of S; it must denote a time zone. Set *ZONE to the number of seconds east of GMT, or to TM_LOCAL_ZONE if it is the local time zone. Return the first character after the prefix, or 0 if it wasn't parsed. */ char * parzone (s, zone) char const *s; long *zone; { char sign; int hh, mm, ss; int minutesEastOfUTC; long offset, z; /* The formats are LT, n, n DST, nDST, no, o where n is a time zone name and o is a time zone offset of the form [-+]hh[:mm[:ss]]. */ switch (*s) { case '-': case '+': z = 0; break; default: minutesEastOfUTC = lookup (s, zone_names); if (minutesEastOfUTC == -1) return 0; /* Don't bother to check rest of spelling. */ while (ISALPHA ((unsigned char) *s)) s++; /* Don't modify LT. */ if (minutesEastOfUTC == 1) { *zone = TM_LOCAL_ZONE; return (char *) s; } z = minutesEastOfUTC * 60L; /* Look for trailing " DST". */ if ((s[-1] == 'T' || s[-1] == 't') && (s[-2] == 'S' || s[-2] == 's') && (s[-3] == 'D' || s[-3] == 'd')) goto trailing_dst; while (ISSPACE ((unsigned char) *s)) s++; if ((s[0] == 'D' || s[0] == 'd') && (s[1] == 'S' || s[1] == 's') && (s[2] == 'T' || s[2] == 't')) { s += 3; trailing_dst: *zone = z + 60*60; return (char *) s; } switch (*s) { case '-': case '+': break; default: *zone = z; return (char *) s; } break; } sign = *s++; if (! (s = parse_ranged (s, 2, 0, 23, &hh))) return 0; mm = ss = 0; if (*s == ':') s++; if (ISDIGIT (*s)) { if (! (s = parse_ranged (s, 2, 0, 59, &mm))) return 0; if (*s == ':' && s[-3] == ':' && ISDIGIT (s[1]) && ! (s = parse_ranged (s + 1, 2, 0, 59, &ss))) return 0; } if (ISDIGIT (*s)) return 0; offset = (hh * 60 + mm) * 60L + ss; *zone = z + (sign == '-' ? -offset : offset); /* ?? Are fractions allowed here? If so, they're not implemented. */ return (char *) s; } /* Parse an initial prefix of S, matching the pattern whose code is C. Set *T accordingly. Return the first character after the prefix, or 0 if it wasn't parsed. */ static char const * parse_pattern_letter (s, c, t) char const *s; int c; struct partime *t; { switch (c) { case '$': /* The next character must be a non-digit. */ if (ISDIGIT (*s)) return 0; break; case '-': case '/': case ':': /* These characters stand for themselves. */ if (*s++ != c) return 0; break; case '4': /* 4-digit year */ s = parse_fixed (s, 4, &t->tm.tm_year); break; case '=': /* optional '-' */ s += *s == '-'; break; case 'A': /* AM or PM */ /* This matches the regular expression [AaPp][Mm]?. It must not be followed by a letter or digit; otherwise it would match prefixes of strings like "PST". */ switch (*s++) { case 'A': case 'a': if (t->tm.tm_hour == 12) t->tm.tm_hour = 0; break; case 'P': case 'p': if (t->tm.tm_hour != 12) t->tm.tm_hour += 12; break; default: return 0; } switch (*s) { case 'M': case 'm': s++; break; } if (ISALNUM ((unsigned char) *s)) return 0; break; case 'D': /* day of month [01-31] */ s = parse_ranged (s, 2, 1, 31, &t->tm.tm_mday); break; case 'd': /* day of year [001-366] */ s = parse_ranged (s, 3, 1, 366, &t->tm.tm_yday); t->tm.tm_yday--; break; case 'E': /* extended day of month [1-9, 01-31] */ s = parse_ranged (s, (ISDIGIT (s[0]) && ISDIGIT (s[1])) + 1, 1, 31, &t->tm.tm_mday); break; case 'h': /* hour [00-23 followed by optional fraction] */ { int frac; s = parse_decimal (s, 2, 0, 23, 60 * 60, &t->tm.tm_hour, &frac); t->tm.tm_min = frac / 60; t->tm.tm_sec = frac % 60; } break; case 'm': /* minute [00-59 followed by optional fraction] */ s = parse_decimal (s, 2, 0, 59, 60, &t->tm.tm_min, &t->tm.tm_sec); break; case 'n': /* month name [e.g. "Jan"] */ if (! TM_DEFINED (t->tm.tm_mon = lookup (s, month_names))) return 0; /* Don't bother to check rest of spelling. */ while (ISALPHA ((unsigned char) *s)) s++; break; case 'N': /* month [01-12] */ s = parse_ranged (s, 2, 1, 12, &t->tm.tm_mon); t->tm.tm_mon--; break; case 'r': /* year % 10 (remainder in origin-0 decade) [0-9] */ s = parse_fixed (s, 1, &t->tm.tm_year); t->ymodulus = 10; break; case_R: case 'R': /* year % 100 (remainder in origin-0 century) [00-99] */ s = parse_fixed (s, 2, &t->tm.tm_year); t->ymodulus = 100; break; case 's': /* second [00-60 followed by optional fraction] */ { int frac; s = parse_decimal (s, 2, 0, 60, 1, &t->tm.tm_sec, &frac); t->tm.tm_sec += frac; } break; case 'T': /* 'T' or 't' */ switch (*s++) { case 'T': case 't': break; default: return 0; } break; case 't': /* traditional hour [1-9 or 01-12] */ s = parse_ranged (s, (ISDIGIT (s[0]) && ISDIGIT (s[1])) + 1, 1, 12, &t->tm.tm_hour); break; case 'w': /* 'W' or 'w' only (stands for current week) */ switch (*s++) { case 'W': case 'w': break; default: return 0; } break; case 'W': /* 'W' or 'w', followed by a week of year [00-53] */ switch (*s++) { case 'W': case 'w': break; default: return 0; } s = parse_ranged (s, 2, 0, 53, &t->yweek); break; case 'X': /* weekday (1=Mon ... 7=Sun) [1-7] */ s = parse_ranged (s, 1, 1, 7, &t->tm.tm_wday); t->tm.tm_wday--; break; case 'x': /* weekday name [e.g. "Sun"] */ if (! TM_DEFINED (t->tm.tm_wday = lookup (s, weekday_names))) return 0; /* Don't bother to check rest of spelling. */ while (ISALPHA ((unsigned char) *s)) s++; break; case 'y': /* either R or Y */ if (ISDIGIT (s[0]) && ISDIGIT (s[1]) && ! ISDIGIT (s[2])) goto case_R; /* fall into */ case 'Y': /* year in full [4 or more digits] */ { int len = 0; while (ISDIGIT (s[len])) len++; if (len < 4) return 0; s = parse_fixed (s, len, &t->tm.tm_year); } break; case 'Z': /* time zone */ s = parzone (s, &t->zone); break; case '_': /* possibly empty sequence of non-alphanumerics */ while (! ISALNUM ((unsigned char) *s) && *s) s++; break; default: /* bad pattern */ return 0; } return s; } /* If there is no conflict, merge into *T the additional information in *U and return 0. Otherwise do nothing and return -1. */ static int merge_partime (t, u) struct partime *t; struct partime const *u; { # define conflict(a,b) ((a) != (b) && TM_DEFINED (a) && TM_DEFINED (b)) if (conflict (t->tm.tm_sec, u->tm.tm_sec) || conflict (t->tm.tm_min, u->tm.tm_min) || conflict (t->tm.tm_hour, u->tm.tm_hour) || conflict (t->tm.tm_mday, u->tm.tm_mday) || conflict (t->tm.tm_mon, u->tm.tm_mon) || conflict (t->tm.tm_year, u->tm.tm_year) || conflict (t->tm.tm_wday, u->tm.tm_yday) || conflict (t->ymodulus, u->ymodulus) || conflict (t->yweek, u->yweek) || (t->zone != u->zone && t->zone != TM_UNDEFINED_ZONE && u->zone != TM_UNDEFINED_ZONE)) return -1; # undef conflict # define merge_(a,b) if (TM_DEFINED (b)) (a) = (b); merge_ (t->tm.tm_sec, u->tm.tm_sec) merge_ (t->tm.tm_min, u->tm.tm_min) merge_ (t->tm.tm_hour, u->tm.tm_hour) merge_ (t->tm.tm_mday, u->tm.tm_mday) merge_ (t->tm.tm_mon, u->tm.tm_mon) merge_ (t->tm.tm_year, u->tm.tm_year) merge_ (t->tm.tm_wday, u->tm.tm_yday) merge_ (t->ymodulus, u->ymodulus) merge_ (t->yweek, u->yweek) # undef merge_ if (u->zone != TM_UNDEFINED_ZONE) t->zone = u->zone; return 0; } /* Parse a date/time prefix of S, putting the parsed result into *T. Return the first character after the prefix. The prefix may contain no useful information; in that case, *T will contain only undefined values. */ char * partime (s, t) char const *s; struct partime *t; { struct partime p; undefine (t); while (*s) { int i = 0; char const *s1; do { if (! (s1 = parse_prefix (s, &p, &i))) return (char *) s; } while (merge_partime (t, &p) != 0); s = s1; } return (char *) s; }