7f3084bfbc8d6d6dbc14d49d4ec23d4e1dfc25ca
[dragonfly.git] / lib / libc / stdtime / strptime.c
1 /*      $NetBSD: strptime.c,v 1.31 2008/11/04 21:08:33 christos Exp $   */
2 /*      $DragonFly: src/lib/libc/stdtime/strptime.c,v 1.5 2005/12/04 23:25:40 swildner Exp $ */
3
4 /*-
5  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code was contributed to The NetBSD Foundation by Klaus Klein.
9  * Heavily optimised by David Laight
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32
33 #include <sys/localedef.h>
34 #include <ctype.h>
35 #include <locale.h>
36 #include <string.h>
37 #include <time.h>
38 #include "private.h"
39 #include "tzfile.h"
40
/* Shorthand for reading a member of the current time locale. */
#define _ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 */
#define ALT_E			0x01	/* a "%E" modifier was seen */
#define ALT_O			0x02	/* a "%O" modifier was seen */

/*
 * Make the enclosing function return NULL when `alt_format' contains a
 * modifier bit that is not listed in `x'.  Wrapped in do/while(0) so that
 * "LEGAL_ALT(x);" is exactly one statement and stays safe as the body of
 * an if/else.
 */
#define LEGAL_ALT(x)						\
	do {							\
		if (alt_format & ~(x))				\
			return NULL;				\
	} while (0)

/* Zone names that strptime() assigns to tm->TM_ZONE. */
static char gmt[] = { "GMT" };
static char utc[] = { "UTC" };

static const u_char *conv_num(const unsigned char *, int *, uint, uint);
static const u_char *find_string(const u_char *, int *, const char * const *,
	const char * const *, int);
57
58 char *
59 strptime(const char *buf, const char *fmt, struct tm *tm)
60 {
61         unsigned char c;
62         const unsigned char *bp;
63         int alt_format, i, split_year = 0, neg, offs;
64         const char *new_fmt;
65
66         bp = (const u_char *)buf;
67
68         while (bp != NULL && (c = *fmt++) != '\0') {
69                 /* Clear `alternate' modifier prior to new conversion. */
70                 alt_format = 0;
71                 i = 0;
72
73                 /* Eat up white-space. */
74                 if (isspace(c)) {
75                         while (isspace(*bp))
76                                 bp++;
77                         continue;
78                 }
79
80                 if (c != '%')
81                         goto literal;
82
83
84 again:          switch (c = *fmt++) {
85                 case '%':       /* "%%" is converted to "%". */
86 literal:
87                         if (c != *bp++)
88                                 return NULL;
89                         LEGAL_ALT(0);
90                         continue;
91
92                 /*
93                  * "Alternative" modifiers. Just set the appropriate flag
94                  * and start over again.
95                  */
96                 case 'E':       /* "%E?" alternative conversion modifier. */
97                         LEGAL_ALT(0);
98                         alt_format |= ALT_E;
99                         goto again;
100
101                 case 'O':       /* "%O?" alternative conversion modifier. */
102                         LEGAL_ALT(0);
103                         alt_format |= ALT_O;
104                         goto again;
105
106                 /*
107                  * "Complex" conversion rules, implemented through recursion.
108                  */
109                 case 'c':       /* Date and time, using the locale's format. */
110                         new_fmt = _ctloc(d_t_fmt);
111                         goto recurse;
112
113                 case 'D':       /* The date as "%m/%d/%y". */
114                         new_fmt = "%m/%d/%y";
115                         LEGAL_ALT(0);
116                         goto recurse;
117
118                 case 'F':       /* The date as "%Y-%m-%d". */
119                         new_fmt = "%Y-%m-%d";
120                         LEGAL_ALT(0);
121                         goto recurse;
122
123                 case 'R':       /* The time as "%H:%M". */
124                         new_fmt = "%H:%M";
125                         LEGAL_ALT(0);
126                         goto recurse;
127
128                 case 'r':       /* The time in 12-hour clock representation. */
129                         new_fmt =_ctloc(t_fmt_ampm);
130                         LEGAL_ALT(0);
131                         goto recurse;
132
133                 case 'T':       /* The time as "%H:%M:%S". */
134                         new_fmt = "%H:%M:%S";
135                         LEGAL_ALT(0);
136                         goto recurse;
137
138                 case 'X':       /* The time, using the locale's format. */
139                         new_fmt =_ctloc(t_fmt);
140                         goto recurse;
141
142                 case 'x':       /* The date, using the locale's format. */
143                         new_fmt =_ctloc(d_fmt);
144                     recurse:
145                         bp = (const u_char *)strptime((const char *)bp,
146                                                             new_fmt, tm);
147                         LEGAL_ALT(ALT_E);
148                         continue;
149
150                 /*
151                  * "Elementary" conversion rules.
152                  */
153                 case 'A':       /* The day of week, using the locale's form. */
154                 case 'a':
155                         bp = find_string(bp, &tm->tm_wday, _ctloc(day),
156                                         _ctloc(abday), 7);
157                         LEGAL_ALT(0);
158                         continue;
159
160                 case 'B':       /* The month, using the locale's form. */
161                 case 'b':
162                 case 'h':
163                         bp = find_string(bp, &tm->tm_mon, _ctloc(mon),
164                                         _ctloc(abmon), 12);
165                         LEGAL_ALT(0);
166                         continue;
167
168                 case 'C':       /* The century number. */
169                         i = 20;
170                         bp = conv_num(bp, &i, 0, 99);
171
172                         i = i * 100 - TM_YEAR_BASE;
173                         if (split_year)
174                                 i += tm->tm_year % 100;
175                         split_year = 1;
176                         tm->tm_year = i;
177                         LEGAL_ALT(ALT_E);
178                         continue;
179
180                 case 'd':       /* The day of month. */
181                 case 'e':
182                         bp = conv_num(bp, &tm->tm_mday, 1, 31);
183                         LEGAL_ALT(ALT_O);
184                         continue;
185
186                 case 'k':       /* The hour (24-hour clock representation). */
187                         LEGAL_ALT(0);
188                         /* FALLTHROUGH */
189                 case 'H':
190                         bp = conv_num(bp, &tm->tm_hour, 0, 23);
191                         LEGAL_ALT(ALT_O);
192                         continue;
193
194                 case 'l':       /* The hour (12-hour clock representation). */
195                         LEGAL_ALT(0);
196                         /* FALLTHROUGH */
197                 case 'I':
198                         bp = conv_num(bp, &tm->tm_hour, 1, 12);
199                         if (tm->tm_hour == 12)
200                                 tm->tm_hour = 0;
201                         LEGAL_ALT(ALT_O);
202                         continue;
203
204                 case 'j':       /* The day of year. */
205                         i = 1;
206                         bp = conv_num(bp, &i, 1, 366);
207                         tm->tm_yday = i - 1;
208                         LEGAL_ALT(0);
209                         continue;
210
211                 case 'M':       /* The minute. */
212                         bp = conv_num(bp, &tm->tm_min, 0, 59);
213                         LEGAL_ALT(ALT_O);
214                         continue;
215
216                 case 'm':       /* The month. */
217                         i = 1;
218                         bp = conv_num(bp, &i, 1, 12);
219                         tm->tm_mon = i - 1;
220                         LEGAL_ALT(ALT_O);
221                         continue;
222
223                 case 'p':       /* The locale's equivalent of AM/PM. */
224                         bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2);
225                         if (tm->tm_hour > 11)
226                                 return NULL;
227                         tm->tm_hour += i * 12;
228                         LEGAL_ALT(0);
229                         continue;
230
231                 case 'S':       /* The seconds. */
232                         bp = conv_num(bp, &tm->tm_sec, 0, 61);
233                         LEGAL_ALT(ALT_O);
234                         continue;
235
236                 case 'U':       /* The week of year, beginning on sunday. */
237                 case 'W':       /* The week of year, beginning on monday. */
238                         /*
239                          * XXX This is bogus, as we can not assume any valid
240                          * information present in the tm structure at this
241                          * point to calculate a real value, so just check the
242                          * range for now.
243                          */
244                          bp = conv_num(bp, &i, 0, 53);
245                          LEGAL_ALT(ALT_O);
246                          continue;
247
248                 case 'w':       /* The day of week, beginning on sunday. */
249                         bp = conv_num(bp, &tm->tm_wday, 0, 6);
250                         LEGAL_ALT(ALT_O);
251                         continue;
252
253                 case 'u':       /* The day of week, monday = 1. */
254                         bp = conv_num(bp, &i, 1, 7);
255                         tm->tm_wday = i % 7;
256                         LEGAL_ALT(ALT_O);
257                         continue;
258
259                 case 'g':       /* The year corresponding to the ISO week
260                                  * number but without the century.
261                                  */
262                         bp = conv_num(bp, &i, 0, 99);
263                         continue;
264
265                 case 'G':       /* The year corresponding to the ISO week
266                                  * number with century.
267                                  */
268                         do
269                                 bp++;
270                         while (isdigit(*bp));
271                         continue;
272
273                 case 'V':       /* The ISO 8601:1988 week number as decimal */
274                         bp = conv_num(bp, &i, 0, 53);
275                         continue;
276
277                 case 'Y':       /* The year. */
278                         i = TM_YEAR_BASE;       /* just for data sanity... */
279                         bp = conv_num(bp, &i, 0, 9999);
280                         tm->tm_year = i - TM_YEAR_BASE;
281                         LEGAL_ALT(ALT_E);
282                         continue;
283
284                 case 'y':       /* The year within 100 years of the epoch. */
285                         /* LEGAL_ALT(ALT_E | ALT_O); */
286                         bp = conv_num(bp, &i, 0, 99);
287
288                         if (split_year)
289                                 /* preserve century */
290                                 i += (tm->tm_year / 100) * 100;
291                         else {
292                                 split_year = 1;
293                                 if (i <= 68)
294                                         i = i + 2000 - TM_YEAR_BASE;
295                                 else
296                                         i = i + 1900 - TM_YEAR_BASE;
297                         }
298                         tm->tm_year = i;
299                         continue;
300
301                 case 'Z':
302                         tzset();
303                         if (strncmp((const char *)bp, gmt, 3) == 0) {
304                                 tm->tm_isdst = 0;
305 #ifdef TM_GMTOFF
306                                 tm->TM_GMTOFF = 0;
307 #endif
308 #ifdef TM_ZONE
309                                 tm->TM_ZONE = gmt;
310 #endif
311                                 bp += 3;
312                         } else {
313                                 const unsigned char *ep;
314
315                                 ep = find_string(bp, &i,
316                                                  (const char * const *)tzname,
317                                                   NULL, 2);
318                                 if (ep != NULL) {
319                                         tm->tm_isdst = i;
320 #ifdef TM_GMTOFF
321                                         tm->TM_GMTOFF = -(timezone);
322 #endif
323 #ifdef TM_ZONE
324                                         tm->TM_ZONE = tzname[i];
325 #endif
326                                 }
327                                 bp = ep;
328                         }
329                         continue;
330
331                 case 'z':
332                         /*
333                          * We recognize all ISO 8601 formats:
334                          * Z    = Zulu time/UTC
335                          * [+-]hhmm
336                          * [+-]hh:mm
337                          * [+-]hh
338                          */
339                         while (isspace(*bp))
340                                 bp++;
341
342                         switch (*bp++) {
343                         case 'Z':
344                                 tm->tm_isdst = 0;
345 #ifdef TM_GMTOFF
346                                 tm->TM_GMTOFF = 0;
347 #endif
348 #ifdef TM_ZONE
349                                 tm->TM_ZONE = utc;
350 #endif
351                                 continue;
352                         case '+':
353                                 neg = 0;
354                                 break;
355                         case '-':
356                                 neg = 1;
357                                 break;
358                         default:
359                                 return NULL;
360                         }
361                         offs = 0;
362                         for (i = 0; i < 4; ) {
363                                 if (isdigit(*bp)) {
364                                         offs = offs * 10 + (*bp++ - '0');
365                                         i++;
366                                         continue;
367                                 }
368                                 if (i == 2 && *bp == ':') {
369                                         bp++;
370                                         continue;
371                                 }
372                                 break;
373                         }
374                         switch (i) {
375                         case 2:
376                                 offs *= 100;
377                                 break;
378                         case 4:
379                                 i = offs % 100;
380                                 if (i >= 60)
381                                         return NULL;
382                                 /* Convert minutes into decimal */
383                                 offs = (offs / 100) * 100 + (i * 50) / 30;
384                                 break;
385                         default:
386                                 return NULL;
387                         }
388                         if (neg)
389                                 offs = -offs;
390                         tm->tm_isdst = 0;       /* XXX */
391 #ifdef TM_GMTOFF
392                         tm->TM_GMTOFF = offs;
393 #endif
394 #ifdef TM_ZONE
395                         tm->TM_ZONE = NULL;     /* XXX */
396 #endif
397                         continue;
398
399                 /*
400                  * Miscellaneous conversions.
401                  */
402                 case 'n':       /* Any kind of white-space. */
403                 case 't':
404                         while (isspace(*bp))
405                                 bp++;
406                         LEGAL_ALT(0);
407                         continue;
408
409
410                 default:        /* Unknown/unsupported conversion. */
411                         return NULL;
412                 }
413         }
414
415         return __DECONST(char *, bp);
416 }
417
418
/*
 * conv_num --
 *	Read an unsigned decimal number from the start of `buf'.
 *
 *	`ulim' bounds the value and also the number of digits consumed: no
 *	more digits are read than `ulim' itself has, and reading stops
 *	early once another digit would necessarily exceed `ulim'.
 *
 *	On success the value is stored in `*dest' and a pointer to the
 *	first unconsumed character is returned.  NULL is returned, with
 *	`*dest' left untouched, when `buf' does not start with a digit or
 *	the value lies outside [llim, ulim].
 */
static const u_char *
conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
{
	const unsigned char *cur = buf;
	uint value = 0;
	uint digit_budget = ulim;	/* loses one decimal digit per round */

	if (*cur < '0' || *cur > '9')
		return NULL;

	for (;;) {
		value = value * 10 + (*cur - '0');
		digit_budget /= 10;
		cur++;

		if (value * 10 > ulim)
			break;		/* one more digit must overshoot */
		if (digit_budget == 0)
			break;		/* as many digits as `ulim' has */
		if (*cur < '0' || *cur > '9')
			break;		/* end of the digit run */
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = value;
	return cur;
}
445
/*
 * find_string --
 *	Match the start of `bp', ignoring case, against the `c' strings of
 *	the table `n1' and then, if `n2' is not NULL, against the `c'
 *	strings of `n2'.  Within a table the entries are tried in order
 *	and the first one that is a prefix of `bp' wins.
 *
 *	On a match the index of the entry is stored in `*tgt' and a pointer
 *	just past the matched text is returned.  NULL is returned, with
 *	`*tgt' left untouched, when nothing matches; nothing is searched
 *	when `n1' is NULL.
 */
static const u_char *
find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	const char * const *tables[2];
	int pass, idx;

	/* check full name - then abbreviated ones */
	tables[0] = n1;
	tables[1] = n2;

	for (pass = 0; pass < 2; pass++) {
		const char * const *names = tables[pass];

		/* A missing table ends the search. */
		if (names == NULL)
			break;

		for (idx = 0; idx < c; idx++) {
			unsigned int len = strlen(names[idx]);

			if (strncasecmp(names[idx], (const char *)bp, len) != 0)
				continue;
			*tgt = idx;
			return bp + len;
		}
	}

	/* Nothing matched */
	return NULL;
}