Merge branch 'vendor/MDOCML'
[dragonfly.git] / contrib / diffutils / lib / printf-parse.c
1 /* Formatted output to strings.
2    Copyright (C) 1999-2000, 2002-2003, 2006-2013 Free Software Foundation, Inc.
3
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3, or (at your option)
7    any later version.
8
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13
14    You should have received a copy of the GNU General Public License along
15    with this program; if not, see <http://www.gnu.org/licenses/>.  */
16
17 /* This file can be parametrized with the following macros:
18      CHAR_T             The element type of the format string.
19      CHAR_T_ONLY_ASCII  Set to 1 to enable verification that all characters
20                         in the format string are ASCII.
21      DIRECTIVE          Structure denoting a format directive.
22                         Depends on CHAR_T.
23      DIRECTIVES         Structure denoting the set of format directives of a
24                         format string.  Depends on CHAR_T.
25      PRINTF_PARSE       Function that parses a format string.
26                         Depends on CHAR_T.
27      STATIC             Set to 'static' to declare the function static.
28      ENABLE_UNISTDIO    Set to 1 to enable the unistdio extensions.  */
29
30 #ifndef PRINTF_PARSE
31 # include <config.h>
32 #endif
33
34 /* Specification.  */
35 #ifndef PRINTF_PARSE
36 # include "printf-parse.h"
37 #endif
38
39 /* Default parameters.  */
/* If the including file did not define PRINTF_PARSE, fall back to the
   plain 'char' variant: the function is named printf_parse and works on
   char_directive / char_directives.  */
40 #ifndef PRINTF_PARSE
41 # define PRINTF_PARSE printf_parse
42 # define CHAR_T char
43 # define DIRECTIVE char_directive
44 # define DIRECTIVES char_directives
45 #endif
46
47 /* Get size_t, NULL.  */
48 #include <stddef.h>
49
50 /* Get intmax_t.  */
51 #if defined IN_LIBINTL || defined IN_LIBASPRINTF
52 # if HAVE_STDINT_H_WITH_UINTMAX
53 #  include <stdint.h>
54 # endif
55 # if HAVE_INTTYPES_H_WITH_UINTMAX
56 #  include <inttypes.h>
57 # endif
58 #else
59 # include <stdint.h>
60 #endif
61
62 /* malloc(), realloc(), free().  */
63 #include <stdlib.h>
64
65 /* memcpy().  */
66 #include <string.h>
67
68 /* errno.  */
69 #include <errno.h>
70
71 /* Checked size_t computations.  */
72 #include "xsize.h"
73
74 #if CHAR_T_ONLY_ASCII
75 /* c_isascii().  */
76 # include "c-ctype.h"
77 #endif
78
/* Parse the printf-style format string FORMAT.

   On success, returns 0 after filling in:
     - *D: one entry per '%' directive (start/end pointers, flags, width
       and precision spans, conversion character and argument indices),
       followed by one extra entry whose dir_start points at the
       terminating NUL of FORMAT; also max_width_length and
       max_precision_length.
     - *A: the type of every argument referenced by the directives.

   D->dir and A->arg start out pointing at the arrays embedded in *D and
   *A (direct_alloc_dir / direct_alloc_arg) and are switched to heap
   memory only when those arrays are too small.
   NOTE(review): on success the caller presumably has to free D->dir and
   A->arg when they no longer point at the embedded arrays - confirm
   against the callers.

   On failure, returns -1 with errno set to
     - EINVAL for a malformed format: argument number 0, an argument
       number that overflows size_t, two different types requested for
       the same argument, an unknown conversion character, or (when
       CHAR_T_ONLY_ASCII is set) a non-ASCII character outside a
       directive;
     - ENOMEM when one of the arrays cannot be grown.
   In both failure cases any heap-allocated D->dir / A->arg has already
   been freed; the pointers themselves are not reset.  */
79 #ifdef STATIC
80 STATIC
81 #endif
82 int
83 PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
84 {
85   const CHAR_T *cp = format;    /* pointer into format */
86   size_t arg_posn = 0;          /* number of regular arguments consumed */
87   size_t d_allocated;           /* allocated elements of d->dir */
88   size_t a_allocated;           /* allocated elements of a->arg */
89   size_t max_width_length = 0;
90   size_t max_precision_length = 0;
91
     /* Start with the fixed-size arrays embedded in *d and *a; heap memory
        is only used once these overflow.  */
92   d->count = 0;
93   d_allocated = N_DIRECT_ALLOC_DIRECTIVES;
94   d->dir = d->direct_alloc_dir;
95
96   a->count = 0;
97   a_allocated = N_DIRECT_ALLOC_ARGUMENTS;
98   a->arg = a->direct_alloc_arg;
99
     /* REGISTER_ARG (index, type): record that argument number 'index'
        (0-based) has the given type.  Grows a->arg when the index is beyond
        the allocated size, marks any skipped slots as TYPE_NONE, and then
        either stores the type or verifies that it matches the one stored
        earlier.  Jumps to out_of_memory when growing fails and to error
        when the same argument is requested with two different types.  */
100 #define REGISTER_ARG(_index_,_type_) \
101   {                                                                     \
102     size_t n = (_index_);                                               \
103     if (n >= a_allocated)                                               \
104       {                                                                 \
105         size_t memory_size;                                             \
106         argument *memory;                                               \
107                                                                         \
108         a_allocated = xtimes (a_allocated, 2);                          \
109         if (a_allocated <= n)                                           \
110           a_allocated = xsum (n, 1);                                    \
111         memory_size = xtimes (a_allocated, sizeof (argument));          \
112         if (size_overflow_p (memory_size))                              \
113           /* Overflow, would lead to out of memory.  */                 \
114           goto out_of_memory;                                           \
115         memory = (argument *) (a->arg != a->direct_alloc_arg            \
116                                ? realloc (a->arg, memory_size)          \
117                                : malloc (memory_size));                 \
118         if (memory == NULL)                                             \
119           /* Out of memory.  */                                         \
120           goto out_of_memory;                                           \
121         if (a->arg == a->direct_alloc_arg)                              \
122           memcpy (memory, a->arg, a->count * sizeof (argument));        \
123         a->arg = memory;                                                \
124       }                                                                 \
125     while (a->count <= n)                                               \
126       a->arg[a->count++].type = TYPE_NONE;                              \
127     if (a->arg[n].type == TYPE_NONE)                                    \
128       a->arg[n].type = (_type_);                                        \
129     else if (a->arg[n].type != (_type_))                                \
130       /* Ambiguous type for positional argument.  */                    \
131       goto error;                                                       \
132   }
133
      /* Scan the format one character at a time; everything that is not
         part of a '%' directive is skipped (or checked for ASCII-ness when
         CHAR_T_ONLY_ASCII is set).  */
134   while (*cp != '\0')
135     {
136       CHAR_T c = *cp++;
137       if (c == '%')
138         {
139           size_t arg_index = ARG_NONE;
140           DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
141
142           /* Initialize the next directive.  */
              /* cp has already moved past the '%', so cp - 1 is the '%'
                 itself.  */
143           dp->dir_start = cp - 1;
144           dp->flags = 0;
145           dp->width_start = NULL;
146           dp->width_end = NULL;
147           dp->width_arg_index = ARG_NONE;
148           dp->precision_start = NULL;
149           dp->precision_end = NULL;
150           dp->precision_arg_index = ARG_NONE;
151           dp->arg_index = ARG_NONE;
152
153           /* Test for positional argument.  */
              /* The digits count as an argument number only when they are
                 followed by '$'.  Otherwise cp is left untouched and the
                 same digits are parsed below as flags and/or width.  */
154           if (*cp >= '0' && *cp <= '9')
155             {
156               const CHAR_T *np;
157
158               for (np = cp; *np >= '0' && *np <= '9'; np++)
159                 ;
160               if (*np == '$')
161                 {
162                   size_t n = 0;
163
164                   for (np = cp; *np >= '0' && *np <= '9'; np++)
165                     n = xsum (xtimes (n, 10), *np - '0');
166                   if (n == 0)
167                     /* Positional argument 0.  */
168                     goto error;
169                   if (size_overflow_p (n))
170                     /* n too large, would lead to out of memory later.  */
171                     goto error;
                      /* Argument numbers in the format are 1-based; the
                         stored index is 0-based.  */
172                   arg_index = n - 1;
173                   cp = np + 1;
174                 }
175             }
176
177           /* Read the flags.  */
              /* Any number of flag characters may appear, in any order;
                 the loop stops at the first character that is not a flag.  */
178           for (;;)
179             {
180               if (*cp == '\'')
181                 {
182                   dp->flags |= FLAG_GROUP;
183                   cp++;
184                 }
185               else if (*cp == '-')
186                 {
187                   dp->flags |= FLAG_LEFT;
188                   cp++;
189                 }
190               else if (*cp == '+')
191                 {
192                   dp->flags |= FLAG_SHOWSIGN;
193                   cp++;
194                 }
195               else if (*cp == ' ')
196                 {
197                   dp->flags |= FLAG_SPACE;
198                   cp++;
199                 }
200               else if (*cp == '#')
201                 {
202                   dp->flags |= FLAG_ALT;
203                   cp++;
204                 }
205               else if (*cp == '0')
206                 {
207                   dp->flags |= FLAG_ZERO;
208                   cp++;
209                 }
                  /* The 'I' flag is only recognized when building against
                     glibc 2 or newer (and not uClibc).  */
210 #if __GLIBC__ >= 2 && !defined __UCLIBC__
211               else if (*cp == 'I')
212                 {
213                   dp->flags |= FLAG_LOCALIZED;
214                   cp++;
215                 }
216 #endif
217               else
218                 break;
219             }
220
221           /* Parse the field width.  */
              /* Either '*' (width taken from an argument, optionally given
                 as '*N$') or a run of decimal digits.  */
222           if (*cp == '*')
223             {
224               dp->width_start = cp;
225               cp++;
226               dp->width_end = cp;
227               if (max_width_length < 1)
228                 max_width_length = 1;
229
230               /* Test for positional argument.  */
231               if (*cp >= '0' && *cp <= '9')
232                 {
233                   const CHAR_T *np;
234
235                   for (np = cp; *np >= '0' && *np <= '9'; np++)
236                     ;
237                   if (*np == '$')
238                     {
239                       size_t n = 0;
240
241                       for (np = cp; *np >= '0' && *np <= '9'; np++)
242                         n = xsum (xtimes (n, 10), *np - '0');
243                       if (n == 0)
244                         /* Positional argument 0.  */
245                         goto error;
246                       if (size_overflow_p (n))
247                         /* n too large, would lead to out of memory later.  */
248                         goto error;
249                       dp->width_arg_index = n - 1;
250                       cp = np + 1;
251                     }
252                 }
                  /* No explicit '*N$': the width consumes the next regular
                     argument.  */
253               if (dp->width_arg_index == ARG_NONE)
254                 {
255                   dp->width_arg_index = arg_posn++;
256                   if (dp->width_arg_index == ARG_NONE)
257                     /* arg_posn wrapped around.  */
258                     goto error;
259                 }
260               REGISTER_ARG (dp->width_arg_index, TYPE_INT);
261             }
262           else if (*cp >= '0' && *cp <= '9')
263             {
264               size_t width_length;
265
266               dp->width_start = cp;
267               for (; *cp >= '0' && *cp <= '9'; cp++)
268                 ;
269               dp->width_end = cp;
270               width_length = dp->width_end - dp->width_start;
271               if (max_width_length < width_length)
272                 max_width_length = width_length;
273             }
274
275           /* Parse the precision.  */
              /* The recorded precision span always starts at the '.', which
                 is why precision_start is cp - 1 in both branches and why
                 the '*' form counts as length 2.  */
276           if (*cp == '.')
277             {
278               cp++;
279               if (*cp == '*')
280                 {
281                   dp->precision_start = cp - 1;
282                   cp++;
283                   dp->precision_end = cp;
284                   if (max_precision_length < 2)
285                     max_precision_length = 2;
286
287                   /* Test for positional argument.  */
288                   if (*cp >= '0' && *cp <= '9')
289                     {
290                       const CHAR_T *np;
291
292                       for (np = cp; *np >= '0' && *np <= '9'; np++)
293                         ;
294                       if (*np == '$')
295                         {
296                           size_t n = 0;
297
298                           for (np = cp; *np >= '0' && *np <= '9'; np++)
299                             n = xsum (xtimes (n, 10), *np - '0');
300                           if (n == 0)
301                             /* Positional argument 0.  */
302                             goto error;
303                           if (size_overflow_p (n))
304                             /* n too large, would lead to out of memory
305                                later.  */
306                             goto error;
307                           dp->precision_arg_index = n - 1;
308                           cp = np + 1;
309                         }
310                     }
311                   if (dp->precision_arg_index == ARG_NONE)
312                     {
313                       dp->precision_arg_index = arg_posn++;
314                       if (dp->precision_arg_index == ARG_NONE)
315                         /* arg_posn wrapped around.  */
316                         goto error;
317                     }
318                   REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
319                 }
320               else
321                 {
322                   size_t precision_length;
323
                      /* A '.' followed by no digits is accepted; the span
                         then covers just the '.' (length 1).  */
324                   dp->precision_start = cp - 1;
325                   for (; *cp >= '0' && *cp <= '9'; cp++)
326                     ;
327                   dp->precision_end = cp;
328                   precision_length = dp->precision_end - dp->precision_start;
329                   if (max_precision_length < precision_length)
330                     max_precision_length = precision_length;
331                 }
332             }
333
334           {
335             arg_type type;
336
337             /* Parse argument type/size specifiers.  */
338             {
                  /* Size-specifier accumulator:
                       1    set by the first 'h'
                       2    set by the second 'h'
                       4    set by 'L'
                       +8   added per 'l' (so "ll" reaches 16), and by
                            'j', 'z'/'Z', 't' when the corresponding type
                            is wider than int but not wider than long
                       +16  added by 'j', 'z'/'Z', 't' when the
                            corresponding type is wider than long
                     The conversion switch below tests flags >= 16,
                     flags >= 8, flags & 4, flags & 2 and flags & 1.  */
339               int flags = 0;
340
341               for (;;)
342                 {
343                   if (*cp == 'h')
344                     {
                          /* First 'h' sets bit 0; once bit 0 is set, a
                             further 'h' sets bit 1.  */
345                       flags |= (1 << (flags & 1));
346                       cp++;
347                     }
348                   else if (*cp == 'L')
349                     {
350                       flags |= 4;
351                       cp++;
352                     }
353                   else if (*cp == 'l')
354                     {
355                       flags += 8;
356                       cp++;
357                     }
358                   else if (*cp == 'j')
359                     {
360                       if (sizeof (intmax_t) > sizeof (long))
361                         {
362                           /* intmax_t = long long */
363                           flags += 16;
364                         }
365                       else if (sizeof (intmax_t) > sizeof (int))
366                         {
367                           /* intmax_t = long */
368                           flags += 8;
369                         }
370                       cp++;
371                     }
372                   else if (*cp == 'z' || *cp == 'Z')
373                     {
374                       /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
375                          because the warning facility in gcc-2.95.2 understands
376                          only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784).  */
377                       if (sizeof (size_t) > sizeof (long))
378                         {
379                           /* size_t = long long */
380                           flags += 16;
381                         }
382                       else if (sizeof (size_t) > sizeof (int))
383                         {
384                           /* size_t = long */
385                           flags += 8;
386                         }
387                       cp++;
388                     }
389                   else if (*cp == 't')
390                     {
391                       if (sizeof (ptrdiff_t) > sizeof (long))
392                         {
393                           /* ptrdiff_t = long long */
394                           flags += 16;
395                         }
396                       else if (sizeof (ptrdiff_t) > sizeof (int))
397                         {
398                           /* ptrdiff_t = long */
399                           flags += 8;
400                         }
401                       cp++;
402                     }
403 #if defined __APPLE__ && defined __MACH__
404                   /* On Mac OS X 10.3, PRIdMAX is defined as "qd".
405                      We cannot change it to "lld" because PRIdMAX must also
406                      be understood by the system's printf routines.  */
407                   else if (*cp == 'q')
408                     {
409                       if (64 / 8 > sizeof (long))
410                         {
411                           /* int64_t = long long */
412                           flags += 16;
413                         }
414                       else
415                         {
416                           /* int64_t = long */
417                           flags += 8;
418                         }
419                       cp++;
420                     }
421 #endif
422 #if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
423                   /* On native Windows, PRIdMAX is defined as "I64d".
424                      We cannot change it to "lld" because PRIdMAX must also
425                      be understood by the system's printf routines.  */
426                   else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4')
427                     {
428                       if (64 / 8 > sizeof (long))
429                         {
430                           /* __int64 = long long */
431                           flags += 16;
432                         }
433                       else
434                         {
435                           /* __int64 = long */
436                           flags += 8;
437                         }
438                       cp += 3;
439                     }
440 #endif
441                   else
442                     break;
443                 }
444
445               /* Read the conversion character.  */
                  /* If the format ends right here, c is the NUL terminator
                     and falls into the default case (error), so cp is never
                     used after stepping past the end of the string.  */
446               c = *cp++;
447               switch (c)
448                 {
449                 case 'd': case 'i':
450 #if HAVE_LONG_LONG_INT
451                   /* If 'long long' exists and is larger than 'long':  */
452                   if (flags >= 16 || (flags & 4))
453                     type = TYPE_LONGLONGINT;
454                   else
455 #endif
456                   /* If 'long long' exists and is the same as 'long', we parse
457                      "lld" into TYPE_LONGINT.  */
458                   if (flags >= 8)
459                     type = TYPE_LONGINT;
460                   else if (flags & 2)
461                     type = TYPE_SCHAR;
462                   else if (flags & 1)
463                     type = TYPE_SHORT;
464                   else
465                     type = TYPE_INT;
466                   break;
467                 case 'o': case 'u': case 'x': case 'X':
468 #if HAVE_LONG_LONG_INT
469                   /* If 'long long' exists and is larger than 'long':  */
470                   if (flags >= 16 || (flags & 4))
471                     type = TYPE_ULONGLONGINT;
472                   else
473 #endif
474                   /* If 'unsigned long long' exists and is the same as
475                      'unsigned long', we parse "llu" into TYPE_ULONGINT.  */
476                   if (flags >= 8)
477                     type = TYPE_ULONGINT;
478                   else if (flags & 2)
479                     type = TYPE_UCHAR;
480                   else if (flags & 1)
481                     type = TYPE_USHORT;
482                   else
483                     type = TYPE_UINT;
484                   break;
485                 case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
486                 case 'a': case 'A':
487                   if (flags >= 16 || (flags & 4))
488                     type = TYPE_LONGDOUBLE;
489                   else
490                     type = TYPE_DOUBLE;
491                   break;
492                 case 'c':
                      /* With an 'l'-class size specifier this is a wide
                         character; without wint_t support that is an
                         error.  */
493                   if (flags >= 8)
494 #if HAVE_WINT_T
495                     type = TYPE_WIDE_CHAR;
496 #else
497                     goto error;
498 #endif
499                   else
500                     type = TYPE_CHAR;
501                   break;
502 #if HAVE_WINT_T
503                 case 'C':
                      /* 'C' is stored as conversion 'c' with a wide-char
                         argument type.  */
504                   type = TYPE_WIDE_CHAR;
505                   c = 'c';
506                   break;
507 #endif
508                 case 's':
509                   if (flags >= 8)
510 #if HAVE_WCHAR_T
511                     type = TYPE_WIDE_STRING;
512 #else
513                     goto error;
514 #endif
515                   else
516                     type = TYPE_STRING;
517                   break;
518 #if HAVE_WCHAR_T
519                 case 'S':
                      /* 'S' is stored as conversion 's' with a wide-string
                         argument type.  */
520                   type = TYPE_WIDE_STRING;
521                   c = 's';
522                   break;
523 #endif
524                 case 'p':
525                   type = TYPE_POINTER;
526                   break;
527                 case 'n':
528 #if HAVE_LONG_LONG_INT
529                   /* If 'long long' exists and is larger than 'long':  */
530                   if (flags >= 16 || (flags & 4))
531                     type = TYPE_COUNT_LONGLONGINT_POINTER;
532                   else
533 #endif
534                   /* If 'long long' exists and is the same as 'long', we parse
535                      "lln" into TYPE_COUNT_LONGINT_POINTER.  */
536                   if (flags >= 8)
537                     type = TYPE_COUNT_LONGINT_POINTER;
538                   else if (flags & 2)
539                     type = TYPE_COUNT_SCHAR_POINTER;
540                   else if (flags & 1)
541                     type = TYPE_COUNT_SHORT_POINTER;
542                   else
543                     type = TYPE_COUNT_INT_POINTER;
544                   break;
545 #if ENABLE_UNISTDIO
546                 /* The unistdio extensions.  */
547                 case 'U':
548                   if (flags >= 16)
549                     type = TYPE_U32_STRING;
550                   else if (flags >= 8)
551                     type = TYPE_U16_STRING;
552                   else
553                     type = TYPE_U8_STRING;
554                   break;
555 #endif
556                 case '%':
                      /* A literal percent sign: it is still recorded as a
                         directive, but it consumes no argument.  */
557                   type = TYPE_NONE;
558                   break;
559                 default:
560                   /* Unknown conversion character.  */
561                   goto error;
562                 }
563             }
564
                /* Bind the directive to its argument: the explicit 'N$'
                   index if one was given, otherwise the next regular
                   argument.  */
565             if (type != TYPE_NONE)
566               {
567                 dp->arg_index = arg_index;
568                 if (dp->arg_index == ARG_NONE)
569                   {
570                     dp->arg_index = arg_posn++;
571                     if (dp->arg_index == ARG_NONE)
572                       /* arg_posn wrapped around.  */
573                       goto error;
574                   }
575                 REGISTER_ARG (dp->arg_index, type);
576               }
577             dp->conversion = c;
578             dp->dir_end = cp;
579           }
580
              /* Grow d->dir as soon as it is full, so that there is always
                 a free slot at index d->count: the next directive is
                 initialized in place there, and the sentinel entry is
                 written there after the loop.  */
581           d->count++;
582           if (d->count >= d_allocated)
583             {
584               size_t memory_size;
585               DIRECTIVE *memory;
586
587               d_allocated = xtimes (d_allocated, 2);
588               memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
589               if (size_overflow_p (memory_size))
590                 /* Overflow, would lead to out of memory.  */
591                 goto out_of_memory;
592               memory = (DIRECTIVE *) (d->dir != d->direct_alloc_dir
593                                       ? realloc (d->dir, memory_size)
594                                       : malloc (memory_size));
595               if (memory == NULL)
596                 /* Out of memory.  */
597                 goto out_of_memory;
                  /* When moving off the embedded array for the first time,
                     the existing entries have to be copied by hand;
                     realloc does it in the other case.  */
598               if (d->dir == d->direct_alloc_dir)
599                 memcpy (memory, d->dir, d->count * sizeof (DIRECTIVE));
600               d->dir = memory;
601             }
602         }
603 #if CHAR_T_ONLY_ASCII
604       else if (!c_isascii (c))
605         {
606           /* Non-ASCII character.  Not supported.  */
607           goto error;
608         }
609 #endif
610     }
      /* Sentinel entry after the last directive: its dir_start points at
         the terminating NUL of the format string.  */
611   d->dir[d->count].dir_start = cp;
612
613   d->max_width_length = max_width_length;
614   d->max_precision_length = max_precision_length;
615   return 0;
616
      /* Both failure paths release whatever was moved to the heap; they
         differ only in the errno value they report.  */
617 error:
618   if (a->arg != a->direct_alloc_arg)
619     free (a->arg);
620   if (d->dir != d->direct_alloc_dir)
621     free (d->dir);
622   errno = EINVAL;
623   return -1;
624
625 out_of_memory:
626   if (a->arg != a->direct_alloc_arg)
627     free (a->arg);
628   if (d->dir != d->direct_alloc_dir)
629     free (d->dir);
630   errno = ENOMEM;
631   return -1;
632 }
633
/* Clear the per-instantiation parameters.  Presumably this lets the file
   be included again with a different CHAR_T - confirm against the
   includers.  STATIC and ENABLE_UNISTDIO are not cleared here.  */
634 #undef PRINTF_PARSE
635 #undef DIRECTIVES
636 #undef DIRECTIVE
637 #undef CHAR_T_ONLY_ASCII
638 #undef CHAR_T