82a98f83f2f5fc2013eff444b43e28a399dd9439
[dragonfly.git] / lib / libc / stdio / vfscanf.c
1 /*-
2  * Copyright (c) 1990, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Chris Torek.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  * @(#)vfscanf.c        8.1 (Berkeley) 6/4/93
37  * $FreeBSD: /repoman/r/ncvs/src/lib/libc/stdio/vfscanf.c,v 1.35 2004/01/31 23:16:09 das Exp $
38  * $DragonFly: src/lib/libc/stdio/vfscanf.c,v 1.11 2006/02/18 17:55:52 joerg Exp $
39  */
40
41 #include "namespace.h"
42 #include <ctype.h>
43 #include <inttypes.h>
44 #include <stddef.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <stdarg.h>
48 #include <string.h>
49 #include <wchar.h>
50 #include <wctype.h>
51 #include "un-namespace.h"
52
53 #include "collate.h"
54 #include "libc_private.h"
55 #include "local.h"
56 #include "priv_stdio.h"
57
58 #include <locale.h>
59 #include "floatio.h"
60
61 #define BUF             513     /* Size of buf[]; integer fields are clamped to BUF - 1 chars. */
62
63 /*
64  * Flags used during conversion; these bits are OR'ed together in `flags'.
65  */
66 #define LONG            0x01    /* l: long, double, or wide char (also set by %C, %S) */
67 #define LONGDBL         0x02    /* L: long double */
68 #define SHORT           0x04    /* h: short */
69 #define SUPPRESS        0x08    /* *: suppress assignment */
70 #define POINTER         0x10    /* p: void * (as hex) */
71 #define NOSKIP          0x20    /* [, c or C: do not skip leading blanks */
72 #define LONGLONG        0x400   /* ll: long long (+ deprecated q: quad) */
73 #define INTMAXT         0x800   /* j: intmax_t */
74 #define PTRDIFFT        0x1000  /* t: ptrdiff_t */
75 #define SIZET           0x2000  /* z: size_t */
76 #define SHORTSHORT      0x4000  /* hh: char */
77 #define UNSIGNED        0x8000  /* %[oupxX] conversions */
78
79 /*
80  * The following are used in integral conversions only:
81  * SIGNOK, NDIGITS, PFXOK, and NZDIGITS (they share the `flags' word above)
82  */
83 #define SIGNOK          0x40    /* +/- is (still) legal */
84 #define NDIGITS         0x80    /* no digits detected (yet) */
85 #define PFXOK           0x100   /* 0x prefix is (still) legal */
86 #define NZDIGITS        0x200   /* no '0' digit seen yet; cleared by the first '0' */
87 #define HAVESIGN        0x10000 /* sign detected */
88
89 /*
90  * Conversion types; `c' holds one of these once a directive is parsed.
91  */
92 #define CT_CHAR         0       /* %c (and %C) conversion */
93 #define CT_CCL          1       /* %[...] conversion */
94 #define CT_STRING       2       /* %s (and %S) conversion */
95 #define CT_INT          3       /* %[dioupxX] conversion */
96 #define CT_FLOAT        4       /* %[aefgAEFG] conversion */
97
98 static const u_char *__sccl(char *, const u_char *); /* called for %[ with ccltab and fmt; result becomes the new fmt */
99 static int parsefloat(FILE *, char *, char *);
100
101 int __scanfdebug = 0;
102
103 __weak_reference(__vfscanf, vfscanf); /* NOTE(review): presumably exposes __vfscanf under the name vfscanf - confirm in cdefs */
104
105 /*
106  * __vfscanf - MT-safe entry point: brackets __svfscanf() with FLOCKFILE/FUNLOCKFILE
107  */
108 int
109 __vfscanf(FILE *fp, const char *fmt0, va_list ap)
110 {
111         int nmatched;           /* result of __svfscanf(), handed back unchanged */
112
113         FLOCKFILE(fp);
114         nmatched = __svfscanf(fp, fmt0, ap);
115         FUNLOCKFILE(fp);
116         return (nmatched);
117 }
118
119 /*
120  * __svfscanf - non-MT-safe version of __vfscanf
121  */
122 int
123 __svfscanf(FILE *fp, const char *fmt0, va_list ap)
124 {
125         const u_char *fmt = (const u_char *)fmt0;
126         int c;                  /* character from format, or conversion */
127         size_t width;           /* field width, or 0 */
128         char *p;                /* points into all kinds of strings */
129         int n;                  /* handy integer */
130         int flags;              /* flags as defined above */
131         char *p0;               /* saves original value of p when necessary */
132         int nassigned;          /* number of fields assigned */
133         int nconversions;       /* number of conversions */
134         int nread;              /* number of characters consumed from fp */
135         int base;               /* base argument to conversion function */
136         char ccltab[256];       /* character class table for %[...] */
137         char buf[BUF];          /* buffer for numeric and mb conversions */
138         wchar_t *wcp;           /* handy wide character pointer */
139         size_t nconv;           /* length of multibyte sequence converted */
140         static const mbstate_t initial;
141         mbstate_t mbs;
142
143         /* `basefix' is used to avoid `if' tests in the integer scanner */
144         static short basefix[17] =
145                 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
146
147         _SET_ORIENTATION(fp, -1);
148
149         nassigned = 0;
150         nconversions = 0;
151         nread = 0;
152         for (;;) {
153                 c = *fmt++;
154                 if (c == 0)
155                         return (nassigned);
156                 if (isspace(c)) {
157                         while ((fp->pub._r > 0 || __srefill(fp) == 0) && isspace(*fp->pub._p))
158                                 nread++, fp->pub._r--, fp->pub._p++;
159                         continue;
160                 }
161                 if (c != '%')
162                         goto literal;
163                 width = 0;
164                 flags = 0;
165                 /*
166                  * switch on the format.  continue if done;
167                  * break once format type is derived.
168                  */
169 again:          c = *fmt++;
170                 switch (c) {
171                 case '%':
172 literal:
173                         if (fp->pub._r <= 0 && __srefill(fp))
174                                 goto input_failure;
175                         if (*fp->pub._p != c)
176                                 goto match_failure;
177                         fp->pub._r--, fp->pub._p++;
178                         nread++;
179                         continue;
180
181                 case '*':
182                         flags |= SUPPRESS;
183                         goto again;
184                 case 'j':
185                         flags |= INTMAXT;
186                         goto again;
187                 case 'l':
188                         if (flags & LONG) {
189                                 flags &= ~LONG;
190                                 flags |= LONGLONG;
191                         } else {
192                                 flags |= LONG;
193                         }
194                         goto again;
195                 case 'q':
196                         flags |= LONGLONG;      /* assume long long == quad */
197                         goto again;
198                 case 't':
199                         flags |= PTRDIFFT;
200                         goto again;
201                 case 'z':
202                         flags |= SIZET;
203                         goto again;
204                 case 'L':
205                         flags |= LONGDBL;
206                         goto again;
207                 case 'h':
208                         if (flags & SHORT) {
209                                 flags &= ~SHORT;
210                                 flags |= SHORTSHORT;
211                         } else {
212                                 flags |= SHORT;
213                         }
214                         goto again;
215
216                 case '0': case '1': case '2': case '3': case '4':
217                 case '5': case '6': case '7': case '8': case '9':
218                         width = width * 10 + c - '0';
219                         goto again;
220
221                 /*
222                  * Conversions.
223                  */
224                 case 'd':
225                         c = CT_INT;
226                         base = 10;
227                         break;
228
229                 case 'i':
230                         c = CT_INT;
231                         base = 0;
232                         break;
233
234                 case 'o':
235                         c = CT_INT;
236                         flags |= UNSIGNED;
237                         base = 8;
238                         break;
239
240                 case 'u':
241                         c = CT_INT;
242                         flags |= UNSIGNED;
243                         base = 10;
244                         break;
245
246                 case 'X':
247                 case 'x':
248                         flags |= PFXOK; /* enable 0x prefixing */
249                         c = CT_INT;
250                         flags |= UNSIGNED;
251                         base = 16;
252                         break;
253
254 #ifndef NO_FLOATING_POINT
255                 case 'A': case 'E': case 'F': case 'G':
256                 case 'a': case 'e': case 'f': case 'g':
257                         c = CT_FLOAT;
258                         break;
259 #endif
260
261                 case 'S':
262                         flags |= LONG;
263                         /* FALLTHROUGH */
264                 case 's':
265                         c = CT_STRING;
266                         break;
267
268                 case '[':
269                         fmt = __sccl(ccltab, fmt);
270                         flags |= NOSKIP;
271                         c = CT_CCL;
272                         break;
273
274                 case 'C':
275                         flags |= LONG;
276                         /* FALLTHROUGH */
277                 case 'c':
278                         flags |= NOSKIP;
279                         c = CT_CHAR;
280                         break;
281
282                 case 'p':       /* pointer format is like hex */
283                         flags |= POINTER | PFXOK;
284                         c = CT_INT;             /* assumes sizeof(uintmax_t) */
285                         flags |= UNSIGNED;      /*      >= sizeof(uintptr_t) */
286                         base = 16;
287                         break;
288
289                 case 'n':
290                         nconversions++;
291                         if (flags & SUPPRESS)   /* ??? */
292                                 continue;
293                         if (flags & SHORTSHORT)
294                                 *va_arg(ap, char *) = nread;
295                         else if (flags & SHORT)
296                                 *va_arg(ap, short *) = nread;
297                         else if (flags & LONG)
298                                 *va_arg(ap, long *) = nread;
299                         else if (flags & LONGLONG)
300                                 *va_arg(ap, long long *) = nread;
301                         else if (flags & INTMAXT)
302                                 *va_arg(ap, intmax_t *) = nread;
303                         else if (flags & SIZET)
304                                 *va_arg(ap, size_t *) = nread;
305                         else if (flags & PTRDIFFT)
306                                 *va_arg(ap, ptrdiff_t *) = nread;
307                         else
308                                 *va_arg(ap, int *) = nread;
309                         continue;
310                 default:
311                         goto match_failure;
312
313                 /*
314                  * Disgusting backwards compatibility hack.     XXX
315                  */
316                 case '\0':      /* compat */
317                         return (EOF);
318                 }
319
320                 /*
321                  * We have a conversion that requires input.
322                  */
323                 if (fp->pub._r <= 0 && __srefill(fp))
324                         goto input_failure;
325
326                 /*
327                  * Consume leading white space, except for formats
328                  * that suppress this.
329                  */
330                 if ((flags & NOSKIP) == 0) {
331                         while (isspace(*fp->pub._p)) {
332                                 nread++;
333                                 if (--fp->pub._r > 0)
334                                         fp->pub._p++;
335                                 else if (__srefill(fp))
336                                         goto input_failure;
337                         }
338                         /*
339                          * Note that there is at least one character in
340                          * the buffer, so conversions that do not set NOSKIP
341                          * can no longer result in an input failure.
342                          */
343                 }
344
345                 /*
346                  * Do the conversion.
347                  */
348                 switch (c) {
349
350                 case CT_CHAR:
351                         /* scan arbitrary characters (sets NOSKIP) */
352                         if (width == 0)
353                                 width = 1;
354                         if (flags & LONG) {
355                                 if ((flags & SUPPRESS) == 0)
356                                         wcp = va_arg(ap, wchar_t *);
357                                 else
358                                         wcp = NULL;
359                                 n = 0;
360                                 while (width != 0) {
361                                         if (n == MB_CUR_MAX) {
362                                                 fp->pub._flags |= __SERR;
363                                                 goto input_failure;
364                                         }
365                                         buf[n++] = *fp->pub._p;
366                                         fp->pub._p++;
367                                         fp->pub._r--;
368                                         mbs = initial;
369                                         nconv = mbrtowc(wcp, buf, n, &mbs);
370                                         if (nconv == (size_t)-1) {
371                                                 fp->pub._flags |= __SERR;
372                                                 goto input_failure;
373                                         }
374                                         if (nconv == 0 && !(flags & SUPPRESS))
375                                                 *wcp = L'\0';
376                                         if (nconv != (size_t)-2) {
377                                                 nread += n;
378                                                 width--;
379                                                 if (!(flags & SUPPRESS))
380                                                         wcp++;
381                                                 n = 0;
382                                         }
383                                         if (fp->pub._r <= 0 && __srefill(fp)) {
384                                                 if (n != 0) {
385                                                         fp->pub._flags |= __SERR;
386                                                         goto input_failure;
387                                                 }
388                                                 break;
389                                         }
390                                 }
391                                 if ((flags & SUPPRESS) == 0)
392                                         nassigned++;
393                         } else if (flags & SUPPRESS) {
394                                 size_t sum = 0;
395                                 for (;;) {
396                                         if ((n = fp->pub._r) < width) {
397                                                 sum += n;
398                                                 width -= n;
399                                                 fp->pub._p += n;
400                                                 if (__srefill(fp)) {
401                                                         if (sum == 0)
402                                                             goto input_failure;
403                                                         break;
404                                                 }
405                                         } else {
406                                                 sum += width;
407                                                 fp->pub._r -= width;
408                                                 fp->pub._p += width;
409                                                 break;
410                                         }
411                                 }
412                                 nread += sum;
413                         } else {
414                                 size_t r = __fread((void *)va_arg(ap, char *),
415                                         1, width, fp);
416
417                                 if (r == 0)
418                                         goto input_failure;
419                                 nread += r;
420                                 nassigned++;
421                         }
422                         nconversions++;
423                         break;
424
425                 case CT_CCL:
426                         /* scan a (nonempty) character class (sets NOSKIP) */
427                         if (width == 0)
428                                 width = (size_t)~0;     /* `infinity' */
429                         /* take only those things in the class */
430                         if (flags & LONG) {
431                                 wchar_t twc;
432                                 int nchars;
433
434                                 if ((flags & SUPPRESS) == 0)
435                                         wcp = va_arg(ap, wchar_t *);
436                                 else
437                                         wcp = &twc;
438                                 n = 0;
439                                 nchars = 0;
440                                 while (width != 0) {
441                                         if (n == MB_CUR_MAX) {
442                                                 fp->pub._flags |= __SERR;
443                                                 goto input_failure;
444                                         }
445                                         buf[n++] = *fp->pub._p;
446                                         fp->pub._p++;
447                                         fp->pub._r--;
448                                         mbs = initial;
449                                         nconv = mbrtowc(wcp, buf, n, &mbs);
450                                         if (nconv == (size_t)-1) {
451                                                 fp->pub._flags |= __SERR;
452                                                 goto input_failure;
453                                         }
454                                         if (nconv == 0)
455                                                 *wcp = L'\0';
456                                         if (nconv != (size_t)-2) {
457                                                 if (wctob(*wcp) != EOF &&
458                                                     !ccltab[wctob(*wcp)]) {
459                                                         while (n != 0) {
460                                                                 n--;
461                                                                 __ungetc(buf[n],
462                                                                     fp);
463                                                         }
464                                                         break;
465                                                 }
466                                                 nread += n;
467                                                 width--;
468                                                 if (!(flags & SUPPRESS))
469                                                         wcp++;
470                                                 nchars++;
471                                                 n = 0;
472                                         }
473                                         if (fp->pub._r <= 0 && __srefill(fp)) {
474                                                 if (n != 0) {
475                                                         fp->pub._flags |= __SERR;
476                                                         goto input_failure;
477                                                 }
478                                                 break;
479                                         }
480                                 }
481                                 if (n != 0) {
482                                         fp->pub._flags |= __SERR;
483                                         goto input_failure;
484                                 }
485                                 n = nchars;
486                                 if (n == 0)
487                                         goto match_failure;
488                                 if (!(flags & SUPPRESS)) {
489                                         *wcp = L'\0';
490                                         nassigned++;
491                                 }
492                         } else if (flags & SUPPRESS) {
493                                 n = 0;
494                                 while (ccltab[*fp->pub._p]) {
495                                         n++, fp->pub._r--, fp->pub._p++;
496                                         if (--width == 0)
497                                                 break;
498                                         if (fp->pub._r <= 0 && __srefill(fp)) {
499                                                 if (n == 0)
500                                                         goto input_failure;
501                                                 break;
502                                         }
503                                 }
504                                 if (n == 0)
505                                         goto match_failure;
506                         } else {
507                                 p0 = p = va_arg(ap, char *);
508                                 while (ccltab[*fp->pub._p]) {
509                                         fp->pub._r--;
510                                         *p++ = *fp->pub._p++;
511                                         if (--width == 0)
512                                                 break;
513                                         if (fp->pub._r <= 0 && __srefill(fp)) {
514                                                 if (p == p0)
515                                                         goto input_failure;
516                                                 break;
517                                         }
518                                 }
519                                 n = p - p0;
520                                 if (n == 0)
521                                         goto match_failure;
522                                 *p = 0;
523                                 nassigned++;
524                         }
525                         nread += n;
526                         nconversions++;
527                         break;
528
529                 case CT_STRING:
530                         /* like CCL, but zero-length string OK, & no NOSKIP */
531                         if (width == 0)
532                                 width = (size_t)~0;
533                         if (flags & LONG) {
534                                 wchar_t twc;
535
536                                 if ((flags & SUPPRESS) == 0)
537                                         wcp = va_arg(ap, wchar_t *);
538                                 else
539                                         wcp = &twc;
540                                 n = 0;
541                                 while (!isspace(*fp->pub._p) && width != 0) {
542                                         if (n == MB_CUR_MAX) {
543                                                 fp->pub._flags |= __SERR;
544                                                 goto input_failure;
545                                         }
546                                         buf[n++] = *fp->pub._p;
547                                         fp->pub._p++;
548                                         fp->pub._r--;
549                                         mbs = initial;
550                                         nconv = mbrtowc(wcp, buf, n, &mbs);
551                                         if (nconv == (size_t)-1) {
552                                                 fp->pub._flags |= __SERR;
553                                                 goto input_failure;
554                                         }
555                                         if (nconv == 0)
556                                                 *wcp = L'\0';
557                                         if (nconv != (size_t)-2) {
558                                                 if (iswspace(*wcp)) {
559                                                         while (n != 0) {
560                                                                 n--;
561                                                                 __ungetc(buf[n],
562                                                                     fp);
563                                                         }
564                                                         break;
565                                                 }
566                                                 nread += n;
567                                                 width--;
568                                                 if (!(flags & SUPPRESS))
569                                                         wcp++;
570                                                 n = 0;
571                                         }
572                                         if (fp->pub._r <= 0 && __srefill(fp)) {
573                                                 if (n != 0) {
574                                                         fp->pub._flags |= __SERR;
575                                                         goto input_failure;
576                                                 }
577                                                 break;
578                                         }
579                                 }
580                                 if (!(flags & SUPPRESS)) {
581                                         *wcp = L'\0';
582                                         nassigned++;
583                                 }
584                         } else if (flags & SUPPRESS) {
585                                 n = 0;
586                                 while (!isspace(*fp->pub._p)) {
587                                         n++, fp->pub._r--, fp->pub._p++;
588                                         if (--width == 0)
589                                                 break;
590                                         if (fp->pub._r <= 0 && __srefill(fp))
591                                                 break;
592                                 }
593                                 nread += n;
594                         } else {
595                                 p0 = p = va_arg(ap, char *);
596                                 while (!isspace(*fp->pub._p)) {
597                                         fp->pub._r--;
598                                         *p++ = *fp->pub._p++;
599                                         if (--width == 0)
600                                                 break;
601                                         if (fp->pub._r <= 0 && __srefill(fp))
602                                                 break;
603                                 }
604                                 *p = 0;
605                                 nread += p - p0;
606                                 nassigned++;
607                         }
608                         nconversions++;
609                         continue;
610
611                 case CT_INT:
612                         /* scan an integer as if by the conversion function */
613 #ifdef hardway
614                         if (width == 0 || width > sizeof(buf) - 1)
615                                 width = sizeof(buf) - 1;
616 #else
617                         /* size_t is unsigned, hence this optimisation */
618                         if (--width > sizeof(buf) - 2)
619                                 width = sizeof(buf) - 2;
620                         width++;
621 #endif
622                         flags |= SIGNOK | NDIGITS | NZDIGITS;
623                         for (p = buf; width; width--) {
624                                 c = *fp->pub._p;
625                                 /*
626                                  * Switch on the character; `goto ok'
627                                  * if we accept it as a part of number.
628                                  */
629                                 switch (c) {
630
631                                 /*
632                                  * The digit 0 is always legal, but is
633                                  * special.  For %i conversions, if no
634                                  * digits (zero or nonzero) have been
635                                  * scanned (only signs), we will have
636                                  * base==0.  In that case, we should set
637                                  * it to 8 and enable 0x prefixing.
638                                  * Also, if we have not scanned zero digits
639                                  * before this, do not turn off prefixing
640                                  * (someone else will turn it off if we
641                                  * have scanned any nonzero digits).
642                                  */
643                                 case '0':
644                                         if (base == 0) {
645                                                 base = 8;
646                                                 flags |= PFXOK;
647                                         }
648                                         if (flags & NZDIGITS)
649                                             flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
650                                         else
651                                             flags &= ~(SIGNOK|PFXOK|NDIGITS);
652                                         goto ok;
653
654                                 /* 1 through 7 always legal */
655                                 case '1': case '2': case '3':
656                                 case '4': case '5': case '6': case '7':
657                                         base = basefix[base];
658                                         flags &= ~(SIGNOK | PFXOK | NDIGITS);
659                                         goto ok;
660
661                                 /* digits 8 and 9 ok iff decimal or hex */
662                                 case '8': case '9':
663                                         base = basefix[base];
664                                         if (base <= 8)
665                                                 break;  /* not legal here */
666                                         flags &= ~(SIGNOK | PFXOK | NDIGITS);
667                                         goto ok;
668
669                                 /* letters ok iff hex */
670                                 case 'A': case 'B': case 'C':
671                                 case 'D': case 'E': case 'F':
672                                 case 'a': case 'b': case 'c':
673                                 case 'd': case 'e': case 'f':
674                                         /* no need to fix base here */
675                                         if (base <= 10)
676                                                 break;  /* not legal here */
677                                         flags &= ~(SIGNOK | PFXOK | NDIGITS);
678                                         goto ok;
679
680                                 /* sign ok only as first character */
681                                 case '+': case '-':
682                                         if (flags & SIGNOK) {
683                                                 flags &= ~SIGNOK;
684                                                 flags |= HAVESIGN;
685                                                 goto ok;
686                                         }
687                                         break;
688                                         
689                                 /*
690                                  * x ok iff flag still set & 2nd char (or
691                                  * 3rd char if we have a sign).
692                                  */
693                                 case 'x': case 'X':
694                                         if (flags & PFXOK && p ==
695                                             buf + 1 + !!(flags & HAVESIGN)) {
696                                                 base = 16;      /* if %i */
697                                                 flags &= ~PFXOK;
698                                                 goto ok;
699                                         }
700                                         break;
701                                 }
702
703                                 /*
704                                  * If we got here, c is not a legal character
705                                  * for a number.  Stop accumulating digits.
706                                  */
707                                 break;
708                 ok:
709                                 /*
710                                  * c is legal: store it and look at the next.
711                                  */
712                                 *p++ = c;
713                                 if (--fp->pub._r > 0)
714                                         fp->pub._p++;
715                                 else if (__srefill(fp))
716                                         break;          /* EOF */
717                         }
718                         /*
719                          * If we had only a sign, it is no good; push
720                          * back the sign.  If the number ends in `x',
721                          * it was [sign] '0' 'x', so push back the x
722                          * and treat it as [sign] '0'.
723                          */
724                         if (flags & NDIGITS) {
725                                 if (p > buf)
726                                         __ungetc(*(u_char *)--p, fp);
727                                 goto match_failure;
728                         }
729                         c = ((u_char *)p)[-1];
730                         if (c == 'x' || c == 'X') {
731                                 --p;
732                                 __ungetc(c, fp);
733                         }
734                         if ((flags & SUPPRESS) == 0) {
735                                 uintmax_t res;
736
737                                 *p = 0;
738                                 if ((flags & UNSIGNED) == 0)
739                                     res = strtoimax(buf, (char **)NULL, base);
740                                 else
741                                     res = strtoumax(buf, (char **)NULL, base);
742                                 if (flags & POINTER)
743                                         *va_arg(ap, void **) =
744                                                         (void *)(uintptr_t)res;
745                                 else if (flags & SHORTSHORT)
746                                         *va_arg(ap, char *) = res;
747                                 else if (flags & SHORT)
748                                         *va_arg(ap, short *) = res;
749                                 else if (flags & LONG)
750                                         *va_arg(ap, long *) = res;
751                                 else if (flags & LONGLONG)
752                                         *va_arg(ap, long long *) = res;
753                                 else if (flags & INTMAXT)
754                                         *va_arg(ap, intmax_t *) = res;
755                                 else if (flags & PTRDIFFT)
756                                         *va_arg(ap, ptrdiff_t *) = res;
757                                 else if (flags & SIZET)
758                                         *va_arg(ap, size_t *) = res;
759                                 else
760                                         *va_arg(ap, int *) = res;
761                                 nassigned++;
762                         }
763                         nread += p - buf;
764                         nconversions++;
765                         break;
766
767 #ifndef NO_FLOATING_POINT
768                 case CT_FLOAT:
769                         /* scan a floating point number as if by strtod */
770                         if (width == 0 || width > sizeof(buf) - 1)
771                                 width = sizeof(buf) - 1;
772                         if ((width = parsefloat(fp, buf, buf + width)) == 0)
773                                 goto match_failure;
774                         if ((flags & SUPPRESS) == 0) {
775                                 if (flags & LONGDBL) {
776 #if 0 /* XXX no strtold (yet) */
777                                         long double res = strtold(buf, &p);
778                                         *va_arg(ap, long double *) = res;
779 #else
780                                         double res = strtod(buf, &p);
781                                         *va_arg(ap, long double *) = res;
782 #endif
783                                 } else if (flags & LONG) {
784                                         double res = strtod(buf, &p);
785                                         *va_arg(ap, double *) = res;
786                                 } else {
787 #if 0 /* XXX no strtof (yet) */
788                                         float res = strtof(buf, &p);
789                                         *va_arg(ap, float *) = res;
790 #else
791                                         float res = strtod(buf, &p);
792                                         *va_arg(ap, float *) = res;
793 #endif
794                                 }
795                                 if (__scanfdebug && p - buf != width)
796                                         abort();
797                                 nassigned++;
798                         }
799                         nread += width;
800                         nconversions++;
801                         break;
802 #endif /* !NO_FLOATING_POINT */
803                 }
804         }
805 input_failure:
806         return (nconversions != 0 ? nassigned : EOF);
807 match_failure:
808         return (nassigned);
809 }
810
811 /*
812  * Fill in the given table from the scanset at the given format
813  * (just after `[').  Return a pointer to the character past the
814  * closing `]'.  The table has a 1 wherever characters should be
815  * considered part of the scanset.
816  */
817 static const u_char *
818 __sccl(char *tab, const u_char *fmt)
819 {
820         int c, n, v, i;
821
822         /* first `clear' the whole table */
823         c = *fmt++;             /* first char hat => negated scanset */
824         if (c == '^') {
825                 v = 1;          /* default => accept */
826                 c = *fmt++;     /* get new first char */
827         } else
828                 v = 0;          /* default => reject */
829
830         /* XXX: Will not work if sizeof(tab*) > sizeof(char) */
831         memset(tab, v, 256);
832
833         if (c == 0)
834                 return (fmt - 1);/* format ended before closing ] */
835
836         /*
837          * Now set the entries corresponding to the actual scanset
838          * to the opposite of the above.
839          *
840          * The first character may be ']' (or '-') without being special;
841          * the last character may be '-'.
842          */
843         v = 1 - v;
844         for (;;) {
845                 tab[c] = v;             /* take character c */
846 doswitch:
847                 n = *fmt++;             /* and examine the next */
848                 switch (n) {
849
850                 case 0:                 /* format ended too soon */
851                         return (fmt - 1);
852
853                 case '-':
854                         /*
855                          * A scanset of the form
856                          *      [01+-]
857                          * is defined as `the digit 0, the digit 1,
858                          * the character +, the character -', but
859                          * the effect of a scanset such as
860                          *      [a-zA-Z0-9]
861                          * is implementation defined.  The V7 Unix
862                          * scanf treats `a-z' as `the letters a through
863                          * z', but treats `a-a' as `the letter a, the
864                          * character -, and the letter a'.
865                          *
866                          * For compatibility, the `-' is not considerd
867                          * to define a range if the character following
868                          * it is either a close bracket (required by ANSI)
869                          * or is not numerically greater than the character
870                          * we just stored in the table (c).
871                          */
872                         n = *fmt;
873                         if (n == ']'
874                             || (__collate_load_error ? n < c :
875                                 __collate_range_cmp (n, c) < 0
876                                )
877                            ) {
878                                 c = '-';
879                                 break;  /* resume the for(;;) */
880                         }
881                         fmt++;
882                         /* fill in the range */
883                         if (__collate_load_error) {
884                                 do {
885                                         tab[++c] = v;
886                                 } while (c < n);
887                         } else {
888                                 for (i = 0; i < 256; i ++)
889                                         if (   __collate_range_cmp (c, i) < 0
890                                             && __collate_range_cmp (i, n) <= 0
891                                            )
892                                                 tab[i] = v;
893                         }
894 #if 1   /* XXX another disgusting compatibility hack */
895                         c = n;
896                         /*
897                          * Alas, the V7 Unix scanf also treats formats
898                          * such as [a-c-e] as `the letters a through e'.
899                          * This too is permitted by the standard....
900                          */
901                         goto doswitch;
902 #else
903                         c = *fmt++;
904                         if (c == 0)
905                                 return (fmt - 1);
906                         if (c == ']')
907                                 return (fmt);
908 #endif
909                         break;
910
911                 case ']':               /* end of scanset */
912                         return (fmt);
913
914                 default:                /* just another character */
915                         c = n;
916                         break;
917                 }
918         }
919         /* NOTREACHED */
920 }
921
922 #ifndef NO_FLOATING_POINT
923 static int
924 parsefloat(FILE *fp, char *buf, char *end)
925 {
926         char *commit, *p;
927         int infnanpos = 0;
928         enum {
929                 S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
930                 S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
931         } state = S_START;
932         unsigned char c;
933         char decpt = *localeconv()->decimal_point;
934         _Bool gotmantdig = 0, ishex = 0;
935
936         /*
937          * We set commit = p whenever the string we have read so far
938          * constitutes a valid representation of a floating point
939          * number by itself.  At some point, the parse will complete
940          * or fail, and we will ungetc() back to the last commit point.
941          * To ensure that the file offset gets updated properly, it is
942          * always necessary to read at least one character that doesn't
943          * match; thus, we can't short-circuit "infinity" or "nan(...)".
944          */
945         commit = buf - 1;
946         for (p = buf; p < end; ) {
947                 c = *fp->pub._p;
948 reswitch:
949                 switch (state) {
950                 case S_START:
951                         state = S_GOTSIGN;
952                         if (c == '-' || c == '+')
953                                 break;
954                         else
955                                 goto reswitch;
956                 case S_GOTSIGN:
957                         switch (c) {
958                         case '0':
959                                 state = S_MAYBEHEX;
960                                 commit = p;
961                                 break;
962                         case 'I':
963                         case 'i':
964                                 state = S_INF;
965                                 break;
966                         case 'N':
967                         case 'n':
968                                 state = S_NAN;
969                                 break;
970                         default:
971                                 state = S_DIGITS;
972                                 goto reswitch;
973                         }
974                         break;
975                 case S_INF:
976                         if (infnanpos > 6 ||
977                             (c != "nfinity"[infnanpos] &&
978                              c != "NFINITY"[infnanpos]))
979                                 goto parsedone;
980                         if (infnanpos == 1 || infnanpos == 6)
981                                 commit = p;     /* inf or infinity */
982                         infnanpos++;
983                         break;
984                 case S_NAN:
985                         switch (infnanpos) {
986                         case -1:        /* XXX kludge to deal with nan(...) */
987                                 goto parsedone;
988                         case 0:
989                                 if (c != 'A' && c != 'a')
990                                         goto parsedone;
991                                 break;
992                         case 1:
993                                 if (c != 'N' && c != 'n')
994                                         goto parsedone;
995                                 else
996                                         commit = p;
997                                 break;
998                         case 2:
999                                 if (c != '(')
1000                                         goto parsedone;
1001                                 break;
1002                         default:
1003                                 if (c == ')') {
1004                                         commit = p;
1005                                         infnanpos = -2;
1006                                 } else if (!isalnum(c) && c != '_')
1007                                         goto parsedone;
1008                                 break;
1009                         }
1010                         infnanpos++;
1011                         break;
1012                 case S_MAYBEHEX:
1013                         state = S_DIGITS;
1014                         if (c == 'X' || c == 'x') {
1015                                 ishex = 1;
1016                                 break;
1017                         } else {        /* we saw a '0', but no 'x' */
1018                                 gotmantdig = 1;
1019                                 goto reswitch;
1020                         }
1021                 case S_DIGITS:
1022                         if ((ishex && isxdigit(c)) || isdigit(c))
1023                                 gotmantdig = 1;
1024                         else {
1025                                 state = S_FRAC;
1026                                 if (c != decpt)
1027                                         goto reswitch;
1028                         }
1029                         if (gotmantdig)
1030                                 commit = p;
1031                         break;
1032                 case S_FRAC:
1033                         if (((c == 'E' || c == 'e') && !ishex) ||
1034                             ((c == 'P' || c == 'p') && ishex)) {
1035                                 if (!gotmantdig)
1036                                         goto parsedone;
1037                                 else
1038                                         state = S_EXP;
1039                         } else if ((ishex && isxdigit(c)) || isdigit(c)) {
1040                                 commit = p;
1041                                 gotmantdig = 1;
1042                         } else
1043                                 goto parsedone;
1044                         break;
1045                 case S_EXP:
1046                         state = S_EXPDIGITS;
1047                         if (c == '-' || c == '+')
1048                                 break;
1049                         else
1050                                 goto reswitch;
1051                 case S_EXPDIGITS:
1052                         if (isdigit(c))
1053                                 commit = p;
1054                         else
1055                                 goto parsedone;
1056                         break;
1057                 default:
1058                         abort();
1059                 }
1060                 *p++ = c;
1061                 if (--fp->pub._r > 0)
1062                         fp->pub._p++;
1063                 else if (__srefill(fp))
1064                         break;  /* EOF */
1065         }
1066
1067 parsedone:
1068         while (commit < --p)
1069                 __ungetc(*(u_char *)p, fp);
1070         *++commit = '\0';
1071         return (commit - buf);
1072 }
1073 #endif