bootstrap: Remove helpers for upgrading directly from pre 4.4
[dragonfly.git] / usr.bin / localedef / parser.y
1 %{
2 /*
3  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
4  * Copyright 2015 John Marino <draco@marino.st>
5  *
6  * This source code is derived from the illumos localedef command, and
7  * provided under BSD-style license terms by Nexenta Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31
32 /*
33  * POSIX localedef grammar.
34  */
35
36 #include <wchar.h>
37 #include <stdio.h>
38 #include <limits.h>
39 #include "localedef.h"
40
41 %}
/*
 * Semantic value attached to tokens.  The member used by each valued
 * token is selected by the <tag> on its %token declaration.
 */
42 %union {
43         int             num;            /* T_NUMBER */
44         wchar_t         wc;             /* T_CHAR */
45         char            *token;         /* T_NAME, T_SYMBOL */
46         collsym_t       *collsym;       /* T_COLLSYM */
47         collelem_t      *collelem;      /* T_COLLELEM */
48 }
49
/* Keywords accepted by the setting rule. */
50 %token          T_CODE_SET
51 %token          T_MB_CUR_MAX
52 %token          T_MB_CUR_MIN
53 %token          T_COM_CHAR
54 %token          T_ESC_CHAR
/*
 * Punctuation and layout tokens.  T_LT, T_GT, T_NULL and T_WS are declared
 * but are not referenced by any rule visible in this grammar.
 */
55 %token          T_LT
56 %token          T_GT
57 %token          T_NL
58 %token          T_SEMI
59 %token          T_COMMA
60 %token          T_ELLIPSIS
61 %token          T_RPAREN
62 %token          T_LPAREN
63 %token          T_QUOTE
64 %token          T_NULL
65 %token          T_WS
/* Section terminator and the copy directive (see the copycat rule). */
66 %token          T_END
67 %token          T_COPY
/* Section keywords consumed by the charmap rule. */
68 %token          T_CHARMAP
69 %token          T_WIDTH
/* Character-type section keyword and the keywords used by ctype_kw. */
70 %token          T_CTYPE
71 %token          T_ISUPPER
72 %token          T_ISLOWER
73 %token          T_ISALPHA
74 %token          T_ISDIGIT
75 %token          T_ISPUNCT
76 %token          T_ISXDIGIT
77 %token          T_ISSPACE
78 %token          T_ISPRINT
79 %token          T_ISGRAPH
80 %token          T_ISBLANK
81 %token          T_ISCNTRL
82 %token          T_ISALNUM
83 %token          T_ISSPECIAL
84 %token          T_ISPHONOGRAM
85 %token          T_ISIDEOGRAM
86 %token          T_ISENGLISH
87 %token          T_ISNUMBER
88 %token          T_TOUPPER
89 %token          T_TOLOWER
/* Collation section keyword and the keywords used by the coll and order rules. */
90 %token          T_COLLATE
91 %token          T_COLLATING_SYMBOL
92 %token          T_COLLATING_ELEMENT
93 %token          T_ORDER_START
94 %token          T_ORDER_END
95 %token          T_FORWARD
96 %token          T_BACKWARD
97 %token          T_POSITION
98 %token          T_FROM
99 %token          T_UNDEFINED
100 %token          T_IGNORE
/* Messages section keyword and the keywords used by messages_kw. */
101 %token          T_MESSAGES
102 %token          T_YESSTR
103 %token          T_NOSTR
104 %token          T_YESEXPR
105 %token          T_NOEXPR
/* Monetary section keyword and the keywords used by the monetary rules. */
106 %token          T_MONETARY
107 %token          T_INT_CURR_SYMBOL
108 %token          T_CURRENCY_SYMBOL
109 %token          T_MON_DECIMAL_POINT
110 %token          T_MON_THOUSANDS_SEP
111 %token          T_POSITIVE_SIGN
112 %token          T_NEGATIVE_SIGN
113 %token          T_MON_GROUPING
114 %token          T_INT_FRAC_DIGITS
115 %token          T_FRAC_DIGITS
116 %token          T_P_CS_PRECEDES
117 %token          T_P_SEP_BY_SPACE
118 %token          T_N_CS_PRECEDES
119 %token          T_N_SEP_BY_SPACE
120 %token          T_P_SIGN_POSN
121 %token          T_N_SIGN_POSN
122 %token          T_INT_P_CS_PRECEDES
123 %token          T_INT_N_CS_PRECEDES
124 %token          T_INT_P_SEP_BY_SPACE
125 %token          T_INT_N_SEP_BY_SPACE
126 %token          T_INT_P_SIGN_POSN
127 %token          T_INT_N_SIGN_POSN
/* Numeric section keyword and the keywords used by the numeric rules. */
128 %token          T_NUMERIC
129 %token          T_DECIMAL_POINT
130 %token          T_THOUSANDS_SEP
131 %token          T_GROUPING
/* Time section keyword and the keywords used by time_listkw and time_strkw. */
132 %token          T_TIME
133 %token          T_ABDAY
134 %token          T_DAY
135 %token          T_ABMON
136 %token          T_MON
137 %token          T_ERA
138 %token          T_ERA_D_FMT
139 %token          T_ERA_T_FMT
140 %token          T_ERA_D_T_FMT
141 %token          T_ALT_DIGITS
142 %token          T_D_T_FMT
143 %token          T_D_FMT
144 %token          T_T_FMT
145 %token          T_AM_PM
146 %token          T_T_FMT_AMPM
147 %token          T_DATE_FMT
/* Tokens that carry a semantic value; the tag names the %union member. */
148 %token  <wc>            T_CHAR
149 %token  <token>         T_NAME
150 %token  <num>           T_NUMBER
151 %token  <token>         T_SYMBOL
152 %token  <collsym>       T_COLLSYM
153 %token  <collelem>      T_COLLELEM
154
155 %%
156
/*
 * Start symbol (first rule, no %start directive): an optional run of
 * settings followed by one or more category sections.
 */
157 localedef       : setting_list categories
158                 | categories
159                 ;
160
/*
 * A quoted string, possibly empty.  The string rule itself has no action;
 * every rule in this file that uses it calls get_wcs() in its own action.
 */
161 string          : T_QUOTE charlist T_QUOTE
162                 | T_QUOTE T_QUOTE
163                 ;
164
/*
 * One or more characters between quotes.  The rule is left-recursive, so
 * add_wcs() is called once per character in input order.
 */
165 charlist        : charlist T_CHAR
166                 {
167                         add_wcs($2);
168                 }
169                 | T_CHAR
170                 {
171                         add_wcs($1);
172                 }
173                 ;
174
/* One or more settings; these may only appear before the category sections. */
175 setting_list    : setting_list setting
176                 | setting
177                 ;
178
179
/*
 * A single setting, terminated by T_NL.  The operand is stored in a
 * variable (com_char, esc_char, mb_cur_max, mb_cur_min) or passed to
 * set_wide_encoding(); all of these are defined outside this file.
 */
180 setting         : T_COM_CHAR T_CHAR T_NL
181                 {
182                         com_char = $2;
183                 }
184                 | T_ESC_CHAR T_CHAR T_NL
185                 {
186                         esc_char = $2;
187                 }
188                 | T_MB_CUR_MAX T_NUMBER T_NL
189                 {
190                         mb_cur_max = $2;
191                 }
192                 | T_MB_CUR_MIN T_NUMBER T_NL
193                 {
194                         mb_cur_min = $2;
195                 }
196                 | T_CODE_SET string T_NL
197                 {
/*
 * Quoted form: the collected wide string is converted with to_mb_string()
 * before being passed on.
 * NOTE(review): only the wide buffer is freed here; ownership of the
 * to_mb_string() result is not visible in this file -- confirm.
 */
198                         wchar_t *w = get_wcs();
199                         set_wide_encoding(to_mb_string(w));
200                         free(w);
201                 }
202                 | T_CODE_SET T_NAME T_NL
203                 {
/* Bare-name form: the token text is passed through unchanged. */
204                         set_wide_encoding($2);
205                 }
206                 ;
207
/*
 * The copy directive, usable as the sole body of a category section.
 * The source name may be given as a bare name or as a quoted string;
 * either way it is handed to copy_category().
 */
208 copycat         : T_COPY T_NAME T_NL
209                 {
210                         copy_category($2);
211                 }
212                 | T_COPY string T_NL
213                 {
/*
 * NOTE(review): only the wide buffer is freed here; ownership of the
 * to_mb_string() result is not visible in this file -- confirm.
 */
214                         wchar_t *w = get_wcs();
215                         copy_category(to_mb_string(w));
216                         free(w);
217                 }
218                 ;
219
/* One or more category sections, in any order. */
220 categories      : categories category
221                 | category
222                 ;
223
224
/* A single section; the charmap rule also covers the width section. */
225 category        : charmap
226                 | messages
227                 | monetary
228                 | ctype
229                 | collate
230                 | numeric
231                 | time
232                 ;
233
234
/*
 * A charmap section or a width section.  Each is opened by its keyword
 * and T_NL and closed by T_END, the same keyword, and T_NL.
 */
235 charmap         : T_CHARMAP T_NL charmap_list T_END T_CHARMAP T_NL
236                 | T_WIDTH T_NL width_list T_END T_WIDTH T_NL
237                 ;
238
239
/* One or more charmap entries (blank lines count as entries). */
240 charmap_list    : charmap_list charmap_entry
241                 | charmap_entry
242                 ;
243
244
/*
 * A single mapping, a symbol range mapping, or a blank line.  Neither
 * mapping form includes T_NL; both actions call scan_to_eol() instead.
 * NOTE(review): scan_to_eol() presumably discards the remainder of the
 * input line -- confirm against the scanner.
 */
245 charmap_entry   : T_SYMBOL T_CHAR
246                 {
247                         add_charmap($1, $2);
248                         scan_to_eol();
249                 }
250                 | T_SYMBOL T_ELLIPSIS T_SYMBOL T_CHAR
251                 {
252                         add_charmap_range($1, $3, $4);
253                         scan_to_eol();
254                 }
255                 | T_NL
256                 ;
257
/* One or more width entries (blank lines count as entries). */
258 width_list      : width_list width_entry
259                 | width_entry
260                 ;
261
/*
 * A width assignment for a single character or for a range.  Operands
 * that arrive as T_SYMBOL instead of T_CHAR are only reported through
 * add_charmap_undefined() and receive no width.
 * NOTE(review): presumably the scanner returns T_SYMBOL for names it
 * could not resolve to a character -- confirm.
 */
262 width_entry     : T_CHAR T_NUMBER T_NL
263                 {
264                         add_width($1, $2);
265                 }
266                 | T_SYMBOL T_NUMBER T_NL
267                 {
268                         add_charmap_undefined($1);
269                 }
270                 | T_CHAR T_ELLIPSIS T_CHAR T_NUMBER T_NL
271                 {
272                         add_width_range($1, $3, $4);
273                 }
274                 | T_SYMBOL T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
275                 {
276                         add_charmap_undefined($1);
277                         add_charmap_undefined($3);
278                 }
279                 | T_CHAR T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
280                 {
/* Only the T_CHAR endpoint gets the width; the range is not expanded. */
281                         add_width($1, $4);
282                         add_charmap_undefined($3);
283                 }
284                 | T_SYMBOL T_ELLIPSIS T_CHAR T_NUMBER T_NL
285                 {
/* Only the T_CHAR endpoint gets the width; the range is not expanded. */
286                         add_width($3, $4);
287                         add_charmap_undefined($1);
288                 }
289                 | T_NL
290                 ;
291
/*
 * Character-type section.  When the body is a keyword list, dump_ctype()
 * is called once the closing T_END T_CTYPE T_NL has been reduced.  When
 * the body is a copy directive, no action is taken here; the copycat
 * action has already called copy_category().
 */
292 ctype           : T_CTYPE T_NL ctype_list T_END T_CTYPE T_NL
293                 {
294                         dump_ctype();
295                 }
296                 | T_CTYPE T_NL copycat  T_END T_CTYPE T_NL
297                 ;
298
/* One or more keyword lines. */
299 ctype_list      : ctype_list ctype_kw
300                 | ctype_kw
301                 ;
302
/*
 * One keyword line: a class keyword followed by a character list, or a
 * case-mapping keyword followed by a list of pairs.  No alternative has
 * an action of its own; the work is done by the cc_* and conv_* actions.
 * NOTE(review): those actions receive no indication of which keyword is
 * active, so it is presumably recorded elsewhere (e.g. by the scanner)
 * -- confirm.
 */
303 ctype_kw        : T_ISUPPER cc_list T_NL
304                 | T_ISLOWER cc_list T_NL
305                 | T_ISALPHA cc_list T_NL
306                 | T_ISDIGIT cc_list T_NL
307                 | T_ISPUNCT cc_list T_NL
308                 | T_ISXDIGIT cc_list T_NL
309                 | T_ISSPACE cc_list T_NL
310                 | T_ISPRINT cc_list T_NL
311                 | T_ISGRAPH cc_list T_NL
312                 | T_ISBLANK cc_list T_NL
313                 | T_ISCNTRL cc_list T_NL
314                 | T_ISALNUM cc_list T_NL
315                 | T_ISSPECIAL cc_list T_NL
316                 | T_ISENGLISH cc_list T_NL
317                 | T_ISNUMBER cc_list T_NL
318                 | T_ISIDEOGRAM cc_list T_NL
319                 | T_ISPHONOGRAM cc_list T_NL
320                 | T_TOUPPER conv_list T_NL
321                 | T_TOLOWER conv_list T_NL
322                 ;
323
/*
 * Semicolon-separated list of class members.  The list must begin with a
 * single character; later items may be characters or range ends.
 */
324 cc_list         : cc_list T_SEMI cc_range_end
325                 | cc_list T_SEMI cc_char
326                 | cc_char
327                 ;
328
/*
 * The tail of a range: ellipsis, semicolon, end character.  Only the end
 * character is passed to add_ctype_range().
 * NOTE(review): the start of the range is presumably the character most
 * recently given to add_ctype() -- confirm in its implementation.
 */
329 cc_range_end    : T_ELLIPSIS T_SEMI T_CHAR
330                 {
331                         add_ctype_range($3);
332                 }
333                 ;
334
/*
 * A single class member.  A T_CHAR is added with add_ctype(); a T_SYMBOL
 * is only reported through add_charmap_undefined().
 */
335 cc_char         : T_CHAR
336                 {
337                         add_ctype($1);
338                 }
339                 | T_SYMBOL
340                 {
341                         add_charmap_undefined($1);
342                 }
343                 ;
344
/* Semicolon-separated list of case-conversion pairs. */
345 conv_list       : conv_list T_SEMI conv_pair
346                 | conv_pair
347                 ;
348
349
/*
 * One parenthesised, comma-separated pair.  add_caseconv() is called only
 * when both members are T_CHAR.  If either member is a T_SYMBOL, each
 * symbol is reported through add_charmap_undefined() and no conversion
 * is recorded for the pair.
 */
350 conv_pair       : T_LPAREN T_CHAR T_COMMA T_CHAR T_RPAREN
351                 {
352                         add_caseconv($2, $4);
353                 }
354                 | T_LPAREN T_SYMBOL T_COMMA T_CHAR T_RPAREN
355                 {
356                         add_charmap_undefined($2);
357                 }
358                 | T_LPAREN T_SYMBOL T_COMMA T_SYMBOL T_RPAREN
359                 {
360                         add_charmap_undefined($2);
361                         add_charmap_undefined($4);
362                 }
363                 | T_LPAREN T_CHAR T_COMMA T_SYMBOL T_RPAREN
364                 {
365                         add_charmap_undefined($4);
366                 }
367                 ;
368
/*
 * Collation section.  The body is either an order block, optionally
 * preceded by collating-symbol and collating-element definitions, or a
 * copy directive.  dump_collate() is called for the first two forms once
 * the closing T_END T_COLLATE T_NL has been reduced; the copy form has
 * no action here.
 */
369 collate         : T_COLLATE T_NL coll_order T_END T_COLLATE T_NL
370                 {
371                         dump_collate();
372                 }
373                 | T_COLLATE T_NL coll_optional coll_order T_END T_COLLATE T_NL
374                 {
375                         dump_collate();
376                 }
377                 | T_COLLATE T_NL copycat T_END T_COLLATE T_NL
378                 ;
379
380
/*
 * One or more collating-symbol and collating-element definitions, in any
 * order, appearing before the order block.
 */
381 coll_optional   : coll_optional coll_symbols
382                 | coll_optional coll_elements
383                 | coll_symbols
384                 | coll_elements
385                 ;
386
387
/* Definition of one collating symbol; the name goes to define_collsym(). */
388 coll_symbols    : T_COLLATING_SYMBOL T_SYMBOL T_NL
389                 {
390                         define_collsym($2);
391                 }
392                 ;
393
394
/*
 * Definition of one collating element: a name, T_FROM, and a quoted
 * string.  The name and the string returned by get_wcs() are passed to
 * define_collelem().
 */
395 coll_elements   : T_COLLATING_ELEMENT T_SYMBOL T_FROM string T_NL
396                 {
397                         define_collelem($2, get_wcs());
398                 }
399                 ;
400
/*
 * The order block, from T_ORDER_START to T_ORDER_END.  The opening
 * keyword may be followed on the same line by order_args.
 */
401 coll_order      : T_ORDER_START T_NL order_list T_ORDER_END T_NL
402                 {
/*
 * No order_args were given: register a single forward directive.
 * NOTE(review): this is an end-of-rule action, so it runs only after
 * order_list has been parsed -- confirm that this ordering is intended.
 */
403                         /* No order_args supplied: default to one forward directive */
404                         add_order_bit(T_FORWARD);
405                         add_order_directive();
406                 }
407                 | T_ORDER_START order_args T_NL order_list T_ORDER_END T_NL
408                 ;
409
410
/*
 * Semicolon-separated list of order directives.  add_order_directive()
 * is called once after each order_arg has been reduced.
 */
411 order_args      : order_args T_SEMI order_arg
412                 {
413                         add_order_directive();
414                 }
415                 | order_arg
416                 {
417                         add_order_directive();
418                 }
419                 ;
420
/* One directive: a comma-separated list of one or more direction keywords. */
421 order_arg       : order_arg T_COMMA order_dir
422                 | order_dir
423                 ;
424
/* One direction keyword; its token value is passed to add_order_bit(). */
425 order_dir       : T_FORWARD
426                 {
427                         add_order_bit(T_FORWARD);
428                 }
429                 | T_BACKWARD
430                 {
431                         add_order_bit(T_BACKWARD);
432                 }
433                 | T_POSITION
434                 {
435                         add_order_bit(T_POSITION);
436                 }
437                 ;
438
/* One or more lines inside the order block. */
439 order_list      : order_list order_item
440                 | order_item
441                 ;
442
/*
 * One line of the order block.  A line holding only a collating symbol
 * is finished with end_order_collsym().  Any other line begins with an
 * order_itemkw, may carry a weight list, and is finished with
 * end_order().
 */
443 order_item      : T_COLLSYM T_NL
444                 {
445                         end_order_collsym($1);
446                 }
447                 | order_itemkw T_NL
448                 {
449                         end_order();
450                 }
451                 | order_itemkw order_weights T_NL
452                 {
453                         end_order();
454                 }
455                 ;
456
/*
 * The leading item of an order line.  Each kind of item opens the entry
 * through its own start_order_*() helper, which runs before any weights
 * on the same line are parsed.
 */
457 order_itemkw    : T_CHAR
458                 {
459                         start_order_char($1);
460                 }
461                 | T_ELLIPSIS
462                 {
463                         start_order_ellipsis();
464                 }
465                 | T_COLLELEM
466                 {
467                         start_order_collelem($1);
468                 }
469                 | T_UNDEFINED
470                 {
471                         start_order_undefined();
472                 }
473                 | T_SYMBOL
474                 {
475                         start_order_symbol($1);
476                 }
477                 ;
478
/*
 * Semicolon-separated weight list.  The second alternative accepts a
 * semicolon with no weight after it, so trailing and consecutive
 * semicolons are valid and trigger no action.
 */
479 order_weights   : order_weights T_SEMI order_weight
480                 | order_weights T_SEMI
481                 | order_weight
482                 ;
483
/*
 * One weight.  Each kind is recorded through its own add_order_*()
 * helper.  For the quoted form, the order_stritem actions run first and
 * add_order_subst() is called when the closing quote is reduced.
 */
484 order_weight    : T_COLLELEM
485                 {
486                         add_order_collelem($1);
487                 }
488                 | T_COLLSYM
489                 {
490                         add_order_collsym($1);
491                 }
492                 | T_CHAR
493                 {
494                         add_order_char($1);
495                 }
496                 | T_ELLIPSIS
497                 {
498                         add_order_ellipsis();
499                 }
500                 | T_IGNORE
501                 {
502                         add_order_ignore();
503                 }
504                 | T_SYMBOL
505                 {
506                         add_order_symbol($1);
507                 }
508                 | T_QUOTE order_str T_QUOTE
509                 {
510                         add_order_subst();
511                 }
512                 ;
513
/*
 * Contents of a quoted weight: one or more items.  The rule is
 * left-recursive, so the item actions run in input order.
 */
514 order_str       : order_str order_stritem
515                 | order_stritem
516                 ;
517
/* One item of a quoted weight, recorded through its add_subst_*() helper. */
518 order_stritem   : T_CHAR
519                 {
520                         add_subst_char($1);
521                 }
522                 | T_COLLSYM
523                 {
524                         add_subst_collsym($1);
525                 }
526                 | T_COLLELEM
527                 {
528                         add_subst_collelem($1);
529                 }
530                 | T_SYMBOL
531                 {
532                         add_subst_symbol($1);
533                 }
534                 ;
535
/*
 * Messages section.  When the body is a keyword list, dump_messages() is
 * called once the closing T_END T_MESSAGES T_NL has been reduced; the
 * copy form has no action here.
 */
536 messages        : T_MESSAGES T_NL messages_list T_END T_MESSAGES T_NL
537                 {
538                         dump_messages();
539                 }
540                 | T_MESSAGES T_NL copycat T_END T_MESSAGES T_NL
541                 ;
542
/* One or more keyword lines. */
543 messages_list   : messages_list messages_item
544                 | messages_item
545                 ;
546
/* Keywords that take a string operand. */
547 messages_kw     : T_YESSTR
548                 | T_NOSTR
549                 | T_YESEXPR
550                 | T_NOEXPR
551                 ;
552
/*
 * One keyword line.  Only the string is passed to add_message().
 * NOTE(review): the keyword is not passed along, so add_message()
 * presumably learns it from state kept elsewhere -- confirm.
 */
553 messages_item   : messages_kw string T_NL
554                 {
555                         add_message(get_wcs());
556                 }
557                 ;
558
/*
 * Monetary section.  When the body is a keyword list, dump_monetary() is
 * called once the closing T_END T_MONETARY T_NL has been reduced; the
 * copy form has no action here.
 */
559 monetary        : T_MONETARY T_NL monetary_list T_END T_MONETARY T_NL
560                 {
561                         dump_monetary();
562                 }
563                 | T_MONETARY T_NL copycat T_END T_MONETARY T_NL
564                 ;
565
/* One or more keyword lines. */
566 monetary_list   : monetary_list monetary_kw
567                 | monetary_kw
568                 ;
569
/* Monetary keywords whose operand is a quoted string. */
570 monetary_strkw  : T_INT_CURR_SYMBOL
571                 | T_CURRENCY_SYMBOL
572                 | T_MON_DECIMAL_POINT
573                 | T_MON_THOUSANDS_SEP
574                 | T_POSITIVE_SIGN
575                 | T_NEGATIVE_SIGN
576                 ;
577
/* Monetary keywords whose operand is a single number. */
578 monetary_numkw  : T_INT_FRAC_DIGITS
579                 | T_FRAC_DIGITS
580                 | T_P_CS_PRECEDES
581                 | T_P_SEP_BY_SPACE
582                 | T_N_CS_PRECEDES
583                 | T_N_SEP_BY_SPACE
584                 | T_P_SIGN_POSN
585                 | T_N_SIGN_POSN
586                 | T_INT_P_CS_PRECEDES
587                 | T_INT_N_CS_PRECEDES
588                 | T_INT_P_SEP_BY_SPACE
589                 | T_INT_N_SEP_BY_SPACE
590                 | T_INT_P_SIGN_POSN
591                 | T_INT_N_SIGN_POSN
592                 ;
593
/*
 * One keyword line: a string keyword with a quoted string, a numeric
 * keyword with a number, or the grouping keyword with a number list.
 * Only the operand is passed to the add_monetary_*() helpers.
 * NOTE(review): the keyword is not passed along, so the helpers
 * presumably learn it from state kept elsewhere -- confirm.
 */
594 monetary_kw     : monetary_strkw string T_NL
595                 {
596                         add_monetary_str(get_wcs());
597                 }
598                 | monetary_numkw T_NUMBER T_NL
599                 {
600                         add_monetary_num($2);
601                 }
602                 | T_MON_GROUPING mon_group_list T_NL
603                 ;
604
/*
 * Semicolon-separated numbers.  The rule is left-recursive, so the first
 * alternative is reduced for the first number; it calls
 * reset_monetary_group() before adding, and later numbers are only added.
 */
605 mon_group_list  : T_NUMBER
606                 {
607                         reset_monetary_group();
608                         add_monetary_group($1);
609                 }
610                 | mon_group_list T_SEMI T_NUMBER
611                 {
612                         add_monetary_group($3);
613                 }
614                 ;
615
616
/*
 * Numeric section.  When the body is a keyword list, dump_numeric() is
 * called once the closing T_END T_NUMERIC T_NL has been reduced; the
 * copy form has no action here.
 */
617 numeric         : T_NUMERIC T_NL numeric_list T_END T_NUMERIC T_NL
618                 {
619                         dump_numeric();
620                 }
621                 | T_NUMERIC T_NL copycat T_END T_NUMERIC T_NL
622                 ;
623
624
/* One or more keyword lines. */
625 numeric_list    : numeric_list numeric_item
626                 | numeric_item
627                 ;
628
629
/*
 * One keyword line: a string keyword with a quoted string, or the
 * grouping keyword with a number list.
 * NOTE(review): the keyword is not passed to add_numeric_str(), so it
 * presumably learns it from state kept elsewhere -- confirm.
 */
630 numeric_item    : numeric_strkw string T_NL
631                 {
632                         add_numeric_str(get_wcs());
633                 }
634                 | T_GROUPING group_list T_NL
635                 ;
636
/* Numeric keywords whose operand is a quoted string. */
637 numeric_strkw   : T_DECIMAL_POINT
638                 | T_THOUSANDS_SEP
639                 ;
640
641
/*
 * Semicolon-separated numbers.  The rule is left-recursive, so the first
 * alternative is reduced for the first number; it calls
 * reset_numeric_group() before adding, and later numbers are only added.
 */
642 group_list      : T_NUMBER
643                 {
644                         reset_numeric_group();
645                         add_numeric_group($1);
646                 }
647                 | group_list T_SEMI T_NUMBER
648                 {
649                         add_numeric_group($3);
650                 }
651                 ;
652
653
/*
 * Time section.  When the body is a keyword list, dump_time() is called
 * once the closing T_END T_TIME T_NL has been reduced.  When the body is
 * a copy directive, no action is taken here; the copycat action has
 * already called copy_category().
 */
654 time            : T_TIME T_NL time_kwlist T_END T_TIME T_NL
655                 {
656                         dump_time();
657                 }
/*
 * The closing keyword must be T_TIME, the same keyword that opened the
 * section, as in every other category rule.
 */
658                 | T_TIME T_NL copycat T_END T_TIME T_NL
659                 ;
660
/* One or more keyword lines. */
661 time_kwlist     : time_kwlist time_kw
662                 | time_kw
663                 ;
664
/*
 * One keyword line: a string keyword with one quoted string, or a list
 * keyword with a semicolon-separated list of strings.  The list form
 * calls check_time_list() after the whole line has been parsed.
 * NOTE(review): the keyword is not passed to the helpers, so they
 * presumably learn it from state kept elsewhere -- confirm.
 */
665 time_kw         : time_strkw string T_NL
666                 {
667                         add_time_str(get_wcs());
668                 }
669                 | time_listkw time_list T_NL
670                 {
671                         check_time_list();
672                 }
673                 ;
674
/* Time keywords whose operand is a list of quoted strings. */
675 time_listkw     : T_ABDAY
676                 | T_DAY
677                 | T_ABMON
678                 | T_MON
679                 | T_ERA
680                 | T_ALT_DIGITS
681                 | T_AM_PM
682                 ;
683
/* Time keywords whose operand is a single quoted string. */
684 time_strkw      : T_ERA_D_T_FMT
685                 | T_ERA_T_FMT
686                 | T_ERA_D_FMT
687                 | T_D_T_FMT
688                 | T_D_FMT
689                 | T_T_FMT
690                 | T_T_FMT_AMPM
691                 | T_DATE_FMT
692                 ;
693
/*
 * Semicolon-separated quoted strings.  The rule is left-recursive, so the
 * second alternative is reduced for the first string; it calls
 * reset_time_list() before adding, and later strings are only added.
 */
694 time_list       : time_list T_SEMI string
695                 {
696                         add_time_list(get_wcs());
697                 }
698                 | string
699                 {
700                         reset_time_list();
701                         add_time_list(get_wcs());
702                 }
703                 ;