Merge branch 'vendor/LESS'
[dragonfly.git] / usr.bin / localedef / parser.y
1 %{
2 /*
3  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
4  * Copyright 2015 John Marino <draco@marino.st>
5  *
6  * This source code is derived from the illumos localedef command, and
7  * provided under BSD-style license terms by Nexenta Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31
32 /*
33  * POSIX localedef grammar.
34  */
35
36 #include <wchar.h>
37 #include <stdio.h>
38 #include <limits.h>
39 #include "localedef.h"
40
41 %}
/*
 * Semantic value type shared by all tokens.  The member used by a token
 * is selected by the <tag> on its %token declaration.
 */
42 %union {
43         int             num;            /* T_NUMBER */
44         wchar_t         wc;             /* T_CHAR */
45         char            *token;         /* T_NAME, T_SYMBOL */
46         collsym_t       *collsym;       /* T_COLLSYM */
47         collelem_t      *collelem;      /* T_COLLELEM */
48 }
49
50 %token          T_CODE_SET
51 %token          T_MB_CUR_MAX
52 %token          T_MB_CUR_MIN
53 %token          T_COM_CHAR
54 %token          T_ESC_CHAR
55 %token          T_LT
56 %token          T_GT
57 %token          T_NL
58 %token          T_SEMI
59 %token          T_COMMA
60 %token          T_ELLIPSIS
61 %token          T_RPAREN
62 %token          T_LPAREN
63 %token          T_QUOTE
64 %token          T_NULL
65 %token          T_WS
66 %token          T_END
67 %token          T_COPY
68 %token          T_CHARMAP
69 %token          T_WIDTH
70 %token          T_CTYPE
71 %token          T_ISUPPER
72 %token          T_ISLOWER
73 %token          T_ISALPHA
74 %token          T_ISDIGIT
75 %token          T_ISPUNCT
76 %token          T_ISXDIGIT
77 %token          T_ISSPACE
78 %token          T_ISPRINT
79 %token          T_ISGRAPH
80 %token          T_ISBLANK
81 %token          T_ISCNTRL
82 %token          T_ISALNUM
83 %token          T_ISSPECIAL
84 %token          T_ISPHONOGRAM
85 %token          T_ISIDEOGRAM
86 %token          T_ISENGLISH
87 %token          T_ISNUMBER
88 %token          T_TOUPPER
89 %token          T_TOLOWER
90 %token          T_COLLATE
91 %token          T_COLLATING_SYMBOL
92 %token          T_COLLATING_ELEMENT
93 %token          T_ORDER_START
94 %token          T_ORDER_END
95 %token          T_FORWARD
96 %token          T_BACKWARD
97 %token          T_POSITION
98 %token          T_FROM
99 %token          T_UNDEFINED
100 %token          T_IGNORE
101 %token          T_MESSAGES
102 %token          T_YESSTR
103 %token          T_NOSTR
104 %token          T_YESEXPR
105 %token          T_NOEXPR
106 %token          T_MONETARY
107 %token          T_INT_CURR_SYMBOL
108 %token          T_CURRENCY_SYMBOL
109 %token          T_MON_DECIMAL_POINT
110 %token          T_MON_THOUSANDS_SEP
111 %token          T_POSITIVE_SIGN
112 %token          T_NEGATIVE_SIGN
113 %token          T_MON_GROUPING
114 %token          T_INT_FRAC_DIGITS
115 %token          T_FRAC_DIGITS
116 %token          T_P_CS_PRECEDES
117 %token          T_P_SEP_BY_SPACE
118 %token          T_N_CS_PRECEDES
119 %token          T_N_SEP_BY_SPACE
120 %token          T_P_SIGN_POSN
121 %token          T_N_SIGN_POSN
122 %token          T_INT_P_CS_PRECEDES
123 %token          T_INT_N_CS_PRECEDES
124 %token          T_INT_P_SEP_BY_SPACE
125 %token          T_INT_N_SEP_BY_SPACE
126 %token          T_INT_P_SIGN_POSN
127 %token          T_INT_N_SIGN_POSN
128 %token          T_NUMERIC
129 %token          T_DECIMAL_POINT
130 %token          T_THOUSANDS_SEP
131 %token          T_GROUPING
132 %token          T_TIME
133 %token          T_ABDAY
134 %token          T_DAY
135 %token          T_ABMON
136 %token          T_MON
137 %token          T_ERA
138 %token          T_ERA_D_FMT
139 %token          T_ERA_T_FMT
140 %token          T_ERA_D_T_FMT
141 %token          T_ALT_DIGITS
142 %token          T_D_T_FMT
143 %token          T_D_FMT
144 %token          T_T_FMT
145 %token          T_AM_PM
146 %token          T_T_FMT_AMPM
147 %token          T_DATE_FMT
148 %token  <wc>            T_CHAR
149 %token  <token>         T_NAME
150 %token  <num>           T_NUMBER
151 %token  <token>         T_SYMBOL
152 %token  <collsym>       T_COLLSYM
153 %token  <collelem>      T_COLLELEM
154
155 %%
156
/*
 * First rule of the grammar and therefore the start symbol (no %start is
 * declared): an optional list of settings followed by one or more
 * categories.
 */
157 localedef       : setting_list categories
158                 | categories
159                 ;
160
/*
 * A quoted string, possibly empty.  The rule itself has no action; the
 * characters are handed to add_wcs() by charlist, and rules that consume
 * a string fetch the result with get_wcs().
 */
161 string          : T_QUOTE charlist T_QUOTE
162                 | T_QUOTE T_QUOTE
163                 ;
164
/*
 * One or more T_CHAR tokens.  Each character is passed to add_wcs() in
 * the order it is read (left recursion reduces the leftmost item first).
 */
165 charlist        : charlist T_CHAR
166                 {
167                         add_wcs($2);
168                 }
169                 | T_CHAR
170                 {
171                         add_wcs($1);
172                 }
173                 ;
174
/* One or more settings. */
175 setting_list    : setting_list setting
176                 | setting
177                 ;
178
179
/*
 * A single newline-terminated setting:
 *   T_COM_CHAR / T_ESC_CHAR  store the given character in com_char /
 *                            esc_char;
 *   T_MB_CUR_MAX / _MIN      store the given number in mb_cur_max /
 *                            mb_cur_min;
 *   T_CODE_SET               passes the encoding name, given either as a
 *                            quoted string or as a bare T_NAME, to
 *                            set_wide_encoding().
 */
180 setting         : T_COM_CHAR T_CHAR T_NL
181                 {
182                         com_char = $2;
183                 }
184                 | T_ESC_CHAR T_CHAR T_NL
185                 {
186                         esc_char = $2;
187                 }
188                 | T_MB_CUR_MAX T_NUMBER T_NL
189                 {
190                         mb_cur_max = $2;
191                 }
192                 | T_MB_CUR_MIN T_NUMBER T_NL
193                 {
194                         mb_cur_min = $2;
195                 }
196                 | T_CODE_SET string T_NL
197                 {
                        /*
                         * The wide string collected for the quoted name is
                         * converted with to_mb_string() and then freed.
                         * NOTE(review): the to_mb_string() result itself is
                         * not released here -- confirm who owns it.
                         */
198                         wchar_t *w = get_wcs();
199                         set_wide_encoding(to_mb_string(w));
200                         free(w);
201                 }
202                 | T_CODE_SET T_NAME T_NL
203                 {
204                         set_wide_encoding($2);
205                 }
206                 ;
207
/*
 * A copy directive: T_COPY followed by a source name given either as a
 * bare T_NAME or as a quoted string.  The name is passed to
 * copy_category().
 */
208 copycat         : T_COPY T_NAME T_NL
209                 {
210                         copy_category($2);
211                 }
212                 | T_COPY string T_NL
213                 {
                        /*
                         * NOTE(review): the to_mb_string() result is not
                         * released here -- confirm whether copy_category()
                         * takes ownership.
                         */
214                         wchar_t *w = get_wcs();
215                         copy_category(to_mb_string(w));
216                         free(w);
217                 }
218                 ;
219
/* One or more categories. */
220 categories      : categories category
221                 | category
222                 ;
223
224
/* Any one of the seven section kinds this grammar understands. */
225 category        : charmap
226                 | messages
227                 | monetary
228                 | ctype
229                 | collate
230                 | numeric
231                 | time
232                 ;
233
234
/*
 * Either a T_CHARMAP section holding charmap entries or a T_WIDTH section
 * holding width entries.  Each is closed by T_END followed by the same
 * keyword that opened it.  No action is attached to the section itself.
 */
235 charmap         : T_CHARMAP T_NL charmap_list T_END T_CHARMAP T_NL
236                 | T_WIDTH T_NL width_list T_END T_WIDTH T_NL
237                 ;
238
239
/* One or more charmap entries (blank lines count as entries). */
240 charmap_list    : charmap_list charmap_entry
241                 | charmap_entry
242                 ;
243
244
/*
 * One charmap line:
 *   symbol char                  -> add_charmap(symbol, char)
 *   symbol ... symbol char       -> add_charmap_range(first, last, char)
 *   an empty line (T_NL alone)   -> nothing
 * These alternatives do not match a trailing T_NL; instead the action
 * calls scan_to_eol() after recording the mapping.
 * NOTE(review): presumably scan_to_eol() discards the rest of the input
 * line -- confirm against the scanner.
 */
245 charmap_entry   : T_SYMBOL T_CHAR
246                 {
247                         add_charmap($1, $2);
248                         scan_to_eol();
249                 }
250                 | T_SYMBOL T_ELLIPSIS T_SYMBOL T_CHAR
251                 {
252                         add_charmap_range($1, $3, $4);
253                         scan_to_eol();
254                 }
255                 | T_NL
256                 ;
257
/* One or more width entries (blank lines count as entries). */
258 width_list      : width_list width_entry
259                 | width_entry
260                 ;
261
/*
 * One width line, giving a width (T_NUMBER) for a single character or for
 * an ellipsis range.  A T_CHAR operand is recorded with add_width() or
 * add_width_range(); a T_SYMBOL operand is passed to
 * add_charmap_undefined() instead.
 * NOTE(review): T_SYMBOL here presumably means the scanner could not
 * resolve the name to a character -- confirm.
 * In the mixed range forms only the T_CHAR endpoint gets a width; no range
 * is recorded.
 */
262 width_entry     : T_CHAR T_NUMBER T_NL
263                 {
264                         add_width($1, $2);
265                 }
266                 | T_SYMBOL T_NUMBER T_NL
267                 {
268                         add_charmap_undefined($1);
269                 }
270                 | T_CHAR T_ELLIPSIS T_CHAR T_NUMBER T_NL
271                 {
272                         add_width_range($1, $3, $4);
273                 }
274                 | T_SYMBOL T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
275                 {
276                         add_charmap_undefined($1);
277                         add_charmap_undefined($3);
278                 }
279                 | T_CHAR T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
280                 {
281                         add_width($1, $4);
282                         add_charmap_undefined($3);
283                 }
284                 | T_SYMBOL T_ELLIPSIS T_CHAR T_NUMBER T_NL
285                 {
286                         add_width($3, $4);
287                         add_charmap_undefined($1);
288                 }
289                 | T_NL
290                 ;
291
/*
 * T_CTYPE section.  With an explicit keyword list, dump_ctype() is called
 * once the closing T_END T_CTYPE line has been read.  The copy form has no
 * action in this rule; the work is done in copycat.
 */
292 ctype           : T_CTYPE T_NL ctype_list T_END T_CTYPE T_NL
293                 {
294                         dump_ctype();
295                 }
296                 | T_CTYPE T_NL copycat  T_END T_CTYPE T_NL
297                 ;
298
/* One or more ctype keyword lines. */
299 ctype_list      : ctype_list ctype_kw
300                 | ctype_kw
301                 ;
302
/*
 * One ctype keyword line: a character-class keyword followed by a
 * character list, or T_TOUPPER / T_TOLOWER followed by a list of
 * conversion pairs.  None of the alternatives carries an action.
 * NOTE(review): the active keyword is presumably remembered outside this
 * grammar (e.g. by the scanner) so that add_ctype() and add_caseconv()
 * know what they are adding to -- confirm.
 */
303 ctype_kw        : T_ISUPPER cc_list T_NL
304                 | T_ISLOWER cc_list T_NL
305                 | T_ISALPHA cc_list T_NL
306                 | T_ISDIGIT cc_list T_NL
307                 | T_ISPUNCT cc_list T_NL
308                 | T_ISXDIGIT cc_list T_NL
309                 | T_ISSPACE cc_list T_NL
310                 | T_ISPRINT cc_list T_NL
311                 | T_ISGRAPH cc_list T_NL
312                 | T_ISBLANK cc_list T_NL
313                 | T_ISCNTRL cc_list T_NL
314                 | T_ISALNUM cc_list T_NL
315                 | T_ISSPECIAL cc_list T_NL
316                 | T_ISENGLISH cc_list T_NL
317                 | T_ISNUMBER cc_list T_NL
318                 | T_ISIDEOGRAM cc_list T_NL
319                 | T_ISPHONOGRAM cc_list T_NL
320                 | T_TOUPPER conv_list T_NL
321                 | T_TOLOWER conv_list T_NL
322                 ;
323
324
/*
 * Semicolon-separated members of a character class.  A T_CHAR is recorded
 * with add_ctype(); a T_SYMBOL is passed to add_charmap_undefined().
 * A range is written "<item> ; ... ; <char>".  Only the closing character
 * is handed to add_ctype_range(), and the grammar accepts only a T_CHAR
 * there, so a T_SYMBOL after the ellipsis is a syntax error.
 * NOTE(review): the start of the range is presumably the most recently
 * added character -- confirm in add_ctype_range().
 */
325 cc_list         : cc_list T_SEMI T_CHAR
326                 {
327                         add_ctype($3);
328                 }
329                 | cc_list T_SEMI T_SYMBOL
330                 {
331                         add_charmap_undefined($3);
332                 }
333                 | cc_list T_SEMI T_ELLIPSIS T_SEMI T_CHAR
334                 {
335                         /* note that the endpoints *must* be characters */
336                         add_ctype_range($5);
337                 }
338                 | T_CHAR
339                 {
340                         add_ctype($1);
341                 }
342                 | T_SYMBOL
343                 {
344                         add_charmap_undefined($1);
345                 }
346                 ;
347
/* One or more conversion pairs separated by semicolons. */
348 conv_list       : conv_list T_SEMI conv_pair
349                 | conv_pair
350                 ;
351
352
/*
 * A parenthesised "(from, to)" case-conversion pair.  add_caseconv() is
 * called only when both members are T_CHAR.  If either member is a
 * T_SYMBOL, no conversion is recorded and each T_SYMBOL is passed to
 * add_charmap_undefined().
 */
353 conv_pair       : T_LPAREN T_CHAR T_COMMA T_CHAR T_RPAREN
354                 {
355                         add_caseconv($2, $4);
356                 }
357                 | T_LPAREN T_SYMBOL T_COMMA T_CHAR T_RPAREN
358                 {
359                         add_charmap_undefined($2);
360                 }
361                 | T_LPAREN T_SYMBOL T_COMMA T_SYMBOL T_RPAREN
362                 {
363                         add_charmap_undefined($2);
364                         add_charmap_undefined($4);
365                 }
366                 | T_LPAREN T_CHAR T_COMMA T_SYMBOL T_RPAREN
367                 {
368                         add_charmap_undefined($4);
369                 }
370                 ;
371
/*
 * T_COLLATE section.  It holds an order block, optionally preceded by
 * collating-symbol / collating-element definitions; dump_collate() is
 * called after the closing T_END T_COLLATE line.  The copy form has no
 * action in this rule.
 */
372 collate         : T_COLLATE T_NL coll_order T_END T_COLLATE T_NL
373                 {
374                         dump_collate();
375                 }
376                 | T_COLLATE T_NL coll_optional coll_order T_END T_COLLATE T_NL
377                 {
378                         dump_collate();
379                 }
380                 | T_COLLATE T_NL copycat T_END T_COLLATE T_NL
381                 ;
382
383
/*
 * One or more collating-symbol and collating-element definitions, in any
 * order, that may precede the order block.
 */
384 coll_optional   : coll_optional coll_symbols
385                 | coll_optional coll_elements
386                 | coll_symbols
387                 | coll_elements
388                 ;
389
390
/* A collating-symbol line: the symbol name is passed to define_collsym(). */
391 coll_symbols    : T_COLLATING_SYMBOL T_SYMBOL T_NL
392                 {
393                         define_collsym($2);
394                 }
395                 ;
396
397
/*
 * A collating-element line: "<symbol> from <string>".  The symbol name
 * and the wide string collected for the quoted text are passed to
 * define_collelem().
 * NOTE(review): the get_wcs() result is not freed here, unlike in setting
 * and copycat -- presumably define_collelem() keeps it; confirm.
 */
398 coll_elements   : T_COLLATING_ELEMENT T_SYMBOL T_FROM string T_NL
399                 {
400                         define_collelem($2, get_wcs());
401                 }
402                 ;
403
/*
 * The order block: T_ORDER_START, optionally followed by sort directives
 * on the same line, then the order list and T_ORDER_END.  When directives
 * are given they are registered by order_args and this rule has no action.
 * NOTE(review): the default-directive action below is attached to the end
 * of the rule, so it runs only after order_list (and every order_item
 * action) has been reduced -- confirm the collation code tolerates the
 * directive being registered that late.
 */
404 coll_order      : T_ORDER_START T_NL order_list T_ORDER_END T_NL
405                 {
406                         /* No directives on the order_start line: default to one forward */
407                         add_order_bit(T_FORWARD);
408                         add_order_directive();
409                 }
410                 | T_ORDER_START order_args T_NL order_list T_ORDER_END T_NL
411                 ;
412
413
/*
 * Semicolon-separated sort directives.  add_order_directive() is called
 * once per directive, after the add_order_bit() calls made for its
 * comma-separated parts in order_dir.
 */
414 order_args      : order_args T_SEMI order_arg
415                 {
416                         add_order_directive();
417                 }
418                 | order_arg
419                 {
420                         add_order_directive();
421                 }
422                 ;
423
/* One sort directive: one or more direction keywords separated by commas. */
424 order_arg       : order_arg T_COMMA order_dir
425                 | order_dir
426                 ;
427
/*
 * A single direction keyword.  The token code itself (T_FORWARD,
 * T_BACKWARD or T_POSITION) is passed to add_order_bit().
 */
428 order_dir       : T_FORWARD
429                 {
430                         add_order_bit(T_FORWARD);
431                 }
432                 | T_BACKWARD
433                 {
434                         add_order_bit(T_BACKWARD);
435                 }
436                 | T_POSITION
437                 {
438                         add_order_bit(T_POSITION);
439                 }
440                 ;
441
/* One or more lines of the order block. */
442 order_list      : order_list order_item
443                 | order_item
444                 ;
445
/*
 * One line of the order block.  A line holding only a collating symbol is
 * finished with end_order_collsym().  Any other entry is opened by
 * order_itemkw, optionally followed by weights, and finished with
 * end_order().
 */
446 order_item      : T_COLLSYM T_NL
447                 {
448                         end_order_collsym($1);
449                 }
450                 | order_itemkw T_NL
451                 {
452                         end_order();
453                 }
454                 | order_itemkw order_weights T_NL
455                 {
456                         end_order();
457                 }
458                 ;
459
/*
 * The leading element of an order line.  Each kind of element calls the
 * matching start_order_*() routine before any weights on the line are
 * parsed.
 */
460 order_itemkw    : T_CHAR
461                 {
462                         start_order_char($1);
463                 }
464                 | T_ELLIPSIS
465                 {
466                         start_order_ellipsis();
467                 }
468                 | T_COLLELEM
469                 {
470                         start_order_collelem($1);
471                 }
472                 | T_UNDEFINED
473                 {
474                         start_order_undefined();
475                 }
476                 | T_SYMBOL
477                 {
478                         start_order_symbol($1);
479                 }
480                 ;
481
/*
 * Semicolon-separated weights for one order line.  The second alternative
 * accepts a semicolon with no weight after it, so trailing or repeated
 * semicolons are allowed; nothing is recorded for the empty slot.
 */
482 order_weights   : order_weights T_SEMI order_weight
483                 | order_weights T_SEMI
484                 | order_weight
485                 ;
486
/*
 * A single weight.  Each kind of operand calls the matching add_order_*()
 * routine.  A quoted weight is a sequence of items collected by order_str
 * and then committed with add_order_subst().
 */
487 order_weight    : T_COLLELEM
488                 {
489                         add_order_collelem($1);
490                 }
491                 | T_COLLSYM
492                 {
493                         add_order_collsym($1);
494                 }
495                 | T_CHAR
496                 {
497                         add_order_char($1);
498                 }
499                 | T_ELLIPSIS
500                 {
501                         add_order_ellipsis();
502                 }
503                 | T_IGNORE
504                 {
505                         add_order_ignore();
506                 }
507                 | T_SYMBOL
508                 {
509                         add_order_symbol($1);
510                 }
511                 | T_QUOTE order_str T_QUOTE
512                 {
513                         add_order_subst();
514                 }
515                 ;
516
/* One or more items inside a quoted weight (an empty "" is not accepted). */
517 order_str       : order_str order_stritem
518                 | order_stritem
519                 ;
520
/*
 * One item of a quoted weight.  Each kind of operand calls the matching
 * add_subst_*() routine.
 */
521 order_stritem   : T_CHAR
522                 {
523                         add_subst_char($1);
524                 }
525                 | T_COLLSYM
526                 {
527                         add_subst_collsym($1);
528                 }
529                 | T_COLLELEM
530                 {
531                         add_subst_collelem($1);
532                 }
533                 | T_SYMBOL
534                 {
535                         add_subst_symbol($1);
536                 }
537                 ;
538
/*
 * T_MESSAGES section.  With an explicit item list, dump_messages() is
 * called after the closing T_END T_MESSAGES line.  The copy form has no
 * action in this rule.
 */
539 messages        : T_MESSAGES T_NL messages_list T_END T_MESSAGES T_NL
540                 {
541                         dump_messages();
542                 }
543                 | T_MESSAGES T_NL copycat T_END T_MESSAGES T_NL
544                 ;
545
/* One or more message items. */
546 messages_list   : messages_list messages_item
547                 | messages_item
548                 ;
549
/*
 * The keywords that may start a message item.  No action is attached.
 * NOTE(review): which keyword was seen is presumably tracked outside this
 * grammar so add_message() can tell them apart -- confirm.
 */
550 messages_kw     : T_YESSTR
551                 | T_NOSTR
552                 | T_YESEXPR
553                 | T_NOEXPR
554                 ;
555
/*
 * One message line: keyword, quoted string, newline.  The collected wide
 * string is passed to add_message().
 */
556 messages_item   : messages_kw string T_NL
557                 {
558                         add_message(get_wcs());
559                 }
560                 ;
561
/*
 * T_MONETARY section.  With an explicit keyword list, dump_monetary() is
 * called after the closing T_END T_MONETARY line.  The copy form has no
 * action in this rule.
 */
562 monetary        : T_MONETARY T_NL monetary_list T_END T_MONETARY T_NL
563                 {
564                         dump_monetary();
565                 }
566                 | T_MONETARY T_NL copycat T_END T_MONETARY T_NL
567                 ;
568
/* One or more monetary keyword lines. */
569 monetary_list   : monetary_list monetary_kw
570                 | monetary_kw
571                 ;
572
/* Monetary keywords whose value is a quoted string (see monetary_kw). */
573 monetary_strkw  : T_INT_CURR_SYMBOL
574                 | T_CURRENCY_SYMBOL
575                 | T_MON_DECIMAL_POINT
576                 | T_MON_THOUSANDS_SEP
577                 | T_POSITIVE_SIGN
578                 | T_NEGATIVE_SIGN
579                 ;
580
/* Monetary keywords whose value is a single number (see monetary_kw). */
581 monetary_numkw  : T_INT_FRAC_DIGITS
582                 | T_FRAC_DIGITS
583                 | T_P_CS_PRECEDES
584                 | T_P_SEP_BY_SPACE
585                 | T_N_CS_PRECEDES
586                 | T_N_SEP_BY_SPACE
587                 | T_P_SIGN_POSN
588                 | T_N_SIGN_POSN
589                 | T_INT_P_CS_PRECEDES
590                 | T_INT_N_CS_PRECEDES
591                 | T_INT_P_SEP_BY_SPACE
592                 | T_INT_N_SEP_BY_SPACE
593                 | T_INT_P_SIGN_POSN
594                 | T_INT_N_SIGN_POSN
595                 ;
596
/*
 * One monetary line.  A string keyword passes the collected wide string
 * to add_monetary_str(); a numeric keyword passes its number to
 * add_monetary_num().  T_MON_GROUPING takes a semicolon-separated number
 * list that is handled entirely in mon_group_list.
 */
597 monetary_kw     : monetary_strkw string T_NL
598                 {
599                         add_monetary_str(get_wcs());
600                 }
601                 | monetary_numkw T_NUMBER T_NL
602                 {
603                         add_monetary_num($2);
604                 }
605                 | T_MON_GROUPING mon_group_list T_NL
606                 ;
607
/*
 * Semicolon-separated grouping numbers.  The first number of a list calls
 * reset_monetary_group() before being added; every number is then passed
 * to add_monetary_group() in the order written.
 */
608 mon_group_list  : T_NUMBER
609                 {
610                         reset_monetary_group();
611                         add_monetary_group($1);
612                 }
613                 | mon_group_list T_SEMI T_NUMBER
614                 {
615                         add_monetary_group($3);
616                 }
617                 ;
618
619
/*
 * T_NUMERIC section.  With an explicit item list, dump_numeric() is
 * called after the closing T_END T_NUMERIC line.  The copy form has no
 * action in this rule.
 */
620 numeric         : T_NUMERIC T_NL numeric_list T_END T_NUMERIC T_NL
621                 {
622                         dump_numeric();
623                 }
624                 | T_NUMERIC T_NL copycat T_END T_NUMERIC T_NL
625                 ;
626
627
/* One or more numeric items. */
628 numeric_list    : numeric_list numeric_item
629                 | numeric_item
630                 ;
631
632
/*
 * One numeric line.  A string keyword passes the collected wide string to
 * add_numeric_str().  T_GROUPING takes a semicolon-separated number list
 * that is handled entirely in group_list.
 */
633 numeric_item    : numeric_strkw string T_NL
634                 {
635                         add_numeric_str(get_wcs());
636                 }
637                 | T_GROUPING group_list T_NL
638                 ;
639
/* Numeric keywords whose value is a quoted string (see numeric_item). */
640 numeric_strkw   : T_DECIMAL_POINT
641                 | T_THOUSANDS_SEP
642                 ;
643
644
/*
 * Semicolon-separated grouping numbers.  The first number of a list calls
 * reset_numeric_group() before being added; every number is then passed
 * to add_numeric_group() in the order written.
 */
645 group_list      : T_NUMBER
646                 {
647                         reset_numeric_group();
648                         add_numeric_group($1);
649                 }
650                 | group_list T_SEMI T_NUMBER
651                 {
652                         add_numeric_group($3);
653                 }
654                 ;
655
656
/*
 * T_TIME section.  With an explicit keyword list, dump_time() is called
 * after the closing T_END T_TIME line.  The copy form has no action in
 * this rule.  Both forms are closed by T_END followed by T_TIME, the
 * keyword that opened the section, as in every other category rule.
 */
657 time            : T_TIME T_NL time_kwlist T_END T_TIME T_NL
658                 {
659                         dump_time();
660                 }
661                 | T_TIME T_NL copycat T_END T_TIME T_NL
662                 ;
663
/* One or more time keyword lines. */
664 time_kwlist     : time_kwlist time_kw
665                 | time_kw
666                 ;
667
/*
 * One time line.  A string keyword passes the collected wide string to
 * add_time_str().  A list keyword takes semicolon-separated strings that
 * are collected by time_list, after which check_time_list() is called.
 */
668 time_kw         : time_strkw string T_NL
669                 {
670                         add_time_str(get_wcs());
671                 }
672                 | time_listkw time_list T_NL
673                 {
674                         check_time_list();
675                 }
676                 ;
677
/* Time keywords whose value is a list of quoted strings (see time_kw). */
678 time_listkw     : T_ABDAY
679                 | T_DAY
680                 | T_ABMON
681                 | T_MON
682                 | T_ERA
683                 | T_ALT_DIGITS
684                 | T_AM_PM
685                 ;
686
/* Time keywords whose value is a single quoted string (see time_kw). */
687 time_strkw      : T_ERA_D_T_FMT
688                 | T_ERA_T_FMT
689                 | T_ERA_D_FMT
690                 | T_D_T_FMT
691                 | T_D_FMT
692                 | T_T_FMT
693                 | T_T_FMT_AMPM
694                 | T_DATE_FMT
695                 ;
696
/*
 * Semicolon-separated quoted strings.  The first string of a list calls
 * reset_time_list() before being added; every string is then passed to
 * add_time_list() in the order written.
 */
697 time_list       : time_list T_SEMI string
698                 {
699                         add_time_list(get_wcs());
700                 }
701                 | string
702                 {
703                         reset_time_list();
704                         add_time_list(get_wcs());
705                 }
706                 ;