Add localedef(1), a locale definition generator tool
[dragonfly.git] / usr.bin / localedef / parser.y
1 %{
2 /*
3  * This file and its contents are supplied under the terms of the
4  * Common Development and Distribution License ("CDDL"), version 1.0.
5  * You may only use this file in accordance with the terms of version
6  * 1.0 of the CDDL.
7  *
8  * A full copy of the text of the CDDL should have accompanied this
9  * source.  A copy of the CDDL is also available via the Internet at
10  * http://www.illumos.org/license/CDDL.
11  */
12
13 /*
14  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
15  * Copyright 2013 DEY Storage Systems, Inc.
16  * Copyright 2015 John Marino <draco@marino.st>
17  */
18
19 /*
20  * POSIX localedef grammar.
21  */
22
23 #include <wchar.h>
24 #include <stdio.h>
25 #include <limits.h>
26 #include "localedef.h"
27
28 %}
/*
 * Semantic value of a token.  The member in use is selected by the <tag>
 * on the corresponding %token declaration below.
 */
29 %union {
30         int             num;        /* T_NUMBER */
31         wchar_t         wc;         /* T_CHAR */
32         char            *token;     /* T_NAME, T_SYMBOL */
33         collsym_t       *collsym;   /* T_COLLSYM */
34         collelem_t      *collelem;  /* T_COLLELEM */
35 }
36
37 %token          T_CODE_SET
38 %token          T_MB_CUR_MAX
39 %token          T_MB_CUR_MIN
40 %token          T_COM_CHAR
41 %token          T_ESC_CHAR
42 %token          T_LT
43 %token          T_GT
44 %token          T_NL
45 %token          T_SEMI
46 %token          T_COMMA
47 %token          T_ELLIPSIS
48 %token          T_RPAREN
49 %token          T_LPAREN
50 %token          T_QUOTE
51 %token          T_NULL
52 %token          T_WS
53 %token          T_END
54 %token          T_COPY
55 %token          T_CHARMAP
56 %token          T_WIDTH
57 %token          T_CTYPE
58 %token          T_ISUPPER
59 %token          T_ISLOWER
60 %token          T_ISALPHA
61 %token          T_ISDIGIT
62 %token          T_ISPUNCT
63 %token          T_ISXDIGIT
64 %token          T_ISSPACE
65 %token          T_ISPRINT
66 %token          T_ISGRAPH
67 %token          T_ISBLANK
68 %token          T_ISCNTRL
69 %token          T_ISALNUM
70 %token          T_ISSPECIAL
71 %token          T_ISPHONOGRAM
72 %token          T_ISIDEOGRAM
73 %token          T_ISENGLISH
74 %token          T_ISNUMBER
75 %token          T_TOUPPER
76 %token          T_TOLOWER
77 %token          T_COLLATE
78 %token          T_COLLATING_SYMBOL
79 %token          T_COLLATING_ELEMENT
80 %token          T_ORDER_START
81 %token          T_ORDER_END
82 %token          T_FORWARD
83 %token          T_BACKWARD
84 %token          T_POSITION
85 %token          T_FROM
86 %token          T_UNDEFINED
87 %token          T_IGNORE
88 %token          T_MESSAGES
89 %token          T_YESSTR
90 %token          T_NOSTR
91 %token          T_YESEXPR
92 %token          T_NOEXPR
93 %token          T_MONETARY
94 %token          T_INT_CURR_SYMBOL
95 %token          T_CURRENCY_SYMBOL
96 %token          T_MON_DECIMAL_POINT
97 %token          T_MON_THOUSANDS_SEP
98 %token          T_POSITIVE_SIGN
99 %token          T_NEGATIVE_SIGN
100 %token          T_MON_GROUPING
101 %token          T_INT_FRAC_DIGITS
102 %token          T_FRAC_DIGITS
103 %token          T_P_CS_PRECEDES
104 %token          T_P_SEP_BY_SPACE
105 %token          T_N_CS_PRECEDES
106 %token          T_N_SEP_BY_SPACE
107 %token          T_P_SIGN_POSN
108 %token          T_N_SIGN_POSN
109 %token          T_INT_P_CS_PRECEDES
110 %token          T_INT_N_CS_PRECEDES
111 %token          T_INT_P_SEP_BY_SPACE
112 %token          T_INT_N_SEP_BY_SPACE
113 %token          T_INT_P_SIGN_POSN
114 %token          T_INT_N_SIGN_POSN
115 %token          T_NUMERIC
116 %token          T_DECIMAL_POINT
117 %token          T_THOUSANDS_SEP
118 %token          T_GROUPING
119 %token          T_TIME
120 %token          T_ABDAY
121 %token          T_DAY
122 %token          T_ABMON
123 %token          T_MON
124 %token          T_ERA
125 %token          T_ERA_D_FMT
126 %token          T_ERA_T_FMT
127 %token          T_ERA_D_T_FMT
128 %token          T_ALT_DIGITS
129 %token          T_D_T_FMT
130 %token          T_D_FMT
131 %token          T_T_FMT
132 %token          T_AM_PM
133 %token          T_T_FMT_AMPM
134 %token          T_DATE_FMT
135 %token  <wc>            T_CHAR
136 %token  <token>         T_NAME
137 %token  <num>           T_NUMBER
138 %token  <token>         T_SYMBOL
139 %token  <collsym>       T_COLLSYM
140 %token  <collelem>      T_COLLELEM
141
142 %%
143
/*
 * First rule of the grammar (and, with no %start declaration visible in this
 * file, the start symbol): one or more categories, optionally preceded by a
 * list of settings.
 */
144 localedef       : setting_list categories
145                 | categories
146                 ;
147
/*
 * A quoted string, possibly empty.  The string itself has no semantic value;
 * its characters are collected by the charlist actions, and the rules in this
 * file that use `string` fetch the result with get_wcs().
 */
148 string          : T_QUOTE charlist T_QUOTE
149                 | T_QUOTE T_QUOTE
150                 ;
151
/*
 * One or more T_CHAR tokens.  Because the rule is left-recursive, each
 * character is passed to add_wcs() in the order it appears in the input.
 */
152 charlist        : charlist T_CHAR
153                 {
154                         add_wcs($2);
155                 }
156                 | T_CHAR
157                 {
158                         add_wcs($1);
159                 }
160                 ;
161
/* One or more `setting` lines. */
162 setting_list    : setting_list setting
163                 | setting
164                 ;
165
166
/*
 * A single setting line, terminated by T_NL.
 *
 * The T_COM_CHAR / T_ESC_CHAR / T_MB_CUR_MAX / T_MB_CUR_MIN forms store the
 * token value directly into com_char, esc_char, mb_cur_max and mb_cur_min
 * (variables declared outside the rules shown here).  The T_CODE_SET form
 * accepts either a quoted string or a bare T_NAME and passes the name to
 * set_wide_encoding().
 */
167 setting         : T_COM_CHAR T_CHAR T_NL
168                 {
169                         com_char = $2;
170                 }
171                 | T_ESC_CHAR T_CHAR T_NL
172                 {
173                         esc_char = $2;
174                 }
175                 | T_MB_CUR_MAX T_NUMBER T_NL
176                 {
177                         mb_cur_max = $2;
178                 }
179                 | T_MB_CUR_MIN T_NUMBER T_NL
180                 {
181                         mb_cur_min = $2;
182                 }
183                 | T_CODE_SET string T_NL
184                 {
                        /*
                         * The wide string is freed here once converted.
                         * NOTE(review): the result of to_mb_string() is not
                         * freed in this action -- confirm who owns it.
                         */
185                         wchar_t *w = get_wcs();
186                         set_wide_encoding(to_mb_string(w));
187                         free(w);
188                 }
189                 | T_CODE_SET T_NAME T_NL
190                 {
191                         set_wide_encoding($2);
192                 }
193                 ;
194
/*
 * A T_COPY line.  The argument may be a bare T_NAME or a quoted string; in
 * both cases it is passed to copy_category() as a multibyte string.  This
 * nonterminal is the alternative body of the ctype, collate, messages,
 * monetary, numeric and time categories.
 */
195 copycat         : T_COPY T_NAME T_NL
196                 {
197                         copy_category($2);
198                 }
199                 | T_COPY string T_NL
200                 {
                        /*
                         * The wide string is freed here once converted.
                         * NOTE(review): the result of to_mb_string() is not
                         * freed in this action -- confirm who owns it.
                         */
201                         wchar_t *w = get_wcs();
202                         copy_category(to_mb_string(w));
203                         free(w);
204                 }
205                 ;
206
/* One or more `category` sections. */
207 categories      : categories category
208                 | category
209                 ;
210
211
/*
 * Any one of the section kinds this grammar accepts.  Sections may appear in
 * any order and any number of times as far as the grammar is concerned.
 */
212 category        : charmap
213                 | messages
214                 | monetary
215                 | ctype
216                 | collate
217                 | numeric
218                 | time
219                 ;
220
221
/*
 * Two section forms share this nonterminal: a T_CHARMAP section holding
 * charmap entries and a T_WIDTH section holding width entries.  Each is
 * closed by T_END followed by the keyword that opened it.  Neither form has
 * an end-of-section action.
 */
222 charmap         : T_CHARMAP T_NL charmap_list T_END T_CHARMAP T_NL
223                 | T_WIDTH T_NL width_list T_END T_WIDTH T_NL
224                 ;
225
226
/* One or more `charmap_entry` lines. */
227 charmap_list    : charmap_list charmap_entry
228                 | charmap_entry
229                 ;
230
231
/*
 * One line of a charmap section:
 *   - a symbol and a character      -> add_charmap()
 *   - a symbol range and a character -> add_charmap_range()
 *   - an empty line (T_NL alone)
 *
 * The first two productions do not end in T_NL; instead the action calls
 * scan_to_eol().  NOTE(review): presumably that consumes the remainder of the
 * line including the newline -- confirm against the scanner.
 */
232 charmap_entry   : T_SYMBOL T_CHAR
233                 {
234                         add_charmap($1, $2);
235                         scan_to_eol();
236                 }
237                 | T_SYMBOL T_ELLIPSIS T_SYMBOL T_CHAR
238                 {
239                         add_charmap_range($1, $3, $4);
240                         scan_to_eol();
241                 }
242                 | T_NL
243                 ;
244
/* One or more `width_entry` lines. */
245 width_list      : width_list width_entry
246                 | width_entry
247                 ;
248
/*
 * One line of a width section: a character or a character range followed by
 * a number, or an empty line.
 *
 * Only T_CHAR operands receive a width.  Every T_SYMBOL operand is passed to
 * add_charmap_undefined() instead.  In the mixed range forms (one endpoint a
 * T_CHAR, the other a T_SYMBOL) the width is applied to the single T_CHAR
 * endpoint with add_width(), not to a range.
 */
249 width_entry     : T_CHAR T_NUMBER T_NL
250                 {
251                         add_width($1, $2);
252                 }
253                 | T_SYMBOL T_NUMBER T_NL
254                 {
255                         add_charmap_undefined($1);
256                 }
257                 | T_CHAR T_ELLIPSIS T_CHAR T_NUMBER T_NL
258                 {
259                         add_width_range($1, $3, $4);
260                 }
261                 | T_SYMBOL T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
262                 {
263                         add_charmap_undefined($1);
264                         add_charmap_undefined($3);
265                 }
266                 | T_CHAR T_ELLIPSIS T_SYMBOL T_NUMBER T_NL
267                 {
268                         add_width($1, $4);
269                         add_charmap_undefined($3);
270                 }
271                 | T_SYMBOL T_ELLIPSIS T_CHAR T_NUMBER T_NL
272                 {
273                         add_width($3, $4);
274                         add_charmap_undefined($1);
275                 }
276                 | T_NL
277                 ;
278
/*
 * A T_CTYPE section, closed by T_END T_CTYPE.  Its body is either a list of
 * ctype keywords or a single copy directive.  dump_ctype() is called only for
 * the keyword-list form; the copy form has no action of its own.
 */
279 ctype           : T_CTYPE T_NL ctype_list T_END T_CTYPE T_NL
280                 {
281                         dump_ctype();
282                 }
283                 | T_CTYPE T_NL copycat  T_END T_CTYPE T_NL
284                 ;
285
/* One or more `ctype_kw` lines. */
286 ctype_list      : ctype_list ctype_kw
287                 | ctype_kw
288                 ;
289
/*
 * One keyword line of a ctype section: a class keyword followed by a
 * character list, or T_TOUPPER / T_TOLOWER followed by a list of conversion
 * pairs.
 *
 * No production here has an action, and the cc_list / conv_list actions are
 * not given the keyword.  NOTE(review): presumably the current keyword is
 * recorded elsewhere (e.g. by the scanner) -- confirm.
 */
290 ctype_kw        : T_ISUPPER cc_list T_NL
291                 | T_ISLOWER cc_list T_NL
292                 | T_ISALPHA cc_list T_NL
293                 | T_ISDIGIT cc_list T_NL
294                 | T_ISPUNCT cc_list T_NL
295                 | T_ISXDIGIT cc_list T_NL
296                 | T_ISSPACE cc_list T_NL
297                 | T_ISPRINT cc_list T_NL
298                 | T_ISGRAPH cc_list T_NL
299                 | T_ISBLANK cc_list T_NL
300                 | T_ISCNTRL cc_list T_NL
301                 | T_ISALNUM cc_list T_NL
302                 | T_ISSPECIAL cc_list T_NL
303                 | T_ISENGLISH cc_list T_NL
304                 | T_ISNUMBER cc_list T_NL
305                 | T_ISIDEOGRAM cc_list T_NL
306                 | T_ISPHONOGRAM cc_list T_NL
307                 | T_TOUPPER conv_list T_NL
308                 | T_TOLOWER conv_list T_NL
309                 ;
310
311
/*
 * Semicolon-separated list of class members.
 *   - T_CHAR   -> add_ctype()
 *   - T_SYMBOL -> add_charmap_undefined()
 *   - "; ... ; T_CHAR" -> add_ctype_range() with the end character only
 *
 * The ellipsis form requires a preceding cc_list, so a range can never be the
 * first element.  NOTE(review): add_ctype_range() receives only the upper
 * endpoint, so the lower one is presumably the most recently added character
 * -- confirm.
 */
312 cc_list         : cc_list T_SEMI T_CHAR
313                 {
314                         add_ctype($3);
315                 }
316                 | cc_list T_SEMI T_SYMBOL
317                 {
318                         add_charmap_undefined($3);
319                 }
320                 | cc_list T_SEMI T_ELLIPSIS T_SEMI T_CHAR
321                 {
322                         /* note that the endpoints *must* be characters */
323                         add_ctype_range($5);
324                 }
325                 | T_CHAR
326                 {
327                         add_ctype($1);
328                 }
329                 | T_SYMBOL
330                 {
331                         add_charmap_undefined($1);
332                 }
333                 ;
334
/* One or more `conv_pair` items separated by T_SEMI. */
335 conv_list       : conv_list T_SEMI conv_pair
336                 | conv_pair
337                 ;
338
339
/*
 * A parenthesised, comma-separated pair used by T_TOUPPER / T_TOLOWER.
 *
 * add_caseconv() is called only when both members are T_CHAR.  If either
 * member is a T_SYMBOL, each T_SYMBOL is passed to add_charmap_undefined()
 * and add_caseconv() is not called for the pair.
 */
340 conv_pair       : T_LPAREN T_CHAR T_COMMA T_CHAR T_RPAREN
341                 {
342                         add_caseconv($2, $4);
343                 }
344                 | T_LPAREN T_SYMBOL T_COMMA T_CHAR T_RPAREN
345                 {
346                         add_charmap_undefined($2);
347                 }
348                 | T_LPAREN T_SYMBOL T_COMMA T_SYMBOL T_RPAREN
349                 {
350                         add_charmap_undefined($2);
351                         add_charmap_undefined($4);
352                 }
353                 | T_LPAREN T_CHAR T_COMMA T_SYMBOL T_RPAREN
354                 {
355                         add_charmap_undefined($4);
356                 }
357                 ;
358
/*
 * A T_COLLATE section, closed by T_END T_COLLATE.  Its body is an order
 * block, optionally preceded by collating-symbol / collating-element
 * definitions, or a single copy directive.  dump_collate() is called for both
 * order forms; the copy form has no action of its own.
 */
359 collate         : T_COLLATE T_NL coll_order T_END T_COLLATE T_NL
360                 {
361                         dump_collate();
362                 }
363                 | T_COLLATE T_NL coll_optional coll_order T_END T_COLLATE T_NL
364                 {
365                         dump_collate();
366                 }
367                 | T_COLLATE T_NL copycat T_END T_COLLATE T_NL
368                 ;
369
370
/*
 * One or more collating-symbol and collating-element definitions, in any
 * order.  In the collate rule these precede the order block.
 */
371 coll_optional   : coll_optional coll_symbols
372                 | coll_optional coll_elements
373                 | coll_symbols
374                 | coll_elements
375                 ;
376
377
/* A T_COLLATING_SYMBOL line; the symbol name is passed to define_collsym(). */
378 coll_symbols    : T_COLLATING_SYMBOL T_SYMBOL T_NL
379                 {
380                         define_collsym($2);
381                 }
382                 ;
383
384
/*
 * A T_COLLATING_ELEMENT line: a symbol name, T_FROM, and a quoted string.
 * The name and the wide string collected for `string` are passed to
 * define_collelem().
 */
385 coll_elements   : T_COLLATING_ELEMENT T_SYMBOL T_FROM string T_NL
386                 {
387                         define_collelem($2, get_wcs());
388                 }
389                 ;
390
/*
 * The order block: T_ORDER_START, optional order_args on the same line, the
 * order_list, then T_ORDER_END.
 *
 * When order_args is present its own actions issue the directives.  When it
 * is absent, the action below adds a single forward directive.  As an
 * end-of-rule action it runs when the production is reduced, i.e. after the
 * whole order_list and T_ORDER_END T_NL have been parsed.
 * NOTE(review): if the order_list actions depend on a directive already being
 * defined, this default arrives after them -- confirm against the collation
 * code.
 */
391 coll_order      : T_ORDER_START T_NL order_list T_ORDER_END T_NL
392                 {
393                         /* No order_args on the order_start line: default to one forward */
394                         add_order_bit(T_FORWARD);
395                         add_order_directive();
396                 }
397                 | T_ORDER_START order_args T_NL order_list T_ORDER_END T_NL
398                 ;
399
400
/*
 * Semicolon-separated list of order_arg items.  add_order_directive() is
 * called once after each order_arg has been parsed.
 */
401 order_args      : order_args T_SEMI order_arg
402                 {
403                         add_order_directive();
404                 }
405                 | order_arg
406                 {
407                         add_order_directive();
408                 }
409                 ;
410
/* One or more order_dir keywords separated by T_COMMA. */
411 order_arg       : order_arg T_COMMA order_dir
412                 | order_dir
413                 ;
414
/*
 * A single direction keyword.  The token code itself (T_FORWARD, T_BACKWARD
 * or T_POSITION) is passed to add_order_bit().
 */
415 order_dir       : T_FORWARD
416                 {
417                         add_order_bit(T_FORWARD);
418                 }
419                 | T_BACKWARD
420                 {
421                         add_order_bit(T_BACKWARD);
422                 }
423                 | T_POSITION
424                 {
425                         add_order_bit(T_POSITION);
426                 }
427                 ;
428
/* One or more `order_item` lines. */
429 order_list      : order_list order_item
430                 | order_item
431                 ;
432
/*
 * One line of the order block.
 *   - a T_COLLSYM alone on the line -> end_order_collsym()
 *   - an order_itemkw, optionally followed by weights -> end_order()
 *
 * The order_itemkw and order_weights actions run before end_order(), since
 * those nonterminals are reduced first.
 */
433 order_item      : T_COLLSYM T_NL
434                 {
435                         end_order_collsym($1);
436                 }
437                 | order_itemkw T_NL
438                 {
439                         end_order();
440                 }
441                 | order_itemkw order_weights T_NL
442                 {
443                         end_order();
444                 }
445                 ;
446
/*
 * The leading item of an order line.  Each kind of item calls its matching
 * start_order_*() routine; T_CHAR, T_COLLELEM and T_SYMBOL pass the token
 * value, while T_ELLIPSIS and T_UNDEFINED take no argument.
 */
447 order_itemkw    : T_CHAR
448                 {
449                         start_order_char($1);
450                 }
451                 | T_ELLIPSIS
452                 {
453                         start_order_ellipsis();
454                 }
455                 | T_COLLELEM
456                 {
457                         start_order_collelem($1);
458                 }
459                 | T_UNDEFINED
460                 {
461                         start_order_undefined();
462                 }
463                 | T_SYMBOL
464                 {
465                         start_order_symbol($1);
466                 }
467                 ;
468
/*
 * Semicolon-separated weights.  The list must begin with an order_weight, but
 * the `order_weights T_SEMI` alternative lets a T_SEMI appear without a
 * following weight, so trailing and consecutive semicolons are accepted.
 */
469 order_weights   : order_weights T_SEMI order_weight
470                 | order_weights T_SEMI
471                 | order_weight
472                 ;
473
/*
 * A single weight on an order line.  Each kind of item calls its matching
 * add_order_*() routine.  For the quoted form, the order_str items are
 * processed by their own actions first and add_order_subst() is called once
 * the closing quote has been seen.
 */
474 order_weight    : T_COLLELEM
475                 {
476                         add_order_collelem($1);
477                 }
478                 | T_COLLSYM
479                 {
480                         add_order_collsym($1);
481                 }
482                 | T_CHAR
483                 {
484                         add_order_char($1);
485                 }
486                 | T_ELLIPSIS
487                 {
488                         add_order_ellipsis();
489                 }
490                 | T_IGNORE
491                 {
492                         add_order_ignore();
493                 }
494                 | T_SYMBOL
495                 {
496                         add_order_symbol($1);
497                 }
498                 | T_QUOTE order_str T_QUOTE
499                 {
500                         add_order_subst();
501                 }
502                 ;
503
/* One or more `order_stritem` items; an empty quoted weight is not accepted. */
504 order_str       : order_str order_stritem
505                 | order_stritem
506                 ;
507
/*
 * One item inside a quoted weight.  Each kind of item is passed to its
 * matching add_subst_*() routine.
 */
508 order_stritem   : T_CHAR
509                 {
510                         add_subst_char($1);
511                 }
512                 | T_COLLSYM
513                 {
514                         add_subst_collsym($1);
515                 }
516                 | T_COLLELEM
517                 {
518                         add_subst_collelem($1);
519                 }
520                 | T_SYMBOL
521                 {
522                         add_subst_symbol($1);
523                 }
524                 ;
525
/*
 * A T_MESSAGES section, closed by T_END T_MESSAGES.  Its body is either a
 * list of message items or a single copy directive.  dump_messages() is
 * called only for the item-list form.
 */
526 messages        : T_MESSAGES T_NL messages_list T_END T_MESSAGES T_NL
527                 {
528                         dump_messages();
529                 }
530                 | T_MESSAGES T_NL copycat T_END T_MESSAGES T_NL
531                 ;
532
/* One or more `messages_item` lines. */
533 messages_list   : messages_list messages_item
534                 | messages_item
535                 ;
536
/*
 * The keywords accepted at the start of a messages_item.  No action is
 * attached and no value is produced.  NOTE(review): add_message() is not told
 * which keyword was seen, so that is presumably tracked elsewhere -- confirm.
 */
537 messages_kw     : T_YESSTR
538                 | T_NOSTR
539                 | T_YESEXPR
540                 | T_NOEXPR
541                 ;
542
/*
 * A keyword followed by a quoted string; the wide string collected for
 * `string` is passed to add_message().
 */
543 messages_item   : messages_kw string T_NL
544                 {
545                         add_message(get_wcs());
546                 }
547                 ;
548
/*
 * A T_MONETARY section, closed by T_END T_MONETARY.  Its body is either a
 * list of monetary keywords or a single copy directive.  dump_monetary() is
 * called only for the keyword-list form.
 */
549 monetary        : T_MONETARY T_NL monetary_list T_END T_MONETARY T_NL
550                 {
551                         dump_monetary();
552                 }
553                 | T_MONETARY T_NL copycat T_END T_MONETARY T_NL
554                 ;
555
/* One or more `monetary_kw` lines. */
556 monetary_list   : monetary_list monetary_kw
557                 | monetary_kw
558                 ;
559
/* Monetary keywords whose value is a quoted string (see monetary_kw). */
560 monetary_strkw  : T_INT_CURR_SYMBOL
561                 | T_CURRENCY_SYMBOL
562                 | T_MON_DECIMAL_POINT
563                 | T_MON_THOUSANDS_SEP
564                 | T_POSITIVE_SIGN
565                 | T_NEGATIVE_SIGN
566                 ;
567
/* Monetary keywords whose value is a single T_NUMBER (see monetary_kw). */
568 monetary_numkw  : T_INT_FRAC_DIGITS
569                 | T_FRAC_DIGITS
570                 | T_P_CS_PRECEDES
571                 | T_P_SEP_BY_SPACE
572                 | T_N_CS_PRECEDES
573                 | T_N_SEP_BY_SPACE
574                 | T_P_SIGN_POSN
575                 | T_N_SIGN_POSN
576                 | T_INT_P_CS_PRECEDES
577                 | T_INT_N_CS_PRECEDES
578                 | T_INT_P_SEP_BY_SPACE
579                 | T_INT_N_SEP_BY_SPACE
580                 | T_INT_P_SIGN_POSN
581                 | T_INT_N_SIGN_POSN
582                 ;
583
/*
 * One line of a monetary section:
 *   - string keyword + quoted string -> add_monetary_str()
 *   - numeric keyword + number       -> add_monetary_num()
 *   - T_MON_GROUPING + number list   -> handled by the mon_group_list actions
 */
584 monetary_kw     : monetary_strkw string T_NL
585                 {
586                         add_monetary_str(get_wcs());
587                 }
588                 | monetary_numkw T_NUMBER T_NL
589                 {
590                         add_monetary_num($2);
591                 }
592                 | T_MON_GROUPING mon_group_list T_NL
593                 ;
594
/*
 * Semicolon-separated numbers for T_MON_GROUPING.  The first number calls
 * reset_monetary_group() before it is added; each later number is only
 * added.
 */
595 mon_group_list  : T_NUMBER
596                 {
597                         reset_monetary_group();
598                         add_monetary_group($1);
599                 }
600                 | mon_group_list T_SEMI T_NUMBER
601                 {
602                         add_monetary_group($3);
603                 }
604                 ;
605
606
/*
 * A T_NUMERIC section, closed by T_END T_NUMERIC.  Its body is either a list
 * of numeric items or a single copy directive.  dump_numeric() is called only
 * for the item-list form.
 */
607 numeric         : T_NUMERIC T_NL numeric_list T_END T_NUMERIC T_NL
608                 {
609                         dump_numeric();
610                 }
611                 | T_NUMERIC T_NL copycat T_END T_NUMERIC T_NL
612                 ;
613
614
/* One or more `numeric_item` lines. */
615 numeric_list    : numeric_list numeric_item
616                 | numeric_item
617                 ;
618
619
/*
 * One line of a numeric section:
 *   - string keyword + quoted string -> add_numeric_str()
 *   - T_GROUPING + number list       -> handled by the group_list actions
 */
620 numeric_item    : numeric_strkw string T_NL
621                 {
622                         add_numeric_str(get_wcs());
623                 }
624                 | T_GROUPING group_list T_NL
625                 ;
626
/* Numeric keywords whose value is a quoted string (see numeric_item). */
627 numeric_strkw   : T_DECIMAL_POINT
628                 | T_THOUSANDS_SEP
629                 ;
630
631
/*
 * Semicolon-separated numbers for T_GROUPING.  The first number calls
 * reset_numeric_group() before it is added; each later number is only added.
 */
632 group_list      : T_NUMBER
633                 {
634                         reset_numeric_group();
635                         add_numeric_group($1);
636                 }
637                 | group_list T_SEMI T_NUMBER
638                 {
639                         add_numeric_group($3);
640                 }
641                 ;
642
643
/*
 * A T_TIME section, closed by T_END T_TIME.  Its body is either a list of
 * time keywords or a single copy directive.  dump_time() is called only for
 * the keyword-list form; the copy form has no action of its own.
 *
 * Both forms must close with T_END T_TIME, matching how every other category
 * closes with its own keyword.
 */
644 time            : T_TIME T_NL time_kwlist T_END T_TIME T_NL
645                 {
646                         dump_time();
647                 }
648                 | T_TIME T_NL copycat T_END T_TIME T_NL
649                 ;
650
/* One or more `time_kw` lines. */
651 time_kwlist     : time_kwlist time_kw
652                 | time_kw
653                 ;
654
/*
 * One line of a time section:
 *   - string keyword + quoted string -> add_time_str()
 *   - list keyword + string list     -> the time_list actions collect the
 *     strings, then check_time_list() is called at end of line
 */
655 time_kw         : time_strkw string T_NL
656                 {
657                         add_time_str(get_wcs());
658                 }
659                 | time_listkw time_list T_NL
660                 {
661                         check_time_list();
662                 }
663                 ;
664
/* Time keywords whose value is a semicolon-separated string list (see time_kw). */
665 time_listkw     : T_ABDAY
666                 | T_DAY
667                 | T_ABMON
668                 | T_MON
669                 | T_ERA
670                 | T_ALT_DIGITS
671                 | T_AM_PM
672                 ;
673
/* Time keywords whose value is a single quoted string (see time_kw). */
674 time_strkw      : T_ERA_D_T_FMT
675                 | T_ERA_T_FMT
676                 | T_ERA_D_FMT
677                 | T_D_T_FMT
678                 | T_D_FMT
679                 | T_T_FMT
680                 | T_T_FMT_AMPM
681                 | T_DATE_FMT
682                 ;
683
/*
 * Semicolon-separated quoted strings.  The first string calls
 * reset_time_list() before it is added; each later string is only added.
 * Both actions run after their `string` has been fully parsed, so get_wcs()
 * is called once per string.
 */
684 time_list       : time_list T_SEMI string
685                 {
686                         add_time_list(get_wcs());
687                 }
688                 | string
689                 {
690                         reset_time_list();
691                         add_time_list(get_wcs());
692                 }
693                 ;