866d0a0b5b5bcdda274ad1fcf46790b14da157c0
[dragonfly.git] / usr.bin / lex / dfa.c
1 /* dfa - DFA construction routines */
2
3 /*-
4  * Copyright (c) 1990 The Regents of the University of California.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Vern Paxson.
9  * 
10  * The United States Government has rights in this work pursuant
11  * to contract no. DE-AC03-76SF00098 between the United States
12  * Department of Energy and the University of California.
13  *
14  * Redistribution and use in source and binary forms are permitted provided
15  * that: (1) source distributions retain this entire copyright notice and
16  * comment, and (2) distributions including binaries display the following
17  * acknowledgement:  ``This product includes software developed by the
18  * University of California, Berkeley and its contributors'' in the
19  * documentation or other materials provided with the distribution and in
20  * all advertising materials mentioning features or use of this software.
21  * Neither the name of the University nor the names of its contributors may
22  * be used to endorse or promote products derived from this software without
23  * specific prior written permission.
24  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
25  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
26  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
27  */
28
29 /* $Header: /home/daffy/u0/vern/flex/RCS/dfa.c,v 2.26 95/04/20 13:53:14 vern Exp $ */
30 /* $FreeBSD: src/usr.bin/lex/dfa.c,v 1.5 1999/10/27 07:56:43 obrien Exp $ */
31 /* $DragonFly: src/usr.bin/lex/dfa.c,v 1.4 2005/08/04 17:31:22 drhodus Exp $ */
32
33 #include "flexdef.h"
34
35
36 /* declare functions that have forward references */
37
38 void dump_associated_rules PROTO((FILE*, int));
39 void dump_transitions PROTO((FILE*, int[]));
40 void sympartition PROTO((int[], int, int[], int[]));
41 int symfollowset PROTO((int[], int, int, int[]));
42
43
44 /* check_for_backing_up - check a DFA state for backing up
45  *
46  * synopsis
47  *     void check_for_backing_up( int ds, int state[numecs] );
48  *
49  * ds is the number of the state to check and state[] is its out-transitions,
50  * indexed by equivalence class.
51  */
52
53 void check_for_backing_up(int ds, int *state)
54         {
55         if ( (reject && ! dfaacc[ds].dfaacc_set) ||
56              (! reject && ! dfaacc[ds].dfaacc_state) )
57                 { /* state is non-accepting */
58                 ++num_backing_up;
59
60                 if ( backing_up_report )
61                         {
62                         fprintf( backing_up_file,
63                                 _( "State #%d is non-accepting -\n" ), ds );
64
65                         /* identify the state */
66                         dump_associated_rules( backing_up_file, ds );
67
68                         /* Now identify it further using the out- and
69                          * jam-transitions.
70                          */
71                         dump_transitions( backing_up_file, state );
72
73                         putc( '\n', backing_up_file );
74                         }
75                 }
76         }
77
78
79 /* check_trailing_context - check to see if NFA state set constitutes
80  *                          "dangerous" trailing context
81  *
82  * synopsis
83  *    void check_trailing_context( int nfa_states[num_states+1], int num_states,
84  *                              int accset[nacc+1], int nacc );
85  *
86  * NOTES
87  *  Trailing context is "dangerous" if both the head and the trailing
88  *  part are of variable size \and/ there's a DFA state which contains
89  *  both an accepting state for the head part of the rule and NFA states
90  *  which occur after the beginning of the trailing context.
91  *
92  *  When such a rule is matched, it's impossible to tell if having been
93  *  in the DFA state indicates the beginning of the trailing context or
94  *  further-along scanning of the pattern.  In these cases, a warning
95  *  message is issued.
96  *
97  *    nfa_states[1 .. num_states] is the list of NFA states in the DFA.
98  *    accset[1 .. nacc] is the list of accepting numbers for the DFA state.
99  */
100
101 void check_trailing_context(int *nfa_states, int num_states, int *accset, 
102                             int nacc)
103         {
104         int i, j;
105
106         for ( i = 1; i <= num_states; ++i )
107                 {
108                 int ns = nfa_states[i];
109                 int type = state_type[ns];
110                 int ar = assoc_rule[ns];
111
112                 if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE )
113                         { /* do nothing */
114                         }
115
116                 else if ( type == STATE_TRAILING_CONTEXT )
117                         {
118                         /* Potential trouble.  Scan set of accepting numbers
119                          * for the one marking the end of the "head".  We
120                          * assume that this looping will be fairly cheap
121                          * since it's rare that an accepting number set
122                          * is large.
123                          */
124                         for ( j = 1; j <= nacc; ++j )
125                                 if ( accset[j] & YY_TRAILING_HEAD_MASK )
126                                         {
127                                         line_warning(
128                                         _( "dangerous trailing context" ),
129                                                 rule_linenum[ar] );
130                                         return;
131                                         }
132                         }
133                 }
134         }
135
136
137 /* dump_associated_rules - list the rules associated with a DFA state
138  *
139  * Goes through the set of NFA states associated with the DFA and
140  * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
141  * and writes a report to the given file.
142  */
143
144 void dump_associated_rules(FILE *file, int ds)
145         {
146         int i, j;
147         int num_associated_rules = 0;
148         int rule_set[MAX_ASSOC_RULES + 1];
149         int *dset = dss[ds];
150         int size = dfasiz[ds];
151
152         for ( i = 1; i <= size; ++i )
153                 {
154                 int rule_num = rule_linenum[assoc_rule[dset[i]]];
155
156                 for ( j = 1; j <= num_associated_rules; ++j )
157                         if ( rule_num == rule_set[j] )
158                                 break;
159
160                 if ( j > num_associated_rules )
161                         { /* new rule */
162                         if ( num_associated_rules < MAX_ASSOC_RULES )
163                                 rule_set[++num_associated_rules] = rule_num;
164                         }
165                 }
166
167         bubble( rule_set, num_associated_rules );
168
169         fprintf( file, _( " associated rule line numbers:" ) );
170
171         for ( i = 1; i <= num_associated_rules; ++i )
172                 {
173                 if ( i % 8 == 1 )
174                         putc( '\n', file );
175
176                 fprintf( file, "\t%d", rule_set[i] );
177                 }
178
179         putc( '\n', file );
180         }
181
182
183 /* dump_transitions - list the transitions associated with a DFA state
184  *
185  * synopsis
186  *     dump_transitions( FILE *file, int state[numecs] );
187  *
188  * Goes through the set of out-transitions and lists them in human-readable
189  * form (i.e., not as equivalence classes); also lists jam transitions
190  * (i.e., all those which are not out-transitions, plus EOF).  The dump
191  * is done to the given file.
192  */
193
194 void dump_transitions(FILE *file, int *state)
195         {
196         int i, ec;
197         int out_char_set[CSIZE];
198
199         for ( i = 0; i < csize; ++i )
200                 {
201                 ec = ABS( ecgroup[i] );
202                 out_char_set[i] = state[ec];
203                 }
204
205         fprintf( file, _( " out-transitions: " ) );
206
207         list_character_set( file, out_char_set );
208
209         /* now invert the members of the set to get the jam transitions */
210         for ( i = 0; i < csize; ++i )
211                 out_char_set[i] = ! out_char_set[i];
212
213         fprintf( file, _( "\n jam-transitions: EOF " ) );
214
215         list_character_set( file, out_char_set );
216
217         putc( '\n', file );
218         }
219
220
221 /* epsclosure - construct the epsilon closure of a set of ndfa states
222  *
223  * synopsis
224  *    int *epsclosure( int t[num_states], int *numstates_addr,
225  *                      int accset[num_rules+1], int *nacc_addr,
226  *                      int *hashval_addr );
227  *
228  * NOTES
229  *  The epsilon closure is the set of all states reachable by an arbitrary
230  *  number of epsilon transitions, which themselves do not have epsilon
231  *  transitions going out, unioned with the set of states which have non-null
232  *  accepting numbers.  t is an array of size numstates of nfa state numbers.
233  *  Upon return, t holds the epsilon closure and *numstates_addr is updated.
234  *  accset holds a list of the accepting numbers, and the size of accset is
235  *  given by *nacc_addr.  t may be subjected to reallocation if it is not
236  *  large enough to hold the epsilon closure.
237  *
238  *  hashval is the hash value for the dfa corresponding to the state set.
239  */
240
241 int *epsclosure(int *t, int *ns_addr, int *accset, int *nacc_addr, int *hv_addr)
242         {
243         int stkpos, ns, tsp;
244         int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
245         int stkend, nstate;
246         static int did_stk_init = false, *stk; 
247
248 #define MARK_STATE(state) \
249 trans1[state] = trans1[state] - MARKER_DIFFERENCE;
250
251 #define IS_MARKED(state) (trans1[state] < 0)
252
253 #define UNMARK_STATE(state) \
254 trans1[state] = trans1[state] + MARKER_DIFFERENCE;
255
256 #define CHECK_ACCEPT(state) \
257 { \
258 nfaccnum = accptnum[state]; \
259 if ( nfaccnum != NIL ) \
260 accset[++nacc] = nfaccnum; \
261 }
262
263 #define DO_REALLOCATION \
264 { \
265 current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
266 ++num_reallocs; \
267 t = reallocate_integer_array( t, current_max_dfa_size ); \
268 stk = reallocate_integer_array( stk, current_max_dfa_size ); \
269 } \
270
271 #define PUT_ON_STACK(state) \
272 { \
273 if ( ++stkend >= current_max_dfa_size ) \
274 DO_REALLOCATION \
275 stk[stkend] = state; \
276 MARK_STATE(state) \
277 }
278
279 #define ADD_STATE(state) \
280 { \
281 if ( ++numstates >= current_max_dfa_size ) \
282 DO_REALLOCATION \
283 t[numstates] = state; \
284 hashval += state; \
285 }
286
287 #define STACK_STATE(state) \
288 { \
289 PUT_ON_STACK(state) \
290 CHECK_ACCEPT(state) \
291 if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
292 ADD_STATE(state) \
293 }
294
295
296         if ( ! did_stk_init )
297                 {
298                 stk = allocate_integer_array( current_max_dfa_size );
299                 did_stk_init = true;
300                 }
301
302         nacc = stkend = hashval = 0;
303
304         for ( nstate = 1; nstate <= numstates; ++nstate )
305                 {
306                 ns = t[nstate];
307
308                 /* The state could be marked if we've already pushed it onto
309                  * the stack.
310                  */
311                 if ( ! IS_MARKED(ns) )
312                         {
313                         PUT_ON_STACK(ns)
314                         CHECK_ACCEPT(ns)
315                         hashval += ns;
316                         }
317                 }
318
319         for ( stkpos = 1; stkpos <= stkend; ++stkpos )
320                 {
321                 ns = stk[stkpos];
322                 transsym = transchar[ns];
323
324                 if ( transsym == SYM_EPSILON )
325                         {
326                         tsp = trans1[ns] + MARKER_DIFFERENCE;
327
328                         if ( tsp != NO_TRANSITION )
329                                 {
330                                 if ( ! IS_MARKED(tsp) )
331                                         STACK_STATE(tsp)
332
333                                 tsp = trans2[ns];
334
335                                 if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) )
336                                         STACK_STATE(tsp)
337                                 }
338                         }
339                 }
340
341         /* Clear out "visit" markers. */
342
343         for ( stkpos = 1; stkpos <= stkend; ++stkpos )
344                 {
345                 if ( IS_MARKED(stk[stkpos]) )
346                         UNMARK_STATE(stk[stkpos])
347                 else
348                         flexfatal(
349                         _( "consistency check failed in epsclosure()" ) );
350                 }
351
352         *ns_addr = numstates;
353         *hv_addr = hashval;
354         *nacc_addr = nacc;
355
356         return t;
357         }
358
359
360 /* increase_max_dfas - increase the maximum number of DFAs */
361
362 void increase_max_dfas(void)
363         {
364         current_max_dfas += MAX_DFAS_INCREMENT;
365
366         ++num_reallocs;
367
368         base = reallocate_integer_array( base, current_max_dfas );
369         def = reallocate_integer_array( def, current_max_dfas );
370         dfasiz = reallocate_integer_array( dfasiz, current_max_dfas );
371         accsiz = reallocate_integer_array( accsiz, current_max_dfas );
372         dhash = reallocate_integer_array( dhash, current_max_dfas );
373         dss = reallocate_int_ptr_array( dss, current_max_dfas );
374         dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas );
375
376         if ( nultrans )
377                 nultrans =
378                         reallocate_integer_array( nultrans, current_max_dfas );
379         }
380
381
382 /* ntod - convert an ndfa to a dfa
383  *
384  * Creates the dfa corresponding to the ndfa we've constructed.  The
385  * dfa starts out in state #1.
386  */
387
388 void ntod(void)
389         {
390         int *accset, ds, nacc, newds;
391         int sym, hashval, numstates, dsize;
392         int num_full_table_rows;        /* used only for -f */
393         int *nset, *dset;
394         int targptr, totaltrans, i, comstate, comfreq, targ;
395         int symlist[CSIZE + 1];
396         int num_start_states;
397         int todo_head, todo_next;
398
399         /* Note that the following are indexed by *equivalence classes*
400          * and not by characters.  Since equivalence classes are indexed
401          * beginning with 1, even if the scanner accepts NUL's, this
402          * means that (since every character is potentially in its own
403          * equivalence class) these arrays must have room for indices
404          * from 1 to CSIZE, so their size must be CSIZE + 1.
405          */
406         int duplist[CSIZE + 1], state[CSIZE + 1];
407         int targfreq[CSIZE + 1], targstate[CSIZE + 1];
408
409         accset = allocate_integer_array( num_rules + 1 );
410         nset = allocate_integer_array( current_max_dfa_size );
411
412         /* The "todo" queue is represented by the head, which is the DFA
413          * state currently being processed, and the "next", which is the
414          * next DFA state number available (not in use).  We depend on the
415          * fact that snstods() returns DFA's \in increasing order/, and thus
416          * need only know the bounds of the dfas to be processed.
417          */
418         todo_head = todo_next = 0;
419
420         for ( i = 0; i <= csize; ++i )
421                 {
422                 duplist[i] = NIL;
423                 symlist[i] = false;
424                 }
425
426         for ( i = 0; i <= num_rules; ++i )
427                 accset[i] = NIL;
428
429         if ( trace )
430                 {
431                 dumpnfa( scset[1] );
432                 fputs( _( "\n\nDFA Dump:\n\n" ), stderr );
433                 }
434
435         inittbl();
436
437         /* Check to see whether we should build a separate table for
438          * transitions on NUL characters.  We don't do this for full-speed
439          * (-F) scanners, since for them we don't have a simple state
440          * number lying around with which to index the table.  We also
441          * don't bother doing it for scanners unless (1) NUL is in its own
442          * equivalence class (indicated by a positive value of
443          * ecgroup[NUL]), (2) NUL's equivalence class is the last
444          * equivalence class, and (3) the number of equivalence classes is
445          * the same as the number of characters.  This latter case comes
446          * about when useecs is false or when it's true but every character
447          * still manages to land in its own class (unlikely, but it's
448          * cheap to check for).  If all these things are true then the
449          * character code needed to represent NUL's equivalence class for
450          * indexing the tables is going to take one more bit than the
451          * number of characters, and therefore we won't be assured of
452          * being able to fit it into a YY_CHAR variable.  This rules out
453          * storing the transitions in a compressed table, since the code
454          * for interpreting them uses a YY_CHAR variable (perhaps it
455          * should just use an integer, though; this is worth pondering ...
456          * ###).
457          *
458          * Finally, for full tables, we want the number of entries in the
459          * table to be a power of two so the array references go fast (it
460          * will just take a shift to compute the major index).  If
461          * encoding NUL's transitions in the table will spoil this, we
462          * give it its own table (note that this will be the case if we're
463          * not using equivalence classes).
464          */
465
466         /* Note that the test for ecgroup[0] == numecs below accomplishes
467          * both (1) and (2) above
468          */
469         if ( ! fullspd && ecgroup[0] == numecs )
470                 {
471                 /* NUL is alone in its equivalence class, which is the
472                  * last one.
473                  */
474                 int use_NUL_table = (numecs == csize);
475
476                 if ( fulltbl && ! use_NUL_table )
477                         {
478                         /* We still may want to use the table if numecs
479                          * is a power of 2.
480                          */
481                         int power_of_two;
482
483                         for ( power_of_two = 1; power_of_two <= csize;
484                               power_of_two *= 2 )
485                                 if ( numecs == power_of_two )
486                                         {
487                                         use_NUL_table = true;
488                                         break;
489                                         }
490                         }
491
492                 if ( use_NUL_table )
493                         nultrans = allocate_integer_array( current_max_dfas );
494
495                 /* From now on, nultrans != nil indicates that we're
496                  * saving null transitions for later, separate encoding.
497                  */
498                 }
499
500
501         if ( fullspd )
502                 {
503                 for ( i = 0; i <= numecs; ++i )
504                         state[i] = 0;
505
506                 place_state( state, 0, 0 );
507                 dfaacc[0].dfaacc_state = 0;
508                 }
509
510         else if ( fulltbl )
511                 {
512                 if ( nultrans )
513                         /* We won't be including NUL's transitions in the
514                          * table, so build it for entries from 0 .. numecs - 1.
515                          */
516                         num_full_table_rows = numecs;
517
518                 else
519                         /* Take into account the fact that we'll be including
520                          * the NUL entries in the transition table.  Build it
521                          * from 0 .. numecs.
522                          */
523                         num_full_table_rows = numecs + 1;
524
525                 /* Unless -Ca, declare it "short" because it's a real
526                  * long-shot that that won't be large enough.
527                  */
528                 out_str_dec( "static yyconst %s yy_nxt[][%d] =\n    {\n",
529                         /* '}' so vi doesn't get too confused */
530                         long_align ? "long" : "short", num_full_table_rows );
531
532                 outn( "    {" );
533
534                 /* Generate 0 entries for state #0. */
535                 for ( i = 0; i < num_full_table_rows; ++i )
536                         mk2data( 0 );
537
538                 dataflush();
539                 outn( "    },\n" );
540                 }
541
542         /* Create the first states. */
543
544         num_start_states = lastsc * 2;
545
546         for ( i = 1; i <= num_start_states; ++i )
547                 {
548                 numstates = 1;
549
550                 /* For each start condition, make one state for the case when
551                  * we're at the beginning of the line (the '^' operator) and
552                  * one for the case when we're not.
553                  */
554                 if ( i % 2 == 1 )
555                         nset[numstates] = scset[(i / 2) + 1];
556                 else
557                         nset[numstates] =
558                                 mkbranch( scbol[i / 2], scset[i / 2] );
559
560                 nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
561
562                 if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
563                         {
564                         numas += nacc;
565                         totnst += numstates;
566                         ++todo_next;
567
568                         if ( variable_trailing_context_rules && nacc > 0 )
569                                 check_trailing_context( nset, numstates,
570                                                         accset, nacc );
571                         }
572                 }
573
574         if ( ! fullspd )
575                 {
576                 if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
577                         flexfatal(
578                         _( "could not create unique end-of-buffer state" ) );
579
580                 ++numas;
581                 ++num_start_states;
582                 ++todo_next;
583                 }
584
585         while ( todo_head < todo_next )
586                 {
587                 targptr = 0;
588                 totaltrans = 0;
589
590                 for ( i = 1; i <= numecs; ++i )
591                         state[i] = 0;
592
593                 ds = ++todo_head;
594
595                 dset = dss[ds];
596                 dsize = dfasiz[ds];
597
598                 if ( trace )
599                         fprintf( stderr, _( "state # %d:\n" ), ds );
600
601                 sympartition( dset, dsize, symlist, duplist );
602
603                 for ( sym = 1; sym <= numecs; ++sym )
604                         {
605                         if ( symlist[sym] )
606                                 {
607                                 symlist[sym] = 0;
608
609                                 if ( duplist[sym] == NIL )
610                                         {
611                                         /* Symbol has unique out-transitions. */
612                                         numstates = symfollowset( dset, dsize,
613                                                                 sym, nset );
614                                         nset = epsclosure( nset, &numstates,
615                                                 accset, &nacc, &hashval );
616
617                                         if ( snstods( nset, numstates, accset,
618                                                 nacc, hashval, &newds ) )
619                                                 {
620                                                 totnst = totnst + numstates;
621                                                 ++todo_next;
622                                                 numas += nacc;
623
624                                                 if (
625                                         variable_trailing_context_rules &&
626                                                         nacc > 0 )
627                                                         check_trailing_context(
628                                                                 nset, numstates,
629                                                                 accset, nacc );
630                                                 }
631
632                                         state[sym] = newds;
633
634                                         if ( trace )
635                                                 fprintf( stderr, "\t%d\t%d\n",
636                                                         sym, newds );
637
638                                         targfreq[++targptr] = 1;
639                                         targstate[targptr] = newds;
640                                         ++numuniq;
641                                         }
642
643                                 else
644                                         {
645                                         /* sym's equivalence class has the same
646                                          * transitions as duplist(sym)'s
647                                          * equivalence class.
648                                          */
649                                         targ = state[duplist[sym]];
650                                         state[sym] = targ;
651
652                                         if ( trace )
653                                                 fprintf( stderr, "\t%d\t%d\n",
654                                                         sym, targ );
655
656                                         /* Update frequency count for
657                                          * destination state.
658                                          */
659
660                                         i = 0;
661                                         while ( targstate[++i] != targ )
662                                                 ;
663
664                                         ++targfreq[i];
665                                         ++numdup;
666                                         }
667
668                                 ++totaltrans;
669                                 duplist[sym] = NIL;
670                                 }
671                         }
672
673                 if ( caseins && ! useecs )
674                         {
675                         int j;
676
677                         for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
678                                 {
679                                 if ( state[i] == 0 && state[j] != 0 )
680                                         /* We're adding a transition. */
681                                         ++totaltrans;
682
683                                 else if ( state[i] != 0 && state[j] == 0 )
684                                         /* We're taking away a transition. */
685                                         --totaltrans;
686
687                                 state[i] = state[j];
688                                 }
689                         }
690
691                 numsnpairs += totaltrans;
692
693                 if ( ds > num_start_states )
694                         check_for_backing_up( ds, state );
695
696                 if ( nultrans )
697                         {
698                         nultrans[ds] = state[NUL_ec];
699                         state[NUL_ec] = 0;      /* remove transition */
700                         }
701
702                 if ( fulltbl )
703                         {
704                         outn( "    {" );
705
706                         /* Supply array's 0-element. */
707                         if ( ds == end_of_buffer_state )
708                                 mk2data( -end_of_buffer_state );
709                         else
710                                 mk2data( end_of_buffer_state );
711
712                         for ( i = 1; i < num_full_table_rows; ++i )
713                                 /* Jams are marked by negative of state
714                                  * number.
715                                  */
716                                 mk2data( state[i] ? state[i] : -ds );
717
718                         dataflush();
719                         outn( "    },\n" );
720                         }
721
722                 else if ( fullspd )
723                         place_state( state, ds, totaltrans );
724
725                 else if ( ds == end_of_buffer_state )
726                         /* Special case this state to make sure it does what
727                          * it's supposed to, i.e., jam on end-of-buffer.
728                          */
729                         stack1( ds, 0, 0, JAMSTATE );
730
731                 else /* normal, compressed state */
732                         {
733                         /* Determine which destination state is the most
734                          * common, and how many transitions to it there are.
735                          */
736
737                         comfreq = 0;
738                         comstate = 0;
739
740                         for ( i = 1; i <= targptr; ++i )
741                                 if ( targfreq[i] > comfreq )
742                                         {
743                                         comfreq = targfreq[i];
744                                         comstate = targstate[i];
745                                         }
746
747                         bldtbl( state, ds, totaltrans, comstate, comfreq );
748                         }
749                 }
750
751         if ( fulltbl )
752                 dataend();
753
754         else if ( ! fullspd )
755                 {
756                 cmptmps();  /* create compressed template entries */
757
758                 /* Create tables for all the states with only one
759                  * out-transition.
760                  */
761                 while ( onesp > 0 )
762                         {
763                         mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
764                         onedef[onesp] );
765                         --onesp;
766                         }
767
768                 mkdeftbl();
769                 }
770
771         flex_free( (void *) accset );
772         flex_free( (void *) nset );
773         }
774
775
776 /* snstods - converts a set of ndfa states into a dfa state
777  *
778  * synopsis
779  *    is_new_state = snstods( int sns[numstates], int numstates,
780  *                              int accset[num_rules+1], int nacc,
781  *                              int hashval, int *newds_addr );
782  *
783  * On return, the dfa state number is in newds.
784  */
785
786 int snstods(int *sns, int numstates, int *accset, int nacc, int hashval,
787             int *newds_addr)
788         {
789         int didsort = 0;
790         int i, j;
791         int newds, *oldsns;
792
793         for ( i = 1; i <= lastdfa; ++i )
794                 if ( hashval == dhash[i] )
795                         {
796                         if ( numstates == dfasiz[i] )
797                                 {
798                                 oldsns = dss[i];
799
800                                 if ( ! didsort )
801                                         {
802                                         /* We sort the states in sns so we
803                                          * can compare it to oldsns quickly.
804                                          * We use bubble because there probably
805                                          * aren't very many states.
806                                          */
807                                         bubble( sns, numstates );
808                                         didsort = 1;
809                                         }
810
811                                 for ( j = 1; j <= numstates; ++j )
812                                         if ( sns[j] != oldsns[j] )
813                                                 break;
814
815                                 if ( j > numstates )
816                                         {
817                                         ++dfaeql;
818                                         *newds_addr = i;
819                                         return 0;
820                                         }
821
822                                 ++hshcol;
823                                 }
824
825                         else
826                                 ++hshsave;
827                         }
828
829         /* Make a new dfa. */
830
831         if ( ++lastdfa >= current_max_dfas )
832                 increase_max_dfas();
833
834         newds = lastdfa;
835
836         dss[newds] = allocate_integer_array( numstates + 1 );
837
838         /* If we haven't already sorted the states in sns, we do so now,
839          * so that future comparisons with it can be made quickly.
840          */
841
842         if ( ! didsort )
843                 bubble( sns, numstates );
844
845         for ( i = 1; i <= numstates; ++i )
846                 dss[newds][i] = sns[i];
847
848         dfasiz[newds] = numstates;
849         dhash[newds] = hashval;
850
851         if ( nacc == 0 )
852                 {
853                 if ( reject )
854                         dfaacc[newds].dfaacc_set = (int *) 0;
855                 else
856                         dfaacc[newds].dfaacc_state = 0;
857
858                 accsiz[newds] = 0;
859                 }
860
861         else if ( reject )
862                 {
863                 /* We sort the accepting set in increasing order so the
864                  * disambiguating rule that the first rule listed is considered
865                  * match in the event of ties will work.  We use a bubble
866                  * sort since the list is probably quite small.
867                  */
868
869                 bubble( accset, nacc );
870
871                 dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 );
872
873                 /* Save the accepting set for later */
874                 for ( i = 1; i <= nacc; ++i )
875                         {
876                         dfaacc[newds].dfaacc_set[i] = accset[i];
877
878                         if ( accset[i] <= num_rules )
879                                 /* Who knows, perhaps a REJECT can yield
880                                  * this rule.
881                                  */
882                                 rule_useful[accset[i]] = true;
883                         }
884
885                 accsiz[newds] = nacc;
886                 }
887
888         else
889                 {
890                 /* Find lowest numbered rule so the disambiguating rule
891                  * will work.
892                  */
893                 j = num_rules + 1;
894
895                 for ( i = 1; i <= nacc; ++i )
896                         if ( accset[i] < j )
897                                 j = accset[i];
898
899                 dfaacc[newds].dfaacc_state = j;
900
901                 if ( j <= num_rules )
902                         rule_useful[j] = true;
903                 }
904
905         *newds_addr = newds;
906
907         return 1;
908         }
909
910
911 /* symfollowset - follow the symbol transitions one step
912  *
913  * synopsis
914  *    numstates = symfollowset( int ds[current_max_dfa_size], int dsize,
915  *                              int transsym, int nset[current_max_dfa_size] );
916  */
917
918 int symfollowset(int *ds, int dsize, int transsym, int *nset)
919         {
920         int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist;
921
922         numstates = 0;
923
924         for ( i = 1; i <= dsize; ++i )
925                 { /* for each nfa state ns in the state set of ds */
926                 ns = ds[i];
927                 sym = transchar[ns];
928                 tsp = trans1[ns];
929
930                 if ( sym < 0 )
931                         { /* it's a character class */
932                         sym = -sym;
933                         ccllist = cclmap[sym];
934                         lenccl = ccllen[sym];
935
936                         if ( cclng[sym] )
937                                 {
938                                 for ( j = 0; j < lenccl; ++j )
939                                         {
940                                         /* Loop through negated character
941                                          * class.
942                                          */
943                                         ch = ccltbl[ccllist + j];
944
945                                         if ( ch == 0 )
946                                                 ch = NUL_ec;
947
948                                         if ( ch > transsym )
949                                                 /* Transsym isn't in negated
950                                                  * ccl.
951                                                  */
952                                                 break;
953
954                                         else if ( ch == transsym )
955                                                 /* next 2 */ goto bottom;
956                                         }
957
958                                 /* Didn't find transsym in ccl. */
959                                 nset[++numstates] = tsp;
960                                 }
961
962                         else
963                                 for ( j = 0; j < lenccl; ++j )
964                                         {
965                                         ch = ccltbl[ccllist + j];
966
967                                         if ( ch == 0 )
968                                                 ch = NUL_ec;
969
970                                         if ( ch > transsym )
971                                                 break;
972                                         else if ( ch == transsym )
973                                                 {
974                                                 nset[++numstates] = tsp;
975                                                 break;
976                                                 }
977                                         }
978                         }
979
980                 else if ( sym >= 'A' && sym <= 'Z' && caseins )
981                         flexfatal(
982                         _( "consistency check failed in symfollowset" ) );
983
984                 else if ( sym == SYM_EPSILON )
985                         { /* do nothing */
986                         }
987
988                 else if ( ABS( ecgroup[sym] ) == transsym )
989                         nset[++numstates] = tsp;
990
991                 bottom: ;
992                 }
993
994         return numstates;
995         }
996
997
998 /* sympartition - partition characters with same out-transitions
999  *
1000  * synopsis
1001  *    sympartition( int ds[current_max_dfa_size], int numstates,
1002  *                      int symlist[numecs], int duplist[numecs] );
1003  */
1004
1005 void sympartition(int *ds, int numstates, int *symlist, int *duplist)
1006         {
1007         int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
1008
1009         /* Partitioning is done by creating equivalence classes for those
1010          * characters which have out-transitions from the given state.  Thus
1011          * we are really creating equivalence classes of equivalence classes.
1012          */
1013
1014         for ( i = 1; i <= numecs; ++i )
1015                 { /* initialize equivalence class list */
1016                 duplist[i] = i - 1;
1017                 dupfwd[i] = i + 1;
1018                 }
1019
1020         duplist[1] = NIL;
1021         dupfwd[numecs] = NIL;
1022
1023         for ( i = 1; i <= numstates; ++i )
1024                 {
1025                 ns = ds[i];
1026                 tch = transchar[ns];
1027
1028                 if ( tch != SYM_EPSILON )
1029                         {
1030                         if ( tch < -lastccl || tch >= csize )
1031                                 {
1032                                 flexfatal(
1033                 _( "bad transition character detected in sympartition()" ) );
1034                                 }
1035
1036                         if ( tch >= 0 )
1037                                 { /* character transition */
1038                                 int ec = ecgroup[tch];
1039
1040                                 mkechar( ec, dupfwd, duplist );
1041                                 symlist[ec] = 1;
1042                                 }
1043
1044                         else
1045                                 { /* character class */
1046                                 tch = -tch;
1047
1048                                 lenccl = ccllen[tch];
1049                                 cclp = cclmap[tch];
1050                                 mkeccl( ccltbl + cclp, lenccl, dupfwd,
1051                                         duplist, numecs, NUL_ec );
1052
1053                                 if ( cclng[tch] )
1054                                         {
1055                                         j = 0;
1056
1057                                         for ( k = 0; k < lenccl; ++k )
1058                                                 {
1059                                                 ich = ccltbl[cclp + k];
1060
1061                                                 if ( ich == 0 )
1062                                                         ich = NUL_ec;
1063
1064                                                 for ( ++j; j < ich; ++j )
1065                                                         symlist[j] = 1;
1066                                                 }
1067
1068                                         for ( ++j; j <= numecs; ++j )
1069                                                 symlist[j] = 1;
1070                                         }
1071
1072                                 else
1073                                         for ( k = 0; k < lenccl; ++k )
1074                                                 {
1075                                                 ich = ccltbl[cclp + k];
1076
1077                                                 if ( ich == 0 )
1078                                                         ich = NUL_ec;
1079
1080                                                 symlist[ich] = 1;
1081                                                 }
1082                                 }
1083                         }
1084                 }
1085         }