usr.bin/lex/dfa.c

   1 /* dfa - DFA construction routines */
   2
   3 /*-
   4  * Copyright (c) 1990 The Regents of the University of California.
   5  * All rights reserved.
   6  *
   7  * This code is derived from software contributed to Berkeley by
   8  * Vern Paxson.
   9  *
  10  * The United States Government has rights in this work pursuant
  11  * to contract no. DE-AC03-76SF00098 between the United States
  12  * Department of Energy and the University of California.
  13  *
  14  * Redistribution and use in source and binary forms are permitted provided
  15  * that: (1) source distributions retain this entire copyright notice and
  16  * comment, and (2) distributions including binaries display the following
  17  * acknowledgement:  ``This product includes software developed by the
  18  * University of California, Berkeley and its contributors'' in the
  19  * documentation or other materials provided with the distribution and in
  20  * all advertising materials mentioning features or use of this software.
  21  * Neither the name of the University nor the names of its contributors may
  22  * be used to endorse or promote products derived from this software without
  23  * specific prior written permission.
  24  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  25  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  26  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  27  */
  28
  29 /* $Header: /home/daffy/u0/vern/flex/RCS/dfa.c,v 2.26 95/04/20 13:53:14 vern Exp $ */
  30 /* $FreeBSD: src/usr.bin/lex/dfa.c,v 1.5 1999/10/27 07:56:43 obrien Exp $ */
  31
  32 #include "flexdef.h"
  33
  34
  35 /* declare functions that have forward references */
  36
  37 void dump_associated_rules PROTO((FILE*, int));
  38 void dump_transitions PROTO((FILE*, int[]));
  39 void sympartition PROTO((int[], int, int[], int[]));
  40 int symfollowset PROTO((int[], int, int, int[]));
  41
  42
  43 /* check_for_backing_up - check a DFA state for backing up
  44  *
  45  * synopsis
  46  *     void check_for_backing_up( int ds, int state[numecs] );
  47  *
  48  * ds is the number of the state to check and state[] is its out-transitions,
  49  * indexed by equivalence class.
  50  */
  51
  52 void check_for_backing_up( ds, state )
  53 int ds;
  54 int state[];
  55         {
  56         if ( (reject && ! dfaacc[ds].dfaacc_set) ||
  57              (! reject && ! dfaacc[ds].dfaacc_state) )
  58                 { /* state is non-accepting */
  59                 ++num_backing_up;
  60
  61                 if ( backing_up_report )
  62                         {
  63                         fprintf( backing_up_file,
  64                                 _( "State #%d is non-accepting -\n" ), ds );
  65
  66                         /* identify the state */
  67                         dump_associated_rules( backing_up_file, ds );
  68
  69                         /* Now identify it further using the out- and
  70                          * jam-transitions.
  71                          */
  72                         dump_transitions( backing_up_file, state );
  73
  74                         putc( '\n', backing_up_file );
  75                         }
  76                 }
  77         }
  78
  79
  80 /* check_trailing_context - check to see if NFA state set constitutes
  81  *                          "dangerous" trailing context
  82  *
  83  * synopsis
  84  *    void check_trailing_context( int nfa_states[num_states+1], int num_states,
  85  *                              int accset[nacc+1], int nacc );
  86  *
  87  * NOTES
  88  *  Trailing context is "dangerous" if both the head and the trailing
  89  *  part are of variable size \and/ there's a DFA state which contains
  90  *  both an accepting state for the head part of the rule and NFA states
  91  *  which occur after the beginning of the trailing context.
  92  *
  93  *  When such a rule is matched, it's impossible to tell if having been
  94  *  in the DFA state indicates the beginning of the trailing context or
  95  *  further-along scanning of the pattern.  In these cases, a warning
  96  *  message is issued.
  97  *
  98  *    nfa_states[1 .. num_states] is the list of NFA states in the DFA.
  99  *    accset[1 .. nacc] is the list of accepting numbers for the DFA state.
 100  */
 101
 102 void check_trailing_context( nfa_states, num_states, accset, nacc )
 103 int *nfa_states, num_states;
 104 int *accset;
 105 int nacc;
 106         {
 107         register int i, j;
 108
 109         for ( i = 1; i <= num_states; ++i )
 110                 {
 111                 int ns = nfa_states[i];
 112                 register int type = state_type[ns];
 113                 register int ar = assoc_rule[ns];
 114
 115                 if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE )
 116                         { /* do nothing */
 117                         }
 118
 119                 else if ( type == STATE_TRAILING_CONTEXT )
 120                         {
 121                         /* Potential trouble.  Scan set of accepting numbers
 122                          * for the one marking the end of the "head".  We
 123                          * assume that this looping will be fairly cheap
 124                          * since it's rare that an accepting number set
 125                          * is large.
 126                          */
 127                         for ( j = 1; j <= nacc; ++j )
 128                                 if ( accset[j] & YY_TRAILING_HEAD_MASK )
 129                                         {
 130                                         line_warning(
 131                                         _( "dangerous trailing context" ),
 132                                                 rule_linenum[ar] );
 133                                         return;
 134                                         }
 135                         }
 136                 }
 137         }
 138
 139
 140 /* dump_associated_rules - list the rules associated with a DFA state
 141  *
 142  * Goes through the set of NFA states associated with the DFA and
 143  * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
 144  * and writes a report to the given file.
 145  */
 146
 147 void dump_associated_rules( file, ds )
 148 FILE *file;
 149 int ds;
 150         {
 151         register int i, j;
 152         register int num_associated_rules = 0;
 153         int rule_set[MAX_ASSOC_RULES + 1];
 154         int *dset = dss[ds];
 155         int size = dfasiz[ds];
 156
 157         for ( i = 1; i <= size; ++i )
 158                 {
 159                 register int rule_num = rule_linenum[assoc_rule[dset[i]]];
 160
 161                 for ( j = 1; j <= num_associated_rules; ++j )
 162                         if ( rule_num == rule_set[j] )
 163                                 break;
 164
 165                 if ( j > num_associated_rules )
 166                         { /* new rule */
 167                         if ( num_associated_rules < MAX_ASSOC_RULES )
 168                                 rule_set[++num_associated_rules] = rule_num;
 169                         }
 170                 }
 171
 172         bubble( rule_set, num_associated_rules );
 173
 174         fprintf( file, _( " associated rule line numbers:" ) );
 175
 176         for ( i = 1; i <= num_associated_rules; ++i )
 177                 {
 178                 if ( i % 8 == 1 )
 179                         putc( '\n', file );
 180
 181                 fprintf( file, "\t%d", rule_set[i] );
 182                 }
 183
 184         putc( '\n', file );
 185         }
 186
 187
 188 /* dump_transitions - list the transitions associated with a DFA state
 189  *
 190  * synopsis
 191  *     dump_transitions( FILE *file, int state[numecs] );
 192  *
 193  * Goes through the set of out-transitions and lists them in human-readable
 194  * form (i.e., not as equivalence classes); also lists jam transitions
 195  * (i.e., all those which are not out-transitions, plus EOF).  The dump
 196  * is done to the given file.
 197  */
 198
 199 void dump_transitions( file, state )
 200 FILE *file;
 201 int state[];
 202         {
 203         register int i, ec;
 204         int out_char_set[CSIZE];
 205
 206         for ( i = 0; i < csize; ++i )
 207                 {
 208                 ec = ABS( ecgroup[i] );
 209                 out_char_set[i] = state[ec];
 210                 }
 211
 212         fprintf( file, _( " out-transitions: " ) );
 213
 214         list_character_set( file, out_char_set );
 215
 216         /* now invert the members of the set to get the jam transitions */
 217         for ( i = 0; i < csize; ++i )
 218                 out_char_set[i] = ! out_char_set[i];
 219
 220         fprintf( file, _( "\n jam-transitions: EOF " ) );
 221
 222         list_character_set( file, out_char_set );
 223
 224         putc( '\n', file );
 225         }
 226
 227
 228 /* epsclosure - construct the epsilon closure of a set of ndfa states
 229  *
 230  * synopsis
 231  *    int *epsclosure( int t[num_states], int *numstates_addr,
 232  *                      int accset[num_rules+1], int *nacc_addr,
 233  *                      int *hashval_addr );
 234  *
 235  * NOTES
 236  *  The epsilon closure is the set of all states reachable by an arbitrary
 237  *  number of epsilon transitions, which themselves do not have epsilon
 238  *  transitions going out, unioned with the set of states which have non-null
 239  *  accepting numbers.  t is an array of size numstates of nfa state numbers.
 240  *  Upon return, t holds the epsilon closure and *numstates_addr is updated.
 241  *  accset holds a list of the accepting numbers, and the size of accset is
 242  *  given by *nacc_addr.  t may be subjected to reallocation if it is not
 243  *  large enough to hold the epsilon closure.
 244  *
 245  *  hashval is the hash value for the dfa corresponding to the state set.
 246  */
 247
 248 int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr )
 249 int *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
 250         {
 251         register int stkpos, ns, tsp;
 252         int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
 253         int stkend, nstate;
 254         static int did_stk_init = false, *stk;
 255
 256 #define MARK_STATE(state) \
 257 trans1[state] = trans1[state] - MARKER_DIFFERENCE;
 258
 259 #define IS_MARKED(state) (trans1[state] < 0)
 260
 261 #define UNMARK_STATE(state) \
 262 trans1[state] = trans1[state] + MARKER_DIFFERENCE;
 263
 264 #define CHECK_ACCEPT(state) \
 265 { \
 266 nfaccnum = accptnum[state]; \
 267 if ( nfaccnum != NIL ) \
 268 accset[++nacc] = nfaccnum; \
 269 }
 270
 271 #define DO_REALLOCATION \
 272 { \
 273 current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
 274 ++num_reallocs; \
 275 t = reallocate_integer_array( t, current_max_dfa_size ); \
 276 stk = reallocate_integer_array( stk, current_max_dfa_size ); \
 277 } \
 278
 279 #define PUT_ON_STACK(state) \
 280 { \
 281 if ( ++stkend >= current_max_dfa_size ) \
 282 DO_REALLOCATION \
 283 stk[stkend] = state; \
 284 MARK_STATE(state) \
 285 }
 286
 287 #define ADD_STATE(state) \
 288 { \
 289 if ( ++numstates >= current_max_dfa_size ) \
 290 DO_REALLOCATION \
 291 t[numstates] = state; \
 292 hashval += state; \
 293 }
 294
 295 #define STACK_STATE(state) \
 296 { \
 297 PUT_ON_STACK(state) \
 298 CHECK_ACCEPT(state) \
 299 if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
 300 ADD_STATE(state) \
 301 }
 302
 303
 304         if ( ! did_stk_init )
 305                 {
 306                 stk = allocate_integer_array( current_max_dfa_size );
 307                 did_stk_init = true;
 308                 }
 309
 310         nacc = stkend = hashval = 0;
 311
 312         for ( nstate = 1; nstate <= numstates; ++nstate )
 313                 {
 314                 ns = t[nstate];
 315
 316                 /* The state could be marked if we've already pushed it onto
 317                  * the stack.
 318                  */
 319                 if ( ! IS_MARKED(ns) )
 320                         {
 321                         PUT_ON_STACK(ns)
 322                         CHECK_ACCEPT(ns)
 323                         hashval += ns;
 324                         }
 325                 }
 326
 327         for ( stkpos = 1; stkpos <= stkend; ++stkpos )
 328                 {
 329                 ns = stk[stkpos];
 330                 transsym = transchar[ns];
 331
 332                 if ( transsym == SYM_EPSILON )
 333                         {
 334                         tsp = trans1[ns] + MARKER_DIFFERENCE;
 335
 336                         if ( tsp != NO_TRANSITION )
 337                                 {
 338                                 if ( ! IS_MARKED(tsp) )
 339                                         STACK_STATE(tsp)
 340
 341                                 tsp = trans2[ns];
 342
 343                                 if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) )
 344                                         STACK_STATE(tsp)
 345                                 }
 346                         }
 347                 }
 348
 349         /* Clear out "visit" markers. */
 350
 351         for ( stkpos = 1; stkpos <= stkend; ++stkpos )
 352                 {
 353                 if ( IS_MARKED(stk[stkpos]) )
 354                         UNMARK_STATE(stk[stkpos])
 355                 else
 356                         flexfatal(
 357                         _( "consistency check failed in epsclosure()" ) );
 358                 }
 359
 360         *ns_addr = numstates;
 361         *hv_addr = hashval;
 362         *nacc_addr = nacc;
 363
 364         return t;
 365         }
 366
 367
 368 /* increase_max_dfas - increase the maximum number of DFAs */
 369
 370 void increase_max_dfas()
 371         {
 372         current_max_dfas += MAX_DFAS_INCREMENT;
 373
 374         ++num_reallocs;
 375
 376         base = reallocate_integer_array( base, current_max_dfas );
 377         def = reallocate_integer_array( def, current_max_dfas );
 378         dfasiz = reallocate_integer_array( dfasiz, current_max_dfas );
 379         accsiz = reallocate_integer_array( accsiz, current_max_dfas );
 380         dhash = reallocate_integer_array( dhash, current_max_dfas );
 381         dss = reallocate_int_ptr_array( dss, current_max_dfas );
 382         dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas );
 383
 384         if ( nultrans )
 385                 nultrans =
 386                         reallocate_integer_array( nultrans, current_max_dfas );
 387         }
 388
 389
 390 /* ntod - convert an ndfa to a dfa
 391  *
 392  * Creates the dfa corresponding to the ndfa we've constructed.  The
 393  * dfa starts out in state #1.
 394  */
 395
 396 void ntod()
 397         {
 398         int *accset, ds, nacc, newds;
 399         int sym, hashval, numstates, dsize;
 400         int num_full_table_rows;        /* used only for -f */
 401         int *nset, *dset;
 402         int targptr, totaltrans, i, comstate, comfreq, targ;
 403         int symlist[CSIZE + 1];
 404         int num_start_states;
 405         int todo_head, todo_next;
 406
 407         /* Note that the following are indexed by *equivalence classes*
 408          * and not by characters.  Since equivalence classes are indexed
 409          * beginning with 1, even if the scanner accepts NUL's, this
 410          * means that (since every character is potentially in its own
 411          * equivalence class) these arrays must have room for indices
 412          * from 1 to CSIZE, so their size must be CSIZE + 1.
 413          */
 414         int duplist[CSIZE + 1], state[CSIZE + 1];
 415         int targfreq[CSIZE + 1], targstate[CSIZE + 1];
 416
 417         accset = allocate_integer_array( num_rules + 1 );
 418         nset = allocate_integer_array( current_max_dfa_size );
 419
 420         /* The "todo" queue is represented by the head, which is the DFA
 421          * state currently being processed, and the "next", which is the
 422          * next DFA state number available (not in use).  We depend on the
 423          * fact that snstods() returns DFA's \in increasing order/, and thus
 424          * need only know the bounds of the dfas to be processed.
 425          */
 426         todo_head = todo_next = 0;
 427
 428         for ( i = 0; i <= csize; ++i )
 429                 {
 430                 duplist[i] = NIL;
 431                 symlist[i] = false;
 432                 }
 433
 434         for ( i = 0; i <= num_rules; ++i )
 435                 accset[i] = NIL;
 436
 437         if ( trace )
 438                 {
 439                 dumpnfa( scset[1] );
 440                 fputs( _( "\n\nDFA Dump:\n\n" ), stderr );
 441                 }
 442
 443         inittbl();
 444
 445         /* Check to see whether we should build a separate table for
 446          * transitions on NUL characters.  We don't do this for full-speed
 447          * (-F) scanners, since for them we don't have a simple state
 448          * number lying around with which to index the table.  We also
 449          * don't bother doing it for scanners unless (1) NUL is in its own
 450          * equivalence class (indicated by a positive value of
 451          * ecgroup[NUL]), (2) NUL's equivalence class is the last
 452          * equivalence class, and (3) the number of equivalence classes is
 453          * the same as the number of characters.  This latter case comes
 454          * about when useecs is false or when it's true but every character
 455          * still manages to land in its own class (unlikely, but it's
 456          * cheap to check for).  If all these things are true then the
 457          * character code needed to represent NUL's equivalence class for
 458          * indexing the tables is going to take one more bit than the
 459          * number of characters, and therefore we won't be assured of
 460          * being able to fit it into a YY_CHAR variable.  This rules out
 461          * storing the transitions in a compressed table, since the code
 462          * for interpreting them uses a YY_CHAR variable (perhaps it
 463          * should just use an integer, though; this is worth pondering ...
 464          * ###).
 465          *
 466          * Finally, for full tables, we want the number of entries in the
 467          * table to be a power of two so the array references go fast (it
 468          * will just take a shift to compute the major index).  If
 469          * encoding NUL's transitions in the table will spoil this, we
 470          * give it its own table (note that this will be the case if we're
 471          * not using equivalence classes).
 472          */
 473
 474         /* Note that the test for ecgroup[0] == numecs below accomplishes
 475          * both (1) and (2) above
 476          */
 477         if ( ! fullspd && ecgroup[0] == numecs )
 478                 {
 479                 /* NUL is alone in its equivalence class, which is the
 480                  * last one.
 481                  */
 482                 int use_NUL_table = (numecs == csize);
 483
 484                 if ( fulltbl && ! use_NUL_table )
 485                         {
 486                         /* We still may want to use the table if numecs
 487                          * is a power of 2.
 488                          */
 489                         int power_of_two;
 490
 491                         for ( power_of_two = 1; power_of_two <= csize;
 492                               power_of_two *= 2 )
 493                                 if ( numecs == power_of_two )
 494                                         {
 495                                         use_NUL_table = true;
 496                                         break;
 497                                         }
 498                         }
 499
 500                 if ( use_NUL_table )
 501                         nultrans = allocate_integer_array( current_max_dfas );
 502
 503                 /* From now on, nultrans != nil indicates that we're
 504                  * saving null transitions for later, separate encoding.
 505                  */
 506                 }
 507
 508
 509         if ( fullspd )
 510                 {
 511                 for ( i = 0; i <= numecs; ++i )
 512                         state[i] = 0;
 513
 514                 place_state( state, 0, 0 );
 515                 dfaacc[0].dfaacc_state = 0;
 516                 }
 517
 518         else if ( fulltbl )
 519                 {
 520                 if ( nultrans )
 521                         /* We won't be including NUL's transitions in the
 522                          * table, so build it for entries from 0 .. numecs - 1.
 523                          */
 524                         num_full_table_rows = numecs;
 525
 526                 else
 527                         /* Take into account the fact that we'll be including
 528                          * the NUL entries in the transition table.  Build it
 529                          * from 0 .. numecs.
 530                          */
 531                         num_full_table_rows = numecs + 1;
 532
 533                 /* Unless -Ca, declare it "short" because it's a real
 534                  * long-shot that that won't be large enough.
 535                  */
 536                 out_str_dec( "static yyconst %s yy_nxt[][%d] =\n    {\n",
 537                         /* '}' so vi doesn't get too confused */
 538                         long_align ? "long" : "short", num_full_table_rows );
 539
 540                 outn( "    {" );
 541
 542                 /* Generate 0 entries for state #0. */
 543                 for ( i = 0; i < num_full_table_rows; ++i )
 544                         mk2data( 0 );
 545
 546                 dataflush();
 547                 outn( "    },\n" );
 548                 }
 549
 550         /* Create the first states. */
 551
 552         num_start_states = lastsc * 2;
 553
 554         for ( i = 1; i <= num_start_states; ++i )
 555                 {
 556                 numstates = 1;
 557
 558                 /* For each start condition, make one state for the case when
 559                  * we're at the beginning of the line (the '^' operator) and
 560                  * one for the case when we're not.
 561                  */
 562                 if ( i % 2 == 1 )
 563                         nset[numstates] = scset[(i / 2) + 1];
 564                 else
 565                         nset[numstates] =
 566                                 mkbranch( scbol[i / 2], scset[i / 2] );
 567
 568                 nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
 569
 570                 if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
 571                         {
 572                         numas += nacc;
 573                         totnst += numstates;
 574                         ++todo_next;
 575
 576                         if ( variable_trailing_context_rules && nacc > 0 )
 577                                 check_trailing_context( nset, numstates,
 578                                                         accset, nacc );
 579                         }
 580                 }
 581
 582         if ( ! fullspd )
 583                 {
 584                 if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
 585                         flexfatal(
 586                         _( "could not create unique end-of-buffer state" ) );
 587
 588                 ++numas;
 589                 ++num_start_states;
 590                 ++todo_next;
 591                 }
 592
 593         while ( todo_head < todo_next )
 594                 {
 595                 targptr = 0;
 596                 totaltrans = 0;
 597
 598                 for ( i = 1; i <= numecs; ++i )
 599                         state[i] = 0;
 600
 601                 ds = ++todo_head;
 602
 603                 dset = dss[ds];
 604                 dsize = dfasiz[ds];
 605
 606                 if ( trace )
 607                         fprintf( stderr, _( "state # %d:\n" ), ds );
 608
 609                 sympartition( dset, dsize, symlist, duplist );
 610
 611                 for ( sym = 1; sym <= numecs; ++sym )
 612                         {
 613                         if ( symlist[sym] )
 614                                 {
 615                                 symlist[sym] = 0;
 616
 617                                 if ( duplist[sym] == NIL )
 618                                         {
 619                                         /* Symbol has unique out-transitions. */
 620                                         numstates = symfollowset( dset, dsize,
 621                                                                 sym, nset );
 622                                         nset = epsclosure( nset, &numstates,
 623                                                 accset, &nacc, &hashval );
 624
 625                                         if ( snstods( nset, numstates, accset,
 626                                                 nacc, hashval, &newds ) )
 627                                                 {
 628                                                 totnst = totnst + numstates;
 629                                                 ++todo_next;
 630                                                 numas += nacc;
 631
 632                                                 if (
 633                                         variable_trailing_context_rules &&
 634                                                         nacc > 0 )
 635                                                         check_trailing_context(
 636                                                                 nset, numstates,
 637                                                                 accset, nacc );
 638                                                 }
 639
 640                                         state[sym] = newds;
 641
 642                                         if ( trace )
 643                                                 fprintf( stderr, "\t%d\t%d\n",
 644                                                         sym, newds );
 645
 646                                         targfreq[++targptr] = 1;
 647                                         targstate[targptr] = newds;
 648                                         ++numuniq;
 649                                         }
 650
 651                                 else
 652                                         {
 653                                         /* sym's equivalence class has the same
 654                                          * transitions as duplist(sym)'s
 655                                          * equivalence class.
 656                                          */
 657                                         targ = state[duplist[sym]];
 658                                         state[sym] = targ;
 659
 660                                         if ( trace )
 661                                                 fprintf( stderr, "\t%d\t%d\n",
 662                                                         sym, targ );
 663
 664                                         /* Update frequency count for
 665                                          * destination state.
 666                                          */
 667
 668                                         i = 0;
 669                                         while ( targstate[++i] != targ )
 670                                                 ;
 671
 672                                         ++targfreq[i];
 673                                         ++numdup;
 674                                         }
 675
 676                                 ++totaltrans;
 677                                 duplist[sym] = NIL;
 678                                 }
 679                         }
 680
 681                 if ( caseins && ! useecs )
 682                         {
 683                         register int j;
 684
 685                         for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
 686                                 {
 687                                 if ( state[i] == 0 && state[j] != 0 )
 688                                         /* We're adding a transition. */
 689                                         ++totaltrans;
 690
 691                                 else if ( state[i] != 0 && state[j] == 0 )
 692                                         /* We're taking away a transition. */
 693                                         --totaltrans;
 694
 695                                 state[i] = state[j];
 696                                 }
 697                         }
 698
 699                 numsnpairs += totaltrans;
 700
 701                 if ( ds > num_start_states )
 702                         check_for_backing_up( ds, state );
 703
 704                 if ( nultrans )
 705                         {
 706                         nultrans[ds] = state[NUL_ec];
 707                         state[NUL_ec] = 0;      /* remove transition */
 708                         }
 709
 710                 if ( fulltbl )
 711                         {
 712                         outn( "    {" );
 713
 714                         /* Supply array's 0-element. */
 715                         if ( ds == end_of_buffer_state )
 716                                 mk2data( -end_of_buffer_state );
 717                         else
 718                                 mk2data( end_of_buffer_state );
 719
 720                         for ( i = 1; i < num_full_table_rows; ++i )
 721                                 /* Jams are marked by negative of state
 722                                  * number.
 723                                  */
 724                                 mk2data( state[i] ? state[i] : -ds );
 725
 726                         dataflush();
 727                         outn( "    },\n" );
 728                         }
 729
 730                 else if ( fullspd )
 731                         place_state( state, ds, totaltrans );
 732
 733                 else if ( ds == end_of_buffer_state )
 734                         /* Special case this state to make sure it does what
 735                          * it's supposed to, i.e., jam on end-of-buffer.
 736                          */
 737                         stack1( ds, 0, 0, JAMSTATE );
 738
 739                 else /* normal, compressed state */
 740                         {
 741                         /* Determine which destination state is the most
 742                          * common, and how many transitions to it there are.
 743                          */
 744
 745                         comfreq = 0;
 746                         comstate = 0;
 747
 748                         for ( i = 1; i <= targptr; ++i )
 749                                 if ( targfreq[i] > comfreq )
 750                                         {
 751                                         comfreq = targfreq[i];
 752                                         comstate = targstate[i];
 753                                         }
 754
 755                         bldtbl( state, ds, totaltrans, comstate, comfreq );
 756                         }
 757                 }
 758
 759         if ( fulltbl )
 760                 dataend();
 761
 762         else if ( ! fullspd )
 763                 {
 764                 cmptmps();  /* create compressed template entries */
 765
 766                 /* Create tables for all the states with only one
 767                  * out-transition.
 768                  */
 769                 while ( onesp > 0 )
 770                         {
 771                         mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
 772                         onedef[onesp] );
 773                         --onesp;
 774                         }
 775
 776                 mkdeftbl();
 777                 }
 778
 779         flex_free( (void *) accset );
 780         flex_free( (void *) nset );
 781         }
 782
 783
 784 /* snstods - converts a set of ndfa states into a dfa state
 785  *
 786  * synopsis
 787  *    is_new_state = snstods( int sns[numstates], int numstates,
 788  *                              int accset[num_rules+1], int nacc,
 789  *                              int hashval, int *newds_addr );
 790  *
 791  * On return, the dfa state number is in newds.
 792  */
 793
 794 int snstods( sns, numstates, accset, nacc, hashval, newds_addr )
 795 int sns[], numstates, accset[], nacc, hashval, *newds_addr;
 796         {
 797         int didsort = 0;
 798         register int i, j;
 799         int newds, *oldsns;
 800
 801         for ( i = 1; i <= lastdfa; ++i )
 802                 if ( hashval == dhash[i] )
 803                         {
 804                         if ( numstates == dfasiz[i] )
 805                                 {
 806                                 oldsns = dss[i];
 807
 808                                 if ( ! didsort )
 809                                         {
 810                                         /* We sort the states in sns so we
 811                                          * can compare it to oldsns quickly.
 812                                          * We use bubble because there probably
 813                                          * aren't very many states.
 814                                          */
 815                                         bubble( sns, numstates );
 816                                         didsort = 1;
 817                                         }
 818
 819                                 for ( j = 1; j <= numstates; ++j )
 820                                         if ( sns[j] != oldsns[j] )
 821                                                 break;
 822
 823                                 if ( j > numstates )
 824                                         {
 825                                         ++dfaeql;
 826                                         *newds_addr = i;
 827                                         return 0;
 828                                         }
 829
 830                                 ++hshcol;
 831                                 }
 832
 833                         else
 834                                 ++hshsave;
 835                         }
 836
 837         /* Make a new dfa. */
 838
 839         if ( ++lastdfa >= current_max_dfas )
 840                 increase_max_dfas();
 841
 842         newds = lastdfa;
 843
 844         dss[newds] = allocate_integer_array( numstates + 1 );
 845
 846         /* If we haven't already sorted the states in sns, we do so now,
 847          * so that future comparisons with it can be made quickly.
 848          */
 849
 850         if ( ! didsort )
 851                 bubble( sns, numstates );
 852
 853         for ( i = 1; i <= numstates; ++i )
 854                 dss[newds][i] = sns[i];
 855
 856         dfasiz[newds] = numstates;
 857         dhash[newds] = hashval;
 858
 859         if ( nacc == 0 )
 860                 {
 861                 if ( reject )
 862                         dfaacc[newds].dfaacc_set = (int *) 0;
 863                 else
 864                         dfaacc[newds].dfaacc_state = 0;
 865
 866                 accsiz[newds] = 0;
 867                 }
 868
 869         else if ( reject )
 870                 {
 871                 /* We sort the accepting set in increasing order so the
 872                  * disambiguating rule that the first rule listed is considered
 873                  * match in the event of ties will work.  We use a bubble
 874                  * sort since the list is probably quite small.
 875                  */
 876
 877                 bubble( accset, nacc );
 878
 879                 dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 );
 880
 881                 /* Save the accepting set for later */
 882                 for ( i = 1; i <= nacc; ++i )
 883                         {
 884                         dfaacc[newds].dfaacc_set[i] = accset[i];
 885
 886                         if ( accset[i] <= num_rules )
 887                                 /* Who knows, perhaps a REJECT can yield
 888                                  * this rule.
 889                                  */
 890                                 rule_useful[accset[i]] = true;
 891                         }
 892
 893                 accsiz[newds] = nacc;
 894                 }
 895
 896         else
 897                 {
 898                 /* Find lowest numbered rule so the disambiguating rule
 899                  * will work.
 900                  */
 901                 j = num_rules + 1;
 902
 903                 for ( i = 1; i <= nacc; ++i )
 904                         if ( accset[i] < j )
 905                                 j = accset[i];
 906
 907                 dfaacc[newds].dfaacc_state = j;
 908
 909                 if ( j <= num_rules )
 910                         rule_useful[j] = true;
 911                 }
 912
 913         *newds_addr = newds;
 914
 915         return 1;
 916         }
 917
 918
 919 /* symfollowset - follow the symbol transitions one step
 920  *
 921  * synopsis
 922  *    numstates = symfollowset( int ds[current_max_dfa_size], int dsize,
 923  *                              int transsym, int nset[current_max_dfa_size] );
 924  */
 925
 926 int symfollowset( ds, dsize, transsym, nset )
 927 int ds[], dsize, transsym, nset[];
 928         {
 929         int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist;
 930
 931         numstates = 0;
 932
 933         for ( i = 1; i <= dsize; ++i )
 934                 { /* for each nfa state ns in the state set of ds */
 935                 ns = ds[i];
 936                 sym = transchar[ns];
 937                 tsp = trans1[ns];
 938
 939                 if ( sym < 0 )
 940                         { /* it's a character class */
 941                         sym = -sym;
 942                         ccllist = cclmap[sym];
 943                         lenccl = ccllen[sym];
 944
 945                         if ( cclng[sym] )
 946                                 {
 947                                 for ( j = 0; j < lenccl; ++j )
 948                                         {
 949                                         /* Loop through negated character
 950                                          * class.
 951                                          */
 952                                         ch = ccltbl[ccllist + j];
 953
 954                                         if ( ch == 0 )
 955                                                 ch = NUL_ec;
 956
 957                                         if ( ch > transsym )
 958                                                 /* Transsym isn't in negated
 959                                                  * ccl.
 960                                                  */
 961                                                 break;
 962
 963                                         else if ( ch == transsym )
 964                                                 /* next 2 */ goto bottom;
 965                                         }
 966
 967                                 /* Didn't find transsym in ccl. */
 968                                 nset[++numstates] = tsp;
 969                                 }
 970
 971                         else
 972                                 for ( j = 0; j < lenccl; ++j )
 973                                         {
 974                                         ch = ccltbl[ccllist + j];
 975
 976                                         if ( ch == 0 )
 977                                                 ch = NUL_ec;
 978
 979                                         if ( ch > transsym )
 980                                                 break;
 981                                         else if ( ch == transsym )
 982                                                 {
 983                                                 nset[++numstates] = tsp;
 984                                                 break;
 985                                                 }
 986                                         }
 987                         }
 988
 989                 else if ( sym >= 'A' && sym <= 'Z' && caseins )
 990                         flexfatal(
 991                         _( "consistency check failed in symfollowset" ) );
 992
 993                 else if ( sym == SYM_EPSILON )
 994                         { /* do nothing */
 995                         }
 996
 997                 else if ( ABS( ecgroup[sym] ) == transsym )
 998                         nset[++numstates] = tsp;
 999
1000                 bottom: ;
1001                 }
1002
1003         return numstates;
1004         }
1005
1006
1007 /* sympartition - partition characters with same out-transitions
1008  *
1009  * synopsis
1010  *    sympartition( int ds[current_max_dfa_size], int numstates,
1011  *                      int symlist[numecs], int duplist[numecs] );
1012  */
1013
1014 void sympartition( ds, numstates, symlist, duplist )
1015 int ds[], numstates;
1016 int symlist[], duplist[];
1017         {
1018         int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
1019
1020         /* Partitioning is done by creating equivalence classes for those
1021          * characters which have out-transitions from the given state.  Thus
1022          * we are really creating equivalence classes of equivalence classes.
1023          */
1024
1025         for ( i = 1; i <= numecs; ++i )
1026                 { /* initialize equivalence class list */
1027                 duplist[i] = i - 1;
1028                 dupfwd[i] = i + 1;
1029                 }
1030
1031         duplist[1] = NIL;
1032         dupfwd[numecs] = NIL;
1033
1034         for ( i = 1; i <= numstates; ++i )
1035                 {
1036                 ns = ds[i];
1037                 tch = transchar[ns];
1038
1039                 if ( tch != SYM_EPSILON )
1040                         {
1041                         if ( tch < -lastccl || tch >= csize )
1042                                 {
1043                                 flexfatal(
1044                 _( "bad transition character detected in sympartition()" ) );
1045                                 }
1046
1047                         if ( tch >= 0 )
1048                                 { /* character transition */
1049                                 int ec = ecgroup[tch];
1050
1051                                 mkechar( ec, dupfwd, duplist );
1052                                 symlist[ec] = 1;
1053                                 }
1054
1055                         else
1056                                 { /* character class */
1057                                 tch = -tch;
1058
1059                                 lenccl = ccllen[tch];
1060                                 cclp = cclmap[tch];
1061                                 mkeccl( ccltbl + cclp, lenccl, dupfwd,
1062                                         duplist, numecs, NUL_ec );
1063
1064                                 if ( cclng[tch] )
1065                                         {
1066                                         j = 0;
1067
1068                                         for ( k = 0; k < lenccl; ++k )
1069                                                 {
1070                                                 ich = ccltbl[cclp + k];
1071
1072                                                 if ( ich == 0 )
1073                                                         ich = NUL_ec;
1074
1075                                                 for ( ++j; j < ich; ++j )
1076                                                         symlist[j] = 1;
1077                                                 }
1078
1079                                         for ( ++j; j <= numecs; ++j )
1080                                                 symlist[j] = 1;
1081                                         }
1082
1083                                 else
1084                                         for ( k = 0; k < lenccl; ++k )
1085                                                 {
1086                                                 ich = ccltbl[cclp + k];
1087
1088                                                 if ( ich == 0 )
1089                                                         ich = NUL_ec;
1090
1091                                                 symlist[ich] = 1;
1092                                                 }
1093                                 }
1094                         }
1095                 }
1096         }