This source file includes following definitions.
- streq
- isasciidigit
- isblank
- to_uchar
- newline_constraint
- letter_constraint
- other_constraint
- succeeds_in_context
- prev_newline_dependent
- prev_letter_dependent
- accepting
- accepts_in_context
- mbs_to_wchar
- prtok
- tstbit
- setbit
- clrbit
- zeroset
- fillset
- notset
- equal
- emptyset
- maybe_realloc
- charclass_index
- unibyte_word_constituent
- char_context
- setbit_wc
- setbit_case_fold_c
- fetch_wc
- bracket_fetch_wc
- find_pred
- parse_bracket_exp
- push_lex_state
- pop_lex_state
- lex
- addtok_mb
- addtok
- addtok_wc
- add_utf8_anychar
- atom
- nsubtoks
- copytoks
- closure
- branch
- regexp
- dfaparse
- copy
- alloc_position_set
- insert
- append
- merge_constrained
- merge
- merge2
- delete
- replace
- state_index
- epsclosure
- charclass_context
- state_separate_contexts
- merge_nfa_state
- compare
- reorder_tokens
- dfaoptimize
- dfaanalyze
- realloc_trans_if_necessary
- build_state
- transit_state_singlebyte
- transit_state
- skip_remains_mb
- dfaexec_main
- dfaexec_mb
- dfaexec_sb
- dfaexec_noop
- dfaexec
- dfasuperset
- dfaisfast
- free_mbdata
- dfasupported
- maybe_disable_superset_dfa
- dfassbuild
- dfacomp
- dfafree
- icatalloc
- freelist
- enlistnew
- enlist
- comsubs
- addlists
- inboth
- allocmust
- resetmust
- freemust
- dfamust
- dfamustfree
- dfaalloc
- dfasyntax
- dfacopysyntax
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 #include <config.h>
  24 
  25 #include "dfa.h"
  26 
  27 #include "flexmember.h"
  28 #include "idx.h"
  29 #include "verify.h"
  30 
  31 #include <assert.h>
  32 #include <ctype.h>
  33 #include <stdint.h>
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <limits.h>
  37 #include <string.h>
  38 
  39 
  40 
  41 
  42 
  43 
  44 #define assume_nonnull(x) assume ((x) != NULL)
  45 
  46 static bool
  47 streq (char const *a, char const *b)
     
  48 {
  49   return strcmp (a, b) == 0;
  50 }
  51 
  52 static bool
  53 isasciidigit (char c)
     
  54 {
  55   return '0' <= c && c <= '9';
  56 }
  57 
  58 #include "gettext.h"
  59 #define _(str) gettext (str)
  60 
  61 #include <wchar.h>
  62 
  63 #include "xalloc.h"
  64 #include "localeinfo.h"
  65 
  66 #ifndef FALLTHROUGH
  67 # if 201710L < __STDC_VERSION__
  68 #  define FALLTHROUGH [[__fallthrough__]]
  69 # elif (__GNUC__ >= 7) || (__clang_major__ >= 10)
  70 #  define FALLTHROUGH __attribute__ ((__fallthrough__))
  71 # else
  72 #  define FALLTHROUGH ((void) 0)
  73 # endif
  74 #endif
  75 
  76 #ifndef MIN
  77 # define MIN(a,b) ((a) < (b) ? (a) : (b))
  78 #endif
  79 
  80 
  81 #ifdef setbit
  82 # undef setbit
  83 #endif
  84 #ifdef clrbit
  85 # undef clrbit
  86 #endif
  87 
  88 
  89 #if !defined isblank && !defined HAVE_ISBLANK && !defined GNULIB_ISBLANK
  90 # define isblank dfa_isblank
  91 static int
  92 isblank (int c)
     
  93 {
  94   return c == ' ' || c == '\t';
  95 }
  96 #endif
  97 
  98 
  99 enum { NOTCHAR = 1 << CHAR_BIT };
 100 
 101 #ifdef UINT_LEAST64_MAX
 102 
 103 
 104 enum { CHARCLASS_WORD_BITS = 64 };
 105 
 106 
 107 
 108 typedef uint_least64_t charclass_word;
 109 
 110 
 111 
 112 
 113 # define CHARCLASS_PAIR(lo, hi) (((charclass_word) (hi) << 32) + (lo))
 114 
 115 #else
 116 
 117 enum { CHARCLASS_WORD_BITS = 32 };
 118 typedef unsigned long charclass_word;
 119 # define CHARCLASS_PAIR(lo, hi) lo, hi
 120 #endif
 121 
 122 
 123 #define CHARCLASS_INIT(a, b, c, d, e, f, g, h)          \
 124    {{                                                   \
 125       CHARCLASS_PAIR (a, b), CHARCLASS_PAIR (c, d),     \
 126       CHARCLASS_PAIR (e, f), CHARCLASS_PAIR (g, h)      \
 127    }}
 128 
 129 
 130 static charclass_word const CHARCLASS_WORD_MASK
 131   = ((charclass_word) 1 << (CHARCLASS_WORD_BITS - 1) << 1) - 1;
 132 
 133 
 134 enum
 135 {
 136   CHARCLASS_WORDS = (NOTCHAR + CHARCLASS_WORD_BITS - 1) / CHARCLASS_WORD_BITS
 137 };
 138 
 139 
 140 typedef struct { charclass_word w[CHARCLASS_WORDS]; } charclass;
 141 
 142 
 143 
 144 
 145 static unsigned char
 146 to_uchar (char ch)
     
 147 {
 148   return ch;
 149 }
 150 
 151 
 152 
 153 
 154 
 155 
 156 
 157 
 158 
 159 
 160 
 161 
 162 
 163 enum
 164   {
 165     CTX_NONE = 1,
 166     CTX_LETTER = 2,
 167     CTX_NEWLINE = 4,
 168     CTX_ANY = 7
 169   };
 170 
 171 
 172 
 173 
 174 
 175 
 176 
 177 
 178 
 179 
 180 
 181 
 182 
 183 
 184 
 185 
 186 static int
 187 newline_constraint (int constraint)
     
 188 {
 189   return (constraint >> 6) & 7;
 190 }
 191 static int
 192 letter_constraint (int constraint)
     
 193 {
 194   return (constraint >> 3) & 7;
 195 }
 196 static int
 197 other_constraint (int constraint)
     
 198 {
 199   return constraint & 7;
 200 }
 201 
 202 static bool
 203 succeeds_in_context (int constraint, int prev, int curr)
     
 204 {
 205   return !! (((curr & CTX_NONE      ? other_constraint (constraint) : 0) \
 206               | (curr & CTX_LETTER  ? letter_constraint (constraint) : 0) \
 207               | (curr & CTX_NEWLINE ? newline_constraint (constraint) : 0)) \
 208              & prev);
 209 }
 210 
 211 
 212 static bool
 213 prev_newline_dependent (int constraint)
     
 214 {
 215   return ((constraint ^ constraint >> 2) & 0111) != 0;
 216 }
 217 static bool
 218 prev_letter_dependent (int constraint)
     
 219 {
 220   return ((constraint ^ constraint >> 1) & 0111) != 0;
 221 }
 222 
 223 
 224 
 225 
 226 
 227 enum
 228   {
 229     NO_CONSTRAINT = 0777,
 230     BEGLINE_CONSTRAINT = 0444,
 231     ENDLINE_CONSTRAINT = 0700,
 232     BEGWORD_CONSTRAINT = 0050,
 233     ENDWORD_CONSTRAINT = 0202,
 234     LIMWORD_CONSTRAINT = 0252,
 235     NOTLIMWORD_CONSTRAINT = 0525
 236   };
 237 
 238 
 239 
 240 
 241 
 242 typedef ptrdiff_t token;
 243 static token const TOKEN_MAX = PTRDIFF_MAX;
 244 
 245 
 246 
 247 typedef ptrdiff_t state_num;
 248 
 249 
 250 enum
 251 {
 252   END = -1,                     
 253 
 254 
 255 
 256 
 257 
 258 
 259 
 260   
 261 
 262   
 263 
 264 
 265 
 266 
 267   EMPTY = NOTCHAR,              
 268 
 269 
 270   QMARK,                        
 271 
 272 
 273 
 274   STAR,                         
 275 
 276 
 277 
 278   PLUS,                         
 279 
 280 
 281 
 282   REPMN,                        
 283 
 284 
 285 
 286   CAT,                          
 287 
 288 
 289 
 290 
 291   OR,                           
 292 
 293 
 294   LPAREN,                       
 295 
 296 
 297   RPAREN,                       
 298 
 299   WCHAR,                        
 300 
 301 
 302   ANYCHAR,                      
 303 
 304 
 305 
 306   BEG,                          
 307 
 308 
 309   BEGLINE,                      
 310 
 311 
 312 
 313   ENDLINE,                      
 314 
 315 
 316   BEGWORD,                      
 317 
 318 
 319 
 320   ENDWORD,                      
 321 
 322 
 323   LIMWORD,                      
 324 
 325 
 326 
 327   NOTLIMWORD,                   
 328 
 329 
 330 
 331   BACKREF,                      
 332 
 333 
 334 
 335 
 336 
 337 
 338 
 339   MBCSET,                       
 340 
 341 
 342   CSET                          
 343 
 344 
 345 };
 346 
 347 
 348 
 349 
 350 
 351 
 352 typedef struct
 353 {
 354   idx_t index;                  
 355   unsigned int constraint;      
 356 } position;
 357 
 358 
 359 typedef struct
 360 {
 361   position *elems;              
 362   idx_t nelem;                  
 363   idx_t alloc;                  
 364 } position_set;
 365 
 366 
 367 
 368 
 369 typedef struct
 370 {
 371   size_t hash;                  
 372   position_set elems;           
 373   unsigned char context;        
 374   unsigned short constraint;    
 375   position_set mbps;            
 376 
 377 
 378   state_num mb_trindex;         
 379 
 380 
 381 } dfa_state;
 382 
 383 
 384 
 385 enum { MAX_TRCOUNT = 1024 };
 386 
 387 
 388 
 389 struct mb_char_classes
 390 {
 391   ptrdiff_t cset;
 392   bool invert;
 393   wchar_t *chars;               
 394   idx_t nchars;
 395   idx_t nchars_alloc;
 396 };
 397 
 398 struct regex_syntax
 399 {
 400   
 401   reg_syntax_t syntax_bits;
 402   bool syntax_bits_set;
 403 
 404   
 405   bool case_fold;
 406 
 407   
 408 
 409   bool anchor;
 410 
 411   
 412   unsigned char eolbyte;
 413 
 414   
 415   char sbit[NOTCHAR];
 416 
 417   
 418 
 419   bool never_trail[NOTCHAR];
 420 
 421   
 422   charclass letters;
 423 
 424   
 425   charclass newline;
 426 };
 427 
 428 
 429 
 430 
 431 
 432 struct lexer_state
 433 {
 434   char const *ptr;      
 435   idx_t left;           
 436   token lasttok;        
 437   idx_t parens;         
 438   int minrep, maxrep;   
 439 
 440   
 441 
 442 
 443   wint_t wctok;
 444 
 445   
 446   struct mb_char_classes brack;
 447 
 448   
 449   bool laststart;
 450 };
 451 
 452 
 453 
 454 struct parser_state
 455 {
 456   token tok;               
 457   idx_t depth;             
 458 
 459 
 460 
 461 
 462 };
 463 
 464 
 465 struct dfa
 466 {
 467   
 468   charclass *charclasses;       
 469   idx_t cindex;                 
 470   idx_t calloc;                 
 471   ptrdiff_t canychar;           
 472 
 473   
 474   struct lexer_state lex;
 475 
 476   
 477   struct parser_state parse;
 478 
 479   
 480   token *tokens;                
 481   idx_t tindex;                 
 482   idx_t talloc;                 
 483   idx_t depth;                  
 484 
 485 
 486   idx_t nleaves;                
 487 
 488   idx_t nregexps;               
 489 
 490   bool fast;                    
 491   bool epsilon;                 
 492   token utf8_anychar_classes[9]; 
 493   mbstate_t mbs;                
 494 
 495   
 496 
 497   
 498 
 499 
 500 
 501 
 502 
 503 
 504 
 505 
 506 
 507 
 508 
 509 
 510 
 511   char *multibyte_prop;
 512 
 513   
 514   struct dfa *superset;             
 515 
 516   
 517   dfa_state *states;            
 518   state_num sindex;             
 519   idx_t salloc;                 
 520 
 521   
 522   position_set *follows;        
 523 
 524 
 525 
 526 
 527 
 528 
 529   bool searchflag;              
 530 
 531 
 532 
 533 
 534 
 535 
 536 
 537   
 538   int *constraints;             
 539 
 540   int *separates;               
 541 
 542 
 543   
 544   state_num tralloc;            
 545 
 546 
 547   int trcount;                  
 548 
 549 
 550   int min_trcount;              
 551 
 552 
 553   state_num **trans;            
 554 
 555 
 556 
 557 
 558 
 559 
 560 
 561   state_num **fails;            
 562 
 563 
 564 
 565   char *success;                
 566 
 567   state_num *newlines;          
 568 
 569 
 570 
 571 
 572 
 573 
 574   state_num initstate_notbol;   
 575 
 576 
 577 
 578   position_set mb_follows;      
 579   state_num **mb_trans;         
 580 
 581   state_num mb_trcount;         
 582 
 583 
 584   
 585 
 586   struct regex_syntax syntax;
 587 
 588   
 589 
 590 
 591   
 592   char *(*dfaexec) (struct dfa *, char const *, char *,
 593                     bool, idx_t *, bool *);
 594 
 595   
 596   struct localeinfo localeinfo;
 597 };
 598 
 599 
 600 
 601 
 602 static bool
 603 accepting (state_num s, struct dfa const *r)
     
 604 {
 605   return r->states[s].constraint != 0;
 606 }
 607 
 608 
 609 static bool
 610 accepts_in_context (int prev, int curr, state_num state, struct dfa const *dfa)
     
 611 {
 612   return succeeds_in_context (dfa->states[state].constraint, prev, curr);
 613 }
 614 
 615 static void regexp (struct dfa *dfa);
 616 
 617 
 618 
 619 
 620 
 621 
 622 
 623 
 624 
 625 
 626 
 627 
 628 
 629 
 630 
 631 
 632 
 633 
 634 static int
 635 mbs_to_wchar (wint_t *pwc, char const *s, idx_t n, struct dfa *d)
     
 636 {
 637   unsigned char uc = s[0];
 638   wint_t wc = d->localeinfo.sbctowc[uc];
 639 
 640   if (wc == WEOF)
 641     {
 642       wchar_t wch;
 643       size_t nbytes = mbrtowc (&wch, s, n, &d->mbs);
 644       if (0 < nbytes && nbytes < (size_t) -2)
 645         {
 646           *pwc = wch;
 647           return nbytes;
 648         }
 649       memset (&d->mbs, 0, sizeof d->mbs);
 650     }
 651 
 652   *pwc = wc;
 653   return 1;
 654 }
 655 
 656 #ifdef DEBUG
 657 
 658 static void
 659 prtok (token t)
     
 660 {
 661   if (t <= END)
 662     fprintf (stderr, "END");
 663   else if (0 <= t && t < NOTCHAR)
 664     {
 665       unsigned int ch = t;
 666       fprintf (stderr, "0x%02x", ch);
 667     }
 668   else
 669     {
 670       char const *s;
 671       switch (t)
 672         {
 673         case BEG:
 674           s = "BEG";
 675           break;
 676         case EMPTY:
 677           s = "EMPTY";
 678           break;
 679         case BACKREF:
 680           s = "BACKREF";
 681           break;
 682         case BEGLINE:
 683           s = "BEGLINE";
 684           break;
 685         case ENDLINE:
 686           s = "ENDLINE";
 687           break;
 688         case BEGWORD:
 689           s = "BEGWORD";
 690           break;
 691         case ENDWORD:
 692           s = "ENDWORD";
 693           break;
 694         case LIMWORD:
 695           s = "LIMWORD";
 696           break;
 697         case NOTLIMWORD:
 698           s = "NOTLIMWORD";
 699           break;
 700         case QMARK:
 701           s = "QMARK";
 702           break;
 703         case STAR:
 704           s = "STAR";
 705           break;
 706         case PLUS:
 707           s = "PLUS";
 708           break;
 709         case CAT:
 710           s = "CAT";
 711           break;
 712         case OR:
 713           s = "OR";
 714           break;
 715         case LPAREN:
 716           s = "LPAREN";
 717           break;
 718         case RPAREN:
 719           s = "RPAREN";
 720           break;
 721         case ANYCHAR:
 722           s = "ANYCHAR";
 723           break;
 724         case MBCSET:
 725           s = "MBCSET";
 726           break;
 727         default:
 728           s = "CSET";
 729           break;
 730         }
 731       fprintf (stderr, "%s", s);
 732     }
 733 }
 734 #endif 
 735 
 736 
 737 
 738 static bool
 739 tstbit (unsigned int b, charclass const *c)
     
 740 {
 741   return c->w[b / CHARCLASS_WORD_BITS] >> b % CHARCLASS_WORD_BITS & 1;
 742 }
 743 
 744 static void
 745 setbit (unsigned int b, charclass *c)
     
 746 {
 747   charclass_word one = 1;
 748   c->w[b / CHARCLASS_WORD_BITS] |= one << b % CHARCLASS_WORD_BITS;
 749 }
 750 
 751 static void
 752 clrbit (unsigned int b, charclass *c)
     
 753 {
 754   charclass_word one = 1;
 755   c->w[b / CHARCLASS_WORD_BITS] &= ~(one << b % CHARCLASS_WORD_BITS);
 756 }
 757 
 758 static void
 759 zeroset (charclass *s)
     
 760 {
 761   memset (s, 0, sizeof *s);
 762 }
 763 
 764 static void
 765 fillset (charclass *s)
     
 766 {
 767   for (int i = 0; i < CHARCLASS_WORDS; i++)
 768     s->w[i] = CHARCLASS_WORD_MASK;
 769 }
 770 
 771 static void
 772 notset (charclass *s)
     
 773 {
 774   for (int i = 0; i < CHARCLASS_WORDS; ++i)
 775     s->w[i] = CHARCLASS_WORD_MASK & ~s->w[i];
 776 }
 777 
 778 static bool
 779 equal (charclass const *s1, charclass const *s2)
     
 780 {
 781   charclass_word w = 0;
 782   for (int i = 0; i < CHARCLASS_WORDS; i++)
 783     w |= s1->w[i] ^ s2->w[i];
 784   return w == 0;
 785 }
 786 
 787 static bool
 788 emptyset (charclass const *s)
     
 789 {
 790   charclass_word w = 0;
 791   for (int i = 0; i < CHARCLASS_WORDS; i++)
 792     w |= s->w[i];
 793   return w == 0;
 794 }
 795 
 796 
 797 
 798 
 799 
 800 
 801 
 802 
 803 
 804 
 805 static void *
 806 maybe_realloc (void *pa, idx_t i, idx_t *nitems,
     
 807                ptrdiff_t nitems_max, idx_t item_size)
 808 {
 809   if (i < *nitems)
 810     return pa;
 811   return xpalloc (pa, nitems, 1, nitems_max, item_size);
 812 }
 813 
 814 
 815 static idx_t
 816 charclass_index (struct dfa *d, charclass const *s)
     
 817 {
 818   idx_t i;
 819 
 820   for (i = 0; i < d->cindex; ++i)
 821     if (equal (s, &d->charclasses[i]))
 822       return i;
 823   d->charclasses = maybe_realloc (d->charclasses, d->cindex, &d->calloc,
 824                                   TOKEN_MAX - CSET, sizeof *d->charclasses);
 825   ++d->cindex;
 826   d->charclasses[i] = *s;
 827   return i;
 828 }
 829 
 830 static bool
 831 unibyte_word_constituent (struct dfa const *dfa, unsigned char c)
     
 832 {
 833   return dfa->localeinfo.sbctowc[c] != WEOF && (isalnum (c) || (c) == '_');
 834 }
 835 
 836 static int
 837 char_context (struct dfa const *dfa, unsigned char c)
     
 838 {
 839   if (c == dfa->syntax.eolbyte && !dfa->syntax.anchor)
 840     return CTX_NEWLINE;
 841   if (unibyte_word_constituent (dfa, c))
 842     return CTX_LETTER;
 843   return CTX_NONE;
 844 }
 845 
 846 
 847 
 848 
 849 
 850 
 851 static bool
 852 setbit_wc (wint_t wc, charclass *c)
     
 853 {
 854   int b = wctob (wc);
 855   if (b < 0)
 856     return false;
 857 
 858   setbit (b, c);
 859   return true;
 860 }
 861 
 862 
 863 
 864 static void
 865 setbit_case_fold_c (int b, charclass *c)
     
 866 {
 867   int ub = toupper (b);
 868   for (int i = 0; i < NOTCHAR; i++)
 869     if (toupper (i) == ub)
 870       setbit (i, c);
 871 }
 872 
 873 
 874 
 875 
 876 
 877 
 878 
 879 static int
 880 fetch_wc (struct dfa *dfa)
     
 881 {
 882   int nbytes = mbs_to_wchar (&dfa->lex.wctok, dfa->lex.ptr, dfa->lex.left,
 883                              dfa);
 884   int c = nbytes == 1 ? to_uchar (dfa->lex.ptr[0]) : EOF;
 885   dfa->lex.ptr += nbytes;
 886   dfa->lex.left -= nbytes;
 887   return c;
 888 }
 889 
 890 
 891 
 892 static int
 893 bracket_fetch_wc (struct dfa *dfa)
     
 894 {
 895   if (! dfa->lex.left)
 896     dfaerror (_("unbalanced ["));
 897   return fetch_wc (dfa);
 898 }
 899 
 900 typedef int predicate (int);
 901 
 902 
 903 
 904 
 905 
 906 struct dfa_ctype
 907 {
 908   const char *name;
 909   predicate *func;
 910   bool single_byte_only;
 911 };
 912 
 913 static const struct dfa_ctype prednames[] = {
 914   {"alpha", isalpha, false},
 915   {"upper", isupper, false},
 916   {"lower", islower, false},
 917   {"digit", isdigit, true},
 918   {"xdigit", isxdigit, false},
 919   {"space", isspace, false},
 920   {"punct", ispunct, false},
 921   {"alnum", isalnum, false},
 922   {"print", isprint, false},
 923   {"graph", isgraph, false},
 924   {"cntrl", iscntrl, false},
 925   {"blank", isblank, false},
 926   {NULL, NULL, false}
 927 };
 928 
 929 static const struct dfa_ctype *_GL_ATTRIBUTE_PURE
 930 find_pred (const char *str)
     
 931 {
 932   for (int i = 0; prednames[i].name; i++)
 933     if (streq (str, prednames[i].name))
 934       return &prednames[i];
 935   return NULL;
 936 }
 937 
 938 
 939 
 940 static token
 941 parse_bracket_exp (struct dfa *dfa)
     
 942 {
 943   
 944 
 945   bool known_bracket_exp = true;
 946 
 947   
 948 
 949 
 950 
 951 
 952   int colon_warning_state;
 953 
 954   dfa->lex.brack.nchars = 0;
 955   charclass ccl;
 956   zeroset (&ccl);
 957   int c = bracket_fetch_wc (dfa);
 958   bool invert = c == '^';
 959   if (invert)
 960     {
 961       c = bracket_fetch_wc (dfa);
 962       known_bracket_exp = dfa->localeinfo.simple;
 963     }
 964   wint_t wc = dfa->lex.wctok;
 965   int c1;
 966   wint_t wc1;
 967   colon_warning_state = (c == ':');
 968   do
 969     {
 970       c1 = NOTCHAR;     
 971       colon_warning_state &= ~2;
 972 
 973       
 974 
 975 
 976 
 977       if (c == '[')
 978         {
 979           c1 = bracket_fetch_wc (dfa);
 980           wc1 = dfa->lex.wctok;
 981 
 982           if ((c1 == ':' && (dfa->syntax.syntax_bits & RE_CHAR_CLASSES))
 983               || c1 == '.' || c1 == '=')
 984             {
 985               enum { MAX_BRACKET_STRING_LEN = 32 };
 986               char str[MAX_BRACKET_STRING_LEN + 1];
 987               int len = 0;
 988               for (;;)
 989                 {
 990                   c = bracket_fetch_wc (dfa);
 991                   if (dfa->lex.left == 0
 992                       || (c == c1 && dfa->lex.ptr[0] == ']'))
 993                     break;
 994                   if (len < MAX_BRACKET_STRING_LEN)
 995                     str[len++] = c;
 996                   else
 997                     
 998                     str[0] = '\0';
 999                 }
1000               str[len] = '\0';
1001 
1002               
1003               c = bracket_fetch_wc (dfa);
1004               wc = dfa->lex.wctok;
1005               if (c1 == ':')
1006                 
1007 
1008 
1009 
1010                 {
1011                   char const *class
1012                     = (dfa->syntax.case_fold && (streq (str, "upper")
1013                                                  || streq (str, "lower"))
1014                        ? "alpha" : str);
1015                   const struct dfa_ctype *pred = find_pred (class);
1016                   if (!pred)
1017                     dfaerror (_("invalid character class"));
1018 
1019                   if (dfa->localeinfo.multibyte && !pred->single_byte_only)
1020                     known_bracket_exp = false;
1021                   else
1022                     for (int c2 = 0; c2 < NOTCHAR; ++c2)
1023                       if (pred->func (c2))
1024                         setbit (c2, &ccl);
1025                 }
1026               else
1027                 known_bracket_exp = false;
1028 
1029               colon_warning_state |= 8;
1030 
1031               
1032               c1 = bracket_fetch_wc (dfa);
1033               wc1 = dfa->lex.wctok;
1034               continue;
1035             }
1036 
1037           
1038 
1039         }
1040 
1041       if (c == '\\'
1042           && (dfa->syntax.syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
1043         {
1044           c = bracket_fetch_wc (dfa);
1045           wc = dfa->lex.wctok;
1046         }
1047 
1048       if (c1 == NOTCHAR)
1049         {
1050           c1 = bracket_fetch_wc (dfa);
1051           wc1 = dfa->lex.wctok;
1052         }
1053 
1054       if (c1 == '-')
1055         
1056         {
1057           int c2 = bracket_fetch_wc (dfa);
1058           wint_t wc2 = dfa->lex.wctok;
1059 
1060           
1061 
1062 
1063           if (c2 == '[' && dfa->lex.ptr[0] == '.')
1064             {
1065               known_bracket_exp = false;
1066               c2 = ']';
1067             }
1068 
1069           if (c2 == ']')
1070             {
1071               
1072 
1073               dfa->lex.ptr--;
1074               dfa->lex.left++;
1075             }
1076           else
1077             {
1078               if (c2 == '\\' && (dfa->syntax.syntax_bits
1079                                  & RE_BACKSLASH_ESCAPE_IN_LISTS))
1080                 {
1081                   c2 = bracket_fetch_wc (dfa);
1082                   wc2 = dfa->lex.wctok;
1083                 }
1084 
1085               colon_warning_state |= 8;
1086               c1 = bracket_fetch_wc (dfa);
1087               wc1 = dfa->lex.wctok;
1088 
1089               
1090               if (wc != wc2 || wc == WEOF)
1091                 {
1092                   if (dfa->localeinfo.simple
1093                       || (isasciidigit (c) & isasciidigit (c2)))
1094                     {
1095                       for (int ci = c; ci <= c2; ci++)
1096                         if (dfa->syntax.case_fold && isalpha (ci))
1097                           setbit_case_fold_c (ci, &ccl);
1098                         else
1099                           setbit (ci, &ccl);
1100                     }
1101                   else
1102                     known_bracket_exp = false;
1103 
1104                   continue;
1105                 }
1106             }
1107         }
1108 
1109       colon_warning_state |= (c == ':') ? 2 : 4;
1110 
1111       if (!dfa->localeinfo.multibyte)
1112         {
1113           if (dfa->syntax.case_fold && isalpha (c))
1114             setbit_case_fold_c (c, &ccl);
1115           else
1116             setbit (c, &ccl);
1117           continue;
1118         }
1119 
1120       if (wc == WEOF)
1121         known_bracket_exp = false;
1122       else
1123         {
1124           wchar_t folded[CASE_FOLDED_BUFSIZE + 1];
1125           int n = (dfa->syntax.case_fold
1126                    ? case_folded_counterparts (wc, folded + 1) + 1
1127                    : 1);
1128           folded[0] = wc;
1129           for (int i = 0; i < n; i++)
1130             if (!setbit_wc (folded[i], &ccl))
1131               {
1132                 dfa->lex.brack.chars
1133                   = maybe_realloc (dfa->lex.brack.chars, dfa->lex.brack.nchars,
1134                                    &dfa->lex.brack.nchars_alloc, -1,
1135                                    sizeof *dfa->lex.brack.chars);
1136                 dfa->lex.brack.chars[dfa->lex.brack.nchars++] = folded[i];
1137               }
1138         }
1139     }
1140   while ((wc = wc1, (c = c1) != ']'));
1141 
1142   if (colon_warning_state == 7)
1143     dfawarn (_("character class syntax is [[:space:]], not [:space:]"));
1144 
1145   if (! known_bracket_exp)
1146     return BACKREF;
1147 
1148   if (dfa->localeinfo.multibyte && (invert || dfa->lex.brack.nchars != 0))
1149     {
1150       dfa->lex.brack.invert = invert;
1151       dfa->lex.brack.cset = emptyset (&ccl) ? -1 : charclass_index (dfa, &ccl);
1152       return MBCSET;
1153     }
1154 
1155   if (invert)
1156     {
1157       notset (&ccl);
1158       if (dfa->syntax.syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
1159         clrbit ('\n', &ccl);
1160     }
1161 
1162   return CSET + charclass_index (dfa, &ccl);
1163 }
1164 
1165 struct lexptr
1166 {
1167   char const *ptr;
1168   idx_t left;
1169 };
1170 
1171 static void
1172 push_lex_state (struct dfa *dfa, struct lexptr *ls, char const *s)
     
1173 {
1174   ls->ptr = dfa->lex.ptr;
1175   ls->left = dfa->lex.left;
1176   dfa->lex.ptr = s;
1177   dfa->lex.left = strlen (s);
1178 }
1179 
1180 static void
1181 pop_lex_state (struct dfa *dfa, struct lexptr const *ls)
     
1182 {
1183   dfa->lex.ptr = ls->ptr;
1184   dfa->lex.left = ls->left;
1185 }
1186 
1187 static token
1188 lex (struct dfa *dfa)
     
1189 {
1190   bool backslash = false;
1191 
1192   
1193 
1194 
1195 
1196 
1197 
1198   for (int i = 0; i < 2; ++i)
1199     {
1200       if (! dfa->lex.left)
1201         return dfa->lex.lasttok = END;
1202       int c = fetch_wc (dfa);
1203 
1204       switch (c)
1205         {
1206         case '\\':
1207           if (backslash)
1208             goto normal_char;
1209           if (dfa->lex.left == 0)
1210             dfaerror (_("unfinished \\ escape"));
1211           backslash = true;
1212           break;
1213 
1214         case '^':
1215           if (backslash)
1216             goto normal_char;
1217           if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
1218               || dfa->lex.lasttok == END || dfa->lex.lasttok == LPAREN
1219               || dfa->lex.lasttok == OR)
1220             return dfa->lex.lasttok = BEGLINE;
1221           goto normal_char;
1222 
1223         case '$':
1224           if (backslash)
1225             goto normal_char;
1226           if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
1227               || dfa->lex.left == 0
1228               || ((dfa->lex.left
1229                    > !(dfa->syntax.syntax_bits & RE_NO_BK_PARENS))
1230                   && (dfa->lex.ptr[!(dfa->syntax.syntax_bits & RE_NO_BK_PARENS)
1231                                    & (dfa->lex.ptr[0] == '\\')]
1232                       == ')'))
1233               || ((dfa->lex.left
1234                    > !(dfa->syntax.syntax_bits & RE_NO_BK_VBAR))
1235                   && (dfa->lex.ptr[!(dfa->syntax.syntax_bits & RE_NO_BK_VBAR)
1236                                    & (dfa->lex.ptr[0] == '\\')]
1237                       == '|'))
1238               || ((dfa->syntax.syntax_bits & RE_NEWLINE_ALT)
1239                   && dfa->lex.left > 0 && dfa->lex.ptr[0] == '\n'))
1240             return dfa->lex.lasttok = ENDLINE;
1241           goto normal_char;
1242 
1243         case '1':
1244         case '2':
1245         case '3':
1246         case '4':
1247         case '5':
1248         case '6':
1249         case '7':
1250         case '8':
1251         case '9':
1252           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_BK_REFS))
1253             {
1254               dfa->lex.laststart = false;
1255               return dfa->lex.lasttok = BACKREF;
1256             }
1257           goto normal_char;
1258 
1259         case '`':
1260           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
1261             {
1262               
1263               return dfa->lex.lasttok = BEGLINE;
1264             }
1265           goto normal_char;
1266 
1267         case '\'':
1268           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
1269             {
1270               
1271               return dfa->lex.lasttok = ENDLINE;
1272             }
1273           goto normal_char;
1274 
1275         case '<':
1276           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
1277             return dfa->lex.lasttok = BEGWORD;
1278           goto normal_char;
1279 
1280         case '>':
1281           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
1282             return dfa->lex.lasttok = ENDWORD;
1283           goto normal_char;
1284 
1285         case 'b':
1286           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
1287             return dfa->lex.lasttok = LIMWORD;
1288           goto normal_char;
1289 
1290         case 'B':
1291           if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
1292             return dfa->lex.lasttok = NOTLIMWORD;
1293           goto normal_char;
1294 
1295         case '?':
1296           if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
1297             goto normal_char;
1298           if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
1299             goto normal_char;
1300           if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
1301               && dfa->lex.laststart)
1302             goto normal_char;
1303           return dfa->lex.lasttok = QMARK;
1304 
1305         case '*':
1306           if (backslash)
1307             goto normal_char;
1308           if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
1309               && dfa->lex.laststart)
1310             goto normal_char;
1311           return dfa->lex.lasttok = STAR;
1312 
1313         case '+':
1314           if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
1315             goto normal_char;
1316           if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
1317             goto normal_char;
1318           if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
1319               && dfa->lex.laststart)
1320             goto normal_char;
1321           return dfa->lex.lasttok = PLUS;
1322 
1323         case '{':
1324           if (!(dfa->syntax.syntax_bits & RE_INTERVALS))
1325             goto normal_char;
1326           if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_BRACES) == 0))
1327             goto normal_char;
1328           if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
1329               && dfa->lex.laststart)
1330             goto normal_char;
1331 
1332           
1333 
1334 
1335 
1336 
1337 
1338           {
1339             char const *p = dfa->lex.ptr;
1340             char const *lim = p + dfa->lex.left;
1341             dfa->lex.minrep = dfa->lex.maxrep = -1;
1342             for (; p != lim && isasciidigit (*p); p++)
1343               dfa->lex.minrep = (dfa->lex.minrep < 0
1344                                  ? *p - '0'
1345                                  : MIN (RE_DUP_MAX + 1,
1346                                         dfa->lex.minrep * 10 + *p - '0'));
1347             if (p != lim)
1348               {
1349                 if (*p != ',')
1350                   dfa->lex.maxrep = dfa->lex.minrep;
1351                 else
1352                   {
1353                     if (dfa->lex.minrep < 0)
1354                       dfa->lex.minrep = 0;
1355                     while (++p != lim && isasciidigit (*p))
1356                       dfa->lex.maxrep
1357                         = (dfa->lex.maxrep < 0
1358                            ? *p - '0'
1359                            : MIN (RE_DUP_MAX + 1,
1360                                   dfa->lex.maxrep * 10 + *p - '0'));
1361                   }
1362               }
1363             if (! ((! backslash || (p != lim && *p++ == '\\'))
1364                    && p != lim && *p++ == '}'
1365                    && 0 <= dfa->lex.minrep
1366                    && (dfa->lex.maxrep < 0
1367                        || dfa->lex.minrep <= dfa->lex.maxrep)))
1368               {
1369                 if (dfa->syntax.syntax_bits & RE_INVALID_INTERVAL_ORD)
1370                   goto normal_char;
1371                 dfaerror (_("invalid content of \\{\\}"));
1372               }
1373             if (RE_DUP_MAX < dfa->lex.maxrep)
1374               dfaerror (_("regular expression too big"));
1375             dfa->lex.ptr = p;
1376             dfa->lex.left = lim - p;
1377           }
1378           dfa->lex.laststart = false;
1379           return dfa->lex.lasttok = REPMN;
1380 
1381         case '|':
1382           if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
1383             goto normal_char;
1384           if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_VBAR) == 0))
1385             goto normal_char;
1386           dfa->lex.laststart = true;
1387           return dfa->lex.lasttok = OR;
1388 
1389         case '\n':
1390           if (dfa->syntax.syntax_bits & RE_LIMITED_OPS
1391               || backslash || !(dfa->syntax.syntax_bits & RE_NEWLINE_ALT))
1392             goto normal_char;
1393           dfa->lex.laststart = true;
1394           return dfa->lex.lasttok = OR;
1395 
1396         case '(':
1397           if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
1398             goto normal_char;
1399           dfa->lex.parens++;
1400           dfa->lex.laststart = true;
1401           return dfa->lex.lasttok = LPAREN;
1402 
1403         case ')':
1404           if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
1405             goto normal_char;
1406           if (dfa->lex.parens == 0
1407               && dfa->syntax.syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
1408             goto normal_char;
1409           dfa->lex.parens--;
1410           dfa->lex.laststart = false;
1411           return dfa->lex.lasttok = RPAREN;
1412 
1413         case '.':
1414           if (backslash)
1415             goto normal_char;
1416           if (dfa->canychar < 0)
1417             {
1418               charclass ccl;
1419               fillset (&ccl);
1420               if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE))
1421                 clrbit ('\n', &ccl);
1422               if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL)
1423                 clrbit ('\0', &ccl);
1424               if (dfa->localeinfo.multibyte)
1425                 for (int c2 = 0; c2 < NOTCHAR; c2++)
1426                   if (dfa->localeinfo.sbctowc[c2] == WEOF)
1427                     clrbit (c2, &ccl);
1428               dfa->canychar = charclass_index (dfa, &ccl);
1429             }
1430           dfa->lex.laststart = false;
1431           return dfa->lex.lasttok = (dfa->localeinfo.multibyte
1432                                      ? ANYCHAR
1433                                      : CSET + dfa->canychar);
1434 
1435         case 's':
1436         case 'S':
1437           if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
1438             goto normal_char;
1439           if (!dfa->localeinfo.multibyte)
1440             {
1441               charclass ccl;
1442               zeroset (&ccl);
1443               for (int c2 = 0; c2 < NOTCHAR; ++c2)
1444                 if (isspace (c2))
1445                   setbit (c2, &ccl);
1446               if (c == 'S')
1447                 notset (&ccl);
1448               dfa->lex.laststart = false;
1449               return dfa->lex.lasttok = CSET + charclass_index (dfa, &ccl);
1450             }
1451 
1452           
1453 
1454 
1455           
1456 
1457 
1458           {
1459             struct lexptr ls;
1460             push_lex_state (dfa, &ls, &"^[:space:]]"[c == 's']);
1461             dfa->lex.lasttok = parse_bracket_exp (dfa);
1462             pop_lex_state (dfa, &ls);
1463           }
1464 
1465           dfa->lex.laststart = false;
1466           return dfa->lex.lasttok;
1467 
1468         case 'w':
1469         case 'W':
1470           if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
1471             goto normal_char;
1472 
1473           if (!dfa->localeinfo.multibyte)
1474             {
1475               charclass ccl;
1476               zeroset (&ccl);
1477               for (int c2 = 0; c2 < NOTCHAR; ++c2)
1478                 if (dfa->syntax.sbit[c2] == CTX_LETTER)
1479                   setbit (c2, &ccl);
1480               if (c == 'W')
1481                 notset (&ccl);
1482               dfa->lex.laststart = false;
1483               return dfa->lex.lasttok = CSET + charclass_index (dfa, &ccl);
1484             }
1485 
1486           
1487 
1488 
1489           
1490 
1491 
1492           {
1493             struct lexptr ls;
1494             push_lex_state (dfa, &ls, &"^_[:alnum:]]"[c == 'w']);
1495             dfa->lex.lasttok = parse_bracket_exp (dfa);
1496             pop_lex_state (dfa, &ls);
1497           }
1498 
1499           dfa->lex.laststart = false;
1500           return dfa->lex.lasttok;
1501 
1502         case '[':
1503           if (backslash)
1504             goto normal_char;
1505           dfa->lex.laststart = false;
1506           return dfa->lex.lasttok = parse_bracket_exp (dfa);
1507 
1508         default:
1509         normal_char:
1510           dfa->lex.laststart = false;
1511           
1512 
1513           if (dfa->localeinfo.multibyte)
1514             return dfa->lex.lasttok = WCHAR;
1515 
1516           if (dfa->syntax.case_fold && isalpha (c))
1517             {
1518               charclass ccl;
1519               zeroset (&ccl);
1520               setbit_case_fold_c (c, &ccl);
1521               return dfa->lex.lasttok = CSET + charclass_index (dfa, &ccl);
1522             }
1523 
1524           return dfa->lex.lasttok = c;
1525         }
1526     }
1527 
1528   
1529 
1530   abort ();
1531   return END;                   
1532 }
1533 
1534 static void
1535 addtok_mb (struct dfa *dfa, token t, char mbprop)
     
1536 {
1537   if (dfa->talloc == dfa->tindex)
1538     {
1539       dfa->tokens = xpalloc (dfa->tokens, &dfa->talloc, 1, -1,
1540                              sizeof *dfa->tokens);
1541       if (dfa->localeinfo.multibyte)
1542         dfa->multibyte_prop = xreallocarray (dfa->multibyte_prop, dfa->talloc,
1543                                              sizeof *dfa->multibyte_prop);
1544     }
1545   if (dfa->localeinfo.multibyte)
1546     dfa->multibyte_prop[dfa->tindex] = mbprop;
1547   dfa->tokens[dfa->tindex++] = t;
1548 
1549   switch (t)
1550     {
1551     case QMARK:
1552     case STAR:
1553     case PLUS:
1554       break;
1555 
1556     case CAT:
1557     case OR:
1558       dfa->parse.depth--;
1559       break;
1560 
1561     case EMPTY:
1562       dfa->epsilon = true;
1563       goto increment_depth;
1564 
1565     case BACKREF:
1566       dfa->fast = false;
1567       goto increment_nleaves;
1568 
1569     case BEGLINE:
1570     case ENDLINE:
1571     case BEGWORD:
1572     case ENDWORD:
1573     case LIMWORD:
1574     case NOTLIMWORD:
1575       dfa->epsilon = true;
1576       FALLTHROUGH;
1577     default:
1578     increment_nleaves:
1579       dfa->nleaves++;
1580     increment_depth:
1581       dfa->parse.depth++;
1582       if (dfa->depth < dfa->parse.depth)
1583         dfa->depth = dfa->parse.depth;
1584       break;
1585     }
1586 }
1587 
1588 static void addtok_wc (struct dfa *dfa, wint_t wc);
1589 
1590 
1591 
1592 static void
1593 addtok (struct dfa *dfa, token t)
     
1594 {
1595   if (dfa->localeinfo.multibyte && t == MBCSET)
1596     {
1597       bool need_or = false;
1598 
1599       
1600 
1601       for (idx_t i = 0; i < dfa->lex.brack.nchars; i++)
1602         {
1603           addtok_wc (dfa, dfa->lex.brack.chars[i]);
1604           if (need_or)
1605             addtok (dfa, OR);
1606           need_or = true;
1607         }
1608       dfa->lex.brack.nchars = 0;
1609 
1610       
1611 
1612       if (dfa->lex.brack.cset != -1)
1613         {
1614           addtok (dfa, CSET + dfa->lex.brack.cset);
1615           if (need_or)
1616             addtok (dfa, OR);
1617         }
1618     }
1619   else
1620     {
1621       addtok_mb (dfa, t, 3);
1622     }
1623 }
1624 
1625 
1626 
1627 
1628 
1629 
1630 
1631 static void
1632 addtok_wc (struct dfa *dfa, wint_t wc)
     
1633 {
1634   unsigned char buf[MB_LEN_MAX];
1635   mbstate_t s = { 0 };
1636   size_t stored_bytes = wcrtomb ((char *) buf, wc, &s);
1637   int buflen;
1638 
1639   if (stored_bytes != (size_t) -1)
1640     buflen = stored_bytes;
1641   else
1642     {
1643       
1644 
1645       buflen = 1;
1646       buf[0] = 0;
1647     }
1648 
1649   addtok_mb (dfa, buf[0], buflen == 1 ? 3 : 1);
1650   for (int i = 1; i < buflen; i++)
1651     {
1652       addtok_mb (dfa, buf[i], i == buflen - 1 ? 2 : 0);
1653       addtok (dfa, CAT);
1654     }
1655 }
1656 
1657 static void
1658 add_utf8_anychar (struct dfa *dfa)
     
1659 {
1660   
1661 
1662 
1663 
1664 
1665 
1666 
1667 
1668 
1669 
1670 
1671 
1672 
1673 
1674 
1675 
1676 
1677 
1678 
1679 
1680   
1681   enum { A, B, C, E, F, H, J, K, M };
1682 
1683   
1684   enum { D_token = 0xe0, G_token = 0xed, I_token = 0xf0, L_token = 0xf4 };
1685 
1686   static charclass const utf8_classes[] = {
1687     
1688     CHARCLASS_INIT (0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0, 0, 0, 0),
1689 
1690     
1691     CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0xfffffffc, 0),
1692 
1693     
1694     CHARCLASS_INIT (0, 0, 0, 0, 0xffffffff, 0xffffffff, 0, 0),
1695 
1696     
1697 
1698     
1699     CHARCLASS_INIT (0, 0, 0, 0, 0, 0xffffffff, 0, 0),
1700 
1701     
1702     CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0, 0xdffe),
1703 
1704     
1705 
1706     
1707     CHARCLASS_INIT (0, 0, 0, 0, 0xffff, 0, 0, 0),
1708 
1709     
1710 
1711     
1712     CHARCLASS_INIT (0, 0, 0, 0, 0xffff0000, 0xffffffff, 0, 0),
1713 
1714     
1715     CHARCLASS_INIT (0, 0, 0, 0, 0, 0, 0, 0xe0000),
1716 
1717     
1718 
1719     
1720     CHARCLASS_INIT (0, 0, 0, 0, 0xff, 0, 0, 0),
1721   };
1722 
1723   
1724   if (dfa->utf8_anychar_classes[0] == 0)
1725     {
1726       charclass c = utf8_classes[0];
1727       if (! (dfa->syntax.syntax_bits & RE_DOT_NEWLINE))
1728         clrbit ('\n', &c);
1729       if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL)
1730         clrbit ('\0', &c);
1731       dfa->utf8_anychar_classes[0] = CSET + charclass_index (dfa, &c);
1732 
1733       for (int i = 1; i < sizeof utf8_classes / sizeof *utf8_classes; i++)
1734         dfa->utf8_anychar_classes[i]
1735           = CSET + charclass_index (dfa, &utf8_classes[i]);
1736     }
1737 
1738   
1739 
1740 
1741 
1742   addtok (dfa, dfa->utf8_anychar_classes[A]);
1743   addtok (dfa, dfa->utf8_anychar_classes[B]);
1744   addtok (dfa, D_token);
1745   addtok (dfa, dfa->utf8_anychar_classes[E]);
1746   addtok (dfa, CAT);
1747   addtok (dfa, OR);
1748   addtok (dfa, G_token);
1749   addtok (dfa, dfa->utf8_anychar_classes[H]);
1750   addtok (dfa, CAT);
1751   addtok (dfa, OR);
1752   addtok (dfa, dfa->utf8_anychar_classes[F]);
1753   addtok (dfa, I_token);
1754   addtok (dfa, dfa->utf8_anychar_classes[J]);
1755   addtok (dfa, CAT);
1756   addtok (dfa, OR);
1757   addtok (dfa, L_token);
1758   addtok (dfa, dfa->utf8_anychar_classes[M]);
1759   addtok (dfa, CAT);
1760   addtok (dfa, OR);
1761   addtok (dfa, dfa->utf8_anychar_classes[K]);
1762   for (int i = 0; i < 3; i++)
1763     {
1764       addtok (dfa, dfa->utf8_anychar_classes[C]);
1765       addtok (dfa, CAT);
1766       addtok (dfa, OR);
1767     }
1768 }
1769 
1770 
1771 
1772 
1773 
1774 
1775 
1776 
1777 
1778 
1779 
1780 
1781 
1782 
1783 
1784 
1785 
1786 
1787 
1788 
1789 
1790 
1791 
1792 
1793 
1794 
1795 
1796 
1797 
1798 
1799 
1800 
1801 
1802 
1803 
1804 
1805 static void
1806 atom (struct dfa *dfa)
     
1807 {
1808   if ((0 <= dfa->parse.tok && dfa->parse.tok < NOTCHAR)
1809       || dfa->parse.tok >= CSET
1810       || dfa->parse.tok == BEG || dfa->parse.tok == BACKREF
1811       || dfa->parse.tok == BEGLINE || dfa->parse.tok == ENDLINE
1812       || dfa->parse.tok == BEGWORD || dfa->parse.tok == ENDWORD
1813       || dfa->parse.tok == LIMWORD || dfa->parse.tok == NOTLIMWORD
1814       || dfa->parse.tok == ANYCHAR || dfa->parse.tok == MBCSET)
1815     {
1816       if (dfa->parse.tok == ANYCHAR && dfa->localeinfo.using_utf8)
1817         {
1818           
1819 
1820 
1821 
1822 
1823 
1824 
1825           add_utf8_anychar (dfa);
1826         }
1827       else
1828         addtok (dfa, dfa->parse.tok);
1829       dfa->parse.tok = lex (dfa);
1830     }
1831   else if (dfa->parse.tok == WCHAR)
1832     {
1833       if (dfa->lex.wctok == WEOF)
1834         addtok (dfa, BACKREF);
1835       else
1836         {
1837           addtok_wc (dfa, dfa->lex.wctok);
1838 
1839           if (dfa->syntax.case_fold)
1840             {
1841               wchar_t folded[CASE_FOLDED_BUFSIZE];
1842               int n = case_folded_counterparts (dfa->lex.wctok, folded);
1843               for (int i = 0; i < n; i++)
1844                 {
1845                   addtok_wc (dfa, folded[i]);
1846                   addtok (dfa, OR);
1847                 }
1848             }
1849         }
1850 
1851       dfa->parse.tok = lex (dfa);
1852     }
1853   else if (dfa->parse.tok == LPAREN)
1854     {
1855       dfa->parse.tok = lex (dfa);
1856       regexp (dfa);
1857       if (dfa->parse.tok != RPAREN)
1858         dfaerror (_("unbalanced ("));
1859       dfa->parse.tok = lex (dfa);
1860     }
1861   else
1862     addtok (dfa, EMPTY);
1863 }
1864 
1865 
1866 static idx_t _GL_ATTRIBUTE_PURE
1867 nsubtoks (struct dfa const *dfa, idx_t tindex)
     
1868 {
1869   switch (dfa->tokens[tindex - 1])
1870     {
1871     default:
1872       return 1;
1873     case QMARK:
1874     case STAR:
1875     case PLUS:
1876       return 1 + nsubtoks (dfa, tindex - 1);
1877     case CAT:
1878     case OR:
1879       {
1880         idx_t ntoks1 = nsubtoks (dfa, tindex - 1);
1881         return 1 + ntoks1 + nsubtoks (dfa, tindex - 1 - ntoks1);
1882       }
1883     }
1884 }
1885 
1886 
1887 static void
1888 copytoks (struct dfa *dfa, idx_t tindex, idx_t ntokens)
     
1889 {
1890   if (dfa->localeinfo.multibyte)
1891     for (idx_t i = 0; i < ntokens; i++)
1892       addtok_mb (dfa, dfa->tokens[tindex + i],
1893                  dfa->multibyte_prop[tindex + i]);
1894   else
1895     for (idx_t i = 0; i < ntokens; i++)
1896       addtok_mb (dfa, dfa->tokens[tindex + i], 3);
1897 }
1898 
1899 static void
1900 closure (struct dfa *dfa)
     
1901 {
1902   atom (dfa);
1903   while (dfa->parse.tok == QMARK || dfa->parse.tok == STAR
1904          || dfa->parse.tok == PLUS || dfa->parse.tok == REPMN)
1905     if (dfa->parse.tok == REPMN && (dfa->lex.minrep || dfa->lex.maxrep))
1906       {
1907         idx_t ntokens = nsubtoks (dfa, dfa->tindex);
1908         idx_t tindex = dfa->tindex - ntokens;
1909         if (dfa->lex.maxrep < 0)
1910           addtok (dfa, PLUS);
1911         if (dfa->lex.minrep == 0)
1912           addtok (dfa, QMARK);
1913         int i;
1914         for (i = 1; i < dfa->lex.minrep; i++)
1915           {
1916             copytoks (dfa, tindex, ntokens);
1917             addtok (dfa, CAT);
1918           }
1919         for (; i < dfa->lex.maxrep; i++)
1920           {
1921             copytoks (dfa, tindex, ntokens);
1922             addtok (dfa, QMARK);
1923             addtok (dfa, CAT);
1924           }
1925         dfa->parse.tok = lex (dfa);
1926       }
1927     else if (dfa->parse.tok == REPMN)
1928       {
1929         dfa->tindex -= nsubtoks (dfa, dfa->tindex);
1930         dfa->parse.tok = lex (dfa);
1931         closure (dfa);
1932       }
1933     else
1934       {
1935         addtok (dfa, dfa->parse.tok);
1936         dfa->parse.tok = lex (dfa);
1937       }
1938 }
1939 
1940 static void
1941 branch (struct dfa* dfa)
     
1942 {
1943   closure (dfa);
1944   while (dfa->parse.tok != RPAREN && dfa->parse.tok != OR
1945          && dfa->parse.tok >= 0)
1946     {
1947       closure (dfa);
1948       addtok (dfa, CAT);
1949     }
1950 }
1951 
1952 static void
1953 regexp (struct dfa *dfa)
     
1954 {
1955   branch (dfa);
1956   while (dfa->parse.tok == OR)
1957     {
1958       dfa->parse.tok = lex (dfa);
1959       branch (dfa);
1960       addtok (dfa, OR);
1961     }
1962 }
1963 
1964 
1965 
1966 void
1967 dfaparse (char const *s, idx_t len, struct dfa *d)
     
1968 {
1969   d->lex.ptr = s;
1970   d->lex.left = len;
1971   d->lex.lasttok = END;
1972   d->lex.laststart = true;
1973 
1974   if (!d->syntax.syntax_bits_set)
1975     dfaerror (_("no syntax specified"));
1976 
1977   if (!d->nregexps)
1978     addtok (d, BEG);
1979 
1980   d->parse.tok = lex (d);
1981   d->parse.depth = d->depth;
1982 
1983   regexp (d);
1984 
1985   if (d->parse.tok != END)
1986     dfaerror (_("unbalanced )"));
1987 
1988   addtok (d, END - d->nregexps);
1989   addtok (d, CAT);
1990 
1991   if (d->nregexps)
1992     addtok (d, OR);
1993 
1994   ++d->nregexps;
1995 }
1996 
1997 
1998 
1999 
2000 static void
2001 copy (position_set const *src, position_set *dst)
     
2002 {
2003   if (dst->alloc < src->nelem)
2004     {
2005       free (dst->elems);
2006       dst->elems = xpalloc (NULL, &dst->alloc, src->nelem - dst->alloc, -1,
2007                             sizeof *dst->elems);
2008     }
2009   dst->nelem = src->nelem;
2010   if (src->nelem != 0)
2011     memcpy (dst->elems, src->elems, src->nelem * sizeof *dst->elems);
2012 }
2013 
2014 static void
2015 alloc_position_set (position_set *s, idx_t size)
     
2016 {
2017   s->elems = xnmalloc (size, sizeof *s->elems);
2018   s->alloc = size;
2019   s->nelem = 0;
2020 }
2021 
2022 
2023 
2024 
2025 
2026 static void
2027 insert (position p, position_set *s)
     
2028 {
2029   idx_t count = s->nelem;
2030   idx_t lo = 0, hi = count;
2031   while (lo < hi)
2032     {
2033       idx_t mid = (lo + hi) >> 1;
2034       if (s->elems[mid].index < p.index)
2035         lo = mid + 1;
2036       else if (s->elems[mid].index == p.index)
2037         {
2038           s->elems[mid].constraint |= p.constraint;
2039           return;
2040         }
2041       else
2042         hi = mid;
2043     }
2044 
2045   s->elems = maybe_realloc (s->elems, count, &s->alloc, -1, sizeof *s->elems);
2046   for (idx_t i = count; i > lo; i--)
2047     s->elems[i] = s->elems[i - 1];
2048   s->elems[lo] = p;
2049   ++s->nelem;
2050 }
2051 
2052 static void
2053 append (position p, position_set *s)
     
2054 {
2055   idx_t count = s->nelem;
2056   s->elems = maybe_realloc (s->elems, count, &s->alloc, -1, sizeof *s->elems);
2057   s->elems[s->nelem++] = p;
2058 }
2059 
2060 
2061 
2062 
2063 static void
2064 merge_constrained (position_set const *s1, position_set const *s2,
     
2065                    unsigned int c2, position_set *m)
2066 {
2067   idx_t i = 0, j = 0;
2068 
2069   if (m->alloc - s1->nelem < s2->nelem)
2070     {
2071       free (m->elems);
2072       m->alloc = s1->nelem;
2073       m->elems = xpalloc (NULL, &m->alloc, s2->nelem, -1, sizeof *m->elems);
2074     }
2075   m->nelem = 0;
2076   while (i < s1->nelem || j < s2->nelem)
2077     if (! (j < s2->nelem)
2078         || (i < s1->nelem && s1->elems[i].index <= s2->elems[j].index))
2079       {
2080         unsigned int c = ((i < s1->nelem && j < s2->nelem
2081                            && s1->elems[i].index == s2->elems[j].index)
2082                           ? s2->elems[j++].constraint & c2
2083                           : 0);
2084         m->elems[m->nelem].index = s1->elems[i].index;
2085         m->elems[m->nelem++].constraint = s1->elems[i++].constraint | c;
2086       }
2087     else
2088       {
2089         if (s2->elems[j].constraint & c2)
2090           {
2091             m->elems[m->nelem].index = s2->elems[j].index;
2092             m->elems[m->nelem++].constraint = s2->elems[j].constraint & c2;
2093           }
2094         j++;
2095       }
2096 }
2097 
2098 
2099 
2100 static void
2101 merge (position_set const *s1, position_set const *s2, position_set *m)
     
2102 {
2103   merge_constrained (s1, s2, -1, m);
2104 }
2105 
2106 
2107 
2108 static void
2109 merge2 (position_set *dst, position_set const *src, position_set *m)
     
2110 {
2111   if (src->nelem < 4)
2112     {
2113       for (idx_t i = 0; i < src->nelem; i++)
2114         insert (src->elems[i], dst);
2115     }
2116    else
2117     {
2118       merge (src, dst, m);
2119       copy (m, dst);
2120     }
2121 }
2122 
2123 
2124 
2125 static unsigned int
2126 delete (idx_t del, position_set *s)
     
2127 {
2128   idx_t count = s->nelem;
2129   idx_t lo = 0, hi = count;
2130   while (lo < hi)
2131     {
2132       idx_t mid = (lo + hi) >> 1;
2133       if (s->elems[mid].index < del)
2134         lo = mid + 1;
2135       else if (s->elems[mid].index == del)
2136         {
2137           unsigned int c = s->elems[mid].constraint;
2138           idx_t i;
2139           for (i = mid; i + 1 < count; i++)
2140             s->elems[i] = s->elems[i + 1];
2141           s->nelem = i;
2142           return c;
2143         }
2144       else
2145         hi = mid;
2146     }
2147   return 0;
2148 }
2149 
2150 
2151 static void
2152 replace (position_set *dst, idx_t del, position_set *add,
     
2153          unsigned int constraint, position_set *tmp)
2154 {
2155   unsigned int c = delete (del, dst) & constraint;
2156 
2157   if (c)
2158     {
2159       copy (dst, tmp);
2160       merge_constrained (tmp, add, c, dst);
2161     }
2162 }
2163 
2164 
2165 
2166 
2167 static state_num
2168 state_index (struct dfa *d, position_set const *s, int context)
     
2169 {
2170   size_t hash = 0;
2171   int constraint = 0;
2172   state_num i;
2173 
2174   for (i = 0; i < s->nelem; ++i)
2175     {
2176       idx_t ind = s->elems[i].index;
2177       hash ^= ind + s->elems[i].constraint;
2178     }
2179 
2180   
2181   for (i = 0; i < d->sindex; ++i)
2182     {
2183       if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem
2184           || context != d->states[i].context)
2185         continue;
2186       state_num j;
2187       for (j = 0; j < s->nelem; ++j)
2188         if (s->elems[j].constraint != d->states[i].elems.elems[j].constraint
2189             || s->elems[j].index != d->states[i].elems.elems[j].index)
2190           break;
2191       if (j == s->nelem)
2192         return i;
2193     }
2194 
2195 #ifdef DEBUG
2196   fprintf (stderr, "new state %td\n nextpos:", i);
2197   for (state_num j = 0; j < s->nelem; j++)
2198     {
2199       fprintf (stderr, " %td:", s->elems[j].index);
2200       prtok (d->tokens[s->elems[j].index]);
2201     }
2202   fprintf (stderr, "\n context:");
2203   if (context ^ CTX_ANY)
2204     {
2205       if (context & CTX_NONE)
2206         fprintf (stderr, " CTX_NONE");
2207       if (context & CTX_LETTER)
2208         fprintf (stderr, " CTX_LETTER");
2209       if (context & CTX_NEWLINE)
2210         fprintf (stderr, " CTX_NEWLINE");
2211     }
2212   else
2213     fprintf (stderr, " CTX_ANY");
2214   fprintf (stderr, "\n");
2215 #endif
2216 
2217   for (state_num j = 0; j < s->nelem; j++)
2218     {
2219       int c = d->constraints[s->elems[j].index];
2220 
2221       if (c != 0)
2222         {
2223           if (succeeds_in_context (c, context, CTX_ANY))
2224             constraint |= c;
2225         }
2226       else if (d->tokens[s->elems[j].index] == BACKREF)
2227         constraint = NO_CONSTRAINT;
2228     }
2229 
2230 
2231   
2232   d->states = maybe_realloc (d->states, d->sindex, &d->salloc, -1,
2233                              sizeof *d->states);
2234   d->states[i].hash = hash;
2235   alloc_position_set (&d->states[i].elems, s->nelem);
2236   copy (s, &d->states[i].elems);
2237   d->states[i].context = context;
2238   d->states[i].constraint = constraint;
2239   d->states[i].mbps.nelem = 0;
2240   d->states[i].mbps.elems = NULL;
2241   d->states[i].mb_trindex = -1;
2242 
2243   ++d->sindex;
2244 
2245   return i;
2246 }
2247 
2248 
2249 
2250 
2251 
2252 
2253 
2254 static void
2255 epsclosure (struct dfa const *d, position_set *backward)
     
2256 {
2257   position_set tmp;
2258   alloc_position_set (&tmp, d->nleaves);
2259   for (idx_t i = 0; i < d->tindex; i++)
2260     if (0 < d->follows[i].nelem)
2261       {
2262         unsigned int constraint;
2263         switch (d->tokens[i])
2264           {
2265           default:
2266             continue;
2267 
2268           case BEGLINE:
2269             constraint = BEGLINE_CONSTRAINT;
2270             break;
2271           case ENDLINE:
2272             constraint = ENDLINE_CONSTRAINT;
2273             break;
2274           case BEGWORD:
2275             constraint = BEGWORD_CONSTRAINT;
2276             break;
2277           case ENDWORD:
2278             constraint = ENDWORD_CONSTRAINT;
2279             break;
2280           case LIMWORD:
2281             constraint = LIMWORD_CONSTRAINT;
2282             break;
2283           case NOTLIMWORD:
2284             constraint = NOTLIMWORD_CONSTRAINT;
2285             break;
2286           case EMPTY:
2287             constraint = NO_CONSTRAINT;
2288             break;
2289           }
2290 
2291         delete (i, &d->follows[i]);
2292 
2293         for (idx_t j = 0; j < backward[i].nelem; j++)
2294           replace (&d->follows[backward[i].elems[j].index], i, &d->follows[i],
2295                    constraint, &tmp);
2296         for (idx_t j = 0; j < d->follows[i].nelem; j++)
2297           replace (&backward[d->follows[i].elems[j].index], i, &backward[i],
2298                    NO_CONSTRAINT, &tmp);
2299       }
2300   free (tmp.elems);
2301 }
2302 
2303 
2304 
2305 
2306 static int
2307 charclass_context (struct dfa const *dfa, charclass const *c)
     
2308 {
2309   int context = 0;
2310 
2311   for (int j = 0; j < CHARCLASS_WORDS; j++)
2312     {
2313       if (c->w[j] & dfa->syntax.newline.w[j])
2314         context |= CTX_NEWLINE;
2315       if (c->w[j] & dfa->syntax.letters.w[j])
2316         context |= CTX_LETTER;
2317       if (c->w[j] & ~(dfa->syntax.letters.w[j] | dfa->syntax.newline.w[j]))
2318         context |= CTX_NONE;
2319     }
2320 
2321   return context;
2322 }
2323 
2324 
2325 
2326 
2327 
2328 
2329 
2330 static int _GL_ATTRIBUTE_PURE
2331 state_separate_contexts (struct dfa *d, position_set const *s)
     
2332 {
2333   int separate_contexts = 0;
2334 
2335   for (idx_t j = 0; j < s->nelem; j++)
2336     separate_contexts |= d->separates[s->elems[j].index];
2337 
2338   return separate_contexts;
2339 }
2340 
2341 enum
2342 {
2343   
2344   OPT_REPEAT = (1 << 0),
2345 
2346   
2347 
2348   OPT_LPAREN = (1 << 1),
2349 
2350   
2351   OPT_RPAREN = (1 << 2),
2352 
2353   
2354 
2355   OPT_WALKED = (1 << 3),
2356 
2357   
2358   OPT_QUEUED = (1 << 4)
2359 };
2360 
2361 static void
2362 merge_nfa_state (struct dfa *d, idx_t tindex, char *flags,
     
2363                  position_set *merged)
2364 {
2365   position_set *follows = d->follows;
2366   idx_t nelem = 0;
2367 
2368   for (idx_t i = 0; i < follows[tindex].nelem; i++)
2369     {
2370       idx_t sindex = follows[tindex].elems[i].index;
2371 
2372       
2373       unsigned int iconstraint = follows[tindex].elems[i].constraint;
2374       if (iconstraint == 0)
2375         continue;
2376 
2377       if (d->tokens[follows[tindex].elems[i].index] <= END)
2378         {
2379           d->constraints[tindex] |= follows[tindex].elems[i].constraint;
2380           continue;
2381         }
2382 
2383       if (sindex != tindex && !(flags[sindex] & (OPT_LPAREN | OPT_RPAREN)))
2384         {
2385           idx_t j;
2386 
2387           for (j = 0; j < nelem; j++)
2388             {
2389               idx_t dindex = follows[tindex].elems[j].index;
2390 
2391               if (dindex == tindex)
2392                 continue;
2393 
2394               if (follows[tindex].elems[j].constraint != iconstraint)
2395                 continue;
2396 
2397               if (flags[dindex] & (OPT_LPAREN | OPT_RPAREN))
2398                 continue;
2399 
2400               if (d->tokens[sindex] != d->tokens[dindex])
2401                 continue;
2402 
2403               if ((flags[sindex] ^ flags[dindex]) & OPT_REPEAT)
2404                 continue;
2405 
2406               if (flags[sindex] & OPT_REPEAT)
2407                 delete (sindex, &follows[sindex]);
2408 
2409               merge2 (&follows[dindex], &follows[sindex], merged);
2410 
2411               break;
2412             }
2413 
2414           if (j < nelem)
2415             continue;
2416         }
2417 
2418       follows[tindex].elems[nelem++] = follows[tindex].elems[i];
2419       flags[sindex] |= OPT_QUEUED;
2420     }
2421 
2422   follows[tindex].nelem = nelem;
2423 }
2424 
2425 static int
2426 compare (const void *a, const void *b)
     
2427 {
2428   position const *p = a, *q = b;
2429   return (p->index > q->index) - (p->index < q->index);
2430 }
2431 
2432 static void
2433 reorder_tokens (struct dfa *d)
     
2434 {
2435   idx_t nleaves = 0;
2436   ptrdiff_t *map = xnmalloc (d->tindex, sizeof *map);
2437   map[0] = nleaves++;
2438   for (idx_t i = 1; i < d->tindex; i++)
2439     map[i] = -1;
2440 
2441   token *tokens = xnmalloc (d->nleaves, sizeof *tokens);
2442   position_set *follows = xnmalloc (d->nleaves, sizeof *follows);
2443   int *constraints = xnmalloc (d->nleaves, sizeof *constraints);
2444   char *multibyte_prop = (d->localeinfo.multibyte
2445                           ? xnmalloc (d->nleaves, sizeof *multibyte_prop)
2446                           : NULL);
2447 
2448   for (idx_t i = 0; i < d->tindex; i++)
2449     {
2450       if (map[i] < 0)
2451         {
2452           free (d->follows[i].elems);
2453           d->follows[i].elems = NULL;
2454           d->follows[i].nelem = 0;
2455           continue;
2456         }
2457 
2458       tokens[map[i]] = d->tokens[i];
2459       follows[map[i]] = d->follows[i];
2460       constraints[map[i]] = d->constraints[i];
2461 
2462       if (multibyte_prop != NULL)
2463         multibyte_prop[map[i]] = d->multibyte_prop[i];
2464 
2465       for (idx_t j = 0; j < d->follows[i].nelem; j++)
2466         {
2467           if (map[d->follows[i].elems[j].index] == -1)
2468             map[d->follows[i].elems[j].index] = nleaves++;
2469 
2470           d->follows[i].elems[j].index = map[d->follows[i].elems[j].index];
2471         }
2472 
2473       qsort (d->follows[i].elems, d->follows[i].nelem,
2474              sizeof *d->follows[i].elems, compare);
2475     }
2476 
2477   for (idx_t i = 0; i < nleaves; i++)
2478     {
2479       d->tokens[i] = tokens[i];
2480       d->follows[i] = follows[i];
2481       d->constraints[i] = constraints[i];
2482 
2483       if (multibyte_prop != NULL)
2484         d->multibyte_prop[i] = multibyte_prop[i];
2485     }
2486 
2487   d->tindex = d->nleaves = nleaves;
2488 
2489   free (tokens);
2490   free (follows);
2491   free (constraints);
2492   free (multibyte_prop);
2493   free (map);
2494 }
2495 
2496 static void
2497 dfaoptimize (struct dfa *d)
     
2498 {
2499   char *flags = xizalloc (d->tindex);
2500 
2501   for (idx_t i = 0; i < d->tindex; i++)
2502     {
2503       for (idx_t j = 0; j < d->follows[i].nelem; j++)
2504         {
2505           if (d->follows[i].elems[j].index == i)
2506             flags[d->follows[i].elems[j].index] |= OPT_REPEAT;
2507           else if (d->follows[i].elems[j].index < i)
2508             flags[d->follows[i].elems[j].index] |= OPT_LPAREN;
2509           else if (flags[d->follows[i].elems[j].index] &= OPT_WALKED)
2510             flags[d->follows[i].elems[j].index] |= OPT_RPAREN;
2511           else
2512             flags[d->follows[i].elems[j].index] |= OPT_WALKED;
2513         }
2514     }
2515 
2516   flags[0] |= OPT_QUEUED;
2517 
2518   position_set merged0;
2519   position_set *merged = &merged0;
2520   alloc_position_set (merged, d->nleaves);
2521 
2522   d->constraints = xicalloc (d->tindex, sizeof *d->constraints);
2523 
2524   for (idx_t i = 0; i < d->tindex; i++)
2525     if (flags[i] & OPT_QUEUED)
2526       merge_nfa_state (d, i, flags, merged);
2527 
2528   reorder_tokens (d);
2529 
2530   free (merged->elems);
2531   free (flags);
2532 }
2533 
2534 
2535 
2536 
2537 
2538 
2539 
2540 
2541 
2542 
2543 
2544 
2545 
2546 
2547 
2548 
2549 
2550 
2551 
2552 
2553 
2554 
2555 
2556 
2557 
2558 
2559 
2560 
2561 
2562 
2563 
2564 
2565 
2566 
2567 
2568 
2569 
2570 
2571 
2572 
2573 
2574 
2575 
2576 
2577 
2578 
2579 
2580 
2581 
2582 
2583 
2584 
2585 
2586 static void
2587 dfaanalyze (struct dfa *d, bool searchflag)
     
2588 {
2589   
2590   position *posalloc = xnmalloc (d->nleaves, 2 * sizeof *posalloc);
2591   
2592   position *firstpos = posalloc;
2593   position *lastpos = firstpos + d->nleaves;
2594   position pos;
2595   position_set tmp;
2596 
2597   
2598   struct
2599   {
2600     
2601     bool nullable;
2602 
2603     
2604     idx_t nfirstpos;
2605     idx_t nlastpos;
2606   } *stkalloc = xnmalloc (d->depth, sizeof *stkalloc), *stk = stkalloc;
2607 
2608   position_set merged;          
2609 
2610   addtok (d, CAT);
2611   idx_t tindex = d->tindex;
2612 
2613 #ifdef DEBUG
2614   fprintf (stderr, "dfaanalyze:\n");
2615   for (idx_t i = 0; i < tindex; i++)
2616     {
2617       fprintf (stderr, " %td:", i);
2618       prtok (d->tokens[i]);
2619     }
2620   putc ('\n', stderr);
2621 #endif
2622 
2623   d->searchflag = searchflag;
2624   alloc_position_set (&merged, d->nleaves);
2625   d->follows = xicalloc (tindex, sizeof *d->follows);
2626   position_set *backward
2627     = d->epsilon ? xicalloc (tindex, sizeof *backward) : NULL;
2628 
2629   for (idx_t i = 0; i < tindex; i++)
2630     {
2631       switch (d->tokens[i])
2632         {
2633         case EMPTY:
2634           
2635           stk->nullable = true;
2636 
2637           
2638           stk->nfirstpos = stk->nlastpos = 0;
2639           stk++;
2640           break;
2641 
2642         case STAR:
2643         case PLUS:
2644           
2645 
2646           if (d->epsilon)
2647             {
2648               tmp.elems = lastpos - stk[-1].nlastpos;
2649               tmp.nelem = stk[-1].nlastpos;
2650               for (position *p = firstpos - stk[-1].nfirstpos;
2651                    p < firstpos; p++)
2652                 merge2 (&backward[p->index], &tmp, &merged);
2653             }
2654 
2655           
2656 
2657           {
2658             tmp.elems = firstpos - stk[-1].nfirstpos;
2659             tmp.nelem = stk[-1].nfirstpos;
2660             for (position *p = lastpos - stk[-1].nlastpos; p < lastpos; p++)
2661               merge2 (&d->follows[p->index], &tmp, &merged);
2662           }
2663           FALLTHROUGH;
2664         case QMARK:
2665           
2666           if (d->tokens[i] != PLUS)
2667             stk[-1].nullable = true;
2668           break;
2669 
2670         case CAT:
2671           
2672 
2673 
2674           if (backward)
2675             {
2676               tmp.nelem = stk[-2].nlastpos;
2677               tmp.elems = lastpos - stk[-1].nlastpos - stk[-2].nlastpos;
2678               for (position *p = firstpos - stk[-1].nfirstpos;
2679                    p < firstpos; p++)
2680                 merge2 (&backward[p->index], &tmp, &merged);
2681             }
2682 
2683           
2684 
2685           {
2686             tmp.nelem = stk[-1].nfirstpos;
2687             tmp.elems = firstpos - stk[-1].nfirstpos;
2688             for (position *plim = lastpos - stk[-1].nlastpos,
2689                    *p = plim - stk[-2].nlastpos;
2690                  p < plim; p++)
2691               merge2 (&d->follows[p->index], &tmp, &merged);
2692           }
2693 
2694           
2695 
2696           if (stk[-2].nullable)
2697             stk[-2].nfirstpos += stk[-1].nfirstpos;
2698           else
2699             firstpos -= stk[-1].nfirstpos;
2700 
2701           
2702 
2703           if (stk[-1].nullable)
2704             stk[-2].nlastpos += stk[-1].nlastpos;
2705           else
2706             {
2707               position *p = lastpos - stk[-1].nlastpos - stk[-2].nlastpos;
2708               for (idx_t j = 0; j < stk[-1].nlastpos; j++)
2709                 p[j] = p[j + stk[-2].nlastpos];
2710               lastpos -= stk[-2].nlastpos;
2711               stk[-2].nlastpos = stk[-1].nlastpos;
2712             }
2713 
2714           
2715           stk[-2].nullable &= stk[-1].nullable;
2716           stk--;
2717           break;
2718 
2719         case OR:
2720           
2721           stk[-2].nfirstpos += stk[-1].nfirstpos;
2722 
2723           
2724           stk[-2].nlastpos += stk[-1].nlastpos;
2725 
2726           
2727           stk[-2].nullable |= stk[-1].nullable;
2728           stk--;
2729           break;
2730 
2731         default:
2732           
2733 
2734 
2735 
2736 
2737           stk->nullable = d->tokens[i] == BACKREF;
2738 
2739           
2740           stk->nfirstpos = stk->nlastpos = 1;
2741           stk++;
2742 
2743           firstpos->index = lastpos->index = i;
2744           firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
2745           firstpos++, lastpos++;
2746 
2747           break;
2748         }
2749 #ifdef DEBUG
2750       
2751       fprintf (stderr, "node %td:", i);
2752       prtok (d->tokens[i]);
2753       putc ('\n', stderr);
2754       fprintf (stderr,
2755                stk[-1].nullable ? " nullable: yes\n" : " nullable: no\n");
2756       fprintf (stderr, " firstpos:");
2757       for (idx_t j = 0; j < stk[-1].nfirstpos; j++)
2758         {
2759           fprintf (stderr, " %td:", firstpos[j - stk[-1].nfirstpos].index);
2760           prtok (d->tokens[firstpos[j - stk[-1].nfirstpos].index]);
2761         }
2762       fprintf (stderr, "\n lastpos:");
2763       for (idx_t j = 0; j < stk[-1].nlastpos; j++)
2764         {
2765           fprintf (stderr, " %td:", lastpos[j - stk[-1].nlastpos].index);
2766           prtok (d->tokens[lastpos[j - stk[-1].nlastpos].index]);
2767         }
2768       putc ('\n', stderr);
2769 #endif
2770     }
2771 
2772   if (backward)
2773     {
2774       
2775 
2776       epsclosure (d, backward);
2777 
2778       for (idx_t i = 0; i < tindex; i++)
2779         free (backward[i].elems);
2780       free (backward);
2781     }
2782 
2783   dfaoptimize (d);
2784 
2785 #ifdef DEBUG
2786   for (idx_t i = 0; i < tindex; i++)
2787     if (d->tokens[i] == BEG || d->tokens[i] < NOTCHAR
2788         || d->tokens[i] == BACKREF || d->tokens[i] == ANYCHAR
2789         || d->tokens[i] == MBCSET || d->tokens[i] >= CSET)
2790       {
2791         fprintf (stderr, "follows(%td:", i);
2792         prtok (d->tokens[i]);
2793         fprintf (stderr, "):");
2794         for (idx_t j = 0; j < d->follows[i].nelem; j++)
2795           {
2796             fprintf (stderr, " %td:", d->follows[i].elems[j].index);
2797             prtok (d->tokens[d->follows[i].elems[j].index]);
2798           }
2799         putc ('\n', stderr);
2800       }
2801 #endif
2802 
2803   pos.index = 0;
2804   pos.constraint = NO_CONSTRAINT;
2805 
2806   alloc_position_set (&tmp, 1);
2807 
2808   append (pos, &tmp);
2809 
2810   d->separates = xicalloc (tindex, sizeof *d->separates);
2811 
2812   for (idx_t i = 0; i < tindex; i++)
2813     {
2814       if (prev_newline_dependent (d->constraints[i]))
2815         d->separates[i] |= CTX_NEWLINE;
2816       if (prev_letter_dependent (d->constraints[i]))
2817         d->separates[i] |= CTX_LETTER;
2818 
2819       for (idx_t j = 0; j < d->follows[i].nelem; j++)
2820         {
2821           if (prev_newline_dependent (d->follows[i].elems[j].constraint))
2822             d->separates[i] |= CTX_NEWLINE;
2823           if (prev_letter_dependent (d->follows[i].elems[j].constraint))
2824             d->separates[i] |= CTX_LETTER;
2825         }
2826     }
2827 
2828   
2829   int separate_contexts = state_separate_contexts (d, &tmp);
2830 
2831   
2832   if (separate_contexts & CTX_NEWLINE)
2833     state_index (d, &tmp, CTX_NEWLINE);
2834   d->initstate_notbol = d->min_trcount
2835     = state_index (d, &tmp, separate_contexts ^ CTX_ANY);
2836   if (separate_contexts & CTX_LETTER)
2837     d->min_trcount = state_index (d, &tmp, CTX_LETTER);
2838   d->min_trcount++;
2839   d->trcount = 0;
2840 
2841   free (posalloc);
2842   free (stkalloc);
2843   free (merged.elems);
2844   free (tmp.elems);
2845 }
2846 
2847 
2848 static void
2849 realloc_trans_if_necessary (struct dfa *d)
     
2850 {
2851   state_num oldalloc = d->tralloc;
2852   if (oldalloc < d->sindex)
2853     {
2854       state_num **realtrans = d->trans ? d->trans - 2 : NULL;
2855       idx_t newalloc1 = realtrans ? d->tralloc + 2 : 0;
2856       realtrans = xpalloc (realtrans, &newalloc1, d->sindex - oldalloc,
2857                            -1, sizeof *realtrans);
2858       realtrans[0] = realtrans[1] = NULL;
2859       d->trans = realtrans + 2;
2860       idx_t newalloc = d->tralloc = newalloc1 - 2;
2861       d->fails = xreallocarray (d->fails, newalloc, sizeof *d->fails);
2862       d->success = xreallocarray (d->success, newalloc, sizeof *d->success);
2863       d->newlines = xreallocarray (d->newlines, newalloc, sizeof *d->newlines);
2864       if (d->localeinfo.multibyte)
2865         {
2866           realtrans = d->mb_trans ? d->mb_trans - 2 : NULL;
2867           realtrans = xreallocarray (realtrans, newalloc1, sizeof *realtrans);
2868           if (oldalloc == 0)
2869             realtrans[0] = realtrans[1] = NULL;
2870           d->mb_trans = realtrans + 2;
2871         }
2872       for (; oldalloc < newalloc; oldalloc++)
2873         {
2874           d->trans[oldalloc] = NULL;
2875           d->fails[oldalloc] = NULL;
2876           if (d->localeinfo.multibyte)
2877             d->mb_trans[oldalloc] = NULL;
2878         }
2879     }
2880 }
2881 
2882 
2883 
2884 
2885 
2886 
2887 
2888 
2889 
2890 
2891 
2892 
2893 
2894 
2895 
2896 
2897 
2898 
2899 
2900 
2901 
2902 
2903 
2904 
2905 
2906 
2907 
2908 
2909 
2910 
2911 
2912 
2913 
2914 
2915 static state_num
2916 build_state (state_num s, struct dfa *d, unsigned char uc)
     
2917 {
2918   position_set follows;         
2919 
2920   position_set group;           
2921   position_set tmp;             
2922   state_num state;              
2923   state_num state_newline;      
2924   state_num state_letter;       
2925 
2926 #ifdef DEBUG
2927   fprintf (stderr, "build state %td\n", s);
2928 #endif
2929 
2930   
2931   state_num **ptrans = (accepting (s, d) ? d->fails : d->trans) + s;
2932   state_num *trans = *ptrans;
2933 
2934   if (!trans)
2935     {
2936       
2937 
2938 
2939 
2940       if (MAX_TRCOUNT <= d->trcount)
2941         {
2942           for (state_num i = d->min_trcount; i < d->tralloc; i++)
2943             {
2944               free (d->trans[i]);
2945               free (d->fails[i]);
2946               d->trans[i] = d->fails[i] = NULL;
2947             }
2948           d->trcount = 0;
2949         }
2950 
2951       d->trcount++;
2952       *ptrans = trans = xmalloc (NOTCHAR * sizeof *trans);
2953 
2954       
2955 
2956       for (int i = 0; i < NOTCHAR; i++)
2957         trans[i] = -2;
2958     }
2959 
2960   
2961   d->success[s] = 0;
2962   if (accepts_in_context (d->states[s].context, CTX_NEWLINE, s, d))
2963     d->success[s] |= CTX_NEWLINE;
2964   if (accepts_in_context (d->states[s].context, CTX_LETTER, s, d))
2965     d->success[s] |= CTX_LETTER;
2966   if (accepts_in_context (d->states[s].context, CTX_NONE, s, d))
2967     d->success[s] |= CTX_NONE;
2968 
2969   alloc_position_set (&follows, d->nleaves);
2970 
2971   
2972 
2973   for (idx_t j = 0; j < d->states[s].elems.nelem; j++)
2974     for (idx_t k = 0;
2975          k < d->follows[d->states[s].elems.elems[j].index].nelem; ++k)
2976       insert (d->follows[d->states[s].elems.elems[j].index].elems[k],
2977               &follows);
2978 
2979   
2980   alloc_position_set (&group, d->nleaves);
2981 
2982   
2983   charclass label;
2984   fillset (&label);
2985 
2986   for (idx_t i = 0; i < follows.nelem; i++)
2987     {
2988       charclass matches;            
2989       position pos = follows.elems[i];
2990       bool matched = false;
2991       if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR)
2992         {
2993           zeroset (&matches);
2994           setbit (d->tokens[pos.index], &matches);
2995           if (d->tokens[pos.index] == uc)
2996             matched = true;
2997         }
2998       else if (d->tokens[pos.index] >= CSET)
2999         {
3000           matches = d->charclasses[d->tokens[pos.index] - CSET];
3001           if (tstbit (uc, &matches))
3002             matched = true;
3003         }
3004       else if (d->tokens[pos.index] == ANYCHAR)
3005         {
3006           matches = d->charclasses[d->canychar];
3007           if (tstbit (uc, &matches))
3008             matched = true;
3009 
3010           
3011 
3012 
3013 
3014 
3015 
3016           if (succeeds_in_context (pos.constraint, d->states[s].context,
3017                                    CTX_NONE))
3018             {
3019               if (d->states[s].mbps.nelem == 0)
3020                 alloc_position_set (&d->states[s].mbps, 1);
3021               insert (pos, &d->states[s].mbps);
3022             }
3023         }
3024       else
3025         continue;
3026 
3027       
3028 
3029       if (pos.constraint != NO_CONSTRAINT)
3030         {
3031           if (!succeeds_in_context (pos.constraint,
3032                                     d->states[s].context, CTX_NEWLINE))
3033             for (int j = 0; j < CHARCLASS_WORDS; j++)
3034               matches.w[j] &= ~d->syntax.newline.w[j];
3035           if (!succeeds_in_context (pos.constraint,
3036                                     d->states[s].context, CTX_LETTER))
3037             for (int j = 0; j < CHARCLASS_WORDS; ++j)
3038               matches.w[j] &= ~d->syntax.letters.w[j];
3039           if (!succeeds_in_context (pos.constraint,
3040                                     d->states[s].context, CTX_NONE))
3041             for (int j = 0; j < CHARCLASS_WORDS; ++j)
3042               matches.w[j] &= d->syntax.letters.w[j] | d->syntax.newline.w[j];
3043 
3044           
3045           if (emptyset (&matches))
3046             continue;
3047 
3048           
3049 
3050 
3051           if (!tstbit (uc, &matches))
3052             matched = false;
3053         }
3054 
3055 #ifdef DEBUG
3056       fprintf (stderr, " nextpos %td:", pos.index);
3057       prtok (d->tokens[pos.index]);
3058       fprintf (stderr, " of");
3059       for (unsigned j = 0; j < NOTCHAR; j++)
3060         if (tstbit (j, &matches))
3061           fprintf (stderr, " 0x%02x", j);
3062       fprintf (stderr, "\n");
3063 #endif
3064 
3065       if (matched)
3066         {
3067           for (int k = 0; k < CHARCLASS_WORDS; ++k)
3068             label.w[k] &= matches.w[k];
3069           append (pos, &group);
3070         }
3071       else
3072         {
3073           for (int k = 0; k < CHARCLASS_WORDS; ++k)
3074             label.w[k] &= ~matches.w[k];
3075         }
3076     }
3077 
3078   alloc_position_set (&tmp, d->nleaves);
3079 
3080   if (group.nelem > 0)
3081     {
3082       
3083 
3084       if (d->searchflag)
3085         {
3086           
3087 
3088 
3089 
3090 
3091 
3092 
3093 
3094 
3095 
3096 
3097 
3098 
3099 
3100 
3101 
3102 
3103           bool mergeit = !d->localeinfo.multibyte;
3104           if (!mergeit)
3105             {
3106               mergeit = true;
3107               for (idx_t j = 0; mergeit && j < group.nelem; j++)
3108                 mergeit &= d->multibyte_prop[group.elems[j].index];
3109             }
3110           if (mergeit)
3111             merge2 (&group, &d->states[0].elems, &tmp);
3112         }
3113 
3114       
3115 
3116 
3117       int possible_contexts = charclass_context (d, &label);
3118       int separate_contexts = state_separate_contexts (d, &group);
3119 
3120       
3121       if (possible_contexts & ~separate_contexts)
3122         state = state_index (d, &group, separate_contexts ^ CTX_ANY);
3123       else
3124         state = -1;
3125       if (separate_contexts & possible_contexts & CTX_NEWLINE)
3126         state_newline = state_index (d, &group, CTX_NEWLINE);
3127       else
3128         state_newline = state;
3129       if (separate_contexts & possible_contexts & CTX_LETTER)
3130         state_letter = state_index (d, &group, CTX_LETTER);
3131       else
3132         state_letter = state;
3133 
3134       
3135       realloc_trans_if_necessary (d);
3136     }
3137 
3138   
3139 
3140 
3141   else if (d->searchflag)
3142     {
3143       state_newline = 0;
3144       state_letter = d->min_trcount - 1;
3145       state = d->initstate_notbol;
3146     }
3147   else
3148     {
3149       state_newline = -1;
3150       state_letter = -1;
3151       state = -1;
3152     }
3153 
3154   
3155   for (int i = 0; i < NOTCHAR; i++)
3156     if (tstbit (i, &label))
3157       switch (d->syntax.sbit[i])
3158         {
3159         case CTX_NEWLINE:
3160           trans[i] = state_newline;
3161           break;
3162         case CTX_LETTER:
3163           trans[i] = state_letter;
3164           break;
3165         default:
3166           trans[i] = state;
3167           break;
3168         }
3169 
3170 #ifdef DEBUG
3171   fprintf (stderr, "trans table %td", s);
3172   for (int i = 0; i < NOTCHAR; ++i)
3173     {
3174       if (!(i & 0xf))
3175         fprintf (stderr, "\n");
3176       fprintf (stderr, " %2td", trans[i]);
3177     }
3178   fprintf (stderr, "\n");
3179 #endif
3180 
3181   free (group.elems);
3182   free (follows.elems);
3183   free (tmp.elems);
3184 
3185   
3186 
3187   if (tstbit (d->syntax.eolbyte, &label))
3188     {
3189       d->newlines[s] = trans[d->syntax.eolbyte];
3190       trans[d->syntax.eolbyte] = -1;
3191     }
3192 
3193   return trans[uc];
3194 }
3195 
3196 
3197 
3198 
3199 
3200 
3201 
3202 static state_num
3203 transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const **pp)
     
3204 {
3205   state_num *t;
3206 
3207   if (d->trans[s])
3208     t = d->trans[s];
3209   else if (d->fails[s])
3210     t = d->fails[s];
3211   else
3212     {
3213       build_state (s, d, **pp);
3214       if (d->trans[s])
3215         t = d->trans[s];
3216       else
3217         {
3218           t = d->fails[s];
3219           assert (t);
3220         }
3221     }
3222 
3223   if (t[**pp] == -2)
3224     build_state (s, d, **pp);
3225 
3226   return t[*(*pp)++];
3227 }
3228 
3229 
3230 
3231 
3232 static state_num
3233 transit_state (struct dfa *d, state_num s, unsigned char const **pp,
     
3234                unsigned char const *end)
3235 {
3236   wint_t wc;
3237 
3238   int mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
3239 
3240   
3241   d->mb_follows.nelem = 0;
3242 
3243   
3244 
3245   state_num s1 = s;
3246   int mbci;
3247   for (mbci = 0; mbci < mbclen && (mbci == 0 || d->min_trcount <= s); mbci++)
3248     s = transit_state_singlebyte (d, s, pp);
3249   *pp += mbclen - mbci;
3250 
3251   if (wc == WEOF)
3252     {
3253       
3254       return s;
3255     }
3256 
3257   
3258 
3259 
3260   if (d->states[s1].mb_trindex < 0)
3261     {
3262       if (MAX_TRCOUNT <= d->mb_trcount)
3263         {
3264           state_num s3;
3265           for (s3 = -1; s3 < d->tralloc; s3++)
3266             {
3267               free (d->mb_trans[s3]);
3268               d->mb_trans[s3] = NULL;
3269             }
3270 
3271           for (state_num i = 0; i < d->sindex; i++)
3272             d->states[i].mb_trindex = -1;
3273           d->mb_trcount = 0;
3274         }
3275       d->states[s1].mb_trindex = d->mb_trcount++;
3276     }
3277 
3278   if (! d->mb_trans[s])
3279     {
3280       enum { TRANSPTR_SIZE = sizeof *d->mb_trans[s] };
3281       enum { TRANSALLOC_SIZE = MAX_TRCOUNT * TRANSPTR_SIZE };
3282       d->mb_trans[s] = xmalloc (TRANSALLOC_SIZE);
3283       for (int i = 0; i < MAX_TRCOUNT; i++)
3284         d->mb_trans[s][i] = -1;
3285     }
3286   else if (d->mb_trans[s][d->states[s1].mb_trindex] >= 0)
3287     return d->mb_trans[s][d->states[s1].mb_trindex];
3288 
3289   if (s == -1)
3290     copy (&d->states[s1].mbps, &d->mb_follows);
3291   else
3292     merge (&d->states[s1].mbps, &d->states[s].elems, &d->mb_follows);
3293 
3294   int separate_contexts = state_separate_contexts (d, &d->mb_follows);
3295   state_num s2 = state_index (d, &d->mb_follows, separate_contexts ^ CTX_ANY);
3296   realloc_trans_if_necessary (d);
3297 
3298   d->mb_trans[s][d->states[s1].mb_trindex] = s2;
3299 
3300   return s2;
3301 }
3302 
3303 
3304 
3305 
3306 
3307 
3308 
3309 
3310 
3311 
3312 
3313 
3314 
3315 
3316 
3317 
3318 static unsigned char const *
3319 skip_remains_mb (struct dfa *d, unsigned char const *p,
     
3320                  unsigned char const *mbp, char const *end)
3321 {
3322   if (d->syntax.never_trail[*p])
3323     return p;
3324   while (mbp < p)
3325     {
3326       wint_t wc;
3327       mbp += mbs_to_wchar (&wc, (char const *) mbp,
3328                            end - (char const *) mbp, d);
3329     }
3330   return mbp;
3331 }
3332 
3333 
3334 
3335 
3336 
3337 
3338 
3339 
3340 
3341 
3342 
3343 
3344 
3345 
3346 
3347 
3348 
3349 
3350 
3351 
3352 
3353 static inline char *
3354 dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
     
3355               idx_t *count, bool multibyte)
3356 {
3357   if (MAX_TRCOUNT <= d->sindex)
3358     {
3359       for (state_num s = d->min_trcount; s < d->sindex; s++)
3360         {
3361           free (d->states[s].elems.elems);
3362           free (d->states[s].mbps.elems);
3363         }
3364       d->sindex = d->min_trcount;
3365 
3366       if (d->trans)
3367         {
3368           for (state_num s = 0; s < d->tralloc; s++)
3369             {
3370               free (d->trans[s]);
3371               free (d->fails[s]);
3372               d->trans[s] = d->fails[s] = NULL;
3373             }
3374           d->trcount = 0;
3375         }
3376 
3377       if (d->localeinfo.multibyte && d->mb_trans)
3378         {
3379           for (state_num s = -1; s < d->tralloc; s++)
3380             {
3381               free (d->mb_trans[s]);
3382               d->mb_trans[s] = NULL;
3383             }
3384           for (state_num s = 0; s < d->min_trcount; s++)
3385             d->states[s].mb_trindex = -1;
3386           d->mb_trcount = 0;
3387         }
3388     }
3389 
3390   if (!d->tralloc)
3391     realloc_trans_if_necessary (d);
3392 
3393   
3394   state_num s = 0, s1 = 0;
3395 
3396   
3397   unsigned char const *p = (unsigned char const *) begin;
3398   unsigned char const *mbp = p;
3399 
3400   
3401   state_num **trans = d->trans;
3402   unsigned char eol = d->syntax.eolbyte;  
3403   unsigned char saved_end = *(unsigned char *) end;
3404   *end = eol;
3405 
3406   if (multibyte)
3407     {
3408       memset (&d->mbs, 0, sizeof d->mbs);
3409       if (d->mb_follows.alloc == 0)
3410         alloc_position_set (&d->mb_follows, d->nleaves);
3411     }
3412 
3413   idx_t nlcount = 0;
3414   for (;;)
3415     {
3416       state_num *t;
3417       while ((t = trans[s]) != NULL)
3418         {
3419           if (s < d->min_trcount)
3420             {
3421               if (!multibyte || d->states[s].mbps.nelem == 0)
3422                 {
3423                   while (t[*p] == s)
3424                     p++;
3425                 }
3426               if (multibyte)
3427                 p = mbp = skip_remains_mb (d, p, mbp, end);
3428             }
3429 
3430           if (multibyte)
3431             {
3432               s1 = s;
3433 
3434               if (d->states[s].mbps.nelem == 0
3435                   || d->localeinfo.sbctowc[*p] != WEOF || (char *) p >= end)
3436                 {
3437                   
3438 
3439                   s = t[*p++];
3440                 }
3441               else
3442                 {
3443                   s = transit_state (d, s, &p, (unsigned char *) end);
3444                   mbp = p;
3445                   trans = d->trans;
3446                 }
3447             }
3448           else
3449             {
3450               s1 = t[*p++];
3451               t = trans[s1];
3452               if (! t)
3453                 {
3454                   state_num tmp = s;
3455                   s = s1;
3456                   s1 = tmp;     
3457                   break;
3458                 }
3459               if (s < d->min_trcount)
3460                 {
3461                   while (t[*p] == s1)
3462                     p++;
3463                 }
3464               s = t[*p++];
3465             }
3466         }
3467 
3468       if (s < 0)
3469         {
3470           if (s == -2)
3471             {
3472               s = build_state (s1, d, p[-1]);
3473               trans = d->trans;
3474             }
3475           else if ((char *) p <= end && p[-1] == eol && 0 <= d->newlines[s1])
3476             {
3477               
3478 
3479               nlcount++;
3480               mbp = p;
3481 
3482               s = (allow_nl ? d->newlines[s1]
3483                    : d->syntax.sbit[eol] == CTX_NEWLINE ? 0
3484                    : d->syntax.sbit[eol] == CTX_LETTER ? d->min_trcount - 1
3485                    : d->initstate_notbol);
3486             }
3487           else
3488             {
3489               p = NULL;
3490               goto done;
3491             }
3492         }
3493       else if (d->fails[s])
3494         {
3495           if ((d->success[s] & d->syntax.sbit[*p])
3496               || ((char *) p == end
3497                   && accepts_in_context (d->states[s].context, CTX_NEWLINE, s,
3498                                          d)))
3499             goto done;
3500 
3501           if (multibyte && s < d->min_trcount)
3502             p = mbp = skip_remains_mb (d, p, mbp, end);
3503 
3504           s1 = s;
3505           if (!multibyte || d->states[s].mbps.nelem == 0
3506               || d->localeinfo.sbctowc[*p] != WEOF || (char *) p >= end)
3507             {
3508               
3509 
3510               s = d->fails[s][*p++];
3511             }
3512           else
3513             {
3514               s = transit_state (d, s, &p, (unsigned char *) end);
3515               mbp = p;
3516               trans = d->trans;
3517             }
3518         }
3519       else
3520         {
3521           build_state (s, d, p[0]);
3522           trans = d->trans;
3523         }
3524     }
3525 
3526  done:
3527   if (count)
3528     *count += nlcount;
3529   *end = saved_end;
3530   return (char *) p;
3531 }
3532 
3533 
3534 
3535 
3536 static char *
3537 dfaexec_mb (struct dfa *d, char const *begin, char *end,
     
3538             bool allow_nl, idx_t *count, bool *backref)
3539 {
3540   return dfaexec_main (d, begin, end, allow_nl, count, true);
3541 }
3542 
3543 static char *
3544 dfaexec_sb (struct dfa *d, char const *begin, char *end,
     
3545             bool allow_nl, idx_t *count, bool *backref)
3546 {
3547   return dfaexec_main (d, begin, end, allow_nl, count, false);
3548 }
3549 
3550 
3551 
3552 static char *
3553 dfaexec_noop (struct dfa *d, char const *begin, char *end,
     
3554               bool allow_nl, idx_t *count, bool *backref)
3555 {
3556   *backref = true;
3557   return (char *) begin;
3558 }
3559 
3560 
3561 
3562 
3563 
3564 char *
3565 dfaexec (struct dfa *d, char const *begin, char *end,
     
3566          bool allow_nl, idx_t *count, bool *backref)
3567 {
3568   return d->dfaexec (d, begin, end, allow_nl, count, backref);
3569 }
3570 
3571 struct dfa *
3572 dfasuperset (struct dfa const *d)
     
3573 {
3574   return d->superset;
3575 }
3576 
3577 bool
3578 dfaisfast (struct dfa const *d)
     
3579 {
3580   return d->fast;
3581 }
3582 
3583 static void
3584 free_mbdata (struct dfa *d)
     
3585 {
3586   free (d->multibyte_prop);
3587   free (d->lex.brack.chars);
3588   free (d->mb_follows.elems);
3589 
3590   if (d->mb_trans)
3591     {
3592       state_num s;
3593       for (s = -1; s < d->tralloc; s++)
3594         free (d->mb_trans[s]);
3595       free (d->mb_trans - 2);
3596     }
3597 }
3598 
3599 
3600 bool
3601 dfasupported (struct dfa const *d)
     
3602 {
3603   for (idx_t i = 0; i < d->tindex; i++)
3604     {
3605       switch (d->tokens[i])
3606         {
3607         case BEGWORD:
3608         case ENDWORD:
3609         case LIMWORD:
3610         case NOTLIMWORD:
3611           if (!d->localeinfo.multibyte)
3612             continue;
3613           FALLTHROUGH;
3614         case BACKREF:
3615         case MBCSET:
3616           return false;
3617         }
3618     }
3619   return true;
3620 }
3621 
3622 
3623 
3624 static void
3625 maybe_disable_superset_dfa (struct dfa *d)
     
3626 {
3627   if (!d->localeinfo.using_utf8)
3628     return;
3629 
3630   bool have_backref = false;
3631   for (idx_t i = 0; i < d->tindex; i++)
3632     {
3633       switch (d->tokens[i])
3634         {
3635         case ANYCHAR:
3636           
3637           abort ();
3638         case BACKREF:
3639           have_backref = true;
3640           break;
3641         case MBCSET:
3642           
3643           return;
3644         default:
3645           break;
3646         }
3647     }
3648 
3649   if (!have_backref && d->superset)
3650     {
3651       
3652       dfafree (d->superset);
3653       free (d->superset);
3654       d->superset = NULL;
3655     }
3656 
3657   free_mbdata (d);
3658   d->localeinfo.multibyte = false;
3659   d->dfaexec = dfaexec_sb;
3660   d->fast = true;
3661 }
3662 
3663 static void
3664 dfassbuild (struct dfa *d)
     
3665 {
3666   struct dfa *sup = dfaalloc ();
3667 
3668   *sup = *d;
3669   sup->localeinfo.multibyte = false;
3670   sup->dfaexec = dfaexec_sb;
3671   sup->multibyte_prop = NULL;
3672   sup->superset = NULL;
3673   sup->states = NULL;
3674   sup->sindex = 0;
3675   sup->constraints = NULL;
3676   sup->separates = NULL;
3677   sup->follows = NULL;
3678   sup->tralloc = 0;
3679   sup->trans = NULL;
3680   sup->fails = NULL;
3681   sup->success = NULL;
3682   sup->newlines = NULL;
3683 
3684   sup->charclasses = xnmalloc (sup->calloc, sizeof *sup->charclasses);
3685   if (d->cindex)
3686     {
3687       memcpy (sup->charclasses, d->charclasses,
3688               d->cindex * sizeof *sup->charclasses);
3689     }
3690 
3691   sup->tokens = xnmalloc (d->tindex, 2 * sizeof *sup->tokens);
3692   sup->talloc = d->tindex * 2;
3693 
3694   bool have_achar = false;
3695   bool have_nchar = false;
3696   idx_t j;
3697   for (idx_t i = j = 0; i < d->tindex; i++)
3698     {
3699       switch (d->tokens[i])
3700         {
3701         case ANYCHAR:
3702         case MBCSET:
3703         case BACKREF:
3704           {
3705             charclass ccl;
3706             fillset (&ccl);
3707             sup->tokens[j++] = CSET + charclass_index (sup, &ccl);
3708             sup->tokens[j++] = STAR;
3709             if (d->tokens[i + 1] == QMARK || d->tokens[i + 1] == STAR
3710                 || d->tokens[i + 1] == PLUS)
3711               i++;
3712             have_achar = true;
3713           }
3714           break;
3715         case BEGWORD:
3716         case ENDWORD:
3717         case LIMWORD:
3718         case NOTLIMWORD:
3719           if (d->localeinfo.multibyte)
3720             {
3721               
3722 
3723               sup->tokens[j++] = EMPTY;
3724               break;
3725             }
3726           FALLTHROUGH;
3727         default:
3728           sup->tokens[j++] = d->tokens[i];
3729           if ((0 <= d->tokens[i] && d->tokens[i] < NOTCHAR)
3730               || d->tokens[i] >= CSET)
3731             have_nchar = true;
3732           break;
3733         }
3734     }
3735   sup->tindex = j;
3736 
3737   if (have_nchar && (have_achar || d->localeinfo.multibyte))
3738     d->superset = sup;
3739   else
3740     {
3741       dfafree (sup);
3742       free (sup);
3743     }
3744 }
3745 
3746 
3747 
3748 
3749 void
3750 dfacomp (char const *s, idx_t len, struct dfa *d, bool searchflag)
     
3751 {
3752   if (s != NULL)
3753     dfaparse (s, len, d);
3754 
3755   dfassbuild (d);
3756 
3757   if (dfasupported (d))
3758     {
3759       maybe_disable_superset_dfa (d);
3760       dfaanalyze (d, searchflag);
3761     }
3762   else
3763     {
3764       d->dfaexec = dfaexec_noop;
3765     }
3766 
3767   if (d->superset)
3768     {
3769       d->fast = true;
3770       dfaanalyze (d->superset, searchflag);
3771     }
3772 }
3773 
3774 
3775 void
3776 dfafree (struct dfa *d)
     
3777 {
3778   free (d->charclasses);
3779   free (d->tokens);
3780 
3781   if (d->localeinfo.multibyte)
3782     free_mbdata (d);
3783 
3784   free (d->constraints);
3785   free (d->separates);
3786 
3787   for (idx_t i = 0; i < d->sindex; i++)
3788     {
3789       free (d->states[i].elems.elems);
3790       free (d->states[i].mbps.elems);
3791     }
3792   free (d->states);
3793 
3794   if (d->follows)
3795     {
3796       for (idx_t i = 0; i < d->tindex; i++)
3797         free (d->follows[i].elems);
3798       free (d->follows);
3799     }
3800 
3801   if (d->trans)
3802     {
3803       for (idx_t i = 0; i < d->tralloc; i++)
3804         {
3805           free (d->trans[i]);
3806           free (d->fails[i]);
3807         }
3808 
3809       free (d->trans - 2);
3810       free (d->fails);
3811       free (d->newlines);
3812       free (d->success);
3813     }
3814 
3815   if (d->superset)
3816     {
3817       dfafree (d->superset);
3818       free (d->superset);
3819     }
3820 }
3821 
3822 
3823 
3824 
3825 
3826 
3827 
3828 
3829 
3830 
3831 
3832 
3833 
3834 
3835 
3836 
3837 
3838 
3839 
3840 
3841 
3842 
3843 
3844 
3845 
3846 
3847 
3848 
3849 
3850 
3851 
3852 
3853 
3854 
3855 
3856 
3857 
3858 
3859 
3860 
3861 
3862 
3863 
3864 
3865 
3866 
3867 
3868 
3869 
3870 
3871 
3872 
3873 
3874 
3875 
3876 
3877 
3878 
3879 
3880 
3881 
3882 
3883 
3884 
3885 
3886 
3887 
3888 
3889 
3890 
3891 
3892 
3893 
3894 
3895 
3896 
3897 
3898 
3899 
3900 
3901 
3902 
3903 
3904 static char *
3905 icatalloc (char *old, char const *new)
     
3906 {
3907   idx_t newsize = strlen (new);
3908   if (newsize == 0)
3909     return old;
3910   idx_t oldsize = strlen (old);
3911   char *result = xirealloc (old, oldsize + newsize + 1);
3912   memcpy (result + oldsize, new, newsize + 1);
3913   return result;
3914 }
3915 
3916 static void
3917 freelist (char **cpp)
     
3918 {
3919   while (*cpp)
3920     free (*cpp++);
3921 }
3922 
3923 static char **
3924 enlistnew (char **cpp, char *new)
     
3925 {
3926   
3927   idx_t i;
3928   for (i = 0; cpp[i] != NULL; i++)
3929     if (strstr (cpp[i], new) != NULL)
3930       {
3931         free (new);
3932         return cpp;
3933       }
3934   
3935   for (idx_t j = 0; cpp[j] != NULL; )
3936     if (strstr (new, cpp[j]) == NULL)
3937       ++j;
3938     else
3939       {
3940         free (cpp[j]);
3941         if (--i == j)
3942           break;
3943         cpp[j] = cpp[i];
3944         cpp[i] = NULL;
3945       }
3946   
3947   cpp = xreallocarray (cpp, i + 2, sizeof *cpp);
3948   cpp[i] = new;
3949   cpp[i + 1] = NULL;
3950   return cpp;
3951 }
3952 
3953 static char **
3954 enlist (char **cpp, char const *str, idx_t len)
     
3955 {
3956   return enlistnew (cpp, ximemdup0 (str, len));
3957 }
3958 
3959 
3960 
3961 static char **
3962 comsubs (char *left, char const *right)
     
3963 {
3964   char **cpp = xzalloc (sizeof *cpp);
3965 
3966   for (char *lcp = left; *lcp != '\0'; lcp++)
3967     {
3968       idx_t len = 0;
3969       char *rcp = strchr (right, *lcp);
3970       while (rcp != NULL)
3971         {
3972           idx_t i;
3973           for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)
3974             continue;
3975           if (i > len)
3976             len = i;
3977           rcp = strchr (rcp + 1, *lcp);
3978         }
3979       if (len != 0)
3980         cpp = enlist (cpp, lcp, len);
3981     }
3982   return cpp;
3983 }
3984 
3985 static char **
3986 addlists (char **old, char **new)
     
3987 {
3988   for (; *new; new++)
3989     old = enlistnew (old, xstrdup (*new));
3990   return old;
3991 }
3992 
3993 
3994 
3995 static char **
3996 inboth (char **left, char **right)
     
3997 {
3998   char **both = xzalloc (sizeof *both);
3999 
4000   for (idx_t lnum = 0; left[lnum] != NULL; lnum++)
4001     {
4002       for (idx_t rnum = 0; right[rnum] != NULL; rnum++)
4003         {
4004           char **temp = comsubs (left[lnum], right[rnum]);
4005           both = addlists (both, temp);
4006           freelist (temp);
4007           free (temp);
4008         }
4009     }
4010   return both;
4011 }
4012 
4013 typedef struct must must;
4014 
4015 struct must
4016 {
4017   char **in;
4018   char *left;
4019   char *right;
4020   char *is;
4021   bool begline;
4022   bool endline;
4023   must *prev;
4024 };
4025 
4026 static must *
4027 allocmust (must *mp, idx_t size)
     
4028 {
4029   must *new_mp = xmalloc (sizeof *new_mp);
4030   new_mp->in = xzalloc (sizeof *new_mp->in);
4031   new_mp->left = xizalloc (size);
4032   new_mp->right = xizalloc (size);
4033   new_mp->is = xizalloc (size);
4034   new_mp->begline = false;
4035   new_mp->endline = false;
4036   new_mp->prev = mp;
4037   return new_mp;
4038 }
4039 
4040 static void
4041 resetmust (must *mp)
     
4042 {
4043   freelist (mp->in);
4044   mp->in[0] = NULL;
4045   mp->left[0] = mp->right[0] = mp->is[0] = '\0';
4046   mp->begline = false;
4047   mp->endline = false;
4048 }
4049 
4050 static void
4051 freemust (must *mp)
     
4052 {
4053   freelist (mp->in);
4054   free (mp->in);
4055   free (mp->left);
4056   free (mp->right);
4057   free (mp->is);
4058   free (mp);
4059 }
4060 
4061 struct dfamust *
4062 dfamust (struct dfa const *d)
     
4063 {
4064   must *mp = NULL;
4065   char const *result = "";
4066   bool exact = false;
4067   bool begline = false;
4068   bool endline = false;
4069   bool need_begline = false;
4070   bool need_endline = false;
4071   bool case_fold_unibyte = d->syntax.case_fold & !d->localeinfo.multibyte;
4072 
4073   for (idx_t ri = 1; ri + 1 < d->tindex; ri++)
4074     {
4075       token t = d->tokens[ri];
4076       switch (t)
4077         {
4078         case BEGLINE:
4079           mp = allocmust (mp, 2);
4080           mp->begline = true;
4081           need_begline = true;
4082           break;
4083         case ENDLINE:
4084           mp = allocmust (mp, 2);
4085           mp->endline = true;
4086           need_endline = true;
4087           break;
4088         case LPAREN:
4089         case RPAREN:
4090           assert (!"neither LPAREN nor RPAREN may appear here");
4091 
4092         case EMPTY:
4093         case BEGWORD:
4094         case ENDWORD:
4095         case LIMWORD:
4096         case NOTLIMWORD:
4097         case BACKREF:
4098         case ANYCHAR:
4099         case MBCSET:
4100           mp = allocmust (mp, 2);
4101           break;
4102 
4103         case STAR:
4104         case QMARK:
4105           assume_nonnull (mp);
4106           resetmust (mp);
4107           break;
4108 
4109         case OR:
4110           {
4111             char **new;
4112             must *rmp = mp;
4113             assume_nonnull (rmp);
4114             must *lmp = mp = mp->prev;
4115             assume_nonnull (lmp);
4116             idx_t j, ln, rn, n;
4117 
4118             
4119             if (streq (lmp->is, rmp->is))
4120               {
4121                 lmp->begline &= rmp->begline;
4122                 lmp->endline &= rmp->endline;
4123               }
4124             else
4125               {
4126                 lmp->is[0] = '\0';
4127                 lmp->begline = false;
4128                 lmp->endline = false;
4129               }
4130             
4131             idx_t i = 0;
4132             while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i])
4133               ++i;
4134             lmp->left[i] = '\0';
4135             
4136             ln = strlen (lmp->right);
4137             rn = strlen (rmp->right);
4138             n = ln;
4139             if (n > rn)
4140               n = rn;
4141             for (i = 0; i < n; ++i)
4142               if (lmp->right[ln - i - 1] != rmp->right[rn - i - 1])
4143                 break;
4144             for (j = 0; j < i; ++j)
4145               lmp->right[j] = lmp->right[(ln - i) + j];
4146             lmp->right[j] = '\0';
4147             new = inboth (lmp->in, rmp->in);
4148             freelist (lmp->in);
4149             free (lmp->in);
4150             lmp->in = new;
4151             freemust (rmp);
4152           }
4153           break;
4154 
4155         case PLUS:
4156           assume_nonnull (mp);
4157           mp->is[0] = '\0';
4158           break;
4159 
4160         case END:
4161           assume_nonnull (mp);
4162           assert (!mp->prev);
4163           for (idx_t i = 0; mp->in[i] != NULL; i++)
4164             if (strlen (mp->in[i]) > strlen (result))
4165               result = mp->in[i];
4166           if (streq (result, mp->is))
4167             {
4168               if ((!need_begline || mp->begline) && (!need_endline
4169                                                      || mp->endline))
4170                 exact = true;
4171               begline = mp->begline;
4172               endline = mp->endline;
4173             }
4174           goto done;
4175 
4176         case CAT:
4177           {
4178             must *rmp = mp;
4179             assume_nonnull (rmp);
4180             must *lmp = mp = mp->prev;
4181             assume_nonnull (lmp);
4182 
4183             
4184 
4185 
4186             lmp->in = addlists (lmp->in, rmp->in);
4187             if (lmp->right[0] != '\0' && rmp->left[0] != '\0')
4188               {
4189                 idx_t lrlen = strlen (lmp->right);
4190                 idx_t rllen = strlen (rmp->left);
4191                 char *tp = ximalloc (lrlen + rllen + 1);
4192                 memcpy (tp + lrlen, rmp->left, rllen + 1);
4193                 memcpy (tp, lmp->right, lrlen);
4194                 lmp->in = enlistnew (lmp->in, tp);
4195               }
4196             
4197             if (lmp->is[0] != '\0')
4198               lmp->left = icatalloc (lmp->left, rmp->left);
4199             
4200             if (rmp->is[0] == '\0')
4201               lmp->right[0] = '\0';
4202             lmp->right = icatalloc (lmp->right, rmp->right);
4203             
4204             if ((lmp->is[0] != '\0' || lmp->begline)
4205                 && (rmp->is[0] != '\0' || rmp->endline))
4206               {
4207                 lmp->is = icatalloc (lmp->is, rmp->is);
4208                 lmp->endline = rmp->endline;
4209               }
4210             else
4211               {
4212                 lmp->is[0] = '\0';
4213                 lmp->begline = false;
4214                 lmp->endline = false;
4215               }
4216             freemust (rmp);
4217           }
4218           break;
4219 
4220         case '\0':
4221           
4222           goto done;
4223 
4224         default:
4225           if (CSET <= t)
4226             {
4227               
4228 
4229 
4230 
4231               charclass *ccl = &d->charclasses[t - CSET];
4232               int j;
4233               for (j = 0; j < NOTCHAR; j++)
4234                 if (tstbit (j, ccl))
4235                   break;
4236               if (! (j < NOTCHAR))
4237                 {
4238                   mp = allocmust (mp, 2);
4239                   break;
4240                 }
4241               t = j;
4242               while (++j < NOTCHAR)
4243                 if (tstbit (j, ccl)
4244                     && ! (case_fold_unibyte
4245                           && toupper (j) == toupper (t)))
4246                   break;
4247               if (j < NOTCHAR)
4248                 {
4249                   mp = allocmust (mp, 2);
4250                   break;
4251                 }
4252             }
4253 
4254           idx_t rj = ri + 2;
4255           if (d->tokens[ri + 1] == CAT)
4256             {
4257               for (; rj < d->tindex - 1; rj += 2)
4258                 {
4259                   if ((rj != ri && (d->tokens[rj] <= 0
4260                                     || NOTCHAR <= d->tokens[rj]))
4261                       || d->tokens[rj + 1] != CAT)
4262                     break;
4263                 }
4264             }
4265           mp = allocmust (mp, ((rj - ri) >> 1) + 1);
4266           mp->is[0] = mp->left[0] = mp->right[0]
4267             = case_fold_unibyte ? toupper (t) : t;
4268 
4269           idx_t i;
4270           for (i = 1; ri + 2 < rj; i++)
4271             {
4272               ri += 2;
4273               t = d->tokens[ri];
4274               mp->is[i] = mp->left[i] = mp->right[i]
4275                 = case_fold_unibyte ? toupper (t) : t;
4276             }
4277           mp->is[i] = mp->left[i] = mp->right[i] = '\0';
4278           mp->in = enlist (mp->in, mp->is, i);
4279           break;
4280         }
4281     }
4282  done:;
4283 
4284   struct dfamust *dm = NULL;
4285   if (*result)
4286     {
4287       dm = xmalloc (FLEXSIZEOF (struct dfamust, must, strlen (result) + 1));
4288       dm->exact = exact;
4289       dm->begline = begline;
4290       dm->endline = endline;
4291       strcpy (dm->must, result);
4292     }
4293 
4294   while (mp)
4295     {
4296       must *prev = mp->prev;
4297       freemust (mp);
4298       mp = prev;
4299     }
4300 
4301   return dm;
4302 }
4303 
4304 void
4305 dfamustfree (struct dfamust *dm)
     
4306 {
4307   free (dm);
4308 }
4309 
4310 struct dfa *
4311 dfaalloc (void)
     
4312 {
4313   return xmalloc (sizeof (struct dfa));
4314 }
4315 
4316 
4317 void
4318 dfasyntax (struct dfa *dfa, struct localeinfo const *linfo,
     
4319            reg_syntax_t bits, int dfaopts)
4320 {
4321   memset (dfa, 0, offsetof (struct dfa, dfaexec));
4322   dfa->dfaexec = linfo->multibyte ? dfaexec_mb : dfaexec_sb;
4323   dfa->localeinfo = *linfo;
4324 
4325   dfa->fast = !dfa->localeinfo.multibyte;
4326 
4327   dfa->canychar = -1;
4328   dfa->syntax.syntax_bits_set = true;
4329   dfa->syntax.case_fold = (bits & RE_ICASE) != 0;
4330   dfa->syntax.anchor = (dfaopts & DFA_ANCHOR) != 0;
4331   dfa->syntax.eolbyte = dfaopts & DFA_EOL_NUL ? '\0' : '\n';
4332   dfa->syntax.syntax_bits = bits;
4333 
4334   for (int i = CHAR_MIN; i <= CHAR_MAX; ++i)
4335     {
4336       unsigned char uc = i;
4337 
4338       dfa->syntax.sbit[uc] = char_context (dfa, uc);
4339       switch (dfa->syntax.sbit[uc])
4340         {
4341         case CTX_LETTER:
4342           setbit (uc, &dfa->syntax.letters);
4343           break;
4344         case CTX_NEWLINE:
4345           setbit (uc, &dfa->syntax.newline);
4346           break;
4347         }
4348 
4349       
4350 
4351       dfa->syntax.never_trail[uc] = (dfa->localeinfo.using_utf8
4352                                      ? (uc & 0xc0) != 0x80
4353                                      : strchr ("\n\r./", uc) != NULL);
4354     }
4355 }
4356 
4357 
4358 void
4359 dfacopysyntax (struct dfa *to, struct dfa const *from)
     
4360 {
4361   memset (to, 0, offsetof (struct dfa, syntax));
4362   to->canychar = -1;
4363   to->fast = from->fast;
4364   to->syntax = from->syntax;
4365   to->dfaexec = from->dfaexec;
4366   to->localeinfo = from->localeinfo;
4367 }
4368 
4369