root/maint/gnulib/lib/regexec.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. regexec
  2. libc_hidden_def
  3. re_match
  4. weak_alias
  5. weak_alias
  6. weak_alias
  7. weak_alias
  8. re_search_stub
  9. re_copy_regs
  10. re_set_registers
  11. weak_alias
  12. re_search_internal
  13. prune_impossible_nodes
  14. acquire_init_state_context
  15. check_matching
  16. check_halt_node_context
  17. check_halt_state_context
  18. proceed_next_node
  19. push_fail_stack
  20. pop_fail_stack
  21. set_regs
  22. free_fail_stack_return
  23. update_regs
  24. sift_states_backward
  25. build_sifted_states
  26. clean_state_log_if_needed
  27. merge_state_array
  28. update_cur_sifted_state
  29. add_epsilon_src_nodes
  30. sub_epsilon_src_nodes
  31. check_dst_limits
  32. check_dst_limits_calc_pos_1
  33. check_dst_limits_calc_pos
  34. check_subexp_limits
  35. sift_states_bkref
  36. sift_states_iter_mb
  37. transit_state
  38. merge_state_with_log
  39. find_recover_state
  40. check_subexp_matching_top
  41. transit_state_sb
  42. transit_state_mb
  43. transit_state_bkref
  44. get_subexp
  45. get_subexp_sub
  46. find_subexp_node
  47. check_arrival
  48. check_arrival_add_next_nodes
  49. check_arrival_expand_ecl
  50. check_arrival_expand_ecl_sub
  51. expand_bkref_cache
  52. build_trtable
  53. group_nodes_into_DFAstates
  54. check_node_accept_bytes
  55. find_collation_sequence_value
  56. check_node_accept
  57. extend_buffers
  58. match_ctx_init
  59. match_ctx_clean
  60. match_ctx_free
  61. match_ctx_add_entry
  62. search_cur_bkref_entry
  63. match_ctx_add_subtop
  64. match_ctx_add_sublast
  65. sift_ctx_init

   1 /* Extended regular expression matching and search library.
   2    Copyright (C) 2002-2021 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
   5 
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Lesser General Public
   8    License as published by the Free Software Foundation; either
   9    version 2.1 of the License, or (at your option) any later version.
  10 
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Lesser General Public License for more details.
  15 
  16    You should have received a copy of the GNU Lesser General Public
  17    License along with the GNU C Library; if not, see
  18    <https://www.gnu.org/licenses/>.  */
  19 
  20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
  21                                      Idx n);
  22 static void match_ctx_clean (re_match_context_t *mctx);
  23 static void match_ctx_free (re_match_context_t *cache);
  24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
  25                                           Idx str_idx, Idx from, Idx to);
  26 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
  27 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
  28                                            Idx str_idx);
  29 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
  30                                                     Idx node, Idx str_idx);
  31 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
  32                            re_dfastate_t **limited_sts, Idx last_node,
  33                            Idx last_str_idx);
  34 static reg_errcode_t re_search_internal (const regex_t *preg,
  35                                          const char *string, Idx length,
  36                                          Idx start, Idx last_start, Idx stop,
  37                                          size_t nmatch, regmatch_t pmatch[],
  38                                          int eflags);
  39 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
  40                                   const char *string1, Idx length1,
  41                                   const char *string2, Idx length2,
  42                                   Idx start, regoff_t range,
  43                                   struct re_registers *regs,
  44                                   Idx stop, bool ret_len);
  45 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
  46                                 const char *string, Idx length, Idx start,
  47                                 regoff_t range, Idx stop,
  48                                 struct re_registers *regs,
  49                                 bool ret_len);
  50 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
  51                               Idx nregs, int regs_allocated);
  52 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
  53 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
  54                            Idx *p_match_first);
  55 static Idx check_halt_state_context (const re_match_context_t *mctx,
  56                                      const re_dfastate_t *state, Idx idx);
  57 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
  58                          regmatch_t *prev_idx_match, Idx cur_node,
  59                          Idx cur_idx, Idx nmatch);
  60 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
  61                                       Idx str_idx, Idx dest_node, Idx nregs,
  62                                       regmatch_t *regs, regmatch_t *prevregs,
  63                                       re_node_set *eps_via_nodes);
  64 static reg_errcode_t set_regs (const regex_t *preg,
  65                                const re_match_context_t *mctx,
  66                                size_t nmatch, regmatch_t *pmatch,
  67                                bool fl_backtrack);
  68 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
  69 
  70 static int sift_states_iter_mb (const re_match_context_t *mctx,
  71                                 re_sift_context_t *sctx,
  72                                 Idx node_idx, Idx str_idx, Idx max_str_idx);
  73 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
  74                                            re_sift_context_t *sctx);
  75 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
  76                                           re_sift_context_t *sctx, Idx str_idx,
  77                                           re_node_set *cur_dest);
  78 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
  79                                               re_sift_context_t *sctx,
  80                                               Idx str_idx,
  81                                               re_node_set *dest_nodes);
  82 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
  83                                             re_node_set *dest_nodes,
  84                                             const re_node_set *candidates);
  85 static bool check_dst_limits (const re_match_context_t *mctx,
  86                               const re_node_set *limits,
  87                               Idx dst_node, Idx dst_idx, Idx src_node,
  88                               Idx src_idx);
  89 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
  90                                         int boundaries, Idx subexp_idx,
  91                                         Idx from_node, Idx bkref_idx);
  92 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
  93                                       Idx limit, Idx subexp_idx,
  94                                       Idx node, Idx str_idx,
  95                                       Idx bkref_idx);
  96 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
  97                                           re_node_set *dest_nodes,
  98                                           const re_node_set *candidates,
  99                                           re_node_set *limits,
 100                                           struct re_backref_cache_entry *bkref_ents,
 101                                           Idx str_idx);
 102 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
 103                                         re_sift_context_t *sctx,
 104                                         Idx str_idx, const re_node_set *candidates);
 105 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
 106                                         re_dfastate_t **dst,
 107                                         re_dfastate_t **src, Idx num);
 108 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
 109                                          re_match_context_t *mctx);
 110 static re_dfastate_t *transit_state (reg_errcode_t *err,
 111                                      re_match_context_t *mctx,
 112                                      re_dfastate_t *state);
 113 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
 114                                             re_match_context_t *mctx,
 115                                             re_dfastate_t *next_state);
 116 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
 117                                                 re_node_set *cur_nodes,
 118                                                 Idx str_idx);
 119 #if 0
 120 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
 121                                         re_match_context_t *mctx,
 122                                         re_dfastate_t *pstate);
 123 #endif
 124 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
 125                                        re_dfastate_t *pstate);
 126 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
 127                                           const re_node_set *nodes);
 128 static reg_errcode_t get_subexp (re_match_context_t *mctx,
 129                                  Idx bkref_node, Idx bkref_str_idx);
 130 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
 131                                      const re_sub_match_top_t *sub_top,
 132                                      re_sub_match_last_t *sub_last,
 133                                      Idx bkref_node, Idx bkref_str);
 134 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
 135                              Idx subexp_idx, int type);
 136 static reg_errcode_t check_arrival (re_match_context_t *mctx,
 137                                     state_array_t *path, Idx top_node,
 138                                     Idx top_str, Idx last_node, Idx last_str,
 139                                     int type);
 140 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
 141                                                    Idx str_idx,
 142                                                    re_node_set *cur_nodes,
 143                                                    re_node_set *next_nodes);
 144 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
 145                                                re_node_set *cur_nodes,
 146                                                Idx ex_subexp, int type);
 147 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
 148                                                    re_node_set *dst_nodes,
 149                                                    Idx target, Idx ex_subexp,
 150                                                    int type);
 151 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
 152                                          re_node_set *cur_nodes, Idx cur_str,
 153                                          Idx subexp_num, int type);
 154 static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
 155 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
 156                                     const re_string_t *input, Idx idx);
 157 #ifdef _LIBC
 158 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
 159                                                    size_t name_len);
 160 #endif
 161 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
 162                                        const re_dfastate_t *state,
 163                                        re_node_set *states_node,
 164                                        bitset_t *states_ch);
 165 static bool check_node_accept (const re_match_context_t *mctx,
 166                                const re_token_t *node, Idx idx);
 167 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
 168 
 169 /* Entry point for POSIX code.  */
 170 
 171 /* regexec searches for a given pattern, specified by PREG, in the
 172    string STRING.
 173 
 174    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
 175    'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
 176    least NMATCH elements, and we set them to the offsets of the
 177    corresponding matched substrings.
 178 
 179    EFLAGS specifies "execution flags" which affect matching: if
 180    REG_NOTBOL is set, then ^ does not match at the beginning of the
 181    string; if REG_NOTEOL is set, then $ does not match at the end.
 182 
 183    Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if
 184    EFLAGS is invalid.  */
 185 
 186 int
 187 regexec (const regex_t *__restrict preg, const char *__restrict string,
     /* [previous][next][first][last][top][bottom][index][help] */
 188          size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
 189 {
 190   reg_errcode_t err;
 191   Idx start, length;
 192   re_dfa_t *dfa = preg->buffer;
 193 
 194   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
 195     return REG_BADPAT;
 196 
 197   if (eflags & REG_STARTEND)
 198     {
 199       start = pmatch[0].rm_so;
 200       length = pmatch[0].rm_eo;
 201     }
 202   else
 203     {
 204       start = 0;
 205       length = strlen (string);
 206     }
 207 
 208   lock_lock (dfa->lock);
 209   if (preg->no_sub)
 210     err = re_search_internal (preg, string, length, start, length,
 211                               length, 0, NULL, eflags);
 212   else
 213     err = re_search_internal (preg, string, length, start, length,
 214                               length, nmatch, pmatch, eflags);
 215   lock_unlock (dfa->lock);
 216   return err != REG_NOERROR;
 217 }
 218 
 219 #ifdef _LIBC
 220 libc_hidden_def (__regexec)
     /* [previous][next][first][last][top][bottom][index][help] */
 221 
 222 # include <shlib-compat.h>
 223 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
 224 
 225 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
 226 __typeof__ (__regexec) __compat_regexec;
 227 
 228 int
 229 attribute_compat_text_section
 230 __compat_regexec (const regex_t *__restrict preg,
 231                   const char *__restrict string, size_t nmatch,
 232                   regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
 233 {
 234   return regexec (preg, string, nmatch, pmatch,
 235                   eflags & (REG_NOTBOL | REG_NOTEOL));
 236 }
 237 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
 238 # endif
 239 #endif
 240 
 241 /* Entry points for GNU code.  */
 242 
 243 /* re_match, re_search, re_match_2, re_search_2
 244 
 245    The former two functions operate on STRING with length LENGTH,
 246    while the later two operate on concatenation of STRING1 and STRING2
 247    with lengths LENGTH1 and LENGTH2, respectively.
 248 
 249    re_match() matches the compiled pattern in BUFP against the string,
 250    starting at index START.
 251 
 252    re_search() first tries matching at index START, then it tries to match
 253    starting from index START + 1, and so on.  The last start position tried
 254    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
 255    way as re_match().)
 256 
 257    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
 258    the first STOP characters of the concatenation of the strings should be
 259    concerned.
 260 
 261    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
 262    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
 263    computed relative to the concatenation, not relative to the individual
 264    strings.)
 265 
 266    On success, re_match* functions return the length of the match, re_search*
 267    return the position of the start of the match.  They return -1 on
 268    match failure, -2 on error.  */
 269 
 270 regoff_t
 271 re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
     /* [previous][next][first][last][top][bottom][index][help] */
 272           Idx start, struct re_registers *regs)
 273 {
 274   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
 275 }
 276 #ifdef _LIBC
 277 weak_alias (__re_match, re_match)
     /* [previous][next][first][last][top][bottom][index][help] */
 278 #endif
 279 
 280 regoff_t
 281 re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
 282            Idx start, regoff_t range, struct re_registers *regs)
 283 {
 284   return re_search_stub (bufp, string, length, start, range, length, regs,
 285                          false);
 286 }
 287 #ifdef _LIBC
 288 weak_alias (__re_search, re_search)
     /* [previous][next][first][last][top][bottom][index][help] */
 289 #endif
 290 
 291 regoff_t
 292 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
 293             const char *string2, Idx length2, Idx start,
 294             struct re_registers *regs, Idx stop)
 295 {
 296   return re_search_2_stub (bufp, string1, length1, string2, length2,
 297                            start, 0, regs, stop, true);
 298 }
 299 #ifdef _LIBC
 300 weak_alias (__re_match_2, re_match_2)
     /* [previous][next][first][last][top][bottom][index][help] */
 301 #endif
 302 
 303 regoff_t
 304 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
 305              const char *string2, Idx length2, Idx start, regoff_t range,
 306              struct re_registers *regs, Idx stop)
 307 {
 308   return re_search_2_stub (bufp, string1, length1, string2, length2,
 309                            start, range, regs, stop, false);
 310 }
 311 #ifdef _LIBC
 312 weak_alias (__re_search_2, re_search_2)
     /* [previous][next][first][last][top][bottom][index][help] */
 313 #endif
 314 
 315 static regoff_t
 316 re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
 317                   Idx length1, const char *string2, Idx length2, Idx start,
 318                   regoff_t range, struct re_registers *regs,
 319                   Idx stop, bool ret_len)
 320 {
 321   const char *str;
 322   regoff_t rval;
 323   Idx len;
 324   char *s = NULL;
 325 
 326   if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0
 327                          || INT_ADD_WRAPV (length1, length2, &len))))
 328     return -2;
 329 
 330   /* Concatenate the strings.  */
 331   if (length2 > 0)
 332     if (length1 > 0)
 333       {
 334         s = re_malloc (char, len);
 335 
 336         if (__glibc_unlikely (s == NULL))
 337           return -2;
 338 #ifdef _LIBC
 339         memcpy (__mempcpy (s, string1, length1), string2, length2);
 340 #else
 341         memcpy (s, string1, length1);
 342         memcpy (s + length1, string2, length2);
 343 #endif
 344         str = s;
 345       }
 346     else
 347       str = string2;
 348   else
 349     str = string1;
 350 
 351   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
 352                          ret_len);
 353   re_free (s);
 354   return rval;
 355 }
 356 
 357 /* The parameters have the same meaning as those of re_search.
 358    Additional parameters:
 359    If RET_LEN is true the length of the match is returned (re_match style);
 360    otherwise the position of the match is returned.  */
 361 
 362 static regoff_t
 363 re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
     /* [previous][next][first][last][top][bottom][index][help] */
 364                 Idx start, regoff_t range, Idx stop, struct re_registers *regs,
 365                 bool ret_len)
 366 {
 367   reg_errcode_t result;
 368   regmatch_t *pmatch;
 369   Idx nregs;
 370   regoff_t rval;
 371   int eflags = 0;
 372   re_dfa_t *dfa = bufp->buffer;
 373   Idx last_start = start + range;
 374 
 375   /* Check for out-of-range.  */
 376   if (__glibc_unlikely (start < 0 || start > length))
 377     return -1;
 378   if (__glibc_unlikely (length < last_start
 379                         || (0 <= range && last_start < start)))
 380     last_start = length;
 381   else if (__glibc_unlikely (last_start < 0
 382                              || (range < 0 && start <= last_start)))
 383     last_start = 0;
 384 
 385   lock_lock (dfa->lock);
 386 
 387   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
 388   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
 389 
 390   /* Compile fastmap if we haven't yet.  */
 391   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
 392     re_compile_fastmap (bufp);
 393 
 394   if (__glibc_unlikely (bufp->no_sub))
 395     regs = NULL;
 396 
 397   /* We need at least 1 register.  */
 398   if (regs == NULL)
 399     nregs = 1;
 400   else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
 401                              && regs->num_regs <= bufp->re_nsub))
 402     {
 403       nregs = regs->num_regs;
 404       if (__glibc_unlikely (nregs < 1))
 405         {
 406           /* Nothing can be copied to regs.  */
 407           regs = NULL;
 408           nregs = 1;
 409         }
 410     }
 411   else
 412     nregs = bufp->re_nsub + 1;
 413   pmatch = re_malloc (regmatch_t, nregs);
 414   if (__glibc_unlikely (pmatch == NULL))
 415     {
 416       rval = -2;
 417       goto out;
 418     }
 419 
 420   result = re_search_internal (bufp, string, length, start, last_start, stop,
 421                                nregs, pmatch, eflags);
 422 
 423   rval = 0;
 424 
 425   /* I hope we needn't fill their regs with -1's when no match was found.  */
 426   if (result != REG_NOERROR)
 427     rval = result == REG_NOMATCH ? -1 : -2;
 428   else if (regs != NULL)
 429     {
 430       /* If caller wants register contents data back, copy them.  */
 431       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
 432                                            bufp->regs_allocated);
 433       if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
 434         rval = -2;
 435     }
 436 
 437   if (__glibc_likely (rval == 0))
 438     {
 439       if (ret_len)
 440         {
 441           DEBUG_ASSERT (pmatch[0].rm_so == start);
 442           rval = pmatch[0].rm_eo - start;
 443         }
 444       else
 445         rval = pmatch[0].rm_so;
 446     }
 447   re_free (pmatch);
 448  out:
 449   lock_unlock (dfa->lock);
 450   return rval;
 451 }
 452 
 453 static unsigned
 454 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
     /* [previous][next][first][last][top][bottom][index][help] */
 455               int regs_allocated)
 456 {
 457   int rval = REGS_REALLOCATE;
 458   Idx i;
 459   Idx need_regs = nregs + 1;
 460   /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
 461      uses.  */
 462 
 463   /* Have the register data arrays been allocated?  */
 464   if (regs_allocated == REGS_UNALLOCATED)
 465     { /* No.  So allocate them with malloc.  */
 466       regs->start = re_malloc (regoff_t, need_regs);
 467       if (__glibc_unlikely (regs->start == NULL))
 468         return REGS_UNALLOCATED;
 469       regs->end = re_malloc (regoff_t, need_regs);
 470       if (__glibc_unlikely (regs->end == NULL))
 471         {
 472           re_free (regs->start);
 473           return REGS_UNALLOCATED;
 474         }
 475       regs->num_regs = need_regs;
 476     }
 477   else if (regs_allocated == REGS_REALLOCATE)
 478     { /* Yes.  If we need more elements than were already
 479          allocated, reallocate them.  If we need fewer, just
 480          leave it alone.  */
 481       if (__glibc_unlikely (need_regs > regs->num_regs))
 482         {
 483           regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
 484           regoff_t *new_end;
 485           if (__glibc_unlikely (new_start == NULL))
 486             return REGS_UNALLOCATED;
 487           new_end = re_realloc (regs->end, regoff_t, need_regs);
 488           if (__glibc_unlikely (new_end == NULL))
 489             {
 490               re_free (new_start);
 491               return REGS_UNALLOCATED;
 492             }
 493           regs->start = new_start;
 494           regs->end = new_end;
 495           regs->num_regs = need_regs;
 496         }
 497     }
 498   else
 499     {
 500       DEBUG_ASSERT (regs_allocated == REGS_FIXED);
 501       /* This function may not be called with REGS_FIXED and nregs too big.  */
 502       DEBUG_ASSERT (nregs <= regs->num_regs);
 503       rval = REGS_FIXED;
 504     }
 505 
 506   /* Copy the regs.  */
 507   for (i = 0; i < nregs; ++i)
 508     {
 509       regs->start[i] = pmatch[i].rm_so;
 510       regs->end[i] = pmatch[i].rm_eo;
 511     }
 512   for ( ; i < regs->num_regs; ++i)
 513     regs->start[i] = regs->end[i] = -1;
 514 
 515   return rval;
 516 }
 517 
 518 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
 519    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
 520    this memory for recording register information.  STARTS and ENDS
 521    must be allocated using the malloc library routine, and must each
 522    be at least NUM_REGS * sizeof (regoff_t) bytes long.
 523 
 524    If NUM_REGS == 0, then subsequent matches should allocate their own
 525    register data.
 526 
 527    Unless this function is called, the first search or match using
 528    PATTERN_BUFFER will allocate its own register data, without
 529    freeing the old data.  */
 530 
 531 void
 532 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
     /* [previous][next][first][last][top][bottom][index][help] */
 533                   __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
 534 {
 535   if (num_regs)
 536     {
 537       bufp->regs_allocated = REGS_REALLOCATE;
 538       regs->num_regs = num_regs;
 539       regs->start = starts;
 540       regs->end = ends;
 541     }
 542   else
 543     {
 544       bufp->regs_allocated = REGS_UNALLOCATED;
 545       regs->num_regs = 0;
 546       regs->start = regs->end = NULL;
 547     }
 548 }
 549 #ifdef _LIBC
 550 weak_alias (__re_set_registers, re_set_registers)
     /* [previous][next][first][last][top][bottom][index][help] */
 551 #endif
 552 
 553 /* Entry points compatible with 4.2 BSD regex library.  We don't define
 554    them unless specifically requested.  */
 555 
 556 #if defined _REGEX_RE_COMP || defined _LIBC
 557 int
 558 # ifdef _LIBC
 559 weak_function
 560 # endif
 561 re_exec (const char *s)
 562 {
 563   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
 564 }
 565 #endif /* _REGEX_RE_COMP */
 566 
 567 /* Internal entry point.  */
 568 
 569 /* Searches for a compiled pattern PREG in the string STRING, whose
 570    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
 571    meaning as with regexec.  LAST_START is START + RANGE, where
 572    START and RANGE have the same meaning as with re_search.
 573    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
 574    otherwise return the error code.
 575    Note: We assume front end functions already check ranges.
 576    (0 <= LAST_START && LAST_START <= LENGTH)  */
 577 
 578 static reg_errcode_t
 579 __attribute_warn_unused_result__
 580 re_search_internal (const regex_t *preg, const char *string, Idx length,
     /* [previous][next][first][last][top][bottom][index][help] */
 581                     Idx start, Idx last_start, Idx stop, size_t nmatch,
 582                     regmatch_t pmatch[], int eflags)
 583 {
 584   reg_errcode_t err;
 585   const re_dfa_t *dfa = preg->buffer;
 586   Idx left_lim, right_lim;
 587   int incr;
 588   bool fl_longest_match;
 589   int match_kind;
 590   Idx match_first;
 591   Idx match_last = -1;
 592   Idx extra_nmatch;
 593   bool sb;
 594   int ch;
 595   re_match_context_t mctx = { .dfa = dfa };
 596   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
 597                     && start != last_start && !preg->can_be_null)
 598                    ? preg->fastmap : NULL);
 599   RE_TRANSLATE_TYPE t = preg->translate;
 600 
 601   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
 602   nmatch -= extra_nmatch;
 603 
 604   /* Check if the DFA haven't been compiled.  */
 605   if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL
 606                         || dfa->init_state_word == NULL
 607                         || dfa->init_state_nl == NULL
 608                         || dfa->init_state_begbuf == NULL))
 609     return REG_NOMATCH;
 610 
 611   /* We assume front-end functions already check them.  */
 612   DEBUG_ASSERT (0 <= last_start && last_start <= length);
 613 
 614   /* If initial states with non-begbuf contexts have no elements,
 615      the regex must be anchored.  If preg->newline_anchor is set,
 616      we'll never use init_state_nl, so do not check it.  */
 617   if (dfa->init_state->nodes.nelem == 0
 618       && dfa->init_state_word->nodes.nelem == 0
 619       && (dfa->init_state_nl->nodes.nelem == 0
 620           || !preg->newline_anchor))
 621     {
 622       if (start != 0 && last_start != 0)
 623         return REG_NOMATCH;
 624       start = last_start = 0;
 625     }
 626 
 627   /* We must check the longest matching, if nmatch > 0.  */
 628   fl_longest_match = (nmatch != 0 || dfa->nbackref);
 629 
 630   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
 631                             preg->translate, (preg->syntax & RE_ICASE) != 0,
 632                             dfa);
 633   if (__glibc_unlikely (err != REG_NOERROR))
 634     goto free_return;
 635   mctx.input.stop = stop;
 636   mctx.input.raw_stop = stop;
 637   mctx.input.newline_anchor = preg->newline_anchor;
 638 
 639   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
 640   if (__glibc_unlikely (err != REG_NOERROR))
 641     goto free_return;
 642 
 643   /* We will log all the DFA states through which the dfa pass,
 644      if nmatch > 1, or this dfa has "multibyte node", which is a
 645      back-reference or a node which can accept multibyte character or
 646      multi character collating element.  */
 647   if (nmatch > 1 || dfa->has_mb_node)
 648     {
 649       /* Avoid overflow.  */
 650       if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
 651                              <= mctx.input.bufs_len)))
 652         {
 653           err = REG_ESPACE;
 654           goto free_return;
 655         }
 656 
 657       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
 658       if (__glibc_unlikely (mctx.state_log == NULL))
 659         {
 660           err = REG_ESPACE;
 661           goto free_return;
 662         }
 663     }
 664 
 665   match_first = start;
 666   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
 667                            : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
 668 
 669   /* Check incrementally whether the input string matches.  */
 670   incr = (last_start < start) ? -1 : 1;
 671   left_lim = (last_start < start) ? last_start : start;
 672   right_lim = (last_start < start) ? start : last_start;
 673   sb = dfa->mb_cur_max == 1;
 674   match_kind =
 675     (fastmap
 676      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
 677         | (start <= last_start ? 2 : 0)
 678         | (t != NULL ? 1 : 0))
 679      : 8);
 680 
 681   for (;; match_first += incr)
 682     {
 683       err = REG_NOMATCH;
 684       if (match_first < left_lim || right_lim < match_first)
 685         goto free_return;
 686 
 687       /* Advance as rapidly as possible through the string, until we
 688          find a plausible place to start matching.  This may be done
 689          with varying efficiency, so there are various possibilities:
 690          only the most common of them are specialized, in order to
 691          save on code size.  We use a switch statement for speed.  */
 692       switch (match_kind)
 693         {
 694         case 8:
 695           /* No fastmap.  */
 696           break;
 697 
 698         case 7:
 699           /* Fastmap with single-byte translation, match forward.  */
 700           while (__glibc_likely (match_first < right_lim)
 701                  && !fastmap[t[(unsigned char) string[match_first]]])
 702             ++match_first;
 703           goto forward_match_found_start_or_reached_end;
 704 
 705         case 6:
 706           /* Fastmap without translation, match forward.  */
 707           while (__glibc_likely (match_first < right_lim)
 708                  && !fastmap[(unsigned char) string[match_first]])
 709             ++match_first;
 710 
 711         forward_match_found_start_or_reached_end:
 712           if (__glibc_unlikely (match_first == right_lim))
 713             {
 714               ch = match_first >= length
 715                        ? 0 : (unsigned char) string[match_first];
 716               if (!fastmap[t ? t[ch] : ch])
 717                 goto free_return;
 718             }
 719           break;
 720 
 721         case 4:
 722         case 5:
 723           /* Fastmap without multi-byte translation, match backwards.  */
 724           while (match_first >= left_lim)
 725             {
 726               ch = match_first >= length
 727                        ? 0 : (unsigned char) string[match_first];
 728               if (fastmap[t ? t[ch] : ch])
 729                 break;
 730               --match_first;
 731             }
 732           if (match_first < left_lim)
 733             goto free_return;
 734           break;
 735 
 736         default:
 737           /* In this case, we can't determine easily the current byte,
 738              since it might be a component byte of a multibyte
 739              character.  Then we use the constructed buffer instead.  */
 740           for (;;)
 741             {
 742               /* If MATCH_FIRST is out of the valid range, reconstruct the
 743                  buffers.  */
 744               __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
 745               if (__glibc_unlikely (offset
 746                                     >= (__re_size_t) mctx.input.valid_raw_len))
 747                 {
 748                   err = re_string_reconstruct (&mctx.input, match_first,
 749                                                eflags);
 750                   if (__glibc_unlikely (err != REG_NOERROR))
 751                     goto free_return;
 752 
 753                   offset = match_first - mctx.input.raw_mbs_idx;
 754                 }
 755               /* Use buffer byte if OFFSET is in buffer, otherwise '\0'.  */
 756               ch = (offset < mctx.input.valid_len
 757                     ? re_string_byte_at (&mctx.input, offset) : 0);
 758               if (fastmap[ch])
 759                 break;
 760               match_first += incr;
 761               if (match_first < left_lim || match_first > right_lim)
 762                 {
 763                   err = REG_NOMATCH;
 764                   goto free_return;
 765                 }
 766             }
 767           break;
 768         }
 769 
 770       /* Reconstruct the buffers so that the matcher can assume that
 771          the matching starts from the beginning of the buffer.  */
 772       err = re_string_reconstruct (&mctx.input, match_first, eflags);
 773       if (__glibc_unlikely (err != REG_NOERROR))
 774         goto free_return;
 775 
 776       /* Don't consider this char as a possible match start if it part,
 777          yet isn't the head, of a multibyte character.  */
 778       if (!sb && !re_string_first_byte (&mctx.input, 0))
 779         continue;
 780 
 781       /* It seems to be appropriate one, then use the matcher.  */
 782       /* We assume that the matching starts from 0.  */
 783       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
 784       match_last = check_matching (&mctx, fl_longest_match,
 785                                    start <= last_start ? &match_first : NULL);
 786       if (match_last != -1)
 787         {
 788           if (__glibc_unlikely (match_last == -2))
 789             {
 790               err = REG_ESPACE;
 791               goto free_return;
 792             }
 793           else
 794             {
 795               mctx.match_last = match_last;
 796               if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
 797                 {
 798                   re_dfastate_t *pstate = mctx.state_log[match_last];
 799                   mctx.last_node = check_halt_state_context (&mctx, pstate,
 800                                                              match_last);
 801                 }
 802               if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
 803                   || dfa->nbackref)
 804                 {
 805                   err = prune_impossible_nodes (&mctx);
 806                   if (err == REG_NOERROR)
 807                     break;
 808                   if (__glibc_unlikely (err != REG_NOMATCH))
 809                     goto free_return;
 810                   match_last = -1;
 811                 }
 812               else
 813                 break; /* We found a match.  */
 814             }
 815         }
 816 
 817       match_ctx_clean (&mctx);
 818     }
 819 
 820   DEBUG_ASSERT (match_last != -1);
 821   DEBUG_ASSERT (err == REG_NOERROR);
 822 
 823   /* Set pmatch[] if we need.  */
 824   if (nmatch > 0)
 825     {
 826       Idx reg_idx;
 827 
 828       /* Initialize registers.  */
 829       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
 830         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
 831 
 832       /* Set the points where matching start/end.  */
 833       pmatch[0].rm_so = 0;
 834       pmatch[0].rm_eo = mctx.match_last;
 835       /* FIXME: This function should fail if mctx.match_last exceeds
 836          the maximum possible regoff_t value.  We need a new error
 837          code REG_OVERFLOW.  */
 838 
 839       if (!preg->no_sub && nmatch > 1)
 840         {
 841           err = set_regs (preg, &mctx, nmatch, pmatch,
 842                           dfa->has_plural_match && dfa->nbackref > 0);
 843           if (__glibc_unlikely (err != REG_NOERROR))
 844             goto free_return;
 845         }
 846 
 847       /* At last, add the offset to each register, since we slid
 848          the buffers so that we could assume that the matching starts
 849          from 0.  */
 850       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 851         if (pmatch[reg_idx].rm_so != -1)
 852           {
 853             if (__glibc_unlikely (mctx.input.offsets_needed != 0))
 854               {
 855                 pmatch[reg_idx].rm_so =
 856                   (pmatch[reg_idx].rm_so == mctx.input.valid_len
 857                    ? mctx.input.valid_raw_len
 858                    : mctx.input.offsets[pmatch[reg_idx].rm_so]);
 859                 pmatch[reg_idx].rm_eo =
 860                   (pmatch[reg_idx].rm_eo == mctx.input.valid_len
 861                    ? mctx.input.valid_raw_len
 862                    : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
 863               }
 864             pmatch[reg_idx].rm_so += match_first;
 865             pmatch[reg_idx].rm_eo += match_first;
 866           }
 867       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
 868         {
 869           pmatch[nmatch + reg_idx].rm_so = -1;
 870           pmatch[nmatch + reg_idx].rm_eo = -1;
 871         }
 872 
 873       if (dfa->subexp_map)
 874         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
 875           if (dfa->subexp_map[reg_idx] != reg_idx)
 876             {
 877               pmatch[reg_idx + 1].rm_so
 878                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
 879               pmatch[reg_idx + 1].rm_eo
 880                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
 881             }
 882     }
 883 
 884  free_return:
 885   re_free (mctx.state_log);
 886   if (dfa->nbackref)
 887     match_ctx_free (&mctx);
 888   re_string_destruct (&mctx.input);
 889   return err;
 890 }
 891 
 892 static reg_errcode_t
 893 __attribute_warn_unused_result__
 894 prune_impossible_nodes (re_match_context_t *mctx)
     /* [previous][next][first][last][top][bottom][index][help] */
 895 {
 896   const re_dfa_t *const dfa = mctx->dfa;
 897   Idx halt_node, match_last;
 898   reg_errcode_t ret;
 899   re_dfastate_t **sifted_states;
 900   re_dfastate_t **lim_states = NULL;
 901   re_sift_context_t sctx;
 902   DEBUG_ASSERT (mctx->state_log != NULL);
 903   match_last = mctx->match_last;
 904   halt_node = mctx->last_node;
 905 
 906   /* Avoid overflow.  */
 907   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
 908                         <= match_last))
 909     return REG_ESPACE;
 910 
 911   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
 912   if (__glibc_unlikely (sifted_states == NULL))
 913     {
 914       ret = REG_ESPACE;
 915       goto free_return;
 916     }
 917   if (dfa->nbackref)
 918     {
 919       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
 920       if (__glibc_unlikely (lim_states == NULL))
 921         {
 922           ret = REG_ESPACE;
 923           goto free_return;
 924         }
 925       while (1)
 926         {
 927           memset (lim_states, '\0',
 928                   sizeof (re_dfastate_t *) * (match_last + 1));
 929           sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
 930                          match_last);
 931           ret = sift_states_backward (mctx, &sctx);
 932           re_node_set_free (&sctx.limits);
 933           if (__glibc_unlikely (ret != REG_NOERROR))
 934               goto free_return;
 935           if (sifted_states[0] != NULL || lim_states[0] != NULL)
 936             break;
 937           do
 938             {
 939               --match_last;
 940               if (match_last < 0)
 941                 {
 942                   ret = REG_NOMATCH;
 943                   goto free_return;
 944                 }
 945             } while (mctx->state_log[match_last] == NULL
 946                      || !mctx->state_log[match_last]->halt);
 947           halt_node = check_halt_state_context (mctx,
 948                                                 mctx->state_log[match_last],
 949                                                 match_last);
 950         }
 951       ret = merge_state_array (dfa, sifted_states, lim_states,
 952                                match_last + 1);
 953       re_free (lim_states);
 954       lim_states = NULL;
 955       if (__glibc_unlikely (ret != REG_NOERROR))
 956         goto free_return;
 957     }
 958   else
 959     {
 960       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
 961       ret = sift_states_backward (mctx, &sctx);
 962       re_node_set_free (&sctx.limits);
 963       if (__glibc_unlikely (ret != REG_NOERROR))
 964         goto free_return;
 965       if (sifted_states[0] == NULL)
 966         {
 967           ret = REG_NOMATCH;
 968           goto free_return;
 969         }
 970     }
 971   re_free (mctx->state_log);
 972   mctx->state_log = sifted_states;
 973   sifted_states = NULL;
 974   mctx->last_node = halt_node;
 975   mctx->match_last = match_last;
 976   ret = REG_NOERROR;
 977  free_return:
 978   re_free (sifted_states);
 979   re_free (lim_states);
 980   return ret;
 981 }
 982 
 983 /* Acquire an initial state and return it.
 984    We must select appropriate initial state depending on the context,
 985    since initial states may have constraints like "\<", "^", etc..  */
 986 
 987 static __always_inline re_dfastate_t *
 988 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
     /* [previous][next][first][last][top][bottom][index][help] */
 989                             Idx idx)
 990 {
 991   const re_dfa_t *const dfa = mctx->dfa;
 992   if (dfa->init_state->has_constraint)
 993     {
 994       unsigned int context;
 995       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
 996       if (IS_WORD_CONTEXT (context))
 997         return dfa->init_state_word;
 998       else if (IS_ORDINARY_CONTEXT (context))
 999         return dfa->init_state;
1000       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1001         return dfa->init_state_begbuf;
1002       else if (IS_NEWLINE_CONTEXT (context))
1003         return dfa->init_state_nl;
1004       else if (IS_BEGBUF_CONTEXT (context))
1005         {
1006           /* It is relatively rare case, then calculate on demand.  */
1007           return re_acquire_state_context (err, dfa,
1008                                            dfa->init_state->entrance_nodes,
1009                                            context);
1010         }
1011       else
1012         /* Must not happen?  */
1013         return dfa->init_state;
1014     }
1015   else
1016     return dfa->init_state;
1017 }
1018 
1019 /* Check whether the regular expression match input string INPUT or not,
1020    and return the index where the matching end.  Return -1 if
1021    there is no match, and return -2 in case of an error.
1022    FL_LONGEST_MATCH means we want the POSIX longest matching.
1023    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1024    next place where we may want to try matching.
1025    Note that the matcher assumes that the matching starts from the current
1026    index of the buffer.  */
1027 
1028 static Idx
1029 __attribute_warn_unused_result__
1030 check_matching (re_match_context_t *mctx, bool fl_longest_match,
     /* [previous][next][first][last][top][bottom][index][help] */
1031                 Idx *p_match_first)
1032 {
1033   const re_dfa_t *const dfa = mctx->dfa;
1034   reg_errcode_t err;
1035   Idx match = 0;
1036   Idx match_last = -1;
1037   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1038   re_dfastate_t *cur_state;
1039   bool at_init_state = p_match_first != NULL;
1040   Idx next_start_idx = cur_str_idx;
1041 
1042   err = REG_NOERROR;
1043   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1044   /* An initial state must not be NULL (invalid).  */
1045   if (__glibc_unlikely (cur_state == NULL))
1046     {
1047       DEBUG_ASSERT (err == REG_ESPACE);
1048       return -2;
1049     }
1050 
1051   if (mctx->state_log != NULL)
1052     {
1053       mctx->state_log[cur_str_idx] = cur_state;
1054 
1055       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1056          later.  E.g. Processing back references.  */
1057       if (__glibc_unlikely (dfa->nbackref))
1058         {
1059           at_init_state = false;
1060           err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1061           if (__glibc_unlikely (err != REG_NOERROR))
1062             return err;
1063 
1064           if (cur_state->has_backref)
1065             {
1066               err = transit_state_bkref (mctx, &cur_state->nodes);
1067               if (__glibc_unlikely (err != REG_NOERROR))
1068                 return err;
1069             }
1070         }
1071     }
1072 
1073   /* If the RE accepts NULL string.  */
1074   if (__glibc_unlikely (cur_state->halt))
1075     {
1076       if (!cur_state->has_constraint
1077           || check_halt_state_context (mctx, cur_state, cur_str_idx))
1078         {
1079           if (!fl_longest_match)
1080             return cur_str_idx;
1081           else
1082             {
1083               match_last = cur_str_idx;
1084               match = 1;
1085             }
1086         }
1087     }
1088 
1089   while (!re_string_eoi (&mctx->input))
1090     {
1091       re_dfastate_t *old_state = cur_state;
1092       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1093 
1094       if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
1095            && mctx->input.bufs_len < mctx->input.len)
1096           || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
1097               && mctx->input.valid_len < mctx->input.len))
1098         {
1099           err = extend_buffers (mctx, next_char_idx + 1);
1100           if (__glibc_unlikely (err != REG_NOERROR))
1101             {
1102               DEBUG_ASSERT (err == REG_ESPACE);
1103               return -2;
1104             }
1105         }
1106 
1107       cur_state = transit_state (&err, mctx, cur_state);
1108       if (mctx->state_log != NULL)
1109         cur_state = merge_state_with_log (&err, mctx, cur_state);
1110 
1111       if (cur_state == NULL)
1112         {
1113           /* Reached the invalid state or an error.  Try to recover a valid
1114              state using the state log, if available and if we have not
1115              already found a valid (even if not the longest) match.  */
1116           if (__glibc_unlikely (err != REG_NOERROR))
1117             return -2;
1118 
1119           if (mctx->state_log == NULL
1120               || (match && !fl_longest_match)
1121               || (cur_state = find_recover_state (&err, mctx)) == NULL)
1122             break;
1123         }
1124 
1125       if (__glibc_unlikely (at_init_state))
1126         {
1127           if (old_state == cur_state)
1128             next_start_idx = next_char_idx;
1129           else
1130             at_init_state = false;
1131         }
1132 
1133       if (cur_state->halt)
1134         {
1135           /* Reached a halt state.
1136              Check the halt state can satisfy the current context.  */
1137           if (!cur_state->has_constraint
1138               || check_halt_state_context (mctx, cur_state,
1139                                            re_string_cur_idx (&mctx->input)))
1140             {
1141               /* We found an appropriate halt state.  */
1142               match_last = re_string_cur_idx (&mctx->input);
1143               match = 1;
1144 
1145               /* We found a match, do not modify match_first below.  */
1146               p_match_first = NULL;
1147               if (!fl_longest_match)
1148                 break;
1149             }
1150         }
1151     }
1152 
1153   if (p_match_first)
1154     *p_match_first += next_start_idx;
1155 
1156   return match_last;
1157 }
1158 
1159 /* Check NODE match the current context.  */
1160 
1161 static bool
1162 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
     /* [previous][next][first][last][top][bottom][index][help] */
1163 {
1164   re_token_type_t type = dfa->nodes[node].type;
1165   unsigned int constraint = dfa->nodes[node].constraint;
1166   if (type != END_OF_RE)
1167     return false;
1168   if (!constraint)
1169     return true;
1170   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1171     return false;
1172   return true;
1173 }
1174 
1175 /* Check the halt state STATE match the current context.
1176    Return 0 if not match, if the node, STATE has, is a halt node and
1177    match the context, return the node.  */
1178 
1179 static Idx
1180 check_halt_state_context (const re_match_context_t *mctx,
     /* [previous][next][first][last][top][bottom][index][help] */
1181                           const re_dfastate_t *state, Idx idx)
1182 {
1183   Idx i;
1184   unsigned int context;
1185   DEBUG_ASSERT (state->halt);
1186   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1187   for (i = 0; i < state->nodes.nelem; ++i)
1188     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1189       return state->nodes.elems[i];
1190   return 0;
1191 }
1192 
1193 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1194    corresponding to the DFA).
1195    Return the destination node, and update EPS_VIA_NODES;
1196    return -1 on match failure, -2 on error.  */
1197 
1198 static Idx
1199 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
     /* [previous][next][first][last][top][bottom][index][help] */
1200                    regmatch_t *prevregs,
1201                    Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1202                    struct re_fail_stack_t *fs)
1203 {
1204   const re_dfa_t *const dfa = mctx->dfa;
1205   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1206     {
1207       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1208       re_node_set *edests = &dfa->edests[node];
1209 
1210       if (! re_node_set_contains (eps_via_nodes, node))
1211         {
1212           bool ok = re_node_set_insert (eps_via_nodes, node);
1213           if (__glibc_unlikely (! ok))
1214             return -2;
1215         }
1216 
1217       /* Pick a valid destination, or return -1 if none is found.  */
1218       Idx dest_node = -1;
1219       for (Idx i = 0; i < edests->nelem; i++)
1220         {
1221           Idx candidate = edests->elems[i];
1222           if (!re_node_set_contains (cur_nodes, candidate))
1223             continue;
1224           if (dest_node == -1)
1225             dest_node = candidate;
1226 
1227           else
1228             {
1229               /* In order to avoid infinite loop like "(a*)*", return the second
1230                  epsilon-transition if the first was already considered.  */
1231               if (re_node_set_contains (eps_via_nodes, dest_node))
1232                 return candidate;
1233 
1234               /* Otherwise, push the second epsilon-transition on the fail stack.  */
1235               else if (fs != NULL
1236                        && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1237                                            prevregs, eps_via_nodes))
1238                 return -2;
1239 
1240               /* We know we are going to exit.  */
1241               break;
1242             }
1243         }
1244       return dest_node;
1245     }
1246   else
1247     {
1248       Idx naccepted = 0;
1249       re_token_type_t type = dfa->nodes[node].type;
1250 
1251       if (dfa->nodes[node].accept_mb)
1252         naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1253       else if (type == OP_BACK_REF)
1254         {
1255           Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1256           if (subexp_idx < nregs)
1257             naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1258           if (fs != NULL)
1259             {
1260               if (subexp_idx >= nregs
1261                   || regs[subexp_idx].rm_so == -1
1262                   || regs[subexp_idx].rm_eo == -1)
1263                 return -1;
1264               else if (naccepted)
1265                 {
1266                   char *buf = (char *) re_string_get_buffer (&mctx->input);
1267                   if (mctx->input.valid_len - *pidx < naccepted
1268                       || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1269                                   naccepted)
1270                           != 0))
1271                     return -1;
1272                 }
1273             }
1274 
1275           if (naccepted == 0)
1276             {
1277               Idx dest_node;
1278               bool ok = re_node_set_insert (eps_via_nodes, node);
1279               if (__glibc_unlikely (! ok))
1280                 return -2;
1281               dest_node = dfa->edests[node].elems[0];
1282               if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1283                                         dest_node))
1284                 return dest_node;
1285             }
1286         }
1287 
1288       if (naccepted != 0
1289           || check_node_accept (mctx, dfa->nodes + node, *pidx))
1290         {
1291           Idx dest_node = dfa->nexts[node];
1292           *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1293           if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1294                      || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1295                                                dest_node)))
1296             return -1;
1297           re_node_set_empty (eps_via_nodes);
1298           return dest_node;
1299         }
1300     }
1301   return -1;
1302 }
1303 
1304 static reg_errcode_t
1305 __attribute_warn_unused_result__
1306 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
     /* [previous][next][first][last][top][bottom][index][help] */
1307                  Idx nregs, regmatch_t *regs, regmatch_t *prevregs,
1308                  re_node_set *eps_via_nodes)
1309 {
1310   reg_errcode_t err;
1311   Idx num = fs->num++;
1312   if (fs->num == fs->alloc)
1313     {
1314       struct re_fail_stack_ent_t *new_array;
1315       new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1316                               fs->alloc * 2);
1317       if (new_array == NULL)
1318         return REG_ESPACE;
1319       fs->alloc *= 2;
1320       fs->stack = new_array;
1321     }
1322   fs->stack[num].idx = str_idx;
1323   fs->stack[num].node = dest_node;
1324   fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs);
1325   if (fs->stack[num].regs == NULL)
1326     return REG_ESPACE;
1327   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1328   memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs);
1329   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1330   return err;
1331 }
1332 
1333 static Idx
1334 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
     /* [previous][next][first][last][top][bottom][index][help] */
1335                 regmatch_t *regs, regmatch_t *prevregs,
1336                 re_node_set *eps_via_nodes)
1337 {
1338   if (fs == NULL || fs->num == 0)
1339     return -1;
1340   Idx num = --fs->num;
1341   *pidx = fs->stack[num].idx;
1342   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1343   memcpy (prevregs, fs->stack[num].regs + nregs, sizeof (regmatch_t) * nregs);
1344   re_node_set_free (eps_via_nodes);
1345   re_free (fs->stack[num].regs);
1346   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1347   DEBUG_ASSERT (0 <= fs->stack[num].node);
1348   return fs->stack[num].node;
1349 }
1350 
1351 
1352 #define DYNARRAY_STRUCT  regmatch_list
1353 #define DYNARRAY_ELEMENT regmatch_t
1354 #define DYNARRAY_PREFIX  regmatch_list_
1355 #include <malloc/dynarray-skeleton.c>
1356 
1357 /* Set the positions where the subexpressions are starts/ends to registers
1358    PMATCH.
1359    Note: We assume that pmatch[0] is already set, and
1360    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1361 
1362 static reg_errcode_t
1363 __attribute_warn_unused_result__
1364 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
     /* [previous][next][first][last][top][bottom][index][help] */
1365           regmatch_t *pmatch, bool fl_backtrack)
1366 {
1367   const re_dfa_t *dfa = preg->buffer;
1368   Idx idx, cur_node;
1369   re_node_set eps_via_nodes;
1370   struct re_fail_stack_t *fs;
1371   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1372   struct regmatch_list prev_match;
1373   regmatch_list_init (&prev_match);
1374 
1375   DEBUG_ASSERT (nmatch > 1);
1376   DEBUG_ASSERT (mctx->state_log != NULL);
1377   if (fl_backtrack)
1378     {
1379       fs = &fs_body;
1380       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1381       if (fs->stack == NULL)
1382         return REG_ESPACE;
1383     }
1384   else
1385     fs = NULL;
1386 
1387   cur_node = dfa->init_node;
1388   re_node_set_init_empty (&eps_via_nodes);
1389 
1390   if (!regmatch_list_resize (&prev_match, nmatch))
1391     {
1392       regmatch_list_free (&prev_match);
1393       free_fail_stack_return (fs);
1394       return REG_ESPACE;
1395     }
1396   regmatch_t *prev_idx_match = regmatch_list_begin (&prev_match);
1397   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1398 
1399   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1400     {
1401       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1402 
1403       if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1404           || (fs && re_node_set_contains (&eps_via_nodes, cur_node)))
1405         {
1406           Idx reg_idx;
1407           cur_node = -1;
1408           if (fs)
1409             {
1410               for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1411                 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1412                   {
1413                     cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1414                                                prev_idx_match, &eps_via_nodes);
1415                     break;
1416                   }
1417             }
1418           if (cur_node < 0)
1419             {
1420               re_node_set_free (&eps_via_nodes);
1421               regmatch_list_free (&prev_match);
1422               return free_fail_stack_return (fs);
1423             }
1424         }
1425 
1426       /* Proceed to next node.  */
1427       cur_node = proceed_next_node (mctx, nmatch, pmatch, prev_idx_match,
1428                                     &idx, cur_node,
1429                                     &eps_via_nodes, fs);
1430 
1431       if (__glibc_unlikely (cur_node < 0))
1432         {
1433           if (__glibc_unlikely (cur_node == -2))
1434             {
1435               re_node_set_free (&eps_via_nodes);
1436               regmatch_list_free (&prev_match);
1437               free_fail_stack_return (fs);
1438               return REG_ESPACE;
1439             }
1440           cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1441                                      prev_idx_match, &eps_via_nodes);
1442           if (cur_node < 0)
1443             {
1444               re_node_set_free (&eps_via_nodes);
1445               regmatch_list_free (&prev_match);
1446               free_fail_stack_return (fs);
1447               return REG_NOMATCH;
1448             }
1449         }
1450     }
1451   re_node_set_free (&eps_via_nodes);
1452   regmatch_list_free (&prev_match);
1453   return free_fail_stack_return (fs);
1454 }
1455 
1456 static reg_errcode_t
1457 free_fail_stack_return (struct re_fail_stack_t *fs)
     /* [previous][next][first][last][top][bottom][index][help] */
1458 {
1459   if (fs)
1460     {
1461       Idx fs_idx;
1462       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1463         {
1464           re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1465           re_free (fs->stack[fs_idx].regs);
1466         }
1467       re_free (fs->stack);
1468     }
1469   return REG_NOERROR;
1470 }
1471 
1472 static void
1473 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
     /* [previous][next][first][last][top][bottom][index][help] */
1474              regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1475 {
1476   int type = dfa->nodes[cur_node].type;
1477   if (type == OP_OPEN_SUBEXP)
1478     {
1479       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1480 
1481       /* We are at the first node of this sub expression.  */
1482       if (reg_num < nmatch)
1483         {
1484           pmatch[reg_num].rm_so = cur_idx;
1485           pmatch[reg_num].rm_eo = -1;
1486         }
1487     }
1488   else if (type == OP_CLOSE_SUBEXP)
1489     {
1490       /* We are at the last node of this sub expression.  */
1491       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1492       if (reg_num < nmatch)
1493         {
1494           if (pmatch[reg_num].rm_so < cur_idx)
1495             {
1496               pmatch[reg_num].rm_eo = cur_idx;
1497               /* This is a non-empty match or we are not inside an optional
1498                  subexpression.  Accept this right away.  */
1499               memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1500             }
1501           else
1502             {
1503               if (dfa->nodes[cur_node].opt_subexp
1504                   && prev_idx_match[reg_num].rm_so != -1)
1505                 /* We transited through an empty match for an optional
1506                    subexpression, like (a?)*, and this is not the subexp's
1507                    first match.  Copy back the old content of the registers
1508                    so that matches of an inner subexpression are undone as
1509                    well, like in ((a?))*.  */
1510                 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1511               else
1512                 /* We completed a subexpression, but it may be part of
1513                    an optional one, so do not update PREV_IDX_MATCH.  */
1514                 pmatch[reg_num].rm_eo = cur_idx;
1515             }
1516         }
1517     }
1518 }
1519 
1520 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1521    and sift the nodes in each states according to the following rules.
1522    Updated state_log will be wrote to STATE_LOG.
1523 
1524    Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1525      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1526         If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1527         the LAST_NODE, we throw away the node 'a'.
1528      2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1529         string 's' and transit to 'b':
1530         i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1531            away the node 'a'.
1532         ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1533             thrown away, we throw away the node 'a'.
1534      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1535         i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1536            node 'a'.
1537         ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1538             we throw away the node 'a'.  */
1539 
1540 #define STATE_NODE_CONTAINS(state,node) \
1541   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1542 
1543 static reg_errcode_t
1544 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
     /* [previous][next][first][last][top][bottom][index][help] */
1545 {
1546   reg_errcode_t err;
1547   int null_cnt = 0;
1548   Idx str_idx = sctx->last_str_idx;
1549   re_node_set cur_dest;
1550 
1551   DEBUG_ASSERT (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1552 
1553   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1554      transit to the last_node and the last_node itself.  */
1555   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1556   if (__glibc_unlikely (err != REG_NOERROR))
1557     return err;
1558   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1559   if (__glibc_unlikely (err != REG_NOERROR))
1560     goto free_return;
1561 
1562   /* Then check each states in the state_log.  */
1563   while (str_idx > 0)
1564     {
1565       /* Update counters.  */
1566       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1567       if (null_cnt > mctx->max_mb_elem_len)
1568         {
1569           memset (sctx->sifted_states, '\0',
1570                   sizeof (re_dfastate_t *) * str_idx);
1571           re_node_set_free (&cur_dest);
1572           return REG_NOERROR;
1573         }
1574       re_node_set_empty (&cur_dest);
1575       --str_idx;
1576 
1577       if (mctx->state_log[str_idx])
1578         {
1579           err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1580           if (__glibc_unlikely (err != REG_NOERROR))
1581             goto free_return;
1582         }
1583 
1584       /* Add all the nodes which satisfy the following conditions:
1585          - It can epsilon transit to a node in CUR_DEST.
1586          - It is in CUR_SRC.
1587          And update state_log.  */
1588       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1589       if (__glibc_unlikely (err != REG_NOERROR))
1590         goto free_return;
1591     }
1592   err = REG_NOERROR;
1593  free_return:
1594   re_node_set_free (&cur_dest);
1595   return err;
1596 }
1597 
1598 static reg_errcode_t
1599 __attribute_warn_unused_result__
1600 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
     /* [previous][next][first][last][top][bottom][index][help] */
1601                      Idx str_idx, re_node_set *cur_dest)
1602 {
1603   const re_dfa_t *const dfa = mctx->dfa;
1604   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1605   Idx i;
1606 
1607   /* Then build the next sifted state.
1608      We build the next sifted state on 'cur_dest', and update
1609      'sifted_states[str_idx]' with 'cur_dest'.
1610      Note:
1611      'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1612      'cur_src' points the node_set of the old 'state_log[str_idx]'
1613      (with the epsilon nodes pre-filtered out).  */
1614   for (i = 0; i < cur_src->nelem; i++)
1615     {
1616       Idx prev_node = cur_src->elems[i];
1617       int naccepted = 0;
1618       bool ok;
1619       DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type));
1620 
1621       /* If the node may accept "multi byte".  */
1622       if (dfa->nodes[prev_node].accept_mb)
1623         naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1624                                          str_idx, sctx->last_str_idx);
1625 
1626       /* We don't check backreferences here.
1627          See update_cur_sifted_state().  */
1628       if (!naccepted
1629           && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1630           && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1631                                   dfa->nexts[prev_node]))
1632         naccepted = 1;
1633 
1634       if (naccepted == 0)
1635         continue;
1636 
1637       if (sctx->limits.nelem)
1638         {
1639           Idx to_idx = str_idx + naccepted;
1640           if (check_dst_limits (mctx, &sctx->limits,
1641                                 dfa->nexts[prev_node], to_idx,
1642                                 prev_node, str_idx))
1643             continue;
1644         }
1645       ok = re_node_set_insert (cur_dest, prev_node);
1646       if (__glibc_unlikely (! ok))
1647         return REG_ESPACE;
1648     }
1649 
1650   return REG_NOERROR;
1651 }
1652 
1653 /* Helper functions.  */
1654 
1655 static reg_errcode_t
1656 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
     /* [previous][next][first][last][top][bottom][index][help] */
1657 {
1658   Idx top = mctx->state_log_top;
1659 
1660   if ((next_state_log_idx >= mctx->input.bufs_len
1661        && mctx->input.bufs_len < mctx->input.len)
1662       || (next_state_log_idx >= mctx->input.valid_len
1663           && mctx->input.valid_len < mctx->input.len))
1664     {
1665       reg_errcode_t err;
1666       err = extend_buffers (mctx, next_state_log_idx + 1);
1667       if (__glibc_unlikely (err != REG_NOERROR))
1668         return err;
1669     }
1670 
1671   if (top < next_state_log_idx)
1672     {
1673       DEBUG_ASSERT (mctx->state_log != NULL);
1674       memset (mctx->state_log + top + 1, '\0',
1675               sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1676       mctx->state_log_top = next_state_log_idx;
1677     }
1678   return REG_NOERROR;
1679 }
1680 
1681 static reg_errcode_t
1682 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
     /* [previous][next][first][last][top][bottom][index][help] */
1683                    re_dfastate_t **src, Idx num)
1684 {
1685   Idx st_idx;
1686   reg_errcode_t err;
1687   for (st_idx = 0; st_idx < num; ++st_idx)
1688     {
1689       if (dst[st_idx] == NULL)
1690         dst[st_idx] = src[st_idx];
1691       else if (src[st_idx] != NULL)
1692         {
1693           re_node_set merged_set;
1694           err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1695                                         &src[st_idx]->nodes);
1696           if (__glibc_unlikely (err != REG_NOERROR))
1697             return err;
1698           dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1699           re_node_set_free (&merged_set);
1700           if (__glibc_unlikely (err != REG_NOERROR))
1701             return err;
1702         }
1703     }
1704   return REG_NOERROR;
1705 }
1706 
1707 static reg_errcode_t
1708 update_cur_sifted_state (const re_match_context_t *mctx,
     /* [previous][next][first][last][top][bottom][index][help] */
1709                          re_sift_context_t *sctx, Idx str_idx,
1710                          re_node_set *dest_nodes)
1711 {
1712   const re_dfa_t *const dfa = mctx->dfa;
1713   reg_errcode_t err = REG_NOERROR;
1714   const re_node_set *candidates;
1715   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1716                 : &mctx->state_log[str_idx]->nodes);
1717 
1718   if (dest_nodes->nelem == 0)
1719     sctx->sifted_states[str_idx] = NULL;
1720   else
1721     {
1722       if (candidates)
1723         {
1724           /* At first, add the nodes which can epsilon transit to a node in
1725              DEST_NODE.  */
1726           err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1727           if (__glibc_unlikely (err != REG_NOERROR))
1728             return err;
1729 
1730           /* Then, check the limitations in the current sift_context.  */
1731           if (sctx->limits.nelem)
1732             {
1733               err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1734                                          mctx->bkref_ents, str_idx);
1735               if (__glibc_unlikely (err != REG_NOERROR))
1736                 return err;
1737             }
1738         }
1739 
1740       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1741       if (__glibc_unlikely (err != REG_NOERROR))
1742         return err;
1743     }
1744 
1745   if (candidates && mctx->state_log[str_idx]->has_backref)
1746     {
1747       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1748       if (__glibc_unlikely (err != REG_NOERROR))
1749         return err;
1750     }
1751   return REG_NOERROR;
1752 }
1753 
1754 static reg_errcode_t
1755 __attribute_warn_unused_result__
1756 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
     /* [previous][next][first][last][top][bottom][index][help] */
1757                        const re_node_set *candidates)
1758 {
1759   reg_errcode_t err = REG_NOERROR;
1760   Idx i;
1761 
1762   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1763   if (__glibc_unlikely (err != REG_NOERROR))
1764     return err;
1765 
1766   if (!state->inveclosure.alloc)
1767     {
1768       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1769       if (__glibc_unlikely (err != REG_NOERROR))
1770         return REG_ESPACE;
1771       for (i = 0; i < dest_nodes->nelem; i++)
1772         {
1773           err = re_node_set_merge (&state->inveclosure,
1774                                    dfa->inveclosures + dest_nodes->elems[i]);
1775           if (__glibc_unlikely (err != REG_NOERROR))
1776             return REG_ESPACE;
1777         }
1778     }
1779   return re_node_set_add_intersect (dest_nodes, candidates,
1780                                     &state->inveclosure);
1781 }
1782 
1783 static reg_errcode_t
1784 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
     /* [previous][next][first][last][top][bottom][index][help] */
1785                        const re_node_set *candidates)
1786 {
1787     Idx ecl_idx;
1788     reg_errcode_t err;
1789     re_node_set *inv_eclosure = dfa->inveclosures + node;
1790     re_node_set except_nodes;
1791     re_node_set_init_empty (&except_nodes);
1792     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1793       {
1794         Idx cur_node = inv_eclosure->elems[ecl_idx];
1795         if (cur_node == node)
1796           continue;
1797         if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1798           {
1799             Idx edst1 = dfa->edests[cur_node].elems[0];
1800             Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1801                          ? dfa->edests[cur_node].elems[1] : -1);
1802             if ((!re_node_set_contains (inv_eclosure, edst1)
1803                  && re_node_set_contains (dest_nodes, edst1))
1804                 || (edst2 > 0
1805                     && !re_node_set_contains (inv_eclosure, edst2)
1806                     && re_node_set_contains (dest_nodes, edst2)))
1807               {
1808                 err = re_node_set_add_intersect (&except_nodes, candidates,
1809                                                  dfa->inveclosures + cur_node);
1810                 if (__glibc_unlikely (err != REG_NOERROR))
1811                   {
1812                     re_node_set_free (&except_nodes);
1813                     return err;
1814                   }
1815               }
1816           }
1817       }
1818     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1819       {
1820         Idx cur_node = inv_eclosure->elems[ecl_idx];
1821         if (!re_node_set_contains (&except_nodes, cur_node))
1822           {
1823             Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1824             re_node_set_remove_at (dest_nodes, idx);
1825           }
1826       }
1827     re_node_set_free (&except_nodes);
1828     return REG_NOERROR;
1829 }
1830 
1831 static bool
1832 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
     /* [previous][next][first][last][top][bottom][index][help] */
1833                   Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1834 {
1835   const re_dfa_t *const dfa = mctx->dfa;
1836   Idx lim_idx, src_pos, dst_pos;
1837 
1838   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1839   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1840   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1841     {
1842       Idx subexp_idx;
1843       struct re_backref_cache_entry *ent;
1844       ent = mctx->bkref_ents + limits->elems[lim_idx];
1845       subexp_idx = dfa->nodes[ent->node].opr.idx;
1846 
1847       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1848                                            subexp_idx, dst_node, dst_idx,
1849                                            dst_bkref_idx);
1850       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1851                                            subexp_idx, src_node, src_idx,
1852                                            src_bkref_idx);
1853 
1854       /* In case of:
1855          <src> <dst> ( <subexp> )
1856          ( <subexp> ) <src> <dst>
1857          ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1858       if (src_pos == dst_pos)
1859         continue; /* This is unrelated limitation.  */
1860       else
1861         return true;
1862     }
1863   return false;
1864 }
1865 
1866 static int
1867 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
     /* [previous][next][first][last][top][bottom][index][help] */
1868                              Idx subexp_idx, Idx from_node, Idx bkref_idx)
1869 {
1870   const re_dfa_t *const dfa = mctx->dfa;
1871   const re_node_set *eclosures = dfa->eclosures + from_node;
1872   Idx node_idx;
1873 
1874   /* Else, we are on the boundary: examine the nodes on the epsilon
1875      closure.  */
1876   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1877     {
1878       Idx node = eclosures->elems[node_idx];
1879       switch (dfa->nodes[node].type)
1880         {
1881         case OP_BACK_REF:
1882           if (bkref_idx != -1)
1883             {
1884               struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1885               do
1886                 {
1887                   Idx dst;
1888                   int cpos;
1889 
1890                   if (ent->node != node)
1891                     continue;
1892 
1893                   if (subexp_idx < BITSET_WORD_BITS
1894                       && !(ent->eps_reachable_subexps_map
1895                            & ((bitset_word_t) 1 << subexp_idx)))
1896                     continue;
1897 
1898                   /* Recurse trying to reach the OP_OPEN_SUBEXP and
1899                      OP_CLOSE_SUBEXP cases below.  But, if the
1900                      destination node is the same node as the source
1901                      node, don't recurse because it would cause an
1902                      infinite loop: a regex that exhibits this behavior
1903                      is ()\1*\1*  */
1904                   dst = dfa->edests[node].elems[0];
1905                   if (dst == from_node)
1906                     {
1907                       if (boundaries & 1)
1908                         return -1;
1909                       else /* if (boundaries & 2) */
1910                         return 0;
1911                     }
1912 
1913                   cpos =
1914                     check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1915                                                  dst, bkref_idx);
1916                   if (cpos == -1 /* && (boundaries & 1) */)
1917                     return -1;
1918                   if (cpos == 0 && (boundaries & 2))
1919                     return 0;
1920 
1921                   if (subexp_idx < BITSET_WORD_BITS)
1922                     ent->eps_reachable_subexps_map
1923                       &= ~((bitset_word_t) 1 << subexp_idx);
1924                 }
1925               while (ent++->more);
1926             }
1927           break;
1928 
1929         case OP_OPEN_SUBEXP:
1930           if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1931             return -1;
1932           break;
1933 
1934         case OP_CLOSE_SUBEXP:
1935           if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1936             return 0;
1937           break;
1938 
1939         default:
1940             break;
1941         }
1942     }
1943 
1944   return (boundaries & 2) ? 1 : 0;
1945 }
1946 
1947 static int
1948 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
     /* [previous][next][first][last][top][bottom][index][help] */
1949                            Idx subexp_idx, Idx from_node, Idx str_idx,
1950                            Idx bkref_idx)
1951 {
1952   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1953   int boundaries;
1954 
1955   /* If we are outside the range of the subexpression, return -1 or 1.  */
1956   if (str_idx < lim->subexp_from)
1957     return -1;
1958 
1959   if (lim->subexp_to < str_idx)
1960     return 1;
1961 
1962   /* If we are within the subexpression, return 0.  */
1963   boundaries = (str_idx == lim->subexp_from);
1964   boundaries |= (str_idx == lim->subexp_to) << 1;
1965   if (boundaries == 0)
1966     return 0;
1967 
1968   /* Else, examine epsilon closure.  */
1969   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1970                                       from_node, bkref_idx);
1971 }
1972 
1973 /* Check the limitations of sub expressions LIMITS, and remove the nodes
1974    which are against limitations from DEST_NODES. */
1975 
1976 static reg_errcode_t
1977 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
     /* [previous][next][first][last][top][bottom][index][help] */
1978                      const re_node_set *candidates, re_node_set *limits,
1979                      struct re_backref_cache_entry *bkref_ents, Idx str_idx)
1980 {
1981   reg_errcode_t err;
1982   Idx node_idx, lim_idx;
1983 
1984   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1985     {
1986       Idx subexp_idx;
1987       struct re_backref_cache_entry *ent;
1988       ent = bkref_ents + limits->elems[lim_idx];
1989 
1990       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
1991         continue; /* This is unrelated limitation.  */
1992 
1993       subexp_idx = dfa->nodes[ent->node].opr.idx;
1994       if (ent->subexp_to == str_idx)
1995         {
1996           Idx ops_node = -1;
1997           Idx cls_node = -1;
1998           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1999             {
2000               Idx node = dest_nodes->elems[node_idx];
2001               re_token_type_t type = dfa->nodes[node].type;
2002               if (type == OP_OPEN_SUBEXP
2003                   && subexp_idx == dfa->nodes[node].opr.idx)
2004                 ops_node = node;
2005               else if (type == OP_CLOSE_SUBEXP
2006                        && subexp_idx == dfa->nodes[node].opr.idx)
2007                 cls_node = node;
2008             }
2009 
2010           /* Check the limitation of the open subexpression.  */
2011           /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2012           if (ops_node >= 0)
2013             {
2014               err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2015                                            candidates);
2016               if (__glibc_unlikely (err != REG_NOERROR))
2017                 return err;
2018             }
2019 
2020           /* Check the limitation of the close subexpression.  */
2021           if (cls_node >= 0)
2022             for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2023               {
2024                 Idx node = dest_nodes->elems[node_idx];
2025                 if (!re_node_set_contains (dfa->inveclosures + node,
2026                                            cls_node)
2027                     && !re_node_set_contains (dfa->eclosures + node,
2028                                               cls_node))
2029                   {
2030                     /* It is against this limitation.
2031                        Remove it form the current sifted state.  */
2032                     err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2033                                                  candidates);
2034                     if (__glibc_unlikely (err != REG_NOERROR))
2035                       return err;
2036                     --node_idx;
2037                   }
2038               }
2039         }
2040       else /* (ent->subexp_to != str_idx)  */
2041         {
2042           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2043             {
2044               Idx node = dest_nodes->elems[node_idx];
2045               re_token_type_t type = dfa->nodes[node].type;
2046               if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2047                 {
2048                   if (subexp_idx != dfa->nodes[node].opr.idx)
2049                     continue;
2050                   /* It is against this limitation.
2051                      Remove it form the current sifted state.  */
2052                   err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2053                                                candidates);
2054                   if (__glibc_unlikely (err != REG_NOERROR))
2055                     return err;
2056                 }
2057             }
2058         }
2059     }
2060   return REG_NOERROR;
2061 }
2062 
2063 static reg_errcode_t
2064 __attribute_warn_unused_result__
2065 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
     /* [previous][next][first][last][top][bottom][index][help] */
2066                    Idx str_idx, const re_node_set *candidates)
2067 {
2068   const re_dfa_t *const dfa = mctx->dfa;
2069   reg_errcode_t err;
2070   Idx node_idx, node;
2071   re_sift_context_t local_sctx;
2072   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2073 
2074   if (first_idx == -1)
2075     return REG_NOERROR;
2076 
2077   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2078 
2079   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2080     {
2081       Idx enabled_idx;
2082       re_token_type_t type;
2083       struct re_backref_cache_entry *entry;
2084       node = candidates->elems[node_idx];
2085       type = dfa->nodes[node].type;
2086       /* Avoid infinite loop for the REs like "()\1+".  */
2087       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2088         continue;
2089       if (type != OP_BACK_REF)
2090         continue;
2091 
2092       entry = mctx->bkref_ents + first_idx;
2093       enabled_idx = first_idx;
2094       do
2095         {
2096           Idx subexp_len;
2097           Idx to_idx;
2098           Idx dst_node;
2099           bool ok;
2100           re_dfastate_t *cur_state;
2101 
2102           if (entry->node != node)
2103             continue;
2104           subexp_len = entry->subexp_to - entry->subexp_from;
2105           to_idx = str_idx + subexp_len;
2106           dst_node = (subexp_len ? dfa->nexts[node]
2107                       : dfa->edests[node].elems[0]);
2108 
2109           if (to_idx > sctx->last_str_idx
2110               || sctx->sifted_states[to_idx] == NULL
2111               || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2112               || check_dst_limits (mctx, &sctx->limits, node,
2113                                    str_idx, dst_node, to_idx))
2114             continue;
2115 
2116           if (local_sctx.sifted_states == NULL)
2117             {
2118               local_sctx = *sctx;
2119               err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2120               if (__glibc_unlikely (err != REG_NOERROR))
2121                 goto free_return;
2122             }
2123           local_sctx.last_node = node;
2124           local_sctx.last_str_idx = str_idx;
2125           ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2126           if (__glibc_unlikely (! ok))
2127             {
2128               err = REG_ESPACE;
2129               goto free_return;
2130             }
2131           cur_state = local_sctx.sifted_states[str_idx];
2132           err = sift_states_backward (mctx, &local_sctx);
2133           if (__glibc_unlikely (err != REG_NOERROR))
2134             goto free_return;
2135           if (sctx->limited_states != NULL)
2136             {
2137               err = merge_state_array (dfa, sctx->limited_states,
2138                                        local_sctx.sifted_states,
2139                                        str_idx + 1);
2140               if (__glibc_unlikely (err != REG_NOERROR))
2141                 goto free_return;
2142             }
2143           local_sctx.sifted_states[str_idx] = cur_state;
2144           re_node_set_remove (&local_sctx.limits, enabled_idx);
2145 
2146           /* mctx->bkref_ents may have changed, reload the pointer.  */
2147           entry = mctx->bkref_ents + enabled_idx;
2148         }
2149       while (enabled_idx++, entry++->more);
2150     }
2151   err = REG_NOERROR;
2152  free_return:
2153   if (local_sctx.sifted_states != NULL)
2154     {
2155       re_node_set_free (&local_sctx.limits);
2156     }
2157 
2158   return err;
2159 }
2160 
2161 
2162 static int
2163 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
     /* [previous][next][first][last][top][bottom][index][help] */
2164                      Idx node_idx, Idx str_idx, Idx max_str_idx)
2165 {
2166   const re_dfa_t *const dfa = mctx->dfa;
2167   int naccepted;
2168   /* Check the node can accept "multi byte".  */
2169   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2170   if (naccepted > 0 && str_idx + naccepted <= max_str_idx
2171       && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2172                                dfa->nexts[node_idx]))
2173     /* The node can't accept the "multi byte", or the
2174        destination was already thrown away, then the node
2175        couldn't accept the current input "multi byte".   */
2176     naccepted = 0;
2177   /* Otherwise, it is sure that the node could accept
2178      'naccepted' bytes input.  */
2179   return naccepted;
2180 }
2181 
2182 /* Functions for state transition.  */
2183 
2184 /* Return the next state to which the current state STATE will transit by
2185    accepting the current input byte, and update STATE_LOG if necessary.
2186    Return NULL on failure.
2187    If STATE can accept a multibyte char/collating element/back reference
2188    update the destination of STATE_LOG.  */
2189 
2190 static re_dfastate_t *
2191 __attribute_warn_unused_result__
2192 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
     /* [previous][next][first][last][top][bottom][index][help] */
2193                re_dfastate_t *state)
2194 {
2195   re_dfastate_t **trtable;
2196   unsigned char ch;
2197 
2198   /* If the current state can accept multibyte.  */
2199   if (__glibc_unlikely (state->accept_mb))
2200     {
2201       *err = transit_state_mb (mctx, state);
2202       if (__glibc_unlikely (*err != REG_NOERROR))
2203         return NULL;
2204     }
2205 
2206   /* Then decide the next state with the single byte.  */
2207 #if 0
2208   if (0)
2209     /* don't use transition table  */
2210     return transit_state_sb (err, mctx, state);
2211 #endif
2212 
2213   /* Use transition table  */
2214   ch = re_string_fetch_byte (&mctx->input);
2215   for (;;)
2216     {
2217       trtable = state->trtable;
2218       if (__glibc_likely (trtable != NULL))
2219         return trtable[ch];
2220 
2221       trtable = state->word_trtable;
2222       if (__glibc_likely (trtable != NULL))
2223         {
2224           unsigned int context;
2225           context
2226             = re_string_context_at (&mctx->input,
2227                                     re_string_cur_idx (&mctx->input) - 1,
2228                                     mctx->eflags);
2229           if (IS_WORD_CONTEXT (context))
2230             return trtable[ch + SBC_MAX];
2231           else
2232             return trtable[ch];
2233         }
2234 
2235       if (!build_trtable (mctx->dfa, state))
2236         {
2237           *err = REG_ESPACE;
2238           return NULL;
2239         }
2240 
2241       /* Retry, we now have a transition table.  */
2242     }
2243 }
2244 
2245 /* Update the state_log if we need */
2246 static re_dfastate_t *
2247 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
     /* [previous][next][first][last][top][bottom][index][help] */
2248                       re_dfastate_t *next_state)
2249 {
2250   const re_dfa_t *const dfa = mctx->dfa;
2251   Idx cur_idx = re_string_cur_idx (&mctx->input);
2252 
2253   if (cur_idx > mctx->state_log_top)
2254     {
2255       mctx->state_log[cur_idx] = next_state;
2256       mctx->state_log_top = cur_idx;
2257     }
2258   else if (mctx->state_log[cur_idx] == 0)
2259     {
2260       mctx->state_log[cur_idx] = next_state;
2261     }
2262   else
2263     {
2264       re_dfastate_t *pstate;
2265       unsigned int context;
2266       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2267       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2268          the destination of a multibyte char/collating element/
2269          back reference.  Then the next state is the union set of
2270          these destinations and the results of the transition table.  */
2271       pstate = mctx->state_log[cur_idx];
2272       log_nodes = pstate->entrance_nodes;
2273       if (next_state != NULL)
2274         {
2275           table_nodes = next_state->entrance_nodes;
2276           *err = re_node_set_init_union (&next_nodes, table_nodes,
2277                                              log_nodes);
2278           if (__glibc_unlikely (*err != REG_NOERROR))
2279             return NULL;
2280         }
2281       else
2282         next_nodes = *log_nodes;
2283       /* Note: We already add the nodes of the initial state,
2284          then we don't need to add them here.  */
2285 
2286       context = re_string_context_at (&mctx->input,
2287                                       re_string_cur_idx (&mctx->input) - 1,
2288                                       mctx->eflags);
2289       next_state = mctx->state_log[cur_idx]
2290         = re_acquire_state_context (err, dfa, &next_nodes, context);
2291       /* We don't need to check errors here, since the return value of
2292          this function is next_state and ERR is already set.  */
2293 
2294       if (table_nodes != NULL)
2295         re_node_set_free (&next_nodes);
2296     }
2297 
2298   if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
2299     {
2300       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2301          later.  We must check them here, since the back references in the
2302          next state might use them.  */
2303       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2304                                         cur_idx);
2305       if (__glibc_unlikely (*err != REG_NOERROR))
2306         return NULL;
2307 
2308       /* If the next state has back references.  */
2309       if (next_state->has_backref)
2310         {
2311           *err = transit_state_bkref (mctx, &next_state->nodes);
2312           if (__glibc_unlikely (*err != REG_NOERROR))
2313             return NULL;
2314           next_state = mctx->state_log[cur_idx];
2315         }
2316     }
2317 
2318   return next_state;
2319 }
2320 
2321 /* Skip bytes in the input that correspond to part of a
2322    multi-byte match, then look in the log for a state
2323    from which to restart matching.  */
2324 static re_dfastate_t *
2325 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
     /* [previous][next][first][last][top][bottom][index][help] */
2326 {
2327   re_dfastate_t *cur_state;
2328   do
2329     {
2330       Idx max = mctx->state_log_top;
2331       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2332 
2333       do
2334         {
2335           if (++cur_str_idx > max)
2336             return NULL;
2337           re_string_skip_bytes (&mctx->input, 1);
2338         }
2339       while (mctx->state_log[cur_str_idx] == NULL);
2340 
2341       cur_state = merge_state_with_log (err, mctx, NULL);
2342     }
2343   while (*err == REG_NOERROR && cur_state == NULL);
2344   return cur_state;
2345 }
2346 
2347 /* Helper functions for transit_state.  */
2348 
2349 /* From the node set CUR_NODES, pick up the nodes whose types are
2350    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2351    expression. And register them to use them later for evaluating the
2352    corresponding back references.  */
2353 
2354 static reg_errcode_t
2355 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
     /* [previous][next][first][last][top][bottom][index][help] */
2356                            Idx str_idx)
2357 {
2358   const re_dfa_t *const dfa = mctx->dfa;
2359   Idx node_idx;
2360   reg_errcode_t err;
2361 
2362   /* TODO: This isn't efficient.
2363            Because there might be more than one nodes whose types are
2364            OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2365            nodes.
2366            E.g. RE: (a){2}  */
2367   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2368     {
2369       Idx node = cur_nodes->elems[node_idx];
2370       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2371           && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2372           && (dfa->used_bkref_map
2373               & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2374         {
2375           err = match_ctx_add_subtop (mctx, node, str_idx);
2376           if (__glibc_unlikely (err != REG_NOERROR))
2377             return err;
2378         }
2379     }
2380   return REG_NOERROR;
2381 }
2382 
2383 #if 0
2384 /* Return the next state to which the current state STATE will transit by
2385    accepting the current input byte.  Return NULL on failure.  */
2386 
2387 static re_dfastate_t *
2388 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
     /* [previous][next][first][last][top][bottom][index][help] */
2389                   re_dfastate_t *state)
2390 {
2391   const re_dfa_t *const dfa = mctx->dfa;
2392   re_node_set next_nodes;
2393   re_dfastate_t *next_state;
2394   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2395   unsigned int context;
2396 
2397   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2398   if (__glibc_unlikely (*err != REG_NOERROR))
2399     return NULL;
2400   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2401     {
2402       Idx cur_node = state->nodes.elems[node_cnt];
2403       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2404         {
2405           *err = re_node_set_merge (&next_nodes,
2406                                     dfa->eclosures + dfa->nexts[cur_node]);
2407           if (__glibc_unlikely (*err != REG_NOERROR))
2408             {
2409               re_node_set_free (&next_nodes);
2410               return NULL;
2411             }
2412         }
2413     }
2414   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2415   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2416   /* We don't need to check errors here, since the return value of
2417      this function is next_state and ERR is already set.  */
2418 
2419   re_node_set_free (&next_nodes);
2420   re_string_skip_bytes (&mctx->input, 1);
2421   return next_state;
2422 }
2423 #endif
2424 
2425 static reg_errcode_t
2426 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
     /* [previous][next][first][last][top][bottom][index][help] */
2427 {
2428   const re_dfa_t *const dfa = mctx->dfa;
2429   reg_errcode_t err;
2430   Idx i;
2431 
2432   for (i = 0; i < pstate->nodes.nelem; ++i)
2433     {
2434       re_node_set dest_nodes, *new_nodes;
2435       Idx cur_node_idx = pstate->nodes.elems[i];
2436       int naccepted;
2437       Idx dest_idx;
2438       unsigned int context;
2439       re_dfastate_t *dest_state;
2440 
2441       if (!dfa->nodes[cur_node_idx].accept_mb)
2442         continue;
2443 
2444       if (dfa->nodes[cur_node_idx].constraint)
2445         {
2446           context = re_string_context_at (&mctx->input,
2447                                           re_string_cur_idx (&mctx->input),
2448                                           mctx->eflags);
2449           if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2450                                            context))
2451             continue;
2452         }
2453 
2454       /* How many bytes the node can accept?  */
2455       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2456                                            re_string_cur_idx (&mctx->input));
2457       if (naccepted == 0)
2458         continue;
2459 
2460       /* The node can accepts 'naccepted' bytes.  */
2461       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2462       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2463                                : mctx->max_mb_elem_len);
2464       err = clean_state_log_if_needed (mctx, dest_idx);
2465       if (__glibc_unlikely (err != REG_NOERROR))
2466         return err;
2467       DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -1);
2468       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2469 
2470       dest_state = mctx->state_log[dest_idx];
2471       if (dest_state == NULL)
2472         dest_nodes = *new_nodes;
2473       else
2474         {
2475           err = re_node_set_init_union (&dest_nodes,
2476                                         dest_state->entrance_nodes, new_nodes);
2477           if (__glibc_unlikely (err != REG_NOERROR))
2478             return err;
2479         }
2480       context = re_string_context_at (&mctx->input, dest_idx - 1,
2481                                       mctx->eflags);
2482       mctx->state_log[dest_idx]
2483         = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2484       if (dest_state != NULL)
2485         re_node_set_free (&dest_nodes);
2486       if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
2487                             && err != REG_NOERROR))
2488         return err;
2489     }
2490   return REG_NOERROR;
2491 }
2492 
2493 static reg_errcode_t
2494 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
     /* [previous][next][first][last][top][bottom][index][help] */
2495 {
2496   const re_dfa_t *const dfa = mctx->dfa;
2497   reg_errcode_t err;
2498   Idx i;
2499   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2500 
2501   for (i = 0; i < nodes->nelem; ++i)
2502     {
2503       Idx dest_str_idx, prev_nelem, bkc_idx;
2504       Idx node_idx = nodes->elems[i];
2505       unsigned int context;
2506       const re_token_t *node = dfa->nodes + node_idx;
2507       re_node_set *new_dest_nodes;
2508 
2509       /* Check whether 'node' is a backreference or not.  */
2510       if (node->type != OP_BACK_REF)
2511         continue;
2512 
2513       if (node->constraint)
2514         {
2515           context = re_string_context_at (&mctx->input, cur_str_idx,
2516                                           mctx->eflags);
2517           if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2518             continue;
2519         }
2520 
2521       /* 'node' is a backreference.
2522          Check the substring which the substring matched.  */
2523       bkc_idx = mctx->nbkref_ents;
2524       err = get_subexp (mctx, node_idx, cur_str_idx);
2525       if (__glibc_unlikely (err != REG_NOERROR))
2526         goto free_return;
2527 
2528       /* And add the epsilon closures (which is 'new_dest_nodes') of
2529          the backreference to appropriate state_log.  */
2530       DEBUG_ASSERT (dfa->nexts[node_idx] != -1);
2531       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2532         {
2533           Idx subexp_len;
2534           re_dfastate_t *dest_state;
2535           struct re_backref_cache_entry *bkref_ent;
2536           bkref_ent = mctx->bkref_ents + bkc_idx;
2537           if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2538             continue;
2539           subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2540           new_dest_nodes = (subexp_len == 0
2541                             ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2542                             : dfa->eclosures + dfa->nexts[node_idx]);
2543           dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2544                           - bkref_ent->subexp_from);
2545           context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2546                                           mctx->eflags);
2547           dest_state = mctx->state_log[dest_str_idx];
2548           prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2549                         : mctx->state_log[cur_str_idx]->nodes.nelem);
2550           /* Add 'new_dest_node' to state_log.  */
2551           if (dest_state == NULL)
2552             {
2553               mctx->state_log[dest_str_idx]
2554                 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2555                                             context);
2556               if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2557                                     && err != REG_NOERROR))
2558                 goto free_return;
2559             }
2560           else
2561             {
2562               re_node_set dest_nodes;
2563               err = re_node_set_init_union (&dest_nodes,
2564                                             dest_state->entrance_nodes,
2565                                             new_dest_nodes);
2566               if (__glibc_unlikely (err != REG_NOERROR))
2567                 {
2568                   re_node_set_free (&dest_nodes);
2569                   goto free_return;
2570                 }
2571               mctx->state_log[dest_str_idx]
2572                 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2573               re_node_set_free (&dest_nodes);
2574               if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2575                                     && err != REG_NOERROR))
2576                 goto free_return;
2577             }
2578           /* We need to check recursively if the backreference can epsilon
2579              transit.  */
2580           if (subexp_len == 0
2581               && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2582             {
2583               err = check_subexp_matching_top (mctx, new_dest_nodes,
2584                                                cur_str_idx);
2585               if (__glibc_unlikely (err != REG_NOERROR))
2586                 goto free_return;
2587               err = transit_state_bkref (mctx, new_dest_nodes);
2588               if (__glibc_unlikely (err != REG_NOERROR))
2589                 goto free_return;
2590             }
2591         }
2592     }
2593   err = REG_NOERROR;
2594  free_return:
2595   return err;
2596 }
2597 
2598 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2599    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2600    Note that we might collect inappropriate candidates here.
2601    However, the cost of checking them strictly here is too high, then we
2602    delay these checking for prune_impossible_nodes().  */
2603 
2604 static reg_errcode_t
2605 __attribute_warn_unused_result__
2606 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
     /* [previous][next][first][last][top][bottom][index][help] */
2607 {
2608   const re_dfa_t *const dfa = mctx->dfa;
2609   Idx subexp_num, sub_top_idx;
2610   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2611   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2612   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2613   if (cache_idx != -1)
2614     {
2615       const struct re_backref_cache_entry *entry
2616         = mctx->bkref_ents + cache_idx;
2617       do
2618         if (entry->node == bkref_node)
2619           return REG_NOERROR; /* We already checked it.  */
2620       while (entry++->more);
2621     }
2622 
2623   subexp_num = dfa->nodes[bkref_node].opr.idx;
2624 
2625   /* For each sub expression  */
2626   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2627     {
2628       reg_errcode_t err;
2629       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2630       re_sub_match_last_t *sub_last;
2631       Idx sub_last_idx, sl_str, bkref_str_off;
2632 
2633       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2634         continue; /* It isn't related.  */
2635 
2636       sl_str = sub_top->str_idx;
2637       bkref_str_off = bkref_str_idx;
2638       /* At first, check the last node of sub expressions we already
2639          evaluated.  */
2640       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2641         {
2642           regoff_t sl_str_diff;
2643           sub_last = sub_top->lasts[sub_last_idx];
2644           sl_str_diff = sub_last->str_idx - sl_str;
2645           /* The matched string by the sub expression match with the substring
2646              at the back reference?  */
2647           if (sl_str_diff > 0)
2648             {
2649               if (__glibc_unlikely (bkref_str_off + sl_str_diff
2650                                     > mctx->input.valid_len))
2651                 {
2652                   /* Not enough chars for a successful match.  */
2653                   if (bkref_str_off + sl_str_diff > mctx->input.len)
2654                     break;
2655 
2656                   err = clean_state_log_if_needed (mctx,
2657                                                    bkref_str_off
2658                                                    + sl_str_diff);
2659                   if (__glibc_unlikely (err != REG_NOERROR))
2660                     return err;
2661                   buf = (const char *) re_string_get_buffer (&mctx->input);
2662                 }
2663               if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2664                 /* We don't need to search this sub expression any more.  */
2665                 break;
2666             }
2667           bkref_str_off += sl_str_diff;
2668           sl_str += sl_str_diff;
2669           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2670                                 bkref_str_idx);
2671 
2672           /* Reload buf, since the preceding call might have reallocated
2673              the buffer.  */
2674           buf = (const char *) re_string_get_buffer (&mctx->input);
2675 
2676           if (err == REG_NOMATCH)
2677             continue;
2678           if (__glibc_unlikely (err != REG_NOERROR))
2679             return err;
2680         }
2681 
2682       if (sub_last_idx < sub_top->nlasts)
2683         continue;
2684       if (sub_last_idx > 0)
2685         ++sl_str;
2686       /* Then, search for the other last nodes of the sub expression.  */
2687       for (; sl_str <= bkref_str_idx; ++sl_str)
2688         {
2689           Idx cls_node;
2690           regoff_t sl_str_off;
2691           const re_node_set *nodes;
2692           sl_str_off = sl_str - sub_top->str_idx;
2693           /* The matched string by the sub expression match with the substring
2694              at the back reference?  */
2695           if (sl_str_off > 0)
2696             {
2697               if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
2698                 {
2699                   /* If we are at the end of the input, we cannot match.  */
2700                   if (bkref_str_off >= mctx->input.len)
2701                     break;
2702 
2703                   err = extend_buffers (mctx, bkref_str_off + 1);
2704                   if (__glibc_unlikely (err != REG_NOERROR))
2705                     return err;
2706 
2707                   buf = (const char *) re_string_get_buffer (&mctx->input);
2708                 }
2709               if (buf [bkref_str_off++] != buf[sl_str - 1])
2710                 break; /* We don't need to search this sub expression
2711                           any more.  */
2712             }
2713           if (mctx->state_log[sl_str] == NULL)
2714             continue;
2715           /* Does this state have a ')' of the sub expression?  */
2716           nodes = &mctx->state_log[sl_str]->nodes;
2717           cls_node = find_subexp_node (dfa, nodes, subexp_num,
2718                                        OP_CLOSE_SUBEXP);
2719           if (cls_node == -1)
2720             continue; /* No.  */
2721           if (sub_top->path == NULL)
2722             {
2723               sub_top->path = calloc (sizeof (state_array_t),
2724                                       sl_str - sub_top->str_idx + 1);
2725               if (sub_top->path == NULL)
2726                 return REG_ESPACE;
2727             }
2728           /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2729              in the current context?  */
2730           err = check_arrival (mctx, sub_top->path, sub_top->node,
2731                                sub_top->str_idx, cls_node, sl_str,
2732                                OP_CLOSE_SUBEXP);
2733           if (err == REG_NOMATCH)
2734               continue;
2735           if (__glibc_unlikely (err != REG_NOERROR))
2736               return err;
2737           sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2738           if (__glibc_unlikely (sub_last == NULL))
2739             return REG_ESPACE;
2740           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2741                                 bkref_str_idx);
2742           buf = (const char *) re_string_get_buffer (&mctx->input);
2743           if (err == REG_NOMATCH)
2744             continue;
2745           if (__glibc_unlikely (err != REG_NOERROR))
2746             return err;
2747         }
2748     }
2749   return REG_NOERROR;
2750 }
2751 
2752 /* Helper functions for get_subexp().  */
2753 
2754 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2755    If it can arrive, register the sub expression expressed with SUB_TOP
2756    and SUB_LAST.  */
2757 
2758 static reg_errcode_t
2759 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
     /* [previous][next][first][last][top][bottom][index][help] */
2760                 re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2761 {
2762   reg_errcode_t err;
2763   Idx to_idx;
2764   /* Can the subexpression arrive the back reference?  */
2765   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2766                        sub_last->str_idx, bkref_node, bkref_str,
2767                        OP_OPEN_SUBEXP);
2768   if (err != REG_NOERROR)
2769     return err;
2770   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2771                              sub_last->str_idx);
2772   if (__glibc_unlikely (err != REG_NOERROR))
2773     return err;
2774   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2775   return clean_state_log_if_needed (mctx, to_idx);
2776 }
2777 
2778 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2779    Search '(' if FL_OPEN, or search ')' otherwise.
2780    TODO: This function isn't efficient...
2781          Because there might be more than one nodes whose types are
2782          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2783          nodes.
2784          E.g. RE: (a){2}  */
2785 
2786 static Idx
2787 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
     /* [previous][next][first][last][top][bottom][index][help] */
2788                   Idx subexp_idx, int type)
2789 {
2790   Idx cls_idx;
2791   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2792     {
2793       Idx cls_node = nodes->elems[cls_idx];
2794       const re_token_t *node = dfa->nodes + cls_node;
2795       if (node->type == type
2796           && node->opr.idx == subexp_idx)
2797         return cls_node;
2798     }
2799   return -1;
2800 }
2801 
2802 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2803    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2804    heavily reused.
2805    Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot,
2806    REG_ESPACE if memory is exhausted.  */
2807 
2808 static reg_errcode_t
2809 __attribute_warn_unused_result__
2810 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
     /* [previous][next][first][last][top][bottom][index][help] */
2811                Idx top_str, Idx last_node, Idx last_str, int type)
2812 {
2813   const re_dfa_t *const dfa = mctx->dfa;
2814   reg_errcode_t err = REG_NOERROR;
2815   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2816   re_dfastate_t *cur_state = NULL;
2817   re_node_set *cur_nodes, next_nodes;
2818   re_dfastate_t **backup_state_log;
2819   unsigned int context;
2820 
2821   subexp_num = dfa->nodes[top_node].opr.idx;
2822   /* Extend the buffer if we need.  */
2823   if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1))
2824     {
2825       re_dfastate_t **new_array;
2826       Idx old_alloc = path->alloc;
2827       Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2828       Idx new_alloc;
2829       if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
2830         return REG_ESPACE;
2831       new_alloc = old_alloc + incr_alloc;
2832       if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
2833         return REG_ESPACE;
2834       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2835       if (__glibc_unlikely (new_array == NULL))
2836         return REG_ESPACE;
2837       path->array = new_array;
2838       path->alloc = new_alloc;
2839       memset (new_array + old_alloc, '\0',
2840               sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2841     }
2842 
2843   str_idx = path->next_idx ? path->next_idx : top_str;
2844 
2845   /* Temporary modify MCTX.  */
2846   backup_state_log = mctx->state_log;
2847   backup_cur_idx = mctx->input.cur_idx;
2848   mctx->state_log = path->array;
2849   mctx->input.cur_idx = str_idx;
2850 
2851   /* Setup initial node set.  */
2852   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2853   if (str_idx == top_str)
2854     {
2855       err = re_node_set_init_1 (&next_nodes, top_node);
2856       if (__glibc_unlikely (err != REG_NOERROR))
2857         return err;
2858       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2859       if (__glibc_unlikely (err != REG_NOERROR))
2860         {
2861           re_node_set_free (&next_nodes);
2862           return err;
2863         }
2864     }
2865   else
2866     {
2867       cur_state = mctx->state_log[str_idx];
2868       if (cur_state && cur_state->has_backref)
2869         {
2870           err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2871           if (__glibc_unlikely (err != REG_NOERROR))
2872             return err;
2873         }
2874       else
2875         re_node_set_init_empty (&next_nodes);
2876     }
2877   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2878     {
2879       if (next_nodes.nelem)
2880         {
2881           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2882                                     subexp_num, type);
2883           if (__glibc_unlikely (err != REG_NOERROR))
2884             {
2885               re_node_set_free (&next_nodes);
2886               return err;
2887             }
2888         }
2889       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2890       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2891         {
2892           re_node_set_free (&next_nodes);
2893           return err;
2894         }
2895       mctx->state_log[str_idx] = cur_state;
2896     }
2897 
2898   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2899     {
2900       re_node_set_empty (&next_nodes);
2901       if (mctx->state_log[str_idx + 1])
2902         {
2903           err = re_node_set_merge (&next_nodes,
2904                                    &mctx->state_log[str_idx + 1]->nodes);
2905           if (__glibc_unlikely (err != REG_NOERROR))
2906             {
2907               re_node_set_free (&next_nodes);
2908               return err;
2909             }
2910         }
2911       if (cur_state)
2912         {
2913           err = check_arrival_add_next_nodes (mctx, str_idx,
2914                                               &cur_state->non_eps_nodes,
2915                                               &next_nodes);
2916           if (__glibc_unlikely (err != REG_NOERROR))
2917             {
2918               re_node_set_free (&next_nodes);
2919               return err;
2920             }
2921         }
2922       ++str_idx;
2923       if (next_nodes.nelem)
2924         {
2925           err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2926           if (__glibc_unlikely (err != REG_NOERROR))
2927             {
2928               re_node_set_free (&next_nodes);
2929               return err;
2930             }
2931           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2932                                     subexp_num, type);
2933           if (__glibc_unlikely (err != REG_NOERROR))
2934             {
2935               re_node_set_free (&next_nodes);
2936               return err;
2937             }
2938         }
2939       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2940       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2941       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2942         {
2943           re_node_set_free (&next_nodes);
2944           return err;
2945         }
2946       mctx->state_log[str_idx] = cur_state;
2947       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2948     }
2949   re_node_set_free (&next_nodes);
2950   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2951                : &mctx->state_log[last_str]->nodes);
2952   path->next_idx = str_idx;
2953 
2954   /* Fix MCTX.  */
2955   mctx->state_log = backup_state_log;
2956   mctx->input.cur_idx = backup_cur_idx;
2957 
2958   /* Then check the current node set has the node LAST_NODE.  */
2959   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2960     return REG_NOERROR;
2961 
2962   return REG_NOMATCH;
2963 }
2964 
2965 /* Helper functions for check_arrival.  */
2966 
2967 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
2968    to NEXT_NODES.
2969    TODO: This function is similar to the functions transit_state*(),
2970          however this function has many additional works.
2971          Can't we unify them?  */
2972 
2973 static reg_errcode_t
2974 __attribute_warn_unused_result__
2975 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
     /* [previous][next][first][last][top][bottom][index][help] */
2976                               re_node_set *cur_nodes, re_node_set *next_nodes)
2977 {
2978   const re_dfa_t *const dfa = mctx->dfa;
2979   bool ok;
2980   Idx cur_idx;
2981   reg_errcode_t err = REG_NOERROR;
2982   re_node_set union_set;
2983   re_node_set_init_empty (&union_set);
2984   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
2985     {
2986       int naccepted = 0;
2987       Idx cur_node = cur_nodes->elems[cur_idx];
2988       DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type));
2989 
2990       /* If the node may accept "multi byte".  */
2991       if (dfa->nodes[cur_node].accept_mb)
2992         {
2993           naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
2994                                                str_idx);
2995           if (naccepted > 1)
2996             {
2997               re_dfastate_t *dest_state;
2998               Idx next_node = dfa->nexts[cur_node];
2999               Idx next_idx = str_idx + naccepted;
3000               dest_state = mctx->state_log[next_idx];
3001               re_node_set_empty (&union_set);
3002               if (dest_state)
3003                 {
3004                   err = re_node_set_merge (&union_set, &dest_state->nodes);
3005                   if (__glibc_unlikely (err != REG_NOERROR))
3006                     {
3007                       re_node_set_free (&union_set);
3008                       return err;
3009                     }
3010                 }
3011               ok = re_node_set_insert (&union_set, next_node);
3012               if (__glibc_unlikely (! ok))
3013                 {
3014                   re_node_set_free (&union_set);
3015                   return REG_ESPACE;
3016                 }
3017               mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3018                                                             &union_set);
3019               if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
3020                                     && err != REG_NOERROR))
3021                 {
3022                   re_node_set_free (&union_set);
3023                   return err;
3024                 }
3025             }
3026         }
3027 
3028       if (naccepted
3029           || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3030         {
3031           ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3032           if (__glibc_unlikely (! ok))
3033             {
3034               re_node_set_free (&union_set);
3035               return REG_ESPACE;
3036             }
3037         }
3038     }
3039   re_node_set_free (&union_set);
3040   return REG_NOERROR;
3041 }
3042 
3043 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3044    CUR_NODES, however exclude the nodes which are:
3045     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3046     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3047 */
3048 
3049 static reg_errcode_t
3050 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
     /* [previous][next][first][last][top][bottom][index][help] */
3051                           Idx ex_subexp, int type)
3052 {
3053   reg_errcode_t err;
3054   Idx idx, outside_node;
3055   re_node_set new_nodes;
3056   DEBUG_ASSERT (cur_nodes->nelem);
3057   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3058   if (__glibc_unlikely (err != REG_NOERROR))
3059     return err;
3060   /* Create a new node set NEW_NODES with the nodes which are epsilon
3061      closures of the node in CUR_NODES.  */
3062 
3063   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3064     {
3065       Idx cur_node = cur_nodes->elems[idx];
3066       const re_node_set *eclosure = dfa->eclosures + cur_node;
3067       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3068       if (outside_node == -1)
3069         {
3070           /* There are no problematic nodes, just merge them.  */
3071           err = re_node_set_merge (&new_nodes, eclosure);
3072           if (__glibc_unlikely (err != REG_NOERROR))
3073             {
3074               re_node_set_free (&new_nodes);
3075               return err;
3076             }
3077         }
3078       else
3079         {
3080           /* There are problematic nodes, re-calculate incrementally.  */
3081           err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3082                                               ex_subexp, type);
3083           if (__glibc_unlikely (err != REG_NOERROR))
3084             {
3085               re_node_set_free (&new_nodes);
3086               return err;
3087             }
3088         }
3089     }
3090   re_node_set_free (cur_nodes);
3091   *cur_nodes = new_nodes;
3092   return REG_NOERROR;
3093 }
3094 
3095 /* Helper function for check_arrival_expand_ecl.
3096    Check incrementally the epsilon closure of TARGET, and if it isn't
3097    problematic append it to DST_NODES.  */
3098 
3099 static reg_errcode_t
3100 __attribute_warn_unused_result__
3101 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
     /* [previous][next][first][last][top][bottom][index][help] */
3102                               Idx target, Idx ex_subexp, int type)
3103 {
3104   Idx cur_node;
3105   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3106     {
3107       bool ok;
3108 
3109       if (dfa->nodes[cur_node].type == type
3110           && dfa->nodes[cur_node].opr.idx == ex_subexp)
3111         {
3112           if (type == OP_CLOSE_SUBEXP)
3113             {
3114               ok = re_node_set_insert (dst_nodes, cur_node);
3115               if (__glibc_unlikely (! ok))
3116                 return REG_ESPACE;
3117             }
3118           break;
3119         }
3120       ok = re_node_set_insert (dst_nodes, cur_node);
3121       if (__glibc_unlikely (! ok))
3122         return REG_ESPACE;
3123       if (dfa->edests[cur_node].nelem == 0)
3124         break;
3125       if (dfa->edests[cur_node].nelem == 2)
3126         {
3127           reg_errcode_t err;
3128           err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3129                                               dfa->edests[cur_node].elems[1],
3130                                               ex_subexp, type);
3131           if (__glibc_unlikely (err != REG_NOERROR))
3132             return err;
3133         }
3134       cur_node = dfa->edests[cur_node].elems[0];
3135     }
3136   return REG_NOERROR;
3137 }
3138 
3139 
3140 /* For all the back references in the current state, calculate the
3141    destination of the back references by the appropriate entry
3142    in MCTX->BKREF_ENTS.  */
3143 
3144 static reg_errcode_t
3145 __attribute_warn_unused_result__
3146 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
     /* [previous][next][first][last][top][bottom][index][help] */
3147                     Idx cur_str, Idx subexp_num, int type)
3148 {
3149   const re_dfa_t *const dfa = mctx->dfa;
3150   reg_errcode_t err;
3151   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3152   struct re_backref_cache_entry *ent;
3153 
3154   if (cache_idx_start == -1)
3155     return REG_NOERROR;
3156 
3157  restart:
3158   ent = mctx->bkref_ents + cache_idx_start;
3159   do
3160     {
3161       Idx to_idx, next_node;
3162 
3163       /* Is this entry ENT is appropriate?  */
3164       if (!re_node_set_contains (cur_nodes, ent->node))
3165         continue; /* No.  */
3166 
3167       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3168       /* Calculate the destination of the back reference, and append it
3169          to MCTX->STATE_LOG.  */
3170       if (to_idx == cur_str)
3171         {
3172           /* The backreference did epsilon transit, we must re-check all the
3173              node in the current state.  */
3174           re_node_set new_dests;
3175           reg_errcode_t err2, err3;
3176           next_node = dfa->edests[ent->node].elems[0];
3177           if (re_node_set_contains (cur_nodes, next_node))
3178             continue;
3179           err = re_node_set_init_1 (&new_dests, next_node);
3180           err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3181           err3 = re_node_set_merge (cur_nodes, &new_dests);
3182           re_node_set_free (&new_dests);
3183           if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR
3184                                 || err3 != REG_NOERROR))
3185             {
3186               err = (err != REG_NOERROR ? err
3187                      : (err2 != REG_NOERROR ? err2 : err3));
3188               return err;
3189             }
3190           /* TODO: It is still inefficient...  */
3191           goto restart;
3192         }
3193       else
3194         {
3195           re_node_set union_set;
3196           next_node = dfa->nexts[ent->node];
3197           if (mctx->state_log[to_idx])
3198             {
3199               bool ok;
3200               if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3201                                         next_node))
3202                 continue;
3203               err = re_node_set_init_copy (&union_set,
3204                                            &mctx->state_log[to_idx]->nodes);
3205               ok = re_node_set_insert (&union_set, next_node);
3206               if (__glibc_unlikely (err != REG_NOERROR || ! ok))
3207                 {
3208                   re_node_set_free (&union_set);
3209                   err = err != REG_NOERROR ? err : REG_ESPACE;
3210                   return err;
3211                 }
3212             }
3213           else
3214             {
3215               err = re_node_set_init_1 (&union_set, next_node);
3216               if (__glibc_unlikely (err != REG_NOERROR))
3217                 return err;
3218             }
3219           mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3220           re_node_set_free (&union_set);
3221           if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
3222                                 && err != REG_NOERROR))
3223             return err;
3224         }
3225     }
3226   while (ent++->more);
3227   return REG_NOERROR;
3228 }
3229 
3230 /* Build transition table for the state.
3231    Return true if successful.  */
3232 
3233 static bool __attribute_noinline__
3234 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
     /* [previous][next][first][last][top][bottom][index][help] */
3235 {
3236   reg_errcode_t err;
3237   Idx i, j;
3238   int ch;
3239   bool need_word_trtable = false;
3240   bitset_word_t elem, mask;
3241   Idx ndests; /* Number of the destination states from 'state'.  */
3242   re_dfastate_t **trtable;
3243   re_dfastate_t *dest_states[SBC_MAX];
3244   re_dfastate_t *dest_states_word[SBC_MAX];
3245   re_dfastate_t *dest_states_nl[SBC_MAX];
3246   re_node_set follows;
3247   bitset_t acceptable;
3248 
3249   /* We build DFA states which corresponds to the destination nodes
3250      from 'state'.  'dests_node[i]' represents the nodes which i-th
3251      destination state contains, and 'dests_ch[i]' represents the
3252      characters which i-th destination state accepts.  */
3253   re_node_set dests_node[SBC_MAX];
3254   bitset_t dests_ch[SBC_MAX];
3255 
3256   /* Initialize transition table.  */
3257   state->word_trtable = state->trtable = NULL;
3258 
3259   /* At first, group all nodes belonging to 'state' into several
3260      destinations.  */
3261   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3262   if (__glibc_unlikely (ndests <= 0))
3263     {
3264       /* Return false in case of an error, true otherwise.  */
3265       if (ndests == 0)
3266         {
3267           state->trtable = (re_dfastate_t **)
3268             calloc (sizeof (re_dfastate_t *), SBC_MAX);
3269           if (__glibc_unlikely (state->trtable == NULL))
3270             return false;
3271           return true;
3272         }
3273       return false;
3274     }
3275 
3276   err = re_node_set_alloc (&follows, ndests + 1);
3277   if (__glibc_unlikely (err != REG_NOERROR))
3278     {
3279     out_free:
3280       re_node_set_free (&follows);
3281       for (i = 0; i < ndests; ++i)
3282         re_node_set_free (dests_node + i);
3283       return false;
3284     }
3285 
3286   bitset_empty (acceptable);
3287 
3288   /* Then build the states for all destinations.  */
3289   for (i = 0; i < ndests; ++i)
3290     {
3291       Idx next_node;
3292       re_node_set_empty (&follows);
3293       /* Merge the follows of this destination states.  */
3294       for (j = 0; j < dests_node[i].nelem; ++j)
3295         {
3296           next_node = dfa->nexts[dests_node[i].elems[j]];
3297           if (next_node != -1)
3298             {
3299               err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3300               if (__glibc_unlikely (err != REG_NOERROR))
3301                 goto out_free;
3302             }
3303         }
3304       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3305       if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
3306         goto out_free;
3307       /* If the new state has context constraint,
3308          build appropriate states for these contexts.  */
3309       if (dest_states[i]->has_constraint)
3310         {
3311           dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3312                                                           CONTEXT_WORD);
3313           if (__glibc_unlikely (dest_states_word[i] == NULL
3314                                 && err != REG_NOERROR))
3315             goto out_free;
3316 
3317           if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3318             need_word_trtable = true;
3319 
3320           dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3321                                                         CONTEXT_NEWLINE);
3322           if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
3323             goto out_free;
3324         }
3325       else
3326         {
3327           dest_states_word[i] = dest_states[i];
3328           dest_states_nl[i] = dest_states[i];
3329         }
3330       bitset_merge (acceptable, dests_ch[i]);
3331     }
3332 
3333   if (!__glibc_unlikely (need_word_trtable))
3334     {
3335       /* We don't care about whether the following character is a word
3336          character, or we are in a single-byte character set so we can
3337          discern by looking at the character code: allocate a
3338          256-entry transition table.  */
3339       trtable = state->trtable =
3340         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3341       if (__glibc_unlikely (trtable == NULL))
3342         goto out_free;
3343 
3344       /* For all characters ch...:  */
3345       for (i = 0; i < BITSET_WORDS; ++i)
3346         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3347              elem;
3348              mask <<= 1, elem >>= 1, ++ch)
3349           if (__glibc_unlikely (elem & 1))
3350             {
3351               /* There must be exactly one destination which accepts
3352                  character ch.  See group_nodes_into_DFAstates.  */
3353               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3354                 ;
3355 
3356               /* j-th destination accepts the word character ch.  */
3357               if (dfa->word_char[i] & mask)
3358                 trtable[ch] = dest_states_word[j];
3359               else
3360                 trtable[ch] = dest_states[j];
3361             }
3362     }
3363   else
3364     {
3365       /* We care about whether the following character is a word
3366          character, and we are in a multi-byte character set: discern
3367          by looking at the character code: build two 256-entry
3368          transition tables, one starting at trtable[0] and one
3369          starting at trtable[SBC_MAX].  */
3370       trtable = state->word_trtable =
3371         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3372       if (__glibc_unlikely (trtable == NULL))
3373         goto out_free;
3374 
3375       /* For all characters ch...:  */
3376       for (i = 0; i < BITSET_WORDS; ++i)
3377         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3378              elem;
3379              mask <<= 1, elem >>= 1, ++ch)
3380           if (__glibc_unlikely (elem & 1))
3381             {
3382               /* There must be exactly one destination which accepts
3383                  character ch.  See group_nodes_into_DFAstates.  */
3384               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3385                 ;
3386 
3387               /* j-th destination accepts the word character ch.  */
3388               trtable[ch] = dest_states[j];
3389               trtable[ch + SBC_MAX] = dest_states_word[j];
3390             }
3391     }
3392 
3393   /* new line */
3394   if (bitset_contain (acceptable, NEWLINE_CHAR))
3395     {
3396       /* The current state accepts newline character.  */
3397       for (j = 0; j < ndests; ++j)
3398         if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3399           {
3400             /* k-th destination accepts newline character.  */
3401             trtable[NEWLINE_CHAR] = dest_states_nl[j];
3402             if (need_word_trtable)
3403               trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3404             /* There must be only one destination which accepts
3405                newline.  See group_nodes_into_DFAstates.  */
3406             break;
3407           }
3408     }
3409 
3410   re_node_set_free (&follows);
3411   for (i = 0; i < ndests; ++i)
3412     re_node_set_free (dests_node + i);
3413   return true;
3414 }
3415 
3416 /* Group all nodes belonging to STATE into several destinations.
3417    Then for all destinations, set the nodes belonging to the destination
3418    to DESTS_NODE[i] and set the characters accepted by the destination
3419    to DEST_CH[i].  Return the number of destinations if successful,
3420    -1 on internal error.  */
3421 
3422 static Idx
3423 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
     /* [previous][next][first][last][top][bottom][index][help] */
3424                             re_node_set *dests_node, bitset_t *dests_ch)
3425 {
3426   reg_errcode_t err;
3427   bool ok;
3428   Idx i, j, k;
3429   Idx ndests; /* Number of the destinations from 'state'.  */
3430   bitset_t accepts; /* Characters a node can accept.  */
3431   const re_node_set *cur_nodes = &state->nodes;
3432   bitset_empty (accepts);
3433   ndests = 0;
3434 
3435   /* For all the nodes belonging to 'state',  */
3436   for (i = 0; i < cur_nodes->nelem; ++i)
3437     {
3438       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3439       re_token_type_t type = node->type;
3440       unsigned int constraint = node->constraint;
3441 
3442       /* Enumerate all single byte character this node can accept.  */
3443       if (type == CHARACTER)
3444         bitset_set (accepts, node->opr.c);
3445       else if (type == SIMPLE_BRACKET)
3446         {
3447           bitset_merge (accepts, node->opr.sbcset);
3448         }
3449       else if (type == OP_PERIOD)
3450         {
3451           if (dfa->mb_cur_max > 1)
3452             bitset_merge (accepts, dfa->sb_char);
3453           else
3454             bitset_set_all (accepts);
3455           if (!(dfa->syntax & RE_DOT_NEWLINE))
3456             bitset_clear (accepts, '\n');
3457           if (dfa->syntax & RE_DOT_NOT_NULL)
3458             bitset_clear (accepts, '\0');
3459         }
3460       else if (type == OP_UTF8_PERIOD)
3461         {
3462           if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3463             memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3464           else
3465             bitset_merge (accepts, utf8_sb_map);
3466           if (!(dfa->syntax & RE_DOT_NEWLINE))
3467             bitset_clear (accepts, '\n');
3468           if (dfa->syntax & RE_DOT_NOT_NULL)
3469             bitset_clear (accepts, '\0');
3470         }
3471       else
3472         continue;
3473 
3474       /* Check the 'accepts' and sift the characters which are not
3475          match it the context.  */
3476       if (constraint)
3477         {
3478           if (constraint & NEXT_NEWLINE_CONSTRAINT)
3479             {
3480               bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3481               bitset_empty (accepts);
3482               if (accepts_newline)
3483                 bitset_set (accepts, NEWLINE_CHAR);
3484               else
3485                 continue;
3486             }
3487           if (constraint & NEXT_ENDBUF_CONSTRAINT)
3488             {
3489               bitset_empty (accepts);
3490               continue;
3491             }
3492 
3493           if (constraint & NEXT_WORD_CONSTRAINT)
3494             {
3495               bitset_word_t any_set = 0;
3496               if (type == CHARACTER && !node->word_char)
3497                 {
3498                   bitset_empty (accepts);
3499                   continue;
3500                 }
3501               if (dfa->mb_cur_max > 1)
3502                 for (j = 0; j < BITSET_WORDS; ++j)
3503                   any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3504               else
3505                 for (j = 0; j < BITSET_WORDS; ++j)
3506                   any_set |= (accepts[j] &= dfa->word_char[j]);
3507               if (!any_set)
3508                 continue;
3509             }
3510           if (constraint & NEXT_NOTWORD_CONSTRAINT)
3511             {
3512               bitset_word_t any_set = 0;
3513               if (type == CHARACTER && node->word_char)
3514                 {
3515                   bitset_empty (accepts);
3516                   continue;
3517                 }
3518               if (dfa->mb_cur_max > 1)
3519                 for (j = 0; j < BITSET_WORDS; ++j)
3520                   any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3521               else
3522                 for (j = 0; j < BITSET_WORDS; ++j)
3523                   any_set |= (accepts[j] &= ~dfa->word_char[j]);
3524               if (!any_set)
3525                 continue;
3526             }
3527         }
3528 
3529       /* Then divide 'accepts' into DFA states, or create a new
3530          state.  Above, we make sure that accepts is not empty.  */
3531       for (j = 0; j < ndests; ++j)
3532         {
3533           bitset_t intersec; /* Intersection sets, see below.  */
3534           bitset_t remains;
3535           /* Flags, see below.  */
3536           bitset_word_t has_intersec, not_subset, not_consumed;
3537 
3538           /* Optimization, skip if this state doesn't accept the character.  */
3539           if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3540             continue;
3541 
3542           /* Enumerate the intersection set of this state and 'accepts'.  */
3543           has_intersec = 0;
3544           for (k = 0; k < BITSET_WORDS; ++k)
3545             has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3546           /* And skip if the intersection set is empty.  */
3547           if (!has_intersec)
3548             continue;
3549 
3550           /* Then check if this state is a subset of 'accepts'.  */
3551           not_subset = not_consumed = 0;
3552           for (k = 0; k < BITSET_WORDS; ++k)
3553             {
3554               not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3555               not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3556             }
3557 
3558           /* If this state isn't a subset of 'accepts', create a
3559              new group state, which has the 'remains'. */
3560           if (not_subset)
3561             {
3562               bitset_copy (dests_ch[ndests], remains);
3563               bitset_copy (dests_ch[j], intersec);
3564               err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3565               if (__glibc_unlikely (err != REG_NOERROR))
3566                 goto error_return;
3567               ++ndests;
3568             }
3569 
3570           /* Put the position in the current group. */
3571           ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3572           if (__glibc_unlikely (! ok))
3573             goto error_return;
3574 
3575           /* If all characters are consumed, go to next node. */
3576           if (!not_consumed)
3577             break;
3578         }
3579       /* Some characters remain, create a new group. */
3580       if (j == ndests)
3581         {
3582           bitset_copy (dests_ch[ndests], accepts);
3583           err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3584           if (__glibc_unlikely (err != REG_NOERROR))
3585             goto error_return;
3586           ++ndests;
3587           bitset_empty (accepts);
3588         }
3589     }
3590   assume (ndests <= SBC_MAX);
3591   return ndests;
3592  error_return:
3593   for (j = 0; j < ndests; ++j)
3594     re_node_set_free (dests_node + j);
3595   return -1;
3596 }
3597 
3598 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3599    Return the number of the bytes the node accepts.
3600    STR_IDX is the current index of the input string.
3601 
3602    This function handles the nodes which can accept one character, or
3603    one collating element like '.', '[a-z]', opposite to the other nodes
3604    can only accept one byte.  */
3605 
3606 #ifdef _LIBC
3607 # include <locale/weight.h>
3608 #endif
3609 
3610 static int
3611 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
     /* [previous][next][first][last][top][bottom][index][help] */
3612                          const re_string_t *input, Idx str_idx)
3613 {
3614   const re_token_t *node = dfa->nodes + node_idx;
3615   int char_len, elem_len;
3616   Idx i;
3617 
3618   if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
3619     {
3620       unsigned char c = re_string_byte_at (input, str_idx), d;
3621       if (__glibc_likely (c < 0xc2))
3622         return 0;
3623 
3624       if (str_idx + 2 > input->len)
3625         return 0;
3626 
3627       d = re_string_byte_at (input, str_idx + 1);
3628       if (c < 0xe0)
3629         return (d < 0x80 || d > 0xbf) ? 0 : 2;
3630       else if (c < 0xf0)
3631         {
3632           char_len = 3;
3633           if (c == 0xe0 && d < 0xa0)
3634             return 0;
3635         }
3636       else if (c < 0xf8)
3637         {
3638           char_len = 4;
3639           if (c == 0xf0 && d < 0x90)
3640             return 0;
3641         }
3642       else if (c < 0xfc)
3643         {
3644           char_len = 5;
3645           if (c == 0xf8 && d < 0x88)
3646             return 0;
3647         }
3648       else if (c < 0xfe)
3649         {
3650           char_len = 6;
3651           if (c == 0xfc && d < 0x84)
3652             return 0;
3653         }
3654       else
3655         return 0;
3656 
3657       if (str_idx + char_len > input->len)
3658         return 0;
3659 
3660       for (i = 1; i < char_len; ++i)
3661         {
3662           d = re_string_byte_at (input, str_idx + i);
3663           if (d < 0x80 || d > 0xbf)
3664             return 0;
3665         }
3666       return char_len;
3667     }
3668 
3669   char_len = re_string_char_size_at (input, str_idx);
3670   if (node->type == OP_PERIOD)
3671     {
3672       if (char_len <= 1)
3673         return 0;
3674       /* FIXME: I don't think this if is needed, as both '\n'
3675          and '\0' are char_len == 1.  */
3676       /* '.' accepts any one character except the following two cases.  */
3677       if ((!(dfa->syntax & RE_DOT_NEWLINE)
3678            && re_string_byte_at (input, str_idx) == '\n')
3679           || ((dfa->syntax & RE_DOT_NOT_NULL)
3680               && re_string_byte_at (input, str_idx) == '\0'))
3681         return 0;
3682       return char_len;
3683     }
3684 
3685   elem_len = re_string_elem_size_at (input, str_idx);
3686   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3687     return 0;
3688 
3689   if (node->type == COMPLEX_BRACKET)
3690     {
3691       const re_charset_t *cset = node->opr.mbcset;
3692 #ifdef _LIBC
3693       const unsigned char *pin
3694         = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3695       Idx j;
3696       uint32_t nrules;
3697 #endif
3698       int match_len = 0;
3699       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3700                     ? re_string_wchar_at (input, str_idx) : 0);
3701 
3702       /* match with multibyte character?  */
3703       for (i = 0; i < cset->nmbchars; ++i)
3704         if (wc == cset->mbchars[i])
3705           {
3706             match_len = char_len;
3707             goto check_node_accept_bytes_match;
3708           }
3709       /* match with character_class?  */
3710       for (i = 0; i < cset->nchar_classes; ++i)
3711         {
3712           wctype_t wt = cset->char_classes[i];
3713           if (__iswctype (wc, wt))
3714             {
3715               match_len = char_len;
3716               goto check_node_accept_bytes_match;
3717             }
3718         }
3719 
3720 #ifdef _LIBC
3721       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3722       if (nrules != 0)
3723         {
3724           unsigned int in_collseq = 0;
3725           const int32_t *table, *indirect;
3726           const unsigned char *weights, *extra;
3727           const char *collseqwc;
3728 
3729           /* match with collating_symbol?  */
3730           if (cset->ncoll_syms)
3731             extra = (const unsigned char *)
3732               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3733           for (i = 0; i < cset->ncoll_syms; ++i)
3734             {
3735               const unsigned char *coll_sym = extra + cset->coll_syms[i];
3736               /* Compare the length of input collating element and
3737                  the length of current collating element.  */
3738               if (*coll_sym != elem_len)
3739                 continue;
3740               /* Compare each bytes.  */
3741               for (j = 0; j < *coll_sym; j++)
3742                 if (pin[j] != coll_sym[1 + j])
3743                   break;
3744               if (j == *coll_sym)
3745                 {
3746                   /* Match if every bytes is equal.  */
3747                   match_len = j;
3748                   goto check_node_accept_bytes_match;
3749                 }
3750             }
3751 
3752           if (cset->nranges)
3753             {
3754               if (elem_len <= char_len)
3755                 {
3756                   collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3757                   in_collseq = __collseq_table_lookup (collseqwc, wc);
3758                 }
3759               else
3760                 in_collseq = find_collation_sequence_value (pin, elem_len);
3761             }
3762           /* match with range expression?  */
3763           /* FIXME: Implement rational ranges here, too.  */
3764           for (i = 0; i < cset->nranges; ++i)
3765             if (cset->range_starts[i] <= in_collseq
3766                 && in_collseq <= cset->range_ends[i])
3767               {
3768                 match_len = elem_len;
3769                 goto check_node_accept_bytes_match;
3770               }
3771 
3772           /* match with equivalence_class?  */
3773           if (cset->nequiv_classes)
3774             {
3775               const unsigned char *cp = pin;
3776               table = (const int32_t *)
3777                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3778               weights = (const unsigned char *)
3779                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3780               extra = (const unsigned char *)
3781                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3782               indirect = (const int32_t *)
3783                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3784               int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3785               int32_t rule = idx >> 24;
3786               idx &= 0xffffff;
3787               if (idx > 0)
3788                 {
3789                   size_t weight_len = weights[idx];
3790                   for (i = 0; i < cset->nequiv_classes; ++i)
3791                     {
3792                       int32_t equiv_class_idx = cset->equiv_classes[i];
3793                       int32_t equiv_class_rule = equiv_class_idx >> 24;
3794                       equiv_class_idx &= 0xffffff;
3795                       if (weights[equiv_class_idx] == weight_len
3796                           && equiv_class_rule == rule
3797                           && memcmp (weights + idx + 1,
3798                                      weights + equiv_class_idx + 1,
3799                                      weight_len) == 0)
3800                         {
3801                           match_len = elem_len;
3802                           goto check_node_accept_bytes_match;
3803                         }
3804                     }
3805                 }
3806             }
3807         }
3808       else
3809 #endif /* _LIBC */
3810         {
3811           /* match with range expression?  */
3812           for (i = 0; i < cset->nranges; ++i)
3813             {
3814               if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3815                 {
3816                   match_len = char_len;
3817                   goto check_node_accept_bytes_match;
3818                 }
3819             }
3820         }
3821     check_node_accept_bytes_match:
3822       if (!cset->non_match)
3823         return match_len;
3824       else
3825         {
3826           if (match_len > 0)
3827             return 0;
3828           else
3829             return (elem_len > char_len) ? elem_len : char_len;
3830         }
3831     }
3832   return 0;
3833 }
3834 
3835 #ifdef _LIBC
3836 static unsigned int
3837 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3838 {
3839   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3840   if (nrules == 0)
3841     {
3842       if (mbs_len == 1)
3843         {
3844           /* No valid character.  Match it as a single byte character.  */
3845           const unsigned char *collseq = (const unsigned char *)
3846             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3847           return collseq[mbs[0]];
3848         }
3849       return UINT_MAX;
3850     }
3851   else
3852     {
3853       int32_t idx;
3854       const unsigned char *extra = (const unsigned char *)
3855         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3856       int32_t extrasize = (const unsigned char *)
3857         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3858 
3859       for (idx = 0; idx < extrasize;)
3860         {
3861           int mbs_cnt;
3862           bool found = false;
3863           int32_t elem_mbs_len;
3864           /* Skip the name of collating element name.  */
3865           idx = idx + extra[idx] + 1;
3866           elem_mbs_len = extra[idx++];
3867           if (mbs_len == elem_mbs_len)
3868             {
3869               for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3870                 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3871                   break;
3872               if (mbs_cnt == elem_mbs_len)
3873                 /* Found the entry.  */
3874                 found = true;
3875             }
3876           /* Skip the byte sequence of the collating element.  */
3877           idx += elem_mbs_len;
3878           /* Adjust for the alignment.  */
3879           idx = (idx + 3) & ~3;
3880           /* Skip the collation sequence value.  */
3881           idx += sizeof (uint32_t);
3882           /* Skip the wide char sequence of the collating element.  */
3883           idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
3884           /* If we found the entry, return the sequence value.  */
3885           if (found)
3886             return *(uint32_t *) (extra + idx);
3887           /* Skip the collation sequence value.  */
3888           idx += sizeof (uint32_t);
3889         }
3890       return UINT_MAX;
3891     }
3892 }
3893 #endif /* _LIBC */
3894 
3895 /* Check whether the node accepts the byte which is IDX-th
3896    byte of the INPUT.  */
3897 
3898 static bool
3899 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
     /* [previous][next][first][last][top][bottom][index][help] */
3900                    Idx idx)
3901 {
3902   unsigned char ch;
3903   ch = re_string_byte_at (&mctx->input, idx);
3904   switch (node->type)
3905     {
3906     case CHARACTER:
3907       if (node->opr.c != ch)
3908         return false;
3909       break;
3910 
3911     case SIMPLE_BRACKET:
3912       if (!bitset_contain (node->opr.sbcset, ch))
3913         return false;
3914       break;
3915 
3916     case OP_UTF8_PERIOD:
3917       if (ch >= ASCII_CHARS)
3918         return false;
3919       FALLTHROUGH;
3920     case OP_PERIOD:
3921       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
3922           || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
3923         return false;
3924       break;
3925 
3926     default:
3927       return false;
3928     }
3929 
3930   if (node->constraint)
3931     {
3932       /* The node has constraints.  Check whether the current context
3933          satisfies the constraints.  */
3934       unsigned int context = re_string_context_at (&mctx->input, idx,
3935                                                    mctx->eflags);
3936       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
3937         return false;
3938     }
3939 
3940   return true;
3941 }
3942 
3943 /* Extend the buffers, if the buffers have run out.  */
3944 
3945 static reg_errcode_t
3946 __attribute_warn_unused_result__
3947 extend_buffers (re_match_context_t *mctx, int min_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3948 {
3949   reg_errcode_t ret;
3950   re_string_t *pstr = &mctx->input;
3951 
3952   /* Avoid overflow.  */
3953   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
3954                         <= pstr->bufs_len))
3955     return REG_ESPACE;
3956 
3957   /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
3958   ret = re_string_realloc_buffers (pstr,
3959                                    MAX (min_len,
3960                                         MIN (pstr->len, pstr->bufs_len * 2)));
3961   if (__glibc_unlikely (ret != REG_NOERROR))
3962     return ret;
3963 
3964   if (mctx->state_log != NULL)
3965     {
3966       /* And double the length of state_log.  */
3967       /* XXX We have no indication of the size of this buffer.  If this
3968          allocation fail we have no indication that the state_log array
3969          does not have the right size.  */
3970       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
3971                                               pstr->bufs_len + 1);
3972       if (__glibc_unlikely (new_array == NULL))
3973         return REG_ESPACE;
3974       mctx->state_log = new_array;
3975     }
3976 
3977   /* Then reconstruct the buffers.  */
3978   if (pstr->icase)
3979     {
3980       if (pstr->mb_cur_max > 1)
3981         {
3982           ret = build_wcs_upper_buffer (pstr);
3983           if (__glibc_unlikely (ret != REG_NOERROR))
3984             return ret;
3985         }
3986       else
3987         build_upper_buffer (pstr);
3988     }
3989   else
3990     {
3991       if (pstr->mb_cur_max > 1)
3992         build_wcs_buffer (pstr);
3993       else
3994         {
3995           if (pstr->trans != NULL)
3996             re_string_translate_buffer (pstr);
3997         }
3998     }
3999   return REG_NOERROR;
4000 }
4001 
4002 
4003 /* Functions for matching context.  */
4004 
4005 /* Initialize MCTX.  */
4006 
4007 static reg_errcode_t
4008 __attribute_warn_unused_result__
4009 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
     /* [previous][next][first][last][top][bottom][index][help] */
4010 {
4011   mctx->eflags = eflags;
4012   mctx->match_last = -1;
4013   if (n > 0)
4014     {
4015       /* Avoid overflow.  */
4016       size_t max_object_size =
4017         MAX (sizeof (struct re_backref_cache_entry),
4018              sizeof (re_sub_match_top_t *));
4019       if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
4020         return REG_ESPACE;
4021 
4022       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4023       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4024       if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL))
4025         return REG_ESPACE;
4026     }
4027   /* Already zero-ed by the caller.
4028      else
4029        mctx->bkref_ents = NULL;
4030      mctx->nbkref_ents = 0;
4031      mctx->nsub_tops = 0;  */
4032   mctx->abkref_ents = n;
4033   mctx->max_mb_elem_len = 1;
4034   mctx->asub_tops = n;
4035   return REG_NOERROR;
4036 }
4037 
4038 /* Clean the entries which depend on the current input in MCTX.
4039    This function must be invoked when the matcher changes the start index
4040    of the input, or changes the input string.  */
4041 
4042 static void
4043 match_ctx_clean (re_match_context_t *mctx)
     /* [previous][next][first][last][top][bottom][index][help] */
4044 {
4045   Idx st_idx;
4046   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4047     {
4048       Idx sl_idx;
4049       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4050       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4051         {
4052           re_sub_match_last_t *last = top->lasts[sl_idx];
4053           re_free (last->path.array);
4054           re_free (last);
4055         }
4056       re_free (top->lasts);
4057       if (top->path)
4058         {
4059           re_free (top->path->array);
4060           re_free (top->path);
4061         }
4062       re_free (top);
4063     }
4064 
4065   mctx->nsub_tops = 0;
4066   mctx->nbkref_ents = 0;
4067 }
4068 
4069 /* Free all the memory associated with MCTX.  */
4070 
4071 static void
4072 match_ctx_free (re_match_context_t *mctx)
     /* [previous][next][first][last][top][bottom][index][help] */
4073 {
4074   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4075   match_ctx_clean (mctx);
4076   re_free (mctx->sub_tops);
4077   re_free (mctx->bkref_ents);
4078 }
4079 
4080 /* Add a new backreference entry to MCTX.
4081    Note that we assume that caller never call this function with duplicate
4082    entry, and call with STR_IDX which isn't smaller than any existing entry.
4083 */
4084 
4085 static reg_errcode_t
4086 __attribute_warn_unused_result__
4087 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
     /* [previous][next][first][last][top][bottom][index][help] */
4088                      Idx to)
4089 {
4090   if (mctx->nbkref_ents >= mctx->abkref_ents)
4091     {
4092       struct re_backref_cache_entry* new_entry;
4093       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4094                               mctx->abkref_ents * 2);
4095       if (__glibc_unlikely (new_entry == NULL))
4096         {
4097           re_free (mctx->bkref_ents);
4098           return REG_ESPACE;
4099         }
4100       mctx->bkref_ents = new_entry;
4101       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4102               sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4103       mctx->abkref_ents *= 2;
4104     }
4105   if (mctx->nbkref_ents > 0
4106       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4107     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4108 
4109   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4110   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4111   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4112   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4113 
4114   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4115      If bit N is clear, means that this entry won't epsilon-transition to
4116      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4117      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4118      such node.
4119 
4120      A backreference does not epsilon-transition unless it is empty, so set
4121      to all zeros if FROM != TO.  */
4122   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4123     = (from == to ? -1 : 0);
4124 
4125   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4126   if (mctx->max_mb_elem_len < to - from)
4127     mctx->max_mb_elem_len = to - from;
4128   return REG_NOERROR;
4129 }
4130 
4131 /* Return the first entry with the same str_idx, or -1 if none is
4132    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4133 
4134 static Idx
4135 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
     /* [previous][next][first][last][top][bottom][index][help] */
4136 {
4137   Idx left, right, mid, last;
4138   last = right = mctx->nbkref_ents;
4139   for (left = 0; left < right;)
4140     {
4141       mid = (left + right) / 2;
4142       if (mctx->bkref_ents[mid].str_idx < str_idx)
4143         left = mid + 1;
4144       else
4145         right = mid;
4146     }
4147   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4148     return left;
4149   else
4150     return -1;
4151 }
4152 
4153 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4154    at STR_IDX.  */
4155 
4156 static reg_errcode_t
4157 __attribute_warn_unused_result__
4158 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
     /* [previous][next][first][last][top][bottom][index][help] */
4159 {
4160   DEBUG_ASSERT (mctx->sub_tops != NULL);
4161   DEBUG_ASSERT (mctx->asub_tops > 0);
4162   if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
4163     {
4164       Idx new_asub_tops = mctx->asub_tops * 2;
4165       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4166                                                    re_sub_match_top_t *,
4167                                                    new_asub_tops);
4168       if (__glibc_unlikely (new_array == NULL))
4169         return REG_ESPACE;
4170       mctx->sub_tops = new_array;
4171       mctx->asub_tops = new_asub_tops;
4172     }
4173   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4174   if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
4175     return REG_ESPACE;
4176   mctx->sub_tops[mctx->nsub_tops]->node = node;
4177   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4178   return REG_NOERROR;
4179 }
4180 
4181 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4182    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.
4183    Return the new entry if successful, NULL if memory is exhausted.  */
4184 
4185 static re_sub_match_last_t *
4186 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
     /* [previous][next][first][last][top][bottom][index][help] */
4187 {
4188   re_sub_match_last_t *new_entry;
4189   if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
4190     {
4191       Idx new_alasts = 2 * subtop->alasts + 1;
4192       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4193                                                     re_sub_match_last_t *,
4194                                                     new_alasts);
4195       if (__glibc_unlikely (new_array == NULL))
4196         return NULL;
4197       subtop->lasts = new_array;
4198       subtop->alasts = new_alasts;
4199     }
4200   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4201   if (__glibc_likely (new_entry != NULL))
4202     {
4203       subtop->lasts[subtop->nlasts] = new_entry;
4204       new_entry->node = node;
4205       new_entry->str_idx = str_idx;
4206       ++subtop->nlasts;
4207     }
4208   return new_entry;
4209 }
4210 
4211 static void
4212 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
     /* [previous][next][first][last][top][bottom][index][help] */
4213                re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4214 {
4215   sctx->sifted_states = sifted_sts;
4216   sctx->limited_states = limited_sts;
4217   sctx->last_node = last_node;
4218   sctx->last_str_idx = last_str_idx;
4219   re_node_set_init_empty (&sctx->limits);
4220 }

/* [previous][next][first][last][top][bottom][index][help] */