This source file includes following definitions.
- regexec
- libc_hidden_def
- re_match
- weak_alias
- weak_alias
- weak_alias
- weak_alias
- re_search_stub
- re_copy_regs
- re_set_registers
- weak_alias
- re_search_internal
- prune_impossible_nodes
- acquire_init_state_context
- check_matching
- check_halt_node_context
- check_halt_state_context
- proceed_next_node
- push_fail_stack
- pop_fail_stack
- set_regs
- free_fail_stack_return
- update_regs
- sift_states_backward
- build_sifted_states
- clean_state_log_if_needed
- merge_state_array
- update_cur_sifted_state
- add_epsilon_src_nodes
- sub_epsilon_src_nodes
- check_dst_limits
- check_dst_limits_calc_pos_1
- check_dst_limits_calc_pos
- check_subexp_limits
- sift_states_bkref
- sift_states_iter_mb
- transit_state
- merge_state_with_log
- find_recover_state
- check_subexp_matching_top
- transit_state_sb
- transit_state_mb
- transit_state_bkref
- get_subexp
- get_subexp_sub
- find_subexp_node
- check_arrival
- check_arrival_add_next_nodes
- check_arrival_expand_ecl
- check_arrival_expand_ecl_sub
- expand_bkref_cache
- build_trtable
- group_nodes_into_DFAstates
- check_node_accept_bytes
- find_collation_sequence_value
- check_node_accept
- extend_buffers
- match_ctx_init
- match_ctx_clean
- match_ctx_free
- match_ctx_add_entry
- search_cur_bkref_entry
- match_ctx_add_subtop
- match_ctx_add_sublast
- sift_ctx_init
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21 Idx n);
22 static void match_ctx_clean (re_match_context_t *mctx);
23 static void match_ctx_free (re_match_context_t *cache);
24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25 Idx str_idx, Idx from, Idx to);
26 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
27 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
28 Idx str_idx);
29 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
30 Idx node, Idx str_idx);
31 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
32 re_dfastate_t **limited_sts, Idx last_node,
33 Idx last_str_idx);
34 static reg_errcode_t re_search_internal (const regex_t *preg,
35 const char *string, Idx length,
36 Idx start, Idx last_start, Idx stop,
37 size_t nmatch, regmatch_t pmatch[],
38 int eflags);
39 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
40 const char *string1, Idx length1,
41 const char *string2, Idx length2,
42 Idx start, regoff_t range,
43 struct re_registers *regs,
44 Idx stop, bool ret_len);
45 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
46 const char *string, Idx length, Idx start,
47 regoff_t range, Idx stop,
48 struct re_registers *regs,
49 bool ret_len);
50 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
51 Idx nregs, int regs_allocated);
52 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
53 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
54 Idx *p_match_first);
55 static Idx check_halt_state_context (const re_match_context_t *mctx,
56 const re_dfastate_t *state, Idx idx);
57 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
58 regmatch_t *prev_idx_match, Idx cur_node,
59 Idx cur_idx, Idx nmatch);
60 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
61 Idx str_idx, Idx dest_node, Idx nregs,
62 regmatch_t *regs, regmatch_t *prevregs,
63 re_node_set *eps_via_nodes);
64 static reg_errcode_t set_regs (const regex_t *preg,
65 const re_match_context_t *mctx,
66 size_t nmatch, regmatch_t *pmatch,
67 bool fl_backtrack);
68 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
69
70 static int sift_states_iter_mb (const re_match_context_t *mctx,
71 re_sift_context_t *sctx,
72 Idx node_idx, Idx str_idx, Idx max_str_idx);
73 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
74 re_sift_context_t *sctx);
75 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
76 re_sift_context_t *sctx, Idx str_idx,
77 re_node_set *cur_dest);
78 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
79 re_sift_context_t *sctx,
80 Idx str_idx,
81 re_node_set *dest_nodes);
82 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
83 re_node_set *dest_nodes,
84 const re_node_set *candidates);
85 static bool check_dst_limits (const re_match_context_t *mctx,
86 const re_node_set *limits,
87 Idx dst_node, Idx dst_idx, Idx src_node,
88 Idx src_idx);
89 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
90 int boundaries, Idx subexp_idx,
91 Idx from_node, Idx bkref_idx);
92 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
93 Idx limit, Idx subexp_idx,
94 Idx node, Idx str_idx,
95 Idx bkref_idx);
96 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
97 re_node_set *dest_nodes,
98 const re_node_set *candidates,
99 re_node_set *limits,
100 struct re_backref_cache_entry *bkref_ents,
101 Idx str_idx);
102 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
103 re_sift_context_t *sctx,
104 Idx str_idx, const re_node_set *candidates);
105 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
106 re_dfastate_t **dst,
107 re_dfastate_t **src, Idx num);
108 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
109 re_match_context_t *mctx);
110 static re_dfastate_t *transit_state (reg_errcode_t *err,
111 re_match_context_t *mctx,
112 re_dfastate_t *state);
113 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
114 re_match_context_t *mctx,
115 re_dfastate_t *next_state);
116 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
117 re_node_set *cur_nodes,
118 Idx str_idx);
119 #if 0
120 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
121 re_match_context_t *mctx,
122 re_dfastate_t *pstate);
123 #endif
124 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
125 re_dfastate_t *pstate);
126 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
127 const re_node_set *nodes);
128 static reg_errcode_t get_subexp (re_match_context_t *mctx,
129 Idx bkref_node, Idx bkref_str_idx);
130 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
131 const re_sub_match_top_t *sub_top,
132 re_sub_match_last_t *sub_last,
133 Idx bkref_node, Idx bkref_str);
134 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
135 Idx subexp_idx, int type);
136 static reg_errcode_t check_arrival (re_match_context_t *mctx,
137 state_array_t *path, Idx top_node,
138 Idx top_str, Idx last_node, Idx last_str,
139 int type);
140 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
141 Idx str_idx,
142 re_node_set *cur_nodes,
143 re_node_set *next_nodes);
144 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
145 re_node_set *cur_nodes,
146 Idx ex_subexp, int type);
147 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
148 re_node_set *dst_nodes,
149 Idx target, Idx ex_subexp,
150 int type);
151 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
152 re_node_set *cur_nodes, Idx cur_str,
153 Idx subexp_num, int type);
154 static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
155 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
156 const re_string_t *input, Idx idx);
157 #ifdef _LIBC
158 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
159 size_t name_len);
160 #endif
161 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
162 const re_dfastate_t *state,
163 re_node_set *states_node,
164 bitset_t *states_ch);
165 static bool check_node_accept (const re_match_context_t *mctx,
166 const re_token_t *node, Idx idx);
167 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186 int
187 regexec (const regex_t *__restrict preg, const char *__restrict string,
188 size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
189 {
190 reg_errcode_t err;
191 Idx start, length;
192 re_dfa_t *dfa = preg->buffer;
193
194 if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
195 return REG_BADPAT;
196
197 if (eflags & REG_STARTEND)
198 {
199 start = pmatch[0].rm_so;
200 length = pmatch[0].rm_eo;
201 }
202 else
203 {
204 start = 0;
205 length = strlen (string);
206 }
207
208 lock_lock (dfa->lock);
209 if (preg->no_sub)
210 err = re_search_internal (preg, string, length, start, length,
211 length, 0, NULL, eflags);
212 else
213 err = re_search_internal (preg, string, length, start, length,
214 length, nmatch, pmatch, eflags);
215 lock_unlock (dfa->lock);
216 return err != REG_NOERROR;
217 }
218
219 #ifdef _LIBC
220 libc_hidden_def (__regexec)
221
222 # include <shlib-compat.h>
223 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
224
225 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
226 __typeof__ (__regexec) __compat_regexec;
227
228 int
229 attribute_compat_text_section
230 __compat_regexec (const regex_t *__restrict preg,
231 const char *__restrict string, size_t nmatch,
232 regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
233 {
234 return regexec (preg, string, nmatch, pmatch,
235 eflags & (REG_NOTBOL | REG_NOTEOL));
236 }
237 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
238 # endif
239 #endif
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270 regoff_t
271 re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
272 Idx start, struct re_registers *regs)
273 {
274 return re_search_stub (bufp, string, length, start, 0, length, regs, true);
275 }
276 #ifdef _LIBC
277 weak_alias (__re_match, re_match)
278 #endif
279
280 regoff_t
281 re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
282 Idx start, regoff_t range, struct re_registers *regs)
283 {
284 return re_search_stub (bufp, string, length, start, range, length, regs,
285 false);
286 }
287 #ifdef _LIBC
288 weak_alias (__re_search, re_search)
289 #endif
290
291 regoff_t
292 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
293 const char *string2, Idx length2, Idx start,
294 struct re_registers *regs, Idx stop)
295 {
296 return re_search_2_stub (bufp, string1, length1, string2, length2,
297 start, 0, regs, stop, true);
298 }
299 #ifdef _LIBC
300 weak_alias (__re_match_2, re_match_2)
301 #endif
302
303 regoff_t
304 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
305 const char *string2, Idx length2, Idx start, regoff_t range,
306 struct re_registers *regs, Idx stop)
307 {
308 return re_search_2_stub (bufp, string1, length1, string2, length2,
309 start, range, regs, stop, false);
310 }
311 #ifdef _LIBC
312 weak_alias (__re_search_2, re_search_2)
313 #endif
314
315 static regoff_t
316 re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
317 Idx length1, const char *string2, Idx length2, Idx start,
318 regoff_t range, struct re_registers *regs,
319 Idx stop, bool ret_len)
320 {
321 const char *str;
322 regoff_t rval;
323 Idx len;
324 char *s = NULL;
325
326 if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0
327 || INT_ADD_WRAPV (length1, length2, &len))))
328 return -2;
329
330
331 if (length2 > 0)
332 if (length1 > 0)
333 {
334 s = re_malloc (char, len);
335
336 if (__glibc_unlikely (s == NULL))
337 return -2;
338 #ifdef _LIBC
339 memcpy (__mempcpy (s, string1, length1), string2, length2);
340 #else
341 memcpy (s, string1, length1);
342 memcpy (s + length1, string2, length2);
343 #endif
344 str = s;
345 }
346 else
347 str = string2;
348 else
349 str = string1;
350
351 rval = re_search_stub (bufp, str, len, start, range, stop, regs,
352 ret_len);
353 re_free (s);
354 return rval;
355 }
356
357
358
359
360
361
362 static regoff_t
363 re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
364 Idx start, regoff_t range, Idx stop, struct re_registers *regs,
365 bool ret_len)
366 {
367 reg_errcode_t result;
368 regmatch_t *pmatch;
369 Idx nregs;
370 regoff_t rval;
371 int eflags = 0;
372 re_dfa_t *dfa = bufp->buffer;
373 Idx last_start = start + range;
374
375
376 if (__glibc_unlikely (start < 0 || start > length))
377 return -1;
378 if (__glibc_unlikely (length < last_start
379 || (0 <= range && last_start < start)))
380 last_start = length;
381 else if (__glibc_unlikely (last_start < 0
382 || (range < 0 && start <= last_start)))
383 last_start = 0;
384
385 lock_lock (dfa->lock);
386
387 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
388 eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
389
390
391 if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
392 re_compile_fastmap (bufp);
393
394 if (__glibc_unlikely (bufp->no_sub))
395 regs = NULL;
396
397
398 if (regs == NULL)
399 nregs = 1;
400 else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
401 && regs->num_regs <= bufp->re_nsub))
402 {
403 nregs = regs->num_regs;
404 if (__glibc_unlikely (nregs < 1))
405 {
406
407 regs = NULL;
408 nregs = 1;
409 }
410 }
411 else
412 nregs = bufp->re_nsub + 1;
413 pmatch = re_malloc (regmatch_t, nregs);
414 if (__glibc_unlikely (pmatch == NULL))
415 {
416 rval = -2;
417 goto out;
418 }
419
420 result = re_search_internal (bufp, string, length, start, last_start, stop,
421 nregs, pmatch, eflags);
422
423 rval = 0;
424
425
426 if (result != REG_NOERROR)
427 rval = result == REG_NOMATCH ? -1 : -2;
428 else if (regs != NULL)
429 {
430
431 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
432 bufp->regs_allocated);
433 if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
434 rval = -2;
435 }
436
437 if (__glibc_likely (rval == 0))
438 {
439 if (ret_len)
440 {
441 DEBUG_ASSERT (pmatch[0].rm_so == start);
442 rval = pmatch[0].rm_eo - start;
443 }
444 else
445 rval = pmatch[0].rm_so;
446 }
447 re_free (pmatch);
448 out:
449 lock_unlock (dfa->lock);
450 return rval;
451 }
452
453 static unsigned
454 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
455 int regs_allocated)
456 {
457 int rval = REGS_REALLOCATE;
458 Idx i;
459 Idx need_regs = nregs + 1;
460
461
462
463
464 if (regs_allocated == REGS_UNALLOCATED)
465 {
466 regs->start = re_malloc (regoff_t, need_regs);
467 if (__glibc_unlikely (regs->start == NULL))
468 return REGS_UNALLOCATED;
469 regs->end = re_malloc (regoff_t, need_regs);
470 if (__glibc_unlikely (regs->end == NULL))
471 {
472 re_free (regs->start);
473 return REGS_UNALLOCATED;
474 }
475 regs->num_regs = need_regs;
476 }
477 else if (regs_allocated == REGS_REALLOCATE)
478 {
479
480
481 if (__glibc_unlikely (need_regs > regs->num_regs))
482 {
483 regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
484 regoff_t *new_end;
485 if (__glibc_unlikely (new_start == NULL))
486 return REGS_UNALLOCATED;
487 new_end = re_realloc (regs->end, regoff_t, need_regs);
488 if (__glibc_unlikely (new_end == NULL))
489 {
490 re_free (new_start);
491 return REGS_UNALLOCATED;
492 }
493 regs->start = new_start;
494 regs->end = new_end;
495 regs->num_regs = need_regs;
496 }
497 }
498 else
499 {
500 DEBUG_ASSERT (regs_allocated == REGS_FIXED);
501
502 DEBUG_ASSERT (nregs <= regs->num_regs);
503 rval = REGS_FIXED;
504 }
505
506
507 for (i = 0; i < nregs; ++i)
508 {
509 regs->start[i] = pmatch[i].rm_so;
510 regs->end[i] = pmatch[i].rm_eo;
511 }
512 for ( ; i < regs->num_regs; ++i)
513 regs->start[i] = regs->end[i] = -1;
514
515 return rval;
516 }
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531 void
532 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
533 __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
534 {
535 if (num_regs)
536 {
537 bufp->regs_allocated = REGS_REALLOCATE;
538 regs->num_regs = num_regs;
539 regs->start = starts;
540 regs->end = ends;
541 }
542 else
543 {
544 bufp->regs_allocated = REGS_UNALLOCATED;
545 regs->num_regs = 0;
546 regs->start = regs->end = NULL;
547 }
548 }
549 #ifdef _LIBC
550 weak_alias (__re_set_registers, re_set_registers)
551 #endif
552
553
554
555
556 #if defined _REGEX_RE_COMP || defined _LIBC
557 int
558 # ifdef _LIBC
559 weak_function
560 # endif
561 re_exec (const char *s)
562 {
563 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
564 }
565 #endif
566
567
568
569
570
571
572
573
574
575
576
577
578 static reg_errcode_t
579 __attribute_warn_unused_result__
580 re_search_internal (const regex_t *preg, const char *string, Idx length,
581 Idx start, Idx last_start, Idx stop, size_t nmatch,
582 regmatch_t pmatch[], int eflags)
583 {
584 reg_errcode_t err;
585 const re_dfa_t *dfa = preg->buffer;
586 Idx left_lim, right_lim;
587 int incr;
588 bool fl_longest_match;
589 int match_kind;
590 Idx match_first;
591 Idx match_last = -1;
592 Idx extra_nmatch;
593 bool sb;
594 int ch;
595 re_match_context_t mctx = { .dfa = dfa };
596 char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
597 && start != last_start && !preg->can_be_null)
598 ? preg->fastmap : NULL);
599 RE_TRANSLATE_TYPE t = preg->translate;
600
601 extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
602 nmatch -= extra_nmatch;
603
604
605 if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL
606 || dfa->init_state_word == NULL
607 || dfa->init_state_nl == NULL
608 || dfa->init_state_begbuf == NULL))
609 return REG_NOMATCH;
610
611
612 DEBUG_ASSERT (0 <= last_start && last_start <= length);
613
614
615
616
617 if (dfa->init_state->nodes.nelem == 0
618 && dfa->init_state_word->nodes.nelem == 0
619 && (dfa->init_state_nl->nodes.nelem == 0
620 || !preg->newline_anchor))
621 {
622 if (start != 0 && last_start != 0)
623 return REG_NOMATCH;
624 start = last_start = 0;
625 }
626
627
628 fl_longest_match = (nmatch != 0 || dfa->nbackref);
629
630 err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
631 preg->translate, (preg->syntax & RE_ICASE) != 0,
632 dfa);
633 if (__glibc_unlikely (err != REG_NOERROR))
634 goto free_return;
635 mctx.input.stop = stop;
636 mctx.input.raw_stop = stop;
637 mctx.input.newline_anchor = preg->newline_anchor;
638
639 err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
640 if (__glibc_unlikely (err != REG_NOERROR))
641 goto free_return;
642
643
644
645
646
647 if (nmatch > 1 || dfa->has_mb_node)
648 {
649
650 if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
651 <= mctx.input.bufs_len)))
652 {
653 err = REG_ESPACE;
654 goto free_return;
655 }
656
657 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
658 if (__glibc_unlikely (mctx.state_log == NULL))
659 {
660 err = REG_ESPACE;
661 goto free_return;
662 }
663 }
664
665 match_first = start;
666 mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
667 : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
668
669
670 incr = (last_start < start) ? -1 : 1;
671 left_lim = (last_start < start) ? last_start : start;
672 right_lim = (last_start < start) ? start : last_start;
673 sb = dfa->mb_cur_max == 1;
674 match_kind =
675 (fastmap
676 ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
677 | (start <= last_start ? 2 : 0)
678 | (t != NULL ? 1 : 0))
679 : 8);
680
681 for (;; match_first += incr)
682 {
683 err = REG_NOMATCH;
684 if (match_first < left_lim || right_lim < match_first)
685 goto free_return;
686
687
688
689
690
691
692 switch (match_kind)
693 {
694 case 8:
695
696 break;
697
698 case 7:
699
700 while (__glibc_likely (match_first < right_lim)
701 && !fastmap[t[(unsigned char) string[match_first]]])
702 ++match_first;
703 goto forward_match_found_start_or_reached_end;
704
705 case 6:
706
707 while (__glibc_likely (match_first < right_lim)
708 && !fastmap[(unsigned char) string[match_first]])
709 ++match_first;
710
711 forward_match_found_start_or_reached_end:
712 if (__glibc_unlikely (match_first == right_lim))
713 {
714 ch = match_first >= length
715 ? 0 : (unsigned char) string[match_first];
716 if (!fastmap[t ? t[ch] : ch])
717 goto free_return;
718 }
719 break;
720
721 case 4:
722 case 5:
723
724 while (match_first >= left_lim)
725 {
726 ch = match_first >= length
727 ? 0 : (unsigned char) string[match_first];
728 if (fastmap[t ? t[ch] : ch])
729 break;
730 --match_first;
731 }
732 if (match_first < left_lim)
733 goto free_return;
734 break;
735
736 default:
737
738
739
740 for (;;)
741 {
742
743
744 __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
745 if (__glibc_unlikely (offset
746 >= (__re_size_t) mctx.input.valid_raw_len))
747 {
748 err = re_string_reconstruct (&mctx.input, match_first,
749 eflags);
750 if (__glibc_unlikely (err != REG_NOERROR))
751 goto free_return;
752
753 offset = match_first - mctx.input.raw_mbs_idx;
754 }
755
756 ch = (offset < mctx.input.valid_len
757 ? re_string_byte_at (&mctx.input, offset) : 0);
758 if (fastmap[ch])
759 break;
760 match_first += incr;
761 if (match_first < left_lim || match_first > right_lim)
762 {
763 err = REG_NOMATCH;
764 goto free_return;
765 }
766 }
767 break;
768 }
769
770
771
772 err = re_string_reconstruct (&mctx.input, match_first, eflags);
773 if (__glibc_unlikely (err != REG_NOERROR))
774 goto free_return;
775
776
777
778 if (!sb && !re_string_first_byte (&mctx.input, 0))
779 continue;
780
781
782
783 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
784 match_last = check_matching (&mctx, fl_longest_match,
785 start <= last_start ? &match_first : NULL);
786 if (match_last != -1)
787 {
788 if (__glibc_unlikely (match_last == -2))
789 {
790 err = REG_ESPACE;
791 goto free_return;
792 }
793 else
794 {
795 mctx.match_last = match_last;
796 if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
797 {
798 re_dfastate_t *pstate = mctx.state_log[match_last];
799 mctx.last_node = check_halt_state_context (&mctx, pstate,
800 match_last);
801 }
802 if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
803 || dfa->nbackref)
804 {
805 err = prune_impossible_nodes (&mctx);
806 if (err == REG_NOERROR)
807 break;
808 if (__glibc_unlikely (err != REG_NOMATCH))
809 goto free_return;
810 match_last = -1;
811 }
812 else
813 break;
814 }
815 }
816
817 match_ctx_clean (&mctx);
818 }
819
820 DEBUG_ASSERT (match_last != -1);
821 DEBUG_ASSERT (err == REG_NOERROR);
822
823
824 if (nmatch > 0)
825 {
826 Idx reg_idx;
827
828
829 for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
830 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
831
832
833 pmatch[0].rm_so = 0;
834 pmatch[0].rm_eo = mctx.match_last;
835
836
837
838
839 if (!preg->no_sub && nmatch > 1)
840 {
841 err = set_regs (preg, &mctx, nmatch, pmatch,
842 dfa->has_plural_match && dfa->nbackref > 0);
843 if (__glibc_unlikely (err != REG_NOERROR))
844 goto free_return;
845 }
846
847
848
849
850 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
851 if (pmatch[reg_idx].rm_so != -1)
852 {
853 if (__glibc_unlikely (mctx.input.offsets_needed != 0))
854 {
855 pmatch[reg_idx].rm_so =
856 (pmatch[reg_idx].rm_so == mctx.input.valid_len
857 ? mctx.input.valid_raw_len
858 : mctx.input.offsets[pmatch[reg_idx].rm_so]);
859 pmatch[reg_idx].rm_eo =
860 (pmatch[reg_idx].rm_eo == mctx.input.valid_len
861 ? mctx.input.valid_raw_len
862 : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
863 }
864 pmatch[reg_idx].rm_so += match_first;
865 pmatch[reg_idx].rm_eo += match_first;
866 }
867 for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
868 {
869 pmatch[nmatch + reg_idx].rm_so = -1;
870 pmatch[nmatch + reg_idx].rm_eo = -1;
871 }
872
873 if (dfa->subexp_map)
874 for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
875 if (dfa->subexp_map[reg_idx] != reg_idx)
876 {
877 pmatch[reg_idx + 1].rm_so
878 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
879 pmatch[reg_idx + 1].rm_eo
880 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
881 }
882 }
883
884 free_return:
885 re_free (mctx.state_log);
886 if (dfa->nbackref)
887 match_ctx_free (&mctx);
888 re_string_destruct (&mctx.input);
889 return err;
890 }
891
892 static reg_errcode_t
893 __attribute_warn_unused_result__
894 prune_impossible_nodes (re_match_context_t *mctx)
895 {
896 const re_dfa_t *const dfa = mctx->dfa;
897 Idx halt_node, match_last;
898 reg_errcode_t ret;
899 re_dfastate_t **sifted_states;
900 re_dfastate_t **lim_states = NULL;
901 re_sift_context_t sctx;
902 DEBUG_ASSERT (mctx->state_log != NULL);
903 match_last = mctx->match_last;
904 halt_node = mctx->last_node;
905
906
907 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
908 <= match_last))
909 return REG_ESPACE;
910
911 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
912 if (__glibc_unlikely (sifted_states == NULL))
913 {
914 ret = REG_ESPACE;
915 goto free_return;
916 }
917 if (dfa->nbackref)
918 {
919 lim_states = re_malloc (re_dfastate_t *, match_last + 1);
920 if (__glibc_unlikely (lim_states == NULL))
921 {
922 ret = REG_ESPACE;
923 goto free_return;
924 }
925 while (1)
926 {
927 memset (lim_states, '\0',
928 sizeof (re_dfastate_t *) * (match_last + 1));
929 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
930 match_last);
931 ret = sift_states_backward (mctx, &sctx);
932 re_node_set_free (&sctx.limits);
933 if (__glibc_unlikely (ret != REG_NOERROR))
934 goto free_return;
935 if (sifted_states[0] != NULL || lim_states[0] != NULL)
936 break;
937 do
938 {
939 --match_last;
940 if (match_last < 0)
941 {
942 ret = REG_NOMATCH;
943 goto free_return;
944 }
945 } while (mctx->state_log[match_last] == NULL
946 || !mctx->state_log[match_last]->halt);
947 halt_node = check_halt_state_context (mctx,
948 mctx->state_log[match_last],
949 match_last);
950 }
951 ret = merge_state_array (dfa, sifted_states, lim_states,
952 match_last + 1);
953 re_free (lim_states);
954 lim_states = NULL;
955 if (__glibc_unlikely (ret != REG_NOERROR))
956 goto free_return;
957 }
958 else
959 {
960 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
961 ret = sift_states_backward (mctx, &sctx);
962 re_node_set_free (&sctx.limits);
963 if (__glibc_unlikely (ret != REG_NOERROR))
964 goto free_return;
965 if (sifted_states[0] == NULL)
966 {
967 ret = REG_NOMATCH;
968 goto free_return;
969 }
970 }
971 re_free (mctx->state_log);
972 mctx->state_log = sifted_states;
973 sifted_states = NULL;
974 mctx->last_node = halt_node;
975 mctx->match_last = match_last;
976 ret = REG_NOERROR;
977 free_return:
978 re_free (sifted_states);
979 re_free (lim_states);
980 return ret;
981 }
982
983
984
985
986
987 static __always_inline re_dfastate_t *
988 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
989 Idx idx)
990 {
991 const re_dfa_t *const dfa = mctx->dfa;
992 if (dfa->init_state->has_constraint)
993 {
994 unsigned int context;
995 context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
996 if (IS_WORD_CONTEXT (context))
997 return dfa->init_state_word;
998 else if (IS_ORDINARY_CONTEXT (context))
999 return dfa->init_state;
1000 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1001 return dfa->init_state_begbuf;
1002 else if (IS_NEWLINE_CONTEXT (context))
1003 return dfa->init_state_nl;
1004 else if (IS_BEGBUF_CONTEXT (context))
1005 {
1006
1007 return re_acquire_state_context (err, dfa,
1008 dfa->init_state->entrance_nodes,
1009 context);
1010 }
1011 else
1012
1013 return dfa->init_state;
1014 }
1015 else
1016 return dfa->init_state;
1017 }
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028 static Idx
1029 __attribute_warn_unused_result__
1030 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1031 Idx *p_match_first)
1032 {
1033 const re_dfa_t *const dfa = mctx->dfa;
1034 reg_errcode_t err;
1035 Idx match = 0;
1036 Idx match_last = -1;
1037 Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1038 re_dfastate_t *cur_state;
1039 bool at_init_state = p_match_first != NULL;
1040 Idx next_start_idx = cur_str_idx;
1041
1042 err = REG_NOERROR;
1043 cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1044
1045 if (__glibc_unlikely (cur_state == NULL))
1046 {
1047 DEBUG_ASSERT (err == REG_ESPACE);
1048 return -2;
1049 }
1050
1051 if (mctx->state_log != NULL)
1052 {
1053 mctx->state_log[cur_str_idx] = cur_state;
1054
1055
1056
1057 if (__glibc_unlikely (dfa->nbackref))
1058 {
1059 at_init_state = false;
1060 err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1061 if (__glibc_unlikely (err != REG_NOERROR))
1062 return err;
1063
1064 if (cur_state->has_backref)
1065 {
1066 err = transit_state_bkref (mctx, &cur_state->nodes);
1067 if (__glibc_unlikely (err != REG_NOERROR))
1068 return err;
1069 }
1070 }
1071 }
1072
1073
1074 if (__glibc_unlikely (cur_state->halt))
1075 {
1076 if (!cur_state->has_constraint
1077 || check_halt_state_context (mctx, cur_state, cur_str_idx))
1078 {
1079 if (!fl_longest_match)
1080 return cur_str_idx;
1081 else
1082 {
1083 match_last = cur_str_idx;
1084 match = 1;
1085 }
1086 }
1087 }
1088
1089 while (!re_string_eoi (&mctx->input))
1090 {
1091 re_dfastate_t *old_state = cur_state;
1092 Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1093
1094 if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
1095 && mctx->input.bufs_len < mctx->input.len)
1096 || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
1097 && mctx->input.valid_len < mctx->input.len))
1098 {
1099 err = extend_buffers (mctx, next_char_idx + 1);
1100 if (__glibc_unlikely (err != REG_NOERROR))
1101 {
1102 DEBUG_ASSERT (err == REG_ESPACE);
1103 return -2;
1104 }
1105 }
1106
1107 cur_state = transit_state (&err, mctx, cur_state);
1108 if (mctx->state_log != NULL)
1109 cur_state = merge_state_with_log (&err, mctx, cur_state);
1110
1111 if (cur_state == NULL)
1112 {
1113
1114
1115
1116 if (__glibc_unlikely (err != REG_NOERROR))
1117 return -2;
1118
1119 if (mctx->state_log == NULL
1120 || (match && !fl_longest_match)
1121 || (cur_state = find_recover_state (&err, mctx)) == NULL)
1122 break;
1123 }
1124
1125 if (__glibc_unlikely (at_init_state))
1126 {
1127 if (old_state == cur_state)
1128 next_start_idx = next_char_idx;
1129 else
1130 at_init_state = false;
1131 }
1132
1133 if (cur_state->halt)
1134 {
1135
1136
1137 if (!cur_state->has_constraint
1138 || check_halt_state_context (mctx, cur_state,
1139 re_string_cur_idx (&mctx->input)))
1140 {
1141
1142 match_last = re_string_cur_idx (&mctx->input);
1143 match = 1;
1144
1145
1146 p_match_first = NULL;
1147 if (!fl_longest_match)
1148 break;
1149 }
1150 }
1151 }
1152
1153 if (p_match_first)
1154 *p_match_first += next_start_idx;
1155
1156 return match_last;
1157 }
1158
1159
1160
1161 static bool
1162 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1163 {
1164 re_token_type_t type = dfa->nodes[node].type;
1165 unsigned int constraint = dfa->nodes[node].constraint;
1166 if (type != END_OF_RE)
1167 return false;
1168 if (!constraint)
1169 return true;
1170 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1171 return false;
1172 return true;
1173 }
1174
1175
1176
1177
1178
1179 static Idx
1180 check_halt_state_context (const re_match_context_t *mctx,
1181 const re_dfastate_t *state, Idx idx)
1182 {
1183 Idx i;
1184 unsigned int context;
1185 DEBUG_ASSERT (state->halt);
1186 context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1187 for (i = 0; i < state->nodes.nelem; ++i)
1188 if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1189 return state->nodes.elems[i];
1190 return 0;
1191 }
1192
1193
1194
1195
1196
1197
1198 static Idx
1199 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1200 regmatch_t *prevregs,
1201 Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1202 struct re_fail_stack_t *fs)
1203 {
1204 const re_dfa_t *const dfa = mctx->dfa;
1205 if (IS_EPSILON_NODE (dfa->nodes[node].type))
1206 {
1207 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1208 re_node_set *edests = &dfa->edests[node];
1209
1210 if (! re_node_set_contains (eps_via_nodes, node))
1211 {
1212 bool ok = re_node_set_insert (eps_via_nodes, node);
1213 if (__glibc_unlikely (! ok))
1214 return -2;
1215 }
1216
1217
1218 Idx dest_node = -1;
1219 for (Idx i = 0; i < edests->nelem; i++)
1220 {
1221 Idx candidate = edests->elems[i];
1222 if (!re_node_set_contains (cur_nodes, candidate))
1223 continue;
1224 if (dest_node == -1)
1225 dest_node = candidate;
1226
1227 else
1228 {
1229
1230
1231 if (re_node_set_contains (eps_via_nodes, dest_node))
1232 return candidate;
1233
1234
1235 else if (fs != NULL
1236 && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1237 prevregs, eps_via_nodes))
1238 return -2;
1239
1240
1241 break;
1242 }
1243 }
1244 return dest_node;
1245 }
1246 else
1247 {
1248 Idx naccepted = 0;
1249 re_token_type_t type = dfa->nodes[node].type;
1250
1251 if (dfa->nodes[node].accept_mb)
1252 naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1253 else if (type == OP_BACK_REF)
1254 {
1255 Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1256 if (subexp_idx < nregs)
1257 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1258 if (fs != NULL)
1259 {
1260 if (subexp_idx >= nregs
1261 || regs[subexp_idx].rm_so == -1
1262 || regs[subexp_idx].rm_eo == -1)
1263 return -1;
1264 else if (naccepted)
1265 {
1266 char *buf = (char *) re_string_get_buffer (&mctx->input);
1267 if (mctx->input.valid_len - *pidx < naccepted
1268 || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1269 naccepted)
1270 != 0))
1271 return -1;
1272 }
1273 }
1274
1275 if (naccepted == 0)
1276 {
1277 Idx dest_node;
1278 bool ok = re_node_set_insert (eps_via_nodes, node);
1279 if (__glibc_unlikely (! ok))
1280 return -2;
1281 dest_node = dfa->edests[node].elems[0];
1282 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1283 dest_node))
1284 return dest_node;
1285 }
1286 }
1287
1288 if (naccepted != 0
1289 || check_node_accept (mctx, dfa->nodes + node, *pidx))
1290 {
1291 Idx dest_node = dfa->nexts[node];
1292 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1293 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1294 || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1295 dest_node)))
1296 return -1;
1297 re_node_set_empty (eps_via_nodes);
1298 return dest_node;
1299 }
1300 }
1301 return -1;
1302 }
1303
1304 static reg_errcode_t
1305 __attribute_warn_unused_result__
1306 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1307 Idx nregs, regmatch_t *regs, regmatch_t *prevregs,
1308 re_node_set *eps_via_nodes)
1309 {
1310 reg_errcode_t err;
1311 Idx num = fs->num++;
1312 if (fs->num == fs->alloc)
1313 {
1314 struct re_fail_stack_ent_t *new_array;
1315 new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1316 fs->alloc * 2);
1317 if (new_array == NULL)
1318 return REG_ESPACE;
1319 fs->alloc *= 2;
1320 fs->stack = new_array;
1321 }
1322 fs->stack[num].idx = str_idx;
1323 fs->stack[num].node = dest_node;
1324 fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs);
1325 if (fs->stack[num].regs == NULL)
1326 return REG_ESPACE;
1327 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1328 memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs);
1329 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1330 return err;
1331 }
1332
1333 static Idx
1334 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1335 regmatch_t *regs, regmatch_t *prevregs,
1336 re_node_set *eps_via_nodes)
1337 {
1338 if (fs == NULL || fs->num == 0)
1339 return -1;
1340 Idx num = --fs->num;
1341 *pidx = fs->stack[num].idx;
1342 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1343 memcpy (prevregs, fs->stack[num].regs + nregs, sizeof (regmatch_t) * nregs);
1344 re_node_set_free (eps_via_nodes);
1345 re_free (fs->stack[num].regs);
1346 *eps_via_nodes = fs->stack[num].eps_via_nodes;
1347 DEBUG_ASSERT (0 <= fs->stack[num].node);
1348 return fs->stack[num].node;
1349 }
1350
1351
1352 #define DYNARRAY_STRUCT regmatch_list
1353 #define DYNARRAY_ELEMENT regmatch_t
1354 #define DYNARRAY_PREFIX regmatch_list_
1355 #include <malloc/dynarray-skeleton.c>
1356
1357
1358
1359
1360
1361
1362 static reg_errcode_t
1363 __attribute_warn_unused_result__
1364 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1365 regmatch_t *pmatch, bool fl_backtrack)
1366 {
1367 const re_dfa_t *dfa = preg->buffer;
1368 Idx idx, cur_node;
1369 re_node_set eps_via_nodes;
1370 struct re_fail_stack_t *fs;
1371 struct re_fail_stack_t fs_body = { 0, 2, NULL };
1372 struct regmatch_list prev_match;
1373 regmatch_list_init (&prev_match);
1374
1375 DEBUG_ASSERT (nmatch > 1);
1376 DEBUG_ASSERT (mctx->state_log != NULL);
1377 if (fl_backtrack)
1378 {
1379 fs = &fs_body;
1380 fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1381 if (fs->stack == NULL)
1382 return REG_ESPACE;
1383 }
1384 else
1385 fs = NULL;
1386
1387 cur_node = dfa->init_node;
1388 re_node_set_init_empty (&eps_via_nodes);
1389
1390 if (!regmatch_list_resize (&prev_match, nmatch))
1391 {
1392 regmatch_list_free (&prev_match);
1393 free_fail_stack_return (fs);
1394 return REG_ESPACE;
1395 }
1396 regmatch_t *prev_idx_match = regmatch_list_begin (&prev_match);
1397 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1398
1399 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1400 {
1401 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1402
1403 if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1404 || (fs && re_node_set_contains (&eps_via_nodes, cur_node)))
1405 {
1406 Idx reg_idx;
1407 cur_node = -1;
1408 if (fs)
1409 {
1410 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1411 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1412 {
1413 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1414 prev_idx_match, &eps_via_nodes);
1415 break;
1416 }
1417 }
1418 if (cur_node < 0)
1419 {
1420 re_node_set_free (&eps_via_nodes);
1421 regmatch_list_free (&prev_match);
1422 return free_fail_stack_return (fs);
1423 }
1424 }
1425
1426
1427 cur_node = proceed_next_node (mctx, nmatch, pmatch, prev_idx_match,
1428 &idx, cur_node,
1429 &eps_via_nodes, fs);
1430
1431 if (__glibc_unlikely (cur_node < 0))
1432 {
1433 if (__glibc_unlikely (cur_node == -2))
1434 {
1435 re_node_set_free (&eps_via_nodes);
1436 regmatch_list_free (&prev_match);
1437 free_fail_stack_return (fs);
1438 return REG_ESPACE;
1439 }
1440 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1441 prev_idx_match, &eps_via_nodes);
1442 if (cur_node < 0)
1443 {
1444 re_node_set_free (&eps_via_nodes);
1445 regmatch_list_free (&prev_match);
1446 free_fail_stack_return (fs);
1447 return REG_NOMATCH;
1448 }
1449 }
1450 }
1451 re_node_set_free (&eps_via_nodes);
1452 regmatch_list_free (&prev_match);
1453 return free_fail_stack_return (fs);
1454 }
1455
1456 static reg_errcode_t
1457 free_fail_stack_return (struct re_fail_stack_t *fs)
1458 {
1459 if (fs)
1460 {
1461 Idx fs_idx;
1462 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1463 {
1464 re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1465 re_free (fs->stack[fs_idx].regs);
1466 }
1467 re_free (fs->stack);
1468 }
1469 return REG_NOERROR;
1470 }
1471
1472 static void
1473 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1474 regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1475 {
1476 int type = dfa->nodes[cur_node].type;
1477 if (type == OP_OPEN_SUBEXP)
1478 {
1479 Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1480
1481
1482 if (reg_num < nmatch)
1483 {
1484 pmatch[reg_num].rm_so = cur_idx;
1485 pmatch[reg_num].rm_eo = -1;
1486 }
1487 }
1488 else if (type == OP_CLOSE_SUBEXP)
1489 {
1490
1491 Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1492 if (reg_num < nmatch)
1493 {
1494 if (pmatch[reg_num].rm_so < cur_idx)
1495 {
1496 pmatch[reg_num].rm_eo = cur_idx;
1497
1498
1499 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1500 }
1501 else
1502 {
1503 if (dfa->nodes[cur_node].opt_subexp
1504 && prev_idx_match[reg_num].rm_so != -1)
1505
1506
1507
1508
1509
1510 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1511 else
1512
1513
1514 pmatch[reg_num].rm_eo = cur_idx;
1515 }
1516 }
1517 }
1518 }
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540 #define STATE_NODE_CONTAINS(state,node) \
1541 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1542
1543 static reg_errcode_t
1544 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1545 {
1546 reg_errcode_t err;
1547 int null_cnt = 0;
1548 Idx str_idx = sctx->last_str_idx;
1549 re_node_set cur_dest;
1550
1551 DEBUG_ASSERT (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1552
1553
1554
1555 err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1556 if (__glibc_unlikely (err != REG_NOERROR))
1557 return err;
1558 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1559 if (__glibc_unlikely (err != REG_NOERROR))
1560 goto free_return;
1561
1562
1563 while (str_idx > 0)
1564 {
1565
1566 null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1567 if (null_cnt > mctx->max_mb_elem_len)
1568 {
1569 memset (sctx->sifted_states, '\0',
1570 sizeof (re_dfastate_t *) * str_idx);
1571 re_node_set_free (&cur_dest);
1572 return REG_NOERROR;
1573 }
1574 re_node_set_empty (&cur_dest);
1575 --str_idx;
1576
1577 if (mctx->state_log[str_idx])
1578 {
1579 err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1580 if (__glibc_unlikely (err != REG_NOERROR))
1581 goto free_return;
1582 }
1583
1584
1585
1586
1587
1588 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1589 if (__glibc_unlikely (err != REG_NOERROR))
1590 goto free_return;
1591 }
1592 err = REG_NOERROR;
1593 free_return:
1594 re_node_set_free (&cur_dest);
1595 return err;
1596 }
1597
1598 static reg_errcode_t
1599 __attribute_warn_unused_result__
1600 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1601 Idx str_idx, re_node_set *cur_dest)
1602 {
1603 const re_dfa_t *const dfa = mctx->dfa;
1604 const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1605 Idx i;
1606
1607
1608
1609
1610
1611
1612
1613
1614 for (i = 0; i < cur_src->nelem; i++)
1615 {
1616 Idx prev_node = cur_src->elems[i];
1617 int naccepted = 0;
1618 bool ok;
1619 DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type));
1620
1621
1622 if (dfa->nodes[prev_node].accept_mb)
1623 naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1624 str_idx, sctx->last_str_idx);
1625
1626
1627
1628 if (!naccepted
1629 && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1630 && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1631 dfa->nexts[prev_node]))
1632 naccepted = 1;
1633
1634 if (naccepted == 0)
1635 continue;
1636
1637 if (sctx->limits.nelem)
1638 {
1639 Idx to_idx = str_idx + naccepted;
1640 if (check_dst_limits (mctx, &sctx->limits,
1641 dfa->nexts[prev_node], to_idx,
1642 prev_node, str_idx))
1643 continue;
1644 }
1645 ok = re_node_set_insert (cur_dest, prev_node);
1646 if (__glibc_unlikely (! ok))
1647 return REG_ESPACE;
1648 }
1649
1650 return REG_NOERROR;
1651 }
1652
1653
1654
1655 static reg_errcode_t
1656 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1657 {
1658 Idx top = mctx->state_log_top;
1659
1660 if ((next_state_log_idx >= mctx->input.bufs_len
1661 && mctx->input.bufs_len < mctx->input.len)
1662 || (next_state_log_idx >= mctx->input.valid_len
1663 && mctx->input.valid_len < mctx->input.len))
1664 {
1665 reg_errcode_t err;
1666 err = extend_buffers (mctx, next_state_log_idx + 1);
1667 if (__glibc_unlikely (err != REG_NOERROR))
1668 return err;
1669 }
1670
1671 if (top < next_state_log_idx)
1672 {
1673 DEBUG_ASSERT (mctx->state_log != NULL);
1674 memset (mctx->state_log + top + 1, '\0',
1675 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1676 mctx->state_log_top = next_state_log_idx;
1677 }
1678 return REG_NOERROR;
1679 }
1680
1681 static reg_errcode_t
1682 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1683 re_dfastate_t **src, Idx num)
1684 {
1685 Idx st_idx;
1686 reg_errcode_t err;
1687 for (st_idx = 0; st_idx < num; ++st_idx)
1688 {
1689 if (dst[st_idx] == NULL)
1690 dst[st_idx] = src[st_idx];
1691 else if (src[st_idx] != NULL)
1692 {
1693 re_node_set merged_set;
1694 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1695 &src[st_idx]->nodes);
1696 if (__glibc_unlikely (err != REG_NOERROR))
1697 return err;
1698 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1699 re_node_set_free (&merged_set);
1700 if (__glibc_unlikely (err != REG_NOERROR))
1701 return err;
1702 }
1703 }
1704 return REG_NOERROR;
1705 }
1706
1707 static reg_errcode_t
1708 update_cur_sifted_state (const re_match_context_t *mctx,
1709 re_sift_context_t *sctx, Idx str_idx,
1710 re_node_set *dest_nodes)
1711 {
1712 const re_dfa_t *const dfa = mctx->dfa;
1713 reg_errcode_t err = REG_NOERROR;
1714 const re_node_set *candidates;
1715 candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1716 : &mctx->state_log[str_idx]->nodes);
1717
1718 if (dest_nodes->nelem == 0)
1719 sctx->sifted_states[str_idx] = NULL;
1720 else
1721 {
1722 if (candidates)
1723 {
1724
1725
1726 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1727 if (__glibc_unlikely (err != REG_NOERROR))
1728 return err;
1729
1730
1731 if (sctx->limits.nelem)
1732 {
1733 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1734 mctx->bkref_ents, str_idx);
1735 if (__glibc_unlikely (err != REG_NOERROR))
1736 return err;
1737 }
1738 }
1739
1740 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1741 if (__glibc_unlikely (err != REG_NOERROR))
1742 return err;
1743 }
1744
1745 if (candidates && mctx->state_log[str_idx]->has_backref)
1746 {
1747 err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1748 if (__glibc_unlikely (err != REG_NOERROR))
1749 return err;
1750 }
1751 return REG_NOERROR;
1752 }
1753
1754 static reg_errcode_t
1755 __attribute_warn_unused_result__
1756 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1757 const re_node_set *candidates)
1758 {
1759 reg_errcode_t err = REG_NOERROR;
1760 Idx i;
1761
1762 re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1763 if (__glibc_unlikely (err != REG_NOERROR))
1764 return err;
1765
1766 if (!state->inveclosure.alloc)
1767 {
1768 err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1769 if (__glibc_unlikely (err != REG_NOERROR))
1770 return REG_ESPACE;
1771 for (i = 0; i < dest_nodes->nelem; i++)
1772 {
1773 err = re_node_set_merge (&state->inveclosure,
1774 dfa->inveclosures + dest_nodes->elems[i]);
1775 if (__glibc_unlikely (err != REG_NOERROR))
1776 return REG_ESPACE;
1777 }
1778 }
1779 return re_node_set_add_intersect (dest_nodes, candidates,
1780 &state->inveclosure);
1781 }
1782
1783 static reg_errcode_t
1784 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1785 const re_node_set *candidates)
1786 {
1787 Idx ecl_idx;
1788 reg_errcode_t err;
1789 re_node_set *inv_eclosure = dfa->inveclosures + node;
1790 re_node_set except_nodes;
1791 re_node_set_init_empty (&except_nodes);
1792 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1793 {
1794 Idx cur_node = inv_eclosure->elems[ecl_idx];
1795 if (cur_node == node)
1796 continue;
1797 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1798 {
1799 Idx edst1 = dfa->edests[cur_node].elems[0];
1800 Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1801 ? dfa->edests[cur_node].elems[1] : -1);
1802 if ((!re_node_set_contains (inv_eclosure, edst1)
1803 && re_node_set_contains (dest_nodes, edst1))
1804 || (edst2 > 0
1805 && !re_node_set_contains (inv_eclosure, edst2)
1806 && re_node_set_contains (dest_nodes, edst2)))
1807 {
1808 err = re_node_set_add_intersect (&except_nodes, candidates,
1809 dfa->inveclosures + cur_node);
1810 if (__glibc_unlikely (err != REG_NOERROR))
1811 {
1812 re_node_set_free (&except_nodes);
1813 return err;
1814 }
1815 }
1816 }
1817 }
1818 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1819 {
1820 Idx cur_node = inv_eclosure->elems[ecl_idx];
1821 if (!re_node_set_contains (&except_nodes, cur_node))
1822 {
1823 Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1824 re_node_set_remove_at (dest_nodes, idx);
1825 }
1826 }
1827 re_node_set_free (&except_nodes);
1828 return REG_NOERROR;
1829 }
1830
1831 static bool
1832 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1833 Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1834 {
1835 const re_dfa_t *const dfa = mctx->dfa;
1836 Idx lim_idx, src_pos, dst_pos;
1837
1838 Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1839 Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1840 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1841 {
1842 Idx subexp_idx;
1843 struct re_backref_cache_entry *ent;
1844 ent = mctx->bkref_ents + limits->elems[lim_idx];
1845 subexp_idx = dfa->nodes[ent->node].opr.idx;
1846
1847 dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1848 subexp_idx, dst_node, dst_idx,
1849 dst_bkref_idx);
1850 src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1851 subexp_idx, src_node, src_idx,
1852 src_bkref_idx);
1853
1854
1855
1856
1857
1858 if (src_pos == dst_pos)
1859 continue;
1860 else
1861 return true;
1862 }
1863 return false;
1864 }
1865
1866 static int
1867 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1868 Idx subexp_idx, Idx from_node, Idx bkref_idx)
1869 {
1870 const re_dfa_t *const dfa = mctx->dfa;
1871 const re_node_set *eclosures = dfa->eclosures + from_node;
1872 Idx node_idx;
1873
1874
1875
1876 for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1877 {
1878 Idx node = eclosures->elems[node_idx];
1879 switch (dfa->nodes[node].type)
1880 {
1881 case OP_BACK_REF:
1882 if (bkref_idx != -1)
1883 {
1884 struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1885 do
1886 {
1887 Idx dst;
1888 int cpos;
1889
1890 if (ent->node != node)
1891 continue;
1892
1893 if (subexp_idx < BITSET_WORD_BITS
1894 && !(ent->eps_reachable_subexps_map
1895 & ((bitset_word_t) 1 << subexp_idx)))
1896 continue;
1897
1898
1899
1900
1901
1902
1903
1904 dst = dfa->edests[node].elems[0];
1905 if (dst == from_node)
1906 {
1907 if (boundaries & 1)
1908 return -1;
1909 else
1910 return 0;
1911 }
1912
1913 cpos =
1914 check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1915 dst, bkref_idx);
1916 if (cpos == -1 )
1917 return -1;
1918 if (cpos == 0 && (boundaries & 2))
1919 return 0;
1920
1921 if (subexp_idx < BITSET_WORD_BITS)
1922 ent->eps_reachable_subexps_map
1923 &= ~((bitset_word_t) 1 << subexp_idx);
1924 }
1925 while (ent++->more);
1926 }
1927 break;
1928
1929 case OP_OPEN_SUBEXP:
1930 if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1931 return -1;
1932 break;
1933
1934 case OP_CLOSE_SUBEXP:
1935 if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1936 return 0;
1937 break;
1938
1939 default:
1940 break;
1941 }
1942 }
1943
1944 return (boundaries & 2) ? 1 : 0;
1945 }
1946
1947 static int
1948 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
1949 Idx subexp_idx, Idx from_node, Idx str_idx,
1950 Idx bkref_idx)
1951 {
1952 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1953 int boundaries;
1954
1955
1956 if (str_idx < lim->subexp_from)
1957 return -1;
1958
1959 if (lim->subexp_to < str_idx)
1960 return 1;
1961
1962
1963 boundaries = (str_idx == lim->subexp_from);
1964 boundaries |= (str_idx == lim->subexp_to) << 1;
1965 if (boundaries == 0)
1966 return 0;
1967
1968
1969 return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1970 from_node, bkref_idx);
1971 }
1972
1973
1974
1975
1976 static reg_errcode_t
1977 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
1978 const re_node_set *candidates, re_node_set *limits,
1979 struct re_backref_cache_entry *bkref_ents, Idx str_idx)
1980 {
1981 reg_errcode_t err;
1982 Idx node_idx, lim_idx;
1983
1984 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1985 {
1986 Idx subexp_idx;
1987 struct re_backref_cache_entry *ent;
1988 ent = bkref_ents + limits->elems[lim_idx];
1989
1990 if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
1991 continue;
1992
1993 subexp_idx = dfa->nodes[ent->node].opr.idx;
1994 if (ent->subexp_to == str_idx)
1995 {
1996 Idx ops_node = -1;
1997 Idx cls_node = -1;
1998 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
1999 {
2000 Idx node = dest_nodes->elems[node_idx];
2001 re_token_type_t type = dfa->nodes[node].type;
2002 if (type == OP_OPEN_SUBEXP
2003 && subexp_idx == dfa->nodes[node].opr.idx)
2004 ops_node = node;
2005 else if (type == OP_CLOSE_SUBEXP
2006 && subexp_idx == dfa->nodes[node].opr.idx)
2007 cls_node = node;
2008 }
2009
2010
2011
2012 if (ops_node >= 0)
2013 {
2014 err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2015 candidates);
2016 if (__glibc_unlikely (err != REG_NOERROR))
2017 return err;
2018 }
2019
2020
2021 if (cls_node >= 0)
2022 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2023 {
2024 Idx node = dest_nodes->elems[node_idx];
2025 if (!re_node_set_contains (dfa->inveclosures + node,
2026 cls_node)
2027 && !re_node_set_contains (dfa->eclosures + node,
2028 cls_node))
2029 {
2030
2031
2032 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2033 candidates);
2034 if (__glibc_unlikely (err != REG_NOERROR))
2035 return err;
2036 --node_idx;
2037 }
2038 }
2039 }
2040 else
2041 {
2042 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2043 {
2044 Idx node = dest_nodes->elems[node_idx];
2045 re_token_type_t type = dfa->nodes[node].type;
2046 if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2047 {
2048 if (subexp_idx != dfa->nodes[node].opr.idx)
2049 continue;
2050
2051
2052 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2053 candidates);
2054 if (__glibc_unlikely (err != REG_NOERROR))
2055 return err;
2056 }
2057 }
2058 }
2059 }
2060 return REG_NOERROR;
2061 }
2062
2063 static reg_errcode_t
2064 __attribute_warn_unused_result__
2065 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2066 Idx str_idx, const re_node_set *candidates)
2067 {
2068 const re_dfa_t *const dfa = mctx->dfa;
2069 reg_errcode_t err;
2070 Idx node_idx, node;
2071 re_sift_context_t local_sctx;
2072 Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2073
2074 if (first_idx == -1)
2075 return REG_NOERROR;
2076
2077 local_sctx.sifted_states = NULL;
2078
2079 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2080 {
2081 Idx enabled_idx;
2082 re_token_type_t type;
2083 struct re_backref_cache_entry *entry;
2084 node = candidates->elems[node_idx];
2085 type = dfa->nodes[node].type;
2086
2087 if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2088 continue;
2089 if (type != OP_BACK_REF)
2090 continue;
2091
2092 entry = mctx->bkref_ents + first_idx;
2093 enabled_idx = first_idx;
2094 do
2095 {
2096 Idx subexp_len;
2097 Idx to_idx;
2098 Idx dst_node;
2099 bool ok;
2100 re_dfastate_t *cur_state;
2101
2102 if (entry->node != node)
2103 continue;
2104 subexp_len = entry->subexp_to - entry->subexp_from;
2105 to_idx = str_idx + subexp_len;
2106 dst_node = (subexp_len ? dfa->nexts[node]
2107 : dfa->edests[node].elems[0]);
2108
2109 if (to_idx > sctx->last_str_idx
2110 || sctx->sifted_states[to_idx] == NULL
2111 || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2112 || check_dst_limits (mctx, &sctx->limits, node,
2113 str_idx, dst_node, to_idx))
2114 continue;
2115
2116 if (local_sctx.sifted_states == NULL)
2117 {
2118 local_sctx = *sctx;
2119 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2120 if (__glibc_unlikely (err != REG_NOERROR))
2121 goto free_return;
2122 }
2123 local_sctx.last_node = node;
2124 local_sctx.last_str_idx = str_idx;
2125 ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2126 if (__glibc_unlikely (! ok))
2127 {
2128 err = REG_ESPACE;
2129 goto free_return;
2130 }
2131 cur_state = local_sctx.sifted_states[str_idx];
2132 err = sift_states_backward (mctx, &local_sctx);
2133 if (__glibc_unlikely (err != REG_NOERROR))
2134 goto free_return;
2135 if (sctx->limited_states != NULL)
2136 {
2137 err = merge_state_array (dfa, sctx->limited_states,
2138 local_sctx.sifted_states,
2139 str_idx + 1);
2140 if (__glibc_unlikely (err != REG_NOERROR))
2141 goto free_return;
2142 }
2143 local_sctx.sifted_states[str_idx] = cur_state;
2144 re_node_set_remove (&local_sctx.limits, enabled_idx);
2145
2146
2147 entry = mctx->bkref_ents + enabled_idx;
2148 }
2149 while (enabled_idx++, entry++->more);
2150 }
2151 err = REG_NOERROR;
2152 free_return:
2153 if (local_sctx.sifted_states != NULL)
2154 {
2155 re_node_set_free (&local_sctx.limits);
2156 }
2157
2158 return err;
2159 }
2160
2161
2162 static int
2163 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2164 Idx node_idx, Idx str_idx, Idx max_str_idx)
2165 {
2166 const re_dfa_t *const dfa = mctx->dfa;
2167 int naccepted;
2168
2169 naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2170 if (naccepted > 0 && str_idx + naccepted <= max_str_idx
2171 && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2172 dfa->nexts[node_idx]))
2173
2174
2175
2176 naccepted = 0;
2177
2178
2179 return naccepted;
2180 }
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190 static re_dfastate_t *
2191 __attribute_warn_unused_result__
2192 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2193 re_dfastate_t *state)
2194 {
2195 re_dfastate_t **trtable;
2196 unsigned char ch;
2197
2198
2199 if (__glibc_unlikely (state->accept_mb))
2200 {
2201 *err = transit_state_mb (mctx, state);
2202 if (__glibc_unlikely (*err != REG_NOERROR))
2203 return NULL;
2204 }
2205
2206
2207 #if 0
2208 if (0)
2209
2210 return transit_state_sb (err, mctx, state);
2211 #endif
2212
2213
2214 ch = re_string_fetch_byte (&mctx->input);
2215 for (;;)
2216 {
2217 trtable = state->trtable;
2218 if (__glibc_likely (trtable != NULL))
2219 return trtable[ch];
2220
2221 trtable = state->word_trtable;
2222 if (__glibc_likely (trtable != NULL))
2223 {
2224 unsigned int context;
2225 context
2226 = re_string_context_at (&mctx->input,
2227 re_string_cur_idx (&mctx->input) - 1,
2228 mctx->eflags);
2229 if (IS_WORD_CONTEXT (context))
2230 return trtable[ch + SBC_MAX];
2231 else
2232 return trtable[ch];
2233 }
2234
2235 if (!build_trtable (mctx->dfa, state))
2236 {
2237 *err = REG_ESPACE;
2238 return NULL;
2239 }
2240
2241
2242 }
2243 }
2244
2245
2246 static re_dfastate_t *
2247 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2248 re_dfastate_t *next_state)
2249 {
2250 const re_dfa_t *const dfa = mctx->dfa;
2251 Idx cur_idx = re_string_cur_idx (&mctx->input);
2252
2253 if (cur_idx > mctx->state_log_top)
2254 {
2255 mctx->state_log[cur_idx] = next_state;
2256 mctx->state_log_top = cur_idx;
2257 }
2258 else if (mctx->state_log[cur_idx] == 0)
2259 {
2260 mctx->state_log[cur_idx] = next_state;
2261 }
2262 else
2263 {
2264 re_dfastate_t *pstate;
2265 unsigned int context;
2266 re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2267
2268
2269
2270
2271 pstate = mctx->state_log[cur_idx];
2272 log_nodes = pstate->entrance_nodes;
2273 if (next_state != NULL)
2274 {
2275 table_nodes = next_state->entrance_nodes;
2276 *err = re_node_set_init_union (&next_nodes, table_nodes,
2277 log_nodes);
2278 if (__glibc_unlikely (*err != REG_NOERROR))
2279 return NULL;
2280 }
2281 else
2282 next_nodes = *log_nodes;
2283
2284
2285
2286 context = re_string_context_at (&mctx->input,
2287 re_string_cur_idx (&mctx->input) - 1,
2288 mctx->eflags);
2289 next_state = mctx->state_log[cur_idx]
2290 = re_acquire_state_context (err, dfa, &next_nodes, context);
2291
2292
2293
2294 if (table_nodes != NULL)
2295 re_node_set_free (&next_nodes);
2296 }
2297
2298 if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
2299 {
2300
2301
2302
2303 *err = check_subexp_matching_top (mctx, &next_state->nodes,
2304 cur_idx);
2305 if (__glibc_unlikely (*err != REG_NOERROR))
2306 return NULL;
2307
2308
2309 if (next_state->has_backref)
2310 {
2311 *err = transit_state_bkref (mctx, &next_state->nodes);
2312 if (__glibc_unlikely (*err != REG_NOERROR))
2313 return NULL;
2314 next_state = mctx->state_log[cur_idx];
2315 }
2316 }
2317
2318 return next_state;
2319 }
2320
2321
2322
2323
2324 static re_dfastate_t *
2325 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2326 {
2327 re_dfastate_t *cur_state;
2328 do
2329 {
2330 Idx max = mctx->state_log_top;
2331 Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2332
2333 do
2334 {
2335 if (++cur_str_idx > max)
2336 return NULL;
2337 re_string_skip_bytes (&mctx->input, 1);
2338 }
2339 while (mctx->state_log[cur_str_idx] == NULL);
2340
2341 cur_state = merge_state_with_log (err, mctx, NULL);
2342 }
2343 while (*err == REG_NOERROR && cur_state == NULL);
2344 return cur_state;
2345 }
2346
2347
2348
2349
2350
2351
2352
2353
2354 static reg_errcode_t
2355 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2356 Idx str_idx)
2357 {
2358 const re_dfa_t *const dfa = mctx->dfa;
2359 Idx node_idx;
2360 reg_errcode_t err;
2361
2362
2363
2364
2365
2366
2367 for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2368 {
2369 Idx node = cur_nodes->elems[node_idx];
2370 if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2371 && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2372 && (dfa->used_bkref_map
2373 & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2374 {
2375 err = match_ctx_add_subtop (mctx, node, str_idx);
2376 if (__glibc_unlikely (err != REG_NOERROR))
2377 return err;
2378 }
2379 }
2380 return REG_NOERROR;
2381 }
2382
2383 #if 0
2384
2385
2386
2387 static re_dfastate_t *
2388 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2389 re_dfastate_t *state)
2390 {
2391 const re_dfa_t *const dfa = mctx->dfa;
2392 re_node_set next_nodes;
2393 re_dfastate_t *next_state;
2394 Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2395 unsigned int context;
2396
2397 *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2398 if (__glibc_unlikely (*err != REG_NOERROR))
2399 return NULL;
2400 for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2401 {
2402 Idx cur_node = state->nodes.elems[node_cnt];
2403 if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2404 {
2405 *err = re_node_set_merge (&next_nodes,
2406 dfa->eclosures + dfa->nexts[cur_node]);
2407 if (__glibc_unlikely (*err != REG_NOERROR))
2408 {
2409 re_node_set_free (&next_nodes);
2410 return NULL;
2411 }
2412 }
2413 }
2414 context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2415 next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2416
2417
2418
2419 re_node_set_free (&next_nodes);
2420 re_string_skip_bytes (&mctx->input, 1);
2421 return next_state;
2422 }
2423 #endif
2424
2425 static reg_errcode_t
2426 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2427 {
2428 const re_dfa_t *const dfa = mctx->dfa;
2429 reg_errcode_t err;
2430 Idx i;
2431
2432 for (i = 0; i < pstate->nodes.nelem; ++i)
2433 {
2434 re_node_set dest_nodes, *new_nodes;
2435 Idx cur_node_idx = pstate->nodes.elems[i];
2436 int naccepted;
2437 Idx dest_idx;
2438 unsigned int context;
2439 re_dfastate_t *dest_state;
2440
2441 if (!dfa->nodes[cur_node_idx].accept_mb)
2442 continue;
2443
2444 if (dfa->nodes[cur_node_idx].constraint)
2445 {
2446 context = re_string_context_at (&mctx->input,
2447 re_string_cur_idx (&mctx->input),
2448 mctx->eflags);
2449 if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2450 context))
2451 continue;
2452 }
2453
2454
2455 naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2456 re_string_cur_idx (&mctx->input));
2457 if (naccepted == 0)
2458 continue;
2459
2460
2461 dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2462 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2463 : mctx->max_mb_elem_len);
2464 err = clean_state_log_if_needed (mctx, dest_idx);
2465 if (__glibc_unlikely (err != REG_NOERROR))
2466 return err;
2467 DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -1);
2468 new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2469
2470 dest_state = mctx->state_log[dest_idx];
2471 if (dest_state == NULL)
2472 dest_nodes = *new_nodes;
2473 else
2474 {
2475 err = re_node_set_init_union (&dest_nodes,
2476 dest_state->entrance_nodes, new_nodes);
2477 if (__glibc_unlikely (err != REG_NOERROR))
2478 return err;
2479 }
2480 context = re_string_context_at (&mctx->input, dest_idx - 1,
2481 mctx->eflags);
2482 mctx->state_log[dest_idx]
2483 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2484 if (dest_state != NULL)
2485 re_node_set_free (&dest_nodes);
2486 if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
2487 && err != REG_NOERROR))
2488 return err;
2489 }
2490 return REG_NOERROR;
2491 }
2492
2493 static reg_errcode_t
2494 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2495 {
2496 const re_dfa_t *const dfa = mctx->dfa;
2497 reg_errcode_t err;
2498 Idx i;
2499 Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2500
2501 for (i = 0; i < nodes->nelem; ++i)
2502 {
2503 Idx dest_str_idx, prev_nelem, bkc_idx;
2504 Idx node_idx = nodes->elems[i];
2505 unsigned int context;
2506 const re_token_t *node = dfa->nodes + node_idx;
2507 re_node_set *new_dest_nodes;
2508
2509
2510 if (node->type != OP_BACK_REF)
2511 continue;
2512
2513 if (node->constraint)
2514 {
2515 context = re_string_context_at (&mctx->input, cur_str_idx,
2516 mctx->eflags);
2517 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2518 continue;
2519 }
2520
2521
2522
2523 bkc_idx = mctx->nbkref_ents;
2524 err = get_subexp (mctx, node_idx, cur_str_idx);
2525 if (__glibc_unlikely (err != REG_NOERROR))
2526 goto free_return;
2527
2528
2529
2530 DEBUG_ASSERT (dfa->nexts[node_idx] != -1);
2531 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2532 {
2533 Idx subexp_len;
2534 re_dfastate_t *dest_state;
2535 struct re_backref_cache_entry *bkref_ent;
2536 bkref_ent = mctx->bkref_ents + bkc_idx;
2537 if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2538 continue;
2539 subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2540 new_dest_nodes = (subexp_len == 0
2541 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2542 : dfa->eclosures + dfa->nexts[node_idx]);
2543 dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2544 - bkref_ent->subexp_from);
2545 context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2546 mctx->eflags);
2547 dest_state = mctx->state_log[dest_str_idx];
2548 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2549 : mctx->state_log[cur_str_idx]->nodes.nelem);
2550
2551 if (dest_state == NULL)
2552 {
2553 mctx->state_log[dest_str_idx]
2554 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2555 context);
2556 if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2557 && err != REG_NOERROR))
2558 goto free_return;
2559 }
2560 else
2561 {
2562 re_node_set dest_nodes;
2563 err = re_node_set_init_union (&dest_nodes,
2564 dest_state->entrance_nodes,
2565 new_dest_nodes);
2566 if (__glibc_unlikely (err != REG_NOERROR))
2567 {
2568 re_node_set_free (&dest_nodes);
2569 goto free_return;
2570 }
2571 mctx->state_log[dest_str_idx]
2572 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2573 re_node_set_free (&dest_nodes);
2574 if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2575 && err != REG_NOERROR))
2576 goto free_return;
2577 }
2578
2579
2580 if (subexp_len == 0
2581 && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2582 {
2583 err = check_subexp_matching_top (mctx, new_dest_nodes,
2584 cur_str_idx);
2585 if (__glibc_unlikely (err != REG_NOERROR))
2586 goto free_return;
2587 err = transit_state_bkref (mctx, new_dest_nodes);
2588 if (__glibc_unlikely (err != REG_NOERROR))
2589 goto free_return;
2590 }
2591 }
2592 }
2593 err = REG_NOERROR;
2594 free_return:
2595 return err;
2596 }
2597
2598
2599
2600
2601
2602
2603
2604 static reg_errcode_t
2605 __attribute_warn_unused_result__
2606 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2607 {
2608 const re_dfa_t *const dfa = mctx->dfa;
2609 Idx subexp_num, sub_top_idx;
2610 const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2611
2612 Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2613 if (cache_idx != -1)
2614 {
2615 const struct re_backref_cache_entry *entry
2616 = mctx->bkref_ents + cache_idx;
2617 do
2618 if (entry->node == bkref_node)
2619 return REG_NOERROR;
2620 while (entry++->more);
2621 }
2622
2623 subexp_num = dfa->nodes[bkref_node].opr.idx;
2624
2625
2626 for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2627 {
2628 reg_errcode_t err;
2629 re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2630 re_sub_match_last_t *sub_last;
2631 Idx sub_last_idx, sl_str, bkref_str_off;
2632
2633 if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2634 continue;
2635
2636 sl_str = sub_top->str_idx;
2637 bkref_str_off = bkref_str_idx;
2638
2639
2640 for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2641 {
2642 regoff_t sl_str_diff;
2643 sub_last = sub_top->lasts[sub_last_idx];
2644 sl_str_diff = sub_last->str_idx - sl_str;
2645
2646
2647 if (sl_str_diff > 0)
2648 {
2649 if (__glibc_unlikely (bkref_str_off + sl_str_diff
2650 > mctx->input.valid_len))
2651 {
2652
2653 if (bkref_str_off + sl_str_diff > mctx->input.len)
2654 break;
2655
2656 err = clean_state_log_if_needed (mctx,
2657 bkref_str_off
2658 + sl_str_diff);
2659 if (__glibc_unlikely (err != REG_NOERROR))
2660 return err;
2661 buf = (const char *) re_string_get_buffer (&mctx->input);
2662 }
2663 if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2664
2665 break;
2666 }
2667 bkref_str_off += sl_str_diff;
2668 sl_str += sl_str_diff;
2669 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2670 bkref_str_idx);
2671
2672
2673
2674 buf = (const char *) re_string_get_buffer (&mctx->input);
2675
2676 if (err == REG_NOMATCH)
2677 continue;
2678 if (__glibc_unlikely (err != REG_NOERROR))
2679 return err;
2680 }
2681
2682 if (sub_last_idx < sub_top->nlasts)
2683 continue;
2684 if (sub_last_idx > 0)
2685 ++sl_str;
2686
2687 for (; sl_str <= bkref_str_idx; ++sl_str)
2688 {
2689 Idx cls_node;
2690 regoff_t sl_str_off;
2691 const re_node_set *nodes;
2692 sl_str_off = sl_str - sub_top->str_idx;
2693
2694
2695 if (sl_str_off > 0)
2696 {
2697 if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
2698 {
2699
2700 if (bkref_str_off >= mctx->input.len)
2701 break;
2702
2703 err = extend_buffers (mctx, bkref_str_off + 1);
2704 if (__glibc_unlikely (err != REG_NOERROR))
2705 return err;
2706
2707 buf = (const char *) re_string_get_buffer (&mctx->input);
2708 }
2709 if (buf [bkref_str_off++] != buf[sl_str - 1])
2710 break;
2711
2712 }
2713 if (mctx->state_log[sl_str] == NULL)
2714 continue;
2715
2716 nodes = &mctx->state_log[sl_str]->nodes;
2717 cls_node = find_subexp_node (dfa, nodes, subexp_num,
2718 OP_CLOSE_SUBEXP);
2719 if (cls_node == -1)
2720 continue;
2721 if (sub_top->path == NULL)
2722 {
2723 sub_top->path = calloc (sizeof (state_array_t),
2724 sl_str - sub_top->str_idx + 1);
2725 if (sub_top->path == NULL)
2726 return REG_ESPACE;
2727 }
2728
2729
2730 err = check_arrival (mctx, sub_top->path, sub_top->node,
2731 sub_top->str_idx, cls_node, sl_str,
2732 OP_CLOSE_SUBEXP);
2733 if (err == REG_NOMATCH)
2734 continue;
2735 if (__glibc_unlikely (err != REG_NOERROR))
2736 return err;
2737 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2738 if (__glibc_unlikely (sub_last == NULL))
2739 return REG_ESPACE;
2740 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2741 bkref_str_idx);
2742 buf = (const char *) re_string_get_buffer (&mctx->input);
2743 if (err == REG_NOMATCH)
2744 continue;
2745 if (__glibc_unlikely (err != REG_NOERROR))
2746 return err;
2747 }
2748 }
2749 return REG_NOERROR;
2750 }
2751
2752
2753
2754
2755
2756
2757
2758 static reg_errcode_t
2759 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2760 re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2761 {
2762 reg_errcode_t err;
2763 Idx to_idx;
2764
2765 err = check_arrival (mctx, &sub_last->path, sub_last->node,
2766 sub_last->str_idx, bkref_node, bkref_str,
2767 OP_OPEN_SUBEXP);
2768 if (err != REG_NOERROR)
2769 return err;
2770 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2771 sub_last->str_idx);
2772 if (__glibc_unlikely (err != REG_NOERROR))
2773 return err;
2774 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2775 return clean_state_log_if_needed (mctx, to_idx);
2776 }
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786 static Idx
2787 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2788 Idx subexp_idx, int type)
2789 {
2790 Idx cls_idx;
2791 for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2792 {
2793 Idx cls_node = nodes->elems[cls_idx];
2794 const re_token_t *node = dfa->nodes + cls_node;
2795 if (node->type == type
2796 && node->opr.idx == subexp_idx)
2797 return cls_node;
2798 }
2799 return -1;
2800 }
2801
2802
2803
2804
2805
2806
2807
2808 static reg_errcode_t
2809 __attribute_warn_unused_result__
2810 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2811 Idx top_str, Idx last_node, Idx last_str, int type)
2812 {
2813 const re_dfa_t *const dfa = mctx->dfa;
2814 reg_errcode_t err = REG_NOERROR;
2815 Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2816 re_dfastate_t *cur_state = NULL;
2817 re_node_set *cur_nodes, next_nodes;
2818 re_dfastate_t **backup_state_log;
2819 unsigned int context;
2820
2821 subexp_num = dfa->nodes[top_node].opr.idx;
2822
2823 if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1))
2824 {
2825 re_dfastate_t **new_array;
2826 Idx old_alloc = path->alloc;
2827 Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2828 Idx new_alloc;
2829 if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
2830 return REG_ESPACE;
2831 new_alloc = old_alloc + incr_alloc;
2832 if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
2833 return REG_ESPACE;
2834 new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2835 if (__glibc_unlikely (new_array == NULL))
2836 return REG_ESPACE;
2837 path->array = new_array;
2838 path->alloc = new_alloc;
2839 memset (new_array + old_alloc, '\0',
2840 sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2841 }
2842
2843 str_idx = path->next_idx ? path->next_idx : top_str;
2844
2845
2846 backup_state_log = mctx->state_log;
2847 backup_cur_idx = mctx->input.cur_idx;
2848 mctx->state_log = path->array;
2849 mctx->input.cur_idx = str_idx;
2850
2851
2852 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2853 if (str_idx == top_str)
2854 {
2855 err = re_node_set_init_1 (&next_nodes, top_node);
2856 if (__glibc_unlikely (err != REG_NOERROR))
2857 return err;
2858 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2859 if (__glibc_unlikely (err != REG_NOERROR))
2860 {
2861 re_node_set_free (&next_nodes);
2862 return err;
2863 }
2864 }
2865 else
2866 {
2867 cur_state = mctx->state_log[str_idx];
2868 if (cur_state && cur_state->has_backref)
2869 {
2870 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2871 if (__glibc_unlikely (err != REG_NOERROR))
2872 return err;
2873 }
2874 else
2875 re_node_set_init_empty (&next_nodes);
2876 }
2877 if (str_idx == top_str || (cur_state && cur_state->has_backref))
2878 {
2879 if (next_nodes.nelem)
2880 {
2881 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2882 subexp_num, type);
2883 if (__glibc_unlikely (err != REG_NOERROR))
2884 {
2885 re_node_set_free (&next_nodes);
2886 return err;
2887 }
2888 }
2889 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2890 if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2891 {
2892 re_node_set_free (&next_nodes);
2893 return err;
2894 }
2895 mctx->state_log[str_idx] = cur_state;
2896 }
2897
2898 for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2899 {
2900 re_node_set_empty (&next_nodes);
2901 if (mctx->state_log[str_idx + 1])
2902 {
2903 err = re_node_set_merge (&next_nodes,
2904 &mctx->state_log[str_idx + 1]->nodes);
2905 if (__glibc_unlikely (err != REG_NOERROR))
2906 {
2907 re_node_set_free (&next_nodes);
2908 return err;
2909 }
2910 }
2911 if (cur_state)
2912 {
2913 err = check_arrival_add_next_nodes (mctx, str_idx,
2914 &cur_state->non_eps_nodes,
2915 &next_nodes);
2916 if (__glibc_unlikely (err != REG_NOERROR))
2917 {
2918 re_node_set_free (&next_nodes);
2919 return err;
2920 }
2921 }
2922 ++str_idx;
2923 if (next_nodes.nelem)
2924 {
2925 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2926 if (__glibc_unlikely (err != REG_NOERROR))
2927 {
2928 re_node_set_free (&next_nodes);
2929 return err;
2930 }
2931 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2932 subexp_num, type);
2933 if (__glibc_unlikely (err != REG_NOERROR))
2934 {
2935 re_node_set_free (&next_nodes);
2936 return err;
2937 }
2938 }
2939 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2940 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2941 if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2942 {
2943 re_node_set_free (&next_nodes);
2944 return err;
2945 }
2946 mctx->state_log[str_idx] = cur_state;
2947 null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2948 }
2949 re_node_set_free (&next_nodes);
2950 cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2951 : &mctx->state_log[last_str]->nodes);
2952 path->next_idx = str_idx;
2953
2954
2955 mctx->state_log = backup_state_log;
2956 mctx->input.cur_idx = backup_cur_idx;
2957
2958
2959 if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2960 return REG_NOERROR;
2961
2962 return REG_NOMATCH;
2963 }
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973 static reg_errcode_t
2974 __attribute_warn_unused_result__
2975 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
2976 re_node_set *cur_nodes, re_node_set *next_nodes)
2977 {
2978 const re_dfa_t *const dfa = mctx->dfa;
2979 bool ok;
2980 Idx cur_idx;
2981 reg_errcode_t err = REG_NOERROR;
2982 re_node_set union_set;
2983 re_node_set_init_empty (&union_set);
2984 for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
2985 {
2986 int naccepted = 0;
2987 Idx cur_node = cur_nodes->elems[cur_idx];
2988 DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type));
2989
2990
2991 if (dfa->nodes[cur_node].accept_mb)
2992 {
2993 naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
2994 str_idx);
2995 if (naccepted > 1)
2996 {
2997 re_dfastate_t *dest_state;
2998 Idx next_node = dfa->nexts[cur_node];
2999 Idx next_idx = str_idx + naccepted;
3000 dest_state = mctx->state_log[next_idx];
3001 re_node_set_empty (&union_set);
3002 if (dest_state)
3003 {
3004 err = re_node_set_merge (&union_set, &dest_state->nodes);
3005 if (__glibc_unlikely (err != REG_NOERROR))
3006 {
3007 re_node_set_free (&union_set);
3008 return err;
3009 }
3010 }
3011 ok = re_node_set_insert (&union_set, next_node);
3012 if (__glibc_unlikely (! ok))
3013 {
3014 re_node_set_free (&union_set);
3015 return REG_ESPACE;
3016 }
3017 mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3018 &union_set);
3019 if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
3020 && err != REG_NOERROR))
3021 {
3022 re_node_set_free (&union_set);
3023 return err;
3024 }
3025 }
3026 }
3027
3028 if (naccepted
3029 || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3030 {
3031 ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3032 if (__glibc_unlikely (! ok))
3033 {
3034 re_node_set_free (&union_set);
3035 return REG_ESPACE;
3036 }
3037 }
3038 }
3039 re_node_set_free (&union_set);
3040 return REG_NOERROR;
3041 }
3042
3043
3044
3045
3046
3047
3048
3049 static reg_errcode_t
3050 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3051 Idx ex_subexp, int type)
3052 {
3053 reg_errcode_t err;
3054 Idx idx, outside_node;
3055 re_node_set new_nodes;
3056 DEBUG_ASSERT (cur_nodes->nelem);
3057 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3058 if (__glibc_unlikely (err != REG_NOERROR))
3059 return err;
3060
3061
3062
3063 for (idx = 0; idx < cur_nodes->nelem; ++idx)
3064 {
3065 Idx cur_node = cur_nodes->elems[idx];
3066 const re_node_set *eclosure = dfa->eclosures + cur_node;
3067 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3068 if (outside_node == -1)
3069 {
3070
3071 err = re_node_set_merge (&new_nodes, eclosure);
3072 if (__glibc_unlikely (err != REG_NOERROR))
3073 {
3074 re_node_set_free (&new_nodes);
3075 return err;
3076 }
3077 }
3078 else
3079 {
3080
3081 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3082 ex_subexp, type);
3083 if (__glibc_unlikely (err != REG_NOERROR))
3084 {
3085 re_node_set_free (&new_nodes);
3086 return err;
3087 }
3088 }
3089 }
3090 re_node_set_free (cur_nodes);
3091 *cur_nodes = new_nodes;
3092 return REG_NOERROR;
3093 }
3094
3095
3096
3097
3098
3099 static reg_errcode_t
3100 __attribute_warn_unused_result__
3101 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3102 Idx target, Idx ex_subexp, int type)
3103 {
3104 Idx cur_node;
3105 for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3106 {
3107 bool ok;
3108
3109 if (dfa->nodes[cur_node].type == type
3110 && dfa->nodes[cur_node].opr.idx == ex_subexp)
3111 {
3112 if (type == OP_CLOSE_SUBEXP)
3113 {
3114 ok = re_node_set_insert (dst_nodes, cur_node);
3115 if (__glibc_unlikely (! ok))
3116 return REG_ESPACE;
3117 }
3118 break;
3119 }
3120 ok = re_node_set_insert (dst_nodes, cur_node);
3121 if (__glibc_unlikely (! ok))
3122 return REG_ESPACE;
3123 if (dfa->edests[cur_node].nelem == 0)
3124 break;
3125 if (dfa->edests[cur_node].nelem == 2)
3126 {
3127 reg_errcode_t err;
3128 err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3129 dfa->edests[cur_node].elems[1],
3130 ex_subexp, type);
3131 if (__glibc_unlikely (err != REG_NOERROR))
3132 return err;
3133 }
3134 cur_node = dfa->edests[cur_node].elems[0];
3135 }
3136 return REG_NOERROR;
3137 }
3138
3139
3140
3141
3142
3143
3144 static reg_errcode_t
3145 __attribute_warn_unused_result__
3146 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3147 Idx cur_str, Idx subexp_num, int type)
3148 {
3149 const re_dfa_t *const dfa = mctx->dfa;
3150 reg_errcode_t err;
3151 Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3152 struct re_backref_cache_entry *ent;
3153
3154 if (cache_idx_start == -1)
3155 return REG_NOERROR;
3156
3157 restart:
3158 ent = mctx->bkref_ents + cache_idx_start;
3159 do
3160 {
3161 Idx to_idx, next_node;
3162
3163
3164 if (!re_node_set_contains (cur_nodes, ent->node))
3165 continue;
3166
3167 to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3168
3169
3170 if (to_idx == cur_str)
3171 {
3172
3173
3174 re_node_set new_dests;
3175 reg_errcode_t err2, err3;
3176 next_node = dfa->edests[ent->node].elems[0];
3177 if (re_node_set_contains (cur_nodes, next_node))
3178 continue;
3179 err = re_node_set_init_1 (&new_dests, next_node);
3180 err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3181 err3 = re_node_set_merge (cur_nodes, &new_dests);
3182 re_node_set_free (&new_dests);
3183 if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR
3184 || err3 != REG_NOERROR))
3185 {
3186 err = (err != REG_NOERROR ? err
3187 : (err2 != REG_NOERROR ? err2 : err3));
3188 return err;
3189 }
3190
3191 goto restart;
3192 }
3193 else
3194 {
3195 re_node_set union_set;
3196 next_node = dfa->nexts[ent->node];
3197 if (mctx->state_log[to_idx])
3198 {
3199 bool ok;
3200 if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3201 next_node))
3202 continue;
3203 err = re_node_set_init_copy (&union_set,
3204 &mctx->state_log[to_idx]->nodes);
3205 ok = re_node_set_insert (&union_set, next_node);
3206 if (__glibc_unlikely (err != REG_NOERROR || ! ok))
3207 {
3208 re_node_set_free (&union_set);
3209 err = err != REG_NOERROR ? err : REG_ESPACE;
3210 return err;
3211 }
3212 }
3213 else
3214 {
3215 err = re_node_set_init_1 (&union_set, next_node);
3216 if (__glibc_unlikely (err != REG_NOERROR))
3217 return err;
3218 }
3219 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3220 re_node_set_free (&union_set);
3221 if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
3222 && err != REG_NOERROR))
3223 return err;
3224 }
3225 }
3226 while (ent++->more);
3227 return REG_NOERROR;
3228 }
3229
3230
3231
3232
3233 static bool __attribute_noinline__
3234 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3235 {
3236 reg_errcode_t err;
3237 Idx i, j;
3238 int ch;
3239 bool need_word_trtable = false;
3240 bitset_word_t elem, mask;
3241 Idx ndests;
3242 re_dfastate_t **trtable;
3243 re_dfastate_t *dest_states[SBC_MAX];
3244 re_dfastate_t *dest_states_word[SBC_MAX];
3245 re_dfastate_t *dest_states_nl[SBC_MAX];
3246 re_node_set follows;
3247 bitset_t acceptable;
3248
3249
3250
3251
3252
3253 re_node_set dests_node[SBC_MAX];
3254 bitset_t dests_ch[SBC_MAX];
3255
3256
3257 state->word_trtable = state->trtable = NULL;
3258
3259
3260
3261 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3262 if (__glibc_unlikely (ndests <= 0))
3263 {
3264
3265 if (ndests == 0)
3266 {
3267 state->trtable = (re_dfastate_t **)
3268 calloc (sizeof (re_dfastate_t *), SBC_MAX);
3269 if (__glibc_unlikely (state->trtable == NULL))
3270 return false;
3271 return true;
3272 }
3273 return false;
3274 }
3275
3276 err = re_node_set_alloc (&follows, ndests + 1);
3277 if (__glibc_unlikely (err != REG_NOERROR))
3278 {
3279 out_free:
3280 re_node_set_free (&follows);
3281 for (i = 0; i < ndests; ++i)
3282 re_node_set_free (dests_node + i);
3283 return false;
3284 }
3285
3286 bitset_empty (acceptable);
3287
3288
3289 for (i = 0; i < ndests; ++i)
3290 {
3291 Idx next_node;
3292 re_node_set_empty (&follows);
3293
3294 for (j = 0; j < dests_node[i].nelem; ++j)
3295 {
3296 next_node = dfa->nexts[dests_node[i].elems[j]];
3297 if (next_node != -1)
3298 {
3299 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3300 if (__glibc_unlikely (err != REG_NOERROR))
3301 goto out_free;
3302 }
3303 }
3304 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3305 if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
3306 goto out_free;
3307
3308
3309 if (dest_states[i]->has_constraint)
3310 {
3311 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3312 CONTEXT_WORD);
3313 if (__glibc_unlikely (dest_states_word[i] == NULL
3314 && err != REG_NOERROR))
3315 goto out_free;
3316
3317 if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3318 need_word_trtable = true;
3319
3320 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3321 CONTEXT_NEWLINE);
3322 if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
3323 goto out_free;
3324 }
3325 else
3326 {
3327 dest_states_word[i] = dest_states[i];
3328 dest_states_nl[i] = dest_states[i];
3329 }
3330 bitset_merge (acceptable, dests_ch[i]);
3331 }
3332
3333 if (!__glibc_unlikely (need_word_trtable))
3334 {
3335
3336
3337
3338
3339 trtable = state->trtable =
3340 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3341 if (__glibc_unlikely (trtable == NULL))
3342 goto out_free;
3343
3344
3345 for (i = 0; i < BITSET_WORDS; ++i)
3346 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3347 elem;
3348 mask <<= 1, elem >>= 1, ++ch)
3349 if (__glibc_unlikely (elem & 1))
3350 {
3351
3352
3353 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3354 ;
3355
3356
3357 if (dfa->word_char[i] & mask)
3358 trtable[ch] = dest_states_word[j];
3359 else
3360 trtable[ch] = dest_states[j];
3361 }
3362 }
3363 else
3364 {
3365
3366
3367
3368
3369
3370 trtable = state->word_trtable =
3371 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3372 if (__glibc_unlikely (trtable == NULL))
3373 goto out_free;
3374
3375
3376 for (i = 0; i < BITSET_WORDS; ++i)
3377 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3378 elem;
3379 mask <<= 1, elem >>= 1, ++ch)
3380 if (__glibc_unlikely (elem & 1))
3381 {
3382
3383
3384 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3385 ;
3386
3387
3388 trtable[ch] = dest_states[j];
3389 trtable[ch + SBC_MAX] = dest_states_word[j];
3390 }
3391 }
3392
3393
3394 if (bitset_contain (acceptable, NEWLINE_CHAR))
3395 {
3396
3397 for (j = 0; j < ndests; ++j)
3398 if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3399 {
3400
3401 trtable[NEWLINE_CHAR] = dest_states_nl[j];
3402 if (need_word_trtable)
3403 trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3404
3405
3406 break;
3407 }
3408 }
3409
3410 re_node_set_free (&follows);
3411 for (i = 0; i < ndests; ++i)
3412 re_node_set_free (dests_node + i);
3413 return true;
3414 }
3415
3416
3417
3418
3419
3420
3421
3422 static Idx
3423 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3424 re_node_set *dests_node, bitset_t *dests_ch)
3425 {
3426 reg_errcode_t err;
3427 bool ok;
3428 Idx i, j, k;
3429 Idx ndests;
3430 bitset_t accepts;
3431 const re_node_set *cur_nodes = &state->nodes;
3432 bitset_empty (accepts);
3433 ndests = 0;
3434
3435
3436 for (i = 0; i < cur_nodes->nelem; ++i)
3437 {
3438 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3439 re_token_type_t type = node->type;
3440 unsigned int constraint = node->constraint;
3441
3442
3443 if (type == CHARACTER)
3444 bitset_set (accepts, node->opr.c);
3445 else if (type == SIMPLE_BRACKET)
3446 {
3447 bitset_merge (accepts, node->opr.sbcset);
3448 }
3449 else if (type == OP_PERIOD)
3450 {
3451 if (dfa->mb_cur_max > 1)
3452 bitset_merge (accepts, dfa->sb_char);
3453 else
3454 bitset_set_all (accepts);
3455 if (!(dfa->syntax & RE_DOT_NEWLINE))
3456 bitset_clear (accepts, '\n');
3457 if (dfa->syntax & RE_DOT_NOT_NULL)
3458 bitset_clear (accepts, '\0');
3459 }
3460 else if (type == OP_UTF8_PERIOD)
3461 {
3462 if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3463 memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3464 else
3465 bitset_merge (accepts, utf8_sb_map);
3466 if (!(dfa->syntax & RE_DOT_NEWLINE))
3467 bitset_clear (accepts, '\n');
3468 if (dfa->syntax & RE_DOT_NOT_NULL)
3469 bitset_clear (accepts, '\0');
3470 }
3471 else
3472 continue;
3473
3474
3475
3476 if (constraint)
3477 {
3478 if (constraint & NEXT_NEWLINE_CONSTRAINT)
3479 {
3480 bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3481 bitset_empty (accepts);
3482 if (accepts_newline)
3483 bitset_set (accepts, NEWLINE_CHAR);
3484 else
3485 continue;
3486 }
3487 if (constraint & NEXT_ENDBUF_CONSTRAINT)
3488 {
3489 bitset_empty (accepts);
3490 continue;
3491 }
3492
3493 if (constraint & NEXT_WORD_CONSTRAINT)
3494 {
3495 bitset_word_t any_set = 0;
3496 if (type == CHARACTER && !node->word_char)
3497 {
3498 bitset_empty (accepts);
3499 continue;
3500 }
3501 if (dfa->mb_cur_max > 1)
3502 for (j = 0; j < BITSET_WORDS; ++j)
3503 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3504 else
3505 for (j = 0; j < BITSET_WORDS; ++j)
3506 any_set |= (accepts[j] &= dfa->word_char[j]);
3507 if (!any_set)
3508 continue;
3509 }
3510 if (constraint & NEXT_NOTWORD_CONSTRAINT)
3511 {
3512 bitset_word_t any_set = 0;
3513 if (type == CHARACTER && node->word_char)
3514 {
3515 bitset_empty (accepts);
3516 continue;
3517 }
3518 if (dfa->mb_cur_max > 1)
3519 for (j = 0; j < BITSET_WORDS; ++j)
3520 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3521 else
3522 for (j = 0; j < BITSET_WORDS; ++j)
3523 any_set |= (accepts[j] &= ~dfa->word_char[j]);
3524 if (!any_set)
3525 continue;
3526 }
3527 }
3528
3529
3530
3531 for (j = 0; j < ndests; ++j)
3532 {
3533 bitset_t intersec;
3534 bitset_t remains;
3535
3536 bitset_word_t has_intersec, not_subset, not_consumed;
3537
3538
3539 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3540 continue;
3541
3542
3543 has_intersec = 0;
3544 for (k = 0; k < BITSET_WORDS; ++k)
3545 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3546
3547 if (!has_intersec)
3548 continue;
3549
3550
3551 not_subset = not_consumed = 0;
3552 for (k = 0; k < BITSET_WORDS; ++k)
3553 {
3554 not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3555 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3556 }
3557
3558
3559
3560 if (not_subset)
3561 {
3562 bitset_copy (dests_ch[ndests], remains);
3563 bitset_copy (dests_ch[j], intersec);
3564 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3565 if (__glibc_unlikely (err != REG_NOERROR))
3566 goto error_return;
3567 ++ndests;
3568 }
3569
3570
3571 ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3572 if (__glibc_unlikely (! ok))
3573 goto error_return;
3574
3575
3576 if (!not_consumed)
3577 break;
3578 }
3579
3580 if (j == ndests)
3581 {
3582 bitset_copy (dests_ch[ndests], accepts);
3583 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3584 if (__glibc_unlikely (err != REG_NOERROR))
3585 goto error_return;
3586 ++ndests;
3587 bitset_empty (accepts);
3588 }
3589 }
3590 assume (ndests <= SBC_MAX);
3591 return ndests;
3592 error_return:
3593 for (j = 0; j < ndests; ++j)
3594 re_node_set_free (dests_node + j);
3595 return -1;
3596 }
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606 #ifdef _LIBC
3607 # include <locale/weight.h>
3608 #endif
3609
3610 static int
3611 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3612 const re_string_t *input, Idx str_idx)
3613 {
3614 const re_token_t *node = dfa->nodes + node_idx;
3615 int char_len, elem_len;
3616 Idx i;
3617
3618 if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
3619 {
3620 unsigned char c = re_string_byte_at (input, str_idx), d;
3621 if (__glibc_likely (c < 0xc2))
3622 return 0;
3623
3624 if (str_idx + 2 > input->len)
3625 return 0;
3626
3627 d = re_string_byte_at (input, str_idx + 1);
3628 if (c < 0xe0)
3629 return (d < 0x80 || d > 0xbf) ? 0 : 2;
3630 else if (c < 0xf0)
3631 {
3632 char_len = 3;
3633 if (c == 0xe0 && d < 0xa0)
3634 return 0;
3635 }
3636 else if (c < 0xf8)
3637 {
3638 char_len = 4;
3639 if (c == 0xf0 && d < 0x90)
3640 return 0;
3641 }
3642 else if (c < 0xfc)
3643 {
3644 char_len = 5;
3645 if (c == 0xf8 && d < 0x88)
3646 return 0;
3647 }
3648 else if (c < 0xfe)
3649 {
3650 char_len = 6;
3651 if (c == 0xfc && d < 0x84)
3652 return 0;
3653 }
3654 else
3655 return 0;
3656
3657 if (str_idx + char_len > input->len)
3658 return 0;
3659
3660 for (i = 1; i < char_len; ++i)
3661 {
3662 d = re_string_byte_at (input, str_idx + i);
3663 if (d < 0x80 || d > 0xbf)
3664 return 0;
3665 }
3666 return char_len;
3667 }
3668
3669 char_len = re_string_char_size_at (input, str_idx);
3670 if (node->type == OP_PERIOD)
3671 {
3672 if (char_len <= 1)
3673 return 0;
3674
3675
3676
3677 if ((!(dfa->syntax & RE_DOT_NEWLINE)
3678 && re_string_byte_at (input, str_idx) == '\n')
3679 || ((dfa->syntax & RE_DOT_NOT_NULL)
3680 && re_string_byte_at (input, str_idx) == '\0'))
3681 return 0;
3682 return char_len;
3683 }
3684
3685 elem_len = re_string_elem_size_at (input, str_idx);
3686 if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3687 return 0;
3688
3689 if (node->type == COMPLEX_BRACKET)
3690 {
3691 const re_charset_t *cset = node->opr.mbcset;
3692 #ifdef _LIBC
3693 const unsigned char *pin
3694 = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3695 Idx j;
3696 uint32_t nrules;
3697 #endif
3698 int match_len = 0;
3699 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3700 ? re_string_wchar_at (input, str_idx) : 0);
3701
3702
3703 for (i = 0; i < cset->nmbchars; ++i)
3704 if (wc == cset->mbchars[i])
3705 {
3706 match_len = char_len;
3707 goto check_node_accept_bytes_match;
3708 }
3709
3710 for (i = 0; i < cset->nchar_classes; ++i)
3711 {
3712 wctype_t wt = cset->char_classes[i];
3713 if (__iswctype (wc, wt))
3714 {
3715 match_len = char_len;
3716 goto check_node_accept_bytes_match;
3717 }
3718 }
3719
3720 #ifdef _LIBC
3721 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3722 if (nrules != 0)
3723 {
3724 unsigned int in_collseq = 0;
3725 const int32_t *table, *indirect;
3726 const unsigned char *weights, *extra;
3727 const char *collseqwc;
3728
3729
3730 if (cset->ncoll_syms)
3731 extra = (const unsigned char *)
3732 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3733 for (i = 0; i < cset->ncoll_syms; ++i)
3734 {
3735 const unsigned char *coll_sym = extra + cset->coll_syms[i];
3736
3737
3738 if (*coll_sym != elem_len)
3739 continue;
3740
3741 for (j = 0; j < *coll_sym; j++)
3742 if (pin[j] != coll_sym[1 + j])
3743 break;
3744 if (j == *coll_sym)
3745 {
3746
3747 match_len = j;
3748 goto check_node_accept_bytes_match;
3749 }
3750 }
3751
3752 if (cset->nranges)
3753 {
3754 if (elem_len <= char_len)
3755 {
3756 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3757 in_collseq = __collseq_table_lookup (collseqwc, wc);
3758 }
3759 else
3760 in_collseq = find_collation_sequence_value (pin, elem_len);
3761 }
3762
3763
3764 for (i = 0; i < cset->nranges; ++i)
3765 if (cset->range_starts[i] <= in_collseq
3766 && in_collseq <= cset->range_ends[i])
3767 {
3768 match_len = elem_len;
3769 goto check_node_accept_bytes_match;
3770 }
3771
3772
3773 if (cset->nequiv_classes)
3774 {
3775 const unsigned char *cp = pin;
3776 table = (const int32_t *)
3777 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3778 weights = (const unsigned char *)
3779 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3780 extra = (const unsigned char *)
3781 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3782 indirect = (const int32_t *)
3783 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3784 int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3785 int32_t rule = idx >> 24;
3786 idx &= 0xffffff;
3787 if (idx > 0)
3788 {
3789 size_t weight_len = weights[idx];
3790 for (i = 0; i < cset->nequiv_classes; ++i)
3791 {
3792 int32_t equiv_class_idx = cset->equiv_classes[i];
3793 int32_t equiv_class_rule = equiv_class_idx >> 24;
3794 equiv_class_idx &= 0xffffff;
3795 if (weights[equiv_class_idx] == weight_len
3796 && equiv_class_rule == rule
3797 && memcmp (weights + idx + 1,
3798 weights + equiv_class_idx + 1,
3799 weight_len) == 0)
3800 {
3801 match_len = elem_len;
3802 goto check_node_accept_bytes_match;
3803 }
3804 }
3805 }
3806 }
3807 }
3808 else
3809 #endif
3810 {
3811
3812 for (i = 0; i < cset->nranges; ++i)
3813 {
3814 if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3815 {
3816 match_len = char_len;
3817 goto check_node_accept_bytes_match;
3818 }
3819 }
3820 }
3821 check_node_accept_bytes_match:
3822 if (!cset->non_match)
3823 return match_len;
3824 else
3825 {
3826 if (match_len > 0)
3827 return 0;
3828 else
3829 return (elem_len > char_len) ? elem_len : char_len;
3830 }
3831 }
3832 return 0;
3833 }
3834
3835 #ifdef _LIBC
3836 static unsigned int
3837 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3838 {
3839 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3840 if (nrules == 0)
3841 {
3842 if (mbs_len == 1)
3843 {
3844
3845 const unsigned char *collseq = (const unsigned char *)
3846 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3847 return collseq[mbs[0]];
3848 }
3849 return UINT_MAX;
3850 }
3851 else
3852 {
3853 int32_t idx;
3854 const unsigned char *extra = (const unsigned char *)
3855 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3856 int32_t extrasize = (const unsigned char *)
3857 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3858
3859 for (idx = 0; idx < extrasize;)
3860 {
3861 int mbs_cnt;
3862 bool found = false;
3863 int32_t elem_mbs_len;
3864
3865 idx = idx + extra[idx] + 1;
3866 elem_mbs_len = extra[idx++];
3867 if (mbs_len == elem_mbs_len)
3868 {
3869 for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3870 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3871 break;
3872 if (mbs_cnt == elem_mbs_len)
3873
3874 found = true;
3875 }
3876
3877 idx += elem_mbs_len;
3878
3879 idx = (idx + 3) & ~3;
3880
3881 idx += sizeof (uint32_t);
3882
3883 idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
3884
3885 if (found)
3886 return *(uint32_t *) (extra + idx);
3887
3888 idx += sizeof (uint32_t);
3889 }
3890 return UINT_MAX;
3891 }
3892 }
3893 #endif
3894
3895
3896
3897
3898 static bool
3899 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
3900 Idx idx)
3901 {
3902 unsigned char ch;
3903 ch = re_string_byte_at (&mctx->input, idx);
3904 switch (node->type)
3905 {
3906 case CHARACTER:
3907 if (node->opr.c != ch)
3908 return false;
3909 break;
3910
3911 case SIMPLE_BRACKET:
3912 if (!bitset_contain (node->opr.sbcset, ch))
3913 return false;
3914 break;
3915
3916 case OP_UTF8_PERIOD:
3917 if (ch >= ASCII_CHARS)
3918 return false;
3919 FALLTHROUGH;
3920 case OP_PERIOD:
3921 if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
3922 || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
3923 return false;
3924 break;
3925
3926 default:
3927 return false;
3928 }
3929
3930 if (node->constraint)
3931 {
3932
3933
3934 unsigned int context = re_string_context_at (&mctx->input, idx,
3935 mctx->eflags);
3936 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
3937 return false;
3938 }
3939
3940 return true;
3941 }
3942
3943
3944
3945 static reg_errcode_t
3946 __attribute_warn_unused_result__
3947 extend_buffers (re_match_context_t *mctx, int min_len)
3948 {
3949 reg_errcode_t ret;
3950 re_string_t *pstr = &mctx->input;
3951
3952
3953 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
3954 <= pstr->bufs_len))
3955 return REG_ESPACE;
3956
3957
3958 ret = re_string_realloc_buffers (pstr,
3959 MAX (min_len,
3960 MIN (pstr->len, pstr->bufs_len * 2)));
3961 if (__glibc_unlikely (ret != REG_NOERROR))
3962 return ret;
3963
3964 if (mctx->state_log != NULL)
3965 {
3966
3967
3968
3969
3970 re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
3971 pstr->bufs_len + 1);
3972 if (__glibc_unlikely (new_array == NULL))
3973 return REG_ESPACE;
3974 mctx->state_log = new_array;
3975 }
3976
3977
3978 if (pstr->icase)
3979 {
3980 if (pstr->mb_cur_max > 1)
3981 {
3982 ret = build_wcs_upper_buffer (pstr);
3983 if (__glibc_unlikely (ret != REG_NOERROR))
3984 return ret;
3985 }
3986 else
3987 build_upper_buffer (pstr);
3988 }
3989 else
3990 {
3991 if (pstr->mb_cur_max > 1)
3992 build_wcs_buffer (pstr);
3993 else
3994 {
3995 if (pstr->trans != NULL)
3996 re_string_translate_buffer (pstr);
3997 }
3998 }
3999 return REG_NOERROR;
4000 }
4001
4002
4003
4004
4005
4006
4007 static reg_errcode_t
4008 __attribute_warn_unused_result__
4009 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4010 {
4011 mctx->eflags = eflags;
4012 mctx->match_last = -1;
4013 if (n > 0)
4014 {
4015
4016 size_t max_object_size =
4017 MAX (sizeof (struct re_backref_cache_entry),
4018 sizeof (re_sub_match_top_t *));
4019 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
4020 return REG_ESPACE;
4021
4022 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4023 mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4024 if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL))
4025 return REG_ESPACE;
4026 }
4027
4028
4029
4030
4031
4032 mctx->abkref_ents = n;
4033 mctx->max_mb_elem_len = 1;
4034 mctx->asub_tops = n;
4035 return REG_NOERROR;
4036 }
4037
4038
4039
4040
4041
4042 static void
4043 match_ctx_clean (re_match_context_t *mctx)
4044 {
4045 Idx st_idx;
4046 for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4047 {
4048 Idx sl_idx;
4049 re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4050 for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4051 {
4052 re_sub_match_last_t *last = top->lasts[sl_idx];
4053 re_free (last->path.array);
4054 re_free (last);
4055 }
4056 re_free (top->lasts);
4057 if (top->path)
4058 {
4059 re_free (top->path->array);
4060 re_free (top->path);
4061 }
4062 re_free (top);
4063 }
4064
4065 mctx->nsub_tops = 0;
4066 mctx->nbkref_ents = 0;
4067 }
4068
4069
4070
4071 static void
4072 match_ctx_free (re_match_context_t *mctx)
4073 {
4074
4075 match_ctx_clean (mctx);
4076 re_free (mctx->sub_tops);
4077 re_free (mctx->bkref_ents);
4078 }
4079
4080
4081
4082
4083
4084
4085 static reg_errcode_t
4086 __attribute_warn_unused_result__
4087 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4088 Idx to)
4089 {
4090 if (mctx->nbkref_ents >= mctx->abkref_ents)
4091 {
4092 struct re_backref_cache_entry* new_entry;
4093 new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4094 mctx->abkref_ents * 2);
4095 if (__glibc_unlikely (new_entry == NULL))
4096 {
4097 re_free (mctx->bkref_ents);
4098 return REG_ESPACE;
4099 }
4100 mctx->bkref_ents = new_entry;
4101 memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4102 sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4103 mctx->abkref_ents *= 2;
4104 }
4105 if (mctx->nbkref_ents > 0
4106 && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4107 mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4108
4109 mctx->bkref_ents[mctx->nbkref_ents].node = node;
4110 mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4111 mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4112 mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122 mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4123 = (from == to ? -1 : 0);
4124
4125 mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4126 if (mctx->max_mb_elem_len < to - from)
4127 mctx->max_mb_elem_len = to - from;
4128 return REG_NOERROR;
4129 }
4130
4131
4132
4133
4134 static Idx
4135 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4136 {
4137 Idx left, right, mid, last;
4138 last = right = mctx->nbkref_ents;
4139 for (left = 0; left < right;)
4140 {
4141 mid = (left + right) / 2;
4142 if (mctx->bkref_ents[mid].str_idx < str_idx)
4143 left = mid + 1;
4144 else
4145 right = mid;
4146 }
4147 if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4148 return left;
4149 else
4150 return -1;
4151 }
4152
4153
4154
4155
4156 static reg_errcode_t
4157 __attribute_warn_unused_result__
4158 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4159 {
4160 DEBUG_ASSERT (mctx->sub_tops != NULL);
4161 DEBUG_ASSERT (mctx->asub_tops > 0);
4162 if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
4163 {
4164 Idx new_asub_tops = mctx->asub_tops * 2;
4165 re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4166 re_sub_match_top_t *,
4167 new_asub_tops);
4168 if (__glibc_unlikely (new_array == NULL))
4169 return REG_ESPACE;
4170 mctx->sub_tops = new_array;
4171 mctx->asub_tops = new_asub_tops;
4172 }
4173 mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4174 if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
4175 return REG_ESPACE;
4176 mctx->sub_tops[mctx->nsub_tops]->node = node;
4177 mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4178 return REG_NOERROR;
4179 }
4180
4181
4182
4183
4184
4185 static re_sub_match_last_t *
4186 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4187 {
4188 re_sub_match_last_t *new_entry;
4189 if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
4190 {
4191 Idx new_alasts = 2 * subtop->alasts + 1;
4192 re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4193 re_sub_match_last_t *,
4194 new_alasts);
4195 if (__glibc_unlikely (new_array == NULL))
4196 return NULL;
4197 subtop->lasts = new_array;
4198 subtop->alasts = new_alasts;
4199 }
4200 new_entry = calloc (1, sizeof (re_sub_match_last_t));
4201 if (__glibc_likely (new_entry != NULL))
4202 {
4203 subtop->lasts[subtop->nlasts] = new_entry;
4204 new_entry->node = node;
4205 new_entry->str_idx = str_idx;
4206 ++subtop->nlasts;
4207 }
4208 return new_entry;
4209 }
4210
4211 static void
4212 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4213 re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4214 {
4215 sctx->sifted_states = sifted_sts;
4216 sctx->limited_states = limited_sts;
4217 sctx->last_node = last_node;
4218 sctx->last_str_idx = last_str_idx;
4219 re_node_set_init_empty (&sctx->limits);
4220 }