This source file includes following definitions.
- knuth_morris_pratt_multibyte
- mbsstr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 #include <config.h>
19
20
21 #include <string.h>
22
23 #include <stdbool.h>
24 #include <stddef.h>
25 #include <stdlib.h>
26
27 #include "malloca.h"
28 #include "mbuiter.h"
29
30
31 #define UNIT unsigned char
32 #define CANON_ELEMENT(c) c
33 #include "str-kmp.h"
34
35
36
37
38
39
40 static bool
41 knuth_morris_pratt_multibyte (const char *haystack, const char *needle,
42 const char **resultp)
43 {
44 size_t m = mbslen (needle);
45 mbchar_t *needle_mbchars;
46 size_t *table;
47
48
49 void *memory = nmalloca (m, sizeof (mbchar_t) + sizeof (size_t));
50 void *table_memory;
51 if (memory == NULL)
52 return false;
53 needle_mbchars = memory;
54 table_memory = needle_mbchars + m;
55 table = table_memory;
56
57
58 {
59 mbui_iterator_t iter;
60 size_t j;
61
62 j = 0;
63 for (mbui_init (iter, needle); mbui_avail (iter); mbui_advance (iter), j++)
64 mb_copy (&needle_mbchars[j], &mbui_cur (iter));
65 }
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82 {
83 size_t i, j;
84
85
86 table[1] = 1;
87 j = 0;
88
89 for (i = 2; i < m; i++)
90 {
91
92
93
94
95 mbchar_t *b = &needle_mbchars[i - 1];
96
97 for (;;)
98 {
99
100
101
102 if (mb_equal (*b, needle_mbchars[j]))
103 {
104
105 table[i] = i - ++j;
106 break;
107 }
108
109
110
111 if (j == 0)
112 {
113
114 table[i] = i;
115 break;
116 }
117
118
119
120
121
122
123
124
125
126
127
128 j = j - table[j];
129 }
130
131 }
132 }
133
134
135 {
136 size_t j;
137 mbui_iterator_t rhaystack;
138 mbui_iterator_t phaystack;
139
140 *resultp = NULL;
141 j = 0;
142 mbui_init (rhaystack, haystack);
143 mbui_init (phaystack, haystack);
144
145 while (mbui_avail (phaystack))
146 if (mb_equal (needle_mbchars[j], mbui_cur (phaystack)))
147 {
148 j++;
149 mbui_advance (phaystack);
150 if (j == m)
151 {
152
153 *resultp = mbui_cur_ptr (rhaystack);
154 break;
155 }
156 }
157 else if (j > 0)
158 {
159
160 size_t count = table[j];
161 j -= count;
162 for (; count > 0; count--)
163 {
164 if (!mbui_avail (rhaystack))
165 abort ();
166 mbui_advance (rhaystack);
167 }
168 }
169 else
170 {
171
172 if (!mbui_avail (rhaystack))
173 abort ();
174 mbui_advance (rhaystack);
175 mbui_advance (phaystack);
176 }
177 }
178
179 freea (memory);
180 return true;
181 }
182
183
184
185 char *
186 mbsstr (const char *haystack, const char *needle)
187 {
188
189
190
191
192
193 if (MB_CUR_MAX > 1)
194 {
195 mbui_iterator_t iter_needle;
196
197 mbui_init (iter_needle, needle);
198 if (mbui_avail (iter_needle))
199 {
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214 bool try_kmp = true;
215 size_t outer_loop_count = 0;
216 size_t comparison_count = 0;
217 size_t last_ccount = 0;
218 mbui_iterator_t iter_needle_last_ccount;
219
220 mbui_iterator_t iter_haystack;
221
222 mbui_init (iter_needle_last_ccount, needle);
223 mbui_init (iter_haystack, haystack);
224 for (;; mbui_advance (iter_haystack))
225 {
226 if (!mbui_avail (iter_haystack))
227
228 return NULL;
229
230
231
232 if (try_kmp
233 && outer_loop_count >= 10
234 && comparison_count >= 5 * outer_loop_count)
235 {
236
237
238 size_t count = comparison_count - last_ccount;
239 for (;
240 count > 0 && mbui_avail (iter_needle_last_ccount);
241 count--)
242 mbui_advance (iter_needle_last_ccount);
243 last_ccount = comparison_count;
244 if (!mbui_avail (iter_needle_last_ccount))
245 {
246
247 const char *result;
248 bool success =
249 knuth_morris_pratt_multibyte (haystack, needle,
250 &result);
251 if (success)
252 return (char *) result;
253 try_kmp = false;
254 }
255 }
256
257 outer_loop_count++;
258 comparison_count++;
259 if (mb_equal (mbui_cur (iter_haystack), mbui_cur (iter_needle)))
260
261 {
262 mbui_iterator_t rhaystack;
263 mbui_iterator_t rneedle;
264
265 memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t));
266 mbui_advance (rhaystack);
267
268 mbui_init (rneedle, needle);
269 if (!mbui_avail (rneedle))
270 abort ();
271 mbui_advance (rneedle);
272
273 for (;; mbui_advance (rhaystack), mbui_advance (rneedle))
274 {
275 if (!mbui_avail (rneedle))
276
277 return (char *) mbui_cur_ptr (iter_haystack);
278 if (!mbui_avail (rhaystack))
279
280 return NULL;
281 comparison_count++;
282 if (!mb_equal (mbui_cur (rhaystack), mbui_cur (rneedle)))
283
284 break;
285 }
286 }
287 }
288 }
289 else
290 return (char *) haystack;
291 }
292 else
293 {
294 if (*needle != '\0')
295 {
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310 bool try_kmp = true;
311 size_t outer_loop_count = 0;
312 size_t comparison_count = 0;
313 size_t last_ccount = 0;
314 const char *needle_last_ccount = needle;
315
316
317
318 char b = *needle++;
319
320 for (;; haystack++)
321 {
322 if (*haystack == '\0')
323
324 return NULL;
325
326
327
328 if (try_kmp
329 && outer_loop_count >= 10
330 && comparison_count >= 5 * outer_loop_count)
331 {
332
333
334 if (needle_last_ccount != NULL)
335 {
336 needle_last_ccount +=
337 strnlen (needle_last_ccount,
338 comparison_count - last_ccount);
339 if (*needle_last_ccount == '\0')
340 needle_last_ccount = NULL;
341 last_ccount = comparison_count;
342 }
343 if (needle_last_ccount == NULL)
344 {
345
346 const unsigned char *result;
347 bool success =
348 knuth_morris_pratt ((const unsigned char *) haystack,
349 (const unsigned char *) (needle - 1),
350 strlen (needle - 1),
351 &result);
352 if (success)
353 return (char *) result;
354 try_kmp = false;
355 }
356 }
357
358 outer_loop_count++;
359 comparison_count++;
360 if (*haystack == b)
361
362 {
363 const char *rhaystack = haystack + 1;
364 const char *rneedle = needle;
365
366 for (;; rhaystack++, rneedle++)
367 {
368 if (*rneedle == '\0')
369
370 return (char *) haystack;
371 if (*rhaystack == '\0')
372
373 return NULL;
374 comparison_count++;
375 if (*rhaystack != *rneedle)
376
377 break;
378 }
379 }
380 }
381 }
382 else
383 return (char *) haystack;
384 }
385 }