This source file includes following definitions.
- knuth_morris_pratt_multibyte
- mbscasestr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 #include <config.h>
19
20
21 #include <string.h>
22
23 #include <ctype.h>
24 #include <stdbool.h>
25 #include <stddef.h>
26 #include <stdlib.h>
27
28 #include "malloca.h"
29 #include "mbuiter.h"
30
31 #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
32
33
34 #define UNIT unsigned char
35 #define CANON_ELEMENT(c) TOLOWER (c)
36 #include "str-kmp.h"
37
38
39
40
41
42
43 static bool
44 knuth_morris_pratt_multibyte (const char *haystack, const char *needle,
45 const char **resultp)
46 {
47 size_t m = mbslen (needle);
48 mbchar_t *needle_mbchars;
49 size_t *table;
50
51
52 char *memory = (char *) nmalloca (m, sizeof (mbchar_t) + sizeof (size_t));
53 if (memory == NULL)
54 return false;
55 needle_mbchars = (mbchar_t *) memory;
56 table = (size_t *) (memory + m * sizeof (mbchar_t));
57
58
59 {
60 mbui_iterator_t iter;
61 size_t j;
62
63 j = 0;
64 for (mbui_init (iter, needle); mbui_avail (iter); mbui_advance (iter), j++)
65 {
66 mb_copy (&needle_mbchars[j], &mbui_cur (iter));
67 if (needle_mbchars[j].wc_valid)
68 needle_mbchars[j].wc = towlower (needle_mbchars[j].wc);
69 }
70 }
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87 {
88 size_t i, j;
89
90
91 table[1] = 1;
92 j = 0;
93
94 for (i = 2; i < m; i++)
95 {
96
97
98
99
100 mbchar_t *b = &needle_mbchars[i - 1];
101
102 for (;;)
103 {
104
105
106
107 if (mb_equal (*b, needle_mbchars[j]))
108 {
109
110 table[i] = i - ++j;
111 break;
112 }
113
114
115
116 if (j == 0)
117 {
118
119 table[i] = i;
120 break;
121 }
122
123
124
125
126
127
128
129
130
131
132
133 j = j - table[j];
134 }
135
136 }
137 }
138
139
140 {
141 size_t j;
142 mbui_iterator_t rhaystack;
143 mbui_iterator_t phaystack;
144
145 *resultp = NULL;
146 j = 0;
147 mbui_init (rhaystack, haystack);
148 mbui_init (phaystack, haystack);
149
150 while (mbui_avail (phaystack))
151 {
152 mbchar_t c;
153
154 mb_copy (&c, &mbui_cur (phaystack));
155 if (c.wc_valid)
156 c.wc = towlower (c.wc);
157 if (mb_equal (needle_mbchars[j], c))
158 {
159 j++;
160 mbui_advance (phaystack);
161 if (j == m)
162 {
163
164 *resultp = mbui_cur_ptr (rhaystack);
165 break;
166 }
167 }
168 else if (j > 0)
169 {
170
171 size_t count = table[j];
172 j -= count;
173 for (; count > 0; count--)
174 {
175 if (!mbui_avail (rhaystack))
176 abort ();
177 mbui_advance (rhaystack);
178 }
179 }
180 else
181 {
182
183 if (!mbui_avail (rhaystack))
184 abort ();
185 mbui_advance (rhaystack);
186 mbui_advance (phaystack);
187 }
188 }
189 }
190
191 freea (memory);
192 return true;
193 }
194
195
196
197
198
199 char *
200 mbscasestr (const char *haystack, const char *needle)
201 {
202
203
204
205
206
207 if (MB_CUR_MAX > 1)
208 {
209 mbui_iterator_t iter_needle;
210
211 mbui_init (iter_needle, needle);
212 if (mbui_avail (iter_needle))
213 {
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228 bool try_kmp = true;
229 size_t outer_loop_count = 0;
230 size_t comparison_count = 0;
231 size_t last_ccount = 0;
232 mbui_iterator_t iter_needle_last_ccount;
233
234 mbchar_t b;
235 mbui_iterator_t iter_haystack;
236
237 mbui_init (iter_needle_last_ccount, needle);
238
239 mb_copy (&b, &mbui_cur (iter_needle));
240 if (b.wc_valid)
241 b.wc = towlower (b.wc);
242
243 mbui_init (iter_haystack, haystack);
244 for (;; mbui_advance (iter_haystack))
245 {
246 mbchar_t c;
247
248 if (!mbui_avail (iter_haystack))
249
250 return NULL;
251
252
253
254 if (try_kmp
255 && outer_loop_count >= 10
256 && comparison_count >= 5 * outer_loop_count)
257 {
258
259
260 size_t count = comparison_count - last_ccount;
261 for (;
262 count > 0 && mbui_avail (iter_needle_last_ccount);
263 count--)
264 mbui_advance (iter_needle_last_ccount);
265 last_ccount = comparison_count;
266 if (!mbui_avail (iter_needle_last_ccount))
267 {
268
269 const char *result;
270 bool success =
271 knuth_morris_pratt_multibyte (haystack, needle,
272 &result);
273 if (success)
274 return (char *) result;
275 try_kmp = false;
276 }
277 }
278
279 outer_loop_count++;
280 comparison_count++;
281 mb_copy (&c, &mbui_cur (iter_haystack));
282 if (c.wc_valid)
283 c.wc = towlower (c.wc);
284 if (mb_equal (c, b))
285
286 {
287 mbui_iterator_t rhaystack;
288 mbui_iterator_t rneedle;
289
290 memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t));
291 mbui_advance (rhaystack);
292
293 mbui_init (rneedle, needle);
294 if (!mbui_avail (rneedle))
295 abort ();
296 mbui_advance (rneedle);
297
298 for (;; mbui_advance (rhaystack), mbui_advance (rneedle))
299 {
300 if (!mbui_avail (rneedle))
301
302 return (char *) mbui_cur_ptr (iter_haystack);
303 if (!mbui_avail (rhaystack))
304
305 return NULL;
306 comparison_count++;
307 if (!mb_caseequal (mbui_cur (rhaystack),
308 mbui_cur (rneedle)))
309
310 break;
311 }
312 }
313 }
314 }
315 else
316 return (char *) haystack;
317 }
318 else
319 {
320 if (*needle != '\0')
321 {
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336 bool try_kmp = true;
337 size_t outer_loop_count = 0;
338 size_t comparison_count = 0;
339 size_t last_ccount = 0;
340 const char *needle_last_ccount = needle;
341
342
343
344 unsigned char b = TOLOWER ((unsigned char) *needle);
345
346 needle++;
347 for (;; haystack++)
348 {
349 if (*haystack == '\0')
350
351 return NULL;
352
353
354
355 if (try_kmp
356 && outer_loop_count >= 10
357 && comparison_count >= 5 * outer_loop_count)
358 {
359
360
361 if (needle_last_ccount != NULL)
362 {
363 needle_last_ccount +=
364 strnlen (needle_last_ccount,
365 comparison_count - last_ccount);
366 if (*needle_last_ccount == '\0')
367 needle_last_ccount = NULL;
368 last_ccount = comparison_count;
369 }
370 if (needle_last_ccount == NULL)
371 {
372
373 const unsigned char *result;
374 bool success =
375 knuth_morris_pratt ((const unsigned char *) haystack,
376 (const unsigned char *) (needle - 1),
377 strlen (needle - 1),
378 &result);
379 if (success)
380 return (char *) result;
381 try_kmp = false;
382 }
383 }
384
385 outer_loop_count++;
386 comparison_count++;
387 if (TOLOWER ((unsigned char) *haystack) == b)
388
389 {
390 const char *rhaystack = haystack + 1;
391 const char *rneedle = needle;
392
393 for (;; rhaystack++, rneedle++)
394 {
395 if (*rneedle == '\0')
396
397 return (char *) haystack;
398 if (*rhaystack == '\0')
399
400 return NULL;
401 comparison_count++;
402 if (TOLOWER ((unsigned char) *rhaystack)
403 != TOLOWER ((unsigned char) *rneedle))
404
405 break;
406 }
407 }
408 }
409 }
410 else
411 return (char *) haystack;
412 }
413 }