This source file includes the following definitions.
- FUNC
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/* Title-case the N units starting at S and return the mapped string.

   The input is processed one character at a time.  Per-position flags are
   obtained from U_WORDBREAKS; a nonzero flag restarts the search for the
   first cased character.  Characters before the first cased character are
   copied unchanged, the first cased character is mapped with uc_totitle (or
   the title[] field of an applicable special-casing rule), and the
   characters after it with uc_tolower (or the lower[] field).

   PREFIX_CONTEXT supplies the initial values of the "last character seen"
   helpers used by the context conditions.  SUFFIX_CONTEXT is consulted when
   a forward scan for a context condition reaches the end of S.
   ISO639_LANGUAGE may be NULL; only its first two characters are compared
   with a rule's language.
   NF, when non-NULL, causes the mapped text to be passed through
   U_NORMALIZE before it is returned.
   RESULTBUF with capacity *LENGTHP is used as the initial output buffer
   when NF is NULL and RESULTBUF is non-NULL; output that does not fit is
   moved to malloc'ed memory.

   Returns the result pointer (RESULTBUF, malloc'ed memory, or the value
   returned by U_NORMALIZE).  When NF is NULL, the result length is stored in
   *LENGTHP; when NF is non-NULL, LENGTHP is handed to U_NORMALIZE
   (presumably it stores the length there -- confirm against U_NORMALIZE).
   Returns NULL on failure, with errno set to ENOMEM or EINVAL by this
   function, or left as set by U_NORMALIZE.  */
33 UNIT *
34 FUNC (const UNIT *s, size_t n,
35 casing_prefix_context_t prefix_context,
36 casing_suffix_context_t suffix_context,
37 const char *iso639_language,
38 uninorm_t nf,
39 UNIT *resultbuf, size_t *lengthp)
40 {
41 /* Output accumulator: buffer, units used, units of capacity.  */
42 UNIT *result;
43 size_t length;
44 size_t allocated;
45 /* Per-position flags filled in by U_WORDBREAKS; NULL when n == 0.  */
46 char *wordbreaks;
47 /* Start in the caller's buffer only when no normalization pass follows;  */
48 /* otherwise start empty and allocate on the first append.  */
49 if (nf != NULL || resultbuf == NULL)
50 {
51 result = NULL;
52 allocated = 0;
53 }
54 else
55 {
56 result = resultbuf;
57 allocated = *lengthp;
58 }
59 length = 0;
60
61 /* Compute the flags for s[0..n-1]; nothing to compute for empty input.  */
62 if (n > 0)
63 {
64 wordbreaks = (char *) malloc (n);
65 if (wordbreaks == NULL)
66 {
67 errno = ENOMEM;
68 goto fail2;
69 }
70 U_WORDBREAKS (s, n, wordbreaks);
71 }
72 else
73 wordbreaks = NULL;
74
75 {
76 const UNIT *s_end = s + n;
77 const char *wp = wordbreaks;
78
79 /* State of the scan within the current segment:  */
80 /* - true: still searching for the first cased character; characters  */
81 /*   that are not cased are copied unchanged.  */
82 /* - false: the first cased character has been title-cased; every  */
83 /*   following character is lower-cased.  */
84 /* The state is reset to true whenever the flag at the current  */
85 /* position (*wp) is nonzero.  */
86
87 bool in_word_first_part = true;
88
89 /* Helper for SCC_FINAL_SIGMA: the last character seen so far that is  */
90 /* not case-ignorable, seeded from the prefix context.  */
91 ucs4_t last_char_except_ignorable =
92 prefix_context.last_char_except_ignorable;
93
94 /* Helper for SCC_AFTER_SOFT_DOTTED and SCC_AFTER_I: the last character  */
95 /* seen so far whose combining class is UC_CCC_A or UC_CCC_NR.  */
96 ucs4_t last_char_normal_or_above =
97 prefix_context.last_char_normal_or_above;
98
99 while (s < s_end)
100 {
101 /* Fetch the next character; count is the number of units it occupies.  */
102 ucs4_t uc;
103 int count = U_MBTOUC_UNSAFE (&uc, s, s_end - s);
104 /* Mapping function and rule-field offset chosen for this character.  */
105 ucs4_t (*single_character_map) (ucs4_t);
106 size_t offset_in_rule; /* byte offset into 'struct special_casing_rule' */
107 /* The mapping of uc is mapped_uc[0..mapped_count-1].  */
108 ucs4_t mapped_uc[3];
109 unsigned int mapped_count;
110
111 if (*wp)
112 /* Nonzero flag here: restart the search for the first cased character.  */
113 in_word_first_part = true;
114
115 /* Choose single_character_map and offset_in_rule:  */
116 /* - uc_totitle with the title[] field,  */
117 /* - uc_tolower with the lower[] field, or  */
118 /* - NULL (offset unused) when uc is to be copied unchanged.  */
119
120 if (in_word_first_part)
121 {
122 if (uc_is_cased (uc))
123 {
124 /* First cased character: title-case it, lower-case what follows.  */
125 single_character_map = uc_totitle;
126 offset_in_rule = offsetof (struct special_casing_rule, title[0]);
127 in_word_first_part = false;
128 }
129 else
130 {
131 /* Still before the first cased character: uc is not converted.  */
132 single_character_map = NULL;
133 offset_in_rule = 0;
134 }
135 }
136 else
137 {
138 /* After the first cased character: uc is lower-cased.  */
139 single_character_map = uc_tolower;
140 offset_in_rule = offsetof (struct special_casing_rule, lower[0]);
141 }
142
143 /* Compute the mapping of uc.  */
144 if (single_character_map == NULL)
145 {
146 mapped_uc[0] = uc;
147 mapped_count = 1;
148 goto found_mapping;
149 }
150
151 if (uc < 0x10000)
152 {
153 /* Try the special-casing rules first (only for uc below 0x10000).  */
154 char code[3];
155 /* Lookup key: high byte, low byte, then a counter over the rules.  */
156 code[0] = (uc >> 8) & 0xff;
157 code[1] = uc & 0xff;
158
159 for (code[2] = 0; ; code[2]++)
160 {
161 const struct special_casing_rule *rule =
162 gl_unicase_special_lookup (code, 3);
163
164 if (rule == NULL)
165 break;
166
167 /* The rule is a candidate if its language is empty, or if its two  */
168 /* language characters equal the first two of iso639_language.  */
169 if (rule->language[0] == '\0'
170 || (iso639_language != NULL
171 && iso639_language[0] == rule->language[0]
172 && iso639_language[1] == rule->language[1]))
173 {
174 /* Evaluate the context condition; a negative value is its negation.  */
175 int context = rule->context;
176 bool applies;
177
178 if (context < 0)
179 context = - context;
180 switch (context)
181 {
182 case SCC_ALWAYS:
183 applies = true;
184 break;
185
186 case SCC_FINAL_SIGMA:
187
188 /* Two parts must hold:  */
189 /* - before: the last non-case-ignorable character seen so far  */
190 /*   is cased;  */
191 /* - after: the next non-case-ignorable character is not cased.  */
192
193 /* Test the "before" part.  */
194 applies = uc_is_cased (last_char_except_ignorable);
195 /* Test the "after" part by scanning forward from the next character.  */
196 if (applies)
197 {
198 const UNIT *s2 = s + count;
199 for (;;)
200 {
201 if (s2 < s_end)
202 {
203 ucs4_t uc2;
204 int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
205
206 /* Case-ignorable characters are skipped; the first other  */
207 /* character decides.  */
208
209 if (!uc_is_case_ignorable (uc2))
210 {
211 applies = ! uc_is_cased (uc2);
212 break;
213 }
214 s2 += count2;
215 }
216 else
217 {
218 applies = ! uc_is_cased (suffix_context.first_char_except_ignorable);
219 break;
220 }
221 }
222 }
223 break;
224
225 case SCC_AFTER_SOFT_DOTTED:
226
227 /* Holds if the last character of combining class UC_CCC_A or  */
228 /* UC_CCC_NR seen so far has the soft-dotted property.  */
229
230 applies = uc_is_property_soft_dotted (last_char_normal_or_above);
231 break;
232
233 case SCC_MORE_ABOVE:
234
235 /* Holds if, scanning forward, a character of class UC_CCC_A occurs  */
236 /* before any character of class UC_CCC_NR.  At the end of s the  */
237 /* answer is taken from suffix_context.bits.  */
238 {
239 const UNIT *s2 = s + count;
240 applies = false;
241 for (;;)
242 {
243 if (s2 < s_end)
244 {
245 ucs4_t uc2;
246 int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
247 int ccc = uc_combining_class (uc2);
248 if (ccc == UC_CCC_A)
249 {
250 applies = true;
251 break;
252 }
253 if (ccc == UC_CCC_NR)
254 break;
255 s2 += count2;
256 }
257 else
258 {
259 applies = ((suffix_context.bits & SCC_MORE_ABOVE_MASK) != 0);
260 break;
261 }
262 }
263 }
264 break;
265
266 case SCC_BEFORE_DOT:
267
268 /* Holds if, scanning forward, U+0307 occurs before any other  */
269 /* character of class UC_CCC_A or UC_CCC_NR.  Characters of other  */
270 /* classes may intervene.  At the end of s the answer is taken  */
271 /* from suffix_context.bits.  */
272
273 {
274 const UNIT *s2 = s + count;
275 applies = false;
276 for (;;)
277 {
278 if (s2 < s_end)
279 {
280 ucs4_t uc2;
281 int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
282 if (uc2 == 0x0307)
283 {
284 applies = true;
285 break;
286 }
287 {
288 int ccc = uc_combining_class (uc2);
289 if (ccc == UC_CCC_A || ccc == UC_CCC_NR)
290 break;
291 }
292 s2 += count2;
293 }
294 else
295 {
296 applies = ((suffix_context.bits & SCC_BEFORE_DOT_MASK) != 0);
297 break;
298 }
299 }
300 }
301 break;
302
303 case SCC_AFTER_I:
304
305 /* Holds if the last character of combining class UC_CCC_A or  */
306 /* UC_CCC_NR seen so far is 'I'.  */
307
308 applies = (last_char_normal_or_above == 'I');
309 break;
310
311 default: /* unknown context value */
312 abort ();
313 }
314 if (rule->context < 0)
315 applies = !applies;
316
317 if (applies)
318 {
319 /* The rule applies.  Copy its mapping: 0 to 3 values from the  */
320 /* field at offset_in_rule, ended early by a 0 entry.  */
321 const unsigned short *mapped_in_rule =
322 (const unsigned short *)((const char *)rule + offset_in_rule);
323
324 if (mapped_in_rule[0] == 0)
325 mapped_count = 0;
326 else
327 {
328 mapped_uc[0] = mapped_in_rule[0];
329 if (mapped_in_rule[1] == 0)
330 mapped_count = 1;
331 else
332 {
333 mapped_uc[1] = mapped_in_rule[1];
334 if (mapped_in_rule[2] == 0)
335 mapped_count = 2;
336 else
337 {
338 mapped_uc[2] = mapped_in_rule[2];
339 mapped_count = 3;
340 }
341 }
342 }
343 goto found_mapping;
344 }
345 }
346
347 /* The rule has no successor: stop without another lookup.  */
348 if (!rule->has_next)
349 break;
350 }
351 }
352
353 /* No special-casing rule applied: use the single-character mapping.  */
354
355 mapped_uc[0] = single_character_map (uc);
356 mapped_count = 1;
357
358 found_mapping:
359 /* Here uc maps to mapped_uc[0..mapped_count-1].  */
360 {
361 unsigned int i;
362
363 for (i = 0; i < mapped_count; i++)
364 {
365 ucs4_t muc = mapped_uc[i];
366
367 /* Append muc to the accumulator, first trying the space available.  */
368 if (length < allocated)
369 {
370 int ret = U_UCTOMB (result + length, muc, allocated - length);
371 if (ret == -1)
372 {
373 errno = EINVAL;
374 goto fail1;
375 }
376 if (ret >= 0) /* other negative values fall through to the growth path */
377 {
378 length += ret;
379 goto done_appending;
380 }
381 }
382 { /* Grow: double the capacity, with a minimum of 64 units.  */
383 size_t old_allocated = allocated;
384 size_t new_allocated = 2 * old_allocated;
385 if (new_allocated < 64)
386 new_allocated = 64;
387 if (new_allocated < old_allocated) /* the doubling wrapped around */
388 abort ();
389 {
390 UNIT *larger_result;
391 if (result == NULL) /* nothing allocated yet */
392 {
393 larger_result = (UNIT *) malloc (new_allocated * sizeof (UNIT));
394 if (larger_result == NULL)
395 {
396 errno = ENOMEM;
397 goto fail1;
398 }
399 }
400 else if (result == resultbuf) /* move out of the caller's buffer */
401 {
402 larger_result = (UNIT *) malloc (new_allocated * sizeof (UNIT));
403 if (larger_result == NULL)
404 {
405 errno = ENOMEM;
406 goto fail1;
407 }
408 U_CPY (larger_result, resultbuf, length);
409 }
410 else /* already in memory obtained from malloc or realloc */
411 {
412 larger_result =
413 (UNIT *) realloc (result, new_allocated * sizeof (UNIT));
414 if (larger_result == NULL)
415 {
416 errno = ENOMEM;
417 goto fail1;
418 }
419 }
420 result = larger_result;
421 allocated = new_allocated;
422 {
423 int ret = U_UCTOMB (result + length, muc, allocated - length);
424 if (ret == -1)
425 {
426 errno = EINVAL;
427 goto fail1;
428 }
429 if (ret < 0) /* treated as impossible after growing */
430 abort ();
431 length += ret;
432 goto done_appending;
433 }
434 }
435 }
436 done_appending: ;
437 }
438 }
439 /* Update the context helpers for the characters that follow.  */
440 if (!uc_is_case_ignorable (uc))
441 last_char_except_ignorable = uc;
442
443 {
444 int ccc = uc_combining_class (uc);
445 if (ccc == UC_CCC_A || ccc == UC_CCC_NR)
446 last_char_normal_or_above = uc;
447 }
448 /* Advance the input pointer and the flag pointer by the same amount.  */
449 s += count;
450 wp += count;
451 }
452 }
453
454 free (wordbreaks);
455
456 if (nf != NULL)
457 {
458 /* Pass the accumulated text through U_NORMALIZE, then free the input.  */
459 UNIT *normalized_result;
460
461 normalized_result = U_NORMALIZE (nf, result, length, resultbuf, lengthp);
462 if (normalized_result == NULL)
463 goto fail2; /* not fail1: wordbreaks has already been freed */
464
465 free (result);
466 return normalized_result;
467 }
468
469 if (length == 0)
470 {
471 if (result == NULL)
472 {
473 /* Return a non-NULL pointer, because NULL signals failure.  */
474 result = (UNIT *) malloc (1);
475 if (result == NULL)
476 {
477 errno = ENOMEM;
478 goto fail2;
479 }
480 }
481 }
482 else if (result != resultbuf && length < allocated)
483 {
484 /* Try to shrink the block; keep the larger one if realloc fails.  */
485 UNIT *memory;
486
487 memory = (UNIT *) realloc (result, length * sizeof (UNIT));
488 if (memory != NULL)
489 result = memory;
490 }
491
492 *lengthp = length;
493 return result;
494
495 fail1: /* failure while wordbreaks is still allocated: free it, keep errno */
496 {
497 int saved_errno = errno;
498 free (wordbreaks);
499 errno = saved_errno;
500 }
501 fail2: /* free the accumulator unless it is the caller's buffer, keep errno */
502 if (result != resultbuf)
503 {
504 int saved_errno = errno;
505 free (result);
506 errno = saved_errno;
507 }
508 return NULL;
509 }
510
511
512
513
514
515