This source file includes the following definitions.
- FUNC
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 UNIT * /* Returns the case-mapped string, or NULL on failure. */
27 FUNC (const UNIT *s, size_t n, /* input string and its length in units (s_end = s + n) */
28 casing_prefix_context_t prefix_context, /* seeds the look-behind state for text preceding s */
29 casing_suffix_context_t suffix_context, /* consulted when a look-ahead scan runs past the end of s */
30 const char *iso639_language, /* may be NULL; its first two bytes select language-specific rules */
31 ucs4_t (*single_character_map) (ucs4_t), /* fallback mapping used when no special rule applies */
32 size_t offset_in_rule, /* byte offset of the mapping array inside a special_casing_rule */
33 uninorm_t nf, /* if non-NULL, the mapped text is passed through U_NORMALIZE */
34 UNIT *resultbuf, size_t *lengthp) /* caller buffer (may be NULL) with capacity in; result length out */
35 {
36 /* Output buffer, number of units written so far, and capacity in units. */
37 UNIT *result;
38 size_t length;
39 size_t allocated;
40 /* When normalizing (nf != NULL) or without a caller buffer, start with no buffer at all; */
41 /* otherwise write directly into resultbuf, whose capacity is taken from *lengthp. */
42 if (nf != NULL || resultbuf == NULL)
43 {
44 result = NULL;
45 allocated = 0;
46 }
47 else
48 {
49 result = resultbuf;
50 allocated = *lengthp;
51 }
52 length = 0;
53
54 {
55 const UNIT *s_end = s + n;
56 /* Look-behind state #1: the most recent character for which */
57 /* uc_is_case_ignorable is false. Seeded from prefix_context and updated */
58 /* after every input character. Read by the SCC_FINAL_SIGMA test. */
59 ucs4_t last_char_except_ignorable =
60 prefix_context.last_char_except_ignorable;
61 /* Look-behind state #2: the most recent character whose combining class */
62 /* is UC_CCC_A or UC_CCC_NR. Seeded from prefix_context. */
63 /* Read by the SCC_AFTER_SOFT_DOTTED and SCC_AFTER_I tests. */
64 ucs4_t last_char_normal_or_above =
65 prefix_context.last_char_normal_or_above;
66
67 while (s < s_end)
68 {
69 ucs4_t uc;
70 int count = U_MBTOUC_UNSAFE (&uc, s, s_end - s); /* count: units consumed (presumably after decoding into uc); s advances by it at the loop end */
71 /* Output of the mapping step: between 0 and 3 characters. */
72 ucs4_t mapped_uc[3];
73 unsigned int mapped_count;
74 /* Special-casing rules are only consulted for characters below 0x10000. */
75 if (uc < 0x10000)
76 {
77 /* Lookup key: high byte of uc, low byte of uc, then a rule index. */
78 char code[3];
79
80 code[0] = (uc >> 8) & 0xff;
81 code[1] = uc & 0xff;
82 /* Try rule indices 0, 1, 2, ... until the lookup fails or a rule has no successor. */
83 for (code[2] = 0; ; code[2]++)
84 {
85 const struct special_casing_rule *rule =
86 gl_unicase_special_lookup (code, 3);
87
88 if (rule == NULL)
89 break;
90 /* A rule is considered when its language field is empty, or when */
91 /* iso639_language is non-NULL and its first two bytes equal the */
92 /* first two bytes of the rule's language field. */
93 if (rule->language[0] == '\0'
94 || (iso639_language != NULL
95 && iso639_language[0] == rule->language[0]
96 && iso639_language[1] == rule->language[1]))
97 {
98 /* A negative context encodes the negation of the condition named by its absolute value. */
99 int context = rule->context;
100 bool applies;
101
102 if (context < 0)
103 context = - context;
104 switch (context)
105 {
106 case SCC_ALWAYS:
107 applies = true;
108 break;
109
110 case SCC_FINAL_SIGMA:
111 /* Applies when both of the following hold: */
112 /* (a) the last preceding non-case-ignorable character is cased, and */
113 /* (b) the next following non-case-ignorable character is not cased. */
114 /* For (b), the rest of s is scanned first; if s ends before such a */
115 /* character is found, suffix_context.first_char_except_ignorable */
116 /* is used instead. */
117
118 applies = uc_is_cased (last_char_except_ignorable);
119
120 if (applies)
121 {
122 const UNIT *s2 = s + count;
123 for (;;)
124 {
125 if (s2 < s_end)
126 {
127 ucs4_t uc2;
128 int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
129 /* Case-ignorable characters are skipped; the first */
130 /* other character decides the outcome. */
131
132
133 if (!uc_is_case_ignorable (uc2))
134 {
135 applies = ! uc_is_cased (uc2);
136 break;
137 }
138 s2 += count2;
139 }
140 else
141 {
142 applies = ! uc_is_cased (suffix_context.first_char_except_ignorable);
143 break;
144 }
145 }
146 }
147 break;
148
149 case SCC_AFTER_SOFT_DOTTED:
150 /* Applies when uc_is_property_soft_dotted holds for the last */
151 /* preceding character of combining class UC_CCC_A or UC_CCC_NR. */
152
153
154 applies = uc_is_property_soft_dotted (last_char_normal_or_above);
155 break;
156
157 case SCC_MORE_ABOVE:
158 /* Scan forward: applies if a character of combining class UC_CCC_A */
159 /* is reached before one of class UC_CCC_NR. If s ends first, the */
160 /* SCC_MORE_ABOVE_MASK bit of suffix_context.bits decides. */
161
162 {
163 const UNIT *s2 = s + count;
164 applies = false;
165 for (;;)
166 {
167 if (s2 < s_end)
168 {
169 ucs4_t uc2;
170 int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
171 int ccc = uc_combining_class (uc2);
172 if (ccc == UC_CCC_A)
173 {
174 applies = true;
175 break;
176 }
177 if (ccc == UC_CCC_NR)
178 break;
179 s2 += count2;
180 }
181 else
182 {
183 applies = ((suffix_context.bits & SCC_MORE_ABOVE_MASK) != 0);
184 break;
185 }
186 }
187 }
188 break;
189
190 case SCC_BEFORE_DOT:
191 /* Scan forward: applies if U+0307 is reached before any other */
192 /* character of combining class UC_CCC_A or UC_CCC_NR. The U+0307 */
193 /* test comes first, so U+0307 itself never stops the scan as a */
194 /* blocker. If s ends first, the SCC_BEFORE_DOT_MASK bit of */
195 /* suffix_context.bits decides. */
196
197 {
198 const UNIT *s2 = s + count;
199 applies = false;
200 for (;;)
201 {
202 if (s2 < s_end)
203 {
204 ucs4_t uc2;
205 int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
206 if (uc2 == 0x0307)
207 {
208 applies = true;
209 break;
210 }
211 {
212 int ccc = uc_combining_class (uc2);
213 if (ccc == UC_CCC_A || ccc == UC_CCC_NR)
214 break;
215 }
216 s2 += count2;
217 }
218 else
219 {
220 applies = ((suffix_context.bits & SCC_BEFORE_DOT_MASK) != 0);
221 break;
222 }
223 }
224 }
225 break;
226
227 case SCC_AFTER_I:
228 /* Applies when the last preceding character of combining class */
229 /* UC_CCC_A or UC_CCC_NR is an uppercase 'I'. */
230
231
232 applies = (last_char_normal_or_above == 'I');
233 break;
234
235 default:
236 abort ();
237 }
238 if (rule->context < 0)
239 applies = !applies;
240
241 if (applies)
242 {
243 /* The mapping is an array of unsigned short located offset_in_rule bytes */
244 /* into the rule. At most 3 entries are read; a 0 entry ends it early. */
245 const unsigned short *mapped_in_rule =
246 (const unsigned short *)((const char *)rule + offset_in_rule);
247
248 if (mapped_in_rule[0] == 0)
249 mapped_count = 0;
250 else
251 {
252 mapped_uc[0] = mapped_in_rule[0];
253 if (mapped_in_rule[1] == 0)
254 mapped_count = 1;
255 else
256 {
257 mapped_uc[1] = mapped_in_rule[1];
258 if (mapped_in_rule[2] == 0)
259 mapped_count = 2;
260 else
261 {
262 mapped_uc[2] = mapped_in_rule[2];
263 mapped_count = 3;
264 }
265 }
266 }
267 goto found_mapping;
268 }
269 }
270 /* This rule did not apply. Move on to the next rule index only if */
271 /* the rule's has_next flag is set. */
272 if (!rule->has_next)
273 break;
274 }
275 }
276 /* No special-casing rule applied (or none was looked up): */
277 /* fall back to the caller-supplied single-character mapping. */
278
279 mapped_uc[0] = single_character_map (uc);
280 mapped_count = 1;
281
282 found_mapping:
283 /* Append the mapped_count characters of mapped_uc[] to the result, growing the buffer as needed. */
284 {
285 unsigned int i;
286
287 for (i = 0; i < mapped_count; i++)
288 {
289 ucs4_t muc = mapped_uc[i];
290 /* First try to encode into the remaining space. A return of -1 is reported as EINVAL; */
291 /* any other negative return (presumably "buffer too small" - confirm against U_UCTOMB) falls through to the grow path. */
292 if (length < allocated)
293 {
294 int ret = U_UCTOMB (result + length, muc, allocated - length);
295 if (ret == -1)
296 {
297 errno = EINVAL;
298 goto fail;
299 }
300 if (ret >= 0)
301 {
302 length += ret;
303 goto done_appending;
304 }
305 }
306 { /* Grow path: double the capacity, with a minimum of 64 units, then retry the encode. */
307 size_t old_allocated = allocated;
308 size_t new_allocated = 2 * old_allocated;
309 if (new_allocated < 64)
310 new_allocated = 64;
311 if (new_allocated < old_allocated) /* the doubling wrapped around */
312 abort ();
313 {
314 UNIT *larger_result;
315 if (result == NULL) /* nothing allocated yet */
316 {
317 larger_result = (UNIT *) malloc (new_allocated * sizeof (UNIT));
318 if (larger_result == NULL)
319 {
320 errno = ENOMEM;
321 goto fail;
322 }
323 }
324 else if (result == resultbuf) /* caller's buffer is full: move to the heap, keeping what was written */
325 {
326 larger_result = (UNIT *) malloc (new_allocated * sizeof (UNIT));
327 if (larger_result == NULL)
328 {
329 errno = ENOMEM;
330 goto fail;
331 }
332 U_CPY (larger_result, resultbuf, length);
333 }
334 else /* already on the heap */
335 {
336 larger_result =
337 (UNIT *) realloc (result, new_allocated * sizeof (UNIT));
338 if (larger_result == NULL) /* result is still owned here and is freed at fail */
339 {
340 errno = ENOMEM;
341 goto fail;
342 }
343 }
344 result = larger_result;
345 allocated = new_allocated;
346 {
347 int ret = U_UCTOMB (result + length, muc, allocated - length);
348 if (ret == -1)
349 {
350 errno = EINVAL;
351 goto fail;
352 }
353 if (ret < 0) /* still no room after growing: treated as a can't-happen condition */
354 abort ();
355 length += ret;
356 goto done_appending;
357 }
358 }
359 }
360 done_appending: ;
361 }
362 }
363 /* Update both look-behind states with the input character (not the mapped one). */
364 if (!uc_is_case_ignorable (uc))
365 last_char_except_ignorable = uc;
366
367 {
368 int ccc = uc_combining_class (uc);
369 if (ccc == UC_CCC_A || ccc == UC_CCC_NR)
370 last_char_normal_or_above = uc;
371 }
372
373 s += count;
374 }
375 }
376
377 if (nf != NULL)
378 {
379 /* Hand resultbuf and lengthp to U_NORMALIZE, then free the unnormalized intermediate buffer. */
380 UNIT *normalized_result;
381
382 normalized_result = U_NORMALIZE (nf, result, length, resultbuf, lengthp);
383 if (normalized_result == NULL)
384 goto fail;
385
386 free (result);
387 return normalized_result;
388 }
389
390 if (length == 0)
391 {
392 if (result == NULL)
393 {
394 /* Empty result and no buffer: allocate a dummy block so the return value is non-NULL (NULL means failure). */
395 result = (UNIT *) malloc (1);
396 if (result == NULL)
397 {
398 errno = ENOMEM;
399 goto fail;
400 }
401 }
402 }
403 else if (result != resultbuf && length < allocated)
404 {
405 /* Heap buffer with slack: try to shrink it to the exact size; keep the larger block if that fails. */
406 UNIT *memory;
407
408 memory = (UNIT *) realloc (result, length * sizeof (UNIT));
409 if (memory != NULL)
410 result = memory;
411 }
412
413 *lengthp = length;
414 return result;
415 /* Error exit: free the buffer unless it is the caller's, preserving errno across free. */
416 fail:
417 if (result != resultbuf)
418 {
419 int saved_errno = errno;
420 free (result);
421 errno = saved_errno;
422 }
423 return NULL;
424 }