This source file includes the following definitions.
- uninorm_filter_create
- uninorm_filter_write
- uninorm_filter_flush
- uninorm_filter_free
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 #include <config.h>
27
28
29 #include "uninorm.h"
30
31 #include <errno.h>
32 #include <stddef.h>
33 #include <stdlib.h>
34 #include <string.h>
35
36 #include "unictype.h"
37 #include "normalize-internal.h"
38 #include "uninorm/decompose-internal.h"
39
40
41 struct uninorm_filter
42 {
43
44 int (*decomposer) (ucs4_t uc, ucs4_t *decomposition);
45 ucs4_t (*composer) (ucs4_t uc1, ucs4_t uc2);
46
47
48 int (*stream_func) (void *stream_data, ucs4_t uc);
49 void *stream_data;
50
51
52 #define SORTBUF_PREALLOCATED 64
53 struct ucs4_with_ccc sortbuf_preallocated[2 * SORTBUF_PREALLOCATED];
54 struct ucs4_with_ccc *sortbuf;
55 size_t sortbuf_allocated;
56 size_t sortbuf_count;
57 };
58
59 struct uninorm_filter *
60 uninorm_filter_create (uninorm_t nf,
61 int (*stream_func) (void *stream_data, ucs4_t uc),
62 void *stream_data)
63 {
64 struct uninorm_filter *filter =
65 (struct uninorm_filter *) malloc (sizeof (struct uninorm_filter));
66
67 if (filter == NULL)
68
69 return NULL;
70
71 filter->decomposer = nf->decomposer;
72 filter->composer = nf->composer;
73 filter->stream_func = stream_func;
74 filter->stream_data = stream_data;
75 filter->sortbuf = filter->sortbuf_preallocated;
76 filter->sortbuf_allocated = SORTBUF_PREALLOCATED;
77 filter->sortbuf_count = 0;
78
79 return filter;
80 }
81
82 int
83 uninorm_filter_write (struct uninorm_filter *filter, ucs4_t uc_arg)
84 {
85 ucs4_t decomposed[UC_DECOMPOSITION_MAX_LENGTH];
86 int decomposed_count;
87
88
89 decomposed[0] = uc_arg;
90 decomposed_count = 1;
91
92
93
94
95
96
97
98 {
99 int curr;
100
101 for (curr = 0; curr < decomposed_count; )
102 {
103
104
105 ucs4_t curr_decomposed[UC_DECOMPOSITION_MAX_LENGTH];
106 int curr_decomposed_count;
107
108 curr_decomposed_count =
109 filter->decomposer (decomposed[curr], curr_decomposed);
110 if (curr_decomposed_count >= 0)
111 {
112
113
114
115 int shift = curr_decomposed_count - 1;
116
117 if (shift < 0)
118 abort ();
119 if (shift > 0)
120 {
121 int j;
122
123 decomposed_count += shift;
124 if (decomposed_count > UC_DECOMPOSITION_MAX_LENGTH)
125 abort ();
126 for (j = decomposed_count - 1 - shift; j > curr; j--)
127 decomposed[j + shift] = decomposed[j];
128 }
129 for (; shift >= 0; shift--)
130 decomposed[curr + shift] = curr_decomposed[shift];
131 }
132 else
133 {
134
135 curr++;
136 }
137 }
138 }
139
140 {
141
142 struct ucs4_with_ccc *sortbuf = filter->sortbuf;
143 size_t sortbuf_count = filter->sortbuf_count;
144 int i;
145
146 for (i = 0; i < decomposed_count; i++)
147 {
148
149 ucs4_t uc = decomposed[i];
150 int ccc = uc_combining_class (uc);
151
152 if (ccc == 0)
153 {
154 size_t j;
155
156
157
158 if (sortbuf_count > 1)
159 gl_uninorm_decompose_merge_sort_inplace (sortbuf, sortbuf_count,
160 sortbuf + sortbuf_count);
161
162 if (filter->composer != NULL)
163 {
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184 if (sortbuf_count > 0 && sortbuf[0].ccc == 0)
185 {
186 for (j = 1; j < sortbuf_count; )
187 {
188 if (sortbuf[j].ccc > sortbuf[j - 1].ccc)
189 {
190 ucs4_t combined =
191 filter->composer (sortbuf[0].code, sortbuf[j].code);
192 if (combined)
193 {
194 size_t k;
195
196 sortbuf[0].code = combined;
197
198 for (k = j + 1; k < sortbuf_count; k++)
199 sortbuf[k - 1] = sortbuf[k];
200 sortbuf_count--;
201 continue;
202 }
203 }
204 j++;
205 }
206 if (sortbuf_count == 1)
207 {
208 ucs4_t combined =
209 filter->composer (sortbuf[0].code, uc);
210 if (combined)
211 {
212 uc = combined;
213 ccc = 0;
214
215
216
217 sortbuf_count = 0;
218 }
219 }
220 }
221 }
222
223 for (j = 0; j < sortbuf_count; j++)
224 {
225 ucs4_t muc = sortbuf[j].code;
226
227
228 int ret = filter->stream_func (filter->stream_data, muc);
229 if (ret < 0)
230 {
231
232 filter->sortbuf_count = 0;
233 return -1;
234 }
235 }
236
237
238 sortbuf_count = 0;
239 }
240
241
242 if (sortbuf_count == filter->sortbuf_allocated)
243 {
244 struct ucs4_with_ccc *new_sortbuf;
245
246 filter->sortbuf_allocated = 2 * filter->sortbuf_allocated;
247 if (filter->sortbuf_allocated < sortbuf_count)
248 abort ();
249 new_sortbuf =
250 (struct ucs4_with_ccc *)
251 malloc (2 * filter->sortbuf_allocated * sizeof (struct ucs4_with_ccc));
252 if (new_sortbuf == NULL)
253 {
254
255 filter->sortbuf_count = sortbuf_count;
256 return -1;
257 }
258 memcpy (new_sortbuf, filter->sortbuf,
259 sortbuf_count * sizeof (struct ucs4_with_ccc));
260 if (filter->sortbuf != filter->sortbuf_preallocated)
261 free (filter->sortbuf);
262 filter->sortbuf = new_sortbuf;
263
264 sortbuf = filter->sortbuf;
265 }
266 sortbuf[sortbuf_count].code = uc;
267 sortbuf[sortbuf_count].ccc = ccc;
268 sortbuf_count++;
269 }
270
271 filter->sortbuf_count = sortbuf_count;
272 }
273
274 return 0;
275 }
276
277
278
279
280
281
282
283 int
284 uninorm_filter_flush (struct uninorm_filter *filter)
285 {
286
287 struct ucs4_with_ccc * const sortbuf = filter->sortbuf;
288 size_t sortbuf_count = filter->sortbuf_count;
289 size_t j;
290
291
292
293 if (sortbuf_count > 1)
294 gl_uninorm_decompose_merge_sort_inplace (sortbuf, sortbuf_count,
295 sortbuf + sortbuf_count);
296
297 if (filter->composer != NULL)
298 {
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319 if (sortbuf_count > 0 && sortbuf[0].ccc == 0)
320 {
321 for (j = 1; j < sortbuf_count; )
322 {
323 if (sortbuf[j].ccc > sortbuf[j - 1].ccc)
324 {
325 ucs4_t combined =
326 filter->composer (sortbuf[0].code, sortbuf[j].code);
327 if (combined)
328 {
329 size_t k;
330
331 sortbuf[0].code = combined;
332
333 for (k = j + 1; k < sortbuf_count; k++)
334 sortbuf[k - 1] = sortbuf[k];
335 sortbuf_count--;
336 continue;
337 }
338 }
339 j++;
340 }
341 }
342 }
343
344 for (j = 0; j < sortbuf_count; j++)
345 {
346 ucs4_t muc = sortbuf[j].code;
347
348
349 int ret = filter->stream_func (filter->stream_data, muc);
350 if (ret < 0)
351 {
352
353 filter->sortbuf_count = 0;
354 return -1;
355 }
356 }
357
358
359 filter->sortbuf_count = 0;
360
361 return 0;
362 }
363
364
365
366
367 int
368 uninorm_filter_free (struct uninorm_filter *filter)
369 {
370 int ret = uninorm_filter_flush (filter);
371
372 if (ret < 0)
373
374 return -1;
375
376 if (filter->sortbuf_count > 0)
377 abort ();
378 if (filter->sortbuf != filter->sortbuf_preallocated)
379 free (filter->sortbuf);
380 free (filter);
381
382 return 0;
383 }