This source file includes the following definitions.
- fill_names
- fill_aliases
- name_has_alias
- test_name_lookup
- test_inverse_lookup
- test_alias_lookup
- main
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 #include <config.h>
19
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23
24 #include "xalloc.h"
25 #include "uniname.h"
26
27
28
/* Character names indexed by code point.  Entries are filled in by
   fill_names; an entry that was never filled stays NULL (static storage).  */
static const char * unicode_names [0x110000];


/* Capacity of the unicode_aliases[] table.  */
#define ALIASLEN 0x200


/* One alias record: the alias string and the code point it designates.  */
struct unicode_alias
{
  const char *name;
  unsigned int uc;
};

/* Alias table.  fill_aliases appends records to it; aliases_count is the
   number of records appended so far.  */
static struct unicode_alias unicode_aliases [ALIASLEN];
static int aliases_count;
43
44
45
46 static void
47 fill_names (const char *unicodedata_filename)
48 {
49 FILE *stream;
50 char *field0;
51 char *field1;
52 char line[1024];
53 int lineno = 0;
54
55 stream = fopen (unicodedata_filename, "r");
56 if (stream == NULL)
57 {
58 fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
59 exit (EXIT_FAILURE);
60 }
61
62 while (fgets (line, sizeof line, stream))
63 {
64 char *p;
65 char *comment;
66 unsigned long i;
67
68 lineno++;
69
70 comment = strchr (line, '#');
71 if (comment != NULL)
72 *comment = '\0';
73 if (line[strspn (line, " \t\r\n")] == '\0')
74 continue;
75
76 field0 = p = line;
77 p = strchr (p, ';');
78 if (!p)
79 {
80 fprintf (stderr, "short line in '%s':%d\n",
81 unicodedata_filename, lineno);
82 exit (EXIT_FAILURE);
83 }
84 *p++ = '\0';
85
86 field1 = p;
87 if (*field1 == '<')
88 continue;
89 p = strchr (p, ';');
90 if (!p)
91 {
92 fprintf (stderr, "short line in '%s':%d\n",
93 unicodedata_filename, lineno);
94 exit (EXIT_FAILURE);
95 }
96 *p = '\0';
97 i = strtoul (field0, NULL, 16);
98 if (i >= 0x110000)
99 {
100 fprintf (stderr, "index too large\n");
101 exit (EXIT_FAILURE);
102 }
103 unicode_names[i] = xstrdup (field1);
104 }
105 if (ferror (stream) || fclose (stream))
106 {
107 fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
108 exit (1);
109 }
110 }
111
112
113
114 static void
115 fill_aliases (const char *namealiases_filename)
116 {
117 FILE *stream;
118 char *field0;
119 char *field1;
120 char line[1024];
121 int lineno = 0;
122
123 stream = fopen (namealiases_filename, "r");
124 if (stream == NULL)
125 {
126 fprintf (stderr, "error during fopen of '%s'\n", namealiases_filename);
127 exit (EXIT_FAILURE);
128 }
129
130 while (fgets (line, sizeof line, stream))
131 {
132 char *p;
133 char *comment;
134 unsigned long uc;
135
136 comment = strchr (line, '#');
137 if (comment != NULL)
138 *comment = '\0';
139 if (line[strspn (line, " \t\r\n")] == '\0')
140 continue;
141
142 lineno++;
143
144 field0 = p = line;
145 p = strchr (p, ';');
146 if (!p)
147 {
148 fprintf (stderr, "short line in '%s':%d\n",
149 namealiases_filename, lineno);
150 exit (EXIT_FAILURE);
151 }
152 *p++ = '\0';
153
154 field1 = p;
155 p = strchr (p, ';');
156 if (!p)
157 {
158 fprintf (stderr, "short line in '%s':%d\n",
159 namealiases_filename, lineno);
160 exit (EXIT_FAILURE);
161 }
162 *p = '\0';
163
164 uc = strtoul (field0, NULL, 16);
165 if (uc >= 0x110000)
166 {
167 fprintf (stderr, "index too large\n");
168 exit (EXIT_FAILURE);
169 }
170
171 if (aliases_count == ALIASLEN)
172 {
173 fprintf (stderr, "too many aliases\n");
174 exit (EXIT_FAILURE);
175 }
176 unicode_aliases[aliases_count].name = xstrdup (field1);
177 unicode_aliases[aliases_count].uc = uc;
178 aliases_count++;
179 }
180 if (ferror (stream) || fclose (stream))
181 {
182 fprintf (stderr, "error reading from '%s'\n", namealiases_filename);
183 exit (1);
184 }
185 }
186
187 static int
188 name_has_alias (unsigned int uc)
189 {
190 int i;
191 for (i = 0; i < ALIASLEN; i++)
192 if (unicode_aliases[i].uc == uc)
193 return 1;
194 return 0;
195 }
196
197
198 static int
199 test_name_lookup ()
200 {
201 int error = 0;
202 unsigned int i;
203 char buf[UNINAME_MAX];
204
205 for (i = 0; i < 0x11000; i++)
206 {
207 char *result = unicode_character_name (i, buf);
208
209 if (unicode_names[i] != NULL)
210 {
211 if (result == NULL)
212 {
213 fprintf (stderr, "\\u%04X name lookup failed!\n", i);
214 error = 1;
215 }
216 else if (strcmp (result, unicode_names[i]) != 0)
217 {
218 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
219 i, result);
220 error = 1;
221 }
222 }
223 else
224 {
225 if (result != NULL)
226 {
227 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
228 i, result);
229 error = 1;
230 }
231 }
232 }
233
234 for (i = 0x110000; i < 0x1000000; i++)
235 {
236 char *result = unicode_character_name (i, buf);
237
238 if (result != NULL)
239 {
240 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
241 i, result);
242 error = 1;
243 }
244 }
245
246 return error;
247 }
248
249
250 static int
251 test_inverse_lookup ()
252 {
253 int error = 0;
254 unsigned int i;
255
256
257 for (i = 0; i < 0x110000; i++)
258 if (unicode_names[i] != NULL)
259 {
260 unsigned int result = unicode_name_character (unicode_names[i]);
261 if (result != i)
262 {
263 if (result == UNINAME_INVALID)
264 fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
265 unicode_names[i]);
266 else
267 fprintf (stderr,
268 "inverse name lookup of \"%s\" returned 0x%04X\n",
269 unicode_names[i], result);
270 error = 1;
271 }
272 }
273
274
275
276 for (i = 0; i < 10000; i++)
277 {
278 unsigned int i1, i2;
279 const char *s1;
280 const char *s2;
281 unsigned int l1, l2, j1, j2;
282 char buf[2*UNINAME_MAX];
283 unsigned int result;
284
285 do i1 = ((rand () % 0x11) << 16)
286 + ((rand () & 0xff) << 8)
287 + (rand () & 0xff);
288 while (unicode_names[i1] == NULL);
289
290 do i2 = ((rand () % 0x11) << 16)
291 + ((rand () & 0xff) << 8)
292 + (rand () & 0xff);
293 while (unicode_names[i2] == NULL);
294
295 s1 = unicode_names[i1];
296 l1 = strlen (s1);
297 s2 = unicode_names[i2];
298 l2 = strlen (s2);
299
300
301 for (j1 = 1; j1 <= l1; j1++)
302 if (j1 == l1 || s1[j1] == ' ')
303 for (j2 = 0; j2 < l2; j2++)
304 if (j2 == 0 || s2[j2-1] == ' ')
305 {
306 memcpy (buf, s1, j1);
307 buf[j1] = ' ';
308 memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1);
309
310 result = unicode_name_character (buf);
311 if (result != UNINAME_INVALID
312 && !name_has_alias (result)
313 && !(unicode_names[result] != NULL
314 && strcmp (unicode_names[result], buf) == 0))
315 {
316 fprintf (stderr,
317 "inverse name lookup of \"%s\" returned 0x%04X\n",
318 unicode_names[i], result);
319 error = 1;
320 }
321 }
322 }
323
324
325 if (unicode_name_character ("A A") != UNINAME_INVALID)
326 error = 1;
327
328 return error;
329 }
330
331
332 static int
333 test_alias_lookup ()
334 {
335 int error = 0;
336 unsigned int i;
337 char buf[UNINAME_MAX];
338
339
340 for (i = 0; i < ALIASLEN; i++)
341 if (unicode_aliases[i].uc != UNINAME_INVALID
342
343
344 && unicode_character_name (unicode_aliases[i].uc, buf))
345 {
346 unsigned int result = unicode_name_character (unicode_aliases[i].name);
347 if (result != unicode_aliases[i].uc)
348 {
349 if (result == UNINAME_INVALID)
350 fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
351 unicode_aliases[i].name);
352 else
353 fprintf (stderr,
354 "inverse name lookup of \"%s\" returned 0x%04X\n",
355 unicode_aliases[i].name, result);
356 error = 1;
357 }
358 }
359
360 return error;
361 }
362
363 int
364 main (int argc, char *argv[])
365 {
366 int error = 0;
367 int i;
368
369 for (i = 1; i < argc && strcmp (argv[i], "--") != 0; i++)
370 fill_names (argv[i]);
371
372 if (i < argc)
373 {
374 int j;
375 for (j = 0; j < ALIASLEN; j++)
376 unicode_aliases[j].uc = UNINAME_INVALID;
377
378 i++;
379 for (; i < argc; i++)
380 fill_aliases (argv[i]);
381 }
382
383 error |= test_name_lookup ();
384 error |= test_inverse_lookup ();
385
386 if (aliases_count > 0)
387 error |= test_alias_lookup ();
388
389 return error;
390 }