This source file includes the following definitions.
- graphemebreakproperty_to_string
- main
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 #include <config.h>
20
21
22 #include <unigbrk.h>
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28 const char *
29 graphemebreakproperty_to_string (int gbp)
30 {
31 printf ("%d\n", gbp);
32 switch (gbp)
33 {
34 #define CASE(VALUE) case GBP_##VALUE: return #VALUE;
35 CASE(OTHER)
36 CASE(CR)
37 CASE(LF)
38 CASE(CONTROL)
39 CASE(EXTEND)
40 CASE(PREPEND)
41 CASE(SPACINGMARK)
42 CASE(L)
43 CASE(V)
44 CASE(T)
45 CASE(LV)
46 CASE(LVT)
47 CASE(RI)
48 CASE(ZWJ)
49 CASE(EB)
50 CASE(EM)
51 CASE(GAZ)
52 CASE(EBG)
53 }
54 abort ();
55 }
56
57 int
58 main (int argc, char *argv[])
59 {
60 const char *filename;
61 char line[1024];
62 int exit_code;
63 FILE *stream;
64 int lineno;
65
66 if (argc != 2)
67 {
68 fprintf (stderr, "usage: %s FILENAME\n"
69 "where FILENAME is the location of the GraphemeBreakTest.txt\n"
70 "test file.\n", argv[0]);
71 exit (1);
72 }
73
74 filename = argv[1];
75 stream = fopen (filename, "r");
76 if (stream == NULL)
77 {
78 fprintf (stderr, "error during fopen of '%s'\n", filename);
79 exit (1);
80 }
81
82 exit_code = 0;
83 lineno = 0;
84 while (fgets (line, sizeof line, stream))
85 {
86 char *comment;
87 const char *p;
88 ucs4_t prev;
89 int last_compchar_prop;
90 size_t ri_count;
91
92 lineno++;
93
94 comment = strchr (line, '#');
95 if (comment != NULL)
96 *comment = '\0';
97 if (line[strspn (line, " \t\r\n")] == '\0')
98 continue;
99
100 last_compchar_prop = -1;
101 ri_count = 0;
102 prev = 0;
103 p = line;
104 do
105 {
106 bool should_break;
107 ucs4_t next;
108
109 p += strspn (p, " \t\r\n");
110 if (!strncmp (p, "\303\267" , 2))
111 {
112 should_break = true;
113 p += 2;
114 }
115 else if (!strncmp (p, "\303\227" , 2))
116 {
117 should_break = false;
118 p += 2;
119 }
120 else
121 {
122 fprintf (stderr, "%s:%d.%d: syntax error expecting '÷' or '×'\n",
123 filename, lineno, (int) (p - line + 1));
124 exit (1);
125 }
126
127 p += strspn (p, " \t\r\n");
128 if (*p == '\0')
129 next = 0;
130 else
131 {
132 unsigned int next_int;
133 int n;
134
135 if (sscanf (p, "%x%n", &next_int, &n) != 1)
136 {
137 fprintf (stderr, "%s:%d.%d: syntax error at '%s' "
138 "expecting hexadecimal Unicode code point number\n",
139 filename, lineno, (int) (p - line + 1), p);
140 exit (1);
141 }
142 p += n;
143
144 next = next_int;
145 }
146
147 if ((last_compchar_prop == GBP_EB
148 || last_compchar_prop == GBP_EBG)
149 && uc_graphemeclusterbreak_property (next) == GBP_EM)
150 {
151 int prev_gbp = uc_graphemeclusterbreak_property (prev);
152 int next_gbp = uc_graphemeclusterbreak_property (next);
153 fprintf (stderr, "%s:%d: skipping GB10: should join U+%04X (%s) "
154 "and U+%04X (%s)\n",
155 filename, lineno,
156 prev, graphemebreakproperty_to_string (prev_gbp),
157 next, graphemebreakproperty_to_string (next_gbp));
158 }
159 else if (uc_graphemeclusterbreak_property (next) == GBP_RI
160 && ri_count % 2 != 0)
161 {
162 int prev_gbp = uc_graphemeclusterbreak_property (prev);
163 int next_gbp = uc_graphemeclusterbreak_property (next);
164 fprintf (stderr, "%s:%d: skipping GB12: should join U+%04X (%s) "
165 "and U+%04X (%s)\n",
166 filename, lineno,
167 prev, graphemebreakproperty_to_string (prev_gbp),
168 next, graphemebreakproperty_to_string (next_gbp));
169 }
170 else if (uc_is_grapheme_break (prev, next) != should_break)
171 {
172 int prev_gbp = uc_graphemeclusterbreak_property (prev);
173 int next_gbp = uc_graphemeclusterbreak_property (next);
174 fprintf (stderr, "%s:%d: should %s U+%04X (%s) and "
175 "U+%04X (%s)\n",
176 filename, lineno,
177 should_break ? "break" : "join",
178 prev, graphemebreakproperty_to_string (prev_gbp),
179 next, graphemebreakproperty_to_string (next_gbp));
180 exit_code = 1;
181 }
182
183 p += strspn (p, " \t\r\n");
184 prev = next;
185
186 if (!(uc_graphemeclusterbreak_property (next) == GBP_EXTEND
187 && (last_compchar_prop == GBP_EB
188 || last_compchar_prop == GBP_EBG)))
189 last_compchar_prop = uc_graphemeclusterbreak_property (next);
190
191 if (uc_graphemeclusterbreak_property (next) == GBP_RI)
192 ri_count++;
193 else
194 ri_count = 0;
195 }
196 while (*p != '\0');
197 }
198
199 return exit_code;
200 }