This source file includes the following definitions.
- u8_possible_linebreaks
- read_file
- main
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 #include <config.h>
27
28
29 #include "unilbrk.h"
30
31 #include <stdlib.h>
32 #include <string.h>
33
34 #include "unilbrk/lbrktables.h"
35 #include "uniwidth/cjk.h"
36 #include "unistr.h"
37
38 void
39 u8_possible_linebreaks (const uint8_t *s, size_t n, const char *encoding, char *p)
40 {
41 if (n > 0)
42 {
43 int LBP_AI_REPLACEMENT = (is_cjk_encoding (encoding) ? LBP_ID : LBP_AL);
44 const uint8_t *s_end = s + n;
45 int last_prop = LBP_BK;
46 char *seen_space = NULL;
47 char *seen_space2 = NULL;
48
49
50 memset (p, UC_BREAK_PROHIBITED, n);
51
52 do
53 {
54 ucs4_t uc;
55 int count = u8_mbtouc_unsafe (&uc, s, s_end - s);
56 int prop = unilbrkprop_lookup (uc);
57
58 if (prop == LBP_BK)
59 {
60
61 *p = UC_BREAK_MANDATORY;
62 last_prop = LBP_BK;
63 seen_space = NULL;
64 seen_space2 = NULL;
65 }
66 else
67 {
68 char *q;
69
70
71 switch (prop)
72 {
73 case LBP_AI:
74
75 prop = LBP_AI_REPLACEMENT;
76 break;
77 case LBP_CB:
78
79 prop = LBP_ID;
80 break;
81 case LBP_SA:
82
83
84 case LBP_XX:
85
86 prop = LBP_AL;
87 break;
88 }
89
90
91 q = p;
92 if (prop == LBP_SP)
93 {
94
95 *p = UC_BREAK_PROHIBITED;
96 seen_space2 = seen_space;
97 seen_space = p;
98 }
99 else if (prop == LBP_ZW)
100 {
101
102 *p = UC_BREAK_PROHIBITED;
103 last_prop = LBP_ZW;
104 seen_space = NULL;
105 seen_space2 = NULL;
106 }
107 else if (prop == LBP_CM)
108 {
109
110
111 if (last_prop == LBP_ZW)
112 {
113
114 *p = UC_BREAK_POSSIBLE;
115
116 last_prop = LBP_ID;
117 }
118 else
119 {
120 *p = UC_BREAK_PROHIBITED;
121
122 if (seen_space != NULL)
123 {
124 q = seen_space;
125 seen_space = seen_space2;
126 prop = LBP_ID;
127 goto lookup_via_table;
128 }
129 }
130 }
131 else
132 {
133 lookup_via_table:
134
135 if (!(prop >= 0 && prop < sizeof (unilbrk_table) / sizeof (unilbrk_table[0])))
136 abort ();
137
138 if (last_prop == LBP_BK)
139 {
140
141 *q = UC_BREAK_PROHIBITED;
142 }
143 else if (last_prop == LBP_ZW)
144 {
145
146 *q = UC_BREAK_POSSIBLE;
147 }
148 else
149 {
150 switch (unilbrk_table [last_prop] [prop])
151 {
152 case D:
153 *q = UC_BREAK_POSSIBLE;
154 break;
155 case I:
156 *q = (seen_space != NULL ? UC_BREAK_POSSIBLE : UC_BREAK_PROHIBITED);
157 break;
158 case P:
159 *q = UC_BREAK_PROHIBITED;
160 break;
161 default:
162 abort ();
163 }
164 }
165 last_prop = prop;
166 seen_space = NULL;
167 seen_space2 = NULL;
168 }
169 }
170
171 s += count;
172 p += count;
173 }
174 while (s < s_end);
175 }
176 }
177
178
179 #ifdef TEST
180
181 #include <stdio.h>
182 #include <string.h>
183
184
185
/* Read everything that remains on STREAM into a freshly allocated buffer,
   append a terminating NUL byte and return the buffer.  The caller owns the
   result and releases it with free ().

   The data is read in chunks of BUFSIZE bytes; the buffer grows by a factor
   of 1.5 whenever there is not enough room for another full chunk.

   If memory runs out, "out of memory" is printed to stderr and the process
   exits with status 1.  If reading fails, perror ("fread") is called and the
   process exits with status 1.  */
char *
read_file (FILE *stream)
{
#define BUFSIZE 4096
  char *buf = NULL;
  char *resized;
  /* Sizes are kept as size_t: fread returns size_t, and int counters would
     overflow on inputs of 2 GiB or more.  Invariant: size <= alloc.  */
  size_t alloc = 0;
  size_t size = 0;
  size_t count;

  while (! feof (stream))
    {
      /* Guarantee room for one more full chunk before reading.  */
      if (alloc - size < BUFSIZE)
        {
          size_t needed;
          size_t grown;

          /* size + BUFSIZE must not wrap around.  */
          if (size > (size_t) -1 - BUFSIZE)
            {
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
          needed = size + BUFSIZE;
          /* Grow geometrically, saturating instead of wrapping around.  */
          grown = (alloc > (size_t) -1 - alloc / 2
                   ? (size_t) -1
                   : alloc + alloc / 2);
          if (grown < needed)
            grown = needed;
          resized = realloc (buf, grown);
          if (resized == NULL)
            {
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
          buf = resized;
          alloc = grown;
        }
      count = fread (buf + size, 1, BUFSIZE, stream);
      if (count > 0)
        size += count;
      else if (ferror (stream))
        {
          /* A zero count at end of file is not an error; the loop
             condition ends the loop in that case.  */
          perror ("fread");
          exit (1);
        }
    }
  /* Trim to the exact length plus the terminating NUL.  This also
     allocates the buffer when the loop body never ran.  */
  resized = realloc (buf, size + 1);
  if (resized == NULL)
    {
      fprintf (stderr, "out of memory\n");
      exit (1);
    }
  buf = resized;
  buf[size] = '\0';
  return buf;
#undef BUFSIZE
}
231
232 int
233 main (int argc, char * argv[])
234 {
235 if (argc == 1)
236 {
237
238 char *input = read_file (stdin);
239 int length = strlen (input);
240 char *breaks = malloc (length);
241 int i;
242
243 u8_possible_linebreaks ((uint8_t *) input, length, "UTF-8", breaks);
244
245 for (i = 0; i < length; i++)
246 {
247 switch (breaks[i])
248 {
249 case UC_BREAK_POSSIBLE:
250
251 putc (0xe2, stdout); putc (0x80, stdout); putc (0xa7, stdout);
252 break;
253 case UC_BREAK_MANDATORY:
254
255 putc (0xe2, stdout); putc (0x86, stdout); putc (0xb2, stdout);
256 break;
257 case UC_BREAK_PROHIBITED:
258 break;
259 default:
260 abort ();
261 }
262 putc (input[i], stdout);
263 }
264
265 free (breaks);
266
267 return 0;
268 }
269 else
270 return 1;
271 }
272
273 #endif