This source file includes the following definitions.
- ulc_wordbreaks
- read_file
- main
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 #include <config.h>
27
28
29 #include "uniwbrk.h"
30
31 #include <stdlib.h>
32 #include <string.h>
33
34 #include "c-ctype.h"
35 #include "localcharset.h"
36 #include "uniconv.h"
37 #include "unilbrk/ulc-common.h"
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52 void
53 ulc_wordbreaks (const char *s, size_t n, char *p)
54 {
55 if (n > 0)
56 {
57 const char *encoding = locale_charset ();
58
59 if (is_utf8_encoding (encoding))
60 u8_wordbreaks ((const uint8_t *) s, n, p);
61 else
62 {
63
64
65 size_t *offsets = (size_t *) malloc (n * sizeof (size_t));
66
67 if (offsets != NULL)
68 {
69 uint8_t *t;
70 size_t m;
71
72 t = u8_conv_from_encoding (encoding, iconveh_question_mark,
73 s, n, offsets, NULL, &m);
74 if (t != NULL)
75 {
76 char *q = (char *) (m > 0 ? malloc (m) : NULL);
77
78 if (m == 0 || q != NULL)
79 {
80 size_t i;
81
82
83 u8_wordbreaks (t, m, q);
84
85
86 memset (p, 0, n);
87 for (i = 0; i < n; i++)
88 if (offsets[i] != (size_t)(-1))
89 p[i] = q[offsets[i]];
90
91 free (q);
92 free (t);
93 free (offsets);
94 return;
95 }
96 free (t);
97 }
98 free (offsets);
99 }
100
101
102 #if C_CTYPE_ASCII
103 if (is_all_ascii (s, n))
104 {
105
106 u8_wordbreaks ((const uint8_t *) s, n, p);
107 return;
108 }
109 #endif
110
111
112 memset (p, 0, n);
113 }
114 }
115 }
116
117
118 #ifdef TEST
119
120 #include <locale.h>
121 #include <stdio.h>
122 #include <stdlib.h>
123
124
125
/* Read the remaining contents of STREAM into a freshly allocated buffer,
   append a terminating NUL byte, and return the buffer.  The caller owns
   the result and releases it with free ().

   The function never returns NULL: on allocation failure it prints
   "out of memory" to stderr, on a read error it calls perror ("fread"),
   and in both cases it terminates the process with exit status 1.

   All sizes are tracked as size_t so that inputs larger than INT_MAX
   bytes do not cause signed integer overflow.  */
char *
read_file (FILE *stream)
{
  /* Number of bytes requested from fread () per iteration.  */
  enum { CHUNK_SIZE = 4096 };
  char *buf = NULL;
  size_t alloc = 0;   /* bytes currently allocated for BUF */
  size_t size = 0;    /* bytes of BUF filled so far */
  char *resized;

  while (! feof (stream))
    {
      size_t count;

      /* Make sure a full chunk fits behind the data read so far;
         grow by 50% each time, but at least by one chunk.  */
      if (size + CHUNK_SIZE > alloc)
        {
          alloc = alloc + alloc / 2;
          if (alloc < size + CHUNK_SIZE)
            alloc = size + CHUNK_SIZE;
          resized = realloc (buf, alloc);
          if (resized == NULL)
            {
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
          buf = resized;
        }

      count = fread (buf + size, 1, CHUNK_SIZE, stream);
      if (count == 0)
        {
          /* Zero bytes means either end of file (the loop condition
             then ends the loop) or a read error.  */
          if (ferror (stream))
            {
              perror ("fread");
              exit (1);
            }
        }
      else
        size += count;
    }

  /* Adjust the buffer to the exact size plus room for the NUL.  */
  resized = realloc (buf, size + 1);
  if (resized == NULL)
    {
      fprintf (stderr, "out of memory\n");
      exit (1);
    }
  buf = resized;
  buf[size] = '\0';
  return buf;
}
171
/* Test driver.  When invoked without arguments, reads all of standard
   input, computes its word breaks according to the locale selected by
   the environment (LC_CTYPE), and writes the input back to standard
   output with a '|' inserted before every byte that is flagged as a
   word break.  Returns 0 in that case and 1 when any command-line
   argument is given.

   Note: the input length is taken with strlen (), so processing stops
   at the first NUL byte contained in the input.  */
int
main (int argc, char * argv[])
{
  setlocale (LC_CTYPE, "");
  if (argc == 1)
    {
      /* Display all the word breaks in the input string.  */
      char *input = read_file (stdin);
      size_t length = strlen (input);
      char *breaks = malloc (length);
      size_t i;

      /* malloc (0) may legitimately return NULL; only a failure for a
         non-empty input is an error.  ulc_wordbreaks () does not touch
         the output buffer when the length is 0.  */
      if (breaks == NULL && length > 0)
        {
          fprintf (stderr, "out of memory\n");
          exit (1);
        }

      ulc_wordbreaks (input, length, breaks);

      for (i = 0; i < length; i++)
        {
          switch (breaks[i])
            {
            case 1:
              putc ('|', stdout);
              break;
            case 0:
              break;
            default:
              /* Any other flag value is unexpected here.  */
              abort ();
            }
          putc (input[i], stdout);
        }

      free (breaks);
      free (input);

      return 0;
    }
  else
    return 1;
}
208
209 #endif