1 /* Search character in UTF-8 string.
2 Copyright (C) 1999, 2002, 2006-2007, 2009-2021 Free Software Foundation,
3 Inc.
4 Written by Bruno Haible <bruno@clisp.org>, 2002.
5
6 This file is free software.
7 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
8 You can redistribute it and/or modify it under either
9 - the terms of the GNU Lesser General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version, or
12 - the terms of the GNU General Public License as published by the
13 Free Software Foundation; either version 2, or (at your option)
14 any later version, or
15 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
16
17 This file is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 Lesser General Public License and the GNU General Public License
21 for more details.
22
23 You should have received a copy of the GNU Lesser General Public
24 License and of the GNU General Public License along with this
25 program. If not, see <https://www.gnu.org/licenses/>. */
26
27 #include <config.h>
28
29 /* Specification. */
30 #include "unistr.h"
31
32 #include <string.h>
33
34 uint8_t *
35 u8_strchr (const uint8_t *s, ucs4_t uc)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
36 {
37 uint8_t c[6];
38
39 if (uc < 0x80)
40 {
41 uint8_t c0 = uc;
42
43 if (false)
44 {
45 /* Unoptimized code. */
46 for (;;)
47 {
48 uint8_t s0 = *s;
49 if (s0 == c0)
50 return (uint8_t *) s;
51 s++;
52 if (s0 == 0)
53 break;
54 }
55 }
56 else
57 {
58 /* Optimized code.
59 strchr() is often so well optimized, that it's worth the
60 added function call. */
61 return (uint8_t *) strchr ((const char *) s, c0);
62 }
63 }
64 else
65 /* Loops equivalent to strstr, optimized for a specific length (2, 3, 4)
66 of the needle. We use an algorithm similar to Boyer-Moore which
67 is documented in lib/unistr/u8-chr.c. There is additional
68 complication because we need to check after every byte for
69 a NUL byte, but the idea is the same. */
70 switch (u8_uctomb_aux (c, uc, 6))
71 {
72 case 2:
73 if (*s == 0 || s[1] == 0)
74 break;
75 {
76 uint8_t c0 = c[0];
77 uint8_t c1 = c[1];
78 /* Search for { c0, c1 }. */
79 uint8_t s1 = s[1];
80
81 for (;;)
82 {
83 /* Here s[0] != 0, s[1] != 0.
84 Test whether s[0..1] == { c0, c1 }. */
85 if (s1 == c1)
86 {
87 if (*s == c0)
88 return (uint8_t *) s;
89 else
90 /* Skip the search at s + 1, because s[1] = c1 < c0. */
91 goto case2_skip2;
92 }
93 else
94 {
95 if (s1 == c0)
96 goto case2_skip1;
97 else
98 /* Skip the search at s + 1, because s[1] != c0. */
99 goto case2_skip2;
100 }
101 case2_skip2:
102 s++;
103 s1 = s[1];
104 if (s[1] == 0)
105 break;
106 case2_skip1:
107 s++;
108 s1 = s[1];
109 if (s[1] == 0)
110 break;
111 }
112 }
113 break;
114
115 case 3:
116 if (*s == 0 || s[1] == 0 || s[2] == 0)
117 break;
118 {
119 uint8_t c0 = c[0];
120 uint8_t c1 = c[1];
121 uint8_t c2 = c[2];
122 /* Search for { c0, c1, c2 }. */
123 uint8_t s2 = s[2];
124
125 for (;;)
126 {
127 /* Here s[0] != 0, s[1] != 0, s[2] != 0.
128 Test whether s[0..2] == { c0, c1, c2 }. */
129 if (s2 == c2)
130 {
131 if (s[1] == c1 && *s == c0)
132 return (uint8_t *) s;
133 else
134 /* If c2 != c1:
135 Skip the search at s + 1, because s[2] == c2 != c1.
136 Skip the search at s + 2, because s[2] == c2 < c0. */
137 if (c2 == c1)
138 goto case3_skip1;
139 else
140 goto case3_skip3;
141 }
142 else
143 {
144 if (s2 == c1)
145 goto case3_skip1;
146 else if (s2 == c0)
147 /* Skip the search at s + 1, because s[2] != c1. */
148 goto case3_skip2;
149 else
150 /* Skip the search at s + 1, because s[2] != c1.
151 Skip the search at s + 2, because s[2] != c0. */
152 goto case3_skip3;
153 }
154 case3_skip3:
155 s++;
156 s2 = s[2];
157 if (s[2] == 0)
158 break;
159 case3_skip2:
160 s++;
161 s2 = s[2];
162 if (s[2] == 0)
163 break;
164 case3_skip1:
165 s++;
166 s2 = s[2];
167 if (s[2] == 0)
168 break;
169 }
170 }
171 break;
172
173 case 4:
174 if (*s == 0 || s[1] == 0 || s[2] == 0 || s[3] == 0)
175 break;
176 {
177 uint8_t c0 = c[0];
178 uint8_t c1 = c[1];
179 uint8_t c2 = c[2];
180 uint8_t c3 = c[3];
181 /* Search for { c0, c1, c2, c3 }. */
182 uint8_t s3 = s[3];
183
184 for (;;)
185 {
186 /* Here s[0] != 0, s[1] != 0, s[2] != 0, s[3] != 0.
187 Test whether s[0..3] == { c0, c1, c2, c3 }. */
188 if (s3 == c3)
189 {
190 if (s[2] == c2 && s[1] == c1 && *s == c0)
191 return (uint8_t *) s;
192 else
193 /* If c3 != c2:
194 Skip the search at s + 1, because s[3] == c3 != c2.
195 If c3 != c1:
196 Skip the search at s + 2, because s[3] == c3 != c1.
197 Skip the search at s + 3, because s[3] == c3 < c0. */
198 if (c3 == c2)
199 goto case4_skip1;
200 else if (c3 == c1)
201 goto case4_skip2;
202 else
203 goto case4_skip4;
204 }
205 else
206 {
207 if (s3 == c2)
208 goto case4_skip1;
209 else if (s3 == c1)
210 /* Skip the search at s + 1, because s[3] != c2. */
211 goto case4_skip2;
212 else if (s3 == c0)
213 /* Skip the search at s + 1, because s[3] != c2.
214 Skip the search at s + 2, because s[3] != c1. */
215 goto case4_skip3;
216 else
217 /* Skip the search at s + 1, because s[3] != c2.
218 Skip the search at s + 2, because s[3] != c1.
219 Skip the search at s + 3, because s[3] != c0. */
220 goto case4_skip4;
221 }
222 case4_skip4:
223 s++;
224 s3 = s[3];
225 if (s[3] == 0)
226 break;
227 case4_skip3:
228 s++;
229 s3 = s[3];
230 if (s[3] == 0)
231 break;
232 case4_skip2:
233 s++;
234 s3 = s[3];
235 if (s[3] == 0)
236 break;
237 case4_skip1:
238 s++;
239 s3 = s[3];
240 if (s[3] == 0)
241 break;
242 }
243 }
244 break;
245 }
246
247 return NULL;
248 }