/* Grapheme cluster break function.
   Copyright (C) 2010-2021 Free Software Foundation, Inc.
   Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.

   This file is free software.
   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
   You can redistribute it and/or modify it under either
     - the terms of the GNU Lesser General Public License as published
       by the Free Software Foundation; either version 3, or (at your
       option) any later version, or
     - the terms of the GNU General Public License as published by the
       Free Software Foundation; either version 2, or (at your option)
       any later version, or
     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License and the GNU General Public License
   for more details.

   You should have received a copy of the GNU Lesser General Public
   License and of the GNU General Public License along with this
   program.  If not, see <https://www.gnu.org/licenses/>.  */

26 void
27 FUNC (const UNIT *s, size_t n, char *p)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
28 {
29 if (n > 0)
30 {
31 const UNIT *s_end = s + n;
32
33 /* Grapheme Cluster break property of the last character.
34 -1 at the very beginning of the string. */
35 int last_char_prop = -1;
36
37 /* Grapheme Cluster break property of the last complex character.
38 -1 at the very beginning of the string. */
39 int last_compchar_prop = -1;
40
41 size_t ri_count = 0;
42
43 /* Don't break inside multibyte characters. */
44 memset (p, 0, n);
45
46 while (s < s_end)
47 {
48 ucs4_t uc;
49 int count = U_MBTOUC (&uc, s, s_end - s);
50 int prop = uc_graphemeclusterbreak_property (uc);
51
52 /* Break at the start of the string (GB1). */
53 if (last_char_prop < 0)
54 *p = 1;
55 else
56 {
57 /* No break between CR and LF (GB3). */
58 if (last_char_prop == GBP_CR && prop == GBP_LF)
59 /* *p = 0 */;
60 /* Break before and after newlines (GB4, GB5). */
61 else if ((last_char_prop == GBP_CR
62 || last_char_prop == GBP_LF
63 || last_char_prop == GBP_CONTROL)
64 || (prop == GBP_CR
65 || prop == GBP_LF
66 || prop == GBP_CONTROL))
67 *p = 1;
68 /* No break between Hangul syllable sequences (GB6, GB7, GB8). */
69 else if ((last_char_prop == GBP_L
70 && (prop == GBP_L
71 || prop == GBP_V
72 || prop == GBP_LV
73 || prop == GBP_LVT))
74 || ((last_char_prop == GBP_LV
75 || last_char_prop == GBP_V)
76 && (prop == GBP_V
77 || prop == GBP_T))
78 || ((last_char_prop == GBP_LVT
79 || last_char_prop == GBP_T)
80 && prop == GBP_T))
81 /* *p = 0 */;
82 /* No break before extending characters or ZWJ (GB9). */
83 else if (prop == GBP_EXTEND || prop == GBP_ZWJ)
84 /* *p = 0 */;
85 /* No break before SpacingMarks (GB9a). */
86 else if (prop == GBP_SPACINGMARK)
87 /* *p = 0 */;
88 /* No break after Prepend characters (GB9b). */
89 else if (last_char_prop == GBP_PREPEND)
90 /* *p = 0 */;
91 /* No break within emoji modifier sequences (GB10). */
92 else if ((last_compchar_prop == GBP_EB
93 || last_compchar_prop == GBP_EBG)
94 && prop == GBP_EM)
95 /* *p = 0 */;
96 /* No break within emoji zwj sequences (GB11). */
97 else if (last_char_prop == GBP_ZWJ
98 && (prop == GBP_GAZ
99 || prop == GBP_EBG))
100 /* *p = 0 */;
101 /* No break between RI if there is an odd number of RI
102 characters before (GB12, GB13). */
103 else if (prop == GBP_RI)
104 {
105 if (ri_count % 2 == 0)
106 *p = 1;
107 /* else *p = 0; */
108 }
109 /* Break everywhere (GBP999). */
110 else
111 *p = 1;
112 }
113
114 last_char_prop = prop;
115
116 if (!(prop == GBP_EXTEND
117 && (last_compchar_prop == GBP_EB
118 || last_compchar_prop == GBP_EBG)))
119 last_compchar_prop = prop;
120
121 if (prop == GBP_RI)
122 ri_count++;
123 else
124 ri_count = 0;
125
126 s += count;
127 p += count;
128 }
129 }
130 }