1 /* Grapheme cluster break function.
2 Copyright (C) 2010-2021 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
4
5 This file is free software.
6 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
7 You can redistribute it and/or modify it under either
8 - the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version, or
11 - the terms of the GNU General Public License as published by the
12 Free Software Foundation; either version 2, or (at your option)
13 any later version, or
14 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
15
16 This file is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License and the GNU General Public License
20 for more details.
21
22 You should have received a copy of the GNU Lesser General Public
23 License and of the GNU General Public License along with this
24 program. If not, see <https://www.gnu.org/licenses/>. */
25
26 #include <config.h>
27
28 /* Specification. */
29 #include "unigbrk.h"
30
/* Evaluates to true (1) if there is an extended grapheme cluster break
   between code points with GBP_* values A and B, and to false (0) if there
   is not.  Only the two values A and B are consulted.  The rule labels in
   the comments are the grapheme cluster boundary rules from UAX #29.

   GB1 and GB2 are covered: just use a GBP_CONTROL character, such as 0,
   for sot and eot.

   The expression is built from ==, &&, || and ! only, so it is an integer
   constant expression whenever A and B are constants.  */
#define UC_IS_GRAPHEME_BREAK(A, B) \
  (/* GB3: no break between CR and LF.  Tested first, so that it takes \
      precedence over GB4 and GB5.  */ \
   !((A) == GBP_CR && (B) == GBP_LF) \
   && (/* GB4: break after Control, CR and LF.  */ \
       (A) == GBP_CONTROL || (A) == GBP_CR || (A) == GBP_LF \
       /* GB5: break before Control, CR and LF.  */ \
       || (B) == GBP_CONTROL || (B) == GBP_CR || (B) == GBP_LF \
       /* GB999: otherwise break, unless one of the no-break rules \
          below matches.  */ \
       || !(/* GB6 */ \
            ((A) == GBP_L \
             && ((B) == GBP_L || (B) == GBP_V \
                 || (B) == GBP_LV || (B) == GBP_LVT)) \
            /* GB7 */ \
            || (((A) == GBP_LV || (A) == GBP_V) \
                && ((B) == GBP_V || (B) == GBP_T)) \
            /* GB8 */ \
            || (((A) == GBP_LVT || (A) == GBP_T) && (B) == GBP_T) \
            /* GB9 */ \
            || (B) == GBP_EXTEND || (B) == GBP_ZWJ \
            /* GB9a */ \
            || (B) == GBP_SPACINGMARK \
            /* GB9b */ \
            || (A) == GBP_PREPEND \
            /* GB10 -- incomplete */ \
            || (((A) == GBP_EB || (A) == GBP_EBG) && (B) == GBP_EM) \
            /* GB11 */ \
            || ((A) == GBP_ZWJ && ((B) == GBP_GAZ || (B) == GBP_EBG)))))
75
/* Expands to the value of UC_IS_GRAPHEME_BREAK (A, B), as 0 or 1, moved to
   bit position B.  The shift is carried out in unsigned long, the element
   type of gb_table: bit positions go up to GBP_EBG, and a shift of a plain
   int by that many bits is only defined if int is wide enough, which the
   C standard does not guarantee.  B is expanded twice and is therefore
   meant to be a constant.  */
#define UC_GRAPHEME_BREAK_BIT(A, B) \
  ((UC_IS_GRAPHEME_BREAK (A, B) ? 1UL : 0UL) << (B))

/* Expands to a bit mask of type unsigned long for a first code point with
   GBP_* value A: bit number B is set if and only if
   UC_IS_GRAPHEME_BREAK (A, B) is true, for each of the GBP_* values B
   listed below.  */
#define UC_GRAPHEME_BREAKS_FOR(A) \
  (  UC_GRAPHEME_BREAK_BIT (A, GBP_OTHER) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_CR) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_LF) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_CONTROL) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_EXTEND) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_PREPEND) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_SPACINGMARK) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_L) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_V) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_T) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_LV) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_LVT) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_RI) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_ZWJ) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_EB) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_EM) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_GAZ) \
   | UC_GRAPHEME_BREAK_BIT (A, GBP_EBG))
95
96 static const unsigned long int gb_table[18] =
97 {
98 UC_GRAPHEME_BREAKS_FOR(0), /* GBP_OTHER */
99 UC_GRAPHEME_BREAKS_FOR(1), /* GBP_CR */
100 UC_GRAPHEME_BREAKS_FOR(2), /* GBP_LF */
101 UC_GRAPHEME_BREAKS_FOR(3), /* GBP_CONTROL */
102 UC_GRAPHEME_BREAKS_FOR(4), /* GBP_EXTEND */
103 UC_GRAPHEME_BREAKS_FOR(5), /* GBP_PREPEND */
104 UC_GRAPHEME_BREAKS_FOR(6), /* GBP_SPACINGMARK */
105 UC_GRAPHEME_BREAKS_FOR(7), /* GBP_L */
106 UC_GRAPHEME_BREAKS_FOR(8), /* GBP_V */
107 UC_GRAPHEME_BREAKS_FOR(9), /* GBP_T */
108 UC_GRAPHEME_BREAKS_FOR(10), /* GBP_LV */
109 UC_GRAPHEME_BREAKS_FOR(11), /* GBP_LVT */
110 UC_GRAPHEME_BREAKS_FOR(12), /* GBP_RI */
111 UC_GRAPHEME_BREAKS_FOR(13), /* GBP_ZWJ */
112 UC_GRAPHEME_BREAKS_FOR(14), /* GBP_EB */
113 UC_GRAPHEME_BREAKS_FOR(15), /* GBP_EM */
114 UC_GRAPHEME_BREAKS_FOR(16), /* GBP_GAZ */
115 UC_GRAPHEME_BREAKS_FOR(17), /* GBP_EBG */
116 };
117
118 bool
119 uc_is_grapheme_break (ucs4_t a, ucs4_t b)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
120 {
121 int a_gcp, b_gcp;
122
123 if ((a | b) < 0x300)
124 {
125 /* GB3 is the only relevant rule for this case. */
126 return a != '\r' || b != '\n';
127 }
128
129 a_gcp = uc_graphemeclusterbreak_property (a);
130 b_gcp = uc_graphemeclusterbreak_property (b);
131 return (gb_table[a_gcp] >> b_gcp) & 1;
132 }