1 /* Grapheme cluster break function. 2 Copyright (C) 2010-2021 Free Software Foundation, Inc. 3 Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. 4 5 This file is free software. 6 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". 7 You can redistribute it and/or modify it under either 8 - the terms of the GNU Lesser General Public License as published 9 by the Free Software Foundation; either version 3, or (at your 10 option) any later version, or 11 - the terms of the GNU General Public License as published by the 12 Free Software Foundation; either version 2, or (at your option) 13 any later version, or 14 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". 15 16 This file is distributed in the hope that it will be useful, 17 but WITHOUT ANY WARRANTY; without even the implied warranty of 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 Lesser General Public License and the GNU General Public License 20 for more details. 21 22 You should have received a copy of the GNU Lesser General Public 23 License and of the GNU General Public License along with this 24 program. If not, see <https://www.gnu.org/licenses/>. */ 25 26 void 27 FUNC (const UNIT *s, size_t n, char *p) /* */ 28 { 29 if (n > 0) 30 { 31 const UNIT *s_end = s + n; 32 33 /* Grapheme Cluster break property of the last character. 34 -1 at the very beginning of the string. */ 35 int last_char_prop = -1; 36 37 /* Grapheme Cluster break property of the last complex character. 38 -1 at the very beginning of the string. */ 39 int last_compchar_prop = -1; 40 41 size_t ri_count = 0; 42 43 /* Don't break inside multibyte characters. */ 44 memset (p, 0, n); 45 46 while (s < s_end) 47 { 48 ucs4_t uc; 49 int count = U_MBTOUC (&uc, s, s_end - s); 50 int prop = uc_graphemeclusterbreak_property (uc); 51 52 /* Break at the start of the string (GB1). */ 53 if (last_char_prop < 0) 54 *p = 1; 55 else 56 { 57 /* No break between CR and LF (GB3). */ 58 if (last_char_prop == GBP_CR && prop == GBP_LF) 59 /* *p = 0 */; 60 /* Break before and after newlines (GB4, GB5). */ 61 else if ((last_char_prop == GBP_CR 62 || last_char_prop == GBP_LF 63 || last_char_prop == GBP_CONTROL) 64 || (prop == GBP_CR 65 || prop == GBP_LF 66 || prop == GBP_CONTROL)) 67 *p = 1; 68 /* No break between Hangul syllable sequences (GB6, GB7, GB8). */ 69 else if ((last_char_prop == GBP_L 70 && (prop == GBP_L 71 || prop == GBP_V 72 || prop == GBP_LV 73 || prop == GBP_LVT)) 74 || ((last_char_prop == GBP_LV 75 || last_char_prop == GBP_V) 76 && (prop == GBP_V 77 || prop == GBP_T)) 78 || ((last_char_prop == GBP_LVT 79 || last_char_prop == GBP_T) 80 && prop == GBP_T)) 81 /* *p = 0 */; 82 /* No break before extending characters or ZWJ (GB9). */ 83 else if (prop == GBP_EXTEND || prop == GBP_ZWJ) 84 /* *p = 0 */; 85 /* No break before SpacingMarks (GB9a). */ 86 else if (prop == GBP_SPACINGMARK) 87 /* *p = 0 */; 88 /* No break after Prepend characters (GB9b). */ 89 else if (last_char_prop == GBP_PREPEND) 90 /* *p = 0 */; 91 /* No break within emoji modifier sequences (GB10). */ 92 else if ((last_compchar_prop == GBP_EB 93 || last_compchar_prop == GBP_EBG) 94 && prop == GBP_EM) 95 /* *p = 0 */; 96 /* No break within emoji zwj sequences (GB11). */ 97 else if (last_char_prop == GBP_ZWJ 98 && (prop == GBP_GAZ 99 || prop == GBP_EBG)) 100 /* *p = 0 */; 101 /* No break between RI if there is an odd number of RI 102 characters before (GB12, GB13). */ 103 else if (prop == GBP_RI) 104 { 105 if (ri_count % 2 == 0) 106 *p = 1; 107 /* else *p = 0; */ 108 } 109 /* Break everywhere (GBP999). */ 110 else 111 *p = 1; 112 } 113 114 last_char_prop = prop; 115 116 if (!(prop == GBP_EXTEND 117 && (last_compchar_prop == GBP_EB 118 || last_compchar_prop == GBP_EBG))) 119 last_compchar_prop = prop; 120 121 if (prop == GBP_RI) 122 ri_count++; 123 else 124 ri_count = 0; 125 126 s += count; 127 p += count; 128 } 129 } 130 }