1 /* Line breaking auxiliary tables.
2 Copyright (C) 2001-2003, 2006-2021 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2001.
4
5 This file is free software.
6 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
7 You can redistribute it and/or modify it under either
8 - the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version, or
11 - the terms of the GNU General Public License as published by the
12 Free Software Foundation; either version 2, or (at your option)
13 any later version, or
14 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
15
16 This file is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License and the GNU General Public License
20 for more details.
21
22 You should have received a copy of the GNU Lesser General Public
23 License and of the GNU General Public License along with this
24 program. If not, see <https://www.gnu.org/licenses/>. */
25
26 #include "unitypes.h"
27
28 /* Line breaking classification. */
29
/* Line breaking classes, listed in order of their numeric value.

   Classes that are deliberately absent from this enumeration:
     LBP_CR  carriage return - not used here because it's a DOSism
     LBP_LF  line feed - not used here because it's a DOSism
     LBP_NL  next line - not used here because it's equivalent to LBP_BK
     LBP_SG  surrogates - not used here because they are not characters
     LBP_CJ  conditional Japanese starters, resolved to NS  */
enum
{
  /* Values 0..29.  These lie within the bounds of unilbrk_table[30][30],
     the table indexed by two line breaking classifications.  */
  LBP_WJ  =  0, /* word joiner */
  LBP_GL  =  1, /* non-breaking (glue) */
  LBP_B2  =  2, /* break opportunity before and after */
  LBP_BA  =  3, /* break opportunity after */
  LBP_BB  =  4, /* break opportunity before */
  LBP_HY  =  5, /* hyphen */
  LBP_CL  =  6, /* closing punctuation */
  LBP_CP  =  7, /* closing parenthesis */
  LBP_EX  =  8, /* exclamation/interrogation */
  LBP_IN  =  9, /* inseparable */
  LBP_NS  = 10, /* non starter */
  LBP_OP  = 11, /* opening punctuation */
  LBP_QU  = 12, /* ambiguous quotation */
  LBP_IS  = 13, /* infix separator (numeric) */
  LBP_NU  = 14, /* numeric */
  LBP_PO  = 15, /* postfix (numeric) */
  LBP_PR  = 16, /* prefix (numeric) */
  LBP_SY  = 17, /* symbols allowing breaks */
  LBP_AL  = 18, /* ordinary alphabetic and symbol characters */
  LBP_H2  = 19, /* Hangul LV syllable */
  LBP_H3  = 20, /* Hangul LVT syllable */
  LBP_ID  = 21, /* ideographic */
  LBP_JL  = 22, /* Hangul L Jamo */
  LBP_JV  = 23, /* Hangul V Jamo */
  LBP_JT  = 24, /* Hangul T Jamo */
  LBP_HL  = 25, /* Hebrew letter */
  LBP_RI  = 26, /* regional indicator */
  LBP_ZWJ = 27, /* zero width joiner */
  LBP_EB  = 28, /* emoji base */
  LBP_EM  = 29, /* emoji modifier */

  /* Values >= 30 are resolved at run time.  */
  LBP_BK  = 30, /* mandatory break */
  LBP_CM  = 31, /* attached characters and combining marks */
  LBP_ZW  = 32, /* zero width space */
  LBP_SP  = 33, /* space */
  LBP_CB  = 34, /* contingent break opportunity */
  LBP_AI  = 35, /* ambiguous (alphabetic or ideograph) */
  LBP_SA  = 36, /* complex context (South East Asian) */
  LBP_XX  = 37  /* unknown */
};
77
78 #include "lbrkprop1.h"
79
80 static inline unsigned char
81 unilbrkprop_lookup (ucs4_t uc)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
82 {
83 unsigned int index1 = uc >> lbrkprop_header_0;
84 if (index1 < lbrkprop_header_1)
85 {
86 int lookup1 = unilbrkprop.level1[index1];
87 if (lookup1 >= 0)
88 {
89 unsigned int index2 = (uc >> lbrkprop_header_2) & lbrkprop_header_3;
90 int lookup2 = unilbrkprop.level2[lookup1 + index2];
91 if (lookup2 >= 0)
92 {
93 unsigned int index3 = uc & lbrkprop_header_4;
94 return unilbrkprop.level3[lookup2 + index3];
95 }
96 }
97 }
98 return LBP_XX;
99 }
100
/* Break opportunity codes.  Each one is described by the symbol that
   stands for it in table 7.3 of UTR #14.  */

/* Direct break opportunity; an empty cell in table 7.3 of UTR #14.  */
#define D 1

/* Indirect break opportunity; '%' in table 7.3 of UTR #14.  */
#define I 2

/* Prohibited break; '^' in table 7.3 of UTR #14.  */
#define P 3

/* Table indexed by two line breaking classifications.  */
extern const unsigned char unilbrk_table[30][30];
107
108 /* We don't support line breaking of complex-context dependent characters
109 (Thai, Lao, Myanmar, Khmer) yet, because it requires dictionary lookup. */