1 /* Test whether a Unicode character is case-ignorable.
2 Copyright (C) 2002, 2006-2007, 2009-2021 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2009.
4
5 This file is free software.
6 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
7 You can redistribute it and/or modify it under either
8 - the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version, or
11 - the terms of the GNU General Public License as published by the
12 Free Software Foundation; either version 2, or (at your option)
13 any later version, or
14 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
15
16 This file is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License and the GNU General Public License
20 for more details.
21
22 You should have received a copy of the GNU Lesser General Public
23 License and of the GNU General Public License along with this
24 program. If not, see <https://www.gnu.org/licenses/>. */
25
26 #include <config.h>
27
28 /* Specification. */
29 #include "caseprop.h"
30
31 /* Quoting the Unicode standard:
32 Definition: A character is defined to be "case-ignorable" if it has the
33 value MidLetter {or the value MidNumLet} for the Word_Break property or
34 its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me),
35 Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk).
36 The text marked in braces was added in Unicode 5.1.0, see
37 <https://www.unicode.org/versions/Unicode5.1.0/> section "Update of
38 Definition of case-ignorable". */
39 /* Since this predicate is only used for the "Before C" and "After C"
40 conditions of FINAL_SIGMA, we exclude the "cased" characters here.
41 This simplifies the evaluation of the regular expressions
42 \p{cased} (\p{case-ignorable})* C
43 and
44 C (\p{case-ignorable})* \p{cased}
45 */
46
47 #if 0
48
49 #include "unictype.h"
50 #include "uniwbrk.h"
51
/* Reference implementation of the case-ignorable test for the character UC.
   This variant sits in the "#if 0" branch, so it is not compiled.

   It returns true when both of the following hold:
     - the word break property of UC is WBP_MIDLETTER or WBP_MIDNUMLET, or
       UC matches one of the general category masks Mn, Me, Cf, Lm, Sk;
     - uc_is_cased (UC) is false.
   The second condition is the exclusion of "cased" characters that the
   comment about FINAL_SIGMA earlier in this file explains.  */
52 bool
53 uc_is_case_ignorable (ucs4_t uc)
54 {
55 int wbp = uc_wordbreak_property (uc);
56
57 return (wbp == WBP_MIDLETTER || wbp == WBP_MIDNUMLET
58 || uc_is_general_category_withtable (uc, UC_CATEGORY_MASK_Mn
59 | UC_CATEGORY_MASK_Me
60 | UC_CATEGORY_MASK_Cf
61 | UC_CATEGORY_MASK_Lm
62 | UC_CATEGORY_MASK_Sk))
63 && !uc_is_cased (uc);
64 }
65
66 #else
67
68 #include "unictype/bitmap.h"
69
70 /* Define u_casing_property_case_ignorable table. */
71 #include "ignorable.h"
72
/* Test whether the character UC is case-ignorable.
   This is the variant in the "#else" branch, i.e. the one that is compiled.

   The result is whatever bitmap_lookup reports for UC in the
   u_casing_property_case_ignorable table, which "ignorable.h" defines.
   NOTE(review): the table is presumably generated so that it matches the
   definition quoted at the top of this file, with cased characters
   excluded -- confirm against the table generator.  */
73 bool
74 uc_is_case_ignorable (ucs4_t uc)
75 {
76 return bitmap_lookup (&u_casing_property_case_ignorable, uc);
77 }
78
79 #endif