1 /* Test whether a Unicode character is case-ignorable. 2 Copyright (C) 2002, 2006-2007, 2009-2021 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2009. 4 5 This file is free software. 6 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". 7 You can redistribute it and/or modify it under either 8 - the terms of the GNU Lesser General Public License as published 9 by the Free Software Foundation; either version 3, or (at your 10 option) any later version, or 11 - the terms of the GNU General Public License as published by the 12 Free Software Foundation; either version 2, or (at your option) 13 any later version, or 14 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". 15 16 This file is distributed in the hope that it will be useful, 17 but WITHOUT ANY WARRANTY; without even the implied warranty of 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 Lesser General Public License and the GNU General Public License 20 for more details. 21 22 You should have received a copy of the GNU Lesser General Public 23 License and of the GNU General Public License along with this 24 program. If not, see <https://www.gnu.org/licenses/>. */ 25 26 #include <config.h> 27 28 /* Specification. */ 29 #include "caseprop.h" 30 31 /* Quoting the Unicode standard: 32 Definition: A character is defined to be "case-ignorable" if it has the 33 value MidLetter {or the value MidNumLet} for the Word_Break property or 34 its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me), 35 Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk). 36 The text marked in braces was added in Unicode 5.1.0, see 37 <https://www.unicode.org/versions/Unicode5.1.0/> section "Update of 38 Definition of case-ignorable". */ 39 /* Since this predicate is only used for the "Before C" and "After C" 40 conditions of FINAL_SIGMA, we exclude the "cased" characters here. 41 This simplifies the evaluation of the regular expressions 42 \p{cased} (\p{case-ignorable})* C 43 and 44 C (\p{case-ignorable})* \p{cased} 45 */ 46 47 #if 0 48 49 #include "unictype.h" 50 #include "uniwbrk.h" 51 52 bool 53 uc_is_case_ignorable (ucs4_t uc) /* */ 54 { 55 int wbp = uc_wordbreak_property (uc); 56 57 return (wbp == WBP_MIDLETTER || wbp == WBP_MIDNUMLET 58 || uc_is_general_category_withtable (uc, UC_CATEGORY_MASK_Mn 59 | UC_CATEGORY_MASK_Me 60 | UC_CATEGORY_MASK_Cf 61 | UC_CATEGORY_MASK_Lm 62 | UC_CATEGORY_MASK_Sk)) 63 && !uc_is_cased (uc); 64 } 65 66 #else 67 68 #include "unictype/bitmap.h" 69 70 /* Define u_casing_property_case_ignorable table. */ 71 #include "ignorable.h" 72 73 bool 74 uc_is_case_ignorable (ucs4_t uc) /* */ 75 { 76 return bitmap_lookup (&u_casing_property_case_ignorable, uc); 77 } 78 79 #endif