root/maint/gnulib/lib/unicase/ignorable.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. uc_is_case_ignorable
  2. uc_is_case_ignorable

   1 /* Test whether a Unicode character is case-ignorable.
   2    Copyright (C) 2002, 2006-2007, 2009-2021 Free Software Foundation, Inc.
   3    Written by Bruno Haible <bruno@clisp.org>, 2009.
   4 
   5    This file is free software.
   6    It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
   7    You can redistribute it and/or modify it under either
   8      - the terms of the GNU Lesser General Public License as published
   9        by the Free Software Foundation; either version 3, or (at your
  10        option) any later version, or
  11      - the terms of the GNU General Public License as published by the
  12        Free Software Foundation; either version 2, or (at your option)
  13        any later version, or
  14      - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
  15 
  16    This file is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19    Lesser General Public License and the GNU General Public License
  20    for more details.
  21 
  22    You should have received a copy of the GNU Lesser General Public
  23    License and of the GNU General Public License along with this
  24    program.  If not, see <https://www.gnu.org/licenses/>.  */
  25 
  26 #include <config.h>
  27 
  28 /* Specification.  */
  29 #include "caseprop.h"
  30 
  31 /* Quoting the Unicode standard:
  32      Definition: A character is defined to be "case-ignorable" if it has the
  33      value MidLetter {or the value MidNumLet} for the Word_Break property or
  34      its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me),
  35      Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk).
  36    The text marked in braces was added in Unicode 5.1.0, see
  37    <https://www.unicode.org/versions/Unicode5.1.0/> section "Update of
  38    Definition of case-ignorable".   */
  39 /* Since this predicate is only used for the "Before C" and "After C"
  40    conditions of FINAL_SIGMA, we exclude the "cased" characters here.
  41    This simplifies the evaluation of the regular expressions
  42      \p{cased} (\p{case-ignorable})* C
  43    and
  44      C (\p{case-ignorable})* \p{cased}
  45  */
  46 
  47 #if 0
  48 
  49 #include "unictype.h"
  50 #include "uniwbrk.h"
  51 
  52 bool
  53 uc_is_case_ignorable (ucs4_t uc)
     /* [previous][next][first][last][top][bottom][index][help] */
  54 {
  55   int wbp = uc_wordbreak_property (uc);
  56 
  57   return (wbp == WBP_MIDLETTER || wbp == WBP_MIDNUMLET
  58           || uc_is_general_category_withtable (uc, UC_CATEGORY_MASK_Mn
  59                                                    | UC_CATEGORY_MASK_Me
  60                                                    | UC_CATEGORY_MASK_Cf
  61                                                    | UC_CATEGORY_MASK_Lm
  62                                                    | UC_CATEGORY_MASK_Sk))
  63          && !uc_is_cased (uc);
  64 }
  65 
  66 #else
  67 
  68 #include "unictype/bitmap.h"
  69 
  70 /* Define u_casing_property_case_ignorable table.  */
  71 #include "ignorable.h"
  72 
  73 bool
  74 uc_is_case_ignorable (ucs4_t uc)
     /* [previous][next][first][last][top][bottom][index][help] */
  75 {
  76   return bitmap_lookup (&u_casing_property_case_ignorable, uc);
  77 }
  78 
  79 #endif

/* [previous][next][first][last][top][bottom][index][help] */