root/maint/gnulib/lib/unicase/context.h

/* [previous][next][first][last][top][bottom][index][help] */

INCLUDED FROM


   1 /* Case-mapping contexts of UTF-8/UTF-16/UTF-32 substring.
   2    Copyright (C) 2009-2021 Free Software Foundation, Inc.
   3    Written by Bruno Haible <bruno@clisp.org>, 2009.
   4 
   5    This file is free software.
   6    It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
   7    You can redistribute it and/or modify it under either
   8      - the terms of the GNU Lesser General Public License as published
   9        by the Free Software Foundation; either version 3, or (at your
  10        option) any later version, or
  11      - the terms of the GNU General Public License as published by the
  12        Free Software Foundation; either version 2, or (at your option)
  13        any later version, or
  14      - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
  15 
  16    This file is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19    Lesser General Public License and the GNU General Public License
  20    for more details.
  21 
  22    You should have received a copy of the GNU Lesser General Public
  23    License and of the GNU General Public License along with this
  24    program.  If not, see <https://www.gnu.org/licenses/>.  */
  25 
  26 
  27 /* The context of a prefix string combines the information of the "Before C"
  28    conditions of the Unicode Standard,
  29    <https://www.unicode.org/versions/Unicode5.0.0/ch03.pdf>, section 3.13,
  30    table 3-14 "Context Specification for Casing".
  31 
  32    casing_prefix_context_t contains the following fields:
  33 
  34      // Helper for evaluating the FINAL_SIGMA condition:
  35      //  Last character that was not case-ignorable.
  36      ucs4_t last_char_except_ignorable;
  37 
  38      // Helper for evaluating the AFTER_SOFT_DOTTED and AFTER_I conditions:
  39      // Last character that was of combining class 230 ("Above") or 0.
  40      ucs4_t last_char_normal_or_above;
  41 
  42    Three bits would be sufficient to carry the context information, but
  43    that would require invoking uc_is_cased and uc_is_property_soft_dotted
  44    ahead of time, more often than actually needed.  */
  45 
  46 
  47 /* The context of a suffix string combines the information of the "After C"
  48    conditions of the Unicode Standard,
  49    <https://www.unicode.org/versions/Unicode5.0.0/ch03.pdf>, section 3.13,
  50    table 3-14 "Context Specification for Casing".
  51 
  52    casing_suffix_context_t contains the following fields:
  53 
  54      // For evaluating the FINAL_SIGMA condition:
  55      //  First character that was not case-ignorable.
  56      ucs4_t first_char_except_ignorable;
  57 
  58      // For evaluating the MORE_ABOVE condition:
  59      // Bit 0 is set if the suffix contains a character of combining class
  60      // 230 (Above) with no character of combining class 0 or 230 (Above)
  61      // before it.
  62      //
  63      // For evaluating the BEFORE_DOT condition:
  64      // Bit 1 is set if the suffix contains a COMBINING DOT ABOVE (U+0307)
  65      // with no character of combining class 0 or 230 (Above) before it.
  66      //
  67      uint32_t bits;
  68 
  69    Three bits would be sufficient to carry the context information, but
  70    that would require invoking uc_is_cased ahead of time, more often than
  71    actually needed.  */
  72 #define SCC_MORE_ABOVE_MASK  1  /* Mask for bit 0 of 'bits': the MORE_ABOVE condition.  */
  73 #define SCC_BEFORE_DOT_MASK  2  /* Mask for bit 1 of 'bits': the BEFORE_DOT condition.  */

/* [previous][next][first][last][top][bottom][index][help] */