root/maint/gnulib/lib/unigbrk/u-grapheme-breaks.h

/* [previous][next][first][last][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes the following definitions.
  1. FUNC

   1 /* Grapheme cluster break function.
   2    Copyright (C) 2010-2021 Free Software Foundation, Inc.
   3    Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
   4 
   5    This file is free software.
   6    It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
   7    You can redistribute it and/or modify it under either
   8      - the terms of the GNU Lesser General Public License as published
   9        by the Free Software Foundation; either version 3, or (at your
  10        option) any later version, or
  11      - the terms of the GNU General Public License as published by the
  12        Free Software Foundation; either version 2, or (at your option)
  13        any later version, or
  14      - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
  15 
  16    This file is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19    Lesser General Public License and the GNU General Public License
  20    for more details.
  21 
  22    You should have received a copy of the GNU Lesser General Public
  23    License and of the GNU General Public License along with this
  24    program.  If not, see <https://www.gnu.org/licenses/>.  */
  25 
  26 void
  27 FUNC (const UNIT *s, size_t n, char *p)
     /* [previous][next][first][last][top][bottom][index][help] */
  28 {
  29   if (n > 0)
  30     {
  31       const UNIT *s_end = s + n;
  32 
  33       /* Grapheme Cluster break property of the last character.
  34          -1 at the very beginning of the string.  */
  35       int last_char_prop = -1;
  36 
  37       /* Grapheme Cluster break property of the last complex character.
  38          -1 at the very beginning of the string.  */
  39       int last_compchar_prop = -1;
  40 
  41       size_t ri_count = 0;
  42 
  43       /* Don't break inside multibyte characters.  */
  44       memset (p, 0, n);
  45 
  46       while (s < s_end)
  47         {
  48           ucs4_t uc;
  49           int count = U_MBTOUC (&uc, s, s_end - s);
  50           int prop = uc_graphemeclusterbreak_property (uc);
  51 
  52           /* Break at the start of the string (GB1).  */
  53           if (last_char_prop < 0)
  54             *p = 1;
  55           else
  56             {
  57               /* No break between CR and LF (GB3).  */
  58               if (last_char_prop == GBP_CR && prop == GBP_LF)
  59                 /* *p = 0 */;
  60               /* Break before and after newlines (GB4, GB5).  */
  61               else if ((last_char_prop == GBP_CR
  62                         || last_char_prop == GBP_LF
  63                         || last_char_prop == GBP_CONTROL)
  64                        || (prop == GBP_CR
  65                            || prop == GBP_LF
  66                            || prop == GBP_CONTROL))
  67                 *p = 1;
  68               /* No break between Hangul syllable sequences (GB6, GB7, GB8).  */
  69               else if ((last_char_prop == GBP_L
  70                         && (prop == GBP_L
  71                             || prop == GBP_V
  72                             || prop == GBP_LV
  73                             || prop == GBP_LVT))
  74                        || ((last_char_prop == GBP_LV
  75                             || last_char_prop == GBP_V)
  76                            && (prop == GBP_V
  77                                || prop == GBP_T))
  78                        || ((last_char_prop == GBP_LVT
  79                             || last_char_prop == GBP_T)
  80                            && prop == GBP_T))
  81                 /* *p = 0 */;
  82               /* No break before extending characters or ZWJ (GB9).  */
  83               else if (prop == GBP_EXTEND || prop == GBP_ZWJ)
  84                 /* *p = 0 */;
  85               /* No break before SpacingMarks (GB9a).  */
  86               else if (prop == GBP_SPACINGMARK)
  87                 /* *p = 0 */;
  88               /* No break after Prepend characters (GB9b).  */
  89               else if (last_char_prop == GBP_PREPEND)
  90                 /* *p = 0 */;
  91               /* No break within emoji modifier sequences (GB10).  */
  92               else if ((last_compchar_prop == GBP_EB
  93                         || last_compchar_prop == GBP_EBG)
  94                        && prop == GBP_EM)
  95                 /* *p = 0 */;
  96               /* No break within emoji zwj sequences (GB11).  */
  97               else if (last_char_prop == GBP_ZWJ
  98                        && (prop == GBP_GAZ
  99                            || prop == GBP_EBG))
 100                 /* *p = 0 */;
 101               /* No break between RI if there is an odd number of RI
 102                  characters before (GB12, GB13).  */
 103               else if (prop == GBP_RI)
 104                 {
 105                   if (ri_count % 2 == 0)
 106                     *p = 1;
 107                   /* else *p = 0; */
 108                 }
 109               /* Break everywhere (GBP999).  */
 110               else
 111                 *p = 1;
 112             }
 113 
 114           last_char_prop = prop;
 115 
 116           if (!(prop == GBP_EXTEND
 117                 && (last_compchar_prop == GBP_EB
 118                     || last_compchar_prop == GBP_EBG)))
 119             last_compchar_prop = prop;
 120 
 121           if (prop == GBP_RI)
 122             ri_count++;
 123           else
 124             ri_count = 0;
 125 
 126           s += count;
 127           p += count;
 128         }
 129     }
 130 }

/* [previous][next][first][last][top][bottom][index][help] */