root/maint/gnulib/lib/unilbrk/u32-possible-linebreaks.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. u32_possible_linebreaks

   1 /* Line breaking of UTF-32 strings.
   2    Copyright (C) 2001-2003, 2006-2021 Free Software Foundation, Inc.
   3    Written by Bruno Haible <bruno@clisp.org>, 2001.
   4 
   5    This file is free software.
   6    It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
   7    You can redistribute it and/or modify it under either
   8      - the terms of the GNU Lesser General Public License as published
   9        by the Free Software Foundation; either version 3, or (at your
  10        option) any later version, or
  11      - the terms of the GNU General Public License as published by the
  12        Free Software Foundation; either version 2, or (at your option)
  13        any later version, or
  14      - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
  15 
  16    This file is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19    Lesser General Public License and the GNU General Public License
  20    for more details.
  21 
  22    You should have received a copy of the GNU Lesser General Public
  23    License and of the GNU General Public License along with this
  24    program.  If not, see <https://www.gnu.org/licenses/>.  */
  25 
  26 #include <config.h>
  27 
  28 /* Specification.  */
  29 #include "unilbrk.h"
  30 
  31 #include <stdlib.h>
  32 
  33 #include "unilbrk/lbrktables.h"
  34 #include "uniwidth/cjk.h"
  35 
  36 void
  37 u32_possible_linebreaks (const uint32_t *s, size_t n, const char *encoding, char *p)
     /* [previous][next][first][last][top][bottom][index][help] */
  38 {
  39   if (n > 0)
  40     {
  41       int LBP_AI_REPLACEMENT = (is_cjk_encoding (encoding) ? LBP_ID : LBP_AL);
  42       const uint32_t *s_end = s + n;
  43       int last_prop = LBP_BK; /* line break property of last non-space character */
  44       char *seen_space = NULL; /* Was a space seen after the last non-space character? */
  45       char *seen_space2 = NULL; /* At least two spaces after the last non-space? */
  46 
  47       do
  48         {
  49           ucs4_t uc = *s;
  50           int prop = unilbrkprop_lookup (uc);
  51 
  52           if (prop == LBP_BK)
  53             {
  54               /* Mandatory break.  */
  55               *p = UC_BREAK_MANDATORY;
  56               last_prop = LBP_BK;
  57               seen_space = NULL;
  58               seen_space2 = NULL;
  59             }
  60           else
  61             {
  62               char *q;
  63 
  64               /* Resolve property values whose behaviour is not fixed.  */
  65               switch (prop)
  66                 {
  67                 case LBP_AI:
  68                   /* Resolve ambiguous.  */
  69                   prop = LBP_AI_REPLACEMENT;
  70                   break;
  71                 case LBP_CB:
  72                   /* This is arbitrary.  */
  73                   prop = LBP_ID;
  74                   break;
  75                 case LBP_SA:
  76                   /* We don't handle complex scripts yet.
  77                      Treat LBP_SA like LBP_XX.  */
  78                 case LBP_XX:
  79                   /* This is arbitrary.  */
  80                   prop = LBP_AL;
  81                   break;
  82                 }
  83 
  84               /* Deal with spaces and combining characters.  */
  85               q = p;
  86               if (prop == LBP_SP)
  87                 {
  88                   /* Don't break just before a space.  */
  89                   *p = UC_BREAK_PROHIBITED;
  90                   seen_space2 = seen_space;
  91                   seen_space = p;
  92                 }
  93               else if (prop == LBP_ZW)
  94                 {
  95                   /* Don't break just before a zero-width space.  */
  96                   *p = UC_BREAK_PROHIBITED;
  97                   last_prop = LBP_ZW;
  98                   seen_space = NULL;
  99                   seen_space2 = NULL;
 100                 }
 101               else if (prop == LBP_CM)
 102                 {
 103                   /* Don't break just before a combining character, except immediately
 104                      after a zero-width space.  */
 105                   if (last_prop == LBP_ZW)
 106                     {
 107                       /* Break after zero-width space.  */
 108                       *p = UC_BREAK_POSSIBLE;
 109                       /* A combining character turns a preceding space into LBP_ID.  */
 110                       last_prop = LBP_ID;
 111                     }
 112                   else
 113                     {
 114                       *p = UC_BREAK_PROHIBITED;
 115                       /* A combining character turns a preceding space into LBP_ID.  */
 116                       if (seen_space != NULL)
 117                         {
 118                           q = seen_space;
 119                           seen_space = seen_space2;
 120                           prop = LBP_ID;
 121                           goto lookup_via_table;
 122                         }
 123                     }
 124                 }
 125               else
 126                 {
 127                  lookup_via_table:
 128                   /* prop must be usable as an index for table 7.3 of UTR #14.  */
 129                   if (!(prop >= 0 && prop < sizeof (unilbrk_table) / sizeof (unilbrk_table[0])))
 130                     abort ();
 131 
 132                   if (last_prop == LBP_BK)
 133                     {
 134                       /* Don't break at the beginning of a line.  */
 135                       *q = UC_BREAK_PROHIBITED;
 136                     }
 137                   else if (last_prop == LBP_ZW)
 138                     {
 139                       /* Break after zero-width space.  */
 140                       *q = UC_BREAK_POSSIBLE;
 141                     }
 142                   else
 143                     {
 144                       switch (unilbrk_table [last_prop] [prop])
 145                         {
 146                         case D:
 147                           *q = UC_BREAK_POSSIBLE;
 148                           break;
 149                         case I:
 150                           *q = (seen_space != NULL ? UC_BREAK_POSSIBLE : UC_BREAK_PROHIBITED);
 151                           break;
 152                         case P:
 153                           *q = UC_BREAK_PROHIBITED;
 154                           break;
 155                         default:
 156                           abort ();
 157                         }
 158                     }
 159                   last_prop = prop;
 160                   seen_space = NULL;
 161                   seen_space2 = NULL;
 162                 }
 163             }
 164 
 165           s++;
 166           p++;
 167         }
 168       while (s < s_end);
 169     }
 170 }

/* [previous][next][first][last][top][bottom][index][help] */