root/maint/gnulib/lib/unistr/u8-mbtouc.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. u8_mbtouc

   1 /* Look at first character in UTF-8 string.
   2    Copyright (C) 1999-2002, 2006-2007, 2009-2021 Free Software Foundation, Inc.
   3    Written by Bruno Haible <bruno@clisp.org>, 2001.
   4 
   5    This file is free software: you can redistribute it and/or modify
   6    it under the terms of the GNU Lesser General Public License as
   7    published by the Free Software Foundation; either version 2.1 of the
   8    License, or (at your option) any later version.
   9 
  10    This file is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU Lesser General Public License for more details.
  14 
  15    You should have received a copy of the GNU Lesser General Public License
  16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  17 
  18 #include <config.h>
  19 
  20 #if defined IN_LIBUNISTRING
  21 /* Tell unistr.h to declare u8_mbtouc as 'extern', not 'static inline'.  */
  22 # include "unistring-notinline.h"
  23 #endif
  24 
  25 /* Specification.  */
  26 #include "unistr.h"
  27 
  28 #if !HAVE_INLINE
  29 
  30 int
  31 u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
     /* [previous][next][first][last][top][bottom][index][help] */
  32 {
  33   uint8_t c = *s;
  34 
  35   if (c < 0x80)
  36     {
  37       *puc = c;
  38       return 1;
  39     }
  40   else if (c >= 0xc2)
  41     {
  42       if (c < 0xe0)
  43         {
  44           if (n >= 2)
  45             {
  46               if ((s[1] ^ 0x80) < 0x40)
  47                 {
  48                   *puc = ((unsigned int) (c & 0x1f) << 6)
  49                          | (unsigned int) (s[1] ^ 0x80);
  50                   return 2;
  51                 }
  52               /* invalid multibyte character */
  53             }
  54           else
  55             {
  56               /* incomplete multibyte character */
  57               *puc = 0xfffd;
  58               return 1;
  59             }
  60         }
  61       else if (c < 0xf0)
  62         {
  63           if (n >= 3)
  64             {
  65               if ((s[1] ^ 0x80) < 0x40)
  66                 {
  67                   if ((s[2] ^ 0x80) < 0x40)
  68                     {
  69                       if ((c >= 0xe1 || s[1] >= 0xa0)
  70                           && (c != 0xed || s[1] < 0xa0))
  71                         {
  72                           *puc = ((unsigned int) (c & 0x0f) << 12)
  73                                  | ((unsigned int) (s[1] ^ 0x80) << 6)
  74                                  | (unsigned int) (s[2] ^ 0x80);
  75                           return 3;
  76                         }
  77                       /* invalid multibyte character */
  78                       *puc = 0xfffd;
  79                       return 3;
  80                     }
  81                   /* invalid multibyte character */
  82                   *puc = 0xfffd;
  83                   return 2;
  84                 }
  85               /* invalid multibyte character */
  86             }
  87           else
  88             {
  89               /* incomplete multibyte character */
  90               *puc = 0xfffd;
  91               if (n == 1 || (s[1] ^ 0x80) >= 0x40)
  92                 return 1;
  93               else
  94                 return 2;
  95             }
  96         }
  97       else if (c < 0xf8)
  98         {
  99           if (n >= 4)
 100             {
 101               if ((s[1] ^ 0x80) < 0x40)
 102                 {
 103                   if ((s[2] ^ 0x80) < 0x40)
 104                     {
 105                       if ((s[3] ^ 0x80) < 0x40)
 106                         {
 107                           if ((c >= 0xf1 || s[1] >= 0x90)
 108                               && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)))
 109                             {
 110                               *puc = ((unsigned int) (c & 0x07) << 18)
 111                                      | ((unsigned int) (s[1] ^ 0x80) << 12)
 112                                      | ((unsigned int) (s[2] ^ 0x80) << 6)
 113                                      | (unsigned int) (s[3] ^ 0x80);
 114                               return 4;
 115                             }
 116                           /* invalid multibyte character */
 117                           *puc = 0xfffd;
 118                           return 4;
 119                         }
 120                       /* invalid multibyte character */
 121                       *puc = 0xfffd;
 122                       return 3;
 123                     }
 124                   /* invalid multibyte character */
 125                   *puc = 0xfffd;
 126                   return 2;
 127                 }
 128               /* invalid multibyte character */
 129             }
 130           else
 131             {
 132               /* incomplete multibyte character */
 133               *puc = 0xfffd;
 134               if (n == 1 || (s[1] ^ 0x80) >= 0x40)
 135                 return 1;
 136               else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
 137                 return 2;
 138               else
 139                 return 3;
 140             }
 141         }
 142     }
 143   /* invalid multibyte character */
 144   *puc = 0xfffd;
 145   return 1;
 146 }
 147 
 148 #endif

/* [previous][next][first][last][top][bottom][index][help] */