root/maint/gnulib/lib/unistr/u8-mbtouc-unsafe.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. u8_mbtouc_unsafe

   1 /* Look at first character in UTF-8 string.
   2    Copyright (C) 1999-2002, 2006-2007, 2009-2021 Free Software Foundation, Inc.
   3    Written by Bruno Haible <bruno@clisp.org>, 2001.
   4 
   5    This file is free software: you can redistribute it and/or modify
   6    it under the terms of the GNU Lesser General Public License as
   7    published by the Free Software Foundation; either version 2.1 of the
   8    License, or (at your option) any later version.
   9 
  10    This file is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU Lesser General Public License for more details.
  14 
  15    You should have received a copy of the GNU Lesser General Public License
  16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  17 
  18 #include <config.h>
  19 
  20 #if defined IN_LIBUNISTRING
  21 /* Tell unistr.h to declare u8_mbtouc_unsafe as 'extern', not
  22    'static inline'.  */
  23 # include "unistring-notinline.h"
  24 #endif
  25 
  26 /* Specification.  */
  27 #include "unistr.h"
  28 
  29 #if !HAVE_INLINE
  30 
  31 int
  32 u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
     /* [previous][next][first][last][top][bottom][index][help] */
  33 {
  34   uint8_t c = *s;
  35 
  36   if (c < 0x80)
  37     {
  38       *puc = c;
  39       return 1;
  40     }
  41   else if (c >= 0xc2)
  42     {
  43       if (c < 0xe0)
  44         {
  45           if (n >= 2)
  46             {
  47               if ((s[1] ^ 0x80) < 0x40)
  48                 {
  49                   *puc = ((unsigned int) (c & 0x1f) << 6)
  50                          | (unsigned int) (s[1] ^ 0x80);
  51                   return 2;
  52                 }
  53               /* invalid multibyte character */
  54             }
  55           else
  56             {
  57               /* incomplete multibyte character */
  58               *puc = 0xfffd;
  59               return 1;
  60             }
  61         }
  62       else if (c < 0xf0)
  63         {
  64           if (n >= 3)
  65             {
  66               if ((s[1] ^ 0x80) < 0x40)
  67                 {
  68                   if ((s[2] ^ 0x80) < 0x40)
  69                     {
  70                       if ((c >= 0xe1 || s[1] >= 0xa0)
  71                           && (c != 0xed || s[1] < 0xa0))
  72                         {
  73                           *puc = ((unsigned int) (c & 0x0f) << 12)
  74                                  | ((unsigned int) (s[1] ^ 0x80) << 6)
  75                                  | (unsigned int) (s[2] ^ 0x80);
  76                           return 3;
  77                         }
  78                       /* invalid multibyte character */
  79                       *puc = 0xfffd;
  80                       return 3;
  81                     }
  82                   /* invalid multibyte character */
  83                   *puc = 0xfffd;
  84                   return 2;
  85                 }
  86               /* invalid multibyte character */
  87             }
  88           else
  89             {
  90               /* incomplete multibyte character */
  91               *puc = 0xfffd;
  92               if (n == 1 || (s[1] ^ 0x80) >= 0x40)
  93                 return 1;
  94               else
  95                 return 2;
  96             }
  97         }
  98       else if (c < 0xf8)
  99         {
 100           if (n >= 4)
 101             {
 102               if ((s[1] ^ 0x80) < 0x40)
 103                 {
 104                   if ((s[2] ^ 0x80) < 0x40)
 105                     {
 106                       if ((s[3] ^ 0x80) < 0x40)
 107                         {
 108                           if ((c >= 0xf1 || s[1] >= 0x90)
 109                               && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
 110                              )
 111                             {
 112                               *puc = ((unsigned int) (c & 0x07) << 18)
 113                                      | ((unsigned int) (s[1] ^ 0x80) << 12)
 114                                      | ((unsigned int) (s[2] ^ 0x80) << 6)
 115                                      | (unsigned int) (s[3] ^ 0x80);
 116                               return 4;
 117                             }
 118                           /* invalid multibyte character */
 119                           *puc = 0xfffd;
 120                           return 4;
 121                         }
 122                       /* invalid multibyte character */
 123                       *puc = 0xfffd;
 124                       return 3;
 125                     }
 126                   /* invalid multibyte character */
 127                   *puc = 0xfffd;
 128                   return 2;
 129                 }
 130               /* invalid multibyte character */
 131             }
 132           else
 133             {
 134               /* incomplete multibyte character */
 135               *puc = 0xfffd;
 136               if (n == 1 || (s[1] ^ 0x80) >= 0x40)
 137                 return 1;
 138               else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
 139                 return 2;
 140               else
 141                 return 3;
 142             }
 143         }
 144     }
 145   /* invalid multibyte character */
 146   *puc = 0xfffd;
 147   return 1;
 148 }
 149 
 150 #endif

/* [previous][next][first][last][top][bottom][index][help] */