root/maint/gnulib/lib/unistr/u8-mbtouc-aux.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. u8_mbtouc_aux

   1 /* Conversion UTF-8 to UCS-4.
   2    Copyright (C) 2001-2002, 2006-2007, 2009-2021 Free Software Foundation, Inc.
   3    Written by Bruno Haible <bruno@clisp.org>, 2001.
   4 
   5    This file is free software: you can redistribute it and/or modify
   6    it under the terms of the GNU Lesser General Public License as
   7    published by the Free Software Foundation; either version 2.1 of the
   8    License, or (at your option) any later version.
   9 
  10    This file is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU Lesser General Public License for more details.
  14 
  15    You should have received a copy of the GNU Lesser General Public License
  16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  17 
  18 #include <config.h>
  19 
  20 /* Specification.  */
  21 #include "unistr.h"
  22 
  23 #if defined IN_LIBUNISTRING || HAVE_INLINE
  24 
  25 int
  26 u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)
     /* [previous][next][first][last][top][bottom][index][help] */
  27 {
  28   uint8_t c = *s;
  29 
  30   if (c >= 0xc2)
  31     {
  32       if (c < 0xe0)
  33         {
  34           if (n >= 2)
  35             {
  36               if ((s[1] ^ 0x80) < 0x40)
  37                 {
  38                   *puc = ((unsigned int) (c & 0x1f) << 6)
  39                          | (unsigned int) (s[1] ^ 0x80);
  40                   return 2;
  41                 }
  42               /* invalid multibyte character */
  43             }
  44           else
  45             {
  46               /* incomplete multibyte character */
  47               *puc = 0xfffd;
  48               return 1;
  49             }
  50         }
  51       else if (c < 0xf0)
  52         {
  53           if (n >= 3)
  54             {
  55               if ((s[1] ^ 0x80) < 0x40)
  56                 {
  57                   if ((s[2] ^ 0x80) < 0x40)
  58                     {
  59                       if ((c >= 0xe1 || s[1] >= 0xa0)
  60                           && (c != 0xed || s[1] < 0xa0))
  61                         {
  62                           *puc = ((unsigned int) (c & 0x0f) << 12)
  63                                  | ((unsigned int) (s[1] ^ 0x80) << 6)
  64                                  | (unsigned int) (s[2] ^ 0x80);
  65                           return 3;
  66                         }
  67                       /* invalid multibyte character */
  68                       *puc = 0xfffd;
  69                       return 3;
  70                     }
  71                   /* invalid multibyte character */
  72                   *puc = 0xfffd;
  73                   return 2;
  74                 }
  75               /* invalid multibyte character */
  76             }
  77           else
  78             {
  79               /* incomplete multibyte character */
  80               *puc = 0xfffd;
  81               if (n == 1 || (s[1] ^ 0x80) >= 0x40)
  82                 return 1;
  83               else
  84                 return 2;
  85             }
  86         }
  87       else if (c < 0xf8)
  88         {
  89           if (n >= 4)
  90             {
  91               if ((s[1] ^ 0x80) < 0x40)
  92                 {
  93                   if ((s[2] ^ 0x80) < 0x40)
  94                     {
  95                       if ((s[3] ^ 0x80) < 0x40)
  96                         {
  97                           if ((c >= 0xf1 || s[1] >= 0x90)
  98                               && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)))
  99                             {
 100                               *puc = ((unsigned int) (c & 0x07) << 18)
 101                                      | ((unsigned int) (s[1] ^ 0x80) << 12)
 102                                      | ((unsigned int) (s[2] ^ 0x80) << 6)
 103                                      | (unsigned int) (s[3] ^ 0x80);
 104                               return 4;
 105                             }
 106                           /* invalid multibyte character */
 107                           *puc = 0xfffd;
 108                           return 4;
 109                         }
 110                       /* invalid multibyte character */
 111                       *puc = 0xfffd;
 112                       return 3;
 113                     }
 114                   /* invalid multibyte character */
 115                   *puc = 0xfffd;
 116                   return 2;
 117                 }
 118               /* invalid multibyte character */
 119             }
 120           else
 121             {
 122               /* incomplete multibyte character */
 123               *puc = 0xfffd;
 124               if (n == 1 || (s[1] ^ 0x80) >= 0x40)
 125                 return 1;
 126               else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
 127                 return 2;
 128               else
 129                 return 3;
 130             }
 131         }
 132     }
 133   /* invalid multibyte character */
 134   *puc = 0xfffd;
 135   return 1;
 136 }
 137 
 138 #endif

/* [previous][next][first][last][top][bottom][index][help] */