root/maint/gnulib/lib/unistr/u8-mbtoucr.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. u8_mbtoucr

   1 /* Look at first character in UTF-8 string, returning an error code.
   2    Copyright (C) 1999-2002, 2006-2007, 2009-2021 Free Software Foundation, Inc.
   3    Written by Bruno Haible <bruno@clisp.org>, 2001.
   4 
   5    This file is free software: you can redistribute it and/or modify
   6    it under the terms of the GNU Lesser General Public License as
   7    published by the Free Software Foundation; either version 2.1 of the
   8    License, or (at your option) any later version.
   9 
  10    This file is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU Lesser General Public License for more details.
  14 
  15    You should have received a copy of the GNU Lesser General Public License
  16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  17 
  18 #include <config.h>
  19 
  20 /* Specification.  */
  21 #include "unistr.h"
  22 
  23 int
  24 u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n)
     /* [previous][next][first][last][top][bottom][index][help] */
  25 {
  26   uint8_t c = *s;
  27 
  28   if (c < 0x80)
  29     {
  30       *puc = c;
  31       return 1;
  32     }
  33   else if (c >= 0xc2)
  34     {
  35       if (c < 0xe0)
  36         {
  37           if (n >= 2)
  38             {
  39               if ((s[1] ^ 0x80) < 0x40)
  40                 {
  41                   *puc = ((unsigned int) (c & 0x1f) << 6)
  42                          | (unsigned int) (s[1] ^ 0x80);
  43                   return 2;
  44                 }
  45               /* invalid multibyte character */
  46             }
  47           else
  48             {
  49               /* incomplete multibyte character */
  50               *puc = 0xfffd;
  51               return -2;
  52             }
  53         }
  54       else if (c < 0xf0)
  55         {
  56           if (n >= 2)
  57             {
  58               if ((s[1] ^ 0x80) < 0x40
  59                   && (c >= 0xe1 || s[1] >= 0xa0)
  60                   && (c != 0xed || s[1] < 0xa0))
  61                 {
  62                   if (n >= 3)
  63                     {
  64                       if ((s[2] ^ 0x80) < 0x40)
  65                         {
  66                           *puc = ((unsigned int) (c & 0x0f) << 12)
  67                                  | ((unsigned int) (s[1] ^ 0x80) << 6)
  68                                  | (unsigned int) (s[2] ^ 0x80);
  69                           return 3;
  70                         }
  71                       /* invalid multibyte character */
  72                     }
  73                   else
  74                     {
  75                       /* incomplete multibyte character */
  76                       *puc = 0xfffd;
  77                       return -2;
  78                     }
  79                 }
  80               /* invalid multibyte character */
  81             }
  82           else
  83             {
  84               /* incomplete multibyte character */
  85               *puc = 0xfffd;
  86               return -2;
  87             }
  88         }
  89       else if (c < 0xf8)
  90         {
  91           if (n >= 2)
  92             {
  93               if ((s[1] ^ 0x80) < 0x40
  94                   && (c >= 0xf1 || s[1] >= 0x90)
  95                   && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)))
  96                 {
  97                   if (n >= 3)
  98                     {
  99                       if ((s[2] ^ 0x80) < 0x40)
 100                         {
 101                           if (n >= 4)
 102                             {
 103                               if ((s[3] ^ 0x80) < 0x40)
 104                                 {
 105                                   *puc = ((unsigned int) (c & 0x07) << 18)
 106                                          | ((unsigned int) (s[1] ^ 0x80) << 12)
 107                                          | ((unsigned int) (s[2] ^ 0x80) << 6)
 108                                          | (unsigned int) (s[3] ^ 0x80);
 109                                   return 4;
 110                                 }
 111                               /* invalid multibyte character */
 112                             }
 113                           else
 114                             {
 115                               /* incomplete multibyte character */
 116                               *puc = 0xfffd;
 117                               return -2;
 118                             }
 119                         }
 120                       /* invalid multibyte character */
 121                     }
 122                   else
 123                     {
 124                       /* incomplete multibyte character */
 125                       *puc = 0xfffd;
 126                       return -2;
 127                     }
 128                 }
 129               /* invalid multibyte character */
 130             }
 131           else
 132             {
 133               /* incomplete multibyte character */
 134               *puc = 0xfffd;
 135               return -2;
 136             }
 137         }
 138     }
 139   /* invalid multibyte character */
 140   *puc = 0xfffd;
 141   return -1;
 142 }

/* [previous][next][first][last][top][bottom][index][help] */