root/maint/gnulib/lib/mbrtowc-impl.h

/* [previous][next][first][last][top][bottom][index][help] */

INCLUDED FROM


   1 /* Convert multibyte character to wide character.
   2    Copyright (C) 1999-2002, 2005-2021 Free Software Foundation, Inc.
   3 
   4    This file is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU Lesser General Public License as
   6    published by the Free Software Foundation; either version 2.1 of the
   7    License, or (at your option) any later version.
   8 
   9    This file is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU Lesser General Public License for more details.
  13 
  14    You should have received a copy of the GNU Lesser General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16 
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2008.  */
  18 
  19 /* This file contains the body of the mbrtowc and mbrtoc32 functions,
  20    when GNULIB_defined_mbstate_t is defined.  */
  21 
  22   char *pstate = (char *)ps;  /* access the state object ps as raw bytes */
  23   /* A null s is converted as the empty string "" (n = 1) and the result is not stored.  */
  24   if (s == NULL)
  25     {
  26       pwc = NULL;
  27       s = "";
  28       n = 1;
  29     }
  30   /* No input bytes: return (size_t)(-2), the same value the incomplete path below returns.  */
  31   if (n == 0)
  32     return (size_t)(-2);
  33 
  34   /* Here n > 0.  */
  35   /* Without a caller-supplied state, use internal_state (not declared in this file).  */
  36   if (pstate == NULL)
  37     pstate = internal_state;
  38   /* State layout as used here: pstate[0] = number of saved bytes (0..3), pstate[1..3] = those bytes.  */
  39   {
  40     size_t nstate = pstate[0];  /* number of bytes saved by an earlier call */
  41     char buf[4];  /* saved bytes followed by new bytes taken from s */
  42     const char *p;  /* start of the bytes to decode: either s or buf */
  43     size_t m;  /* number of bytes available at p */
  44     enc_t enc;  /* value returned by locale_encoding_classification () */
  45     int res;  /* conversion result; see the comment at the success label */
  46     /* Choose p and m: s itself if nothing is saved, else buf = saved bytes + up to 3 bytes of s (4 at most).  */
  47     switch (nstate)
  48       {
  49       case 0:
  50         p = s;
  51         m = n;
  52         break;
  53       case 3:
  54         buf[2] = pstate[3];
  55         FALLTHROUGH;
  56       case 2:
  57         buf[1] = pstate[2];
  58         FALLTHROUGH;
  59       case 1:
  60         buf[0] = pstate[1];
  61         p = buf;
  62         m = nstate;
  63         buf[m++] = s[0];  /* n > 0 here, so s[0] may be read */
  64         if (n >= 2 && m < 4)
  65           {
  66             buf[m++] = s[1];
  67             if (n >= 3 && m < 4)
  68               buf[m++] = s[2];
  69           }
  70         break;
  71       default:
  72         errno = EINVAL;  /* pstate[0] is not in the range 0..3 */
  73         return (size_t)(-1);
  74       }
  75 
  76     /* Here m > 0.  */
  77 
  78     enc = locale_encoding_classification ();
  79 
  80     if (enc == enc_utf8) /* UTF-8 */
  81       {
  82         /* Achieve
  83              - multi-thread safety and
  84              - the ability to produce wide character values > WCHAR_MAX
  85            by not calling mbtowc() at all.  NOTE(review): the included code presumably sets res and jumps to the labels below -- confirm against mbrtowc-impl-utf8.h.  */
  86 #include "mbrtowc-impl-utf8.h"
  87       }
  88     else
  89       {
  90         /* The hidden internal state of mbtowc would make this function not
  91            multi-thread safe.  Achieve multi-thread safety through a lock.  */
  92         wchar_t wc;
  93         res = mbtowc_with_lock (&wc, p, m);
  94         /* res >= 0: a character was converted; a null character and res == 0 must go together.  */
  95         if (res >= 0)
  96           {
  97             if ((wc == 0) != (res == 0))
  98               abort ();
  99             if (pwc != NULL)
 100               *pwc = wc;
 101             goto success;
 102           }
 103 
 104         /* mbtowc does not distinguish between invalid and incomplete multibyte
 105            sequences.  But mbrtowc needs to make this distinction.
 106            There are two possible approaches:
 107              - Use iconv() and its return value.
 108              - Use built-in knowledge about the possible encodings.
 109            Given the low quality of implementation of iconv() on the systems
 110            that lack mbrtowc(), we use the second approach.
 111            The possible encodings are:
 112              - 8-bit encodings,
 113              - EUC-JP, EUC-KR, GB2312, EUC-TW, BIG5, GB18030, SJIS,
 114              - UTF-8 (already handled above).
 115            Use specialized code for each.  */
 116         if (m >= 4 || m >= MB_CUR_MAX)
 117           goto invalid;
 118         /* Here MB_CUR_MAX > 1 and 0 < m < 4.  */
 119         switch (enc)
 120           {
 121           /* As a reference for this code, you can use the GNU libiconv
 122              implementation.  Look for uses of the RET_TOOFEW macro.  */
 123           /* Each case decides from the m bytes at p whether to report incomplete or invalid.  */
 124           case enc_eucjp: /* EUC-JP */
 125             {
 126               if (m == 1)
 127                 {
 128                   unsigned char c = (unsigned char) p[0];
 129 
 130                   if ((c >= 0xa1 && c < 0xff) || c == 0x8e || c == 0x8f)
 131                     goto incomplete;
 132                 }
 133               if (m == 2)
 134                 {
 135                   unsigned char c = (unsigned char) p[0];
 136                   /* After a first byte 0x8f, a second byte in 0xa1..0xfe still leaves the character incomplete.  */
 137                   if (c == 0x8f)
 138                     {
 139                       unsigned char c2 = (unsigned char) p[1];
 140 
 141                       if (c2 >= 0xa1 && c2 < 0xff)
 142                         goto incomplete;
 143                     }
 144                 }
 145               goto invalid;
 146             }
 147 
 148           case enc_94: /* EUC-KR, GB2312, BIG5 */
 149             {
 150               if (m == 1)
 151                 {
 152                   unsigned char c = (unsigned char) p[0];
 153 
 154                   if (c >= 0xa1 && c < 0xff)
 155                     goto incomplete;
 156                 }
 157               goto invalid;
 158             }
 159 
 160           case enc_euctw: /* EUC-TW */
 161             {
 162               if (m == 1)
 163                 {
 164                   unsigned char c = (unsigned char) p[0];
 165 
 166                   if ((c >= 0xa1 && c < 0xff) || c == 0x8e)
 167                     goto incomplete;
 168                 }
 169               else /* m == 2 || m == 3 */
 170                 {
 171                   unsigned char c = (unsigned char) p[0];
 172 
 173                   if (c == 0x8e)
 174                     goto incomplete;
 175                 }
 176               goto invalid;
 177             }
 178 
 179           case enc_gb18030: /* GB18030 */
 180             {
 181               if (m == 1)
 182                 {
 183                   unsigned char c = (unsigned char) p[0];
 184 
 185                   if ((c >= 0x90 && c <= 0xe3) || (c >= 0xf8 && c <= 0xfe))
 186                     goto incomplete;
 187                 }
 188               else /* m == 2 || m == 3 */
 189                 {
 190                   unsigned char c = (unsigned char) p[0];
 191                   /* Only a prefix of the pattern [0x90..0xe3] [0x30..0x39] [0x81..0xfe] is kept as incomplete here.  */
 192                   if (c >= 0x90 && c <= 0xe3)
 193                     {
 194                       unsigned char c2 = (unsigned char) p[1];
 195 
 196                       if (c2 >= 0x30 && c2 <= 0x39)
 197                         {
 198                           if (m == 2)
 199                             goto incomplete;
 200                           else /* m == 3 */
 201                             {
 202                               unsigned char c3 = (unsigned char) p[2];
 203 
 204                               if (c3 >= 0x81 && c3 <= 0xfe)
 205                                 goto incomplete;
 206                             }
 207                         }
 208                     }
 209                 }
 210               goto invalid;
 211             }
 212 
 213           case enc_sjis: /* SJIS */
 214             {
 215               if (m == 1)
 216                 {
 217                   unsigned char c = (unsigned char) p[0];
 218 
 219                   if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)
 220                       || (c >= 0xf0 && c <= 0xf9))
 221                     goto incomplete;
 222                 }
 223               goto invalid;
 224             }
 225 
 226           default:
 227             /* An unknown multibyte encoding.  */
 228             goto incomplete;
 229           }
 230       }
 231 
 232    success:
 233     /* res >= 0 is the corrected return value of
 234        mbtowc_with_lock (&wc, p, m).  */
 235     if (nstate >= (res > 0 ? res : 1))  /* the character must extend beyond the saved bytes */
 236       abort ();
 237     res -= nstate;  /* do not count the bytes that came from the saved state */
 238     pstate[0] = 0;  /* no bytes are pending any more */
 239     return res;
 240     /* Below: save all m bytes seen so far in pstate[1..m] and their count in pstate[0].  */
 241    incomplete:
 242     {
 243       size_t k = nstate;
 244       /* Here 0 <= k < m < 4.  */
 245       pstate[++k] = s[0];
 246       if (k < m)
 247         {
 248           pstate[++k] = s[1];
 249           if (k < m)
 250             pstate[++k] = s[2];
 251         }
 252       if (k != m)
 253         abort ();
 254     }
 255     pstate[0] = m;
 256     return (size_t)(-2);
 257 
 258    invalid:
 259     errno = EILSEQ;
 260     /* The conversion state is undefined, says POSIX.  */
 261     return (size_t)(-1);
 262   }

/* [previous][next][first][last][top][bottom][index][help] */