root/maint/gnulib/lib/iconv.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. utf16be_mbtowc
  2. utf16be_wctomb
  3. utf16le_mbtowc
  4. utf16le_wctomb
  5. utf32be_mbtowc
  6. utf32be_wctomb
  7. utf32le_mbtowc
  8. utf32le_wctomb
  9. rpl_iconv

   1 /* Character set conversion.
   2    Copyright (C) 1999-2001, 2007, 2009-2021 Free Software Foundation, Inc.
   3 
   4    This file is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU Lesser General Public License as
   6    published by the Free Software Foundation; either version 2.1 of the
   7    License, or (at your option) any later version.
   8 
   9    This file is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU Lesser General Public License for more details.
  13 
  14    You should have received a copy of the GNU Lesser General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16 
  17 #include <config.h>
  18 
  19 /* Specification.  */
  20 #include <iconv.h>
  21 
  22 #include <stddef.h>
  23 
  24 #if REPLACE_ICONV_UTF
  25 # include <errno.h>
  26 # include <stdint.h>
  27 # include <stdlib.h>
  28 # include "unistr.h"
  29 #endif
  30 
  31 #if REPLACE_ICONV_UTF
  32 
  33 /* UTF-{16,32}{BE,LE} converters taken from GNU libiconv 1.11.  */
  34 
  35 /* Return code if invalid. (xxx_mbtowc) */
  36 # define RET_ILSEQ      -1
  37 /* Return code if no bytes were read. (xxx_mbtowc) */
  38 # define RET_TOOFEW     -2
  39 
  40 /* Return code if invalid. (xxx_wctomb) */
  41 # define RET_ILUNI      -1
  42 /* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
  43 # define RET_TOOSMALL   -2
  44 
  45 /*
  46  * UTF-16BE
  47  */
  48 
  49 /* Specification: RFC 2781 */
  50 
  51 static int
  52 utf16be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
     /* [previous][next][first][last][top][bottom][index][help] */
  53 {
  54   if (n >= 2)
  55     {
  56       ucs4_t wc = (s[0] << 8) + s[1];
  57       if (wc >= 0xd800 && wc < 0xdc00)
  58         {
  59           if (n >= 4)
  60             {
  61               ucs4_t wc2 = (s[2] << 8) + s[3];
  62               if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
  63                 return RET_ILSEQ;
  64               *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
  65               return 4;
  66             }
  67         }
  68       else if (wc >= 0xdc00 && wc < 0xe000)
  69         {
  70           return RET_ILSEQ;
  71         }
  72       else
  73         {
  74           *pwc = wc;
  75           return 2;
  76         }
  77     }
  78   return RET_TOOFEW;
  79 }
  80 
  81 static int
  82 utf16be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
     /* [previous][next][first][last][top][bottom][index][help] */
  83 {
  84   if (!(wc >= 0xd800 && wc < 0xe000))
  85     {
  86       if (wc < 0x10000)
  87         {
  88           if (n >= 2)
  89             {
  90               r[0] = (unsigned char) (wc >> 8);
  91               r[1] = (unsigned char) wc;
  92               return 2;
  93             }
  94           else
  95             return RET_TOOSMALL;
  96         }
  97       else if (wc < 0x110000)
  98         {
  99           if (n >= 4)
 100             {
 101               ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
 102               ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
 103               r[0] = (unsigned char) (wc1 >> 8);
 104               r[1] = (unsigned char) wc1;
 105               r[2] = (unsigned char) (wc2 >> 8);
 106               r[3] = (unsigned char) wc2;
 107               return 4;
 108             }
 109           else
 110             return RET_TOOSMALL;
 111         }
 112     }
 113   return RET_ILUNI;
 114 }
 115 
 116 /*
 117  * UTF-16LE
 118  */
 119 
 120 /* Specification: RFC 2781 */
 121 
 122 static int
 123 utf16le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
     /* [previous][next][first][last][top][bottom][index][help] */
 124 {
 125   if (n >= 2)
 126     {
 127       ucs4_t wc = s[0] + (s[1] << 8);
 128       if (wc >= 0xd800 && wc < 0xdc00)
 129         {
 130           if (n >= 4)
 131             {
 132               ucs4_t wc2 = s[2] + (s[3] << 8);
 133               if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
 134                 return RET_ILSEQ;
 135               *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
 136               return 4;
 137             }
 138         }
 139       else if (wc >= 0xdc00 && wc < 0xe000)
 140         {
 141           return RET_ILSEQ;
 142         }
 143       else
 144         {
 145           *pwc = wc;
 146           return 2;
 147         }
 148     }
 149   return RET_TOOFEW;
 150 }
 151 
 152 static int
 153 utf16le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
     /* [previous][next][first][last][top][bottom][index][help] */
 154 {
 155   if (!(wc >= 0xd800 && wc < 0xe000))
 156     {
 157       if (wc < 0x10000)
 158         {
 159           if (n >= 2)
 160             {
 161               r[0] = (unsigned char) wc;
 162               r[1] = (unsigned char) (wc >> 8);
 163               return 2;
 164             }
 165           else
 166             return RET_TOOSMALL;
 167         }
 168       else if (wc < 0x110000)
 169         {
 170           if (n >= 4)
 171             {
 172               ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
 173               ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
 174               r[0] = (unsigned char) wc1;
 175               r[1] = (unsigned char) (wc1 >> 8);
 176               r[2] = (unsigned char) wc2;
 177               r[3] = (unsigned char) (wc2 >> 8);
 178               return 4;
 179             }
 180           else
 181             return RET_TOOSMALL;
 182         }
 183     }
 184   return RET_ILUNI;
 185 }
 186 
 187 /*
 188  * UTF-32BE
 189  */
 190 
 191 /* Specification: Unicode 3.1 Standard Annex #19 */
 192 
 193 static int
 194 utf32be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
     /* [previous][next][first][last][top][bottom][index][help] */
 195 {
 196   if (n >= 4)
 197     {
 198       ucs4_t wc = (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3];
 199       if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
 200         {
 201           *pwc = wc;
 202           return 4;
 203         }
 204       else
 205         return RET_ILSEQ;
 206     }
 207   return RET_TOOFEW;
 208 }
 209 
 210 static int
 211 utf32be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
     /* [previous][next][first][last][top][bottom][index][help] */
 212 {
 213   if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
 214     {
 215       if (n >= 4)
 216         {
 217           r[0] = 0;
 218           r[1] = (unsigned char) (wc >> 16);
 219           r[2] = (unsigned char) (wc >> 8);
 220           r[3] = (unsigned char) wc;
 221           return 4;
 222         }
 223       else
 224         return RET_TOOSMALL;
 225     }
 226   return RET_ILUNI;
 227 }
 228 
 229 /*
 230  * UTF-32LE
 231  */
 232 
 233 /* Specification: Unicode 3.1 Standard Annex #19 */
 234 
 235 static int
 236 utf32le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
     /* [previous][next][first][last][top][bottom][index][help] */
 237 {
 238   if (n >= 4)
 239     {
 240       ucs4_t wc = s[0] + (s[1] << 8) + (s[2] << 16) + (s[3] << 24);
 241       if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
 242         {
 243           *pwc = wc;
 244           return 4;
 245         }
 246       else
 247         return RET_ILSEQ;
 248     }
 249   return RET_TOOFEW;
 250 }
 251 
 252 static int
 253 utf32le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
     /* [previous][next][first][last][top][bottom][index][help] */
 254 {
 255   if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
 256     {
 257       if (n >= 4)
 258         {
 259           r[0] = (unsigned char) wc;
 260           r[1] = (unsigned char) (wc >> 8);
 261           r[2] = (unsigned char) (wc >> 16);
 262           r[3] = 0;
 263           return 4;
 264         }
 265       else
 266         return RET_TOOSMALL;
 267     }
 268   return RET_ILUNI;
 269 }
 270 
 271 #endif
 272 
 273 size_t
 274 rpl_iconv (iconv_t cd,
     /* [previous][next][first][last][top][bottom][index][help] */
 275            ICONV_CONST char **inbuf, size_t *inbytesleft,
 276            char **outbuf, size_t *outbytesleft)
 277 #undef iconv
 278 {
 279 #if REPLACE_ICONV_UTF
 280   switch ((uintptr_t) cd)
 281     {
 282       {
 283         int (*xxx_wctomb) (unsigned char *, ucs4_t, size_t);
 284 
 285         case (uintptr_t) _ICONV_UTF8_UTF16BE:
 286           xxx_wctomb = utf16be_wctomb;
 287           goto loop_from_utf8;
 288         case (uintptr_t) _ICONV_UTF8_UTF16LE:
 289           xxx_wctomb = utf16le_wctomb;
 290           goto loop_from_utf8;
 291         case (uintptr_t) _ICONV_UTF8_UTF32BE:
 292           xxx_wctomb = utf32be_wctomb;
 293           goto loop_from_utf8;
 294         case (uintptr_t) _ICONV_UTF8_UTF32LE:
 295           xxx_wctomb = utf32le_wctomb;
 296           goto loop_from_utf8;
 297 
 298        loop_from_utf8:
 299         if (inbuf == NULL || *inbuf == NULL)
 300           return 0;
 301         {
 302           ICONV_CONST char *inptr = *inbuf;
 303           size_t inleft = *inbytesleft;
 304           char *outptr = *outbuf;
 305           size_t outleft = *outbytesleft;
 306           size_t res = 0;
 307           while (inleft > 0)
 308             {
 309               ucs4_t uc;
 310               int m = u8_mbtoucr (&uc, (const uint8_t *) inptr, inleft);
 311               if (m <= 0)
 312                 {
 313                   if (m == -1)
 314                     {
 315                       errno = EILSEQ;
 316                       res = (size_t)(-1);
 317                       break;
 318                     }
 319                   if (m == -2)
 320                     {
 321                       errno = EINVAL;
 322                       res = (size_t)(-1);
 323                       break;
 324                     }
 325                   abort ();
 326                 }
 327               else
 328                 {
 329                   int n = xxx_wctomb ((uint8_t *) outptr, uc, outleft);
 330                   if (n < 0)
 331                     {
 332                       if (n == RET_ILUNI)
 333                         {
 334                           errno = EILSEQ;
 335                           res = (size_t)(-1);
 336                           break;
 337                         }
 338                       if (n == RET_TOOSMALL)
 339                         {
 340                           errno = E2BIG;
 341                           res = (size_t)(-1);
 342                           break;
 343                         }
 344                       abort ();
 345                     }
 346                   else
 347                     {
 348                       inptr += m;
 349                       inleft -= m;
 350                       outptr += n;
 351                       outleft -= n;
 352                     }
 353                 }
 354             }
 355           *inbuf = inptr;
 356           *inbytesleft = inleft;
 357           *outbuf = outptr;
 358           *outbytesleft = outleft;
 359           return res;
 360         }
 361       }
 362 
 363       {
 364         int (*xxx_mbtowc) (ucs4_t *, const unsigned char *, size_t);
 365 
 366         case (uintptr_t) _ICONV_UTF16BE_UTF8:
 367           xxx_mbtowc = utf16be_mbtowc;
 368           goto loop_to_utf8;
 369         case (uintptr_t) _ICONV_UTF16LE_UTF8:
 370           xxx_mbtowc = utf16le_mbtowc;
 371           goto loop_to_utf8;
 372         case (uintptr_t) _ICONV_UTF32BE_UTF8:
 373           xxx_mbtowc = utf32be_mbtowc;
 374           goto loop_to_utf8;
 375         case (uintptr_t) _ICONV_UTF32LE_UTF8:
 376           xxx_mbtowc = utf32le_mbtowc;
 377           goto loop_to_utf8;
 378 
 379        loop_to_utf8:
 380         if (inbuf == NULL || *inbuf == NULL)
 381           return 0;
 382         {
 383           ICONV_CONST char *inptr = *inbuf;
 384           size_t inleft = *inbytesleft;
 385           char *outptr = *outbuf;
 386           size_t outleft = *outbytesleft;
 387           size_t res = 0;
 388           while (inleft > 0)
 389             {
 390               ucs4_t uc;
 391               int m = xxx_mbtowc (&uc, (const uint8_t *) inptr, inleft);
 392               if (m <= 0)
 393                 {
 394                   if (m == RET_ILSEQ)
 395                     {
 396                       errno = EILSEQ;
 397                       res = (size_t)(-1);
 398                       break;
 399                     }
 400                   if (m == RET_TOOFEW)
 401                     {
 402                       errno = EINVAL;
 403                       res = (size_t)(-1);
 404                       break;
 405                     }
 406                   abort ();
 407                 }
 408               else
 409                 {
 410                   int n = u8_uctomb ((uint8_t *) outptr, uc, outleft);
 411                   if (n < 0)
 412                     {
 413                       if (n == -1)
 414                         {
 415                           errno = EILSEQ;
 416                           res = (size_t)(-1);
 417                           break;
 418                         }
 419                       if (n == -2)
 420                         {
 421                           errno = E2BIG;
 422                           res = (size_t)(-1);
 423                           break;
 424                         }
 425                       abort ();
 426                     }
 427                   else
 428                     {
 429                       inptr += m;
 430                       inleft -= m;
 431                       outptr += n;
 432                       outleft -= n;
 433                     }
 434                 }
 435             }
 436           *inbuf = inptr;
 437           *inbytesleft = inleft;
 438           *outbuf = outptr;
 439           *outbytesleft = outleft;
 440           return res;
 441         }
 442       }
 443     }
 444 #endif
 445   return iconv (cd, inbuf, inbytesleft, outbuf, outbytesleft);
 446 }

/* [previous][next][first][last][top][bottom][index][help] */