root/maint/gnulib/tests/test-mbrtoc32.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. main

   1 /* Test of conversion of multibyte character to 32-bit wide character.
   2    Copyright (C) 2008-2021 Free Software Foundation, Inc.
   3 
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8 
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13 
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16 
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2008.  */
  18 
  19 #include <config.h>
  20 
  21 #include <uchar.h>
  22 
  23 #include "signature.h"
  24 SIGNATURE_CHECK (mbrtoc32, size_t,
  25                  (char32_t *, const char *, size_t, mbstate_t *));
  26 
  27 #include <locale.h>
  28 #include <stdio.h>
  29 #include <string.h>
  30 
  31 #include "macros.h"
  32 
  33 int
  34 main (int argc, char *argv[])
     /* Exercise mbrtoc32 on zero-length, NUL, single-byte and NULL-pointer
        input, then on a locale-specific string selected by the first
        character of argv[1] ('1' to '5').  Returns 0 once a known selector
        has been handled; returns 1 if setlocale fails or if no known
        selector was given.  All checks are made with ASSERT.  */
  35 {
  36   mbstate_t state;
  37   char32_t wc;
  38   size_t ret;
  39 
  40   /* configure should already have checked that the locale is supported.  */
  41   if (setlocale (LC_ALL, "") == NULL)
  42     return 1;
  43 
  44   /* Test zero-length input: nothing can be converted, so -2 is expected.  */
  45   {
  46     memset (&state, '\0', sizeof (mbstate_t));
  47     wc = (char32_t) 0xBADFACE;
  48     ret = mbrtoc32 (&wc, "x", 0, &state);
  49     ASSERT (ret == (size_t)(-2));
  50     ASSERT (mbsinit (&state));
  51   }
  52 
  53   /* Test NUL byte input, with and without a destination pointer.  */
  54   {
  55     memset (&state, '\0', sizeof (mbstate_t));
  56     wc = (char32_t) 0xBADFACE;
  57     ret = mbrtoc32 (&wc, "", 1, &state);
  58     ASSERT (ret == 0);
  59     ASSERT (wc == 0);
  60     ASSERT (mbsinit (&state));
  61     ret = mbrtoc32 (NULL, "", 1, &state);
  62     ASSERT (ret == 0);
  63     ASSERT (mbsinit (&state));
  64   }
  65 
  66   /* Test single-byte input, one byte value at a time.  */
  67   {
  68     int c;
  69     char buf[1];
  70 
  71     memset (&state, '\0', sizeof (mbstate_t));
  72     for (c = 0; c < 0x100; c++)
  73       switch (c)
  74         {
  75         default: /* any byte value not listed in the cases below */
  76           if (! (c && 1 < argc && argv[1][0] == '5'))
  77             break;
  78           FALLTHROUGH;
  79         case '\t': case '\v': case '\f':
  80         case ' ': case '!': case '"': case '#': case '%':
  81         case '&': case '\'': case '(': case ')': case '*':
  82         case '+': case ',': case '-': case '.': case '/':
  83         case '0': case '1': case '2': case '3': case '4':
  84         case '5': case '6': case '7': case '8': case '9':
  85         case ':': case ';': case '<': case '=': case '>':
  86         case '?':
  87         case 'A': case 'B': case 'C': case 'D': case 'E':
  88         case 'F': case 'G': case 'H': case 'I': case 'J':
  89         case 'K': case 'L': case 'M': case 'N': case 'O':
  90         case 'P': case 'Q': case 'R': case 'S': case 'T':
  91         case 'U': case 'V': case 'W': case 'X': case 'Y':
  92         case 'Z':
  93         case '[': case '\\': case ']': case '^': case '_':
  94         case 'a': case 'b': case 'c': case 'd': case 'e':
  95         case 'f': case 'g': case 'h': case 'i': case 'j':
  96         case 'k': case 'l': case 'm': case 'n': case 'o':
  97         case 'p': case 'q': case 'r': case 's': case 't':
  98         case 'u': case 'v': case 'w': case 'x': case 'y':
  99         case 'z': case '{': case '|': case '}': case '~':
 100           /* c is in the ISO C "basic character set", or argv[1] starts
 101              with '5' so we are testing all nonnull bytes.  */
 102           buf[0] = c;
 103           wc = (char32_t) 0xBADFACE;
 104           ret = mbrtoc32 (&wc, buf, 1, &state);
 105           ASSERT (ret == 1);
 106           if (c < 0x80)
 107             /* c is an ASCII character.  */
 108             ASSERT (wc == c);
 109           else
 110             /* argv[1] starts with '5', that is, we are testing the C or POSIX
 111                locale.
 112                On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF.
 113                But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF.  */
 114             ASSERT (wc == (btowc (c) == 0xDF00 + c ? btowc (c) : c));
 115           ASSERT (mbsinit (&state));
 116           ret = mbrtoc32 (NULL, buf, 1, &state);
 117           ASSERT (ret == 1);
 118           ASSERT (mbsinit (&state));
 119           break;
 120         }
 121   }
 122 
 123   /* Test special calling convention: NULL input must leave wc untouched.  */
 124   {
 125     memset (&state, '\0', sizeof (mbstate_t));
 126     wc = (char32_t) 0xBADFACE; /* sentinel value, compared again below */
 127     ret = mbrtoc32 (&wc, NULL, 5, &state);
 128     ASSERT (ret == 0);
 129     ASSERT (wc == (char32_t) 0xBADFACE);
 130     ASSERT (mbsinit (&state));
 131   }
 132 
 133   if (argc > 1)
 134     switch (argv[1][0])
 135       {
 136       case '1':
 137         /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
 138         {
 139           char input[] = "B\374\337er"; /* "Büßer" */
 140           memset (&state, '\0', sizeof (mbstate_t));
 141 
 142           wc = (char32_t) 0xBADFACE;
 143           ret = mbrtoc32 (&wc, input, 1, &state);
 144           ASSERT (ret == 1);
 145           ASSERT (wc == 'B');
 146           ASSERT (mbsinit (&state));
 147           input[0] = '\0'; /* presumably exposes re-reads of consumed bytes -- TODO confirm */
 148 
 149           wc = (char32_t) 0xBADFACE;
 150           ret = mbrtoc32 (&wc, input + 1, 1, &state);
 151           ASSERT (ret == 1);
 152           ASSERT (c32tob (wc) == (unsigned char) '\374');
 153           ASSERT (mbsinit (&state));
 154           input[1] = '\0';
 155 
 156           /* Test support of NULL first argument.  */
 157           ret = mbrtoc32 (NULL, input + 2, 3, &state);
 158           ASSERT (ret == 1);
 159           ASSERT (mbsinit (&state));
 160 
 161           wc = (char32_t) 0xBADFACE;
 162           ret = mbrtoc32 (&wc, input + 2, 3, &state);
 163           ASSERT (ret == 1);
 164           ASSERT (c32tob (wc) == (unsigned char) '\337');
 165           ASSERT (mbsinit (&state));
 166           input[2] = '\0';
 167 
 168           wc = (char32_t) 0xBADFACE;
 169           ret = mbrtoc32 (&wc, input + 3, 2, &state);
 170           ASSERT (ret == 1);
 171           ASSERT (wc == 'e');
 172           ASSERT (mbsinit (&state));
 173           input[3] = '\0';
 174 
 175           wc = (char32_t) 0xBADFACE;
 176           ret = mbrtoc32 (&wc, input + 4, 1, &state);
 177           ASSERT (ret == 1);
 178           ASSERT (wc == 'r');
 179           ASSERT (mbsinit (&state));
 180         }
 181         return 0;
 182 
 183       case '2':
 184         /* Locale encoding is UTF-8.  */
 185         {
 186           char input[] = "s\303\274\303\237\360\237\230\213!"; /* "süß😋!" */
 187           memset (&state, '\0', sizeof (mbstate_t));
 188 
 189           wc = (char32_t) 0xBADFACE;
 190           ret = mbrtoc32 (&wc, input, 1, &state);
 191           ASSERT (ret == 1);
 192           ASSERT (wc == 's');
 193           ASSERT (mbsinit (&state));
 194           input[0] = '\0';
 195 
 196           wc = (char32_t) 0xBADFACE; /* sentinel value, compared again below */
 197           ret = mbrtoc32 (&wc, input + 1, 1, &state);
 198           ASSERT (ret == (size_t)(-2)); /* only the first of two bytes was given */
 199           ASSERT (wc == (char32_t) 0xBADFACE);
 200           ASSERT (!mbsinit (&state));
 201           input[1] = '\0';
 202 
 203           wc = (char32_t) 0xBADFACE;
 204           ret = mbrtoc32 (&wc, input + 2, 7, &state);
 205           ASSERT (ret == 1); /* only the remaining byte is counted */
 206           ASSERT (c32tob (wc) == EOF);
 207           ASSERT (wc == 0x00FC); /* expect Unicode encoding */
 208           ASSERT (mbsinit (&state));
 209           input[2] = '\0';
 210 
 211           /* Test support of NULL first argument.  */
 212           ret = mbrtoc32 (NULL, input + 3, 6, &state);
 213           ASSERT (ret == 2);
 214           ASSERT (mbsinit (&state));
 215 
 216           wc = (char32_t) 0xBADFACE;
 217           ret = mbrtoc32 (&wc, input + 3, 6, &state);
 218           ASSERT (ret == 2);
 219           ASSERT (c32tob (wc) == EOF);
 220           ASSERT (wc == 0x00DF); /* expect Unicode encoding */
 221           ASSERT (mbsinit (&state));
 222           input[3] = '\0';
 223           input[4] = '\0';
 224 
 225           /* Test support of NULL first argument.  */
 226           ret = mbrtoc32 (NULL, input + 5, 4, &state);
 227           ASSERT (ret == 4);
 228           ASSERT (mbsinit (&state));
 229 
 230           wc = (char32_t) 0xBADFACE;
 231           ret = mbrtoc32 (&wc, input + 5, 4, &state);
 232           ASSERT (ret == 4);
 233           ASSERT (c32tob (wc) == EOF);
 234           ASSERT (wc == 0x1F60B); /* expect Unicode encoding */
 235           ASSERT (mbsinit (&state));
 236           input[5] = '\0';
 237           input[6] = '\0';
 238           input[7] = '\0';
 239           input[8] = '\0';
 240 
 241           wc = (char32_t) 0xBADFACE;
 242           ret = mbrtoc32 (&wc, input + 9, 1, &state);
 243           ASSERT (ret == 1);
 244           ASSERT (wc == '!');
 245           ASSERT (mbsinit (&state));
 246         }
 247         return 0;
 248 
 249       case '3':
 250         /* Locale encoding is EUC-JP.  */
 251         {
 252           char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
 253           memset (&state, '\0', sizeof (mbstate_t));
 254 
 255           wc = (char32_t) 0xBADFACE;
 256           ret = mbrtoc32 (&wc, input, 1, &state);
 257           ASSERT (ret == 1);
 258           ASSERT (wc == '<');
 259           ASSERT (mbsinit (&state));
 260           input[0] = '\0';
 261 
 262           wc = (char32_t) 0xBADFACE;
 263           ret = mbrtoc32 (&wc, input + 1, 2, &state);
 264           ASSERT (ret == 2);
 265           ASSERT (c32tob (wc) == EOF);
 266           ASSERT (mbsinit (&state));
 267           input[1] = '\0';
 268           input[2] = '\0';
 269 
 270           wc = (char32_t) 0xBADFACE; /* sentinel value, compared again below */
 271           ret = mbrtoc32 (&wc, input + 3, 1, &state);
 272           ASSERT (ret == (size_t)(-2)); /* only the first of two bytes was given */
 273           ASSERT (wc == (char32_t) 0xBADFACE);
 274           ASSERT (!mbsinit (&state));
 275           input[3] = '\0';
 276 
 277           wc = (char32_t) 0xBADFACE;
 278           ret = mbrtoc32 (&wc, input + 4, 4, &state);
 279           ASSERT (ret == 1); /* only the remaining byte is counted */
 280           ASSERT (c32tob (wc) == EOF);
 281           ASSERT (mbsinit (&state));
 282           input[4] = '\0';
 283 
 284           /* Test support of NULL first argument.  */
 285           ret = mbrtoc32 (NULL, input + 5, 3, &state);
 286           ASSERT (ret == 2);
 287           ASSERT (mbsinit (&state));
 288 
 289           wc = (char32_t) 0xBADFACE;
 290           ret = mbrtoc32 (&wc, input + 5, 3, &state);
 291           ASSERT (ret == 2);
 292           ASSERT (c32tob (wc) == EOF);
 293           ASSERT (mbsinit (&state));
 294           input[5] = '\0';
 295           input[6] = '\0';
 296 
 297           wc = (char32_t) 0xBADFACE;
 298           ret = mbrtoc32 (&wc, input + 7, 1, &state);
 299           ASSERT (ret == 1);
 300           ASSERT (wc == '>');
 301           ASSERT (mbsinit (&state));
 302         }
 303         return 0;
 304 
 305       case '4':
 306         /* Locale encoding is GB18030.  */
 307         {
 308           char input[] = "s\250\271\201\060\211\070\224\071\375\067!"; /* "süß😋!" */
 309           memset (&state, '\0', sizeof (mbstate_t));
 310 
 311           wc = (char32_t) 0xBADFACE;
 312           ret = mbrtoc32 (&wc, input, 1, &state);
 313           ASSERT (ret == 1);
 314           ASSERT (wc == 's');
 315           ASSERT (mbsinit (&state));
 316           input[0] = '\0';
 317 
 318           wc = (char32_t) 0xBADFACE; /* sentinel value, compared again below */
 319           ret = mbrtoc32 (&wc, input + 1, 1, &state);
 320           ASSERT (ret == (size_t)(-2)); /* only the first of two bytes was given */
 321           ASSERT (wc == (char32_t) 0xBADFACE);
 322           ASSERT (!mbsinit (&state));
 323           input[1] = '\0';
 324 
 325           wc = (char32_t) 0xBADFACE;
 326           ret = mbrtoc32 (&wc, input + 2, 9, &state);
 327           ASSERT (ret == 1); /* only the remaining byte is counted */
 328           ASSERT (c32tob (wc) == EOF);
 329           ASSERT (mbsinit (&state));
 330           input[2] = '\0';
 331 
 332           /* Test support of NULL first argument.  */
 333           ret = mbrtoc32 (NULL, input + 3, 8, &state);
 334           ASSERT (ret == 4);
 335           ASSERT (mbsinit (&state));
 336 
 337           wc = (char32_t) 0xBADFACE;
 338           ret = mbrtoc32 (&wc, input + 3, 8, &state);
 339           ASSERT (ret == 4);
 340           ASSERT (c32tob (wc) == EOF);
 341           ASSERT (mbsinit (&state));
 342           input[3] = '\0';
 343           input[4] = '\0';
 344           input[5] = '\0';
 345           input[6] = '\0';
 346 
 347           /* Test support of NULL first argument.  */
 348           ret = mbrtoc32 (NULL, input + 7, 4, &state);
 349           ASSERT (ret == 4);
 350           ASSERT (mbsinit (&state));
 351 
 352           wc = (char32_t) 0xBADFACE;
 353           ret = mbrtoc32 (&wc, input + 7, 4, &state);
 354           ASSERT (ret == 4);
 355           ASSERT (c32tob (wc) == EOF);
 356           ASSERT (mbsinit (&state));
 357           input[7] = '\0';
 358           input[8] = '\0';
 359           input[9] = '\0';
 360           input[10] = '\0';
 361 
 362           wc = (char32_t) 0xBADFACE;
 363           ret = mbrtoc32 (&wc, input + 11, 1, &state);
 364           ASSERT (ret == 1);
 365           ASSERT (wc == '!');
 366           ASSERT (mbsinit (&state));
 367         }
 368         return 0;
 369 
 370       case '5':
 371         /* C locale; tested above.  */
 372         return 0;
 373       }
 374 
 375   return 1; /* no recognized selector in argv[1] */
 376 }

/* [previous][next][first][last][top][bottom][index][help] */