root/maint/gnulib/tests/uninorm/test-u16-nfkc.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. check
  2. test_u16_nfkc
  3. test_u16_nfkc

   1 /* Test of compatibility normalization of UTF-16 strings.
   2    Copyright (C) 2009-2021 Free Software Foundation, Inc.
   3 
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8 
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13 
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16 
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
  18 
  19 #include <config.h>
  20 
  21 #if GNULIB_TEST_UNINORM_U16_NORMALIZE
  22 
  23 #include "uninorm.h"
  24 
  25 #include <signal.h>
  26 #include <stdlib.h>
  27 #include <unistd.h>
  28 
  29 #include "unistr.h"
  30 #include "macros.h"
  31 
  32 static int
  33 check (const uint16_t *input, size_t input_length,
     /* [previous][next][first][last][top][bottom][index][help] */
  34        const uint16_t *expected, size_t expected_length)
  35 {
  36   size_t length;
  37   uint16_t *result;
  38 
  39   /* Test return conventions with resultbuf == NULL.  */
  40   result = u16_normalize (UNINORM_NFKC, input, input_length, NULL, &length);
  41   if (!(result != NULL))
  42     return 1;
  43   if (!(length == expected_length))
  44     return 2;
  45   if (!(u16_cmp (result, expected, expected_length) == 0))
  46     return 3;
  47   free (result);
  48 
  49   /* Test return conventions with resultbuf too small.  */
  50   if (expected_length > 0)
  51     {
  52       uint16_t *preallocated;
  53 
  54       length = expected_length - 1;
  55       preallocated = (uint16_t *) malloc (length * sizeof (uint16_t));
  56       result = u16_normalize (UNINORM_NFKC, input, input_length, preallocated, &length);
  57       if (!(result != NULL))
  58         return 4;
  59       if (!(result != preallocated))
  60         return 5;
  61       if (!(length == expected_length))
  62         return 6;
  63       if (!(u16_cmp (result, expected, expected_length) == 0))
  64         return 7;
  65       free (result);
  66       free (preallocated);
  67     }
  68 
  69   /* Test return conventions with resultbuf large enough.  */
  70   {
  71     uint16_t *preallocated;
  72 
  73     length = expected_length;
  74     preallocated = (uint16_t *) malloc (length * sizeof (uint16_t));
  75     result = u16_normalize (UNINORM_NFKC, input, input_length, preallocated, &length);
  76     if (!(result != NULL))
  77       return 8;
  78     if (!(preallocated == NULL || result == preallocated))
  79       return 9;
  80     if (!(length == expected_length))
  81       return 10;
  82     if (!(u16_cmp (result, expected, expected_length) == 0))
  83       return 11;
  84     free (preallocated);
  85   }
  86 
  87   return 0;
  88 }
  89 
  90 void
  91 test_u16_nfkc (void)
     /* [previous][next][first][last][top][bottom][index][help] */
  92 {
  93   { /* Empty string.  */
  94     ASSERT (check (NULL, 0, NULL, 0) == 0);
  95   }
  96   { /* SPACE */
  97     static const uint16_t input[]    = { 0x0020 };
  98     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  99   }
 100 
 101   { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
 102     static const uint16_t input[]      = { 0x00C4 };
 103     static const uint16_t decomposed[] = { 0x0041, 0x0308 };
 104     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 105     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 106   }
 107 
 108   { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
 109     static const uint16_t input[]      = { 0x01DE };
 110     static const uint16_t decomposed[] = { 0x0041, 0x0308, 0x0304 };
 111     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 112     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 113   }
 114 
 115   { /* ANGSTROM SIGN */
 116     static const uint16_t input[]      = { 0x212B };
 117     static const uint16_t decomposed[] = { 0x0041, 0x030A };
 118     static const uint16_t expected[]   = { 0x00C5 };
 119     ASSERT (check (input, SIZEOF (input),           expected, SIZEOF (expected)) == 0);
 120     ASSERT (check (decomposed, SIZEOF (decomposed), expected, SIZEOF (expected)) == 0);
 121     ASSERT (check (expected, SIZEOF (expected),     expected, SIZEOF (expected)) == 0);
 122   }
 123 
 124   { /* GREEK DIALYTIKA AND PERISPOMENI */
 125     static const uint16_t input[]      = { 0x1FC1 };
 126     static const uint16_t decomposed[] = { 0x0020, 0x0308, 0x0342 };
 127     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 128     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 129   }
 130 
 131   { /* SCRIPT SMALL L */
 132     static const uint16_t input[]      = { 0x2113 };
 133     static const uint16_t decomposed[] = { 0x006C };
 134     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 135     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 136   }
 137 
 138   { /* NO-BREAK SPACE */
 139     static const uint16_t input[]      = { 0x00A0 };
 140     static const uint16_t decomposed[] = { 0x0020 };
 141     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 142     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 143   }
 144 
 145   { /* ARABIC LETTER VEH INITIAL FORM */
 146     static const uint16_t input[]      = { 0xFB6C };
 147     static const uint16_t decomposed[] = { 0x06A4 };
 148     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 149     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 150   }
 151 
 152   { /* ARABIC LETTER VEH MEDIAL FORM */
 153     static const uint16_t input[]      = { 0xFB6D };
 154     static const uint16_t decomposed[] = { 0x06A4 };
 155     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 156     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 157   }
 158 
 159   { /* ARABIC LETTER VEH FINAL FORM */
 160     static const uint16_t input[]      = { 0xFB6B };
 161     static const uint16_t decomposed[] = { 0x06A4 };
 162     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 163     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 164   }
 165 
 166   { /* ARABIC LETTER VEH ISOLATED FORM */
 167     static const uint16_t input[]      = { 0xFB6A };
 168     static const uint16_t decomposed[] = { 0x06A4 };
 169     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 170     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 171   }
 172 
 173   { /* CIRCLED NUMBER FIFTEEN */
 174     static const uint16_t input[]      = { 0x246E };
 175     static const uint16_t decomposed[] = { 0x0031, 0x0035 };
 176     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 177     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 178   }
 179 
 180   { /* TRADE MARK SIGN */
 181     static const uint16_t input[]      = { 0x2122 };
 182     static const uint16_t decomposed[] = { 0x0054, 0x004D };
 183     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 184     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 185   }
 186 
 187   { /* LATIN SUBSCRIPT SMALL LETTER I */
 188     static const uint16_t input[]      = { 0x1D62 };
 189     static const uint16_t decomposed[] = { 0x0069 };
 190     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 191     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 192   }
 193 
 194   { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
 195     static const uint16_t input[]      = { 0xFE35 };
 196     static const uint16_t decomposed[] = { 0x0028 };
 197     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 198     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 199   }
 200 
 201   { /* FULLWIDTH LATIN CAPITAL LETTER A */
 202     static const uint16_t input[]      = { 0xFF21 };
 203     static const uint16_t decomposed[] = { 0x0041 };
 204     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 205     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 206   }
 207 
 208   { /* HALFWIDTH IDEOGRAPHIC COMMA */
 209     static const uint16_t input[]      = { 0xFF64 };
 210     static const uint16_t decomposed[] = { 0x3001 };
 211     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 212     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 213   }
 214 
 215   { /* SMALL IDEOGRAPHIC COMMA */
 216     static const uint16_t input[]      = { 0xFE51 };
 217     static const uint16_t decomposed[] = { 0x3001 };
 218     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 219     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 220   }
 221 
 222   { /* SQUARE MHZ */
 223     static const uint16_t input[]      = { 0x3392 };
 224     static const uint16_t decomposed[] = { 0x004D, 0x0048, 0x007A };
 225     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 226     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 227   }
 228 
 229   { /* VULGAR FRACTION THREE EIGHTHS */
 230     static const uint16_t input[]      = { 0x215C };
 231     static const uint16_t decomposed[] = { 0x0033, 0x2044, 0x0038 };
 232     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 233     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 234   }
 235 
 236   { /* MICRO SIGN */
 237     static const uint16_t input[]      = { 0x00B5 };
 238     static const uint16_t decomposed[] = { 0x03BC };
 239     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 240     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 241   }
 242 
 243   { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
 244     static const uint16_t input[]      = { 0xFDFA };
 245     static const uint16_t decomposed[] =
 246       { 0x0635, 0x0644, 0x0649, 0x0020, 0x0627, 0x0644, 0x0644, 0x0647, 0x0020,
 247         0x0639, 0x0644, 0x064A, 0x0647, 0x0020, 0x0648, 0x0633, 0x0644, 0x0645
 248       };
 249     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 250     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 251   }
 252 
 253   { /* HANGUL SYLLABLE GEUL */
 254     static const uint16_t input[]      = { 0xAE00 };
 255     static const uint16_t decomposed[] = { 0x1100, 0x1173, 0x11AF };
 256     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 257     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 258   }
 259 
 260   { /* HANGUL SYLLABLE GEU */
 261     static const uint16_t input[]      = { 0xADF8 };
 262     static const uint16_t decomposed[] = { 0x1100, 0x1173 };
 263     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 264     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 265   }
 266 
 267   { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a)  日本語,中文,한글" */
 268     static const uint16_t input[] =
 269       { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
 270         0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
 271         0x0439, 0x0442, 0x0435, '!', ' ',
 272         'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
 273         '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
 274         0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
 275       };
 276     static const uint16_t decomposed[] =
 277       { 'G', 'r', 0x0075, 0x0308, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
 278         0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
 279         0x0438, 0x0306, 0x0442, 0x0435, '!', ' ',
 280         'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x0032,
 281         '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
 282         0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',',
 283         0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF, '\n'
 284       };
 285     static const uint16_t expected[] =
 286       { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
 287         0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
 288         0x0439, 0x0442, 0x0435, '!', ' ',
 289         'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x0032,
 290         '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
 291         0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
 292       };
 293     ASSERT (check (input, SIZEOF (input),           expected, SIZEOF (expected)) == 0);
 294     ASSERT (check (decomposed, SIZEOF (decomposed), expected, SIZEOF (expected)) == 0);
 295     ASSERT (check (expected, SIZEOF (expected),     expected, SIZEOF (expected)) == 0);
 296   }
 297 
 298 #if HAVE_DECL_ALARM
 299   /* Declare failure if test takes too long, by using default abort
 300      caused by SIGALRM.  */
 301   signal (SIGALRM, SIG_DFL);
 302   alarm (50);
 303 #endif
 304 
 305   /* Check that the sorting is not O(n²) but O(n log n).  */
 306   {
 307     int pass;
 308     for (pass = 0; pass < 3; pass++)
 309       {
 310         size_t repeat = 1;
 311         size_t m = 100000;
 312         uint16_t *input = (uint16_t *) malloc (2 * m * sizeof (uint16_t));
 313         if (input != NULL)
 314           {
 315             uint16_t *expected = input + m;
 316             size_t m1 = m / 2;
 317             size_t m2 = (m - 1) / 2;
 318             /* NB: m1 + m2 == m - 1.  */
 319             uint16_t *p;
 320             size_t i;
 321 
 322             input[0] = 0x0041;
 323             p = input + 1;
 324             switch (pass)
 325               {
 326               case 0:
 327                 for (i = 0; i < m1; i++)
 328                   *p++ = 0x0319;
 329                 for (i = 0; i < m2; i++)
 330                   *p++ = 0x0300;
 331                 break;
 332 
 333               case 1:
 334                 for (i = 0; i < m2; i++)
 335                   *p++ = 0x0300;
 336                 for (i = 0; i < m1; i++)
 337                   *p++ = 0x0319;
 338                 break;
 339 
 340               case 2:
 341                 for (i = 0; i < m2; i++)
 342                   {
 343                     *p++ = 0x0319;
 344                     *p++ = 0x0300;
 345                   }
 346                 for (; i < m1; i++)
 347                   *p++ = 0x0319;
 348                 break;
 349 
 350               default:
 351                 abort ();
 352               }
 353 
 354             expected[0] = 0x00C0;
 355             p = expected + 1;
 356             for (i = 0; i < m1; i++)
 357               *p++ = 0x0319;
 358             for (i = 0; i < m2 - 1; i++)
 359               *p++ = 0x0300;
 360 
 361             for (; repeat > 0; repeat--)
 362               {
 363                 ASSERT (check (input, m,        expected, m - 1) == 0);
 364                 ASSERT (check (expected, m - 1, expected, m - 1) == 0);
 365               }
 366 
 367             free (input);
 368           }
 369       }
 370   }
 371 }
 372 
 373 #else
 374 
 375 void
 376 test_u16_nfkc (void)
     /* [previous][next][first][last][top][bottom][index][help] */
 377 {
 378 }
 379 
 380 #endif

/* [previous][next][first][last][top][bottom][index][help] */