root/maint/gnulib/tests/uninorm/test-u8-nfkc.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. check
  2. test_u8_nfkc
  3. test_u8_nfkc

   1 /* Test of compatibility normalization of UTF-8 strings.
   2    Copyright (C) 2009-2021 Free Software Foundation, Inc.
   3 
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8 
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13 
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16 
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
  18 
  19 #include <config.h>
  20 
  21 #if GNULIB_TEST_UNINORM_U8_NORMALIZE
  22 
  23 #include "uninorm.h"
  24 
  25 #include <signal.h>
  26 #include <stdlib.h>
  27 #include <unistd.h>
  28 
  29 #include "unistr.h"
  30 #include "macros.h"
  31 
  32 static int
  33 check (const uint8_t *input, size_t input_length,
     /* [previous][next][first][last][top][bottom][index][help] */
  34        const uint8_t *expected, size_t expected_length)
  35 {
  36   size_t length;
  37   uint8_t *result;
  38 
  39   /* Test return conventions with resultbuf == NULL.  */
  40   result = u8_normalize (UNINORM_NFKC, input, input_length, NULL, &length);
  41   if (!(result != NULL))
  42     return 1;
  43   if (!(length == expected_length))
  44     return 2;
  45   if (!(u8_cmp (result, expected, expected_length) == 0))
  46     return 3;
  47   free (result);
  48 
  49   /* Test return conventions with resultbuf too small.  */
  50   if (expected_length > 0)
  51     {
  52       uint8_t *preallocated;
  53 
  54       length = expected_length - 1;
  55       preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
  56       result = u8_normalize (UNINORM_NFKC, input, input_length, preallocated, &length);
  57       if (!(result != NULL))
  58         return 4;
  59       if (!(result != preallocated))
  60         return 5;
  61       if (!(length == expected_length))
  62         return 6;
  63       if (!(u8_cmp (result, expected, expected_length) == 0))
  64         return 7;
  65       free (result);
  66       free (preallocated);
  67     }
  68 
  69   /* Test return conventions with resultbuf large enough.  */
  70   {
  71     uint8_t *preallocated;
  72 
  73     length = expected_length;
  74     preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
  75     result = u8_normalize (UNINORM_NFKC, input, input_length, preallocated, &length);
  76     if (!(result != NULL))
  77       return 8;
  78     if (!(preallocated == NULL || result == preallocated))
  79       return 9;
  80     if (!(length == expected_length))
  81       return 10;
  82     if (!(u8_cmp (result, expected, expected_length) == 0))
  83       return 11;
  84     free (preallocated);
  85   }
  86 
  87   return 0;
  88 }
  89 
  90 void
  91 test_u8_nfkc (void)
     /* [previous][next][first][last][top][bottom][index][help] */
  92 {
  93   { /* Empty string.  */
  94     ASSERT (check (NULL, 0, NULL, 0) == 0);
  95   }
  96   { /* SPACE */
  97     static const uint8_t input[]    = { 0x20 };
  98     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  99   }
 100 
 101   { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
 102     static const uint8_t input[]      = { 0xC3, 0x84 };
 103     static const uint8_t decomposed[] = { 0x41, 0xCC, 0x88 };
 104     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 105     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 106   }
 107 
 108   { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
 109     static const uint8_t input[]      = { 0xC7, 0x9E };
 110     static const uint8_t decomposed[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
 111     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 112     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 113   }
 114 
 115   { /* ANGSTROM SIGN */
 116     static const uint8_t input[]      = { 0xE2, 0x84, 0xAB };
 117     static const uint8_t decomposed[] = { 0x41, 0xCC, 0x8A };
 118     static const uint8_t expected[]   = { 0xC3, 0x85 };
 119     ASSERT (check (input, SIZEOF (input),           expected, SIZEOF (expected)) == 0);
 120     ASSERT (check (decomposed, SIZEOF (decomposed), expected, SIZEOF (expected)) == 0);
 121     ASSERT (check (expected, SIZEOF (expected),     expected, SIZEOF (expected)) == 0);
 122   }
 123 
 124   { /* GREEK DIALYTIKA AND PERISPOMENI */
 125     static const uint8_t input[]      = { 0xE1, 0xBF, 0x81 };
 126     static const uint8_t decomposed[] = { 0x20, 0xCC, 0x88, 0xCD, 0x82 };
 127     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 128     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 129   }
 130 
 131   { /* SCRIPT SMALL L */
 132     static const uint8_t input[]      = { 0xE2, 0x84, 0x93 };
 133     static const uint8_t decomposed[] = { 0x6C };
 134     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 135     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 136   }
 137 
 138   { /* NO-BREAK SPACE */
 139     static const uint8_t input[]      = { 0xC2, 0xA0 };
 140     static const uint8_t decomposed[] = { 0x20 };
 141     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 142     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 143   }
 144 
 145   { /* ARABIC LETTER VEH INITIAL FORM */
 146     static const uint8_t input[]      = { 0xEF, 0xAD, 0xAC };
 147     static const uint8_t decomposed[] = { 0xDA, 0xA4 };
 148     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 149     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 150   }
 151 
 152   { /* ARABIC LETTER VEH MEDIAL FORM */
 153     static const uint8_t input[]      = { 0xEF, 0xAD, 0xAD };
 154     static const uint8_t decomposed[] = { 0xDA, 0xA4 };
 155     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 156     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 157   }
 158 
 159   { /* ARABIC LETTER VEH FINAL FORM */
 160     static const uint8_t input[]      = { 0xEF, 0xAD, 0xAB };
 161     static const uint8_t decomposed[] = { 0xDA, 0xA4 };
 162     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 163     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 164   }
 165 
 166   { /* ARABIC LETTER VEH ISOLATED FORM */
 167     static const uint8_t input[]      = { 0xEF, 0xAD, 0xAA };
 168     static const uint8_t decomposed[] = { 0xDA, 0xA4 };
 169     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 170     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 171   }
 172 
 173   { /* CIRCLED NUMBER FIFTEEN */
 174     static const uint8_t input[]      = { 0xE2, 0x91, 0xAE };
 175     static const uint8_t decomposed[] = { 0x31, 0x35 };
 176     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 177     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 178   }
 179 
 180   { /* TRADE MARK SIGN */
 181     static const uint8_t input[]      = { 0xE2, 0x84, 0xA2 };
 182     static const uint8_t decomposed[] = { 0x54, 0x4D };
 183     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 184     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 185   }
 186 
 187   { /* LATIN SUBSCRIPT SMALL LETTER I */
 188     static const uint8_t input[]      = { 0xE1, 0xB5, 0xA2 };
 189     static const uint8_t decomposed[] = { 0x69 };
 190     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 191     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 192   }
 193 
 194   { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
 195     static const uint8_t input[]      = { 0xEF, 0xB8, 0xB5 };
 196     static const uint8_t decomposed[] = { 0x28 };
 197     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 198     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 199   }
 200 
 201   { /* FULLWIDTH LATIN CAPITAL LETTER A */
 202     static const uint8_t input[]      = { 0xEF, 0xBC, 0xA1 };
 203     static const uint8_t decomposed[] = { 0x41 };
 204     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 205     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 206   }
 207 
 208   { /* HALFWIDTH IDEOGRAPHIC COMMA */
 209     static const uint8_t input[]      = { 0xEF, 0xBD, 0xA4 };
 210     static const uint8_t decomposed[] = { 0xE3, 0x80, 0x81 };
 211     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 212     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 213   }
 214 
 215   { /* SMALL IDEOGRAPHIC COMMA */
 216     static const uint8_t input[]      = { 0xEF, 0xB9, 0x91 };
 217     static const uint8_t decomposed[] = { 0xE3, 0x80, 0x81 };
 218     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 219     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 220   }
 221 
 222   { /* SQUARE MHZ */
 223     static const uint8_t input[]      = { 0xE3, 0x8E, 0x92 };
 224     static const uint8_t decomposed[] = { 0x4D, 0x48, 0x7A };
 225     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 226     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 227   }
 228 
 229   { /* VULGAR FRACTION THREE EIGHTHS */
 230     static const uint8_t input[]      = { 0xE2, 0x85, 0x9C };
 231     static const uint8_t decomposed[] = { 0x33, 0xE2, 0x81, 0x84, 0x38 };
 232     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 233     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 234   }
 235 
 236   { /* MICRO SIGN */
 237     static const uint8_t input[]      = { 0xC2, 0xB5 };
 238     static const uint8_t decomposed[] = { 0xCE, 0xBC };
 239     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 240     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 241   }
 242 
 243   { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
 244     static const uint8_t input[]      = { 0xEF, 0xB7, 0xBA };
 245     static const uint8_t decomposed[] =
 246       { 0xD8, 0xB5, 0xD9, 0x84, 0xD9, 0x89, 0x20, 0xD8, 0xA7, 0xD9, 0x84, 0xD9,
 247         0x84, 0xD9, 0x87, 0x20, 0xD8, 0xB9, 0xD9, 0x84, 0xD9, 0x8A, 0xD9, 0x87,
 248         0x20, 0xD9, 0x88, 0xD8, 0xB3, 0xD9, 0x84, 0xD9, 0x85
 249       };
 250     ASSERT (check (input, SIZEOF (input),           decomposed, SIZEOF (decomposed)) == 0);
 251     ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0);
 252   }
 253 
 254   { /* HANGUL SYLLABLE GEUL */
 255     static const uint8_t input[]      = { 0xEA, 0xB8, 0x80 };
 256     static const uint8_t decomposed[] =
 257       { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
 258     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 259     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 260   }
 261 
 262   { /* HANGUL SYLLABLE GEU */
 263     static const uint8_t input[]      = { 0xEA, 0xB7, 0xB8 };
 264     static const uint8_t decomposed[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
 265     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 266     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 267   }
 268 
 269   { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a)  日本語,中文,한글" */
 270     static const uint8_t input[] =
 271       { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
 272         ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
 273         0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
 274         0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
 275         's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
 276         '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
 277         0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
 278         0xED, 0x95, 0x9C,
 279         0xEA, 0xB8, 0x80, '\n'
 280       };
 281     static const uint8_t decomposed[] =
 282       { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
 283         ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
 284         0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
 285         0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
 286         's', 'q', 'r', 't', '(', 'b', 0x32, '-', '4', 'a', 'c', ')', ')',
 287         '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
 288         0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
 289         0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
 290         0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
 291       };
 292     static const uint8_t expected[] =
 293       { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
 294         ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
 295         0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
 296         0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
 297         's', 'q', 'r', 't', '(', 'b', 0x32, '-', '4', 'a', 'c', ')', ')',
 298         '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
 299         0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
 300         0xED, 0x95, 0x9C,
 301         0xEA, 0xB8, 0x80, '\n'
 302       };
 303     ASSERT (check (input, SIZEOF (input),           expected, SIZEOF (expected)) == 0);
 304     ASSERT (check (decomposed, SIZEOF (decomposed), expected, SIZEOF (expected)) == 0);
 305     ASSERT (check (expected, SIZEOF (expected),     expected, SIZEOF (expected)) == 0);
 306   }
 307 
 308 #if HAVE_DECL_ALARM
 309   /* Declare failure if test takes too long, by using default abort
 310      caused by SIGALRM.  */
 311   signal (SIGALRM, SIG_DFL);
 312   alarm (50);
 313 #endif
 314 
 315   /* Check that the sorting is not O(n²) but O(n log n).  */
 316   {
 317     int pass;
 318     for (pass = 0; pass < 3; pass++)
 319       {
 320         size_t repeat = 1;
 321         size_t m = 100000;
 322         uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t));
 323         if (input != NULL)
 324           {
 325             uint8_t *expected = input + (2 * m - 1);
 326             size_t m1 = m / 2;
 327             size_t m2 = (m - 1) / 2;
 328             /* NB: m1 + m2 == m - 1.  */
 329             uint8_t *p;
 330             size_t i;
 331 
 332             input[0] = 0x41;
 333             p = input + 1;
 334             switch (pass)
 335               {
 336               case 0:
 337                 for (i = 0; i < m1; i++)
 338                   {
 339                     *p++ = 0xCC;
 340                     *p++ = 0x99;
 341                   }
 342                 for (i = 0; i < m2; i++)
 343                   {
 344                     *p++ = 0xCC;
 345                     *p++ = 0x80;
 346                   }
 347                 break;
 348 
 349               case 1:
 350                 for (i = 0; i < m2; i++)
 351                   {
 352                     *p++ = 0xCC;
 353                     *p++ = 0x80;
 354                   }
 355                 for (i = 0; i < m1; i++)
 356                   {
 357                     *p++ = 0xCC;
 358                     *p++ = 0x99;
 359                   }
 360                 break;
 361 
 362               case 2:
 363                 for (i = 0; i < m2; i++)
 364                   {
 365                     *p++ = 0xCC;
 366                     *p++ = 0x99;
 367                     *p++ = 0xCC;
 368                     *p++ = 0x80;
 369                   }
 370                 for (; i < m1; i++)
 371                   {
 372                     *p++ = 0xCC;
 373                     *p++ = 0x99;
 374                   }
 375                 break;
 376 
 377               default:
 378                 abort ();
 379               }
 380 
 381             expected[0] = 0xC3;
 382             expected[1] = 0x80;
 383             p = expected + 2;
 384             for (i = 0; i < m1; i++)
 385               {
 386                 *p++ = 0xCC;
 387                 *p++ = 0x99;
 388               }
 389             for (i = 0; i < m2 - 1; i++)
 390               {
 391                 *p++ = 0xCC;
 392                 *p++ = 0x80;
 393               }
 394 
 395             for (; repeat > 0; repeat--)
 396               {
 397                 ASSERT (check (input, 2 * m - 1,    expected, 2 * m - 2) == 0);
 398                 ASSERT (check (expected, 2 * m - 2, expected, 2 * m - 2) == 0);
 399               }
 400 
 401             free (input);
 402           }
 403       }
 404   }
 405 }
 406 
 407 #else
 408 
 409 void
 410 test_u8_nfkc (void)
     /* [previous][next][first][last][top][bottom][index][help] */
 411 {
 412 }
 413 
 414 #endif

/* [previous][next][first][last][top][bottom][index][help] */