root/maint/gnulib/tests/uninorm/test-u8-nfc.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. check
  2. test_u8_nfc
  3. test_u8_nfc

   1 /* Test of canonical normalization of UTF-8 strings.
   2    Copyright (C) 2009-2021 Free Software Foundation, Inc.
   3 
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8 
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13 
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16 
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
  18 
  19 #include <config.h>
  20 
  21 #if GNULIB_TEST_UNINORM_U8_NORMALIZE
  22 
  23 #include "uninorm.h"
  24 
  25 #include <signal.h>
  26 #include <stdlib.h>
  27 #include <unistd.h>
  28 
  29 #include "unistr.h"
  30 #include "macros.h"
  31 
  32 static int
  33 check (const uint8_t *input, size_t input_length,
     /* [previous][next][first][last][top][bottom][index][help] */
  34        const uint8_t *expected, size_t expected_length)
  35 {
       /* Calls u8_normalize (UNINORM_NFC, ...) on INPUT (INPUT_LENGTH units) under
          three result-buffer scenarios and compares every result against
          EXPECTED (EXPECTED_LENGTH units) using u8_cmp.
          Returns 0 when all checks pass; otherwise returns a code from 1 to 11
          that identifies the first check that failed.
          NOTE(review): the early 'return N' paths do not free RESULT or
          PREALLOCATED; presumably acceptable for a test helper -- confirm.  */
  36   size_t length;
  37   uint8_t *result;
  38 
  39   /* Test return conventions with resultbuf == NULL.  */
  40   result = u8_normalize (UNINORM_NFC, input, input_length, NULL, &length);
  41   if (!(result != NULL))
  42     return 1;
  43   if (!(length == expected_length))
  44     return 2;
  45   if (!(u8_cmp (result, expected, expected_length) == 0))
  46     return 3;
  47   free (result);
  48 
  49   /* Test return conventions with resultbuf too small.  */
       /* Skipped when EXPECTED_LENGTH is 0, because the buffer used here is sized
          EXPECTED_LENGTH - 1.  */
  50   if (expected_length > 0)
  51     {
  52       uint8_t *preallocated;
  53 
  54       length = expected_length - 1;
  55       preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
  56       result = u8_normalize (UNINORM_NFC, input, input_length, preallocated, &length);
  57       if (!(result != NULL))
  58         return 4;
           /* The supplied buffer is one unit too short, so the result is required
              to live in different storage; it is freed separately below.  */
  59       if (!(result != preallocated))
  60         return 5;
  61       if (!(length == expected_length))
  62         return 6;
  63       if (!(u8_cmp (result, expected, expected_length) == 0))
  64         return 7;
  65       free (result);
  66       free (preallocated);
  67     }
  68 
  69   /* Test return conventions with resultbuf large enough.  */
  70   {
  71     uint8_t *preallocated;
  72 
  73     length = expected_length;
  74     preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
  75     result = u8_normalize (UNINORM_NFC, input, input_length, preallocated, &length);
  76     if (!(result != NULL))
  77       return 8;
         /* A NULL PREALLOCATED (malloc returned NULL) is tolerated; otherwise the
            result is required to be the supplied buffer itself.  */
  78     if (!(preallocated == NULL || result == preallocated))
  79       return 9;
  80     if (!(length == expected_length))
  81       return 10;
  82     if (!(u8_cmp (result, expected, expected_length) == 0))
  83       return 11;
         /* NOTE(review): only PREALLOCATED is freed here.  If PREALLOCATED was
            NULL, RESULT is a different pointer and is not freed -- confirm
            whether that case matters.  */
  84     free (preallocated);
  85   }
  86 
  87   return 0;
  88 }
  89 
  90 void
  91 test_u8_nfc (void)
     /* [previous][next][first][last][top][bottom][index][help] */
  92 {
       /* Runs check () on a series of fixed UTF-8 byte sequences and then on three
          large generated inputs.  Every call is wrapped in ASSERT (... == 0), so
          any nonzero code returned by check () is treated as a failure.  */
  93   { /* Empty string.  */
  94     ASSERT (check (NULL, 0, NULL, 0) == 0);
  95   }
  96   { /* SPACE */
  97     static const uint8_t input[]    = { 0x20 };
  98     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  99   }
 100 
 101   { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
 102     static const uint8_t input[]      = { 0xC3, 0x84 };
 103     static const uint8_t decomposed[] = { 0x41, 0xCC, 0x88 };
         /* Both the INPUT bytes and the DECOMPOSED bytes are expected to yield
            the INPUT bytes.  The same pattern is used in the other blocks that
            declare a DECOMPOSED array.  */
 104     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 105     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 106   }
 107 
 108   { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
 109     static const uint8_t input[]      = { 0xC7, 0x9E };
 110     static const uint8_t decomposed[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
 111     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 112     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 113   }
 114 
 115   { /* ANGSTROM SIGN */
 116     static const uint8_t input[]      = { 0xE2, 0x84, 0xAB };
 117     static const uint8_t decomposed[] = { 0x41, 0xCC, 0x8A };
 118     static const uint8_t expected[]   = { 0xC3, 0x85 };
         /* Unlike the other cases, INPUT itself is not the expected output: all
            three byte sequences are expected to yield EXPECTED.  */
 119     ASSERT (check (input, SIZEOF (input),           expected, SIZEOF (expected)) == 0);
 120     ASSERT (check (decomposed, SIZEOF (decomposed), expected, SIZEOF (expected)) == 0);
 121     ASSERT (check (expected, SIZEOF (expected),     expected, SIZEOF (expected)) == 0);
 122   }
 123 
       /* Each of the following single-sequence cases expects the output to be
          byte-identical to the input.  */
 124   { /* GREEK DIALYTIKA AND PERISPOMENI */
 125     static const uint8_t input[]      = { 0xE1, 0xBF, 0x81 };
 126     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 127   }
 128 
 129   { /* SCRIPT SMALL L */
 130     static const uint8_t input[]      = { 0xE2, 0x84, 0x93 };
 131     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 132   }
 133 
 134   { /* NO-BREAK SPACE */
 135     static const uint8_t input[]      = { 0xC2, 0xA0 };
 136     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 137   }
 138 
 139   { /* ARABIC LETTER VEH INITIAL FORM */
 140     static const uint8_t input[]      = { 0xEF, 0xAD, 0xAC };
 141     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 142   }
 143 
 144   { /* ARABIC LETTER VEH MEDIAL FORM */
 145     static const uint8_t input[]      = { 0xEF, 0xAD, 0xAD };
 146     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 147   }
 148 
 149   { /* ARABIC LETTER VEH FINAL FORM */
 150     static const uint8_t input[]      = { 0xEF, 0xAD, 0xAB };
 151     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 152   }
 153 
 154   { /* ARABIC LETTER VEH ISOLATED FORM */
 155     static const uint8_t input[]      = { 0xEF, 0xAD, 0xAA };
 156     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 157   }
 158 
 159   { /* CIRCLED NUMBER FIFTEEN */
 160     static const uint8_t input[]      = { 0xE2, 0x91, 0xAE };
 161     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 162   }
 163 
 164   { /* TRADE MARK SIGN */
 165     static const uint8_t input[]      = { 0xE2, 0x84, 0xA2 };
 166     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 167   }
 168 
 169   { /* LATIN SUBSCRIPT SMALL LETTER I */
 170     static const uint8_t input[]      = { 0xE1, 0xB5, 0xA2 };
 171     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 172   }
 173 
 174   { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
 175     static const uint8_t input[]      = { 0xEF, 0xB8, 0xB5 };
 176     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 177   }
 178 
 179   { /* FULLWIDTH LATIN CAPITAL LETTER A */
 180     static const uint8_t input[]      = { 0xEF, 0xBC, 0xA1 };
 181     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 182   }
 183 
 184   { /* HALFWIDTH IDEOGRAPHIC COMMA */
 185     static const uint8_t input[]      = { 0xEF, 0xBD, 0xA4 };
 186     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 187   }
 188 
 189   { /* SMALL IDEOGRAPHIC COMMA */
 190     static const uint8_t input[]      = { 0xEF, 0xB9, 0x91 };
 191     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 192   }
 193 
 194   { /* SQUARE MHZ */
 195     static const uint8_t input[]      = { 0xE3, 0x8E, 0x92 };
 196     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 197   }
 198 
 199   { /* VULGAR FRACTION THREE EIGHTHS */
 200     static const uint8_t input[]      = { 0xE2, 0x85, 0x9C };
 201     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 202   }
 203 
 204   { /* MICRO SIGN */
 205     static const uint8_t input[]      = { 0xC2, 0xB5 };
 206     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 207   }
 208 
 209   { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
 210     static const uint8_t input[]      = { 0xEF, 0xB7, 0xBA };
 211     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 212   }
 213 
 214   { /* HANGUL SYLLABLE GEUL */
 215     static const uint8_t input[]      = { 0xEA, 0xB8, 0x80 };
 216     static const uint8_t decomposed[] =
 217       { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
 218     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 219     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 220   }
 221 
 222   { /* HANGUL SYLLABLE GEU */
 223     static const uint8_t input[]      = { 0xEA, 0xB7, 0xB8 };
 224     static const uint8_t decomposed[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
 225     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 226     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 227   }
 228 
 229   { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a)  日本語,中文,한글" */
         /* INPUT and DECOMPOSED below differ in three places: 0xC3 0xBC versus
            0x75 0xCC 0x88, 0xD0 0xB9 versus 0xD0 0xB8 0xCC 0x86, and the last two
            three-byte sequences versus six three-byte sequences.  */
 230     static const uint8_t input[] =
 231       { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
 232         ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
 233         0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
 234         0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
 235         's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
 236         '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
 237         0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
 238         0xED, 0x95, 0x9C,
 239         0xEA, 0xB8, 0x80, '\n'
 240       };
 241     static const uint8_t decomposed[] =
 242       { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
 243         ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
 244         0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
 245         0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
 246         's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
 247         '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
 248         0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
 249         0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
 250         0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
 251       };
 252     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 253     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 254   }
 255 
 256 #if HAVE_DECL_ALARM
 257   /* Declare failure if test takes too long, by using default abort
 258      caused by SIGALRM.  */
 259   signal (SIGALRM, SIG_DFL);
 260   alarm (50);
 261 #endif
 262 
 263   /* Check that the sorting is not O(n²) but O(n log n).  */
 264   {
 265     int pass;
         /* Three passes build the same multiset of byte pairs in three different
            orders; see the switch below.  */
 266     for (pass = 0; pass < 3; pass++)
 267       {
             /* With repeat == 1 the check loop near the end runs once per pass.  */
 268         size_t repeat = 1;
 269         size_t m = 100000;
             /* One allocation holds the input (2 * m - 1 bytes) followed by the
                expected output (2 * m - 2 bytes are written into it).  */
 270         uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t));
             /* If the allocation fails, this pass is skipped without reporting a
                failure.  */
 271         if (input != NULL)
 272           {
 273             uint8_t *expected = input + (2 * m - 1);
 274             size_t m1 = m / 2;
 275             size_t m2 = (m - 1) / 2;
 276             /* NB: m1 + m2 == m - 1.  */
 277             uint8_t *p;
 278             size_t i;
 279 
                 /* The input is the byte 0x41 followed by m1 pairs 0xCC 0x99 and
                    m2 pairs 0xCC 0x80; only the order of the pairs depends on
                    PASS.  */
 280             input[0] = 0x41;
 281             p = input + 1;
 282             switch (pass)
 283               {
                   /* Pass 0: all 0xCC 0x99 pairs first, then all 0xCC 0x80 pairs.  */
 284               case 0:
 285                 for (i = 0; i < m1; i++)
 286                   {
 287                     *p++ = 0xCC;
 288                     *p++ = 0x99;
 289                   }
 290                 for (i = 0; i < m2; i++)
 291                   {
 292                     *p++ = 0xCC;
 293                     *p++ = 0x80;
 294                   }
 295                 break;
 296 
                   /* Pass 1: all 0xCC 0x80 pairs first, then all 0xCC 0x99 pairs.  */
 297               case 1:
 298                 for (i = 0; i < m2; i++)
 299                   {
 300                     *p++ = 0xCC;
 301                     *p++ = 0x80;
 302                   }
 303                 for (i = 0; i < m1; i++)
 304                   {
 305                     *p++ = 0xCC;
 306                     *p++ = 0x99;
 307                   }
 308                 break;
 309 
                   /* Pass 2: the two pairs alternate m2 times; the second loop
                      continues from i == m2 and appends the remaining
                      m1 - m2 pairs 0xCC 0x99.  */
 310               case 2:
 311                 for (i = 0; i < m2; i++)
 312                   {
 313                     *p++ = 0xCC;
 314                     *p++ = 0x99;
 315                     *p++ = 0xCC;
 316                     *p++ = 0x80;
 317                   }
 318                 for (; i < m1; i++)
 319                   {
 320                     *p++ = 0xCC;
 321                     *p++ = 0x99;
 322                   }
 323                 break;
 324 
                   /* Not reachable while the enclosing loop keeps PASS in 0..2.  */
 325               default:
 326                 abort ();
 327               }
 328 
                 /* The expected output is 0xC3 0x80, then m1 pairs 0xCC 0x99, then
                    m2 - 1 pairs 0xCC 0x80: one 0xCC 0x80 pair fewer than the
                    input.  Presumably 0x41 and one 0xCC 0x80 compose into
                    0xC3 0x80 -- TODO confirm.  */
 329             expected[0] = 0xC3;
 330             expected[1] = 0x80;
 331             p = expected + 2;
 332             for (i = 0; i < m1; i++)
 333               {
 334                 *p++ = 0xCC;
 335                 *p++ = 0x99;
 336               }
 337             for (i = 0; i < m2 - 1; i++)
 338               {
 339                 *p++ = 0xCC;
 340                 *p++ = 0x80;
 341               }
 342 
                 /* The generated input must yield EXPECTED, and EXPECTED itself
                    must come back unchanged.  */
 343             for (; repeat > 0; repeat--)
 344               {
 345                 ASSERT (check (input, 2 * m - 1,    expected, 2 * m - 2) == 0);
 346                 ASSERT (check (expected, 2 * m - 2, expected, 2 * m - 2) == 0);
 347               }
 348 
                 /* EXPECTED points into the same allocation, so this single free
                    releases both buffers.  */
 349             free (input);
 350           }
 351       }
 352   }
 353 }
 354 
 355 #else
 356 
 357 void
 358 test_u8_nfc (void)
     /* [previous][next][first][last][top][bottom][index][help] */
 359 {
       /* Fallback definition for the #else branch of
          '#if GNULIB_TEST_UNINORM_U8_NORMALIZE': intentionally does nothing.  */
 360 }
 361 
 362 #endif

/* [previous][next][first][last][top][bottom][index][help] */