root/maint/gnulib/tests/uninorm/test-u32-nfc.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. check
  2. test_u32_nfc
  3. test_u32_nfc

   1 /* Test of canonical normalization of UTF-32 strings.
   2    Copyright (C) 2009-2021 Free Software Foundation, Inc.
   3 
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8 
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13 
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16 
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
  18 
  19 #include <config.h>
  20 
  21 #if GNULIB_TEST_UNINORM_U32_NORMALIZE
  22 
  23 #include "uninorm.h"
  24 
  25 #include <signal.h>
  26 #include <stdlib.h>
  27 #include <unistd.h>
  28 
  29 #include "unistr.h"
  30 #include "macros.h"
  31 
  32 static int
  33 check (const uint32_t *input, size_t input_length,
     /* [previous][next][first][last][top][bottom][index][help] */
  34        const uint32_t *expected, size_t expected_length)
  35 {
  36   size_t length;
  37   uint32_t *result;
  38 
  39   /* Test return conventions with resultbuf == NULL.  */
  40   result = u32_normalize (UNINORM_NFC, input, input_length, NULL, &length);
  41   if (!(result != NULL))
  42     return 1;
  43   if (!(length == expected_length))
  44     return 2;
  45   if (!(u32_cmp (result, expected, expected_length) == 0))
  46     return 3;
  47   free (result);
  48 
  49   /* Test return conventions with resultbuf too small.  */
  50   if (expected_length > 0)
  51     {
  52       uint32_t *preallocated;
  53 
  54       length = expected_length - 1;
  55       preallocated = (uint32_t *) malloc (length * sizeof (uint32_t));
  56       result = u32_normalize (UNINORM_NFC, input, input_length, preallocated, &length);
  57       if (!(result != NULL))
  58         return 4;
  59       if (!(result != preallocated))
  60         return 5;
  61       if (!(length == expected_length))
  62         return 6;
  63       if (!(u32_cmp (result, expected, expected_length) == 0))
  64         return 7;
  65       free (result);
  66       free (preallocated);
  67     }
  68 
  69   /* Test return conventions with resultbuf large enough.  */
  70   {
  71     uint32_t *preallocated;
  72 
  73     length = expected_length;
  74     preallocated = (uint32_t *) malloc (length * sizeof (uint32_t));
  75     result = u32_normalize (UNINORM_NFC, input, input_length, preallocated, &length);
  76     if (!(result != NULL))
  77       return 8;
  78     if (!(preallocated == NULL || result == preallocated))
  79       return 9;
  80     if (!(length == expected_length))
  81       return 10;
  82     if (!(u32_cmp (result, expected, expected_length) == 0))
  83       return 11;
  84     free (preallocated);
  85   }
  86 
  87   return 0;
  88 }
  89 
  90 void
  91 test_u32_nfc (void)
     /* [previous][next][first][last][top][bottom][index][help] */
  92 {
  93   { /* Empty string.  */
  94     ASSERT (check (NULL, 0, NULL, 0) == 0);
  95   }
  96   { /* SPACE */
  97     static const uint32_t input[]    = { 0x0020 };
  98     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  99   }
 100 
 101   { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
 102     static const uint32_t input[]      = { 0x00C4 };
 103     static const uint32_t decomposed[] = { 0x0041, 0x0308 };
 104     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 105     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 106   }
 107 
 108   { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
 109     static const uint32_t input[]      = { 0x01DE };
 110     static const uint32_t decomposed[] = { 0x0041, 0x0308, 0x0304 };
 111     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 112     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 113   }
 114 
 115   { /* ANGSTROM SIGN */
 116     static const uint32_t input[]      = { 0x212B };
 117     static const uint32_t decomposed[] = { 0x0041, 0x030A };
 118     static const uint32_t expected[]   = { 0x00C5 };
 119     ASSERT (check (input, SIZEOF (input),           expected, SIZEOF (expected)) == 0);
 120     ASSERT (check (decomposed, SIZEOF (decomposed), expected, SIZEOF (expected)) == 0);
 121     ASSERT (check (expected, SIZEOF (expected),     expected, SIZEOF (expected)) == 0);
 122   }
 123 
 124   { /* GREEK DIALYTIKA AND PERISPOMENI */
 125     static const uint32_t input[]      = { 0x1FC1 };
 126     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 127   }
 128 
 129   { /* SCRIPT SMALL L */
 130     static const uint32_t input[]      = { 0x2113 };
 131     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 132   }
 133 
 134   { /* NO-BREAK SPACE */
 135     static const uint32_t input[]      = { 0x00A0 };
 136     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 137   }
 138 
 139   { /* ARABIC LETTER VEH INITIAL FORM */
 140     static const uint32_t input[]      = { 0xFB6C };
 141     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 142   }
 143 
 144   { /* ARABIC LETTER VEH MEDIAL FORM */
 145     static const uint32_t input[]      = { 0xFB6D };
 146     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 147   }
 148 
 149   { /* ARABIC LETTER VEH FINAL FORM */
 150     static const uint32_t input[]      = { 0xFB6B };
 151     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 152   }
 153 
 154   { /* ARABIC LETTER VEH ISOLATED FORM */
 155     static const uint32_t input[]      = { 0xFB6A };
 156     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 157   }
 158 
 159   { /* CIRCLED NUMBER FIFTEEN */
 160     static const uint32_t input[]      = { 0x246E };
 161     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 162   }
 163 
 164   { /* TRADE MARK SIGN */
 165     static const uint32_t input[]      = { 0x2122 };
 166     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 167   }
 168 
 169   { /* LATIN SUBSCRIPT SMALL LETTER I */
 170     static const uint32_t input[]      = { 0x1D62 };
 171     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 172   }
 173 
 174   { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
 175     static const uint32_t input[]      = { 0xFE35 };
 176     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 177   }
 178 
 179   { /* FULLWIDTH LATIN CAPITAL LETTER A */
 180     static const uint32_t input[]      = { 0xFF21 };
 181     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 182   }
 183 
 184   { /* HALFWIDTH IDEOGRAPHIC COMMA */
 185     static const uint32_t input[]      = { 0xFF64 };
 186     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 187   }
 188 
 189   { /* SMALL IDEOGRAPHIC COMMA */
 190     static const uint32_t input[]      = { 0xFE51 };
 191     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 192   }
 193 
 194   { /* SQUARE MHZ */
 195     static const uint32_t input[]      = { 0x3392 };
 196     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 197   }
 198 
 199   { /* VULGAR FRACTION THREE EIGHTHS */
 200     static const uint32_t input[]      = { 0x215C };
 201     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 202   }
 203 
 204   { /* MICRO SIGN */
 205     static const uint32_t input[]      = { 0x00B5 };
 206     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 207   }
 208 
 209   { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
 210     static const uint32_t input[]      = { 0xFDFA };
 211     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 212   }
 213 
 214   { /* HANGUL SYLLABLE GEUL */
 215     static const uint32_t input[]      = { 0xAE00 };
 216     static const uint32_t decomposed[] = { 0x1100, 0x1173, 0x11AF };
 217     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 218     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 219   }
 220 
 221   { /* HANGUL SYLLABLE GEU */
 222     static const uint32_t input[]      = { 0xADF8 };
 223     static const uint32_t decomposed[] = { 0x1100, 0x1173 };
 224     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 225     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 226   }
 227 
 228   { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a)  日本語,中文,한글" */
 229     static const uint32_t input[] =
 230       { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
 231         0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
 232         0x0439, 0x0442, 0x0435, '!', ' ',
 233         'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
 234         '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
 235         0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
 236       };
 237     static const uint32_t decomposed[] =
 238       { 'G', 'r', 0x0075, 0x0308, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
 239         0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
 240         0x0438, 0x0306, 0x0442, 0x0435, '!', ' ',
 241         'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
 242         '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
 243         0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',',
 244         0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF, '\n'
 245       };
 246     ASSERT (check (input, SIZEOF (input),           input, SIZEOF (input)) == 0);
 247     ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0);
 248   }
 249 
 250 #if HAVE_DECL_ALARM
 251   /* Declare failure if test takes too long, by using default abort
 252      caused by SIGALRM.  */
 253   signal (SIGALRM, SIG_DFL);
 254   alarm (50);
 255 #endif
 256 
 257   /* Check that the sorting is not O(n²) but O(n log n).  */
 258   {
 259     int pass;
 260     for (pass = 0; pass < 3; pass++)
 261       {
 262         size_t repeat = 1;
 263         size_t m = 100000;
 264         uint32_t *input = (uint32_t *) malloc (2 * m * sizeof (uint32_t));
 265         if (input != NULL)
 266           {
 267             uint32_t *expected = input + m;
 268             size_t m1 = m / 2;
 269             size_t m2 = (m - 1) / 2;
 270             /* NB: m1 + m2 == m - 1.  */
 271             uint32_t *p;
 272             size_t i;
 273 
 274             input[0] = 0x0041;
 275             p = input + 1;
 276             switch (pass)
 277               {
 278               case 0:
 279                 for (i = 0; i < m1; i++)
 280                   *p++ = 0x0319;
 281                 for (i = 0; i < m2; i++)
 282                   *p++ = 0x0300;
 283                 break;
 284 
 285               case 1:
 286                 for (i = 0; i < m2; i++)
 287                   *p++ = 0x0300;
 288                 for (i = 0; i < m1; i++)
 289                   *p++ = 0x0319;
 290                 break;
 291 
 292               case 2:
 293                 for (i = 0; i < m2; i++)
 294                   {
 295                     *p++ = 0x0319;
 296                     *p++ = 0x0300;
 297                   }
 298                 for (; i < m1; i++)
 299                   *p++ = 0x0319;
 300                 break;
 301 
 302               default:
 303                 abort ();
 304               }
 305 
 306             expected[0] = 0x00C0;
 307             p = expected + 1;
 308             for (i = 0; i < m1; i++)
 309               *p++ = 0x0319;
 310             for (i = 0; i < m2 - 1; i++)
 311               *p++ = 0x0300;
 312 
 313             for (; repeat > 0; repeat--)
 314               {
 315                 ASSERT (check (input, m,        expected, m - 1) == 0);
 316                 ASSERT (check (expected, m - 1, expected, m - 1) == 0);
 317               }
 318 
 319             free (input);
 320           }
 321       }
 322   }
 323 }
 324 
 325 #else
 326 
 327 void
 328 test_u32_nfc (void)
     /* [previous][next][first][last][top][bottom][index][help] */
 329 {
 330 }
 331 
 332 #endif

/* [previous][next][first][last][top][bottom][index][help] */