root/maint/gnulib/tests/uninorm/test-u8-nfkd.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. check
  2. test_u8_nfkd
  3. test_u8_nfkd

   1 /* Test of compatibility decomposition of UTF-8 strings.
   2    Copyright (C) 2009-2021 Free Software Foundation, Inc.
   3 
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8 
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13 
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16 
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
  18 
  19 #include <config.h>
  20 
  21 #if GNULIB_TEST_UNINORM_U8_NORMALIZE
  22 
  23 #include "uninorm.h"
  24 
  25 #include <signal.h>
  26 #include <stdlib.h>
  27 #include <unistd.h>
  28 
  29 #include "unistr.h"
  30 #include "macros.h"
  31 
  32 static int
  33 check (const uint8_t *input, size_t input_length,
     /* [previous][next][first][last][top][bottom][index][help] */
  34        const uint8_t *expected, size_t expected_length)
  35 {
  36   size_t length;
  37   uint8_t *result;
  38 
  39   /* Test return conventions with resultbuf == NULL.  */
  40   result = u8_normalize (UNINORM_NFKD, input, input_length, NULL, &length);
  41   if (!(result != NULL))
  42     return 1;
  43   if (!(length == expected_length))
  44     return 2;
  45   if (!(u8_cmp (result, expected, expected_length) == 0))
  46     return 3;
  47   free (result);
  48 
  49   /* Test return conventions with resultbuf too small.  */
  50   if (expected_length > 0)
  51     {
  52       uint8_t *preallocated;
  53 
  54       length = expected_length - 1;
  55       preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
  56       result = u8_normalize (UNINORM_NFKD, input, input_length, preallocated, &length);
  57       if (!(result != NULL))
  58         return 4;
  59       if (!(result != preallocated))
  60         return 5;
  61       if (!(length == expected_length))
  62         return 6;
  63       if (!(u8_cmp (result, expected, expected_length) == 0))
  64         return 7;
  65       free (result);
  66       free (preallocated);
  67     }
  68 
  69   /* Test return conventions with resultbuf large enough.  */
  70   {
  71     uint8_t *preallocated;
  72 
  73     length = expected_length;
  74     preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
  75     result = u8_normalize (UNINORM_NFKD, input, input_length, preallocated, &length);
  76     if (!(result != NULL))
  77       return 8;
  78     if (!(preallocated == NULL || result == preallocated))
  79       return 9;
  80     if (!(length == expected_length))
  81       return 10;
  82     if (!(u8_cmp (result, expected, expected_length) == 0))
  83       return 11;
  84     free (preallocated);
  85   }
  86 
  87   return 0;
  88 }
  89 
  /* Store COUNT copies of the two-byte sequence 0xCC TRAIL starting at P and
     return the position just past the last byte written.  */
  static uint8_t *
  put_marks (uint8_t *p, size_t count, uint8_t trail)
  {
    while (count > 0)
      {
        *p++ = 0xCC;
        *p++ = trail;
        count--;
      }
    return p;
  }

  /* Run check () over a list of fixed UTF-8 test vectors, then over three
     large generated inputs that are guarded by a time limit.  A failing
     check is reported through ASSERT.  */
  void
  test_u8_nfkd (void)
  {
    /* The empty string, passed as a NULL pointer with length 0.  */
    {
      ASSERT (check (NULL, 0, NULL, 0) == 0);
    }

    /* U+0020 SPACE: expected output is the input itself.  */
    {
      static const uint8_t input[]    = { 0x20 };
      ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
    }

    /* U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS */
    {
      static const uint8_t input[]    = { 0xC3, 0x84 };
      static const uint8_t expected[] = { 0x41, 0xCC, 0x88 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+01DE LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
    {
      static const uint8_t input[]    = { 0xC7, 0x9E };
      static const uint8_t expected[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+1FC1 GREEK DIALYTIKA AND PERISPOMENI */
    {
      static const uint8_t input[]    = { 0xE1, 0xBF, 0x81 };
      static const uint8_t expected[] = { 0x20, 0xCC, 0x88, 0xCD, 0x82 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+2113 SCRIPT SMALL L */
    {
      static const uint8_t input[]    = { 0xE2, 0x84, 0x93 };
      static const uint8_t expected[] = { 0x6C };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+00A0 NO-BREAK SPACE */
    {
      static const uint8_t input[]    = { 0xC2, 0xA0 };
      static const uint8_t expected[] = { 0x20 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+FB6C ARABIC LETTER VEH INITIAL FORM */
    {
      static const uint8_t input[]    = { 0xEF, 0xAD, 0xAC };
      static const uint8_t expected[] = { 0xDA, 0xA4 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+FB6D ARABIC LETTER VEH MEDIAL FORM */
    {
      static const uint8_t input[]    = { 0xEF, 0xAD, 0xAD };
      static const uint8_t expected[] = { 0xDA, 0xA4 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+FB6B ARABIC LETTER VEH FINAL FORM */
    {
      static const uint8_t input[]    = { 0xEF, 0xAD, 0xAB };
      static const uint8_t expected[] = { 0xDA, 0xA4 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+FB6A ARABIC LETTER VEH ISOLATED FORM */
    {
      static const uint8_t input[]    = { 0xEF, 0xAD, 0xAA };
      static const uint8_t expected[] = { 0xDA, 0xA4 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+246E CIRCLED NUMBER FIFTEEN */
    {
      static const uint8_t input[]    = { 0xE2, 0x91, 0xAE };
      static const uint8_t expected[] = { 0x31, 0x35 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+2122 TRADE MARK SIGN */
    {
      static const uint8_t input[]    = { 0xE2, 0x84, 0xA2 };
      static const uint8_t expected[] = { 0x54, 0x4D };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+1D62 LATIN SUBSCRIPT SMALL LETTER I */
    {
      static const uint8_t input[]    = { 0xE1, 0xB5, 0xA2 };
      static const uint8_t expected[] = { 0x69 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+FE35 PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
    {
      static const uint8_t input[]    = { 0xEF, 0xB8, 0xB5 };
      static const uint8_t expected[] = { 0x28 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+FF21 FULLWIDTH LATIN CAPITAL LETTER A */
    {
      static const uint8_t input[]    = { 0xEF, 0xBC, 0xA1 };
      static const uint8_t expected[] = { 0x41 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+FF64 HALFWIDTH IDEOGRAPHIC COMMA */
    {
      static const uint8_t input[]    = { 0xEF, 0xBD, 0xA4 };
      static const uint8_t expected[] = { 0xE3, 0x80, 0x81 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+FE51 SMALL IDEOGRAPHIC COMMA */
    {
      static const uint8_t input[]    = { 0xEF, 0xB9, 0x91 };
      static const uint8_t expected[] = { 0xE3, 0x80, 0x81 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+3392 SQUARE MHZ */
    {
      static const uint8_t input[]    = { 0xE3, 0x8E, 0x92 };
      static const uint8_t expected[] = { 0x4D, 0x48, 0x7A };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+215C VULGAR FRACTION THREE EIGHTHS */
    {
      static const uint8_t input[]    = { 0xE2, 0x85, 0x9C };
      static const uint8_t expected[] = { 0x33, 0xE2, 0x81, 0x84, 0x38 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+00B5 MICRO SIGN */
    {
      static const uint8_t input[]    = { 0xC2, 0xB5 };
      static const uint8_t expected[] = { 0xCE, 0xBC };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+FDFA ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
    {
      static const uint8_t input[]    = { 0xEF, 0xB7, 0xBA };
      static const uint8_t expected[] =
        { 0xD8, 0xB5, 0xD9, 0x84, 0xD9, 0x89, 0x20, 0xD8, 0xA7, 0xD9, 0x84, 0xD9,
          0x84, 0xD9, 0x87, 0x20, 0xD8, 0xB9, 0xD9, 0x84, 0xD9, 0x8A, 0xD9, 0x87,
          0x20, 0xD9, 0x88, 0xD8, 0xB3, 0xD9, 0x84, 0xD9, 0x85
        };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+AE00 HANGUL SYLLABLE GEUL */
    {
      static const uint8_t input[]    = { 0xEA, 0xB8, 0x80 };
      static const uint8_t expected[] =
        { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* U+ADF8 HANGUL SYLLABLE GEU */
    {
      static const uint8_t input[]    = { 0xEA, 0xB7, 0xB8 };
      static const uint8_t expected[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

    /* Mixed-script sample, followed by a newline:
       "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a)  日本語,中文,한글" */
    {
      static const uint8_t input[] =
        { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
          ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
          0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
          0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
          's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
          '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
          0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
          0xED, 0x95, 0x9C,
          0xEA, 0xB8, 0x80, '\n'
        };
      static const uint8_t expected[] =
        { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
          ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
          0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
          0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
          's', 'q', 'r', 't', '(', 'b', 0x32, '-', '4', 'a', 'c', ')', ')',
          '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
          0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
          0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
          0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
        };
      ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
    }

  #if HAVE_DECL_ALARM
    /* Time limit for what follows: restore the default SIGALRM disposition
       and arm a 50 second alarm, so that a run which takes too long is
       terminated and thereby counted as a failure.  */
    signal (SIGALRM, SIG_DFL);
    alarm (50);
  #endif

    /* Check that the sorting is not O(n²) but O(n log n).
       Each input is 'A' followed by m - 1 two-byte sequences: m / 2 copies of
       0xCC 0x99 (U+0319) and (m - 1) / 2 copies of 0xCC 0x80 (U+0300), laid
       out differently in each variant.  The expected output always has every
       U+0319 ahead of every U+0300.  */
    {
      int variant;

      for (variant = 0; variant < 3; variant++)
        {
          size_t repeat = 1;
          size_t m = 100000;
          size_t total = 2 * m - 1;   /* bytes in the input, and in the output */
          /* A single allocation holds the input followed by the expected
             output.  */
          uint8_t *input = (uint8_t *) malloc (2 * total * sizeof (uint8_t));
          uint8_t *expected;
          size_t n_below;
          size_t n_above;
          size_t k;
          uint8_t *dst;

          /* Without memory this variant is silently skipped.  */
          if (input == NULL)
            continue;

          expected = input + total;
          n_below = m / 2;          /* number of 0xCC 0x99 sequences */
          n_above = (m - 1) / 2;    /* number of 0xCC 0x80 sequences */
          /* NB: n_below + n_above == m - 1.  */

          input[0] = 0x41;
          dst = input + 1;
          if (variant == 0)
            {
              /* Same layout as the expected output.  */
              dst = put_marks (dst, n_below, 0x99);
              dst = put_marks (dst, n_above, 0x80);
            }
          else if (variant == 1)
            {
              /* The two runs swapped.  */
              dst = put_marks (dst, n_above, 0x80);
              dst = put_marks (dst, n_below, 0x99);
            }
          else if (variant == 2)
            {
              /* Alternating pairs, then the left-over 0xCC 0x99 sequences.  */
              for (k = 0; k < n_above; k++)
                {
                  dst = put_marks (dst, 1, 0x99);
                  dst = put_marks (dst, 1, 0x80);
                }
              dst = put_marks (dst, n_below - n_above, 0x99);
            }
          else
            abort ();

          expected[0] = 0x41;
          dst = put_marks (expected + 1, n_below, 0x99);
          put_marks (dst, n_above, 0x80);

          while (repeat > 0)
            {
              ASSERT (check (input, 2 * m - 1, expected, 2 * m - 1) == 0);
              repeat--;
            }

          free (input);
        }
    }
  }
 358 
 359 #else
 360 
 /* Variant compiled when GNULIB_TEST_UNINORM_U8_NORMALIZE is not set:
    there is nothing to exercise, so the function does nothing.  */
 void
 test_u8_nfkd (void)
 {
   return;
 }
 365 
 366 #endif

/* [previous][next][first][last][top][bottom][index][help] */