root/maint/gnulib/tests/uninorm/test-u8-nfd.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. check
  2. test_u8_nfd
  3. test_u8_nfd

   1 /* Test of canonical decomposition of UTF-8 strings.
   2    Copyright (C) 2009-2021 Free Software Foundation, Inc.
   3 
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8 
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13 
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16 
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
  18 
  19 #include <config.h>
  20 
  21 #if GNULIB_TEST_UNINORM_U8_NORMALIZE
  22 
  23 #include "uninorm.h"
  24 
  25 #include <signal.h>
  26 #include <stdlib.h>
  27 #include <unistd.h>
  28 
  29 #include "unistr.h"
  30 #include "macros.h"
  31 
  32 static int
  33 check (const uint8_t *input, size_t input_length,
     /* [previous][next][first][last][top][bottom][index][help] */
  34        const uint8_t *expected, size_t expected_length)
  35 {
  36   size_t length;
  37   uint8_t *result;
  38 
  39   /* Test return conventions with resultbuf == NULL.  */
  40   result = u8_normalize (UNINORM_NFD, input, input_length, NULL, &length);
  41   if (!(result != NULL))
  42     return 1;
  43   if (!(length == expected_length))
  44     return 2;
  45   if (!(u8_cmp (result, expected, expected_length) == 0))
  46     return 3;
  47   free (result);
  48 
  49   /* Test return conventions with resultbuf too small.  */
  50   if (expected_length > 0)
  51     {
  52       uint8_t *preallocated;
  53 
  54       length = expected_length - 1;
  55       preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
  56       result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
  57       if (!(result != NULL))
  58         return 4;
  59       if (!(result != preallocated))
  60         return 5;
  61       if (!(length == expected_length))
  62         return 6;
  63       if (!(u8_cmp (result, expected, expected_length) == 0))
  64         return 7;
  65       free (result);
  66       free (preallocated);
  67     }
  68 
  69   /* Test return conventions with resultbuf large enough.  */
  70   {
  71     uint8_t *preallocated;
  72 
  73     length = expected_length;
  74     preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
  75     result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
  76     if (!(result != NULL))
  77       return 8;
  78     if (!(preallocated == NULL || result == preallocated))
  79       return 9;
  80     if (!(length == expected_length))
  81       return 10;
  82     if (!(u8_cmp (result, expected, expected_length) == 0))
  83       return 11;
  84     free (preallocated);
  85   }
  86 
  87   return 0;
  88 }
  89 
  90 void
  91 test_u8_nfd (void)
     /* [previous][next][first][last][top][bottom][index][help] */
  92 {
  93   { /* Empty string.  */
  94     ASSERT (check (NULL, 0, NULL, 0) == 0);
  95   }
  96   { /* SPACE */
  97     static const uint8_t input[]    = { 0x20 };
  98     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  99   }
 100 
 101   { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
 102     static const uint8_t input[]    = { 0xC3, 0x84 };
 103     static const uint8_t expected[] = { 0x41, 0xCC, 0x88 };
 104     ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
 105   }
 106 
 107   { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
 108     static const uint8_t input[]    = { 0xC7, 0x9E };
 109     static const uint8_t expected[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
 110     ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
 111   }
 112 
 113   { /* GREEK DIALYTIKA AND PERISPOMENI */
 114     static const uint8_t input[]    = { 0xE1, 0xBF, 0x81 };
 115     static const uint8_t expected[] = { 0xC2, 0xA8, 0xCD, 0x82 };
 116     ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
 117   }
 118 
 119   { /* SCRIPT SMALL L */
 120     static const uint8_t input[]    = { 0xE2, 0x84, 0x93 };
 121     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 122   }
 123 
 124   { /* NO-BREAK SPACE */
 125     static const uint8_t input[]    = { 0xC2, 0xA0 };
 126     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 127   }
 128 
 129   { /* ARABIC LETTER VEH INITIAL FORM */
 130     static const uint8_t input[]    = { 0xEF, 0xAD, 0xAC };
 131     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 132   }
 133 
 134   { /* ARABIC LETTER VEH MEDIAL FORM */
 135     static const uint8_t input[]    = { 0xEF, 0xAD, 0xAD };
 136     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 137   }
 138 
 139   { /* ARABIC LETTER VEH FINAL FORM */
 140     static const uint8_t input[]    = { 0xEF, 0xAD, 0xAB };
 141     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 142   }
 143 
 144   { /* ARABIC LETTER VEH ISOLATED FORM */
 145     static const uint8_t input[]    = { 0xEF, 0xAD, 0xAA };
 146     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 147   }
 148 
 149   { /* CIRCLED NUMBER FIFTEEN */
 150     static const uint8_t input[]    = { 0xE2, 0x91, 0xAE };
 151     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 152   }
 153 
 154   { /* TRADE MARK SIGN */
 155     static const uint8_t input[]    = { 0xE2, 0x84, 0xA2 };
 156     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 157   }
 158 
 159   { /* LATIN SUBSCRIPT SMALL LETTER I */
 160     static const uint8_t input[]    = { 0xE1, 0xB5, 0xA2 };
 161     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 162   }
 163 
 164   { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
 165     static const uint8_t input[]    = { 0xEF, 0xB8, 0xB5 };
 166     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 167   }
 168 
 169   { /* FULLWIDTH LATIN CAPITAL LETTER A */
 170     static const uint8_t input[]    = { 0xEF, 0xBC, 0xA1 };
 171     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 172   }
 173 
 174   { /* HALFWIDTH IDEOGRAPHIC COMMA */
 175     static const uint8_t input[]    = { 0xEF, 0xBD, 0xA4 };
 176     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 177   }
 178 
 179   { /* SMALL IDEOGRAPHIC COMMA */
 180     static const uint8_t input[]    = { 0xEF, 0xB9, 0x91 };
 181     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 182   }
 183 
 184   { /* SQUARE MHZ */
 185     static const uint8_t input[]    = { 0xE3, 0x8E, 0x92 };
 186     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 187   }
 188 
 189   { /* VULGAR FRACTION THREE EIGHTHS */
 190     static const uint8_t input[]    = { 0xE2, 0x85, 0x9C };
 191     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 192   }
 193 
 194   { /* MICRO SIGN */
 195     static const uint8_t input[]    = { 0xC2, 0xB5 };
 196     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 197   }
 198 
 199   { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
 200     static const uint8_t input[]    = { 0xEF, 0xB7, 0xBA };
 201     ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
 202   }
 203 
 204   { /* HANGUL SYLLABLE GEUL */
 205     static const uint8_t input[]    = { 0xEA, 0xB8, 0x80 };
 206     static const uint8_t expected[] =
 207       { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
 208     ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
 209   }
 210 
 211   { /* HANGUL SYLLABLE GEU */
 212     static const uint8_t input[]    = { 0xEA, 0xB7, 0xB8 };
 213     static const uint8_t expected[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
 214     ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
 215   }
 216 
 217   { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a)  日本語,中文,한글" */
 218     static const uint8_t input[] =
 219       { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
 220         ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
 221         0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
 222         0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
 223         's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
 224         '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
 225         0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
 226         0xED, 0x95, 0x9C,
 227         0xEA, 0xB8, 0x80, '\n'
 228       };
 229     static const uint8_t expected[] =
 230       { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
 231         ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
 232         0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
 233         0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
 234         's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
 235         '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
 236         0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
 237         0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
 238         0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
 239       };
 240     ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
 241   }
 242 
 243 #if HAVE_DECL_ALARM
 244   /* Declare failure if test takes too long, by using default abort
 245      caused by SIGALRM.  */
 246   signal (SIGALRM, SIG_DFL);
 247   alarm (50);
 248 #endif
 249 
 250   /* Check that the sorting is not O(n²) but O(n log n).  */
 251   {
 252     int pass;
 253     for (pass = 0; pass < 3; pass++)
 254       {
 255         size_t repeat = 1;
 256         size_t m = 100000;
 257         uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t));
 258         if (input != NULL)
 259           {
 260             uint8_t *expected = input + (2 * m - 1);
 261             size_t m1 = m / 2;
 262             size_t m2 = (m - 1) / 2;
 263             /* NB: m1 + m2 == m - 1.  */
 264             uint8_t *p;
 265             size_t i;
 266 
 267             input[0] = 0x41;
 268             p = input + 1;
 269             switch (pass)
 270               {
 271               case 0:
 272                 for (i = 0; i < m1; i++)
 273                   {
 274                     *p++ = 0xCC;
 275                     *p++ = 0x99;
 276                   }
 277                 for (i = 0; i < m2; i++)
 278                   {
 279                     *p++ = 0xCC;
 280                     *p++ = 0x80;
 281                   }
 282                 break;
 283 
 284               case 1:
 285                 for (i = 0; i < m2; i++)
 286                   {
 287                     *p++ = 0xCC;
 288                     *p++ = 0x80;
 289                   }
 290                 for (i = 0; i < m1; i++)
 291                   {
 292                     *p++ = 0xCC;
 293                     *p++ = 0x99;
 294                   }
 295                 break;
 296 
 297               case 2:
 298                 for (i = 0; i < m2; i++)
 299                   {
 300                     *p++ = 0xCC;
 301                     *p++ = 0x99;
 302                     *p++ = 0xCC;
 303                     *p++ = 0x80;
 304                   }
 305                 for (; i < m1; i++)
 306                   {
 307                     *p++ = 0xCC;
 308                     *p++ = 0x99;
 309                   }
 310                 break;
 311 
 312               default:
 313                 abort ();
 314               }
 315 
 316             expected[0] = 0x41;
 317             p = expected + 1;
 318             for (i = 0; i < m1; i++)
 319               {
 320                 *p++ = 0xCC;
 321                 *p++ = 0x99;
 322               }
 323             for (i = 0; i < m2; i++)
 324               {
 325                 *p++ = 0xCC;
 326                 *p++ = 0x80;
 327               }
 328 
 329             for (; repeat > 0; repeat--)
 330               ASSERT (check (input, 2 * m - 1, expected, 2 * m - 1) == 0);
 331 
 332             free (input);
 333           }
 334       }
 335   }
 336 }
 337 
 338 #else
 339 
 340 void
 341 test_u8_nfd (void)
     /* [previous][next][first][last][top][bottom][index][help] */
 342 {
     /* Intentionally empty: this variant is compiled when
        GNULIB_TEST_UNINORM_U8_NORMALIZE is false, so that test_u8_nfd
        is still defined but performs no checks.  */
 343 }
 344 
 345 #endif

/* [previous][next][first][last][top][bottom][index][help] */