root/maint/gnulib/tests/uniname/test-uninames.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions:
  1. fill_names
  2. fill_aliases
  3. name_has_alias
  4. test_name_lookup
  5. test_inverse_lookup
  6. test_alias_lookup
  7. main

   1 /* Test the Unicode character name functions.
   2    Copyright (C) 2000-2003, 2005, 2007, 2009-2021 Free Software Foundation,
   3    Inc.
   4 
   5    This program is free software: you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 3 of the License, or
   8    (at your option) any later version.
   9 
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14 
  15    You should have received a copy of the GNU General Public License
  16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  17 
  18 #include <config.h>
  19 
  20 #include <stdio.h>
  21 #include <stdlib.h>
  22 #include <string.h>
  23 
  24 #include "xalloc.h"
  25 #include "uniname.h"
  26 
  27 /* The names according to the UnicodeData.txt file, modified to contain the
  28    Hangul syllable names, as described in the Unicode 3.0 book.  */
  29 static const char * unicode_names [0x110000];
  30 
  31 /* Maximum entries in unicode_aliases.  */
  32 #define ALIASLEN 0x200
  33 
  34 /* The aliases according to the NameAliases.txt file.  */
  35 struct unicode_alias
  36 {
  37   const char *name;
  38   unsigned int uc;
  39 };
  40 
  41 static struct unicode_alias unicode_aliases [ALIASLEN];
  42 static int aliases_count;
  43 
  44 /* Stores in unicode_names[] the relevant contents of the UnicodeData.txt
  45    file.  */
  46 static void
  47 fill_names (const char *unicodedata_filename)
     /* [previous][next][first][last][top][bottom][index][help] */
  48 {
  49   FILE *stream;
  50   char *field0;
  51   char *field1;
  52   char line[1024];
  53   int lineno = 0;
  54 
  55   stream = fopen (unicodedata_filename, "r");
  56   if (stream == NULL)
  57     {
  58       fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
  59       exit (EXIT_FAILURE);
  60     }
  61 
  62   while (fgets (line, sizeof line, stream))
  63     {
  64       char *p;
  65       char *comment;
  66       unsigned long i;
  67 
  68       lineno++;
  69 
  70       comment = strchr (line, '#');
  71       if (comment != NULL)
  72         *comment = '\0';
  73       if (line[strspn (line, " \t\r\n")] == '\0')
  74         continue;
  75 
  76       field0 = p = line;
  77       p = strchr (p, ';');
  78       if (!p)
  79         {
  80           fprintf (stderr, "short line in '%s':%d\n",
  81                    unicodedata_filename, lineno);
  82           exit (EXIT_FAILURE);
  83         }
  84       *p++ = '\0';
  85 
  86       field1 = p;
  87       if (*field1 == '<')
  88         continue;
  89       p = strchr (p, ';');
  90       if (!p)
  91         {
  92           fprintf (stderr, "short line in '%s':%d\n",
  93                    unicodedata_filename, lineno);
  94           exit (EXIT_FAILURE);
  95         }
  96       *p = '\0';
  97       i = strtoul (field0, NULL, 16);
  98       if (i >= 0x110000)
  99         {
 100           fprintf (stderr, "index too large\n");
 101           exit (EXIT_FAILURE);
 102         }
 103       unicode_names[i] = xstrdup (field1);
 104     }
 105   if (ferror (stream) || fclose (stream))
 106     {
 107       fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
 108       exit (1);
 109     }
 110 }
 111 
 112 /* Stores in unicode_aliases[] the relevant contents of the NameAliases.txt
 113    file.  */
 114 static void
 115 fill_aliases (const char *namealiases_filename)
     /* [previous][next][first][last][top][bottom][index][help] */
 116 {
 117   FILE *stream;
 118   char *field0;
 119   char *field1;
 120   char line[1024];
 121   int lineno = 0;
 122 
 123   stream = fopen (namealiases_filename, "r");
 124   if (stream == NULL)
 125     {
 126       fprintf (stderr, "error during fopen of '%s'\n", namealiases_filename);
 127       exit (EXIT_FAILURE);
 128     }
 129 
 130   while (fgets (line, sizeof line, stream))
 131     {
 132       char *p;
 133       char *comment;
 134       unsigned long uc;
 135 
 136       comment = strchr (line, '#');
 137       if (comment != NULL)
 138         *comment = '\0';
 139       if (line[strspn (line, " \t\r\n")] == '\0')
 140         continue;
 141 
 142       lineno++;
 143 
 144       field0 = p = line;
 145       p = strchr (p, ';');
 146       if (!p)
 147         {
 148           fprintf (stderr, "short line in '%s':%d\n",
 149                    namealiases_filename, lineno);
 150           exit (EXIT_FAILURE);
 151         }
 152       *p++ = '\0';
 153 
 154       field1 = p;
 155       p = strchr (p, ';');
 156       if (!p)
 157         {
 158           fprintf (stderr, "short line in '%s':%d\n",
 159                    namealiases_filename, lineno);
 160           exit (EXIT_FAILURE);
 161         }
 162       *p = '\0';
 163 
 164       uc = strtoul (field0, NULL, 16);
 165       if (uc >= 0x110000)
 166         {
 167           fprintf (stderr, "index too large\n");
 168           exit (EXIT_FAILURE);
 169         }
 170 
 171       if (aliases_count == ALIASLEN)
 172         {
 173           fprintf (stderr, "too many aliases\n");
 174           exit (EXIT_FAILURE);
 175         }
 176       unicode_aliases[aliases_count].name = xstrdup (field1);
 177       unicode_aliases[aliases_count].uc = uc;
 178       aliases_count++;
 179     }
 180   if (ferror (stream) || fclose (stream))
 181     {
 182       fprintf (stderr, "error reading from '%s'\n", namealiases_filename);
 183       exit (1);
 184     }
 185 }
 186 
 187 static int
 188 name_has_alias (unsigned int uc)
     /* [previous][next][first][last][top][bottom][index][help] */
 189 {
 190   int i;
 191   for (i = 0; i < ALIASLEN; i++)
 192     if (unicode_aliases[i].uc == uc)
 193       return 1;
 194   return 0;
 195 }
 196 
 197 /* Perform an exhaustive test of the unicode_character_name function.  */
 198 static int
 199 test_name_lookup ()
     /* [previous][next][first][last][top][bottom][index][help] */
 200 {
 201   int error = 0;
 202   unsigned int i;
 203   char buf[UNINAME_MAX];
 204 
 205   for (i = 0; i < 0x11000; i++)
 206     {
 207       char *result = unicode_character_name (i, buf);
 208 
 209       if (unicode_names[i] != NULL)
 210         {
 211           if (result == NULL)
 212             {
 213               fprintf (stderr, "\\u%04X name lookup failed!\n", i);
 214               error = 1;
 215             }
 216           else if (strcmp (result, unicode_names[i]) != 0)
 217             {
 218               fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
 219                                i, result);
 220               error = 1;
 221             }
 222         }
 223       else
 224         {
 225           if (result != NULL)
 226             {
 227               fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
 228                                i, result);
 229               error = 1;
 230             }
 231         }
 232     }
 233 
 234   for (i = 0x110000; i < 0x1000000; i++)
 235     {
 236       char *result = unicode_character_name (i, buf);
 237 
 238       if (result != NULL)
 239         {
 240           fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
 241                            i, result);
 242           error = 1;
 243         }
 244     }
 245 
 246   return error;
 247 }
 248 
 249 /* Perform a test of the unicode_name_character function.  */
 250 static int
 251 test_inverse_lookup ()
     /* [previous][next][first][last][top][bottom][index][help] */
 252 {
 253   int error = 0;
 254   unsigned int i;
 255 
 256   /* First, verify all valid character names are recognized.  */
 257   for (i = 0; i < 0x110000; i++)
 258     if (unicode_names[i] != NULL)
 259       {
 260         unsigned int result = unicode_name_character (unicode_names[i]);
 261         if (result != i)
 262           {
 263             if (result == UNINAME_INVALID)
 264               fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
 265                        unicode_names[i]);
 266             else
 267               fprintf (stderr,
 268                        "inverse name lookup of \"%s\" returned 0x%04X\n",
 269                        unicode_names[i], result);
 270             error = 1;
 271           }
 272       }
 273 
 274   /* Second, generate random but likely names and verify they are not
 275      recognized unless really valid.  */
 276   for (i = 0; i < 10000; i++)
 277     {
 278       unsigned int i1, i2;
 279       const char *s1;
 280       const char *s2;
 281       unsigned int l1, l2, j1, j2;
 282       char buf[2*UNINAME_MAX];
 283       unsigned int result;
 284 
 285       do i1 = ((rand () % 0x11) << 16)
 286               + ((rand () & 0xff) << 8)
 287               + (rand () & 0xff);
 288       while (unicode_names[i1] == NULL);
 289 
 290       do i2 = ((rand () % 0x11) << 16)
 291               + ((rand () & 0xff) << 8)
 292               + (rand () & 0xff);
 293       while (unicode_names[i2] == NULL);
 294 
 295       s1 = unicode_names[i1];
 296       l1 = strlen (s1);
 297       s2 = unicode_names[i2];
 298       l2 = strlen (s2);
 299 
 300       /* Concatenate a starting piece of s1 with an ending piece of s2.  */
 301       for (j1 = 1; j1 <= l1; j1++)
 302         if (j1 == l1 || s1[j1] == ' ')
 303           for (j2 = 0; j2 < l2; j2++)
 304             if (j2 == 0 || s2[j2-1] == ' ')
 305               {
 306                 memcpy (buf, s1, j1);
 307                 buf[j1] = ' ';
 308                 memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1);
 309 
 310                 result = unicode_name_character (buf);
 311                 if (result != UNINAME_INVALID
 312                     && !name_has_alias (result)
 313                     && !(unicode_names[result] != NULL
 314                          && strcmp (unicode_names[result], buf) == 0))
 315                   {
 316                     fprintf (stderr,
 317                              "inverse name lookup of \"%s\" returned 0x%04X\n",
 318                              unicode_names[i], result);
 319                     error = 1;
 320                   }
 321               }
 322     }
 323 
 324   /* Third, some extreme case that used to loop.  */
 325   if (unicode_name_character ("A A") != UNINAME_INVALID)
 326     error = 1;
 327 
 328   return error;
 329 }
 330 
 331 /* Perform a test of the unicode_name_character function for aliases.  */
 332 static int
 333 test_alias_lookup ()
     /* [previous][next][first][last][top][bottom][index][help] */
 334 {
 335   int error = 0;
 336   unsigned int i;
 337   char buf[UNINAME_MAX];
 338 
 339   /* Verify all valid character names are recognized.  */
 340   for (i = 0; i < ALIASLEN; i++)
 341     if (unicode_aliases[i].uc != UNINAME_INVALID
 342         /* Skip if the character has no canonical name (e.g. control
 343            characters).  */
 344         && unicode_character_name (unicode_aliases[i].uc, buf))
 345       {
 346         unsigned int result = unicode_name_character (unicode_aliases[i].name);
 347         if (result != unicode_aliases[i].uc)
 348           {
 349             if (result == UNINAME_INVALID)
 350               fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
 351                        unicode_aliases[i].name);
 352             else
 353               fprintf (stderr,
 354                        "inverse name lookup of \"%s\" returned 0x%04X\n",
 355                        unicode_aliases[i].name, result);
 356             error = 1;
 357           }
 358       }
 359 
 360   return error;
 361 }
 362 
 363 int
 364 main (int argc, char *argv[])
     /* [previous][next][first][last][top][bottom][index][help] */
 365 {
 366   int error = 0;
 367   int i;
 368 
 369   for (i = 1; i < argc && strcmp (argv[i], "--") != 0; i++)
 370     fill_names (argv[i]);
 371 
 372   if (i < argc)
 373     {
 374       int j;
 375       for (j = 0; j < ALIASLEN; j++)
 376         unicode_aliases[j].uc = UNINAME_INVALID;
 377 
 378       i++;
 379       for (; i < argc; i++)
 380         fill_aliases (argv[i]);
 381     }
 382 
 383   error |= test_name_lookup ();
 384   error |= test_inverse_lookup ();
 385 
 386   if (aliases_count > 0)
 387     error |= test_alias_lookup ();
 388 
 389   return error;
 390 }

/* [previous][next][first][last][top][bottom][index][help] */