root/maint/gnulib/tests/uniwbrk/test-uc-wordbreaks.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. wordbreakproperty_to_string
  2. main

   1 /* Word break function test, using test data from UCD.
   2    Copyright (C) 2010-2021 Free Software Foundation, Inc.
   3 
   4    This program is free software: you can redistribute it and/or modify it
   5    under the terms of the GNU Lesser General Public License as published
   6    by the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8 
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12    Lesser General Public License for more details.
  13 
  14    You should have received a copy of the GNU Lesser General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16 
  17 /* Written by Daiki Ueno <ueno@gnu.org>, 2014.
  18 
  19    Largely based on unigbrk/test-uc-is-grapheme-break.c,
  20    written by Ben Pfaff <blp@cs.stanford.edu>, 2010.  */
  21 
  22 #include <config.h>
  23 
  24 /* Specification. */
  25 #include <uniwbrk.h>
  26 
  27 #include <stdio.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30 
  31 const char *
  32 wordbreakproperty_to_string (int wbp)
     /* [previous][next][first][last][top][bottom][index][help] */
  33 {
  34   switch (wbp)
  35     {
  36 #define CASE(VALUE) case WBP_##VALUE: return #VALUE;
  37       CASE(OTHER)
  38       CASE(CR)
  39       CASE(LF)
  40       CASE(NEWLINE)
  41       CASE(EXTEND)
  42       CASE(FORMAT)
  43       CASE(KATAKANA)
  44       CASE(ALETTER)
  45       CASE(MIDNUMLET)
  46       CASE(MIDLETTER)
  47       CASE(MIDNUM)
  48       CASE(NUMERIC)
  49       CASE(EXTENDNUMLET)
  50       CASE(RI)
  51       CASE(DQ)
  52       CASE(SQ)
  53       CASE(HL)
  54       CASE(ZWJ)
  55       CASE(EB)
  56       CASE(EM)
  57       CASE(GAZ)
  58       CASE(EBG)
  59     }
  60   abort ();
  61 }
  62 
  63 int
  64 main (int argc, char *argv[])
     /* [previous][next][first][last][top][bottom][index][help] */
  65 {
  66   const char *filename;
  67   char line[4096];
  68   int exit_code;
  69   FILE *stream;
  70   int lineno;
  71 
  72   if (argc != 2)
  73     {
  74       fprintf (stderr, "usage: %s FILENAME\n"
  75                "where FILENAME is the location of the WordBreakTest.txt\n"
  76                "test file.\n", argv[0]);
  77       exit (1);
  78     }
  79 
  80   filename = argv[1];
  81   stream = fopen (filename, "r");
  82   if (stream == NULL)
  83     {
  84       fprintf (stderr, "error during fopen of '%s'\n", filename);
  85       exit (1);
  86     }
  87 
  88   exit_code = 0;
  89   lineno = 0;
  90   while (fgets (line, sizeof line, stream))
  91     {
  92       char *comment;
  93       const char *p;
  94       uint32_t input[100];
  95       char breaks[101];
  96       char breaks_expected[101];
  97       int i;
  98 
  99       lineno++;
 100 
 101       memset (breaks, 0, sizeof (breaks));
 102       memset (breaks_expected, 0, sizeof (breaks_expected));
 103 
 104       comment = strchr (line, '#');
 105       if (comment != NULL)
 106         *comment = '\0';
 107       if (line[strspn (line, " \t\r\n")] == '\0')
 108         continue;
 109 
 110       i = 0;
 111       p = line;
 112       do
 113         {
 114           p += strspn (p, " \t\r\n");
 115           if (!strncmp (p, "\303\267" /* ÷ */, 2))
 116             {
 117               breaks_expected[i] = 1;
 118               p += 2;
 119             }
 120           else if (!strncmp (p, "\303\227" /* × */, 2))
 121             {
 122               breaks_expected[i] = 0;
 123               p += 2;
 124             }
 125           else
 126             {
 127               fprintf (stderr, "%s:%d.%d: syntax error expecting '÷' or '×'\n",
 128                        filename, lineno, (int) (p - line + 1));
 129               exit (1);
 130             }
 131 
 132           p += strspn (p, " \t\r\n");
 133           if (*p != '\0')
 134             {
 135               unsigned int next_int;
 136               int n;
 137 
 138               if (sscanf (p, "%x%n", &next_int, &n) != 1)
 139                 {
 140                   fprintf (stderr, "%s:%d.%d: syntax error at '%s' "
 141                            "expecting hexadecimal Unicode code point number\n",
 142                            filename, lineno, (int) (p - line + 1), p);
 143                   exit (1);
 144                 }
 145               p += n;
 146 
 147               input[i] = next_int;
 148             }
 149 
 150           p += strspn (p, " \t\r\n");
 151           i++;
 152         }
 153       while (*p != '\0');
 154 
 155       u32_wordbreaks (input, i - 1, breaks);
 156 
 157       /* u32_wordbreaks always set BREAKS[0] to 0.  */
 158       breaks[0] = breaks_expected[0] = 1;
 159       if (memcmp (breaks, breaks_expected, i - 1) != 0)
 160         {
 161           int j;
 162 
 163           fprintf (stderr, "%s:%d: expected: ", filename, lineno);
 164           for (j = 0; j < i - 1; j++)
 165             {
 166               int input_wbp = uc_wordbreak_property (input[j]);
 167               fprintf (stderr, "%s U+%04X (%s) ",
 168                        breaks_expected[j] == 1 ? "\303\267" : "\303\227",
 169                        input[j], wordbreakproperty_to_string (input_wbp));
 170             }
 171           fprintf (stderr, "\n");
 172           fprintf (stderr, "%s:%d: actual: ", filename, lineno);
 173           for (j = 0; j < i - 1; j++)
 174             {
 175               int input_wbp = uc_wordbreak_property (input[j]);
 176               fprintf (stderr, "%s U+%04X (%s) ",
 177                        breaks[j] == 1 ? "\303\267" : "\303\227",
 178                        input[j], wordbreakproperty_to_string (input_wbp));
 179             }
 180           fprintf (stderr, "\n");
 181           exit_code = 1;
 182         }
 183     }
 184 
 185   return exit_code;
 186 }

/* [previous][next][first][last][top][bottom][index][help] */