root/maint/gnulib/lib/propername.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mbsstr_trimmed_wordbounded
  2. proper_name
  3. proper_name_utf8
  4. main
  5. main

   1 /* Localization of proper names.
   2    Copyright (C) 2006-2021 Free Software Foundation, Inc.
   3    Written by Bruno Haible <bruno@clisp.org>, 2006.
   4 
   5    This program is free software: you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 3 of the License, or
   8    (at your option) any later version.
   9 
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14 
  15    You should have received a copy of the GNU General Public License
  16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  17 
  18 /* Without this pragma, gcc 4.7.0 20111124 mistakenly suggests that
  19    the proper_name function might be a candidate for attribute 'const'.  */
  20 #if (__GNUC__ == 4 && 6 <= __GNUC_MINOR__) || 4 < __GNUC__
  21 # pragma GCC diagnostic ignored "-Wsuggest-attribute=const"
  22 #endif
  23 
  24 #include <config.h>
  25 
  26 /* Specification.  */
  27 #include "propername.h"
  28 
  29 #include <ctype.h>
  30 #include <stdbool.h>
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <string.h>
  34 #if HAVE_ICONV
  35 # include <iconv.h>
  36 #endif
  37 
  38 #include "trim.h"
  39 #include "mbchar.h"
  40 #include "mbuiter.h"
  41 #include "localcharset.h"
  42 #include "c-strcase.h"
  43 #include "xstriconv.h"
  44 #include "xalloc.h"
  45 #include "gettext.h"
  46 
  47 
  48 /* Tests whether STRING contains trim (SUB), starting and ending at word
  49    boundaries.
  50    Here, instead of implementing Unicode Standard Annex #29 for determining
  51    word boundaries, we assume that trim (SUB) starts and ends with words and
  52    only test whether the part before it ends with a non-word and the part
  53    after it starts with a non-word.  */
  54 static bool
  55 mbsstr_trimmed_wordbounded (const char *string, const char *sub)
     /* [previous][next][first][last][top][bottom][index][help] */
  56 {
  57   char *tsub = trim (sub);
  58   bool found = false;
  59 
  60   for (; *string != '\0';)
  61     {
  62       const char *tsub_in_string = mbsstr (string, tsub);
  63       if (tsub_in_string == NULL)
  64         break;
  65       else
  66         {
  67           if (MB_CUR_MAX > 1)
  68             {
  69               mbui_iterator_t string_iter;
  70               bool word_boundary_before;
  71               bool word_boundary_after;
  72 
  73               mbui_init (string_iter, string);
  74               word_boundary_before = true;
  75               if (mbui_cur_ptr (string_iter) < tsub_in_string)
  76                 {
  77                   mbchar_t last_char_before_tsub;
  78                   do
  79                     {
  80                       if (!mbui_avail (string_iter))
  81                         abort ();
  82                       last_char_before_tsub = mbui_cur (string_iter);
  83                       mbui_advance (string_iter);
  84                     }
  85                   while (mbui_cur_ptr (string_iter) < tsub_in_string);
  86                   if (mb_isalnum (last_char_before_tsub))
  87                     word_boundary_before = false;
  88                 }
  89 
  90               mbui_init (string_iter, tsub_in_string);
  91               {
  92                 mbui_iterator_t tsub_iter;
  93 
  94                 for (mbui_init (tsub_iter, tsub);
  95                      mbui_avail (tsub_iter);
  96                      mbui_advance (tsub_iter))
  97                   {
  98                     if (!mbui_avail (string_iter))
  99                       abort ();
 100                     mbui_advance (string_iter);
 101                   }
 102               }
 103               word_boundary_after = true;
 104               if (mbui_avail (string_iter))
 105                 {
 106                   mbchar_t first_char_after_tsub = mbui_cur (string_iter);
 107                   if (mb_isalnum (first_char_after_tsub))
 108                     word_boundary_after = false;
 109                 }
 110 
 111               if (word_boundary_before && word_boundary_after)
 112                 {
 113                   found = true;
 114                   break;
 115                 }
 116 
 117               mbui_init (string_iter, tsub_in_string);
 118               if (!mbui_avail (string_iter))
 119                 break;
 120               string = tsub_in_string + mb_len (mbui_cur (string_iter));
 121             }
 122           else
 123             {
 124               bool word_boundary_before;
 125               const char *p;
 126               bool word_boundary_after;
 127 
 128               word_boundary_before = true;
 129               if (string < tsub_in_string)
 130                 if (isalnum ((unsigned char) tsub_in_string[-1]))
 131                   word_boundary_before = false;
 132 
 133               p = tsub_in_string + strlen (tsub);
 134               word_boundary_after = true;
 135               if (*p != '\0')
 136                 if (isalnum ((unsigned char) *p))
 137                   word_boundary_after = false;
 138 
 139               if (word_boundary_before && word_boundary_after)
 140                 {
 141                   found = true;
 142                   break;
 143                 }
 144 
 145               if (*tsub_in_string == '\0')
 146                 break;
 147               string = tsub_in_string + 1;
 148             }
 149         }
 150     }
 151   free (tsub);
 152   return found;
 153 }
 154 
 155 /* Return the localization of NAME.  NAME is written in ASCII.  */
 156 
 157 const char *
 158 proper_name (const char *name)
     /* [previous][next][first][last][top][bottom][index][help] */
 159 {
 160   /* See whether there is a translation.   */
 161   const char *translation = gettext (name);
 162 
 163   if (translation != name)
 164     {
 165       /* See whether the translation contains the original name.  */
 166       if (mbsstr_trimmed_wordbounded (translation, name))
 167         return translation;
 168       else
 169         {
 170           /* Return "TRANSLATION (NAME)".  */
 171           char *result =
 172             XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char);
 173 
 174           sprintf (result, "%s (%s)", translation, name);
 175           return result;
 176         }
 177     }
 178   else
 179     return name;
 180 }
 181 
 182 /* Return the localization of a name whose original writing is not ASCII.
 183    NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal
 184    escape sequences.  NAME_ASCII is a fallback written only with ASCII
 185    characters.  */
 186 
 187 const char *
 188 proper_name_utf8 (const char *name_ascii, const char *name_utf8)
     /* [previous][next][first][last][top][bottom][index][help] */
 189 {
 190   /* See whether there is a translation.   */
 191   const char *translation = gettext (name_ascii);
 192 
 193   /* Try to convert NAME_UTF8 to the locale encoding.  */
 194   const char *locale_code = locale_charset ();
 195   char *alloc_name_converted = NULL;
 196   char *alloc_name_converted_translit = NULL;
 197   const char *name_converted = NULL;
 198   const char *name_converted_translit = NULL;
 199   const char *name;
 200 
 201   if (c_strcasecmp (locale_code, "UTF-8") != 0)
 202     {
 203 #if HAVE_ICONV
 204       name_converted = alloc_name_converted =
 205         xstr_iconv (name_utf8, "UTF-8", locale_code);
 206 
 207 # if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
 208       && !defined __UCLIBC__) \
 209      || _LIBICONV_VERSION >= 0x0105
 210       {
 211         char *converted_translit;
 212 
 213         size_t len = strlen (locale_code);
 214         char *locale_code_translit = XNMALLOC (len + 10 + 1, char);
 215         memcpy (locale_code_translit, locale_code, len);
 216         memcpy (locale_code_translit + len, "//TRANSLIT", 10 + 1);
 217 
 218         converted_translit =
 219           xstr_iconv (name_utf8, "UTF-8", locale_code_translit);
 220 
 221         free (locale_code_translit);
 222 
 223         if (converted_translit != NULL)
 224           {
 225 #  if !_LIBICONV_VERSION
 226             /* Don't use the transliteration if it added question marks.
 227                glibc's transliteration falls back to question marks; libiconv's
 228                transliteration does not.
 229                mbschr is equivalent to strchr in this case.  */
 230             if (strchr (converted_translit, '?') != NULL)
 231               free (converted_translit);
 232             else
 233 #  endif
 234               name_converted_translit = alloc_name_converted_translit =
 235                 converted_translit;
 236           }
 237       }
 238 # endif
 239 #endif
 240     }
 241   else
 242     {
 243       name_converted = name_utf8;
 244       name_converted_translit = name_utf8;
 245     }
 246 
 247   /* The name in locale encoding.  */
 248   name = (name_converted != NULL ? name_converted :
 249           name_converted_translit != NULL ? name_converted_translit :
 250           name_ascii);
 251 
 252   /* See whether we have a translation.  Some translators have not understood
 253      that they should use the UTF-8 form of the name, if possible.  So if the
 254      translator provided a no-op translation, we ignore it.  */
 255   if (strcmp (translation, name_ascii) != 0)
 256     {
 257       /* See whether the translation contains the original name.  */
 258       if (mbsstr_trimmed_wordbounded (translation, name_ascii)
 259           || (name_converted != NULL
 260               && mbsstr_trimmed_wordbounded (translation, name_converted))
 261           || (name_converted_translit != NULL
 262               && mbsstr_trimmed_wordbounded (translation, name_converted_translit)))
 263         {
 264           if (alloc_name_converted != NULL)
 265             free (alloc_name_converted);
 266           if (alloc_name_converted_translit != NULL)
 267             free (alloc_name_converted_translit);
 268           return translation;
 269         }
 270       else
 271         {
 272           /* Return "TRANSLATION (NAME)".  */
 273           char *result =
 274             XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char);
 275 
 276           sprintf (result, "%s (%s)", translation, name);
 277 
 278           if (alloc_name_converted != NULL)
 279             free (alloc_name_converted);
 280           if (alloc_name_converted_translit != NULL)
 281             free (alloc_name_converted_translit);
 282           return result;
 283         }
 284     }
 285   else
 286     {
 287       if (alloc_name_converted != NULL && alloc_name_converted != name)
 288         free (alloc_name_converted);
 289       if (alloc_name_converted_translit != NULL
 290           && alloc_name_converted_translit != name)
 291         free (alloc_name_converted_translit);
 292       return name;
 293     }
 294 }
 295 
 #ifdef TEST1
 # include <locale.h>
 /* Test driver for mbsstr_trimmed_wordbounded.
    Usage: PROGRAM STRING SUB
    Prints "found" when STRING contains trim (SUB) at word boundaries.
    Exits with EXIT_FAILURE when fewer than two arguments are supplied,
    instead of dereferencing missing argv entries.  */
 int
 main (int argc, char *argv[])
 {
   setlocale (LC_ALL, "");

   if (argc < 3)
     {
       /* argv[0] may be absent when argc is 0, so fall back to a fixed
          program name in that case.  */
       fprintf (stderr, "Usage: %s STRING SUB\n",
                argc > 0 && argv[0] != NULL ? argv[0] : "propername");
       return EXIT_FAILURE;
     }

   if (mbsstr_trimmed_wordbounded (argv[1], argv[2]))
     printf ("found\n");
   return 0;
 }
 #endif
 307 
 #ifdef TEST2
 # include <locale.h>
 # include <stdio.h>
 /* Test driver for proper_name_utf8: prints the localized form of a fixed
    sample name, followed by a newline, using the locale taken from the
    environment.  */
 int
 main (int argc, char *argv[])
 {
   const char *localized;

   setlocale (LC_ALL, "");

   localized =
     proper_name_utf8 ("Franc,ois Pinard", "Fran\303\247ois Pinard");
   printf ("%s\n", localized);
   return 0;
 }
 #endif

/* [previous][next][first][last][top][bottom][index][help] */