root/maint/gnulib/lib/striconveha.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. uniconv_register_autodetect
  2. mem_iconveha_notranslit
  3. mem_iconveha
  4. str_iconveha_notranslit
  5. str_iconveha

   1 /* Character set conversion with error handling and autodetection.
   2    Copyright (C) 2002, 2005, 2007, 2009-2021 Free Software Foundation, Inc.
   3    Written by Bruno Haible.
   4 
   5    This file is free software: you can redistribute it and/or modify
   6    it under the terms of the GNU Lesser General Public License as
   7    published by the Free Software Foundation; either version 2.1 of the
   8    License, or (at your option) any later version.
   9 
  10    This file is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU Lesser General Public License for more details.
  14 
  15    You should have received a copy of the GNU Lesser General Public License
  16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  17 
  18 #include <config.h>
  19 
  20 /* Specification.  */
  21 #include "striconveha.h"
  22 
  23 #include <errno.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 
  27 #include "malloca.h"
  28 #include "c-strcase.h"
  29 #include "striconveh.h"
  30 
  31 #define SIZEOF(a) (sizeof(a)/sizeof(a[0]))
  32 
  33 
  34 /* Autodetection list.  */
  35 
  36 struct autodetect_alias
  37 {
  38   struct autodetect_alias *next;
  39   const char *name;
  40   const char * const *encodings_to_try;
  41 };
  42 
  43 static const char * const autodetect_utf8_try[] =
  44 {
  45   /* Try UTF-8 first. There are very few ISO-8859-1 inputs that would
  46      be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1.  */
  47   "UTF-8", "ISO-8859-1",
  48   NULL
  49 };
  50 static const char * const autodetect_jp_try[] =
  51 {
  52   /* Try 7-bit encoding first. If the input contains bytes >= 0x80,
  53      it will fail.
  54      Try EUC-JP next. Short SHIFT_JIS inputs may come out wrong. This
  55      is unavoidable. People will condemn SHIFT_JIS.
  56      If we tried SHIFT_JIS first, then some short EUC-JP inputs would
  57      come out wrong, and people would condemn EUC-JP and Unix, which
  58      would not be good.
  59      Finally try SHIFT_JIS.  */
  60   "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS",
  61   NULL
  62 };
  63 static const char * const autodetect_kr_try[] =
  64 {
  65   /* Try 7-bit encoding first. If the input contains bytes >= 0x80,
  66      it will fail.
  67      Finally try EUC-KR.  */
  68   "ISO-2022-KR", "EUC-KR",
  69   NULL
  70 };
  71 
  72 static struct autodetect_alias autodetect_predefined[] =
  73 {
  74   { &autodetect_predefined[1], "autodetect_utf8", autodetect_utf8_try },
  75   { &autodetect_predefined[2], "autodetect_jp",   autodetect_jp_try },
  76   { NULL,                      "autodetect_kr",   autodetect_kr_try }
  77 };
  78 
  79 static struct autodetect_alias *autodetect_list = &autodetect_predefined[0];
  80 static struct autodetect_alias **autodetect_list_end =
  81   &autodetect_predefined[SIZEOF(autodetect_predefined)-1].next;
  82 
  83 int
  84 uniconv_register_autodetect (const char *name,
     /* [previous][next][first][last][top][bottom][index][help] */
  85                              const char * const *try_in_order)
  86 {
  87   size_t namelen;
  88   size_t listlen;
  89   size_t memneed;
  90   size_t i;
  91   char *memory;
  92   struct autodetect_alias *new_alias;
  93   char *new_name;
  94   const char **new_try_in_order;
  95 
  96   /* The TRY_IN_ORDER list must not be empty.  */
  97   if (try_in_order[0] == NULL)
  98     {
  99       errno = EINVAL;
 100       return -1;
 101     }
 102 
 103   /* We must deep-copy NAME and TRY_IN_ORDER, because they may be allocated
 104      with dynamic extent.  */
 105   namelen = strlen (name) + 1;
 106   memneed = sizeof (struct autodetect_alias) + namelen + sizeof (char *);
 107   for (i = 0; try_in_order[i] != NULL; i++)
 108     memneed += sizeof (char *) + strlen (try_in_order[i]) + 1;
 109   listlen = i;
 110 
 111   memory = (char *) malloc (memneed);
 112   if (memory != NULL)
 113     {
 114       new_alias = (struct autodetect_alias *) memory;
 115       memory += sizeof (struct autodetect_alias);
 116 
 117       new_try_in_order = (const char **) memory;
 118       memory += (listlen + 1) * sizeof (char *);
 119 
 120       new_name = (char *) memory;
 121       memcpy (new_name, name, namelen);
 122       memory += namelen;
 123 
 124       for (i = 0; i < listlen; i++)
 125         {
 126           size_t len = strlen (try_in_order[i]) + 1;
 127           memcpy (memory, try_in_order[i], len);
 128           new_try_in_order[i] = (const char *) memory;
 129           memory += len;
 130         }
 131       new_try_in_order[i] = NULL;
 132 
 133       /* Now insert the new alias.  */
 134       new_alias->name = new_name;
 135       new_alias->encodings_to_try = new_try_in_order;
 136       new_alias->next = NULL;
 137       /* FIXME: Not multithread-safe.  */
 138       *autodetect_list_end = new_alias;
 139       autodetect_list_end = &new_alias->next;
 140       return 0;
 141     }
 142   else
 143     {
 144       errno = ENOMEM;
 145       return -1;
 146     }
 147 }
 148 
 149 /* Like mem_iconveha, except no handling of transliteration.  */
 150 static int
 151 mem_iconveha_notranslit (const char *src, size_t srclen,
     /* [previous][next][first][last][top][bottom][index][help] */
 152                          const char *from_codeset, const char *to_codeset,
 153                          enum iconv_ilseq_handler handler,
 154                          size_t *offsets,
 155                          char **resultp, size_t *lengthp)
 156 {
 157   int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler,
 158                             offsets, resultp, lengthp);
 159   if (retval >= 0 || errno != EINVAL)
 160     return retval;
 161   else
 162     {
 163       struct autodetect_alias *alias;
 164 
 165       /* Unsupported from_codeset or to_codeset. Check whether the caller
 166          requested autodetection.  */
 167       for (alias = autodetect_list; alias != NULL; alias = alias->next)
 168         if (strcmp (from_codeset, alias->name) == 0)
 169           {
 170             const char * const *encodings;
 171 
 172             if (handler != iconveh_error)
 173               {
 174                 /* First try all encodings without any forgiving.  */
 175                 encodings = alias->encodings_to_try;
 176                 do
 177                   {
 178                     retval = mem_iconveha_notranslit (src, srclen,
 179                                                       *encodings, to_codeset,
 180                                                       iconveh_error, offsets,
 181                                                       resultp, lengthp);
 182                     if (!(retval < 0 && errno == EILSEQ))
 183                       return retval;
 184                     encodings++;
 185                   }
 186                 while (*encodings != NULL);
 187               }
 188 
 189             encodings = alias->encodings_to_try;
 190             do
 191               {
 192                 retval = mem_iconveha_notranslit (src, srclen,
 193                                                   *encodings, to_codeset,
 194                                                   handler, offsets,
 195                                                   resultp, lengthp);
 196                 if (!(retval < 0 && errno == EILSEQ))
 197                   return retval;
 198                 encodings++;
 199               }
 200             while (*encodings != NULL);
 201 
 202             /* Return the last call's result.  */
 203             return -1;
 204           }
 205 
 206       /* It wasn't an autodetection name.  */
 207       errno = EINVAL;
 208       return -1;
 209     }
 210 }
 211 
 212 int
 213 mem_iconveha (const char *src, size_t srclen,
     /* [previous][next][first][last][top][bottom][index][help] */
 214               const char *from_codeset, const char *to_codeset,
 215               bool transliterate,
 216               enum iconv_ilseq_handler handler,
 217               size_t *offsets,
 218               char **resultp, size_t *lengthp)
 219 {
 220   if (srclen == 0)
 221     {
 222       /* Nothing to convert.  */
 223       *lengthp = 0;
 224       return 0;
 225     }
 226 
 227   /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5,
 228      we want to use transliteration.  */
 229 #if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
 230      && !defined __UCLIBC__) \
 231     || _LIBICONV_VERSION >= 0x0105
 232   if (transliterate)
 233     {
 234       int retval;
 235       size_t len = strlen (to_codeset);
 236       char *to_codeset_suffixed = (char *) malloca (len + 10 + 1);
 237       memcpy (to_codeset_suffixed, to_codeset, len);
 238       memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1);
 239 
 240       retval = mem_iconveha_notranslit (src, srclen,
 241                                         from_codeset, to_codeset_suffixed,
 242                                         handler, offsets, resultp, lengthp);
 243 
 244       freea (to_codeset_suffixed);
 245 
 246       return retval;
 247     }
 248   else
 249 #endif
 250     return mem_iconveha_notranslit (src, srclen,
 251                                     from_codeset, to_codeset,
 252                                     handler, offsets, resultp, lengthp);
 253 }
 254 
 255 /* Like str_iconveha, except no handling of transliteration.  */
 256 static char *
 257 str_iconveha_notranslit (const char *src,
     /* [previous][next][first][last][top][bottom][index][help] */
 258                          const char *from_codeset, const char *to_codeset,
 259                          enum iconv_ilseq_handler handler)
 260 {
 261   char *result = str_iconveh (src, from_codeset, to_codeset, handler);
 262 
 263   if (result != NULL || errno != EINVAL)
 264     return result;
 265   else
 266     {
 267       struct autodetect_alias *alias;
 268 
 269       /* Unsupported from_codeset or to_codeset. Check whether the caller
 270          requested autodetection.  */
 271       for (alias = autodetect_list; alias != NULL; alias = alias->next)
 272         if (strcmp (from_codeset, alias->name) == 0)
 273           {
 274             const char * const *encodings;
 275 
 276             if (handler != iconveh_error)
 277               {
 278                 /* First try all encodings without any forgiving.  */
 279                 encodings = alias->encodings_to_try;
 280                 do
 281                   {
 282                     result = str_iconveha_notranslit (src,
 283                                                       *encodings, to_codeset,
 284                                                       iconveh_error);
 285                     if (!(result == NULL && errno == EILSEQ))
 286                       return result;
 287                     encodings++;
 288                   }
 289                 while (*encodings != NULL);
 290               }
 291 
 292             encodings = alias->encodings_to_try;
 293             do
 294               {
 295                 result = str_iconveha_notranslit (src,
 296                                                   *encodings, to_codeset,
 297                                                   handler);
 298                 if (!(result == NULL && errno == EILSEQ))
 299                   return result;
 300                 encodings++;
 301               }
 302             while (*encodings != NULL);
 303 
 304             /* Return the last call's result.  */
 305             return NULL;
 306           }
 307 
 308       /* It wasn't an autodetection name.  */
 309       errno = EINVAL;
 310       return NULL;
 311     }
 312 }
 313 
 314 char *
 315 str_iconveha (const char *src,
     /* [previous][next][first][last][top][bottom][index][help] */
 316               const char *from_codeset, const char *to_codeset,
 317               bool transliterate,
 318               enum iconv_ilseq_handler handler)
 319 {
 320   if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
 321     {
 322       char *result = strdup (src);
 323 
 324       if (result == NULL)
 325         errno = ENOMEM;
 326       return result;
 327     }
 328 
 329   /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5,
 330      we want to use transliteration.  */
 331 #if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
 332      && !defined __UCLIBC__) \
 333     || _LIBICONV_VERSION >= 0x0105
 334   if (transliterate)
 335     {
 336       char *result;
 337       size_t len = strlen (to_codeset);
 338       char *to_codeset_suffixed = (char *) malloca (len + 10 + 1);
 339       memcpy (to_codeset_suffixed, to_codeset, len);
 340       memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1);
 341 
 342       result = str_iconveha_notranslit (src, from_codeset, to_codeset_suffixed,
 343                                         handler);
 344 
 345       freea (to_codeset_suffixed);
 346 
 347       return result;
 348     }
 349   else
 350 #endif
 351     return str_iconveha_notranslit (src, from_codeset, to_codeset, handler);
 352 }

/* [previous][next][first][last][top][bottom][index][help] */