root/maint/gnulib/tests/uniconv/test-u8-conv-from-enc.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. new_offsets
  2. main

   1 /* Test of conversion to UTF-8 from legacy encodings.
   2    Copyright (C) 2007-2021 Free Software Foundation, Inc.
   3 
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8 
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13 
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16 
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2007.  */
  18 
  19 #include <config.h>
  20 
  21 #include "uniconv.h"
  22 
  23 #include <stdlib.h>
  24 #include <string.h>
  25 
  26 #include "unistr.h"
  27 #include "macros.h"
  28 extern int iconv_supports_encoding (const char *encoding);
  29 
  30 /* Magic number for detecting bounds violations.  */
  31 #define MAGIC 0x1983EFF1
  32 
  33 static size_t *
  34 new_offsets (size_t n)
     /* [previous][next][first][last][top][bottom][index][help] */
  35 {
  36   size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
  37   offsets[n] = MAGIC;
  38   return offsets;
  39 }
  40 
  41 int
  42 main ()
     /* [previous][next][first][last][top][bottom][index][help] */
  43 {
  44 #if HAVE_ICONV
  45   static enum iconv_ilseq_handler handlers[] =
  46     { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
  47   size_t h;
  48   size_t o;
  49   size_t i;
  50 
  51   /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
  52      ISO-8859-2, and UTF-8.  */
  53 
  54   /* Test conversion from ISO-8859-1 to UTF-8 with no errors.  */
  55   for (h = 0; h < SIZEOF (handlers); h++)
  56     {
  57       enum iconv_ilseq_handler handler = handlers[h];
  58       static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
  59       static const uint8_t expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
  60       for (o = 0; o < 2; o++)
  61         {
  62           size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
  63           size_t length;
  64           uint8_t *result = u8_conv_from_encoding ("ISO-8859-1", handler,
  65                                                    input, strlen (input),
  66                                                    offsets,
  67                                                    NULL, &length);
  68           ASSERT (result != NULL);
  69           ASSERT (length == u8_strlen (expected));
  70           ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
  71           if (o)
  72             {
  73               for (i = 0; i < 37; i++)
  74                 ASSERT (offsets[i] == (i < 1 ? i :
  75                                        i < 12 ? i + 1 :
  76                                        i < 18 ? i + 2 :
  77                                        i + 3));
  78               ASSERT (offsets[37] == MAGIC);
  79               free (offsets);
  80             }
  81           free (result);
  82         }
  83     }
  84 
  85   /* Test conversion from ISO-8859-2 to UTF-8 with no errors.  */
  86   for (h = 0; h < SIZEOF (handlers); h++)
  87     {
  88       enum iconv_ilseq_handler handler = handlers[h];
  89       static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
  90       static const uint8_t expected[] = "Rafa\305\202 Maszkowski";
  91       for (o = 0; o < 2; o++)
  92         {
  93           size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
  94           size_t length;
  95           uint8_t *result = u8_conv_from_encoding ("ISO-8859-2", handler,
  96                                                    input, strlen (input),
  97                                                    offsets,
  98                                                    NULL, &length);
  99           ASSERT (result != NULL);
 100           ASSERT (length == u8_strlen (expected));
 101           ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
 102           if (o)
 103             {
 104               for (i = 0; i < 16; i++)
 105                 ASSERT (offsets[i] == (i < 5 ? i :
 106                                        i + 1));
 107               ASSERT (offsets[16] == MAGIC);
 108               free (offsets);
 109             }
 110           free (result);
 111         }
 112     }
 113 
 114   /* autodetect_jp is only supported when iconv() support ISO-2022-JP-2.  */
 115 # if defined _LIBICONV_VERSION || !(defined _AIX || defined __sgi || defined __hpux || defined __osf__ || defined __sun)
 116   if (iconv_supports_encoding ("ISO-2022-JP-2"))
 117     {
 118       /* Test conversions from autodetect_jp to UTF-8.  */
 119       for (h = 0; h < SIZEOF (handlers); h++)
 120         {
 121           enum iconv_ilseq_handler handler = handlers[h];
 122           static const char input[] = "\244\263\244\363\244\313\244\301\244\317"; /* こんにちは in EUC-JP */
 123           static const uint8_t expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
 124           for (o = 0; o < 2; o++)
 125             {
 126               size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
 127               size_t length;
 128               uint8_t *result = u8_conv_from_encoding ("autodetect_jp", handler,
 129                                                        input, strlen (input),
 130                                                        offsets,
 131                                                        NULL, &length);
 132               ASSERT (result != NULL);
 133               ASSERT (length == u8_strlen (expected));
 134               ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
 135               if (o)
 136                 {
 137                   for (i = 0; i < 10; i++)
 138                     ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
 139                   ASSERT (offsets[10] == MAGIC);
 140                   free (offsets);
 141                 }
 142               free (result);
 143             }
 144         }
 145       for (h = 0; h < SIZEOF (handlers); h++)
 146         {
 147           enum iconv_ilseq_handler handler = handlers[h];
 148           static const char input[] = "\202\261\202\361\202\311\202\277\202\315"; /* こんにちは in Shift_JIS */
 149           static const uint8_t expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
 150           for (o = 0; o < 2; o++)
 151             {
 152               size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
 153               size_t length;
 154               uint8_t *result = u8_conv_from_encoding ("autodetect_jp", handler,
 155                                                        input, strlen (input),
 156                                                        offsets,
 157                                                        NULL, &length);
 158               ASSERT (result != NULL);
 159               ASSERT (length == u8_strlen (expected));
 160               ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
 161               if (o)
 162                 {
 163                   for (i = 0; i < 10; i++)
 164                     ASSERT (offsets[i] == ((i % 2) == 0 ? (i / 2) * 3 : (size_t)(-1)));
 165                   ASSERT (offsets[10] == MAGIC);
 166                   free (offsets);
 167                 }
 168               free (result);
 169             }
 170         }
 171       for (h = 0; h < SIZEOF (handlers); h++)
 172         {
 173           enum iconv_ilseq_handler handler = handlers[h];
 174           static const char input[] = "\033$B$3$s$K$A$O\033(B"; /* こんにちは in ISO-2022-JP-2 */
 175           static const uint8_t expected[] = "\343\201\223\343\202\223\343\201\253\343\201\241\343\201\257"; /* こんにちは */
 176           for (o = 0; o < 2; o++)
 177             {
 178               size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
 179               size_t length;
 180               uint8_t *result = u8_conv_from_encoding ("autodetect_jp", handler,
 181                                                        input, strlen (input),
 182                                                        offsets,
 183                                                        NULL, &length);
 184               ASSERT (result != NULL);
 185               ASSERT (length == u8_strlen (expected));
 186               ASSERT (u8_cmp (result, expected, u8_strlen (expected)) == 0);
 187               if (o)
 188                 {
 189                   for (i = 0; i < 16; i++)
 190                     ASSERT (offsets[i] == (i == 0 ? 0 :
 191                                            i == 5 ? 3 :
 192                                            i == 7 ? 6 :
 193                                            i == 9 ? 9 :
 194                                            i == 11 ? 12 :
 195                                            i == 13 ? 15 :
 196                                            (size_t)(-1)));
 197                   ASSERT (offsets[16] == MAGIC);
 198                   free (offsets);
 199                 }
 200               free (result);
 201             }
 202         }
 203     }
 204 # endif
 205 
 206 #endif
 207 
 208   return 0;
 209 }

/* [previous][next][first][last][top][bottom][index][help] */