root/maint/gnulib/tests/uniconv/test-u8-conv-to-enc.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. new_offsets
  2. main

   1 /* Test of conversion from UTF-8 to legacy encodings.
   2    Copyright (C) 2007-2021 Free Software Foundation, Inc.
   3 
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8 
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13 
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16 
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2007.  */
  18 
  19 #include <config.h>
  20 
  21 #include "uniconv.h"
  22 
  23 #include <errno.h>
  24 #include <stdlib.h>
  25 #include <string.h>
  26 
  27 #include "unistr.h"
  28 #include "macros.h"
  29 
  30 /* Magic number for detecting bounds violations.  */
  31 #define MAGIC 0x1983EFF1
  32 
  33 static size_t *
  34 new_offsets (size_t n)
     /* [previous][next][first][last][top][bottom][index][help] */
  35 {
  36   size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t));
  37   offsets[n] = MAGIC;
  38   return offsets;
  39 }
  40 
  41 int
  42 main ()
     /* [previous][next][first][last][top][bottom][index][help] */
  43 {
  44 #if HAVE_ICONV
  45   static enum iconv_ilseq_handler handlers[] =
  46     { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
  47   size_t h;
  48   size_t o;
  49   size_t i;
  50 
  51   /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
  52      ISO-8859-2, and UTF-8.  */
  53 
  54   /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  */
  55   for (h = 0; h < SIZEOF (handlers); h++)
  56     {
  57       enum iconv_ilseq_handler handler = handlers[h];
  58       static const uint8_t input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
  59       static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
  60       for (o = 0; o < 2; o++)
  61         {
  62           size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL);
  63           size_t length;
  64           char *result = u8_conv_to_encoding ("ISO-8859-1", handler,
  65                                               input, u8_strlen (input),
  66                                               offsets,
  67                                               NULL, &length);
  68           ASSERT (result != NULL);
  69           ASSERT (length == strlen (expected));
  70           ASSERT (memcmp (result, expected, length) == 0);
  71           if (o)
  72             {
  73               for (i = 0; i < 41; i++)
  74                 ASSERT (offsets[i] == (i < 1 ? i :
  75                                        i == 1 ? (size_t)(-1) :
  76                                        i < 13 ? i - 1 :
  77                                        i == 13 ? (size_t)(-1) :
  78                                        i < 20 ? i - 2 :
  79                                        i == 20 ? (size_t)(-1) :
  80                                        i < 40 ? i - 3 :
  81                                        i == 40 ? (size_t)(-1) :
  82                                        i - 4));
  83               ASSERT (offsets[41] == MAGIC);
  84               free (offsets);
  85             }
  86           free (result);
  87         }
  88     }
  89 
  90   /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  */
  91   for (h = 0; h < SIZEOF (handlers); h++)
  92     {
  93       enum iconv_ilseq_handler handler = handlers[h];
  94       static const uint8_t input[] = "Rafa\305\202 Maszkowski"; /* RafaƂ Maszkowski */
  95       for (o = 0; o < 2; o++)
  96         {
  97           size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL);
  98           size_t length = 0xdead;
  99           char *result = u8_conv_to_encoding ("ISO-8859-1", handler,
 100                                               input, u8_strlen (input),
 101                                               offsets,
 102                                               NULL, &length);
 103           switch (handler)
 104             {
 105             case iconveh_error:
 106               ASSERT (result == NULL);
 107               ASSERT (errno == EILSEQ);
 108               ASSERT (length == 0xdead);
 109               if (o)
 110                 free (offsets);
 111               break;
 112             case iconveh_question_mark:
 113               {
 114                 static const char expected[] = "Rafa? Maszkowski";
 115                 static const char expected_translit[] = "Rafal Maszkowski";
 116                 ASSERT (result != NULL);
 117                 ASSERT (length == strlen (expected));
 118                 ASSERT (memcmp (result, expected, length) == 0
 119                         || memcmp (result, expected_translit, length) == 0);
 120                 if (o)
 121                   {
 122                     for (i = 0; i < 17; i++)
 123                       ASSERT (offsets[i] == (i < 5 ? i :
 124                                              i == 5 ? (size_t)(-1) :
 125                                              i - 1));
 126                     ASSERT (offsets[17] == MAGIC);
 127                     free (offsets);
 128                   }
 129                 free (result);
 130               }
 131               break;
 132             case iconveh_escape_sequence:
 133               {
 134                 static const char expected[] = "Rafa\\u0142 Maszkowski";
 135                 ASSERT (result != NULL);
 136                 ASSERT (length == strlen (expected));
 137                 ASSERT (memcmp (result, expected, length) == 0);
 138                 if (o)
 139                   {
 140                     for (i = 0; i < 17; i++)
 141                       ASSERT (offsets[i] == (i < 5 ? i :
 142                                              i == 5 ? (size_t)(-1) :
 143                                              i + 4));
 144                     ASSERT (offsets[17] == MAGIC);
 145                     free (offsets);
 146                   }
 147                 free (result);
 148               }
 149               break;
 150             }
 151         }
 152     }
 153 
 154   /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL.  */
 155   for (h = 0; h < SIZEOF (handlers); h++)
 156     {
 157       enum iconv_ilseq_handler handler = handlers[h];
 158       static const uint8_t input[] = "\342";
 159       for (o = 0; o < 2; o++)
 160         {
 161           size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL);
 162           size_t length;
 163           char *result = u8_conv_to_encoding ("ISO-8859-1", handler,
 164                                               input, u8_strlen (input),
 165                                               offsets,
 166                                               NULL, &length);
 167           ASSERT (result != NULL);
 168           ASSERT (length == strlen (""));
 169           if (o)
 170             {
 171               ASSERT (offsets[0] == 0);
 172               ASSERT (offsets[1] == MAGIC);
 173               free (offsets);
 174             }
 175           free (result);
 176         }
 177     }
 178 
 179 #endif
 180 
 181   return 0;
 182 }

/* [previous][next][first][last][top][bottom][index][help] */