root/maint/gnulib/tests/uninorm/test-decomposition.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. main

   1 /* Test of decomposition of Unicode characters.
   2    Copyright (C) 2009-2021 Free Software Foundation, Inc.
   3 
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8 
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13 
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16 
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
  18 
  19 #include <config.h>
  20 
  21 #include "uninorm.h"
  22 
  23 #include "macros.h"
  24 
  25 int
  26 main ()
     /* [previous][next][first][last][top][bottom][index][help] */
  27 {
  28   ucs4_t decomposed[UC_DECOMPOSITION_MAX_LENGTH];
  29   int tag;
  30   int ret;
  31 
  32   /* SPACE */
  33   ret = uc_decomposition (0x0020, &tag, decomposed);
  34   ASSERT (ret == -1);
  35 
  36   /* LATIN CAPITAL LETTER A WITH DIAERESIS */
  37   ret = uc_decomposition (0x00C4, &tag, decomposed);
  38   ASSERT (ret == 2);
  39   ASSERT (tag == UC_DECOMP_CANONICAL);
  40   ASSERT (decomposed[0] == 0x0041);
  41   ASSERT (decomposed[1] == 0x0308);
  42 
  43   /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
  44   ret = uc_decomposition (0x01DE, &tag, decomposed);
  45   ASSERT (ret == 2);
  46   ASSERT (tag == UC_DECOMP_CANONICAL);
  47   ASSERT (decomposed[0] == 0x00C4);
  48   ASSERT (decomposed[1] == 0x0304);
  49 
  50   /* GREEK DIALYTIKA AND PERISPOMENI */
  51   ret = uc_decomposition (0x1FC1, &tag, decomposed);
  52   ASSERT (ret == 2);
  53   ASSERT (tag == UC_DECOMP_CANONICAL);
  54   ASSERT (decomposed[0] == 0x00A8);
  55   ASSERT (decomposed[1] == 0x0342);
  56 
  57   /* SCRIPT SMALL L */
  58   ret = uc_decomposition (0x2113, &tag, decomposed);
  59   ASSERT (ret == 1);
  60   ASSERT (tag == UC_DECOMP_FONT);
  61   ASSERT (decomposed[0] == 0x006C);
  62 
  63   /* NO-BREAK SPACE */
  64   ret = uc_decomposition (0x00A0, &tag, decomposed);
  65   ASSERT (ret == 1);
  66   ASSERT (tag == UC_DECOMP_NOBREAK);
  67   ASSERT (decomposed[0] == 0x0020);
  68 
  69   /* ARABIC LETTER VEH INITIAL FORM */
  70   ret = uc_decomposition (0xFB6C, &tag, decomposed);
  71   ASSERT (ret == 1);
  72   ASSERT (tag == UC_DECOMP_INITIAL);
  73   ASSERT (decomposed[0] == 0x06A4);
  74 
  75   /* ARABIC LETTER VEH MEDIAL FORM */
  76   ret = uc_decomposition (0xFB6D, &tag, decomposed);
  77   ASSERT (ret == 1);
  78   ASSERT (tag == UC_DECOMP_MEDIAL);
  79   ASSERT (decomposed[0] == 0x06A4);
  80 
  81   /* ARABIC LETTER VEH FINAL FORM */
  82   ret = uc_decomposition (0xFB6B, &tag, decomposed);
  83   ASSERT (ret == 1);
  84   ASSERT (tag == UC_DECOMP_FINAL);
  85   ASSERT (decomposed[0] == 0x06A4);
  86 
  87   /* ARABIC LETTER VEH ISOLATED FORM */
  88   ret = uc_decomposition (0xFB6A, &tag, decomposed);
  89   ASSERT (ret == 1);
  90   ASSERT (tag == UC_DECOMP_ISOLATED);
  91   ASSERT (decomposed[0] == 0x06A4);
  92 
  93   /* CIRCLED NUMBER FIFTEEN */
  94   ret = uc_decomposition (0x246E, &tag, decomposed);
  95   ASSERT (ret == 2);
  96   ASSERT (tag == UC_DECOMP_CIRCLE);
  97   ASSERT (decomposed[0] == 0x0031);
  98   ASSERT (decomposed[1] == 0x0035);
  99 
 100   /* TRADE MARK SIGN */
 101   ret = uc_decomposition (0x2122, &tag, decomposed);
 102   ASSERT (ret == 2);
 103   ASSERT (tag == UC_DECOMP_SUPER);
 104   ASSERT (decomposed[0] == 0x0054);
 105   ASSERT (decomposed[1] == 0x004D);
 106 
 107   /* LATIN SUBSCRIPT SMALL LETTER I */
 108   ret = uc_decomposition (0x1D62, &tag, decomposed);
 109   ASSERT (ret == 1);
 110   ASSERT (tag == UC_DECOMP_SUB);
 111   ASSERT (decomposed[0] == 0x0069);
 112 
 113   /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
 114   ret = uc_decomposition (0xFE35, &tag, decomposed);
 115   ASSERT (ret == 1);
 116   ASSERT (tag == UC_DECOMP_VERTICAL);
 117   ASSERT (decomposed[0] == 0x0028);
 118 
 119   /* FULLWIDTH LATIN CAPITAL LETTER A */
 120   ret = uc_decomposition (0xFF21, &tag, decomposed);
 121   ASSERT (ret == 1);
 122   ASSERT (tag == UC_DECOMP_WIDE);
 123   ASSERT (decomposed[0] == 0x0041);
 124 
 125   /* HALFWIDTH IDEOGRAPHIC COMMA */
 126   ret = uc_decomposition (0xFF64, &tag, decomposed);
 127   ASSERT (ret == 1);
 128   ASSERT (tag == UC_DECOMP_NARROW);
 129   ASSERT (decomposed[0] == 0x3001);
 130 
 131   /* SMALL IDEOGRAPHIC COMMA */
 132   ret = uc_decomposition (0xFE51, &tag, decomposed);
 133   ASSERT (ret == 1);
 134   ASSERT (tag == UC_DECOMP_SMALL);
 135   ASSERT (decomposed[0] == 0x3001);
 136 
 137   /* SQUARE MHZ */
 138   ret = uc_decomposition (0x3392, &tag, decomposed);
 139   ASSERT (ret == 3);
 140   ASSERT (tag == UC_DECOMP_SQUARE);
 141   ASSERT (decomposed[0] == 0x004D);
 142   ASSERT (decomposed[1] == 0x0048);
 143   ASSERT (decomposed[2] == 0x007A);
 144 
 145   /* VULGAR FRACTION THREE EIGHTHS */
 146   ret = uc_decomposition (0x215C, &tag, decomposed);
 147   ASSERT (ret == 3);
 148   ASSERT (tag == UC_DECOMP_FRACTION);
 149   ASSERT (decomposed[0] == 0x0033);
 150   ASSERT (decomposed[1] == 0x2044);
 151   ASSERT (decomposed[2] == 0x0038);
 152 
 153   /* MICRO SIGN */
 154   ret = uc_decomposition (0x00B5, &tag, decomposed);
 155   ASSERT (ret == 1);
 156   ASSERT (tag == UC_DECOMP_COMPAT);
 157   ASSERT (decomposed[0] == 0x03BC);
 158 
 159   /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
 160   ret = uc_decomposition (0xFDFA, &tag, decomposed);
 161   ASSERT (ret == 18);
 162   ASSERT (tag == UC_DECOMP_ISOLATED);
 163   ASSERT (decomposed[0] == 0x0635);
 164   ASSERT (decomposed[1] == 0x0644);
 165   ASSERT (decomposed[2] == 0x0649);
 166   ASSERT (decomposed[3] == 0x0020);
 167   ASSERT (decomposed[4] == 0x0627);
 168   ASSERT (decomposed[5] == 0x0644);
 169   ASSERT (decomposed[6] == 0x0644);
 170   ASSERT (decomposed[7] == 0x0647);
 171   ASSERT (decomposed[8] == 0x0020);
 172   ASSERT (decomposed[9] == 0x0639);
 173   ASSERT (decomposed[10] == 0x0644);
 174   ASSERT (decomposed[11] == 0x064A);
 175   ASSERT (decomposed[12] == 0x0647);
 176   ASSERT (decomposed[13] == 0x0020);
 177   ASSERT (decomposed[14] == 0x0648);
 178   ASSERT (decomposed[15] == 0x0633);
 179   ASSERT (decomposed[16] == 0x0644);
 180   ASSERT (decomposed[17] == 0x0645);
 181 
 182   /* HANGUL SYLLABLE GEUL */
 183   ret = uc_decomposition (0xAE00, &tag, decomposed);
 184   /* See the clarification at <https://www.unicode.org/versions/Unicode5.1.0/>,
 185      section "Clarification of Hangul Jamo Handling".  */
 186 #if 1
 187   ASSERT (ret == 2);
 188   ASSERT (tag == UC_DECOMP_CANONICAL);
 189   ASSERT (decomposed[0] == 0xADF8);
 190   ASSERT (decomposed[1] == 0x11AF);
 191 #else
 192   ASSERT (ret == 3);
 193   ASSERT (tag == UC_DECOMP_CANONICAL);
 194   ASSERT (decomposed[0] == 0x1100);
 195   ASSERT (decomposed[1] == 0x1173);
 196   ASSERT (decomposed[2] == 0x11AF);
 197 #endif
 198 
 199   /* HANGUL SYLLABLE GEU */
 200   ret = uc_decomposition (0xADF8, &tag, decomposed);
 201   ASSERT (ret == 2);
 202   ASSERT (tag == UC_DECOMP_CANONICAL);
 203   ASSERT (decomposed[0] == 0x1100);
 204   ASSERT (decomposed[1] == 0x1173);
 205 
 206   return 0;
 207 }

/* [previous][next][first][last][top][bottom][index][help] */