root/maint/gnulib/tests/uninorm/test-canonical-decomposition.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. main

   1 /* Test of canonical decomposition of Unicode characters.
   2    Copyright (C) 2009-2021 Free Software Foundation, Inc.
   3 
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8 
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13 
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16 
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
  18 
  19 #include <config.h>
  20 
  21 #include "uninorm.h"
  22 
  23 #include "macros.h"
  24 
  25 int
  26 main ()
     /* [previous][next][first][last][top][bottom][index][help] */
  27 {
  28   ucs4_t decomposed[UC_DECOMPOSITION_MAX_LENGTH];
  29   int ret;
  30 
  31   /* SPACE */
  32   ret = uc_canonical_decomposition (0x0020, decomposed);
  33   ASSERT (ret == -1);
  34 
  35   /* LATIN CAPITAL LETTER A WITH DIAERESIS */
  36   ret = uc_canonical_decomposition (0x00C4, decomposed);
  37   ASSERT (ret == 2);
  38   ASSERT (decomposed[0] == 0x0041);
  39   ASSERT (decomposed[1] == 0x0308);
  40 
  41   /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
  42   ret = uc_canonical_decomposition (0x01DE, decomposed);
  43   ASSERT (ret == 2);
  44   ASSERT (decomposed[0] == 0x00C4);
  45   ASSERT (decomposed[1] == 0x0304);
  46 
  47   /* GREEK DIALYTIKA AND PERISPOMENI */
  48   ret = uc_canonical_decomposition (0x1FC1, decomposed);
  49   ASSERT (ret == 2);
  50   ASSERT (decomposed[0] == 0x00A8);
  51   ASSERT (decomposed[1] == 0x0342);
  52 
  53   /* SCRIPT SMALL L */
  54   ret = uc_canonical_decomposition (0x2113, decomposed);
  55   ASSERT (ret == -1);
  56 
  57   /* NO-BREAK SPACE */
  58   ret = uc_canonical_decomposition (0x00A0, decomposed);
  59   ASSERT (ret == -1);
  60 
  61   /* ARABIC LETTER VEH INITIAL FORM */
  62   ret = uc_canonical_decomposition (0xFB6C, decomposed);
  63   ASSERT (ret == -1);
  64 
  65   /* ARABIC LETTER VEH MEDIAL FORM */
  66   ret = uc_canonical_decomposition (0xFB6D, decomposed);
  67   ASSERT (ret == -1);
  68 
  69   /* ARABIC LETTER VEH FINAL FORM */
  70   ret = uc_canonical_decomposition (0xFB6B, decomposed);
  71   ASSERT (ret == -1);
  72 
  73   /* ARABIC LETTER VEH ISOLATED FORM */
  74   ret = uc_canonical_decomposition (0xFB6A, decomposed);
  75   ASSERT (ret == -1);
  76 
  77   /* CIRCLED NUMBER FIFTEEN */
  78   ret = uc_canonical_decomposition (0x246E, decomposed);
  79   ASSERT (ret == -1);
  80 
  81   /* TRADE MARK SIGN */
  82   ret = uc_canonical_decomposition (0x2122, decomposed);
  83   ASSERT (ret == -1);
  84 
  85   /* LATIN SUBSCRIPT SMALL LETTER I */
  86   ret = uc_canonical_decomposition (0x1D62, decomposed);
  87   ASSERT (ret == -1);
  88 
  89   /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
  90   ret = uc_canonical_decomposition (0xFE35, decomposed);
  91   ASSERT (ret == -1);
  92 
  93   /* FULLWIDTH LATIN CAPITAL LETTER A */
  94   ret = uc_canonical_decomposition (0xFF21, decomposed);
  95   ASSERT (ret == -1);
  96 
  97   /* HALFWIDTH IDEOGRAPHIC COMMA */
  98   ret = uc_canonical_decomposition (0xFF64, decomposed);
  99   ASSERT (ret == -1);
 100 
 101   /* SMALL IDEOGRAPHIC COMMA */
 102   ret = uc_canonical_decomposition (0xFE51, decomposed);
 103   ASSERT (ret == -1);
 104 
 105   /* SQUARE MHZ */
 106   ret = uc_canonical_decomposition (0x3392, decomposed);
 107   ASSERT (ret == -1);
 108 
 109   /* VULGAR FRACTION THREE EIGHTHS */
 110   ret = uc_canonical_decomposition (0x215C, decomposed);
 111   ASSERT (ret == -1);
 112 
 113   /* MICRO SIGN */
 114   ret = uc_canonical_decomposition (0x00B5, decomposed);
 115   ASSERT (ret == -1);
 116 
 117   /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
 118   ret = uc_canonical_decomposition (0xFDFA, decomposed);
 119   ASSERT (ret == -1);
 120 
 121   /* HANGUL SYLLABLE GEUL */
 122   ret = uc_canonical_decomposition (0xAE00, decomposed);
 123   /* See the clarification at <https://www.unicode.org/versions/Unicode5.1.0/>,
 124      section "Clarification of Hangul Jamo Handling".  */
 125 #if 1
 126   ASSERT (ret == 2);
 127   ASSERT (decomposed[0] == 0xADF8);
 128   ASSERT (decomposed[1] == 0x11AF);
 129 #else
 130   ASSERT (ret == 3);
 131   ASSERT (decomposed[0] == 0x1100);
 132   ASSERT (decomposed[1] == 0x1173);
 133   ASSERT (decomposed[2] == 0x11AF);
 134 #endif
 135 
 136   /* HANGUL SYLLABLE GEU */
 137   ret = uc_canonical_decomposition (0xADF8, decomposed);
 138   ASSERT (ret == 2);
 139   ASSERT (decomposed[0] == 0x1100);
 140   ASSERT (decomposed[1] == 0x1173);
 141 
 142   return 0;
 143 }

/* [previous][next][first][last][top][bottom][index][help] */