1 /* Test of wcwidth() function. 2 Copyright (C) 2007-2021 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <https://www.gnu.org/licenses/>. */ 16 17 /* Written by Bruno Haible <bruno@clisp.org>, 2007. */ 18 19 #include <config.h> 20 21 #include <wchar.h> 22 23 #include "signature.h" 24 SIGNATURE_CHECK (wcwidth, int, (wchar_t)); 25 26 #include <locale.h> 27 #include <string.h> 28 29 #include "c-ctype.h" 30 #include "localcharset.h" 31 #include "macros.h" 32 33 int 34 main () /* */ 35 { 36 wchar_t wc; 37 38 #if !GNULIB_WCHAR_SINGLE_LOCALE 39 # ifdef C_CTYPE_ASCII 40 /* Test width of ASCII characters. */ 41 for (wc = 0x20; wc < 0x7F; wc++) 42 ASSERT (wcwidth (wc) == 1); 43 # endif 44 #endif 45 46 /* Switch to an UTF-8 locale. */ 47 if (setlocale (LC_ALL, "fr_FR.UTF-8") != NULL 48 /* Check whether it's really an UTF-8 locale. 49 On OpenBSD 4.0, the setlocale call succeeds only for the LC_CTYPE 50 category and therefore returns "C/fr_FR.UTF-8/C/C/C/C", but the 51 LC_CTYPE category is effectively set to an ASCII LC_CTYPE category; 52 in particular, locale_charset() returns "ASCII". */ 53 && strcmp (locale_charset (), "UTF-8") == 0) 54 { 55 /* Test width of ASCII characters. */ 56 for (wc = 0x20; wc < 0x7F; wc++) 57 ASSERT (wcwidth (wc) == 1); 58 59 /* Test width of some non-spacing characters. */ 60 ASSERT (wcwidth (0x0301) == 0); 61 ASSERT (wcwidth (0x05B0) == 0); 62 63 /* Test width of some format control characters. */ 64 ASSERT (wcwidth (0x200E) <= 0); 65 ASSERT (wcwidth (0x2060) <= 0); 66 #if 0 /* wchar_t may be only 16 bits. */ 67 ASSERT (wcwidth (0xE0001) <= 0); 68 ASSERT (wcwidth (0xE0044) <= 0); 69 #endif 70 71 /* Test width of some zero width characters. */ 72 /* While it is desirable that U+200B, U+200C, U+200D have width 0, 73 because this makes wcswidth work better on strings that contain these 74 characters, it is acceptable if an implementation treats these 75 characters like control characters. */ 76 ASSERT (wcwidth (0x200B) <= 0); 77 ASSERT (wcwidth (0xFEFF) <= 0); 78 79 /* Test width of some math symbols. 80 U+2202 is marked as having ambiguous width (A) in EastAsianWidth.txt 81 (see <https://www.unicode.org/Public/12.0.0/ucd/EastAsianWidth.txt>). 82 The Unicode Standard Annex 11 83 <https://www.unicode.org/reports/tr11/tr11-36.html> 84 says 85 "Ambiguous characters behave like wide or narrow characters 86 depending on the context (language tag, script identification, 87 associated font, source of data, or explicit markup; all can 88 provide the context). If the context cannot be established 89 reliably, they should be treated as narrow characters by default." 90 For wcwidth(), the only available context information is the locale. 91 "fr_FR.UTF-8" is a Western locale, not an East Asian locale, therefore 92 U+2202 should be treated like a narrow character. */ 93 ASSERT (wcwidth (0x2202) == 1); 94 95 /* Test width of some CJK characters. */ 96 ASSERT (wcwidth (0x3000) == 2); 97 ASSERT (wcwidth (0xB250) == 2); 98 ASSERT (wcwidth (0xFF1A) == 2); 99 #if 0 /* wchar_t may be only 16 bits. */ 100 ASSERT (wcwidth (0x20369) == 2); 101 ASSERT (wcwidth (0x2F876) == 2); 102 #endif 103 } 104 105 return 0; 106 }