1 /* Test whether a 32-bit wide character belongs to a specific character class. 2 Copyright (C) 2020-2021 Free Software Foundation, Inc. 3 4 This file is free software. 5 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". 6 You can redistribute it and/or modify it under either 7 - the terms of the GNU Lesser General Public License as published 8 by the Free Software Foundation; either version 3, or (at your 9 option) any later version, or 10 - the terms of the GNU General Public License as published by the 11 Free Software Foundation; either version 2, or (at your option) 12 any later version, or 13 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". 14 15 This file is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 Lesser General Public License and the GNU General Public License 19 for more details. 20 21 You should have received a copy of the GNU Lesser General Public 22 License and of the GNU General Public License along with this 23 program. If not, see <https://www.gnu.org/licenses/>. */ 24 25 /* Written by Bruno Haible <bruno@clisp.org>, 2020. */ 26 27 #include <wchar.h> 28 #include <wctype.h> 29 30 #ifdef __CYGWIN__ 31 # include <cygwin/version.h> 32 #endif 33 34 #if GNULIB_defined_mbstate_t 35 # include "localcharset.h" 36 # include "streq.h" 37 #endif 38 39 #include "unictype.h" 40 #include "verify.h" 41 42 int 43 FUNC (wint_t wc) /* */ 44 { 45 /* The char32_t encoding of a multibyte character is defined by the way 46 mbrtoc32() is defined. */ 47 48 #if GNULIB_defined_mbstate_t /* AIX, IRIX */ 49 /* mbrtoc32() is defined on top of mbtowc() for the non-UTF-8 locales 50 and directly for the UTF-8 locales. */ 51 if (wc != WEOF) 52 { 53 const char *encoding = locale_charset (); 54 if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0)) 55 return UCS_FUNC (wc); 56 else 57 return WCHAR_FUNC (wc); 58 } 59 else 60 return 0; 61 62 #elif HAVE_WORKING_MBRTOC32 /* glibc */ 63 /* mbrtoc32() is essentially defined by the system libc. */ 64 65 # if defined __GLIBC__ 66 /* The char32_t encoding of a multibyte character is known to be the same as 67 the wchar_t encoding. */ 68 return WCHAR_FUNC (wc); 69 # else 70 /* The char32_t encoding of a multibyte character is known to be UCS-4, 71 different from the the wchar_t encoding. */ 72 if (wc != WEOF) 73 return UCS_FUNC (wc); 74 else 75 return 0; 76 # endif 77 78 #elif _GL_LARGE_CHAR32_T /* Cygwin, mingw, MSVC */ 79 /* The wchar_t encoding is UTF-16. 80 The char32_t encoding is UCS-4. */ 81 82 # if defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007 83 /* As an extension to POSIX, the iswalnum() function of Cygwin >= 1.7 84 supports also wc arguments outside the Unicode BMP, that is, outside 85 the 'wchar_t' range. See 86 <https://lists.gnu.org/archive/html/bug-gnulib/2011-02/msg00019.html> 87 = <https://cygwin.com/ml/cygwin/2011-02/msg00044.html>. */ 88 return WCHAR_FUNC (wc); 89 # else 90 if (wc == WEOF || wc == (wchar_t) wc) 91 /* wc is in the range for the isw* functions. */ 92 return WCHAR_FUNC (wc); 93 else 94 return UCS_FUNC (wc); 95 # endif 96 97 #else /* macOS, FreeBSD, NetBSD, OpenBSD, HP-UX, Solaris, Minix, Android */ 98 /* char32_t and wchar_t are equivalent. */ 99 verify (sizeof (char32_t) == sizeof (wchar_t)); 100 101 return WCHAR_FUNC (wc); 102 #endif 103 }