1 /* Test whether a 32-bit wide character belongs to a specific character class.
2 Copyright (C) 2020-2021 Free Software Foundation, Inc.
3
4 This file is free software.
5 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
6 You can redistribute it and/or modify it under either
7 - the terms of the GNU Lesser General Public License as published
8 by the Free Software Foundation; either version 3, or (at your
9 option) any later version, or
10 - the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option)
12 any later version, or
13 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
14
15 This file is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Lesser General Public License and the GNU General Public License
19 for more details.
20
21 You should have received a copy of the GNU Lesser General Public
22 License and of the GNU General Public License along with this
23 program. If not, see <https://www.gnu.org/licenses/>. */
24
25 /* Written by Bruno Haible <bruno@clisp.org>, 2020. */
26
27 #include <wchar.h>
28 #include <wctype.h>
29
30 #ifdef __CYGWIN__
31 # include <cygwin/version.h>
32 #endif
33
34 #if GNULIB_defined_mbstate_t
35 # include "localcharset.h"
36 # include "streq.h"
37 #endif
38
39 #include "unictype.h"
40 #include "verify.h"
41
/* Test whether the 32-bit wide character WC belongs to a specific character
   class.  FUNC, WCHAR_FUNC and UCS_FUNC are not defined in this file; they
   are expected to be supplied by the file that includes this template.
   Depending on the platform configuration, the test is delegated either to
   WCHAR_FUNC or to UCS_FUNC.  On the code paths that may consult UCS_FUNC
   unconditionally for non-WEOF input, WEOF yields 0.  */
int
FUNC (wint_t wc)
{
  /* How a multibyte character is encoded as a char32_t is determined by how
     mbrtoc32() is defined, so the cases below follow its definition.  */

#if GNULIB_defined_mbstate_t            /* AIX, IRIX */
  /* Here mbrtoc32() sits on top of mbtowc() in the non-UTF-8 locales and is
     implemented directly in the UTF-8 locales.  */
  if (wc == WEOF)
    return 0;
  else
    {
      const char *charset = locale_charset ();
      return (STREQ_OPT (charset, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0)
              ? UCS_FUNC (wc)
              : WCHAR_FUNC (wc));
    }

#elif HAVE_WORKING_MBRTOC32             /* glibc */
  /* Here mbrtoc32() is essentially the one from the system libc.  */

# ifdef __GLIBC__
  /* char32_t and wchar_t are known to use the same encoding for a multibyte
     character.  */
  return WCHAR_FUNC (wc);
# else
  /* The char32_t encoding of a multibyte character is known to be UCS-4,
     different from the wchar_t encoding.  */
  if (wc == WEOF)
    return 0;
  return UCS_FUNC (wc);
# endif

#elif _GL_LARGE_CHAR32_T                /* Cygwin, mingw, MSVC */
  /* wchar_t holds UTF-16, whereas char32_t holds UCS-4.  */

# if defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007
  /* Cygwin >= 1.7 extends POSIX: its iswalnum() function also accepts wc
     arguments outside the Unicode BMP, that is, outside the 'wchar_t'
     range.  See
     <https://lists.gnu.org/archive/html/bug-gnulib/2011-02/msg00019.html>
     = <https://cygwin.com/ml/cygwin/2011-02/msg00044.html>.  */
  return WCHAR_FUNC (wc);
# else
  /* Only a value that is not WEOF and does not survive the round trip
     through wchar_t is out of range for the isw* functions.  */
  if (wc != WEOF && wc != (wchar_t) wc)
    return UCS_FUNC (wc);
  return WCHAR_FUNC (wc);
# endif

#else /* macOS, FreeBSD, NetBSD, OpenBSD, HP-UX, Solaris, Minix, Android */
  /* char32_t and wchar_t are interchangeable; check the size at compile
     time.  */
  verify (sizeof (char32_t) == sizeof (wchar_t));

  return WCHAR_FUNC (wc);
#endif
}