root/maint/gnulib/lib/unicase.in.h

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. uc_toupper

   1 /* Unicode character case mappings.
   2    Copyright (C) 2002, 2009-2021 Free Software Foundation, Inc.
   3 
   4    This file is free software.
   5    It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
   6    You can redistribute it and/or modify it under either
   7      - the terms of the GNU Lesser General Public License as published
   8        by the Free Software Foundation; either version 3, or (at your
   9        option) any later version, or
  10      - the terms of the GNU General Public License as published by the
  11        Free Software Foundation; either version 2, or (at your option)
  12        any later version, or
  13      - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
  14 
  15    This file is distributed in the hope that it will be useful,
  16    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18    Lesser General Public License and the GNU General Public License
  19    for more details.
  20 
  21    You should have received a copy of the GNU Lesser General Public
  22    License and of the GNU General Public License along with this
  23    program.  If not, see <https://www.gnu.org/licenses/>.  */
  24 
  25 #ifndef _UNICASE_H
  26 #define _UNICASE_H
  27 
  28 #include "unitypes.h"
  29 
  30 /* Get bool.  */
  31 #include <stdbool.h>
  32 
  33 /* Get size_t.  */
  34 #include <stddef.h>
  35 
  36 /* Get uninorm_t.  */
  37 #include "uninorm.h"
  38 
  39 #ifdef __cplusplus
  40 extern "C" {
  41 #endif
  42 
  43 /* ========================================================================= */
  44 
  45 /* Character case mappings.
  46    These mappings are locale and context independent.
  47    WARNING! These functions are not sufficient for languages such as German.
  48    Prefer the string functions below, which treat an entire string at once
  49    and are language aware.  */
  50 
  51 /* Return the uppercase mapping of a Unicode character.  */
  52 extern ucs4_t
  53        uc_toupper (ucs4_t uc)
     /* [previous][next][first][last][top][bottom][index][help] */
  54        _UC_ATTRIBUTE_CONST;
  55 
  56 /* Return the lowercase mapping of a Unicode character.  */
  57 extern ucs4_t
  58        uc_tolower (ucs4_t uc)
  59        _UC_ATTRIBUTE_CONST;
  60 
  61 /* Return the titlecase mapping of a Unicode character.  */
  62 extern ucs4_t
  63        uc_totitle (ucs4_t uc)
  64        _UC_ATTRIBUTE_CONST;
  65 
  66 /* ========================================================================= */
  67 
  68 /* String case mappings.  */
  69 
  70 /* These functions are locale dependent.  The iso639_language argument
  71    identifies the language (e.g. "tr" for Turkish).  NULL means to use
  72    locale independent case mappings.  */
  73 
  74 /* Return the ISO 639 language code of the current locale.
  75    Return "" if it is unknown, or in the "C" locale.  */
  76 extern const char *
  77        uc_locale_language (void)
  78        _UC_ATTRIBUTE_PURE;
  79 
  80 /* Conventions:
  81 
  82    All functions prefixed with u8_ operate on UTF-8 encoded strings.
  83    Their unit is a uint8_t (1 byte).
  84 
  85    All functions prefixed with u16_ operate on UTF-16 encoded strings.
  86    Their unit is a uint16_t (a 2-byte word).
  87 
  88    All functions prefixed with u32_ operate on UCS-4 encoded strings.
  89    Their unit is a uint32_t (a 4-byte word).
  90 
  91    All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
  92    n units.
  93 
  94    Functions returning a string result take a (resultbuf, lengthp) argument
  95    pair.  If resultbuf is not NULL and the result fits into *lengthp units,
  96    it is put in resultbuf, and resultbuf is returned.  Otherwise, a freshly
  97    allocated string is returned.  In both cases, *lengthp is set to the
  98    length (number of units) of the returned string.  In case of error,
  99    NULL is returned and errno is set.  */
 100 
 101 /* Return the uppercase mapping of a string.
 102    The nf argument identifies the normalization form to apply after the
 103    case-mapping.  It can also be NULL, for no normalization.  */
 104 extern uint8_t *
 105        u8_toupper (const uint8_t *s, size_t n, const char *iso639_language,
 106                    uninorm_t nf,
 107                    uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 108 extern uint16_t *
 109        u16_toupper (const uint16_t *s, size_t n, const char *iso639_language,
 110                     uninorm_t nf,
 111                     uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 112 extern uint32_t *
 113        u32_toupper (const uint32_t *s, size_t n, const char *iso639_language,
 114                     uninorm_t nf,
 115                     uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 116 
 117 /* Return the lowercase mapping of a string.
 118    The nf argument identifies the normalization form to apply after the
 119    case-mapping.  It can also be NULL, for no normalization.  */
 120 extern uint8_t *
 121        u8_tolower (const uint8_t *s, size_t n, const char *iso639_language,
 122                    uninorm_t nf,
 123                    uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 124 extern uint16_t *
 125        u16_tolower (const uint16_t *s, size_t n, const char *iso639_language,
 126                     uninorm_t nf,
 127                     uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 128 extern uint32_t *
 129        u32_tolower (const uint32_t *s, size_t n, const char *iso639_language,
 130                     uninorm_t nf,
 131                     uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 132 
 133 /* Return the titlecase mapping of a string.
 134    The nf argument identifies the normalization form to apply after the
 135    case-mapping.  It can also be NULL, for no normalization.  */
 136 extern uint8_t *
 137        u8_totitle (const uint8_t *s, size_t n, const char *iso639_language,
 138                    uninorm_t nf,
 139                    uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 140 extern uint16_t *
 141        u16_totitle (const uint16_t *s, size_t n, const char *iso639_language,
 142                     uninorm_t nf,
 143                     uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 144 extern uint32_t *
 145        u32_totitle (const uint32_t *s, size_t n, const char *iso639_language,
 146                     uninorm_t nf,
 147                     uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 148 
 149 /* The case-mapping context given by a prefix string.
        Values of this type are passed and returned by value.  Obtain one from
        unicase_empty_prefix_context, uN_casing_prefix_context () or
        uN_casing_prefixes_context (), and hand it to the uN_ct_* functions
        as their prefix_context argument.  */
 150 typedef struct casing_prefix_context
 151         {
 152           /* These fields are private, undocumented.  */
 153           uint32_t last_char_except_ignorable;
 154           uint32_t last_char_normal_or_above;
 155         }
 156         casing_prefix_context_t;
 157 /* The case-mapping context of the empty prefix string.  */
 158 extern const casing_prefix_context_t unicase_empty_prefix_context;
 159 /* Return the case-mapping context of a given prefix string.  */
 160 extern casing_prefix_context_t
 161        u8_casing_prefix_context (const uint8_t *s, size_t n);
 162 extern casing_prefix_context_t
 163        u16_casing_prefix_context (const uint16_t *s, size_t n);
 164 extern casing_prefix_context_t
 165        u32_casing_prefix_context (const uint32_t *s, size_t n);
 166 /* Return the case-mapping context of the prefix concat(A, S), given the
 167    case-mapping context of the prefix A.  */
 168 extern casing_prefix_context_t
 169        u8_casing_prefixes_context (const uint8_t *s, size_t n,
 170                                    casing_prefix_context_t a_context);
 171 extern casing_prefix_context_t
 172        u16_casing_prefixes_context (const uint16_t *s, size_t n,
 173                                     casing_prefix_context_t a_context);
 174 extern casing_prefix_context_t
 175        u32_casing_prefixes_context (const uint32_t *s, size_t n,
 176                                     casing_prefix_context_t a_context);
 177 
 178 /* The case-mapping context given by a suffix string.
        Values of this type are passed and returned by value.  Obtain one from
        unicase_empty_suffix_context, uN_casing_suffix_context () or
        uN_casing_suffixes_context (), and hand it to the uN_ct_* functions
        as their suffix_context argument.  */
 179 typedef struct casing_suffix_context
 180         {
 181           /* These fields are private, undocumented.  */
 182           uint32_t first_char_except_ignorable;
 183           uint32_t bits;
 184         }
 185         casing_suffix_context_t;
 186 /* The case-mapping context of the empty suffix string.  */
 187 extern const casing_suffix_context_t unicase_empty_suffix_context;
 188 /* Return the case-mapping context of a given suffix string.  */
 189 extern casing_suffix_context_t
 190        u8_casing_suffix_context (const uint8_t *s, size_t n);
 191 extern casing_suffix_context_t
 192        u16_casing_suffix_context (const uint16_t *s, size_t n);
 193 extern casing_suffix_context_t
 194        u32_casing_suffix_context (const uint32_t *s, size_t n);
 195 /* Return the case-mapping context of the suffix concat(S, A), given the
 196    case-mapping context of the suffix A.  */
 197 extern casing_suffix_context_t
 198        u8_casing_suffixes_context (const uint8_t *s, size_t n,
 199                                    casing_suffix_context_t a_context);
 200 extern casing_suffix_context_t
 201        u16_casing_suffixes_context (const uint16_t *s, size_t n,
 202                                     casing_suffix_context_t a_context);
 203 extern casing_suffix_context_t
 204        u32_casing_suffixes_context (const uint32_t *s, size_t n,
 205                                     casing_suffix_context_t a_context);
 206 
 207 /* Return the uppercase mapping of a string that is surrounded by a prefix
 208    and a suffix.  */
 209 extern uint8_t *
 210        u8_ct_toupper (const uint8_t *s, size_t n,
 211                       casing_prefix_context_t prefix_context,
 212                       casing_suffix_context_t suffix_context,
 213                       const char *iso639_language,
 214                       uninorm_t nf,
 215                       uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 216 extern uint16_t *
 217        u16_ct_toupper (const uint16_t *s, size_t n,
 218                       casing_prefix_context_t prefix_context,
 219                       casing_suffix_context_t suffix_context,
 220                       const char *iso639_language,
 221                       uninorm_t nf,
 222                       uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 223 extern uint32_t *
 224        u32_ct_toupper (const uint32_t *s, size_t n,
 225                       casing_prefix_context_t prefix_context,
 226                       casing_suffix_context_t suffix_context,
 227                       const char *iso639_language,
 228                       uninorm_t nf,
 229                       uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 230 
 231 /* Return the lowercase mapping of a string that is surrounded by a prefix
 232    and a suffix.  */
 233 extern uint8_t *
 234        u8_ct_tolower (const uint8_t *s, size_t n,
 235                       casing_prefix_context_t prefix_context,
 236                       casing_suffix_context_t suffix_context,
 237                       const char *iso639_language,
 238                       uninorm_t nf,
 239                       uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 240 extern uint16_t *
 241        u16_ct_tolower (const uint16_t *s, size_t n,
 242                       casing_prefix_context_t prefix_context,
 243                       casing_suffix_context_t suffix_context,
 244                       const char *iso639_language,
 245                       uninorm_t nf,
 246                       uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 247 extern uint32_t *
 248        u32_ct_tolower (const uint32_t *s, size_t n,
 249                       casing_prefix_context_t prefix_context,
 250                       casing_suffix_context_t suffix_context,
 251                       const char *iso639_language,
 252                       uninorm_t nf,
 253                       uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 254 
 255 /* Return the titlecase mapping of a string that is surrounded by a prefix
 256    and a suffix.  */
 257 extern uint8_t *
 258        u8_ct_totitle (const uint8_t *s, size_t n,
 259                       casing_prefix_context_t prefix_context,
 260                       casing_suffix_context_t suffix_context,
 261                       const char *iso639_language,
 262                       uninorm_t nf,
 263                       uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 264 extern uint16_t *
 265        u16_ct_totitle (const uint16_t *s, size_t n,
 266                       casing_prefix_context_t prefix_context,
 267                       casing_suffix_context_t suffix_context,
 268                       const char *iso639_language,
 269                       uninorm_t nf,
 270                       uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 271 extern uint32_t *
 272        u32_ct_totitle (const uint32_t *s, size_t n,
 273                       casing_prefix_context_t prefix_context,
 274                       casing_suffix_context_t suffix_context,
 275                       const char *iso639_language,
 276                       uninorm_t nf,
 277                       uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 278 
 279 /* Return the case folded string.
 280    Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent
 281    to comparing S1 and S2 with uN_casecmp().
 282    The nf argument identifies the normalization form to apply after the
 283    case-mapping.  It can also be NULL, for no normalization.  */
 284 extern uint8_t *
 285        u8_casefold (const uint8_t *s, size_t n, const char *iso639_language,
 286                     uninorm_t nf,
 287                     uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 288 extern uint16_t *
 289        u16_casefold (const uint16_t *s, size_t n, const char *iso639_language,
 290                      uninorm_t nf,
 291                      uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 292 extern uint32_t *
 293        u32_casefold (const uint32_t *s, size_t n, const char *iso639_language,
 294                      uninorm_t nf,
 295                      uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 296 /* Likewise, for a string that is surrounded by a prefix and a suffix.  */
 297 extern uint8_t *
 298        u8_ct_casefold (const uint8_t *s, size_t n,
 299                        casing_prefix_context_t prefix_context,
 300                        casing_suffix_context_t suffix_context,
 301                        const char *iso639_language,
 302                        uninorm_t nf,
 303                        uint8_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 304 extern uint16_t *
 305        u16_ct_casefold (const uint16_t *s, size_t n,
 306                         casing_prefix_context_t prefix_context,
 307                         casing_suffix_context_t suffix_context,
 308                         const char *iso639_language,
 309                         uninorm_t nf,
 310                         uint16_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 311 extern uint32_t *
 312        u32_ct_casefold (const uint32_t *s, size_t n,
 313                         casing_prefix_context_t prefix_context,
 314                         casing_suffix_context_t suffix_context,
 315                         const char *iso639_language,
 316                         uninorm_t nf,
 317                         uint32_t *_UC_RESTRICT resultbuf, size_t *lengthp);
 318 
 319 /* Compare S1 and S2, ignoring differences in case and normalization.
 320    The nf argument identifies the normalization form to apply after the
 321    case-mapping.  It can also be NULL, for no normalization.
 322    If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
 323    return 0.  Upon failure, return -1 with errno set.  */
 324 extern int
 325        u8_casecmp (const uint8_t *s1, size_t n1,
 326                    const uint8_t *s2, size_t n2,
 327                    const char *iso639_language, uninorm_t nf, int *resultp);
 328 extern int
 329        u16_casecmp (const uint16_t *s1, size_t n1,
 330                     const uint16_t *s2, size_t n2,
 331                     const char *iso639_language, uninorm_t nf, int *resultp);
 332 extern int
 333        u32_casecmp (const uint32_t *s1, size_t n1,
 334                     const uint32_t *s2, size_t n2,
 335                     const char *iso639_language, uninorm_t nf, int *resultp);
 336 extern int
 337        ulc_casecmp (const char *s1, size_t n1,
 338                     const char *s2, size_t n2,
 339                     const char *iso639_language, uninorm_t nf, int *resultp);
 340 
 341 /* Convert the string S of length N to a NUL-terminated byte sequence, in such
 342    a way that comparing uN_casexfrm (S1) and uN_casexfrm (S2) with the gnulib
 343    function memcmp2() is equivalent to comparing S1 and S2 with uN_casecoll().
 344    NF must be either UNINORM_NFC, UNINORM_NFKC, or NULL for no normalization.  */
 345 extern char *
 346        u8_casexfrm (const uint8_t *s, size_t n, const char *iso639_language,
 347                     uninorm_t nf,
 348                     char *_UC_RESTRICT resultbuf, size_t *lengthp);
 349 extern char *
 350        u16_casexfrm (const uint16_t *s, size_t n, const char *iso639_language,
 351                      uninorm_t nf,
 352                      char *_UC_RESTRICT resultbuf, size_t *lengthp);
 353 extern char *
 354        u32_casexfrm (const uint32_t *s, size_t n, const char *iso639_language,
 355                      uninorm_t nf,
 356                      char *_UC_RESTRICT resultbuf, size_t *lengthp);
 357 extern char *
 358        ulc_casexfrm (const char *s, size_t n, const char *iso639_language,
 359                      uninorm_t nf,
 360                      char *_UC_RESTRICT resultbuf, size_t *lengthp);
 361 
 362 /* Compare S1 and S2, ignoring differences in case and normalization, using the
 363    collation rules of the current locale.
 364    The nf argument identifies the normalization form to apply after the
 365    case-mapping.  It must be either UNINORM_NFC, UNINORM_NFKC, or NULL for
 366    no normalization.
 367    If successful, set *RESULTP to -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2, and
 368    return 0.  Upon failure, return -1 with errno set.  */
 369 extern int
 370        u8_casecoll (const uint8_t *s1, size_t n1,
 371                     const uint8_t *s2, size_t n2,
 372                     const char *iso639_language, uninorm_t nf, int *resultp);
 373 extern int
 374        u16_casecoll (const uint16_t *s1, size_t n1,
 375                      const uint16_t *s2, size_t n2,
 376                      const char *iso639_language, uninorm_t nf, int *resultp);
 377 extern int
 378        u32_casecoll (const uint32_t *s1, size_t n1,
 379                      const uint32_t *s2, size_t n2,
 380                      const char *iso639_language, uninorm_t nf, int *resultp);
 381 extern int
 382        ulc_casecoll (const char *s1, size_t n1,
 383                      const char *s2, size_t n2,
 384                      const char *iso639_language, uninorm_t nf, int *resultp);
 385 
 386 
 387 /* Set *RESULTP to true if mapping NFD(S) to upper case is a no-op, or to false
 388    otherwise, and return 0.  Upon failure, return -1 with errno set.  */
 389 extern int
 390        u8_is_uppercase (const uint8_t *s, size_t n,
 391                         const char *iso639_language,
 392                         bool *resultp);
 393 extern int
 394        u16_is_uppercase (const uint16_t *s, size_t n,
 395                          const char *iso639_language,
 396                          bool *resultp);
 397 extern int
 398        u32_is_uppercase (const uint32_t *s, size_t n,
 399                          const char *iso639_language,
 400                          bool *resultp);
 401 
 402 /* Set *RESULTP to true if mapping NFD(S) to lower case is a no-op, or to false
 403    otherwise, and return 0.  Upon failure, return -1 with errno set.  */
 404 extern int
 405        u8_is_lowercase (const uint8_t *s, size_t n,
 406                         const char *iso639_language,
 407                         bool *resultp);
 408 extern int
 409        u16_is_lowercase (const uint16_t *s, size_t n,
 410                          const char *iso639_language,
 411                          bool *resultp);
 412 extern int
 413        u32_is_lowercase (const uint32_t *s, size_t n,
 414                          const char *iso639_language,
 415                          bool *resultp);
 416 
 417 /* Set *RESULTP to true if mapping NFD(S) to title case is a no-op, or to false
 418    otherwise, and return 0.  Upon failure, return -1 with errno set.  */
 419 extern int
 420        u8_is_titlecase (const uint8_t *s, size_t n,
 421                         const char *iso639_language,
 422                         bool *resultp);
 423 extern int
 424        u16_is_titlecase (const uint16_t *s, size_t n,
 425                          const char *iso639_language,
 426                          bool *resultp);
 427 extern int
 428        u32_is_titlecase (const uint32_t *s, size_t n,
 429                          const char *iso639_language,
 430                          bool *resultp);
 431 
 432 /* Set *RESULTP to true if applying case folding to NFD(S) is a no-op, or to
 433    false otherwise, and return 0.  Upon failure, return -1 with errno set.  */
 434 extern int
 435        u8_is_casefolded (const uint8_t *s, size_t n,
 436                          const char *iso639_language,
 437                          bool *resultp);
 438 extern int
 439        u16_is_casefolded (const uint16_t *s, size_t n,
 440                           const char *iso639_language,
 441                           bool *resultp);
 442 extern int
 443        u32_is_casefolded (const uint32_t *s, size_t n,
 444                           const char *iso639_language,
 445                           bool *resultp);
 446 
 447 /* Set *RESULTP to true if case matters for S, that is, if mapping NFD(S) to
 448    either upper case or lower case or title case is not a no-op.
 449    Set *RESULTP to false if NFD(S) maps to itself under the upper case mapping,
 450    under the lower case mapping, and under the title case mapping; in other
 451    words, when NFD(S) consists entirely of caseless characters.
 452    Return 0 if successful.  Upon failure, return -1 with errno set.  */
 453 extern int
 454        u8_is_cased (const uint8_t *s, size_t n,
 455                     const char *iso639_language,
 456                     bool *resultp);
 457 extern int
 458        u16_is_cased (const uint16_t *s, size_t n,
 459                      const char *iso639_language,
 460                      bool *resultp);
 461 extern int
 462        u32_is_cased (const uint32_t *s, size_t n,
 463                      const char *iso639_language,
 464                      bool *resultp);
 465 
 466 
 467 /* ========================================================================= */
 468 
 469 #ifdef __cplusplus
 470 }
 471 #endif
 472 
 473 #endif /* _UNICASE_H */

/* [previous][next][first][last][top][bottom][index][help] */