1 /* Look at first character in UTF-8 string. 2 Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2021 Free Software 3 Foundation, Inc. 4 Written by Bruno Haible <bruno@clisp.org>, 2002. 5 6 This file is free software. 7 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". 8 You can redistribute it and/or modify it under either 9 - the terms of the GNU Lesser General Public License as published 10 by the Free Software Foundation; either version 3, or (at your 11 option) any later version, or 12 - the terms of the GNU General Public License as published by the 13 Free Software Foundation; either version 2, or (at your option) 14 any later version, or 15 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". 16 17 This file is distributed in the hope that it will be useful, 18 but WITHOUT ANY WARRANTY; without even the implied warranty of 19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 Lesser General Public License and the GNU General Public License 21 for more details. 22 23 You should have received a copy of the GNU Lesser General Public 24 License and of the GNU General Public License along with this 25 program. If not, see <https://www.gnu.org/licenses/>. */ 26 27 #include <config.h> 28 29 /* Specification. */ 30 #include "unistr.h" 31 32 int 33 u8_strmblen (const uint8_t *s) /* */ 34 { 35 /* Keep in sync with unistr.h and u8-mbtouc-aux.c. */ 36 uint8_t c = *s; 37 38 if (c < 0x80) 39 return (c != 0 ? 1 : 0); 40 if (c >= 0xc2) 41 { 42 if (c < 0xe0) 43 { 44 if ((s[1] ^ 0x80) < 0x40) 45 return 2; 46 } 47 else if (c < 0xf0) 48 { 49 if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 50 && (c >= 0xe1 || s[1] >= 0xa0) 51 && (c != 0xed || s[1] < 0xa0)) 52 return 3; 53 } 54 else if (c < 0xf8) 55 { 56 if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 57 && (s[3] ^ 0x80) < 0x40 58 && (c >= 0xf1 || s[1] >= 0x90) 59 && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))) 60 return 4; 61 } 62 } 63 /* invalid or incomplete multibyte character */ 64 return -1; 65 }