1 /* Word break auxiliary table. -*- coding: utf-8 -*- 2 Copyright (C) 2009-2021 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2009. 4 5 This file is free software. 6 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". 7 You can redistribute it and/or modify it under either 8 - the terms of the GNU Lesser General Public License as published 9 by the Free Software Foundation; either version 3, or (at your 10 option) any later version, or 11 - the terms of the GNU General Public License as published by the 12 Free Software Foundation; either version 2, or (at your option) 13 any later version, or 14 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". 15 16 This file is distributed in the hope that it will be useful, 17 but WITHOUT ANY WARRANTY; without even the implied warranty of 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 Lesser General Public License and the GNU General Public License 20 for more details. 21 22 You should have received a copy of the GNU Lesser General Public 23 License and of the GNU General Public License along with this 24 program. If not, see <https://www.gnu.org/licenses/>. */ 25 26 #include <config.h> 27 28 /* Specification. */ 29 #include "wbrktable.h" 30 31 const int uniwbrk_prop_index[22] = 32 { 33 0, /* WBP_OTHER */ 34 1, /* WBP_KATAKANA */ 35 2, /* WBP_ALETTER */ 36 3, /* WBP_MIDNUMLET */ 37 4, /* WBP_MIDLETTER */ 38 5, /* WBP_MIDNUM */ 39 6, /* WBP_NUMERIC */ 40 7, /* WBP_EXTENDNUMLET */ 41 -1, /* WBP_EXTEND */ 42 -1, /* WBP_FORMAT */ 43 -1, /* WBP_NEWLINE */ 44 -1, /* WBP_CR */ 45 -1, /* WBP_LF */ 46 -1, /* WBP_RI */ 47 8, /* WBP_DQ */ 48 9, /* WBP_SQ */ 49 10, /* WBP_HL */ 50 -1, /* WBP_ZWJ */ 51 11, /* WBP_EB */ 52 12, /* WBP_EM */ 53 -1, /* WBP_GAZ */ 54 13 /* WBP_EBG */ 55 }; 56 57 /* This table contains the following rules (see UAX #29): 58 59 last current 60 61 (ALetter | HL) × (ALetter | HL) (WB5) 62 (ALetter | HL) × Numeric (WB9) 63 HL × SQ (WB7a) 64 Numeric × (ALetter | HL) (WB10) 65 Numeric × Numeric (WB8) 66 Katakana × Katakana (WB13) 67 (ALetter | HL | Numeric | Katakana) × ExtendNumLet (WB13a) 68 ExtendNumLet × ExtendNumLet (WB13a) 69 ExtendNumLet × (ALetter | HL | Numeric | Katakana) (WB13b) 70 (E_Base | EBG) × E_Modifier (WB14) 71 72 Note that the following rules are not handled here but in the loop in u-wordbreaks.h: 73 - The rules need to look back or look ahead the second character (WB6, WB7, WB7b, WB7c, WB11, WB12) 74 - The rules with a higher precedence over the "ignore" rule (WB4), such as WB3c 75 */ 76 77 const unsigned char uniwbrk_table[14][14] = 78 { /* current: OTHER MIDNUMLET NUMERIC SQ EM */ 79 /* KATAKANA MIDLETTER EXNUMLET HL EBG */ 80 /* ALETTER MIDNUM DQ EB */ 81 /* last */ 82 /* WBP_OTHER */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 83 /* WBP_KATAKANA */ { 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1 }, 84 /* WBP_ALETTER */ { 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, 85 /* WBP_MIDNUMLET */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 86 /* WBP_MIDLETTER */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 87 /* WBP_MIDNUM */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 88 /* WBP_NUMERIC */ { 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, 89 /* WBP_EXTENDNUMLET */ { 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, 90 /* WBP_DQ */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 91 /* WBP_SQ */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 92 /* WBP_HL */ { 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1 }, 93 /* WBP_EB */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1 }, 94 /* WBP_EM */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 95 /* WBP_EBG */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1 } 96 };