root/maint/gnulib/lib/uniwbrk/wbrktable.c

/* [previous][next][first][last][top][bottom][index][help] */
   1 /* Word break auxiliary table.  -*- coding: utf-8 -*-
   2    Copyright (C) 2009-2021 Free Software Foundation, Inc.
   3    Written by Bruno Haible <bruno@clisp.org>, 2009.
   4 
   5    This file is free software.
   6    It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
   7    You can redistribute it and/or modify it under either
   8      - the terms of the GNU Lesser General Public License as published
   9        by the Free Software Foundation; either version 3, or (at your
  10        option) any later version, or
  11      - the terms of the GNU General Public License as published by the
  12        Free Software Foundation; either version 2, or (at your option)
  13        any later version, or
  14      - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
  15 
  16    This file is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19    Lesser General Public License and the GNU General Public License
  20    for more details.
  21 
  22    You should have received a copy of the GNU Lesser General Public
  23    License and of the GNU General Public License along with this
  24    program.  If not, see <https://www.gnu.org/licenses/>.  */
  25 
  26 #include <config.h>
  27 
  28 /* Specification.  */
  29 #include "wbrktable.h"
  30 
  31 const int uniwbrk_prop_index[22] =
  32 {
  33    0, /* WBP_OTHER */
  34    1, /* WBP_KATAKANA */
  35    2, /* WBP_ALETTER */
  36    3, /* WBP_MIDNUMLET */
  37    4, /* WBP_MIDLETTER */
  38    5, /* WBP_MIDNUM */
  39    6, /* WBP_NUMERIC */
  40    7, /* WBP_EXTENDNUMLET */
  41   -1, /* WBP_EXTEND */
  42   -1, /* WBP_FORMAT */
  43   -1, /* WBP_NEWLINE */
  44   -1, /* WBP_CR */
  45   -1, /* WBP_LF */
  46   -1, /* WBP_RI */
  47    8, /* WBP_DQ */
  48    9, /* WBP_SQ */
  49   10, /* WBP_HL */
  50   -1, /* WBP_ZWJ */
  51   11, /* WBP_EB */
  52   12, /* WBP_EM */
  53   -1, /* WBP_GAZ */
  54   13  /* WBP_EBG */
  55 };
  56 
  57 /* This table contains the following rules (see UAX #29):
  58 
  59                            last         current
  60 
  61                      (ALetter | HL) × (ALetter | HL)                  (WB5)
  62                      (ALetter | HL) × Numeric                         (WB9)
  63                                  HL × SQ                              (WB7a)
  64                             Numeric × (ALetter | HL)                  (WB10)
  65                             Numeric × Numeric                         (WB8)
  66                            Katakana × Katakana                        (WB13)
  67 (ALetter | HL | Numeric | Katakana) × ExtendNumLet                    (WB13a)
  68                        ExtendNumLet × ExtendNumLet                    (WB13a)
  69                    ExtendNumLet × (ALetter | HL | Numeric | Katakana) (WB13b)
  70                      (E_Base | EBG) × E_Modifier                      (WB14)
  71 
  72    Note that the following rules are not handled here but in the loop in u-wordbreaks.h:
   73    - The rules that need to look back or look ahead at a second character (WB6, WB7, WB7b, WB7c, WB11, WB12)
   74    - The rules that take precedence over the "ignore" rule (WB4), such as WB3c
  75  */
  76 
  77 const unsigned char uniwbrk_table[14][14] =
  78 {        /* current:        OTHER       MIDNUMLET   NUMERIC     SQ          EM      */
  79          /*                     KATAKANA    MIDLETTER   EXNUMLET    HL          EBG */
  80          /*                         ALETTER     MIDNUM      DQ          EB          */
  81   /* last */
  82   /* WBP_OTHER */        {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
  83   /* WBP_KATAKANA */     {  1,  0,  1,  1,  1,  1,  1,  0,  1,  1,  1,  1,  1,  1 },
  84   /* WBP_ALETTER */      {  1,  1,  0,  1,  1,  1,  0,  0,  1,  1,  0,  1,  1,  1 },
  85   /* WBP_MIDNUMLET */    {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
  86   /* WBP_MIDLETTER */    {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
  87   /* WBP_MIDNUM */       {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
  88   /* WBP_NUMERIC */      {  1,  1,  0,  1,  1,  1,  0,  0,  1,  1,  0,  1,  1,  1 },
  89   /* WBP_EXTENDNUMLET */ {  1,  0,  0,  1,  1,  1,  0,  0,  1,  1,  0,  1,  1,  1 },
  90   /* WBP_DQ */           {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
  91   /* WBP_SQ */           {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
  92   /* WBP_HL */           {  1,  1,  0,  1,  1,  1,  0,  0,  1,  0,  0,  1,  1,  1 },
  93   /* WBP_EB */           {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  1 },
  94   /* WBP_EM */           {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1 },
  95   /* WBP_EBG */          {  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  1 }
  96 };

/* [previous][next][first][last][top][bottom][index][help] */