root/maint/gnulib/lib/regex-quote.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. regex_quote_spec_posix
  2. regex_quote_spec_gnu
  3. regex_quote_spec_pcre
  4. regex_quote_length
  5. regex_quote_copy
  6. regex_quote

   1 /* Construct a regular expression from a literal string.
   2    Copyright (C) 1995, 2010-2021 Free Software Foundation, Inc.
   3    Written by Bruno Haible <haible@clisp.cons.org>, 2010.
   4 
   5    This program is free software: you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 3 of the License, or
   8    (at your option) any later version.
   9 
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14 
  15    You should have received a copy of the GNU General Public License
  16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  17 
  18 #include <config.h>
  19 
  20 /* Specification.  */
  21 #include "regex-quote.h"
  22 
  23 #include <string.h>
  24 
  25 #include "mbuiter.h"
  26 #include "xalloc.h"
  27 
  28 /* Characters that are special in a BRE.  */
  29 static const char bre_special[] = "$^.*[]\\";
  30 
  31 /* Characters that are special in an ERE.  */
  32 static const char ere_special[] = "$^.*[]\\+?{}()|";
  33 
  34 struct regex_quote_spec
  35 regex_quote_spec_posix (int cflags, bool anchored)
     /* [previous][next][first][last][top][bottom][index][help] */
  36 {
  37   struct regex_quote_spec result;
  38 
  39   strcpy (result.special, cflags != 0 ? ere_special : bre_special);
  40   result.multibyte = true;
  41   result.anchored = anchored;
  42 
  43   return result;
  44 }
  45 
  46 /* Syntax bit values, defined in GNU <regex.h>.  We don't include it here,
  47    otherwise this module would need to depend on gnulib module 'regex'.  */
  48 #define RE_BK_PLUS_QM    0x00000002
  49 #define RE_INTERVALS     0x00000200
  50 #define RE_LIMITED_OPS   0x00000400
  51 #define RE_NEWLINE_ALT   0x00000800
  52 #define RE_NO_BK_BRACES  0x00001000
  53 #define RE_NO_BK_PARENS  0x00002000
  54 #define RE_NO_BK_VBAR    0x00008000
  55 
  56 struct regex_quote_spec
  57 regex_quote_spec_gnu (unsigned long /*reg_syntax_t*/ syntax, bool anchored)
     /* [previous][next][first][last][top][bottom][index][help] */
  58 {
  59   struct regex_quote_spec result;
  60   char *p;
  61 
  62   p = result.special;
  63   memcpy (p, bre_special, sizeof (bre_special) - 1);
  64   p += sizeof (bre_special) - 1;
  65   if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_BK_PLUS_QM) == 0)
  66     {
  67       *p++ = '+';
  68       *p++ = '?';
  69     }
  70   if ((syntax & RE_INTERVALS) != 0 && (syntax & RE_NO_BK_BRACES) != 0)
  71     {
  72       *p++ = '{';
  73       *p++ = '}';
  74     }
  75   if ((syntax & RE_NO_BK_PARENS) != 0)
  76     {
  77       *p++ = '(';
  78       *p++ = ')';
  79     }
  80   if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_NO_BK_VBAR) != 0)
  81     *p++ = '|';
  82   if ((syntax & RE_NEWLINE_ALT) != 0)
  83     *p++ = '\n';
  84   *p = '\0';
  85 
  86   result.multibyte = true;
  87   result.anchored = anchored;
  88 
  89   return result;
  90 }
  91 
  92 /* Characters that are special in a PCRE.  */
  93 static const char pcre_special[] = "$^.*[]\\+?{}()|";
  94 
  95 /* Options bit values, defined in <pcre.h>.  We don't include it here, because
  96    it is not a standard header.  */
  97 #define PCRE_ANCHORED 0x00000010
  98 #define PCRE_EXTENDED 0x00000008
  99 
 100 struct regex_quote_spec
 101 regex_quote_spec_pcre (int options, bool anchored)
     /* [previous][next][first][last][top][bottom][index][help] */
 102 {
 103   struct regex_quote_spec result;
 104   char *p;
 105 
 106   p = result.special;
 107   memcpy (p, pcre_special, sizeof (pcre_special) - 1);
 108   p += sizeof (pcre_special) - 1;
 109   if (options & PCRE_EXTENDED)
 110     {
 111       *p++ = ' ';
 112       *p++ = '\t';
 113       *p++ = '\n';
 114       *p++ = '\v';
 115       *p++ = '\f';
 116       *p++ = '\r';
 117       *p++ = '#';
 118     }
 119   *p = '\0';
 120 
 121   /* PCRE regular expressions consist of UTF-8 characters of options contains
 122      PCRE_UTF8 and of single bytes otherwise.  */
 123   result.multibyte = false;
 124   /* If options contains PCRE_ANCHORED, the anchoring is implicit.  */
 125   result.anchored = (options & PCRE_ANCHORED ? 0 : anchored);
 126 
 127   return result;
 128 }
 129 
 130 size_t
 131 regex_quote_length (const char *string, const struct regex_quote_spec *spec)
     /* [previous][next][first][last][top][bottom][index][help] */
 132 {
 133   const char *special = spec->special;
 134   size_t length;
 135 
 136   length = 0;
 137   if (spec->anchored)
 138     length += 2; /* for '^' at the beginning and '$' at the end */
 139   if (spec->multibyte)
 140     {
 141       mbui_iterator_t iter;
 142 
 143       for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
 144         {
 145           /* We know that special contains only ASCII characters.  */
 146           if (mb_len (mbui_cur (iter)) == 1
 147               && strchr (special, * mbui_cur_ptr (iter)))
 148             length += 1;
 149           length += mb_len (mbui_cur (iter));
 150         }
 151     }
 152   else
 153     {
 154       const char *iter;
 155 
 156       for (iter = string; *iter != '\0'; iter++)
 157         {
 158           if (strchr (special, *iter))
 159             length += 1;
 160           length += 1;
 161         }
 162     }
 163 
 164   return length;
 165 }
 166 
 167 char *
 168 regex_quote_copy (char *p, const char *string, const struct regex_quote_spec *spec)
     /* [previous][next][first][last][top][bottom][index][help] */
 169 {
 170   const char *special = spec->special;
 171 
 172   if (spec->anchored)
 173     *p++ = '^';
 174   if (spec->multibyte)
 175     {
 176       mbui_iterator_t iter;
 177 
 178       for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
 179         {
 180           /* We know that special contains only ASCII characters.  */
 181           if (mb_len (mbui_cur (iter)) == 1
 182               && strchr (special, * mbui_cur_ptr (iter)))
 183             *p++ = '\\';
 184           memcpy (p, mbui_cur_ptr (iter), mb_len (mbui_cur (iter)));
 185           p += mb_len (mbui_cur (iter));
 186         }
 187     }
 188   else
 189     {
 190       const char *iter;
 191 
 192       for (iter = string; *iter != '\0'; iter++)
 193         {
 194           if (strchr (special, *iter))
 195             *p++ = '\\';
 196           *p++ = *iter++;
 197         }
 198     }
 199   if (spec->anchored)
 200     *p++ = '$';
 201 
 202   return p;
 203 }
 204 
 205 char *
 206 regex_quote (const char *string, const struct regex_quote_spec *spec)
     /* [previous][next][first][last][top][bottom][index][help] */
 207 {
 208   size_t length = regex_quote_length (string, spec);
 209   char *result = XNMALLOC (length + 1, char);
 210   char *p;
 211 
 212   p = result;
 213   p = regex_quote_copy (p, string, spec);
 214   *p = '\0';
 215   return result;
 216 }

/* [previous][next][first][last][top][bottom][index][help] */