root/maint/gnulib/lib/exclude.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. exclude_add_pattern_buffer
  2. fnmatch_pattern_has_wildcards
  3. unescape_pattern
  4. new_exclude
  5. string_hasher
  6. string_hasher_ci
  7. string_compare
  8. string_compare_ci
  9. string_free
  10. new_exclude_segment
  11. free_exclude_segment
  12. free_exclude
  13. fnmatch_no_wildcards
  14. exclude_fnmatch
  15. exclude_patopts
  16. file_pattern_matches
  17. file_name_matches
  18. excluded_file_name
  19. add_exclude
  20. add_exclude_fp
  21. call_addfn
  22. add_exclude_file

   1 /* exclude.c -- exclude file names
   2 
   3    Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2021 Free Software
   4    Foundation, Inc.
   5 
   6    This program is free software: you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10 
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15 
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  18 
  19 /* Written by Paul Eggert <eggert@twinsun.com>
  20    and Sergey Poznyakoff <gray@gnu.org>.
  21    Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
  22    for improvement suggestions. */
  23 
  24 #include <config.h>
  25 
  26 #include <stdbool.h>
  27 
  28 #include <ctype.h>
  29 #include <errno.h>
  30 #include <stddef.h>
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <string.h>
  34 #include <wctype.h>
  35 #include <regex.h>
  36 
  37 #include "exclude.h"
  38 #include "hash.h"
  39 #include "mbuiter.h"
  40 #include "fnmatch.h"
  41 #include "xalloc.h"
  42 #include "verify.h"
  43 #include "filename.h"
  44 
  45 #if GNULIB_EXCLUDE_SINGLE_THREAD
  46 # include "unlocked-io.h"
  47 #endif
  48 
  49 /* Non-GNU systems lack these options, so we don't need to check them.  */
  50 #ifndef FNM_CASEFOLD
  51 # define FNM_CASEFOLD 0
  52 #endif
  53 #ifndef FNM_EXTMATCH
  54 # define FNM_EXTMATCH 0
  55 #endif
  56 #ifndef FNM_LEADING_DIR
  57 # define FNM_LEADING_DIR 0
  58 #endif
  59 
  60 verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS)
  61          & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR
  62             | FNM_CASEFOLD | FNM_EXTMATCH))
  63         == 0);
  64 
  65 
  66 /* Exclusion patterns are grouped into a singly-linked list of
  67    "exclusion segments".  Each segment represents a set of patterns
  68    that can be matches using the same algorithm.  Non-wildcard
  69    patterns are kept in hash tables, to speed up searches.  Wildcard
  70    patterns are stored as arrays of patterns. */
  71 
  72 
  73 /* An exclude pattern-options pair.  The options are fnmatch options
  74    ORed with EXCLUDE_* options.  */
  75 
  76 struct patopts
  77   {
  78     int options;
  79     union
  80     {
  81       char const *pattern;
  82       regex_t re;
  83     } v;
  84   };
  85 
  86 /* An array of pattern-options pairs.  */
  87 
  88 struct exclude_pattern
  89   {
  90     struct patopts *exclude;
  91     idx_t exclude_alloc;
  92     idx_t exclude_count;
  93   };
  94 
  95 enum exclude_type
  96   {
  97     exclude_hash,                    /* a hash table of excluded names */
  98     exclude_pattern                  /* an array of exclude patterns */
  99   };
 100 
 101 struct exclude_segment
 102   {
 103     struct exclude_segment *next;    /* next segment in list */
 104     enum exclude_type type;          /* type of this segment */
 105     int options;                     /* common options for this segment */
 106     union
 107     {
 108       Hash_table *table;             /* for type == exclude_hash */
 109       struct exclude_pattern pat;    /* for type == exclude_pattern */
 110     } v;
 111   };
 112 
 113 struct pattern_buffer
 114   {
 115     struct pattern_buffer *next;
 116     char *base;
 117   };
 118 
 119 /* The exclude structure keeps a singly-linked list of exclude segments,
 120    maintained in reverse order.  */
 121 struct exclude
 122   {
 123     struct exclude_segment *head;
 124     struct pattern_buffer *patbuf;
 125   };
 126 
 127 /* Register BUF in the pattern buffer list of EX.  ADD_FUNC (see
 128    add_exclude_file and add_exclude_fp below) can use this function
 129    if it modifies the pattern, to ensure the allocated memory will be
 130    properly reclaimed upon calling free_exclude. */
 131 void
 132 exclude_add_pattern_buffer (struct exclude *ex, char *buf)
     /* [previous][next][first][last][top][bottom][index][help] */
 133 {
 134   struct pattern_buffer *pbuf = xmalloc (sizeof *pbuf);
 135   pbuf->base = buf;
 136   pbuf->next = ex->patbuf;
 137   ex->patbuf = pbuf;
 138 }
 139 
 140 /* Return true if STR has or may have wildcards, when matched with OPTIONS.
 141    Return false if STR definitely does not have wildcards.  */
 142 bool
 143 fnmatch_pattern_has_wildcards (const char *str, int options)
     /* [previous][next][first][last][top][bottom][index][help] */
 144 {
 145   while (1)
 146     {
 147       switch (*str++)
 148         {
 149         case '.':
 150         case '{':
 151         case '}':
 152         case '(':
 153         case ')':
 154           if (options & EXCLUDE_REGEX)
 155             return true;
 156           break;
 157 
 158         case '\\':
 159           if (options & EXCLUDE_REGEX)
 160             continue;
 161           else
 162             str += ! (options & FNM_NOESCAPE) && *str;
 163           break;
 164 
 165         case '+': case '@': case '!':
 166           if (options & FNM_EXTMATCH && *str == '(')
 167             return true;
 168           break;
 169 
 170         case '?': case '*': case '[':
 171           return true;
 172 
 173         case '\0':
 174           return false;
 175         }
 176     }
 177 }
 178 
 179 static void
 180 unescape_pattern (char *str)
     /* [previous][next][first][last][top][bottom][index][help] */
 181 {
 182   char const *q = str;
 183   do
 184     q += *q == '\\' && q[1];
 185   while ((*str++ = *q++));
 186 }
 187 
 188 /* Return a newly allocated and empty exclude list.  */
 189 
 190 struct exclude *
 191 new_exclude (void)
     /* [previous][next][first][last][top][bottom][index][help] */
 192 {
 193   return xzalloc (sizeof *new_exclude ());
 194 }
 195 
 196 /* Calculate the hash of string.  */
 197 static size_t
 198 string_hasher (void const *data, size_t n_buckets)
     /* [previous][next][first][last][top][bottom][index][help] */
 199 {
 200   char const *p = data;
 201   return hash_string (p, n_buckets);
 202 }
 203 
 204 /* Ditto, for case-insensitive hashes */
 205 static size_t
 206 string_hasher_ci (void const *data, size_t n_buckets)
     /* [previous][next][first][last][top][bottom][index][help] */
 207 {
 208   char const *p = data;
 209   mbui_iterator_t iter;
 210   size_t value = 0;
 211 
 212   for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter))
 213     {
 214       mbchar_t m = mbui_cur (iter);
 215       wchar_t wc;
 216 
 217       if (m.wc_valid)
 218         wc = towlower (m.wc);
 219       else
 220         wc = *m.ptr;
 221 
 222       value = value * 31 + wc;
 223     }
 224 
 225   return value % n_buckets;
 226 }
 227 
 228 /* compare two strings for equality */
 229 static bool
 230 string_compare (void const *data1, void const *data2)
     /* [previous][next][first][last][top][bottom][index][help] */
 231 {
 232   char const *p1 = data1;
 233   char const *p2 = data2;
 234   return strcmp (p1, p2) == 0;
 235 }
 236 
 237 /* compare two strings for equality, case-insensitive */
 238 static bool
 239 string_compare_ci (void const *data1, void const *data2)
     /* [previous][next][first][last][top][bottom][index][help] */
 240 {
 241   char const *p1 = data1;
 242   char const *p2 = data2;
 243   return mbscasecmp (p1, p2) == 0;
 244 }
 245 
 246 static void
 247 string_free (void *data)
     /* [previous][next][first][last][top][bottom][index][help] */
 248 {
 249   free (data);
 250 }
 251 
 252 /* Create new exclude segment of given TYPE and OPTIONS, and attach it
 253    to the head of EX.  */
 254 static void
 255 new_exclude_segment (struct exclude *ex, enum exclude_type type, int options)
     /* [previous][next][first][last][top][bottom][index][help] */
 256 {
 257   struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment));
 258   sp->type = type;
 259   sp->options = options;
 260   switch (type)
 261     {
 262     case exclude_pattern:
 263       break;
 264 
 265     case exclude_hash:
 266       sp->v.table = hash_initialize (0, NULL,
 267                                      (options & FNM_CASEFOLD) ?
 268                                        string_hasher_ci
 269                                        : string_hasher,
 270                                      (options & FNM_CASEFOLD) ?
 271                                        string_compare_ci
 272                                        : string_compare,
 273                                      string_free);
 274       break;
 275     }
 276   sp->next = ex->head;
 277   ex->head = sp;
 278 }
 279 
 280 /* Free a single exclude segment */
 281 static void
 282 free_exclude_segment (struct exclude_segment *seg)
     /* [previous][next][first][last][top][bottom][index][help] */
 283 {
 284   switch (seg->type)
 285     {
 286     case exclude_pattern:
 287       for (idx_t i = 0; i < seg->v.pat.exclude_count; i++)
 288         {
 289           if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX)
 290             regfree (&seg->v.pat.exclude[i].v.re);
 291         }
 292       free (seg->v.pat.exclude);
 293       break;
 294 
 295     case exclude_hash:
 296       hash_free (seg->v.table);
 297       break;
 298     }
 299   free (seg);
 300 }
 301 
 302 /* Free the storage associated with an exclude list.  */
 303 void
 304 free_exclude (struct exclude *ex)
     /* [previous][next][first][last][top][bottom][index][help] */
 305 {
 306   struct exclude_segment *seg;
 307   struct pattern_buffer *pbuf;
 308 
 309   for (seg = ex->head; seg; )
 310     {
 311       struct exclude_segment *next = seg->next;
 312       free_exclude_segment (seg);
 313       seg = next;
 314     }
 315 
 316   for (pbuf = ex->patbuf; pbuf; )
 317     {
 318       struct pattern_buffer *next = pbuf->next;
 319       free (pbuf->base);
 320       free (pbuf);
 321       pbuf = next;
 322     }
 323 
 324   free (ex);
 325 }
 326 
 327 /* Return zero if PATTERN matches F, obeying OPTIONS, except that
 328    (unlike fnmatch) wildcards are disabled in PATTERN.  */
 329 
 330 static int
 331 fnmatch_no_wildcards (char const *pattern, char const *f, int options)
     /* [previous][next][first][last][top][bottom][index][help] */
 332 {
 333   if (! (options & FNM_LEADING_DIR))
 334     return ((options & FNM_CASEFOLD)
 335             ? mbscasecmp (pattern, f)
 336             : strcmp (pattern, f));
 337   else if (! (options & FNM_CASEFOLD))
 338     {
 339       size_t patlen = strlen (pattern);
 340       int r = strncmp (pattern, f, patlen);
 341       if (! r)
 342         {
 343           r = f[patlen];
 344           if (r == '/')
 345             r = 0;
 346         }
 347       return r;
 348     }
 349   else
 350     {
 351       /* Walk through a copy of F, seeing whether P matches any prefix
 352          of F.
 353 
 354          FIXME: This is an O(N**2) algorithm; it should be O(N).
 355          Also, the copy should not be necessary.  However, fixing this
 356          will probably involve a change to the mbs* API.  */
 357 
 358       char *fcopy = xstrdup (f);
 359       char *p;
 360       int r;
 361       for (p = fcopy; ; *p++ = '/')
 362         {
 363           p = strchr (p, '/');
 364           if (p)
 365             *p = '\0';
 366           r = mbscasecmp (pattern, fcopy);
 367           if (!p || r <= 0)
 368             break;
 369         }
 370       free (fcopy);
 371       return r;
 372     }
 373 }
 374 
 375 bool
 376 exclude_fnmatch (char const *pattern, char const *f, int options)
     /* [previous][next][first][last][top][bottom][index][help] */
 377 {
 378   int (*matcher) (char const *, char const *, int) =
 379     (options & EXCLUDE_WILDCARDS
 380      ? fnmatch
 381      : fnmatch_no_wildcards);
 382   bool matched = ((*matcher) (pattern, f, options) == 0);
 383   char const *p;
 384 
 385   if (! (options & EXCLUDE_ANCHORED))
 386     for (p = f; *p && ! matched; p++)
 387       if (*p == '/' && p[1] != '/')
 388         matched = ((*matcher) (pattern, p + 1, options) == 0);
 389 
 390   return matched;
 391 }
 392 
 393 static bool
 394 exclude_patopts (struct patopts const *opts, char const *f)
     /* [previous][next][first][last][top][bottom][index][help] */
 395 {
 396   int options = opts->options;
 397 
 398   return (options & EXCLUDE_REGEX)
 399           ? regexec (&opts->v.re, f, 0, NULL, 0) == 0
 400           : exclude_fnmatch (opts->v.pattern, f, options);
 401 }
 402 
 403 /* Return true if the exclude_pattern segment SEG matches F.  */
 404 
 405 static bool
 406 file_pattern_matches (struct exclude_segment const *seg, char const *f)
     /* [previous][next][first][last][top][bottom][index][help] */
 407 {
 408   idx_t exclude_count = seg->v.pat.exclude_count;
 409   struct patopts const *exclude = seg->v.pat.exclude;
 410 
 411   for (idx_t i = 0; i < exclude_count; i++)
 412     {
 413       if (exclude_patopts (exclude + i, f))
 414         return true;
 415     }
 416   return false;
 417 }
 418 
 419 /* Return true if the exclude_hash segment SEG matches F.
 420    BUFFER is an auxiliary storage of the same length as F (with nul
 421    terminator included) */
 422 static bool
 423 file_name_matches (struct exclude_segment const *seg, char const *f,
     /* [previous][next][first][last][top][bottom][index][help] */
 424                    char *buffer)
 425 {
 426   int options = seg->options;
 427   Hash_table *table = seg->v.table;
 428 
 429   do
 430     {
 431       /* initialize the pattern */
 432       strcpy (buffer, f);
 433 
 434       while (1)
 435         {
 436           if (hash_lookup (table, buffer))
 437             return true;
 438           if (options & FNM_LEADING_DIR)
 439             {
 440               char *p = strrchr (buffer, '/');
 441               if (p)
 442                 {
 443                   *p = 0;
 444                   continue;
 445                 }
 446             }
 447           break;
 448         }
 449 
 450       if (!(options & EXCLUDE_ANCHORED))
 451         {
 452           f = strchr (f, '/');
 453           if (f)
 454             f++;
 455         }
 456       else
 457         break;
 458     }
 459   while (f);
 460 
 461   return false;
 462 }
 463 
 464 /* Return true if EX excludes F.  */
 465 
 466 bool
 467 excluded_file_name (struct exclude const *ex, char const *f)
     /* [previous][next][first][last][top][bottom][index][help] */
 468 {
 469   struct exclude_segment *seg;
 470   bool invert = false;
 471   char *filename = NULL;
 472 
 473   /* If no patterns are given, the default is to include.  */
 474   if (!ex->head)
 475     return false;
 476 
 477   /* Scan through the segments, reporting the status of the first match.
 478      The segments are in reverse order, so this reports the status of
 479      the last match in the original option list.  */
 480   for (seg = ex->head; ; seg = seg->next)
 481     {
 482       if (seg->type == exclude_hash)
 483         {
 484           if (!filename)
 485             filename = xmalloc (strlen (f) + 1);
 486           if (file_name_matches (seg, f, filename))
 487             break;
 488         }
 489       else
 490         {
 491           if (file_pattern_matches (seg, f))
 492             break;
 493         }
 494 
 495       if (! seg->next)
 496         {
 497           /* If patterns are given but none match, the default is the
 498              opposite of the last segment (i.e., the first in the
 499              original option list).  For example, in the command
 500              'grep -r --exclude="a*" --include="*b" pat dir', the
 501              first option is --exclude so any file name matching
 502              neither a* nor *b is included.  */
 503           invert = true;
 504           break;
 505         }
 506     }
 507 
 508   free (filename);
 509   return invert ^ ! (seg->options & EXCLUDE_INCLUDE);
 510 }
 511 
 512 /* Append to EX the exclusion PATTERN with OPTIONS.  */
 513 
 514 void
 515 add_exclude (struct exclude *ex, char const *pattern, int options)
     /* [previous][next][first][last][top][bottom][index][help] */
 516 {
 517   struct exclude_segment *seg;
 518   struct exclude_pattern *pat;
 519   struct patopts *patopts;
 520 
 521   if ((options & (EXCLUDE_REGEX|EXCLUDE_WILDCARDS))
 522       && fnmatch_pattern_has_wildcards (pattern, options))
 523     {
 524       if (! (ex->head && ex->head->type == exclude_pattern
 525              && ((ex->head->options & EXCLUDE_INCLUDE)
 526                  == (options & EXCLUDE_INCLUDE))))
 527         new_exclude_segment (ex, exclude_pattern, options);
 528 
 529       seg = ex->head;
 530 
 531       pat = &seg->v.pat;
 532       if (pat->exclude_count == pat->exclude_alloc)
 533         pat->exclude = xpalloc (pat->exclude, &pat->exclude_alloc, 1, -1,
 534                                 sizeof *pat->exclude);
 535       patopts = &pat->exclude[pat->exclude_count++];
 536 
 537       patopts->options = options;
 538       if (options & EXCLUDE_REGEX)
 539         {
 540           int rc;
 541           int cflags = REG_NOSUB|REG_EXTENDED|
 542                        ((options & FNM_CASEFOLD) ? REG_ICASE : 0);
 543 
 544           if (options & FNM_LEADING_DIR)
 545             {
 546               char *tmp;
 547               idx_t len = strlen (pattern);
 548 
 549               while (len > 0 && ISSLASH (pattern[len-1]))
 550                 --len;
 551 
 552               if (len == 0)
 553                 rc = 1;
 554               else
 555                 {
 556                   tmp = ximalloc (len + 7);
 557                   memcpy (tmp, pattern, len);
 558                   strcpy (tmp + len, "(/.*)?");
 559                   rc = regcomp (&patopts->v.re, tmp, cflags);
 560                   free (tmp);
 561                 }
 562             }
 563           else
 564             rc = regcomp (&patopts->v.re, pattern, cflags);
 565 
 566           if (rc)
 567             {
 568               pat->exclude_count--;
 569               return;
 570             }
 571         }
 572       else
 573         {
 574           if (options & EXCLUDE_ALLOC)
 575             {
 576               pattern = xstrdup (pattern);
 577               exclude_add_pattern_buffer (ex, (char*) pattern);
 578             }
 579           patopts->v.pattern = pattern;
 580         }
 581     }
 582   else
 583     {
 584       char *str, *p;
 585       int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED
 586                                 | FNM_LEADING_DIR | FNM_CASEFOLD);
 587       if (! (ex->head && ex->head->type == exclude_hash
 588              && ((ex->head->options & exclude_hash_flags)
 589                  == (options & exclude_hash_flags))))
 590         new_exclude_segment (ex, exclude_hash, options);
 591       seg = ex->head;
 592 
 593       str = xstrdup (pattern);
 594       if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS)
 595         unescape_pattern (str);
 596       p = hash_insert (seg->v.table, str);
 597       if (p != str)
 598         free (str);
 599     }
 600 }
 601 
 602 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
 603    OPTIONS.  LINE_END terminates each pattern in the file.  If
 604    LINE_END is a space character, ignore trailing spaces and empty
 605    lines in FP.  Return -1 (setting errno) on failure, 0 on success.  */
 606 
 607 int
 608 add_exclude_fp (void (*add_func) (struct exclude *, char const *, int, void *),
     /* [previous][next][first][last][top][bottom][index][help] */
 609                 struct exclude *ex, FILE *fp, int options,
 610                 char line_end,
 611                 void *data)
 612 {
 613   char *buf = NULL;
 614   char *p;
 615   char *pattern;
 616   char const *lim;
 617   idx_t buf_alloc = 0;
 618   idx_t buf_count = 0;
 619   int c;
 620   int e = 0;
 621 
 622   while ((c = getc (fp)) != EOF)
 623     {
 624       if (buf_count == buf_alloc)
 625         buf = xpalloc (buf, &buf_alloc, 1, -1, 1);
 626       buf[buf_count++] = c;
 627     }
 628 
 629   if (ferror (fp))
 630     e = errno;
 631 
 632   buf = xirealloc (buf, buf_count + 1);
 633   buf[buf_count] = line_end;
 634   lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end);
 635 
 636   exclude_add_pattern_buffer (ex, buf);
 637 
 638   pattern = buf;
 639 
 640   for (p = buf; p < lim; p++)
 641     if (*p == line_end)
 642       {
 643         char *pattern_end = p;
 644 
 645         if (isspace ((unsigned char) line_end))
 646           {
 647             for (; ; pattern_end--)
 648               if (pattern_end == pattern)
 649                 goto next_pattern;
 650               else if (! isspace ((unsigned char) pattern_end[-1]))
 651                 break;
 652           }
 653 
 654         *pattern_end = '\0';
 655         (*add_func) (ex, pattern, options, data);
 656 
 657       next_pattern:
 658         pattern = p + 1;
 659       }
 660 
 661   errno = e;
 662   return e ? -1 : 0;
 663 }
 664 
 665 static void
 666 call_addfn (struct exclude *ex, char const *pattern, int options, void *data)
     /* [previous][next][first][last][top][bottom][index][help] */
 667 {
 668   void (**addfnptr) (struct exclude *, char const *, int) = data;
 669   (*addfnptr) (ex, pattern, options);
 670 }
 671 
 672 int
 673 add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
     /* [previous][next][first][last][top][bottom][index][help] */
 674                   struct exclude *ex, char const *file_name, int options,
 675                   char line_end)
 676 {
 677   if (strcmp (file_name, "-") == 0)
 678     return add_exclude_fp (call_addfn, ex, stdin, options, line_end, &add_func);
 679 
 680   FILE *in = fopen (file_name, "re");
 681   if (!in)
 682     return -1;
 683   int rc = add_exclude_fp (call_addfn, ex, in, options, line_end, &add_func);
 684   int e = errno;
 685   if (fclose (in) != 0)
 686     return -1;
 687   errno = e;
 688   return rc;
 689 }

/* [previous][next][first][last][top][bottom][index][help] */