root/maint/gnulib/lib/unistr/u-strstr.h

/* [previous][next][first][last][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes following definitions.
  1. FUNC

   1 /* Substring test for UTF-8/UTF-16/UTF-32 strings.  -*- coding: utf-8 -*-
   2    Copyright (C) 1999, 2002, 2006, 2010-2021 Free Software Foundation, Inc.
   3    Written by Bruno Haible <bruno@clisp.org>, 2002, 2005.
   4 
   5    This file is free software.
   6    It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
   7    You can redistribute it and/or modify it under either
   8      - the terms of the GNU Lesser General Public License as published
   9        by the Free Software Foundation; either version 3, or (at your
  10        option) any later version, or
  11      - the terms of the GNU General Public License as published by the
  12        Free Software Foundation; either version 2, or (at your option)
  13        any later version, or
  14      - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
  15 
  16    This file is distributed in the hope that it will be useful,
  17    but WITHOUT ANY WARRANTY; without even the implied warranty of
  18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19    Lesser General Public License and the GNU General Public License
  20    for more details.
  21 
  22    You should have received a copy of the GNU Lesser General Public
  23    License and of the GNU General Public License along with this
  24    program.  If not, see <https://www.gnu.org/licenses/>.  */
  25 
  26 UNIT *
  27 FUNC (const UNIT *haystack, const UNIT *needle)
     /* [previous][next][first][last][top][bottom][index][help] */
  28 {
  29   UNIT first = needle[0];
  30 
  31   /* Is needle empty?  */
  32   if (first == 0)
  33     return (UNIT *) haystack;
  34 
  35   /* Is needle nearly empty (only one unit)?  */
  36   if (needle[1] == 0)
  37     return U_STRCHR (haystack, first);
  38 
  39 #ifdef U_STRMBTOUC
  40   /* Is needle nearly empty (only one character)?  */
  41   {
  42     ucs4_t first_uc;
  43     int count = U_STRMBTOUC (&first_uc, needle);
  44     if (count > 0 && needle[count] == 0)
  45       return U_STRCHR (haystack, first_uc);
  46   }
  47 #endif
  48 
  49 #if UNIT_IS_UINT8_T
  50   return (uint8_t *) strstr ((const char *) haystack, (const char *) needle);
  51 #else
  52   {
  53     /* Minimizing the worst-case complexity:
  54        Let n = U_STRLEN(haystack), m = U_STRLEN(needle).
  55        The naïve algorithm is O(n*m) worst-case.
  56        The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
  57        memory allocation.
  58        To achieve linear complexity and yet amortize the cost of the
  59        memory allocation, we activate the Knuth-Morris-Pratt algorithm
  60        only once the naïve algorithm has already run for some time; more
  61        precisely, when
  62          - the outer loop count is >= 10,
  63          - the average number of comparisons per outer loop is >= 5,
  64          - the total number of comparisons is >= m.
  65        But we try it only once.  If the memory allocation attempt failed,
  66        we don't retry it.  */
  67     bool try_kmp = true;
  68     size_t outer_loop_count = 0;
  69     size_t comparison_count = 0;
  70     size_t last_ccount = 0;                  /* last comparison count */
  71     const UNIT *needle_last_ccount = needle; /* = needle + last_ccount */
  72 
  73     /* Speed up the following searches of needle by caching its first
  74        character.  */
  75     UNIT b = *needle++;
  76 
  77     for (;; haystack++)
  78       {
  79         if (*haystack == 0)
  80           /* No match.  */
  81           return NULL;
  82 
  83         /* See whether it's advisable to use an asymptotically faster
  84            algorithm.  */
  85         if (try_kmp
  86             && outer_loop_count >= 10
  87             && comparison_count >= 5 * outer_loop_count)
  88           {
  89             /* See if needle + comparison_count now reaches the end of
  90                needle.  */
  91             if (needle_last_ccount != NULL)
  92               {
  93                 needle_last_ccount +=
  94                   U_STRNLEN (needle_last_ccount,
  95                              comparison_count - last_ccount);
  96                 if (*needle_last_ccount == 0)
  97                   needle_last_ccount = NULL;
  98                 last_ccount = comparison_count;
  99               }
 100             if (needle_last_ccount == NULL)
 101               {
 102                 /* Try the Knuth-Morris-Pratt algorithm.  */
 103                 const UNIT *result;
 104                 bool success =
 105                   knuth_morris_pratt (haystack,
 106                                       needle - 1, U_STRLEN (needle - 1),
 107                                       &result);
 108                 if (success)
 109                   return (UNIT *) result;
 110                 try_kmp = false;
 111               }
 112           }
 113 
 114         outer_loop_count++;
 115         comparison_count++;
 116         if (*haystack == b)
 117           /* The first character matches.  */
 118           {
 119             const UNIT *rhaystack = haystack + 1;
 120             const UNIT *rneedle = needle;
 121 
 122             for (;; rhaystack++, rneedle++)
 123               {
 124                 if (*rneedle == 0)
 125                   /* Found a match.  */
 126                   return (UNIT *) haystack;
 127                 if (*rhaystack == 0)
 128                   /* No match.  */
 129                   return NULL;
 130                 comparison_count++;
 131                 if (*rhaystack != *rneedle)
 132                   /* Nothing in this round.  */
 133                   break;
 134               }
 135           }
 136       }
 137   }
 138 #endif
 139 }

/* [previous][next][first][last][top][bottom][index][help] */