1 /* Substring test for UTF-8/UTF-16/UTF-32 strings. -*- coding: utf-8 -*- 2 Copyright (C) 1999, 2002, 2006, 2010-2021 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2002, 2005. 4 5 This file is free software. 6 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". 7 You can redistribute it and/or modify it under either 8 - the terms of the GNU Lesser General Public License as published 9 by the Free Software Foundation; either version 3, or (at your 10 option) any later version, or 11 - the terms of the GNU General Public License as published by the 12 Free Software Foundation; either version 2, or (at your option) 13 any later version, or 14 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". 15 16 This file is distributed in the hope that it will be useful, 17 but WITHOUT ANY WARRANTY; without even the implied warranty of 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 Lesser General Public License and the GNU General Public License 20 for more details. 21 22 You should have received a copy of the GNU Lesser General Public 23 License and of the GNU General Public License along with this 24 program. If not, see <https://www.gnu.org/licenses/>. */ 25 26 UNIT * 27 FUNC (const UNIT *haystack, const UNIT *needle) /* */ 28 { 29 UNIT first = needle[0]; 30 31 /* Is needle empty? */ 32 if (first == 0) 33 return (UNIT *) haystack; 34 35 /* Is needle nearly empty (only one unit)? */ 36 if (needle[1] == 0) 37 return U_STRCHR (haystack, first); 38 39 #ifdef U_STRMBTOUC 40 /* Is needle nearly empty (only one character)? */ 41 { 42 ucs4_t first_uc; 43 int count = U_STRMBTOUC (&first_uc, needle); 44 if (count > 0 && needle[count] == 0) 45 return U_STRCHR (haystack, first_uc); 46 } 47 #endif 48 49 #if UNIT_IS_UINT8_T 50 return (uint8_t *) strstr ((const char *) haystack, (const char *) needle); 51 #else 52 { 53 /* Minimizing the worst-case complexity: 54 Let n = U_STRLEN(haystack), m = U_STRLEN(needle). 55 The naïve algorithm is O(n*m) worst-case. 56 The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a 57 memory allocation. 58 To achieve linear complexity and yet amortize the cost of the 59 memory allocation, we activate the Knuth-Morris-Pratt algorithm 60 only once the naïve algorithm has already run for some time; more 61 precisely, when 62 - the outer loop count is >= 10, 63 - the average number of comparisons per outer loop is >= 5, 64 - the total number of comparisons is >= m. 65 But we try it only once. If the memory allocation attempt failed, 66 we don't retry it. */ 67 bool try_kmp = true; 68 size_t outer_loop_count = 0; 69 size_t comparison_count = 0; 70 size_t last_ccount = 0; /* last comparison count */ 71 const UNIT *needle_last_ccount = needle; /* = needle + last_ccount */ 72 73 /* Speed up the following searches of needle by caching its first 74 character. */ 75 UNIT b = *needle++; 76 77 for (;; haystack++) 78 { 79 if (*haystack == 0) 80 /* No match. */ 81 return NULL; 82 83 /* See whether it's advisable to use an asymptotically faster 84 algorithm. */ 85 if (try_kmp 86 && outer_loop_count >= 10 87 && comparison_count >= 5 * outer_loop_count) 88 { 89 /* See if needle + comparison_count now reaches the end of 90 needle. */ 91 if (needle_last_ccount != NULL) 92 { 93 needle_last_ccount += 94 U_STRNLEN (needle_last_ccount, 95 comparison_count - last_ccount); 96 if (*needle_last_ccount == 0) 97 needle_last_ccount = NULL; 98 last_ccount = comparison_count; 99 } 100 if (needle_last_ccount == NULL) 101 { 102 /* Try the Knuth-Morris-Pratt algorithm. */ 103 const UNIT *result; 104 bool success = 105 knuth_morris_pratt (haystack, 106 needle - 1, U_STRLEN (needle - 1), 107 &result); 108 if (success) 109 return (UNIT *) result; 110 try_kmp = false; 111 } 112 } 113 114 outer_loop_count++; 115 comparison_count++; 116 if (*haystack == b) 117 /* The first character matches. */ 118 { 119 const UNIT *rhaystack = haystack + 1; 120 const UNIT *rneedle = needle; 121 122 for (;; rhaystack++, rneedle++) 123 { 124 if (*rneedle == 0) 125 /* Found a match. */ 126 return (UNIT *) haystack; 127 if (*rhaystack == 0) 128 /* No match. */ 129 return NULL; 130 comparison_count++; 131 if (*rhaystack != *rneedle) 132 /* Nothing in this round. */ 133 break; 134 } 135 } 136 } 137 } 138 #endif 139 }