1 /* dfa.h - declarations for GNU deterministic regexp compiler 2 Copyright (C) 1988, 1998, 2007, 2009-2021 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 17 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA */ 18 19 /* Written June, 1988 by Mike Haertel */ 20 21 #ifndef DFA_H_ 22 #define DFA_H_ 23 24 #include "idx.h" 25 #include <regex.h> 26 #include <stdbool.h> 27 #include <stddef.h> 28 #include <stdlib.h> 29 30 #ifdef __cplusplus 31 extern "C" { 32 #endif 33 34 struct localeinfo; /* See localeinfo.h. */ 35 36 /* Element of a list of strings, at least one of which is known to 37 appear in any R.E. matching the DFA. */ 38 struct dfamust 39 { 40 bool exact; 41 bool begline; 42 bool endline; 43 char must[FLEXIBLE_ARRAY_MEMBER]; 44 }; 45 46 /* The dfa structure. It is completely opaque. */ 47 struct dfa; 48 49 /* Needed when Gnulib is not used. */ 50 #ifndef _GL_ATTRIBUTE_MALLOC 51 # define _GL_ATTRIBUTE_MALLOC 52 # define _GL_ATTRIBUTE_DEALLOC(f, i) 53 # define _GL_ATTRIBUTE_DEALLOC_FREE 54 # define _GL_ATTRIBUTE_RETURNS_NONNULL 55 #endif 56 57 /* Entry points. */ 58 59 /* Allocate a struct dfa. The struct dfa is completely opaque. 60 It should be initialized via dfasyntax or dfacopysyntax before other use. 61 The returned pointer should be passed directly to free() after 62 calling dfafree() on it. */ 63 extern struct dfa *dfaalloc (void) /* */ 64 _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE 65 _GL_ATTRIBUTE_RETURNS_NONNULL; 66 67 /* DFA options that can be ORed together, for dfasyntax's 4th arg. */ 68 enum 69 { 70 /* ^ and $ match only the start and end of data, and do not match 71 end-of-line within data. This is always false for grep, but 72 possibly true for other apps. */ 73 DFA_ANCHOR = 1 << 0, 74 75 /* '\0' in data is end-of-line, instead of the traditional '\n'. */ 76 DFA_EOL_NUL = 1 << 1 77 }; 78 79 /* Initialize or reinitialize a DFA. The arguments are: 80 1. The DFA to operate on. 81 2. Information about the current locale. 82 3. Syntax bits described in regex.h. 83 4. Additional DFA options described above. */ 84 extern void dfasyntax (struct dfa *, struct localeinfo const *, 85 reg_syntax_t, int); 86 87 /* Initialize or reinitialize a DFA from an already-initialized DFA. */ 88 extern void dfacopysyntax (struct dfa *, struct dfa const *); 89 90 /* Parse the given string of given length into the given struct dfa. */ 91 extern void dfaparse (char const *, idx_t, struct dfa *); 92 93 struct dfamust; 94 95 /* Free the storage held by the components of a struct dfamust. */ 96 extern void dfamustfree (struct dfamust *); 97 98 /* Allocate and return a struct dfamust from a struct dfa that was 99 initialized by dfaparse and not yet given to dfacomp. */ 100 extern struct dfamust *dfamust (struct dfa const *) 101 _GL_ATTRIBUTE_DEALLOC (dfamustfree, 1); 102 103 /* Compile the given string of the given length into the given struct dfa. 104 The last argument says whether to build a searching or an exact matcher. 105 A null first argument means the struct dfa has already been 106 initialized by dfaparse; the second argument is ignored. */ 107 extern void dfacomp (char const *, idx_t, struct dfa *, bool); 108 109 /* Search through a buffer looking for a match to the given struct dfa. 110 Find the first occurrence of a string matching the regexp in the 111 buffer, and the shortest possible version thereof. Return a pointer to 112 the first character after the match, or NULL if none is found. BEGIN 113 points to the beginning of the buffer, and END points to the first byte 114 after its end. Note however that we store a sentinel byte (usually 115 newline) in *END, so the actual buffer must be one byte longer. 116 When ALLOW_NL is true, newlines may appear in the matching string. 117 If COUNT is non-NULL, increment *COUNT once for each newline processed. 118 Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we 119 encountered a back-reference. The caller can use this to decide 120 whether to fall back on a backtracking matcher. */ 121 extern char *dfaexec (struct dfa *d, char const *begin, char *end, 122 bool allow_nl, idx_t *count, bool *backref); 123 124 /* Return a superset for D. The superset matches everything that D 125 matches, along with some other strings (though the latter should be 126 rare, for efficiency reasons). Return a null pointer if no useful 127 superset is available. */ 128 extern struct dfa *dfasuperset (struct dfa const *d) _GL_ATTRIBUTE_PURE; 129 130 /* The DFA is likely to be fast. */ 131 extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE; 132 133 /* Return true if every construct in D is supported by this DFA matcher. */ 134 extern bool dfasupported (struct dfa const *) _GL_ATTRIBUTE_PURE; 135 136 /* Free the storage held by the components of a struct dfa. */ 137 extern void dfafree (struct dfa *); 138 139 /* Error handling. */ 140 141 /* dfawarn() is called by the regexp routines whenever a regex is compiled 142 that likely doesn't do what the user wanted. It takes a single 143 argument, a NUL-terminated string describing the situation. The user 144 must supply a dfawarn. */ 145 extern void dfawarn (const char *); 146 147 /* dfaerror() is called by the regexp routines whenever an error occurs. It 148 takes a single argument, a NUL-terminated string describing the error. 149 The user must supply a dfaerror. */ 150 extern _Noreturn void dfaerror (const char *); 151 152 #ifdef __cplusplus 153 } 154 #endif 155 156 #endif /* dfa.h */