1 /* (Persistent) hash array mapped tries.
2 Copyright (C) 2021 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 /* Written by Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>, 2021. */
18
19 /* This module provides a persistent version of hash array mapped
20 tries (hamts) that can be used in place of hash tables when pure
21 (functional) operations are needed.
22
23 A hash function and an equivalence predicate has to be provided for
24 the elements that can be inserted, replaced and removed in a hamt.
25 A hamt cannot contain duplicates that compare equal.
26
27 Each non-destructive updating operation returns a new hamt, which
28 shares structure with the original one. Destructive updates only
29 effect the hamt, on which the destructive operation is applied.
30 For example, given a hamt HAMT1, any non-destructive update
31 operation (e.g. hamt_insert) will result in a new hamt HAMT2.
32 Whatever further operations (destructive or not, including freeing
33 a hamt) are applied to HAMT1 won't change HAMT2 and vice versa. To
34 free all the memory, hash_free has therefore to be applied to both
35 HAMT1 and HAMT2.
36
37 If persistence is not needed, transient hash tables are probably
38 faster.
39
40 See also: Phil Bagwell (2000). Ideal Hash Trees (Report). Infoscience
41 Department, École Polytechnique Fédérale de Lausanne.
42
43 http://infoscience.epfl.ch/record/64398/files/idealhashtrees.pdf */
44
45 #ifndef _GL_HAMT_H
46 #define _GL_HAMT_H
47
48 #ifndef _GL_INLINE_HEADER_BEGIN
49 # error "Please include config.h first."
50 #endif
51 _GL_INLINE_HEADER_BEGIN
52 #ifndef _GL_HAMT_INLINE
53 # define _GL_HAMT_INLINE _GL_INLINE
54 #endif
55
56 /* The GL_HAMT_THREAD_SAFE flag is set if the implementation of hamts
57 is thread-safe as long as two threads do not simultaneously access
58 the same hamt. This is non-trivial as different hamts may share
59 some structure. */
60
61 #if (__STDC_VERSION__ < 201112 || defined __STD_NO_ATOMICS__) \
62 && __GNUC__ + (__GNUC_MINOR >= 9) <= 4
63 # define GL_HAMT_THREAD_SAFE 0
64 #else
65 # define GL_HAMT_THREAD_SAFE 1
66 #endif
67
68 #include <stdbool.h>
69 #include <stddef.h>
70 #include <stdint.h>
71
72 /* Hash values are of type size_t. For each level of the trie, we use
73 5 bits (corresponding to lg2 of the width of a 32-bit word. */
74 #define _GL_HAMT_MAX_DEPTH ((SIZE_WIDTH + 4) / 5)
75
76 /************/
77 /* Elements */
78 /************/
79
80 /* A hamt stores pointers to elements. Each element has to be a
81 struct whose initial member is of the type Hamt_entry. You need to
82 define this struct yourself. It will typically contain an
83 Hamt_entry, a key, and, optionally, a value.
84
85 An element is conceptually owned by a hamt as soon as it is
86 inserted. It will be automatically freed as soon as the last hamt
87 containing it is freed. */
88 typedef struct
89 {
90 #if GL_HAMT_THREAD_SAFE
91 _Atomic
92 #endif
93 size_t ref_count;
94 } Hamt_entry;
95
96 /* Initialize *ELT, which has to point to a structure as described
97 above and return ELT type-cast.
98
99 Before an element is inserted into any hamt, whether once or
100 multiple times, it has to be initialized exactly once. */
101 _GL_HAMT_INLINE Hamt_entry *
102 hamt_element (void *elt)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
103 {
104 Hamt_entry *entry = elt;
105 entry->ref_count = 0; /* This assumes that element_entry ==
106 0. */
107 return entry;
108 }
109
110 /*************************/
111 /* Opaque Hamt Structure */
112 /*************************/
113
114 /* In user-code, hamts are accessed through pointers to the opaque
115 Hamt type. Two hamts are said to be the same if and only if their
116 pointers are equal. */
117 typedef struct hamt Hamt;
118
119 /*************/
120 /* Interface */
121 /*************/
122
123 /* A hash function has to be pure, and two elements that compare equal
124 have to have the same hash value. For a hash function to be a good
125 one, it is important that it uses all SIZE_WIDTH bits in
126 size_t. */
127 typedef size_t (Hamt_hasher) (const void *elt);
128
129 /* A comparison function has to be pure, and two elements that have
130 equal pointers have to compare equal. */
131 typedef bool (Hamt_comparator) (const void *elt1, const void *elt2);
132
133 /* A user-defined function that is called when the last hamt
134 containing a particular element is freed. */
135 typedef void (Hamt_freer) (Hamt_entry *elt);
136
137 /****************************/
138 /* Creation and Destruction */
139 /****************************/
140
141 /* Free the resources solely allocated by HAMT and all elements solely
142 contained in it. */
143 extern void hamt_free (Hamt *hamt);
144
145 /* Create and return a new and empty hash array mapped trie. */
146 _GL_ATTRIBUTE_NODISCARD
147 extern Hamt *hamt_create (Hamt_hasher *hasher, Hamt_comparator *comparator,
/* ![[previous]](../icons/left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
148 Hamt_freer *freer)
149 _GL_ATTRIBUTE_DEALLOC (hamt_free, 1);
150
151 /* Return a copy of HAMT, which is not the same in the sense above.
152 This procedure can be used, for example, so that two threads can
153 access the same data independently. */
154 _GL_ATTRIBUTE_NODISCARD
155 extern Hamt *hamt_copy (Hamt *hamt)
156 _GL_ATTRIBUTE_DEALLOC (hamt_free, 1);
157
158 /**********/
159 /* Lookup */
160 /**********/
161
162 /* If ELT matches an entry in HAMT, return this entry. Otherwise,
163 return NULL. */
164 extern Hamt_entry *hamt_lookup (const Hamt *hamt, const void *elt);
165
166 /**************************/
167 /* Insertion and Deletion */
168 /**************************/
169
170 /* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
171 existing element and return the original hamt. Otherwise, insert
172 *ELT_PTR into a copy of the hamt and return the copy. */
173 _GL_ATTRIBUTE_NODISCARD
174 extern Hamt *hamt_insert (Hamt *hamt, Hamt_entry **elt_ptr);
175
176 /* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
177 existing element, remove the element from a copy of the hamt and
178 return the copy. Otherwise, return the original hamt. */
179 _GL_ATTRIBUTE_NODISCARD
180 extern Hamt *hamt_remove (Hamt *hamt, Hamt_entry **elt_ptr);
181
182 /* Insert *ELT_PTR into a copy of HAMT and return the copy. If an
183 existing element was replaced, set *ELT_PTR to this element, and to
184 NULL otherwise. */
185 _GL_ATTRIBUTE_NODISCARD
186 extern Hamt *hamt_replace (Hamt *hamt, Hamt_entry **elt_ptr);
187
188 /*************/
189 /* Iteration */
190 /*************/
191
192 /* A processor function is called during walking of a hamt, which
193 returns true to continue the walking. */
194 typedef bool (Hamt_processor) (Hamt_entry *elt, void *data);
195
196 /* Call PROC for every entry of the hamt until it returns false. The
197 first argument to the processor is the entry, the second argument
198 is the value of DATA as received. Return the number of calls that
199 returned true. During processing, the hamt mustn't be
200 modified. */
201 extern size_t hamt_do_while (const Hamt *hamt, Hamt_processor *proc,
202 void *data);
203
204 /* An alternative interface to iterating through the entry of a hamt
205 that does not make use of a higher-order function like
206 hamt_do_while uses the Hamt_iterator type, which can be allocated
207 through automatic variables on the stack. As a hamt iterator
208 operates on a copy of a hamt, the original hamt can modified
209 (including freeing it) without affecting the iterator. */
210 struct hamt_iterator
211 {
212 Hamt* hamt;
213 int depth;
214 size_t path;
215 size_t position;
216 Hamt_entry *entry[_GL_HAMT_MAX_DEPTH + 1];
217 };
218 typedef struct hamt_iterator Hamt_iterator;
219
220 /* Create of copy of HAMT and return an initialized ITER on the
221 copy. */
222 extern Hamt_iterator hamt_iterator (Hamt *hamt);
223
224 /* Free the resources allocated for ITER, including the hamt copy
225 associated with it. */
226 extern void hamt_iterator_free (Hamt_iterator *iter);
227
228 /* Return an independent copy of ITER that is initially in the same
229 state. Any operation on the original iterator (including freeing
230 it) doesn't affect the iterator copy and vice versa. */
231 extern Hamt_iterator hamt_iterator_copy (Hamt_iterator *iter);
232
233 /* Return true if ITER is not at the end, place the current element in
234 *ELT_PTR and move the iterator forward. Otherwise, return
235 false. */
236 extern bool hamt_iterator_next (Hamt_iterator *iter,
237 Hamt_entry **elt_ptr);
238
239 /***********************/
240 /* Destructive Updates */
241 /***********************/
242
243 /* If *ELT_PTR matches an element already in HAMT, set *ELT_PTR to the
244 element from the table and return false. Otherwise, insert *ELT_PTR
245 destructively into the hamt and return true. */
246 extern bool hamt_insert_x (Hamt *hamt, Hamt_entry **elt_ptr);
247
248 /* Insert ELT destructively into HAMT. If an existing element was
249 replaced, return true. Otherwise, return false. */
250 extern bool hamt_replace_x (Hamt *hamt, Hamt_entry *elt);
251
252 /* If ELT matches an element already in HAMT, remove the element
253 destructively from the hamt and return true. Otherwise, return
254 false. */
255 extern bool hamt_remove_x (Hamt *hamt, Hamt_entry *elt);
256
257 _GL_INLINE_HEADER_END
258
259 #endif /* _GL_HAMT_H */