This source file includes following definitions.
- hash_get_n_buckets
- hash_get_n_buckets_used
- hash_get_n_entries
- hash_get_max_bucket_length
- hash_table_ok
- hash_print_statistics
- safe_hasher
- hash_lookup
- hash_get_first
- hash_get_next
- hash_get_entries
- hash_do_for_each
- hash_string
- hash_string
- is_prime
- next_prime
- hash_reset_tuning
- raw_hasher
- raw_comparator
- check_tuning
- compute_bucket_size
- hash_initialize
- hash_clear
- hash_free
- allocate_entry
- free_entry
- hash_find_entry
- transfer_entries
- hash_rehash
- hash_insert_if_absent
- hash_insert
- hash_remove
- hash_delete
- hash_print
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 #include <config.h>
26
27 #include "hash.h"
28
29 #include "bitrotate.h"
30 #include "xalloc-oversized.h"
31
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35
36 #if USE_OBSTACK
37 # include "obstack.h"
38 # ifndef obstack_chunk_alloc
39 # define obstack_chunk_alloc malloc
40 # endif
41 # ifndef obstack_chunk_free
42 # define obstack_chunk_free free
43 # endif
44 #endif
45
46 struct hash_entry
47 {
48 void *data;
49 struct hash_entry *next;
50 };
51
52 struct hash_table
53 {
54
55
56
57 struct hash_entry *bucket;
58 struct hash_entry const *bucket_limit;
59 size_t n_buckets;
60 size_t n_buckets_used;
61 size_t n_entries;
62
63
64 const Hash_tuning *tuning;
65
66
67
68
69
70
71 Hash_hasher hasher;
72 Hash_comparator comparator;
73 Hash_data_freer data_freer;
74
75
76 struct hash_entry *free_entry_list;
77
78 #if USE_OBSTACK
79
80
81
82 struct obstack entry_stack;
83 #endif
84 };
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116 #define DEFAULT_GROWTH_THRESHOLD 0.8f
117 #define DEFAULT_GROWTH_FACTOR 1.414f
118
119
120
121
122
123
124
125 #define DEFAULT_SHRINK_THRESHOLD 0.0f
126 #define DEFAULT_SHRINK_FACTOR 1.0f
127
128
129
130 static const Hash_tuning default_tuning =
131 {
132 DEFAULT_SHRINK_THRESHOLD,
133 DEFAULT_SHRINK_FACTOR,
134 DEFAULT_GROWTH_THRESHOLD,
135 DEFAULT_GROWTH_FACTOR,
136 false
137 };
138
139
140
141 size_t
142 hash_get_n_buckets (const Hash_table *table)
143 {
144 return table->n_buckets;
145 }
146
147 size_t
148 hash_get_n_buckets_used (const Hash_table *table)
149 {
150 return table->n_buckets_used;
151 }
152
153 size_t
154 hash_get_n_entries (const Hash_table *table)
155 {
156 return table->n_entries;
157 }
158
159 size_t
160 hash_get_max_bucket_length (const Hash_table *table)
161 {
162 struct hash_entry const *bucket;
163 size_t max_bucket_length = 0;
164
165 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
166 {
167 if (bucket->data)
168 {
169 struct hash_entry const *cursor = bucket;
170 size_t bucket_length = 1;
171
172 while (cursor = cursor->next, cursor)
173 bucket_length++;
174
175 if (bucket_length > max_bucket_length)
176 max_bucket_length = bucket_length;
177 }
178 }
179
180 return max_bucket_length;
181 }
182
183 bool
184 hash_table_ok (const Hash_table *table)
185 {
186 struct hash_entry const *bucket;
187 size_t n_buckets_used = 0;
188 size_t n_entries = 0;
189
190 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
191 {
192 if (bucket->data)
193 {
194 struct hash_entry const *cursor = bucket;
195
196
197 n_buckets_used++;
198 n_entries++;
199
200
201 while (cursor = cursor->next, cursor)
202 n_entries++;
203 }
204 }
205
206 if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
207 return true;
208
209 return false;
210 }
211
212 void
213 hash_print_statistics (const Hash_table *table, FILE *stream)
214 {
215 size_t n_entries = hash_get_n_entries (table);
216 size_t n_buckets = hash_get_n_buckets (table);
217 size_t n_buckets_used = hash_get_n_buckets_used (table);
218 size_t max_bucket_length = hash_get_max_bucket_length (table);
219
220 fprintf (stream, "# entries: %lu\n", (unsigned long int) n_entries);
221 fprintf (stream, "# buckets: %lu\n", (unsigned long int) n_buckets);
222 fprintf (stream, "# buckets used: %lu (%.2f%%)\n",
223 (unsigned long int) n_buckets_used,
224 (100.0 * n_buckets_used) / n_buckets);
225 fprintf (stream, "max bucket length: %lu\n",
226 (unsigned long int) max_bucket_length);
227 }
228
229
230
231 static struct hash_entry *
232 safe_hasher (const Hash_table *table, const void *key)
233 {
234 size_t n = table->hasher (key, table->n_buckets);
235 if (! (n < table->n_buckets))
236 abort ();
237 return table->bucket + n;
238 }
239
240 void *
241 hash_lookup (const Hash_table *table, const void *entry)
242 {
243 struct hash_entry const *bucket = safe_hasher (table, entry);
244 struct hash_entry const *cursor;
245
246 if (bucket->data == NULL)
247 return NULL;
248
249 for (cursor = bucket; cursor; cursor = cursor->next)
250 if (entry == cursor->data || table->comparator (entry, cursor->data))
251 return cursor->data;
252
253 return NULL;
254 }
255
256
257
258 void *
259 hash_get_first (const Hash_table *table)
260 {
261 struct hash_entry const *bucket;
262
263 if (table->n_entries == 0)
264 return NULL;
265
266 for (bucket = table->bucket; ; bucket++)
267 if (! (bucket < table->bucket_limit))
268 abort ();
269 else if (bucket->data)
270 return bucket->data;
271 }
272
273 void *
274 hash_get_next (const Hash_table *table, const void *entry)
275 {
276 struct hash_entry const *bucket = safe_hasher (table, entry);
277 struct hash_entry const *cursor;
278
279
280 cursor = bucket;
281 do
282 {
283 if (cursor->data == entry && cursor->next)
284 return cursor->next->data;
285 cursor = cursor->next;
286 }
287 while (cursor != NULL);
288
289
290 while (++bucket < table->bucket_limit)
291 if (bucket->data)
292 return bucket->data;
293
294
295 return NULL;
296 }
297
298 size_t
299 hash_get_entries (const Hash_table *table, void **buffer,
300 size_t buffer_size)
301 {
302 size_t counter = 0;
303 struct hash_entry const *bucket;
304 struct hash_entry const *cursor;
305
306 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
307 {
308 if (bucket->data)
309 {
310 for (cursor = bucket; cursor; cursor = cursor->next)
311 {
312 if (counter >= buffer_size)
313 return counter;
314 buffer[counter++] = cursor->data;
315 }
316 }
317 }
318
319 return counter;
320 }
321
322 size_t
323 hash_do_for_each (const Hash_table *table, Hash_processor processor,
324 void *processor_data)
325 {
326 size_t counter = 0;
327 struct hash_entry const *bucket;
328 struct hash_entry const *cursor;
329
330 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
331 {
332 if (bucket->data)
333 {
334 for (cursor = bucket; cursor; cursor = cursor->next)
335 {
336 if (! processor (cursor->data, processor_data))
337 return counter;
338 counter++;
339 }
340 }
341 }
342
343 return counter;
344 }
345
346
347
348 #if USE_DIFF_HASH
349
350
351
352
353
354
355
356 size_t
357 hash_string (const char *string, size_t n_buckets)
358 {
359 # define HASH_ONE_CHAR(Value, Byte) \
360 ((Byte) + rotl_sz (Value, 7))
361
362 size_t value = 0;
363 unsigned char ch;
364
365 for (; (ch = *string); string++)
366 value = HASH_ONE_CHAR (value, ch);
367 return value % n_buckets;
368
369 # undef HASH_ONE_CHAR
370 }
371
372 #else
373
374
375
376
377
378
379 size_t
380 hash_string (const char *string, size_t n_buckets)
381 {
382 size_t value = 0;
383 unsigned char ch;
384
385 for (; (ch = *string); string++)
386 value = (value * 31 + ch) % n_buckets;
387 return value;
388 }
389
390 #endif
391
392
393
394
395 static bool _GL_ATTRIBUTE_CONST
396 is_prime (size_t candidate)
397 {
398 size_t divisor = 3;
399 size_t square = divisor * divisor;
400
401 while (square < candidate && (candidate % divisor))
402 {
403 divisor++;
404 square += 4 * divisor;
405 divisor++;
406 }
407
408 return (candidate % divisor ? true : false);
409 }
410
411
412
413
414 static size_t _GL_ATTRIBUTE_CONST
415 next_prime (size_t candidate)
416 {
417
418 if (candidate < 10)
419 candidate = 10;
420
421
422 candidate |= 1;
423
424 while (SIZE_MAX != candidate && !is_prime (candidate))
425 candidate += 2;
426
427 return candidate;
428 }
429
430 void
431 hash_reset_tuning (Hash_tuning *tuning)
432 {
433 *tuning = default_tuning;
434 }
435
436
437 static size_t
438 raw_hasher (const void *data, size_t n)
439 {
440
441
442
443
444
445 size_t val = rotr_sz ((size_t) data, 3);
446 return val % n;
447 }
448
449
450 static bool
451 raw_comparator (const void *a, const void *b)
452 {
453 return a == b;
454 }
455
456
457
458
459
460
461
462
463 static bool
464 check_tuning (Hash_table *table)
465 {
466 const Hash_tuning *tuning = table->tuning;
467 float epsilon;
468 if (tuning == &default_tuning)
469 return true;
470
471
472
473
474
475
476 epsilon = 0.1f;
477
478 if (epsilon < tuning->growth_threshold
479 && tuning->growth_threshold < 1 - epsilon
480 && 1 + epsilon < tuning->growth_factor
481 && 0 <= tuning->shrink_threshold
482 && tuning->shrink_threshold + epsilon < tuning->shrink_factor
483 && tuning->shrink_factor <= 1
484 && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
485 return true;
486
487 table->tuning = &default_tuning;
488 return false;
489 }
490
491
492
493
494
495 static size_t _GL_ATTRIBUTE_PURE
496 compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
497 {
498 if (!tuning->is_n_buckets)
499 {
500 float new_candidate = candidate / tuning->growth_threshold;
501 if ((float) SIZE_MAX <= new_candidate)
502 return 0;
503 candidate = new_candidate;
504 }
505 candidate = next_prime (candidate);
506 if (xalloc_oversized (candidate, sizeof (struct hash_entry *)))
507 return 0;
508 return candidate;
509 }
510
511 Hash_table *
512 hash_initialize (size_t candidate, const Hash_tuning *tuning,
513 Hash_hasher hasher, Hash_comparator comparator,
514 Hash_data_freer data_freer)
515 {
516 Hash_table *table;
517
518 if (hasher == NULL)
519 hasher = raw_hasher;
520 if (comparator == NULL)
521 comparator = raw_comparator;
522
523 table = malloc (sizeof *table);
524 if (table == NULL)
525 return NULL;
526
527 if (!tuning)
528 tuning = &default_tuning;
529 table->tuning = tuning;
530 if (!check_tuning (table))
531 {
532
533
534
535
536
537 goto fail;
538 }
539
540 table->n_buckets = compute_bucket_size (candidate, tuning);
541 if (!table->n_buckets)
542 goto fail;
543
544 table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
545 if (table->bucket == NULL)
546 goto fail;
547 table->bucket_limit = table->bucket + table->n_buckets;
548 table->n_buckets_used = 0;
549 table->n_entries = 0;
550
551 table->hasher = hasher;
552 table->comparator = comparator;
553 table->data_freer = data_freer;
554
555 table->free_entry_list = NULL;
556 #if USE_OBSTACK
557 obstack_init (&table->entry_stack);
558 #endif
559 return table;
560
561 fail:
562 free (table);
563 return NULL;
564 }
565
566 void
567 hash_clear (Hash_table *table)
568 {
569 struct hash_entry *bucket;
570
571 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
572 {
573 if (bucket->data)
574 {
575 struct hash_entry *cursor;
576 struct hash_entry *next;
577
578
579 for (cursor = bucket->next; cursor; cursor = next)
580 {
581 if (table->data_freer)
582 table->data_freer (cursor->data);
583 cursor->data = NULL;
584
585 next = cursor->next;
586
587
588 cursor->next = table->free_entry_list;
589 table->free_entry_list = cursor;
590 }
591
592
593 if (table->data_freer)
594 table->data_freer (bucket->data);
595 bucket->data = NULL;
596 bucket->next = NULL;
597 }
598 }
599
600 table->n_buckets_used = 0;
601 table->n_entries = 0;
602 }
603
604 void
605 hash_free (Hash_table *table)
606 {
607 struct hash_entry *bucket;
608 struct hash_entry *cursor;
609 struct hash_entry *next;
610
611
612 if (table->data_freer && table->n_entries)
613 {
614 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
615 {
616 if (bucket->data)
617 {
618 for (cursor = bucket; cursor; cursor = cursor->next)
619 table->data_freer (cursor->data);
620 }
621 }
622 }
623
624 #if USE_OBSTACK
625
626 obstack_free (&table->entry_stack, NULL);
627
628 #else
629
630
631 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
632 {
633 for (cursor = bucket->next; cursor; cursor = next)
634 {
635 next = cursor->next;
636 free (cursor);
637 }
638 }
639
640
641 for (cursor = table->free_entry_list; cursor; cursor = next)
642 {
643 next = cursor->next;
644 free (cursor);
645 }
646
647 #endif
648
649
650 free (table->bucket);
651 free (table);
652 }
653
654
655
656
657
658
659 static struct hash_entry *
660 allocate_entry (Hash_table *table)
661 {
662 struct hash_entry *new;
663
664 if (table->free_entry_list)
665 {
666 new = table->free_entry_list;
667 table->free_entry_list = new->next;
668 }
669 else
670 {
671 #if USE_OBSTACK
672 new = obstack_alloc (&table->entry_stack, sizeof *new);
673 #else
674 new = malloc (sizeof *new);
675 #endif
676 }
677
678 return new;
679 }
680
681
682
683
684 static void
685 free_entry (Hash_table *table, struct hash_entry *entry)
686 {
687 entry->data = NULL;
688 entry->next = table->free_entry_list;
689 table->free_entry_list = entry;
690 }
691
692
693
694
695
696
697
698 static void *
699 hash_find_entry (Hash_table *table, const void *entry,
700 struct hash_entry **bucket_head, bool delete)
701 {
702 struct hash_entry *bucket = safe_hasher (table, entry);
703 struct hash_entry *cursor;
704
705 *bucket_head = bucket;
706
707
708 if (bucket->data == NULL)
709 return NULL;
710
711
712 if (entry == bucket->data || table->comparator (entry, bucket->data))
713 {
714 void *data = bucket->data;
715
716 if (delete)
717 {
718 if (bucket->next)
719 {
720 struct hash_entry *next = bucket->next;
721
722
723
724 *bucket = *next;
725 free_entry (table, next);
726 }
727 else
728 {
729 bucket->data = NULL;
730 }
731 }
732
733 return data;
734 }
735
736
737 for (cursor = bucket; cursor->next; cursor = cursor->next)
738 {
739 if (entry == cursor->next->data
740 || table->comparator (entry, cursor->next->data))
741 {
742 void *data = cursor->next->data;
743
744 if (delete)
745 {
746 struct hash_entry *next = cursor->next;
747
748
749
750 cursor->next = next->next;
751 free_entry (table, next);
752 }
753
754 return data;
755 }
756 }
757
758
759 return NULL;
760 }
761
762
763
764
765
766
767
768 static bool
769 transfer_entries (Hash_table *dst, Hash_table *src, bool safe)
770 {
771 struct hash_entry *bucket;
772 struct hash_entry *cursor;
773 struct hash_entry *next;
774 for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
775 if (bucket->data)
776 {
777 void *data;
778 struct hash_entry *new_bucket;
779
780
781
782
783
784
785
786 for (cursor = bucket->next; cursor; cursor = next)
787 {
788 data = cursor->data;
789 new_bucket = safe_hasher (dst, data);
790
791 next = cursor->next;
792
793 if (new_bucket->data)
794 {
795
796
797 cursor->next = new_bucket->next;
798 new_bucket->next = cursor;
799 }
800 else
801 {
802
803
804 new_bucket->data = data;
805 dst->n_buckets_used++;
806 free_entry (dst, cursor);
807 }
808 }
809
810
811
812 data = bucket->data;
813 bucket->next = NULL;
814 if (safe)
815 continue;
816 new_bucket = safe_hasher (dst, data);
817
818 if (new_bucket->data)
819 {
820
821
822 struct hash_entry *new_entry = allocate_entry (dst);
823
824 if (new_entry == NULL)
825 return false;
826
827 new_entry->data = data;
828 new_entry->next = new_bucket->next;
829 new_bucket->next = new_entry;
830 }
831 else
832 {
833
834 new_bucket->data = data;
835 dst->n_buckets_used++;
836 }
837 bucket->data = NULL;
838 src->n_buckets_used--;
839 }
840 return true;
841 }
842
843 bool
844 hash_rehash (Hash_table *table, size_t candidate)
845 {
846 Hash_table storage;
847 Hash_table *new_table;
848 size_t new_size = compute_bucket_size (candidate, table->tuning);
849
850 if (!new_size)
851 return false;
852 if (new_size == table->n_buckets)
853 return true;
854 new_table = &storage;
855 new_table->bucket = calloc (new_size, sizeof *new_table->bucket);
856 if (new_table->bucket == NULL)
857 return false;
858 new_table->n_buckets = new_size;
859 new_table->bucket_limit = new_table->bucket + new_size;
860 new_table->n_buckets_used = 0;
861 new_table->n_entries = 0;
862 new_table->tuning = table->tuning;
863 new_table->hasher = table->hasher;
864 new_table->comparator = table->comparator;
865 new_table->data_freer = table->data_freer;
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882 #if USE_OBSTACK
883 new_table->entry_stack = table->entry_stack;
884 #endif
885 new_table->free_entry_list = table->free_entry_list;
886
887 if (transfer_entries (new_table, table, false))
888 {
889
890 free (table->bucket);
891 table->bucket = new_table->bucket;
892 table->bucket_limit = new_table->bucket_limit;
893 table->n_buckets = new_table->n_buckets;
894 table->n_buckets_used = new_table->n_buckets_used;
895 table->free_entry_list = new_table->free_entry_list;
896
897 return true;
898 }
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913 table->free_entry_list = new_table->free_entry_list;
914 if (! (transfer_entries (table, new_table, true)
915 && transfer_entries (table, new_table, false)))
916 abort ();
917
918 free (new_table->bucket);
919 return false;
920 }
921
922 int
923 hash_insert_if_absent (Hash_table *table, void const *entry,
924 void const **matched_ent)
925 {
926 void *data;
927 struct hash_entry *bucket;
928
929
930
931
932 if (! entry)
933 abort ();
934
935
936 if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
937 {
938 if (matched_ent)
939 *matched_ent = data;
940 return 0;
941 }
942
943
944
945
946
947
948 if (table->n_buckets_used
949 > table->tuning->growth_threshold * table->n_buckets)
950 {
951
952
953 check_tuning (table);
954 if (table->n_buckets_used
955 > table->tuning->growth_threshold * table->n_buckets)
956 {
957 const Hash_tuning *tuning = table->tuning;
958 float candidate =
959 (tuning->is_n_buckets
960 ? (table->n_buckets * tuning->growth_factor)
961 : (table->n_buckets * tuning->growth_factor
962 * tuning->growth_threshold));
963
964 if ((float) SIZE_MAX <= candidate)
965 return -1;
966
967
968 if (!hash_rehash (table, candidate))
969 return -1;
970
971
972 if (hash_find_entry (table, entry, &bucket, false) != NULL)
973 abort ();
974 }
975 }
976
977
978
979 if (bucket->data)
980 {
981 struct hash_entry *new_entry = allocate_entry (table);
982
983 if (new_entry == NULL)
984 return -1;
985
986
987
988 new_entry->data = (void *) entry;
989 new_entry->next = bucket->next;
990 bucket->next = new_entry;
991 table->n_entries++;
992 return 1;
993 }
994
995
996
997 bucket->data = (void *) entry;
998 table->n_entries++;
999 table->n_buckets_used++;
1000
1001 return 1;
1002 }
1003
1004 void *
1005 hash_insert (Hash_table *table, void const *entry)
1006 {
1007 void const *matched_ent;
1008 int err = hash_insert_if_absent (table, entry, &matched_ent);
1009 return (err == -1
1010 ? NULL
1011 : (void *) (err == 0 ? matched_ent : entry));
1012 }
1013
1014 void *
1015 hash_remove (Hash_table *table, const void *entry)
1016 {
1017 void *data;
1018 struct hash_entry *bucket;
1019
1020 data = hash_find_entry (table, entry, &bucket, true);
1021 if (!data)
1022 return NULL;
1023
1024 table->n_entries--;
1025 if (!bucket->data)
1026 {
1027 table->n_buckets_used--;
1028
1029
1030
1031
1032 if (table->n_buckets_used
1033 < table->tuning->shrink_threshold * table->n_buckets)
1034 {
1035
1036
1037 check_tuning (table);
1038 if (table->n_buckets_used
1039 < table->tuning->shrink_threshold * table->n_buckets)
1040 {
1041 const Hash_tuning *tuning = table->tuning;
1042 size_t candidate =
1043 (tuning->is_n_buckets
1044 ? table->n_buckets * tuning->shrink_factor
1045 : (table->n_buckets * tuning->shrink_factor
1046 * tuning->growth_threshold));
1047
1048 if (!hash_rehash (table, candidate))
1049 {
1050
1051
1052
1053
1054
1055 #if ! USE_OBSTACK
1056 struct hash_entry *cursor = table->free_entry_list;
1057 struct hash_entry *next;
1058 while (cursor)
1059 {
1060 next = cursor->next;
1061 free (cursor);
1062 cursor = next;
1063 }
1064 table->free_entry_list = NULL;
1065 #endif
1066 }
1067 }
1068 }
1069 }
1070
1071 return data;
1072 }
1073
1074 void *
1075 hash_delete (Hash_table *table, const void *entry)
1076 {
1077 return hash_remove (table, entry);
1078 }
1079
1080
1081
1082 #if TESTING
1083
1084 void
1085 hash_print (const Hash_table *table)
1086 {
1087 struct hash_entry *bucket = (struct hash_entry *) table->bucket;
1088
1089 for ( ; bucket < table->bucket_limit; bucket++)
1090 {
1091 struct hash_entry *cursor;
1092
1093 if (bucket)
1094 printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
1095
1096 for (cursor = bucket; cursor; cursor = cursor->next)
1097 {
1098 char const *s = cursor->data;
1099
1100 if (s)
1101 printf (" %s\n", s);
1102 }
1103 }
1104 }
1105
1106 #endif