2 * hash.c Non-thread-safe split-ordered hash table.
4 * The weird "reverse" function is based on an idea from
5 * "Split-Ordered Lists - Lock-free Resizable Hash Tables", with
6 * modifications so that they're not lock-free. :(
8 * However, the split-order idea allows a fast & easy splitting of the
9 * hash bucket chain when the hash table is resized. Without it, we'd
10 * have to check & update the pointers for every node in the buck chain,
11 * rather than being able to move 1/2 of the entries in the chain with
16 * This library is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU Lesser General Public
18 * License as published by the Free Software Foundation; either
19 * version 2.1 of the License, or (at your option) any later version.
21 * This library is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 * Lesser General Public License for more details.
26 * You should have received a copy of the GNU Lesser General Public
27 * License along with this library; if not, write to the Free Software
28 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 * Copyright 2005,2006 The FreeRADIUS server project
33 #include <freeradius-devel/ident.h>
36 #include <freeradius-devel/libradius.h>
37 #include <freeradius-devel/hash.h>
40 * A reasonable number of buckets to start off with.
41 * Should be a power of two.
43 #define FR_HASH_NUM_BUCKETS (64)
45 typedef struct fr_hash_entry_t {
46 struct fr_hash_entry_t *next;
53 struct fr_hash_table_t {
55 int num_buckets; /* power of 2 */
59 fr_hash_table_free_t free;
60 fr_hash_table_hash_t hash;
61 fr_hash_table_cmp_t cmp;
65 fr_hash_entry_t **buckets;
75 * perl -e 'foreach $i (0..255) {$r = 0; foreach $j (0 .. 7 ) { if (($i & ( 1<< $j)) != 0) { $r |= (1 << (7 - $j));}} print $r, ", ";if (($i & 7) == 7) {print "\n";}}'
77 static const uint8_t reversed_byte[256] = {
78 0, 128, 64, 192, 32, 160, 96, 224,
79 16, 144, 80, 208, 48, 176, 112, 240,
80 8, 136, 72, 200, 40, 168, 104, 232,
81 24, 152, 88, 216, 56, 184, 120, 248,
82 4, 132, 68, 196, 36, 164, 100, 228,
83 20, 148, 84, 212, 52, 180, 116, 244,
84 12, 140, 76, 204, 44, 172, 108, 236,
85 28, 156, 92, 220, 60, 188, 124, 252,
86 2, 130, 66, 194, 34, 162, 98, 226,
87 18, 146, 82, 210, 50, 178, 114, 242,
88 10, 138, 74, 202, 42, 170, 106, 234,
89 26, 154, 90, 218, 58, 186, 122, 250,
90 6, 134, 70, 198, 38, 166, 102, 230,
91 22, 150, 86, 214, 54, 182, 118, 246,
92 14, 142, 78, 206, 46, 174, 110, 238,
93 30, 158, 94, 222, 62, 190, 126, 254,
94 1, 129, 65, 193, 33, 161, 97, 225,
95 17, 145, 81, 209, 49, 177, 113, 241,
96 9, 137, 73, 201, 41, 169, 105, 233,
97 25, 153, 89, 217, 57, 185, 121, 249,
98 5, 133, 69, 197, 37, 165, 101, 229,
99 21, 149, 85, 213, 53, 181, 117, 245,
100 13, 141, 77, 205, 45, 173, 109, 237,
101 29, 157, 93, 221, 61, 189, 125, 253,
102 3, 131, 67, 195, 35, 163, 99, 227,
103 19, 147, 83, 211, 51, 179, 115, 243,
104 11, 139, 75, 203, 43, 171, 107, 235,
105 27, 155, 91, 219, 59, 187, 123, 251,
106 7, 135, 71, 199, 39, 167, 103, 231,
107 23, 151, 87, 215, 55, 183, 119, 247,
108 15, 143, 79, 207, 47, 175, 111, 239,
109 31, 159, 95, 223, 63, 191, 127, 255
114 * perl -e 'foreach $i (0..255) {$r = 0;foreach $j (0 .. 7) { $r = $i & (1 << (7 - $j)); last if ($r)} print $i & ~($r), ", ";if (($i & 7) == 7) {print "\n";}}'
116 static uint8_t parent_byte[256] = {
117 0, 0, 0, 1, 0, 1, 2, 3,
118 0, 1, 2, 3, 4, 5, 6, 7,
119 0, 1, 2, 3, 4, 5, 6, 7,
120 8, 9, 10, 11, 12, 13, 14, 15,
121 0, 1, 2, 3, 4, 5, 6, 7,
122 8, 9, 10, 11, 12, 13, 14, 15,
123 16, 17, 18, 19, 20, 21, 22, 23,
124 24, 25, 26, 27, 28, 29, 30, 31,
125 0, 1, 2, 3, 4, 5, 6, 7,
126 8, 9, 10, 11, 12, 13, 14, 15,
127 16, 17, 18, 19, 20, 21, 22, 23,
128 24, 25, 26, 27, 28, 29, 30, 31,
129 32, 33, 34, 35, 36, 37, 38, 39,
130 40, 41, 42, 43, 44, 45, 46, 47,
131 48, 49, 50, 51, 52, 53, 54, 55,
132 56, 57, 58, 59, 60, 61, 62, 63,
133 0, 1, 2, 3, 4, 5, 6, 7,
134 8, 9, 10, 11, 12, 13, 14, 15,
135 16, 17, 18, 19, 20, 21, 22, 23,
136 24, 25, 26, 27, 28, 29, 30, 31,
137 32, 33, 34, 35, 36, 37, 38, 39,
138 40, 41, 42, 43, 44, 45, 46, 47,
139 48, 49, 50, 51, 52, 53, 54, 55,
140 56, 57, 58, 59, 60, 61, 62, 63,
141 64, 65, 66, 67, 68, 69, 70, 71,
142 72, 73, 74, 75, 76, 77, 78, 79,
143 80, 81, 82, 83, 84, 85, 86, 87,
144 88, 89, 90, 91, 92, 93, 94, 95,
145 96, 97, 98, 99, 100, 101, 102, 103,
146 104, 105, 106, 107, 108, 109, 110, 111,
147 112, 113, 114, 115, 116, 117, 118, 119,
148 120, 121, 122, 123, 124, 125, 126, 127
155 static uint32_t reverse(uint32_t key)
157 return ((reversed_byte[key & 0xff] << 24) |
158 (reversed_byte[(key >> 8) & 0xff] << 16) |
159 (reversed_byte[(key >> 16) & 0xff] << 8) |
160 (reversed_byte[(key >> 24) & 0xff]));
164 * Take the parent by discarding the highest bit that is set.
166 static uint32_t parent_of(uint32_t key)
168 if (key > 0x00ffffff)
169 return (key & 0x00ffffff) | (parent_byte[key >> 24] << 24);
171 if (key > 0x0000ffff)
172 return (key & 0x0000ffff) | (parent_byte[key >> 16] << 16);
174 if (key > 0x000000ff)
175 return (key & 0x000000ff) | (parent_byte[key >> 8] << 8);
177 return parent_byte[key];
181 static fr_hash_entry_t *list_find(fr_hash_table_t *ht,
182 fr_hash_entry_t *head,
186 fr_hash_entry_t *cur;
188 for (cur = head; cur != &ht->null; cur = cur->next) {
189 if (cur->reversed == reversed) {
191 int cmp = ht->cmp(data, cur->data);
193 if (cmp < 0) continue;
197 if (cur->reversed > reversed) break;
205 * Inserts a new entry into the list, in order.
207 static int list_insert(fr_hash_table_t *ht,
208 fr_hash_entry_t **head, fr_hash_entry_t *node)
210 fr_hash_entry_t **last, *cur;
214 for (cur = *head; cur != &ht->null; cur = cur->next) {
215 if (cur->reversed > node->reversed) break;
218 if (cur->reversed == node->reversed) {
220 int cmp = ht->cmp(node->data, cur->data);
222 if (cmp < 0) continue;
236 * Delete an entry from the list.
238 static int list_delete(fr_hash_table_t *ht,
239 fr_hash_entry_t **head, fr_hash_entry_t *node)
241 fr_hash_entry_t **last, *cur;
245 for (cur = *head; cur != &ht->null; cur = cur->next) {
246 if (cur == node) break;
258 * Memory usage in bytes is (20/3) * number of entries.
260 fr_hash_table_t *fr_hash_table_create(fr_hash_table_hash_t hashNode,
261 fr_hash_table_cmp_t cmpNode,
262 fr_hash_table_free_t freeNode)
266 if (!hashNode) return NULL;
268 ht = malloc(sizeof(*ht));
269 if (!ht) return NULL;
271 memset(ht, 0, sizeof(*ht));
275 ht->num_buckets = FR_HASH_NUM_BUCKETS;
276 ht->mask = ht->num_buckets - 1;
279 * Have a default load factor of 2.5. In practice this
280 * means that the average load will hit 3 before the
283 ht->next_grow = (ht->num_buckets << 1) + (ht->num_buckets >> 1);
285 ht->buckets = malloc(sizeof(*ht->buckets) * ht->num_buckets);
290 memset(ht->buckets, 0, sizeof(*ht->buckets) * ht->num_buckets);
292 ht->null.reversed = ~0;
294 ht->null.next = &ht->null;
296 ht->buckets[0] = &ht->null;
303 * If the current bucket is uninitialized, initialize it
304 * by recursively copying information from the parent.
306 * We may have a situation where entry E is a parent to 2 other
307 * entries E' and E". If we split E into E and E', then the
308 * nodes meant for E" end up in E or E', either of which is
309 * wrong. To solve that problem, we walk down the whole chain,
310 * inserting the elements into the correct place.
312 static void fr_hash_table_fixup(fr_hash_table_t *ht, uint32_t entry)
314 uint32_t parent_entry = parent_of(entry);
315 fr_hash_entry_t **last, *cur;
318 parent_entry = parent_of(entry);
320 /* parent_entry == entry if and only if entry == 0 */
322 if (!ht->buckets[parent_entry]) {
323 fr_hash_table_fixup(ht, parent_entry);
327 * Keep walking down cur, trying to find entries that
328 * don't belong here any more. There may be multiple
329 * ones, so we can't have a naive algorithm...
331 last = &ht->buckets[parent_entry];
334 for (cur = *last; cur != &ht->null; cur = cur->next) {
337 real_entry = cur->key & ht->mask;
338 if (real_entry != this) { /* ht->buckets[real_entry] == NULL */
340 ht->buckets[real_entry] = cur;
348 * We may NOT have initialized this bucket, so do it now.
350 if (!ht->buckets[entry]) ht->buckets[entry] = &ht->null;
354 * This should be a power of two. Changing it to 4 doesn't seem
355 * to make any difference.
357 #define GROW_FACTOR (2)
360 * Grow the hash table.
362 static void fr_hash_table_grow(fr_hash_table_t *ht)
364 fr_hash_entry_t **buckets;
366 buckets = malloc(sizeof(*buckets) * GROW_FACTOR * ht->num_buckets);
367 if (!buckets) return;
369 memcpy(buckets, ht->buckets,
370 sizeof(*buckets) * ht->num_buckets);
371 memset(&buckets[ht->num_buckets], 0,
372 sizeof(*buckets) * ht->num_buckets);
375 ht->buckets = buckets;
376 ht->num_buckets *= GROW_FACTOR;
377 ht->next_grow *= GROW_FACTOR;
378 ht->mask = ht->num_buckets - 1;
381 fprintf(stderr, "GROW TO %d\n", ht->num_buckets);
389 int fr_hash_table_insert(fr_hash_table_t *ht, void *data)
394 fr_hash_entry_t *node;
396 if (!ht || !data) return 0;
398 key = ht->hash(data);
399 entry = key & ht->mask;
400 reversed = reverse(key);
402 if (!ht->buckets[entry]) fr_hash_table_fixup(ht, entry);
405 * If we try to do our own memory allocation here, the
406 * speedup is only ~15% or so, which isn't worth it.
408 node = malloc(sizeof(*node));
410 memset(node, 0, sizeof(*node));
412 node->next = &ht->null;
413 node->reversed = reversed;
417 /* already in the table, can't insert it */
418 if (!list_insert(ht, &ht->buckets[entry], node)) {
424 * Check the load factor, and grow the table if
428 if (ht->num_elements >= ht->next_grow) {
429 fr_hash_table_grow(ht);
437 * Internal find a node routine.
439 static fr_hash_entry_t *fr_hash_table_find(fr_hash_table_t *ht,
446 if (!ht) return NULL;
448 key = ht->hash(data);
449 entry = key & ht->mask;
450 reversed = reverse(key);
452 if (!ht->buckets[entry]) fr_hash_table_fixup(ht, entry);
454 return list_find(ht, ht->buckets[entry], reversed, data);
459 * Replace old data with new data, OR insert if there is no old.
461 int fr_hash_table_replace(fr_hash_table_t *ht, void *data)
463 fr_hash_entry_t *node;
465 if (!ht || !data) return 0;
467 node = fr_hash_table_find(ht, data);
468 if (!node) return fr_hash_table_insert(ht, data);
470 if (ht->free) ht->free(node->data);
478 * Find data from a template
480 void *fr_hash_table_finddata(fr_hash_table_t *ht, const void *data)
482 fr_hash_entry_t *node;
484 node = fr_hash_table_find(ht, data);
485 if (!node) return NULL;
493 * Yank an entry from the hash table, without freeing the data.
495 void *fr_hash_table_yank(fr_hash_table_t *ht, const void *data)
501 fr_hash_entry_t *node;
503 if (!ht) return NULL;
505 key = ht->hash(data);
506 entry = key & ht->mask;
507 reversed = reverse(key);
509 if (!ht->buckets[entry]) fr_hash_table_fixup(ht, entry);
511 node = list_find(ht, ht->buckets[entry], reversed, data);
512 if (!node) return NULL;
514 list_delete(ht, &ht->buckets[entry], node);
525 * Delete a piece of data from the hash table.
527 int fr_hash_table_delete(fr_hash_table_t *ht, const void *data)
531 old = fr_hash_table_yank(ht, data);
534 if (ht->free) ht->free(old);
543 void fr_hash_table_free(fr_hash_table_t *ht)
546 fr_hash_entry_t *node, *next;
551 * Walk over the buckets, freeing them all.
553 for (i = 0; i < ht->num_buckets; i++) {
554 if (ht->buckets[i]) for (node = ht->buckets[i];
559 if (!node->data) continue; /* dummy entry */
561 if (ht->free) ht->free(node->data);
572 * Count number of elements
574 int fr_hash_table_num_elements(fr_hash_table_t *ht)
578 return ht->num_elements;
583 * Walk over the nodes, allowing deletes & inserts to happen.
585 int fr_hash_table_walk(fr_hash_table_t *ht,
586 fr_hash_table_walk_t callback,
591 if (!ht || !callback) return 0;
593 for (i = ht->num_buckets - 1; i >= 0; i--) {
594 fr_hash_entry_t *node, *next;
597 * Ensure that the current bucket is filled.
599 if (!ht->buckets[i]) fr_hash_table_fixup(ht, i);
601 for (node = ht->buckets[i]; node != &ht->null; node = next) {
604 rcode = callback(context, node->data);
605 if (rcode != 0) return rcode;
615 * Show what the hash table is doing.
617 int fr_hash_table_info(fr_hash_table_t *ht)
619 int i, a, collisions, uninitialized;
624 uninitialized = collisions = 0;
625 memset(array, 0, sizeof(array));
627 for (i = 0; i < ht->num_buckets; i++) {
630 fr_hash_entry_t *node, *next;
633 * If we haven't inserted or looked up an entry
634 * in a bucket, it's uninitialized.
636 if (!ht->buckets[i]) {
643 for (node = ht->buckets[i]; node != &ht->null; node = next) {
644 if (node->reversed == key) {
647 key = node->reversed;
653 if (load > 255) load = 255;
657 printf("HASH TABLE %p\tbuckets: %d\t(%d uninitialized)\n", ht,
658 ht->num_buckets, uninitialized);
659 printf("\tnum entries %d\thash collisions %d\n",
660 ht->num_elements, collisions);
663 for (i = 1; i < 256; i++) {
664 if (!array[i]) continue;
665 printf("%d\t%d\n", i, array[i]);
668 * Since the entries are ordered, the lookup cost
669 * for any one element in a chain is (on average)
670 * the cost of walking half of the chain.
679 printf("\texpected lookup cost = %d/%d or %f\n\n",
681 (float) ht->num_elements / (float) a);
688 #define FNV_MAGIC_INIT (0x811c9dc5)
689 #define FNV_MAGIC_PRIME (0x01000193)
692 * A fast hash function. For details, see:
694 * http://www.isthe.com/chongo/tech/comp/fnv/
696 * Which also includes public domain source. We've re-written
697 * it here for our purposes.
699 uint32_t fr_hash(const void *data, size_t size)
701 const uint8_t *p = data;
702 const uint8_t *q = p + size;
703 uint32_t hash = FNV_MAGIC_INIT;
706 * FNV-1 hash each octet in the buffer
710 * Multiple by 32-bit magic FNV prime, mod 2^32
712 hash *= FNV_MAGIC_PRIME;
715 * Potential optimization.
717 hash += (hash<<1) + (hash<<4) + (hash<<7) + (hash<<8) + (hash<<24);
720 * XOR the 8-bit quantity into the bottom of
723 hash ^= (uint32_t) (*p++);
730 * Continue hashing data.
732 uint32_t fr_hash_update(const void *data, size_t size, uint32_t hash)
734 const uint8_t *p = data;
735 const uint8_t *q = p + size;
738 hash *= FNV_MAGIC_PRIME;
739 hash ^= (uint32_t) (*p++);
747 * Return a "folded" hash, where the lower "bits" are the
748 * hash, and the upper bits are zero.
750 * If you need a non-power-of-two hash, cope.
752 uint32_t fr_hash_fold(uint32_t hash, int bits)
757 if ((bits <= 0) || (bits >= 32)) return hash;
762 * Never use the same bits twice in an xor.
764 for (count = 0; count < 32; count += bits) {
769 return result & (((uint32_t) (1 << bits)) - 1);
774 * Hash a C string, so we loop over it once.
776 uint32_t fr_hash_string(const char *p)
778 uint32_t hash = FNV_MAGIC_INIT;
781 hash *= FNV_MAGIC_PRIME;
782 hash ^= (uint32_t) (*p++);
791 * cc -g -DTESTING -I ../include hash.c -o hash
799 static uint32_t hash_int(const void *data)
801 return fr_hash((int *) data, sizeof(int));
804 #define MAX 1024*1024
805 int main(int argc, char **argv)
811 ht = fr_hash_table_create(hash_int, NULL, NULL);
813 fprintf(stderr, "Hash create failed\n");
817 array = malloc(sizeof(int) * MAX);
820 for (i = 0; i < MAX; i++) {
824 if (!fr_hash_table_insert(ht, p)) {
825 fprintf(stderr, "Failed insert %08x\n", i);
829 q = fr_hash_table_finddata(ht, p);
831 fprintf(stderr, "Bad data %d\n", i);
837 fr_hash_table_info(ht);
840 * Build this to see how lookups result in shortening
841 * of the hash chains.
844 for (i = 0; i < MAX ; i++) {
845 q = fr_hash_table_finddata(ht, &i);
847 fprintf(stderr, "Failed finding %d\n", i);
852 if (!fr_hash_table_delete(ht, &i)) {
853 fprintf(stderr, "Failed deleting %d\n", i);
856 q = fr_hash_table_finddata(ht, &i);
858 fprintf(stderr, "Failed to delete %08x\n", i);
864 fr_hash_table_info(ht);
867 fr_hash_table_free(ht);