2 * hash.c Non-thread-safe split-ordered hash table.
4 * The weird "reverse" function is based on an idea from
5 * "Split-Ordered Lists - Lock-free Resizable Hash Tables", with
6 * modifications so that they're not lock-free. :(
8 * However, the split-order idea allows a fast & easy splitting of the
9 * hash bucket chain when the hash table is resized. Without it, we'd
10 * have to check & update the pointers for every node in the buck chain,
11 * rather than being able to move 1/2 of the entries in the chain with
16 * This library is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU Lesser General Public
18 * License as published by the Free Software Foundation; either
19 * version 2.1 of the License, or (at your option) any later version.
21 * This library is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 * Lesser General Public License for more details.
26 * You should have received a copy of the GNU Lesser General Public
27 * License along with this library; if not, write to the Free Software
28 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 * Copyright 2005 The FreeRADIUS server project
33 static const char rcsid[] = "$Id$";
35 #include <freeradius-devel/autoconf.h>
40 #include <freeradius-devel/missing.h>
41 #include <freeradius-devel/hash.h>
44 * A reasonable number of buckets to start off with.
45 * Should be a power of two.
47 #define LRAD_HASH_NUM_BUCKETS (64)
49 typedef struct lrad_hash_entry_t {
50 struct lrad_hash_entry_t *next;
57 struct lrad_hash_table_t {
59 int num_buckets; /* power of 2 */
63 lrad_hash_table_free_t free;
64 lrad_hash_table_hash_t hash;
65 lrad_hash_table_cmp_t cmp;
67 lrad_hash_entry_t null;
69 lrad_hash_entry_t **buckets;
77 * perl -e 'foreach $i (0..255) {$r = 0; foreach $j (0 .. 7 ) { if (($i & ( 1<< $j)) != 0) { $r |= (1 << (7 - $j));}} print $r, ", ";if (($i & 7) == 7) {print "\n";}}'
79 static const uint8_t reversed_byte[256] = {
80 0, 128, 64, 192, 32, 160, 96, 224,
81 16, 144, 80, 208, 48, 176, 112, 240,
82 8, 136, 72, 200, 40, 168, 104, 232,
83 24, 152, 88, 216, 56, 184, 120, 248,
84 4, 132, 68, 196, 36, 164, 100, 228,
85 20, 148, 84, 212, 52, 180, 116, 244,
86 12, 140, 76, 204, 44, 172, 108, 236,
87 28, 156, 92, 220, 60, 188, 124, 252,
88 2, 130, 66, 194, 34, 162, 98, 226,
89 18, 146, 82, 210, 50, 178, 114, 242,
90 10, 138, 74, 202, 42, 170, 106, 234,
91 26, 154, 90, 218, 58, 186, 122, 250,
92 6, 134, 70, 198, 38, 166, 102, 230,
93 22, 150, 86, 214, 54, 182, 118, 246,
94 14, 142, 78, 206, 46, 174, 110, 238,
95 30, 158, 94, 222, 62, 190, 126, 254,
96 1, 129, 65, 193, 33, 161, 97, 225,
97 17, 145, 81, 209, 49, 177, 113, 241,
98 9, 137, 73, 201, 41, 169, 105, 233,
99 25, 153, 89, 217, 57, 185, 121, 249,
100 5, 133, 69, 197, 37, 165, 101, 229,
101 21, 149, 85, 213, 53, 181, 117, 245,
102 13, 141, 77, 205, 45, 173, 109, 237,
103 29, 157, 93, 221, 61, 189, 125, 253,
104 3, 131, 67, 195, 35, 163, 99, 227,
105 19, 147, 83, 211, 51, 179, 115, 243,
106 11, 139, 75, 203, 43, 171, 107, 235,
107 27, 155, 91, 219, 59, 187, 123, 251,
108 7, 135, 71, 199, 39, 167, 103, 231,
109 23, 151, 87, 215, 55, 183, 119, 247,
110 15, 143, 79, 207, 47, 175, 111, 239,
111 31, 159, 95, 223, 63, 191, 127, 255
116 * perl -e 'foreach $i (0..255) {$r = 0;foreach $j (0 .. 7) { $r = $i & (1 << (7 - $j)); last if ($r)} print $i & ~($r), ", ";if (($i & 7) == 7) {print "\n";}}'
118 static uint8_t parent_byte[256] = {
119 0, 0, 0, 1, 0, 1, 2, 3,
120 0, 1, 2, 3, 4, 5, 6, 7,
121 0, 1, 2, 3, 4, 5, 6, 7,
122 8, 9, 10, 11, 12, 13, 14, 15,
123 0, 1, 2, 3, 4, 5, 6, 7,
124 8, 9, 10, 11, 12, 13, 14, 15,
125 16, 17, 18, 19, 20, 21, 22, 23,
126 24, 25, 26, 27, 28, 29, 30, 31,
127 0, 1, 2, 3, 4, 5, 6, 7,
128 8, 9, 10, 11, 12, 13, 14, 15,
129 16, 17, 18, 19, 20, 21, 22, 23,
130 24, 25, 26, 27, 28, 29, 30, 31,
131 32, 33, 34, 35, 36, 37, 38, 39,
132 40, 41, 42, 43, 44, 45, 46, 47,
133 48, 49, 50, 51, 52, 53, 54, 55,
134 56, 57, 58, 59, 60, 61, 62, 63,
135 0, 1, 2, 3, 4, 5, 6, 7,
136 8, 9, 10, 11, 12, 13, 14, 15,
137 16, 17, 18, 19, 20, 21, 22, 23,
138 24, 25, 26, 27, 28, 29, 30, 31,
139 32, 33, 34, 35, 36, 37, 38, 39,
140 40, 41, 42, 43, 44, 45, 46, 47,
141 48, 49, 50, 51, 52, 53, 54, 55,
142 56, 57, 58, 59, 60, 61, 62, 63,
143 64, 65, 66, 67, 68, 69, 70, 71,
144 72, 73, 74, 75, 76, 77, 78, 79,
145 80, 81, 82, 83, 84, 85, 86, 87,
146 88, 89, 90, 91, 92, 93, 94, 95,
147 96, 97, 98, 99, 100, 101, 102, 103,
148 104, 105, 106, 107, 108, 109, 110, 111,
149 112, 113, 114, 115, 116, 117, 118, 119,
150 120, 121, 122, 123, 124, 125, 126, 127
157 static uint32_t reverse(uint32_t key)
159 return ((reversed_byte[key & 0xff] << 24) |
160 (reversed_byte[(key >> 8) & 0xff] << 16) |
161 (reversed_byte[(key >> 16) & 0xff] << 8) |
162 (reversed_byte[(key >> 24) & 0xff]));
166 * Take the parent by discarding the highest bit that is set.
168 static uint32_t parent_of(uint32_t key)
170 if (key > 0x00ffffff)
171 return (key & 0x00ffffff) | (parent_byte[key >> 24] << 24);
173 if (key > 0x0000ffff)
174 return (key & 0x0000ffff) | (parent_byte[key >> 16] << 16);
176 if (key > 0x000000ff)
177 return (key & 0x000000ff) | (parent_byte[key >> 8] << 8);
179 return parent_byte[key];
183 static lrad_hash_entry_t *list_find(lrad_hash_table_t *ht,
184 lrad_hash_entry_t *head,
188 lrad_hash_entry_t *cur;
190 for (cur = head; cur != &ht->null; cur = cur->next) {
191 if (cur->reversed == reversed) {
193 int cmp = ht->cmp(data, cur->data);
195 if (cmp < 0) continue;
199 if (cur->reversed > reversed) break;
207 * Inserts a new entry into the list, in order.
209 static int list_insert(lrad_hash_table_t *ht,
210 lrad_hash_entry_t **head, lrad_hash_entry_t *node)
212 lrad_hash_entry_t **last, *cur;
216 for (cur = *head; cur != &ht->null; cur = cur->next) {
217 if (cur->reversed > node->reversed) break;
220 if (cur->reversed == node->reversed) {
222 int cmp = ht->cmp(node->data, cur->data);
224 if (cmp < 0) continue;
238 * Delete an entry from the list.
240 static int list_delete(lrad_hash_table_t *ht,
241 lrad_hash_entry_t **head, lrad_hash_entry_t *node)
243 lrad_hash_entry_t **last, *cur;
247 for (cur = *head; cur != &ht->null; cur = cur->next) {
250 int cmp = ht->cmp(node->data, cur->data);
252 if (cmp < 0) continue;
267 * Memory usage in bytes is (20/3) * number of entries.
269 lrad_hash_table_t *lrad_hash_table_create(lrad_hash_table_hash_t hashNode,
270 lrad_hash_table_cmp_t cmpNode,
271 lrad_hash_table_free_t freeNode)
273 lrad_hash_table_t *ht;
275 if (!hashNode) return NULL;
277 ht = malloc(sizeof(*ht));
278 if (!ht) return NULL;
280 memset(ht, 0, sizeof(*ht));
284 ht->num_buckets = LRAD_HASH_NUM_BUCKETS;
285 ht->mask = ht->num_buckets - 1;
288 * Have a default load factor of 2.5. In practice this
289 * means that the average load will hit 3 before the
292 ht->next_grow = (ht->num_buckets << 1) + (ht->num_buckets >> 1);
294 ht->buckets = malloc(sizeof(*ht->buckets) * ht->num_buckets);
299 memset(ht->buckets, 0, sizeof(*ht->buckets) * ht->num_buckets);
301 ht->null.reversed = ~0;
303 ht->null.next = &ht->null;
305 ht->buckets[0] = &ht->null;
312 * If the current bucket is uninitialized, initialize it
313 * by recursively copying information from the parent.
315 * We may have a situation where entry E is a parent to 2 other
316 * entries E' and E". If we split E into E and E', then the
317 * nodes meant for E" end up in E or E', either of which is
318 * wrong. To solve that problem, we walk down the whole chain,
319 * inserting the elements into the correct place.
321 static void lrad_hash_table_fixup(lrad_hash_table_t *ht, uint32_t entry)
323 uint32_t parent_entry = parent_of(entry);
324 lrad_hash_entry_t **last, *cur;
327 parent_entry = parent_of(entry);
329 /* parent_entry == entry if and only if entry == 0 */
331 if (!ht->buckets[parent_entry]) {
332 lrad_hash_table_fixup(ht, parent_entry);
336 * Keep walking down cur, trying to find entries that
337 * don't belong here any more. There may be multiple
338 * ones, so we can't have a naive algorithm...
340 last = &ht->buckets[parent_entry];
343 for (cur = *last; cur != &ht->null; cur = cur->next) {
346 real_entry = cur->key & ht->mask;
347 if (real_entry != this) { /* ht->buckets[real_entry] == NULL */
349 ht->buckets[real_entry] = cur;
357 * We may NOT have initialized this bucket, so do it now.
359 if (!ht->buckets[entry]) ht->buckets[entry] = &ht->null;
363 * This should be a power of two. Changing it to 4 doesn't seem
364 * to make any difference.
366 #define GROW_FACTOR (2)
369 * Grow the hash table.
371 static void lrad_hash_table_grow(lrad_hash_table_t *ht)
373 lrad_hash_entry_t **buckets;
375 buckets = malloc(sizeof(*buckets) * GROW_FACTOR * ht->num_buckets);
376 if (!buckets) return;
378 memcpy(buckets, ht->buckets,
379 sizeof(*buckets) * ht->num_buckets);
380 memset(&buckets[ht->num_buckets], 0,
381 sizeof(*buckets) * ht->num_buckets);
384 ht->buckets = buckets;
385 ht->num_buckets *= GROW_FACTOR;
386 ht->next_grow *= GROW_FACTOR;
387 ht->mask = ht->num_buckets - 1;
390 fprintf(stderr, "GROW TO %d\n", ht->num_buckets);
398 int lrad_hash_table_insert(lrad_hash_table_t *ht, void *data)
403 lrad_hash_entry_t *node;
405 if (!ht || !data) return 0;
407 key = ht->hash(data);
408 entry = key & ht->mask;
409 reversed = reverse(key);
411 if (!ht->buckets[entry]) lrad_hash_table_fixup(ht, entry);
414 * If we try to do our own memory allocation here, the
415 * speedup is only ~15% or so, which isn't worth it.
417 node = malloc(sizeof(*node));
419 memset(node, 0, sizeof(*node));
421 node->next = &ht->null;
422 node->reversed = reversed;
426 /* already in the table, can't insert it */
427 if (!list_insert(ht, &ht->buckets[entry], node)) {
433 * Check the load factor, and grow the table if
437 if (ht->num_elements >= ht->next_grow) {
438 lrad_hash_table_grow(ht);
446 * Internal find a node routine.
448 static lrad_hash_entry_t *lrad_hash_table_find(lrad_hash_table_t *ht,
455 if (!ht) return NULL;
457 key = ht->hash(data);
458 entry = key & ht->mask;
459 reversed = reverse(key);
461 if (!ht->buckets[entry]) lrad_hash_table_fixup(ht, entry);
463 return list_find(ht, ht->buckets[entry], reversed, data);
468 * Replace old data with new data, OR insert if there is no old.
470 int lrad_hash_table_replace(lrad_hash_table_t *ht, void *data)
472 lrad_hash_entry_t *node;
474 if (!ht || !data) return 0;
476 node = lrad_hash_table_find(ht, data);
477 if (!node) return lrad_hash_table_insert(ht, data);
479 if (ht->free) ht->free(node->data);
487 * Find data from a template
489 void *lrad_hash_table_finddata(lrad_hash_table_t *ht, const void *data)
491 lrad_hash_entry_t *node;
493 node = lrad_hash_table_find(ht, data);
494 if (!node) return NULL;
502 * Yank an entry from the hash table, without freeing the data.
504 void *lrad_hash_table_yank(lrad_hash_table_t *ht, const void *data)
510 lrad_hash_entry_t *node;
512 if (!ht) return NULL;
514 key = ht->hash(data);
515 entry = key & ht->mask;
516 reversed = reverse(key);
518 if (!ht->buckets[entry]) lrad_hash_table_fixup(ht, entry);
520 node = list_find(ht, ht->buckets[entry], reversed, data);
521 if (!node) return NULL;
523 list_delete(ht, &ht->buckets[entry], node);
534 * Delete a piece of data from the hash table.
536 int lrad_hash_table_delete(lrad_hash_table_t *ht, const void *data)
540 old = lrad_hash_table_yank(ht, data);
543 if (ht->free) ht->free(old);
552 void lrad_hash_table_free(lrad_hash_table_t *ht)
554 lrad_hash_entry_t *node, *next;
559 * The entries MUST be all in one linked list.
561 for (node = ht->buckets[0]; node != &ht->null; node = next) {
564 if (!node->data) continue; /* dummy entry */
566 if (ht->free) ht->free(node->data);
576 * Count number of elements
578 int lrad_hash_table_num_elements(lrad_hash_table_t *ht)
582 return ht->num_elements;
587 * Walk over the nodes, allowing deletes & inserts to happen.
589 int lrad_hash_table_walk(lrad_hash_table_t *ht,
590 lrad_hash_table_walk_t callback,
595 if (!ht || !callback) return 0;
597 for (i = ht->num_buckets - 1; i >= 0; i--) {
598 lrad_hash_entry_t *node, *next;
601 * Ensure that the current bucket is filled.
603 if (!ht->buckets[i]) lrad_hash_table_fixup(ht, i);
605 for (node = ht->buckets[i]; node != &ht->null; node = next) {
608 rcode = callback(context, node->data);
609 if (rcode != 0) return rcode;
619 * Show what the hash table is doing.
621 int lrad_hash_table_info(lrad_hash_table_t *ht)
623 int i, a, collisions, uninitialized;
628 uninitialized = collisions = 0;
629 memset(array, 0, sizeof(array));
631 for (i = 0; i < ht->num_buckets; i++) {
634 lrad_hash_entry_t *node, *next;
637 * If we haven't inserted or looked up an entry
638 * in a bucket, it's uninitialized.
640 if (!ht->buckets[i]) {
647 for (node = ht->buckets[i]; node != &ht->null; node = next) {
648 if (node->reversed == key) {
651 key = node->reversed;
657 if (load > 255) load = 255;
661 printf("HASH TABLE %p\tbuckets: %d\t(%d uninitialized)\n", ht,
662 ht->num_buckets, uninitialized);
663 printf("\tnum entries %d\thash collisions %d\n",
664 ht->num_elements, collisions);
667 for (i = 1; i < 256; i++) {
668 if (!array[i]) continue;
669 printf("%d\t%d\n", i, array[i]);
672 * Since the entries are ordered, the lookup cost
673 * for any one element in a chain is (on average)
674 * the cost of walking half of the chain.
683 printf("\texpected lookup cost = %d/%d or %f\n\n",
685 (float) ht->num_elements / (float) a);
692 #define FNV_MAGIC_INIT (0x811c9dc5)
693 #define FNV_MAGIC_PRIME (0x01000193)
696 * A fast hash function. For details, see:
698 * http://www.isthe.com/chongo/tech/comp/fnv/
700 * Which also includes public domain source. We've re-written
701 * it here for our purposes.
703 uint32_t lrad_hash(const void *data, size_t size)
705 const uint8_t *p = data;
706 const uint8_t *q = p + size;
707 uint32_t hash = FNV_MAGIC_INIT;
710 * FNV-1 hash each octet in the buffer
714 * Multiple by 32-bit magic FNV prime, mod 2^32
716 hash *= FNV_MAGIC_PRIME;
719 * Potential optimization.
721 hash += (hash<<1) + (hash<<4) + (hash<<7) + (hash<<8) + (hash<<24);
724 * XOR the 8-bit quantity into the bottom of
727 hash ^= (uint32_t) (*p++);
734 * Continue hashing data.
736 uint32_t lrad_hash_update(const void *data, size_t size, uint32_t hash)
738 const uint8_t *p = data;
739 const uint8_t *q = p + size;
742 hash *= FNV_MAGIC_PRIME;
743 hash ^= (uint32_t) (*p++);
751 * Return a "folded" hash, where the lower "bits" are the
752 * hash, and the upper bits are zero.
754 * If you need a non-power-of-two hash, cope.
756 uint32_t lrad_hash_fold(uint32_t hash, int bits)
761 if ((bits <= 0) || (bits >= 32)) return hash;
766 * Never use the same bits twice in an xor.
768 for (count = 0; count < 32; count += bits) {
773 return result & (((uint32_t) (1 << bits)) - 1);
778 * Hash a C string, so we loop over it once.
780 uint32_t lrad_hash_string(const char *p)
782 uint32_t hash = FNV_MAGIC_INIT;
785 hash *= FNV_MAGIC_PRIME;
786 hash ^= (uint32_t) (*p++);
795 * cc -DTESTING -I .. hash.c -o hash
803 static uint32_t hash_int(const void *data)
805 return lrad_hash((int *) data, sizeof(int));
808 #define MAX 1024*1024
809 int main(int argc, char **argv)
812 lrad_hash_table_t *ht;
815 ht = lrad_hash_table_create(hash_int, NULL, NULL);
817 fprintf(stderr, "Hash create failed\n");
821 array = malloc(sizeof(int) * MAX);
823 for (i = 0; i < MAX; i++) {
827 if (!lrad_hash_table_insert(ht, p)) {
828 fprintf(stderr, "Failed insert %08x\n", i);
832 q = lrad_hash_table_finddata(ht, p);
834 fprintf(stderr, "Bad data %d\n", i);
840 lrad_hash_table_info(ht);
843 * Build this to see how lookups result in shortening
844 * of the hash chains.
847 for (i = 0; i < MAX ; i++) {
848 q = lrad_hash_table_finddata(ht, &i);
850 fprintf(stderr, "Failed finding %d\n", i);
855 if (!lrad_hash_table_delete(ht, &i)) {
856 fprintf(stderr, "Failed deleting %d\n", i);
859 q = lrad_hash_table_finddata(ht, &i);
861 fprintf(stderr, "Failed to delete %08x\n", i);
867 lrad_hash_table_info(ht);
870 lrad_hash_table_free(ht);