2 * hash.c Non-thread-safe split-ordered hash table.
4 * The weird "reverse" function is based on an idea from
5 * "Split-Ordered Lists - Lock-free Resizable Hash Tables", with
6 * modifications so that they're not lock-free. :(
8 * However, the split-order idea allows a fast & easy splitting of the
9 * hash bucket chain when the hash table is resized. Without it, we'd
10 * have to check & update the pointers for every node in the buck chain,
11 * rather than being able to move 1/2 of the entries in the chain with
16 * This library is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU Lesser General Public
18 * License as published by the Free Software Foundation; either
19 * version 2.1 of the License, or (at your option) any later version.
21 * This library is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 * Lesser General Public License for more details.
26 * You should have received a copy of the GNU Lesser General Public
27 * License along with this library; if not, write to the Free Software
28 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 * Copyright 2005,2006 The FreeRADIUS server project
33 #include <freeradius-devel/ident.h>
36 #include <freeradius-devel/autoconf.h>
41 #include <freeradius-devel/missing.h>
42 #include <freeradius-devel/hash.h>
45 * A reasonable number of buckets to start off with.
46 * Should be a power of two.
48 #define LRAD_HASH_NUM_BUCKETS (64)
50 typedef struct lrad_hash_entry_t {
51 struct lrad_hash_entry_t *next;
58 struct lrad_hash_table_t {
60 int num_buckets; /* power of 2 */
64 lrad_hash_table_free_t free;
65 lrad_hash_table_hash_t hash;
66 lrad_hash_table_cmp_t cmp;
68 lrad_hash_entry_t null;
70 lrad_hash_entry_t **buckets;
78 * perl -e 'foreach $i (0..255) {$r = 0; foreach $j (0 .. 7 ) { if (($i & ( 1<< $j)) != 0) { $r |= (1 << (7 - $j));}} print $r, ", ";if (($i & 7) == 7) {print "\n";}}'
80 static const uint8_t reversed_byte[256] = {
81 0, 128, 64, 192, 32, 160, 96, 224,
82 16, 144, 80, 208, 48, 176, 112, 240,
83 8, 136, 72, 200, 40, 168, 104, 232,
84 24, 152, 88, 216, 56, 184, 120, 248,
85 4, 132, 68, 196, 36, 164, 100, 228,
86 20, 148, 84, 212, 52, 180, 116, 244,
87 12, 140, 76, 204, 44, 172, 108, 236,
88 28, 156, 92, 220, 60, 188, 124, 252,
89 2, 130, 66, 194, 34, 162, 98, 226,
90 18, 146, 82, 210, 50, 178, 114, 242,
91 10, 138, 74, 202, 42, 170, 106, 234,
92 26, 154, 90, 218, 58, 186, 122, 250,
93 6, 134, 70, 198, 38, 166, 102, 230,
94 22, 150, 86, 214, 54, 182, 118, 246,
95 14, 142, 78, 206, 46, 174, 110, 238,
96 30, 158, 94, 222, 62, 190, 126, 254,
97 1, 129, 65, 193, 33, 161, 97, 225,
98 17, 145, 81, 209, 49, 177, 113, 241,
99 9, 137, 73, 201, 41, 169, 105, 233,
100 25, 153, 89, 217, 57, 185, 121, 249,
101 5, 133, 69, 197, 37, 165, 101, 229,
102 21, 149, 85, 213, 53, 181, 117, 245,
103 13, 141, 77, 205, 45, 173, 109, 237,
104 29, 157, 93, 221, 61, 189, 125, 253,
105 3, 131, 67, 195, 35, 163, 99, 227,
106 19, 147, 83, 211, 51, 179, 115, 243,
107 11, 139, 75, 203, 43, 171, 107, 235,
108 27, 155, 91, 219, 59, 187, 123, 251,
109 7, 135, 71, 199, 39, 167, 103, 231,
110 23, 151, 87, 215, 55, 183, 119, 247,
111 15, 143, 79, 207, 47, 175, 111, 239,
112 31, 159, 95, 223, 63, 191, 127, 255
117 * perl -e 'foreach $i (0..255) {$r = 0;foreach $j (0 .. 7) { $r = $i & (1 << (7 - $j)); last if ($r)} print $i & ~($r), ", ";if (($i & 7) == 7) {print "\n";}}'
119 static uint8_t parent_byte[256] = {
120 0, 0, 0, 1, 0, 1, 2, 3,
121 0, 1, 2, 3, 4, 5, 6, 7,
122 0, 1, 2, 3, 4, 5, 6, 7,
123 8, 9, 10, 11, 12, 13, 14, 15,
124 0, 1, 2, 3, 4, 5, 6, 7,
125 8, 9, 10, 11, 12, 13, 14, 15,
126 16, 17, 18, 19, 20, 21, 22, 23,
127 24, 25, 26, 27, 28, 29, 30, 31,
128 0, 1, 2, 3, 4, 5, 6, 7,
129 8, 9, 10, 11, 12, 13, 14, 15,
130 16, 17, 18, 19, 20, 21, 22, 23,
131 24, 25, 26, 27, 28, 29, 30, 31,
132 32, 33, 34, 35, 36, 37, 38, 39,
133 40, 41, 42, 43, 44, 45, 46, 47,
134 48, 49, 50, 51, 52, 53, 54, 55,
135 56, 57, 58, 59, 60, 61, 62, 63,
136 0, 1, 2, 3, 4, 5, 6, 7,
137 8, 9, 10, 11, 12, 13, 14, 15,
138 16, 17, 18, 19, 20, 21, 22, 23,
139 24, 25, 26, 27, 28, 29, 30, 31,
140 32, 33, 34, 35, 36, 37, 38, 39,
141 40, 41, 42, 43, 44, 45, 46, 47,
142 48, 49, 50, 51, 52, 53, 54, 55,
143 56, 57, 58, 59, 60, 61, 62, 63,
144 64, 65, 66, 67, 68, 69, 70, 71,
145 72, 73, 74, 75, 76, 77, 78, 79,
146 80, 81, 82, 83, 84, 85, 86, 87,
147 88, 89, 90, 91, 92, 93, 94, 95,
148 96, 97, 98, 99, 100, 101, 102, 103,
149 104, 105, 106, 107, 108, 109, 110, 111,
150 112, 113, 114, 115, 116, 117, 118, 119,
151 120, 121, 122, 123, 124, 125, 126, 127
158 static uint32_t reverse(uint32_t key)
160 return ((reversed_byte[key & 0xff] << 24) |
161 (reversed_byte[(key >> 8) & 0xff] << 16) |
162 (reversed_byte[(key >> 16) & 0xff] << 8) |
163 (reversed_byte[(key >> 24) & 0xff]));
167 * Take the parent by discarding the highest bit that is set.
169 static uint32_t parent_of(uint32_t key)
171 if (key > 0x00ffffff)
172 return (key & 0x00ffffff) | (parent_byte[key >> 24] << 24);
174 if (key > 0x0000ffff)
175 return (key & 0x0000ffff) | (parent_byte[key >> 16] << 16);
177 if (key > 0x000000ff)
178 return (key & 0x000000ff) | (parent_byte[key >> 8] << 8);
180 return parent_byte[key];
184 static lrad_hash_entry_t *list_find(lrad_hash_table_t *ht,
185 lrad_hash_entry_t *head,
189 lrad_hash_entry_t *cur;
191 for (cur = head; cur != &ht->null; cur = cur->next) {
192 if (cur->reversed == reversed) {
194 int cmp = ht->cmp(data, cur->data);
196 if (cmp < 0) continue;
200 if (cur->reversed > reversed) break;
208 * Inserts a new entry into the list, in order.
210 static int list_insert(lrad_hash_table_t *ht,
211 lrad_hash_entry_t **head, lrad_hash_entry_t *node)
213 lrad_hash_entry_t **last, *cur;
217 for (cur = *head; cur != &ht->null; cur = cur->next) {
218 if (cur->reversed > node->reversed) break;
221 if (cur->reversed == node->reversed) {
223 int cmp = ht->cmp(node->data, cur->data);
225 if (cmp < 0) continue;
239 * Delete an entry from the list.
241 static int list_delete(lrad_hash_table_t *ht,
242 lrad_hash_entry_t **head, lrad_hash_entry_t *node)
244 lrad_hash_entry_t **last, *cur;
248 for (cur = *head; cur != &ht->null; cur = cur->next) {
251 int cmp = ht->cmp(node->data, cur->data);
253 if (cmp < 0) continue;
268 * Memory usage in bytes is (20/3) * number of entries.
270 lrad_hash_table_t *lrad_hash_table_create(lrad_hash_table_hash_t hashNode,
271 lrad_hash_table_cmp_t cmpNode,
272 lrad_hash_table_free_t freeNode)
274 lrad_hash_table_t *ht;
276 if (!hashNode) return NULL;
278 ht = malloc(sizeof(*ht));
279 if (!ht) return NULL;
281 memset(ht, 0, sizeof(*ht));
285 ht->num_buckets = LRAD_HASH_NUM_BUCKETS;
286 ht->mask = ht->num_buckets - 1;
289 * Have a default load factor of 2.5. In practice this
290 * means that the average load will hit 3 before the
293 ht->next_grow = (ht->num_buckets << 1) + (ht->num_buckets >> 1);
295 ht->buckets = malloc(sizeof(*ht->buckets) * ht->num_buckets);
300 memset(ht->buckets, 0, sizeof(*ht->buckets) * ht->num_buckets);
302 ht->null.reversed = ~0;
304 ht->null.next = &ht->null;
306 ht->buckets[0] = &ht->null;
313 * If the current bucket is uninitialized, initialize it
314 * by recursively copying information from the parent.
316 * We may have a situation where entry E is a parent to 2 other
317 * entries E' and E". If we split E into E and E', then the
318 * nodes meant for E" end up in E or E', either of which is
319 * wrong. To solve that problem, we walk down the whole chain,
320 * inserting the elements into the correct place.
322 static void lrad_hash_table_fixup(lrad_hash_table_t *ht, uint32_t entry)
324 uint32_t parent_entry = parent_of(entry);
325 lrad_hash_entry_t **last, *cur;
328 parent_entry = parent_of(entry);
330 /* parent_entry == entry if and only if entry == 0 */
332 if (!ht->buckets[parent_entry]) {
333 lrad_hash_table_fixup(ht, parent_entry);
337 * Keep walking down cur, trying to find entries that
338 * don't belong here any more. There may be multiple
339 * ones, so we can't have a naive algorithm...
341 last = &ht->buckets[parent_entry];
344 for (cur = *last; cur != &ht->null; cur = cur->next) {
347 real_entry = cur->key & ht->mask;
348 if (real_entry != this) { /* ht->buckets[real_entry] == NULL */
350 ht->buckets[real_entry] = cur;
358 * We may NOT have initialized this bucket, so do it now.
360 if (!ht->buckets[entry]) ht->buckets[entry] = &ht->null;
364 * This should be a power of two. Changing it to 4 doesn't seem
365 * to make any difference.
367 #define GROW_FACTOR (2)
370 * Grow the hash table.
372 static void lrad_hash_table_grow(lrad_hash_table_t *ht)
374 lrad_hash_entry_t **buckets;
376 buckets = malloc(sizeof(*buckets) * GROW_FACTOR * ht->num_buckets);
377 if (!buckets) return;
379 memcpy(buckets, ht->buckets,
380 sizeof(*buckets) * ht->num_buckets);
381 memset(&buckets[ht->num_buckets], 0,
382 sizeof(*buckets) * ht->num_buckets);
385 ht->buckets = buckets;
386 ht->num_buckets *= GROW_FACTOR;
387 ht->next_grow *= GROW_FACTOR;
388 ht->mask = ht->num_buckets - 1;
391 fprintf(stderr, "GROW TO %d\n", ht->num_buckets);
399 int lrad_hash_table_insert(lrad_hash_table_t *ht, void *data)
404 lrad_hash_entry_t *node;
406 if (!ht || !data) return 0;
408 key = ht->hash(data);
409 entry = key & ht->mask;
410 reversed = reverse(key);
412 if (!ht->buckets[entry]) lrad_hash_table_fixup(ht, entry);
415 * If we try to do our own memory allocation here, the
416 * speedup is only ~15% or so, which isn't worth it.
418 node = malloc(sizeof(*node));
420 memset(node, 0, sizeof(*node));
422 node->next = &ht->null;
423 node->reversed = reversed;
427 /* already in the table, can't insert it */
428 if (!list_insert(ht, &ht->buckets[entry], node)) {
434 * Check the load factor, and grow the table if
438 if (ht->num_elements >= ht->next_grow) {
439 lrad_hash_table_grow(ht);
447 * Internal find a node routine.
449 static lrad_hash_entry_t *lrad_hash_table_find(lrad_hash_table_t *ht,
456 if (!ht) return NULL;
458 key = ht->hash(data);
459 entry = key & ht->mask;
460 reversed = reverse(key);
462 if (!ht->buckets[entry]) lrad_hash_table_fixup(ht, entry);
464 return list_find(ht, ht->buckets[entry], reversed, data);
469 * Replace old data with new data, OR insert if there is no old.
471 int lrad_hash_table_replace(lrad_hash_table_t *ht, void *data)
473 lrad_hash_entry_t *node;
475 if (!ht || !data) return 0;
477 node = lrad_hash_table_find(ht, data);
478 if (!node) return lrad_hash_table_insert(ht, data);
480 if (ht->free) ht->free(node->data);
488 * Find data from a template
490 void *lrad_hash_table_finddata(lrad_hash_table_t *ht, const void *data)
492 lrad_hash_entry_t *node;
494 node = lrad_hash_table_find(ht, data);
495 if (!node) return NULL;
503 * Yank an entry from the hash table, without freeing the data.
505 void *lrad_hash_table_yank(lrad_hash_table_t *ht, const void *data)
511 lrad_hash_entry_t *node;
513 if (!ht) return NULL;
515 key = ht->hash(data);
516 entry = key & ht->mask;
517 reversed = reverse(key);
519 if (!ht->buckets[entry]) lrad_hash_table_fixup(ht, entry);
521 node = list_find(ht, ht->buckets[entry], reversed, data);
522 if (!node) return NULL;
524 list_delete(ht, &ht->buckets[entry], node);
535 * Delete a piece of data from the hash table.
537 int lrad_hash_table_delete(lrad_hash_table_t *ht, const void *data)
541 old = lrad_hash_table_yank(ht, data);
544 if (ht->free) ht->free(old);
553 void lrad_hash_table_free(lrad_hash_table_t *ht)
555 lrad_hash_entry_t *node, *next;
560 * The entries MUST be all in one linked list.
562 for (node = ht->buckets[0]; node != &ht->null; node = next) {
565 if (!node->data) continue; /* dummy entry */
567 if (ht->free) ht->free(node->data);
577 * Count number of elements
579 int lrad_hash_table_num_elements(lrad_hash_table_t *ht)
583 return ht->num_elements;
588 * Walk over the nodes, allowing deletes & inserts to happen.
590 int lrad_hash_table_walk(lrad_hash_table_t *ht,
591 lrad_hash_table_walk_t callback,
596 if (!ht || !callback) return 0;
598 for (i = ht->num_buckets - 1; i >= 0; i--) {
599 lrad_hash_entry_t *node, *next;
602 * Ensure that the current bucket is filled.
604 if (!ht->buckets[i]) lrad_hash_table_fixup(ht, i);
606 for (node = ht->buckets[i]; node != &ht->null; node = next) {
609 rcode = callback(context, node->data);
610 if (rcode != 0) return rcode;
620 * Show what the hash table is doing.
622 int lrad_hash_table_info(lrad_hash_table_t *ht)
624 int i, a, collisions, uninitialized;
629 uninitialized = collisions = 0;
630 memset(array, 0, sizeof(array));
632 for (i = 0; i < ht->num_buckets; i++) {
635 lrad_hash_entry_t *node, *next;
638 * If we haven't inserted or looked up an entry
639 * in a bucket, it's uninitialized.
641 if (!ht->buckets[i]) {
648 for (node = ht->buckets[i]; node != &ht->null; node = next) {
649 if (node->reversed == key) {
652 key = node->reversed;
658 if (load > 255) load = 255;
662 printf("HASH TABLE %p\tbuckets: %d\t(%d uninitialized)\n", ht,
663 ht->num_buckets, uninitialized);
664 printf("\tnum entries %d\thash collisions %d\n",
665 ht->num_elements, collisions);
668 for (i = 1; i < 256; i++) {
669 if (!array[i]) continue;
670 printf("%d\t%d\n", i, array[i]);
673 * Since the entries are ordered, the lookup cost
674 * for any one element in a chain is (on average)
675 * the cost of walking half of the chain.
684 printf("\texpected lookup cost = %d/%d or %f\n\n",
686 (float) ht->num_elements / (float) a);
693 #define FNV_MAGIC_INIT (0x811c9dc5)
694 #define FNV_MAGIC_PRIME (0x01000193)
697 * A fast hash function. For details, see:
699 * http://www.isthe.com/chongo/tech/comp/fnv/
701 * Which also includes public domain source. We've re-written
702 * it here for our purposes.
704 uint32_t lrad_hash(const void *data, size_t size)
706 const uint8_t *p = data;
707 const uint8_t *q = p + size;
708 uint32_t hash = FNV_MAGIC_INIT;
711 * FNV-1 hash each octet in the buffer
715 * Multiple by 32-bit magic FNV prime, mod 2^32
717 hash *= FNV_MAGIC_PRIME;
720 * Potential optimization.
722 hash += (hash<<1) + (hash<<4) + (hash<<7) + (hash<<8) + (hash<<24);
725 * XOR the 8-bit quantity into the bottom of
728 hash ^= (uint32_t) (*p++);
735 * Continue hashing data.
737 uint32_t lrad_hash_update(const void *data, size_t size, uint32_t hash)
739 const uint8_t *p = data;
740 const uint8_t *q = p + size;
743 hash *= FNV_MAGIC_PRIME;
744 hash ^= (uint32_t) (*p++);
752 * Return a "folded" hash, where the lower "bits" are the
753 * hash, and the upper bits are zero.
755 * If you need a non-power-of-two hash, cope.
757 uint32_t lrad_hash_fold(uint32_t hash, int bits)
762 if ((bits <= 0) || (bits >= 32)) return hash;
767 * Never use the same bits twice in an xor.
769 for (count = 0; count < 32; count += bits) {
774 return result & (((uint32_t) (1 << bits)) - 1);
779 * Hash a C string, so we loop over it once.
781 uint32_t lrad_hash_string(const char *p)
783 uint32_t hash = FNV_MAGIC_INIT;
786 hash *= FNV_MAGIC_PRIME;
787 hash ^= (uint32_t) (*p++);
796 * cc -DTESTING -I .. hash.c -o hash
804 static uint32_t hash_int(const void *data)
806 return lrad_hash((int *) data, sizeof(int));
809 #define MAX 1024*1024
810 int main(int argc, char **argv)
813 lrad_hash_table_t *ht;
816 ht = lrad_hash_table_create(hash_int, NULL, NULL);
818 fprintf(stderr, "Hash create failed\n");
822 array = malloc(sizeof(int) * MAX);
824 for (i = 0; i < MAX; i++) {
828 if (!lrad_hash_table_insert(ht, p)) {
829 fprintf(stderr, "Failed insert %08x\n", i);
833 q = lrad_hash_table_finddata(ht, p);
835 fprintf(stderr, "Bad data %d\n", i);
841 lrad_hash_table_info(ht);
844 * Build this to see how lookups result in shortening
845 * of the hash chains.
848 for (i = 0; i < MAX ; i++) {
849 q = lrad_hash_table_finddata(ht, &i);
851 fprintf(stderr, "Failed finding %d\n", i);
856 if (!lrad_hash_table_delete(ht, &i)) {
857 fprintf(stderr, "Failed deleting %d\n", i);
860 q = lrad_hash_table_finddata(ht, &i);
862 fprintf(stderr, "Failed to delete %08x\n", i);
868 lrad_hash_table_info(ht);
871 lrad_hash_table_free(ht);