2 * hash.c Non-thread-safe split-ordered hash table.
4 * The weird "reverse" function is based on an idea from
5 * "Split-Ordered Lists - Lock-free Resizable Hash Tables", with
6 * modifications so that they're not lock-free. :(
8 * However, the split-order idea allows a fast & easy splitting of the
9 * hash bucket chain when the hash table is resized. Without it, we'd
10 * have to check & update the pointers for every node in the buck chain,
11 * rather than being able to move 1/2 of the entries in the chain with
16 * This library is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU Lesser General Public
18 * License as published by the Free Software Foundation; either
19 * version 2.1 of the License, or (at your option) any later version.
21 * This library is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 * Lesser General Public License for more details.
26 * You should have received a copy of the GNU Lesser General Public
27 * License along with this library; if not, write to the Free Software
28 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30 * Copyright 2005,2006 The FreeRADIUS server project
33 #include <freeradius-devel/ident.h>
36 #include <freeradius-devel/autoconf.h>
41 #include <freeradius-devel/missing.h>
42 #include <freeradius-devel/hash.h>
45 * A reasonable number of buckets to start off with.
46 * Should be a power of two.
48 #define LRAD_HASH_NUM_BUCKETS (64)
50 typedef struct lrad_hash_entry_t {
51 struct lrad_hash_entry_t *next;
58 struct lrad_hash_table_t {
60 int num_buckets; /* power of 2 */
64 lrad_hash_table_free_t free;
65 lrad_hash_table_hash_t hash;
66 lrad_hash_table_cmp_t cmp;
68 lrad_hash_entry_t null;
70 lrad_hash_entry_t **buckets;
80 * perl -e 'foreach $i (0..255) {$r = 0; foreach $j (0 .. 7 ) { if (($i & ( 1<< $j)) != 0) { $r |= (1 << (7 - $j));}} print $r, ", ";if (($i & 7) == 7) {print "\n";}}'
82 static const uint8_t reversed_byte[256] = {
83 0, 128, 64, 192, 32, 160, 96, 224,
84 16, 144, 80, 208, 48, 176, 112, 240,
85 8, 136, 72, 200, 40, 168, 104, 232,
86 24, 152, 88, 216, 56, 184, 120, 248,
87 4, 132, 68, 196, 36, 164, 100, 228,
88 20, 148, 84, 212, 52, 180, 116, 244,
89 12, 140, 76, 204, 44, 172, 108, 236,
90 28, 156, 92, 220, 60, 188, 124, 252,
91 2, 130, 66, 194, 34, 162, 98, 226,
92 18, 146, 82, 210, 50, 178, 114, 242,
93 10, 138, 74, 202, 42, 170, 106, 234,
94 26, 154, 90, 218, 58, 186, 122, 250,
95 6, 134, 70, 198, 38, 166, 102, 230,
96 22, 150, 86, 214, 54, 182, 118, 246,
97 14, 142, 78, 206, 46, 174, 110, 238,
98 30, 158, 94, 222, 62, 190, 126, 254,
99 1, 129, 65, 193, 33, 161, 97, 225,
100 17, 145, 81, 209, 49, 177, 113, 241,
101 9, 137, 73, 201, 41, 169, 105, 233,
102 25, 153, 89, 217, 57, 185, 121, 249,
103 5, 133, 69, 197, 37, 165, 101, 229,
104 21, 149, 85, 213, 53, 181, 117, 245,
105 13, 141, 77, 205, 45, 173, 109, 237,
106 29, 157, 93, 221, 61, 189, 125, 253,
107 3, 131, 67, 195, 35, 163, 99, 227,
108 19, 147, 83, 211, 51, 179, 115, 243,
109 11, 139, 75, 203, 43, 171, 107, 235,
110 27, 155, 91, 219, 59, 187, 123, 251,
111 7, 135, 71, 199, 39, 167, 103, 231,
112 23, 151, 87, 215, 55, 183, 119, 247,
113 15, 143, 79, 207, 47, 175, 111, 239,
114 31, 159, 95, 223, 63, 191, 127, 255
119 * perl -e 'foreach $i (0..255) {$r = 0;foreach $j (0 .. 7) { $r = $i & (1 << (7 - $j)); last if ($r)} print $i & ~($r), ", ";if (($i & 7) == 7) {print "\n";}}'
121 static uint8_t parent_byte[256] = {
122 0, 0, 0, 1, 0, 1, 2, 3,
123 0, 1, 2, 3, 4, 5, 6, 7,
124 0, 1, 2, 3, 4, 5, 6, 7,
125 8, 9, 10, 11, 12, 13, 14, 15,
126 0, 1, 2, 3, 4, 5, 6, 7,
127 8, 9, 10, 11, 12, 13, 14, 15,
128 16, 17, 18, 19, 20, 21, 22, 23,
129 24, 25, 26, 27, 28, 29, 30, 31,
130 0, 1, 2, 3, 4, 5, 6, 7,
131 8, 9, 10, 11, 12, 13, 14, 15,
132 16, 17, 18, 19, 20, 21, 22, 23,
133 24, 25, 26, 27, 28, 29, 30, 31,
134 32, 33, 34, 35, 36, 37, 38, 39,
135 40, 41, 42, 43, 44, 45, 46, 47,
136 48, 49, 50, 51, 52, 53, 54, 55,
137 56, 57, 58, 59, 60, 61, 62, 63,
138 0, 1, 2, 3, 4, 5, 6, 7,
139 8, 9, 10, 11, 12, 13, 14, 15,
140 16, 17, 18, 19, 20, 21, 22, 23,
141 24, 25, 26, 27, 28, 29, 30, 31,
142 32, 33, 34, 35, 36, 37, 38, 39,
143 40, 41, 42, 43, 44, 45, 46, 47,
144 48, 49, 50, 51, 52, 53, 54, 55,
145 56, 57, 58, 59, 60, 61, 62, 63,
146 64, 65, 66, 67, 68, 69, 70, 71,
147 72, 73, 74, 75, 76, 77, 78, 79,
148 80, 81, 82, 83, 84, 85, 86, 87,
149 88, 89, 90, 91, 92, 93, 94, 95,
150 96, 97, 98, 99, 100, 101, 102, 103,
151 104, 105, 106, 107, 108, 109, 110, 111,
152 112, 113, 114, 115, 116, 117, 118, 119,
153 120, 121, 122, 123, 124, 125, 126, 127
160 static uint32_t reverse(uint32_t key)
162 return ((reversed_byte[key & 0xff] << 24) |
163 (reversed_byte[(key >> 8) & 0xff] << 16) |
164 (reversed_byte[(key >> 16) & 0xff] << 8) |
165 (reversed_byte[(key >> 24) & 0xff]));
169 * Take the parent by discarding the highest bit that is set.
171 static uint32_t parent_of(uint32_t key)
173 if (key > 0x00ffffff)
174 return (key & 0x00ffffff) | (parent_byte[key >> 24] << 24);
176 if (key > 0x0000ffff)
177 return (key & 0x0000ffff) | (parent_byte[key >> 16] << 16);
179 if (key > 0x000000ff)
180 return (key & 0x000000ff) | (parent_byte[key >> 8] << 8);
182 return parent_byte[key];
186 static lrad_hash_entry_t *list_find(lrad_hash_table_t *ht,
187 lrad_hash_entry_t *head,
191 lrad_hash_entry_t *cur;
193 for (cur = head; cur != &ht->null; cur = cur->next) {
194 if (cur->reversed == reversed) {
196 int cmp = ht->cmp(data, cur->data);
198 if (cmp < 0) continue;
202 if (cur->reversed > reversed) break;
210 * Inserts a new entry into the list, in order.
212 static int list_insert(lrad_hash_table_t *ht,
213 lrad_hash_entry_t **head, lrad_hash_entry_t *node)
215 lrad_hash_entry_t **last, *cur;
219 for (cur = *head; cur != &ht->null; cur = cur->next) {
220 if (cur->reversed > node->reversed) break;
223 if (cur->reversed == node->reversed) {
225 int cmp = ht->cmp(node->data, cur->data);
227 if (cmp < 0) continue;
241 * Delete an entry from the list.
243 static int list_delete(lrad_hash_table_t *ht,
244 lrad_hash_entry_t **head, lrad_hash_entry_t *node)
246 lrad_hash_entry_t **last, *cur;
250 for (cur = *head; cur != &ht->null; cur = cur->next) {
253 int cmp = ht->cmp(node->data, cur->data);
255 if (cmp < 0) continue;
270 * Memory usage in bytes is (20/3) * number of entries.
272 lrad_hash_table_t *lrad_hash_table_create(lrad_hash_table_hash_t hashNode,
273 lrad_hash_table_cmp_t cmpNode,
274 lrad_hash_table_free_t freeNode)
276 lrad_hash_table_t *ht;
278 if (!hashNode) return NULL;
280 ht = malloc(sizeof(*ht));
281 if (!ht) return NULL;
283 memset(ht, 0, sizeof(*ht));
287 ht->num_buckets = LRAD_HASH_NUM_BUCKETS;
288 ht->mask = ht->num_buckets - 1;
291 * Have a default load factor of 2.5. In practice this
292 * means that the average load will hit 3 before the
295 ht->next_grow = (ht->num_buckets << 1) + (ht->num_buckets >> 1);
297 ht->buckets = malloc(sizeof(*ht->buckets) * ht->num_buckets);
302 memset(ht->buckets, 0, sizeof(*ht->buckets) * ht->num_buckets);
304 ht->null.reversed = ~0;
306 ht->null.next = &ht->null;
308 ht->buckets[0] = &ht->null;
315 * If the current bucket is uninitialized, initialize it
316 * by recursively copying information from the parent.
318 * We may have a situation where entry E is a parent to 2 other
319 * entries E' and E". If we split E into E and E', then the
320 * nodes meant for E" end up in E or E', either of which is
321 * wrong. To solve that problem, we walk down the whole chain,
322 * inserting the elements into the correct place.
324 static void lrad_hash_table_fixup(lrad_hash_table_t *ht, uint32_t entry)
326 uint32_t parent_entry = parent_of(entry);
327 lrad_hash_entry_t **last, *cur;
330 parent_entry = parent_of(entry);
332 /* parent_entry == entry if and only if entry == 0 */
334 if (!ht->buckets[parent_entry]) {
335 lrad_hash_table_fixup(ht, parent_entry);
339 * Keep walking down cur, trying to find entries that
340 * don't belong here any more. There may be multiple
341 * ones, so we can't have a naive algorithm...
343 last = &ht->buckets[parent_entry];
346 for (cur = *last; cur != &ht->null; cur = cur->next) {
349 real_entry = cur->key & ht->mask;
350 if (real_entry != this) { /* ht->buckets[real_entry] == NULL */
352 ht->buckets[real_entry] = cur;
360 * We may NOT have initialized this bucket, so do it now.
362 if (!ht->buckets[entry]) ht->buckets[entry] = &ht->null;
366 * This should be a power of two. Changing it to 4 doesn't seem
367 * to make any difference.
369 #define GROW_FACTOR (2)
372 * Grow the hash table.
374 static void lrad_hash_table_grow(lrad_hash_table_t *ht)
376 lrad_hash_entry_t **buckets;
378 buckets = malloc(sizeof(*buckets) * GROW_FACTOR * ht->num_buckets);
379 if (!buckets) return;
381 memcpy(buckets, ht->buckets,
382 sizeof(*buckets) * ht->num_buckets);
383 memset(&buckets[ht->num_buckets], 0,
384 sizeof(*buckets) * ht->num_buckets);
387 ht->buckets = buckets;
388 ht->num_buckets *= GROW_FACTOR;
389 ht->next_grow *= GROW_FACTOR;
390 ht->mask = ht->num_buckets - 1;
393 fprintf(stderr, "GROW TO %d\n", ht->num_buckets);
401 int lrad_hash_table_insert(lrad_hash_table_t *ht, void *data)
406 lrad_hash_entry_t *node;
408 if (!ht || !data) return 0;
410 key = ht->hash(data);
411 entry = key & ht->mask;
412 reversed = reverse(key);
414 if (!ht->buckets[entry]) lrad_hash_table_fixup(ht, entry);
417 * If we try to do our own memory allocation here, the
418 * speedup is only ~15% or so, which isn't worth it.
420 node = malloc(sizeof(*node));
422 memset(node, 0, sizeof(*node));
424 node->next = &ht->null;
425 node->reversed = reversed;
429 /* already in the table, can't insert it */
430 if (!list_insert(ht, &ht->buckets[entry], node)) {
436 * Check the load factor, and grow the table if
440 if (ht->num_elements >= ht->next_grow) {
441 lrad_hash_table_grow(ht);
449 * Internal find a node routine.
451 static lrad_hash_entry_t *lrad_hash_table_find(lrad_hash_table_t *ht,
458 if (!ht) return NULL;
460 key = ht->hash(data);
461 entry = key & ht->mask;
462 reversed = reverse(key);
464 if (!ht->buckets[entry]) lrad_hash_table_fixup(ht, entry);
466 return list_find(ht, ht->buckets[entry], reversed, data);
471 * Replace old data with new data, OR insert if there is no old.
473 int lrad_hash_table_replace(lrad_hash_table_t *ht, void *data)
475 lrad_hash_entry_t *node;
477 if (!ht || !data) return 0;
479 node = lrad_hash_table_find(ht, data);
480 if (!node) return lrad_hash_table_insert(ht, data);
482 if (ht->free) ht->free(node->data);
490 * Find data from a template
492 void *lrad_hash_table_finddata(lrad_hash_table_t *ht, const void *data)
494 lrad_hash_entry_t *node;
496 node = lrad_hash_table_find(ht, data);
497 if (!node) return NULL;
505 * Yank an entry from the hash table, without freeing the data.
507 void *lrad_hash_table_yank(lrad_hash_table_t *ht, const void *data)
513 lrad_hash_entry_t *node;
515 if (!ht) return NULL;
517 key = ht->hash(data);
518 entry = key & ht->mask;
519 reversed = reverse(key);
521 if (!ht->buckets[entry]) lrad_hash_table_fixup(ht, entry);
523 node = list_find(ht, ht->buckets[entry], reversed, data);
524 if (!node) return NULL;
526 list_delete(ht, &ht->buckets[entry], node);
537 * Delete a piece of data from the hash table.
539 int lrad_hash_table_delete(lrad_hash_table_t *ht, const void *data)
543 old = lrad_hash_table_yank(ht, data);
546 if (ht->free) ht->free(old);
555 void lrad_hash_table_free(lrad_hash_table_t *ht)
558 lrad_hash_entry_t *node, *next;
563 * Walk over the buckets, freeing them all.
565 for (i = 0; i < ht->num_buckets; i++) {
566 if (ht->buckets[i]) for (node = ht->buckets[i];
571 if (!node->data) continue; /* dummy entry */
573 if (ht->free) ht->free(node->data);
584 * Count number of elements
586 int lrad_hash_table_num_elements(lrad_hash_table_t *ht)
590 return ht->num_elements;
595 * Walk over the nodes, allowing deletes & inserts to happen.
597 int lrad_hash_table_walk(lrad_hash_table_t *ht,
598 lrad_hash_table_walk_t callback,
603 if (!ht || !callback) return 0;
605 for (i = ht->num_buckets - 1; i >= 0; i--) {
606 lrad_hash_entry_t *node, *next;
609 * Ensure that the current bucket is filled.
611 if (!ht->buckets[i]) lrad_hash_table_fixup(ht, i);
613 for (node = ht->buckets[i]; node != &ht->null; node = next) {
616 rcode = callback(context, node->data);
617 if (rcode != 0) return rcode;
627 * Show what the hash table is doing.
629 int lrad_hash_table_info(lrad_hash_table_t *ht)
631 int i, a, collisions, uninitialized;
636 uninitialized = collisions = 0;
637 memset(array, 0, sizeof(array));
639 for (i = 0; i < ht->num_buckets; i++) {
642 lrad_hash_entry_t *node, *next;
645 * If we haven't inserted or looked up an entry
646 * in a bucket, it's uninitialized.
648 if (!ht->buckets[i]) {
655 for (node = ht->buckets[i]; node != &ht->null; node = next) {
656 if (node->reversed == key) {
659 key = node->reversed;
665 if (load > 255) load = 255;
669 printf("HASH TABLE %p\tbuckets: %d\t(%d uninitialized)\n", ht,
670 ht->num_buckets, uninitialized);
671 printf("\tnum entries %d\thash collisions %d\n",
672 ht->num_elements, collisions);
675 for (i = 1; i < 256; i++) {
676 if (!array[i]) continue;
677 printf("%d\t%d\n", i, array[i]);
680 * Since the entries are ordered, the lookup cost
681 * for any one element in a chain is (on average)
682 * the cost of walking half of the chain.
691 printf("\texpected lookup cost = %d/%d or %f\n\n",
693 (float) ht->num_elements / (float) a);
700 #define FNV_MAGIC_INIT (0x811c9dc5)
701 #define FNV_MAGIC_PRIME (0x01000193)
704 * A fast hash function. For details, see:
706 * http://www.isthe.com/chongo/tech/comp/fnv/
708 * Which also includes public domain source. We've re-written
709 * it here for our purposes.
711 uint32_t lrad_hash(const void *data, size_t size)
713 const uint8_t *p = data;
714 const uint8_t *q = p + size;
715 uint32_t hash = FNV_MAGIC_INIT;
718 * FNV-1 hash each octet in the buffer
722 * Multiple by 32-bit magic FNV prime, mod 2^32
724 hash *= FNV_MAGIC_PRIME;
727 * Potential optimization.
729 hash += (hash<<1) + (hash<<4) + (hash<<7) + (hash<<8) + (hash<<24);
732 * XOR the 8-bit quantity into the bottom of
735 hash ^= (uint32_t) (*p++);
742 * Continue hashing data.
744 uint32_t lrad_hash_update(const void *data, size_t size, uint32_t hash)
746 const uint8_t *p = data;
747 const uint8_t *q = p + size;
750 hash *= FNV_MAGIC_PRIME;
751 hash ^= (uint32_t) (*p++);
759 * Return a "folded" hash, where the lower "bits" are the
760 * hash, and the upper bits are zero.
762 * If you need a non-power-of-two hash, cope.
764 uint32_t lrad_hash_fold(uint32_t hash, int bits)
769 if ((bits <= 0) || (bits >= 32)) return hash;
774 * Never use the same bits twice in an xor.
776 for (count = 0; count < 32; count += bits) {
781 return result & (((uint32_t) (1 << bits)) - 1);
786 * Hash a C string, so we loop over it once.
788 uint32_t lrad_hash_string(const char *p)
790 uint32_t hash = FNV_MAGIC_INIT;
793 hash *= FNV_MAGIC_PRIME;
794 hash ^= (uint32_t) (*p++);
803 * cc -g -DTESTING -I ../include hash.c -o hash
811 static uint32_t hash_int(const void *data)
813 return lrad_hash((int *) data, sizeof(int));
816 #define MAX 1024*1024
817 int main(int argc, char **argv)
820 lrad_hash_table_t *ht;
823 ht = lrad_hash_table_create(hash_int, NULL, NULL);
825 fprintf(stderr, "Hash create failed\n");
829 array = malloc(sizeof(int) * MAX);
832 for (i = 0; i < MAX; i++) {
836 if (!lrad_hash_table_insert(ht, p)) {
837 fprintf(stderr, "Failed insert %08x\n", i);
841 q = lrad_hash_table_finddata(ht, p);
843 fprintf(stderr, "Bad data %d\n", i);
849 lrad_hash_table_info(ht);
852 * Build this to see how lookups result in shortening
853 * of the hash chains.
856 for (i = 0; i < MAX ; i++) {
857 q = lrad_hash_table_finddata(ht, &i);
859 fprintf(stderr, "Failed finding %d\n", i);
864 if (!lrad_hash_table_delete(ht, &i)) {
865 fprintf(stderr, "Failed deleting %d\n", i);
868 q = lrad_hash_table_finddata(ht, &i);
870 fprintf(stderr, "Failed to delete %08x\n", i);
876 lrad_hash_table_info(ht);
879 lrad_hash_table_free(ht);