Do waitpid() for ALL children, even if there are no threads waiting
[freeradius.git] / src / main / threads.c
index bbc2d0d..130fe9e 100644 (file)
  *   along with this program; if not, write to the Free Software
  *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  *
- * Copyright 2000  The FreeRADIUS server project
+ * Copyright 2000,2006  The FreeRADIUS server project
  * Copyright 2000  Alan DeKok <aland@ox.org>
  */
 
-#include <freeradius-devel/autoconf.h>
+#include <freeradius-devel/ident.h>
+RCSID("$Id$")
 
-#include <stdlib.h>
-#include <string.h>
+#include <freeradius-devel/radiusd.h>
+#include <freeradius-devel/rad_assert.h>
 
 /*
  *     Other OS's have sem_init, OS X doesn't.
  */
-#ifndef DARWIN
+#ifdef HAVE_SEMAPHORE_H
 #include <semaphore.h>
-#else
+#endif
+
+#ifdef DARWIN
 #include <mach/task.h>
 #include <mach/semaphore.h>
 
 #define sem_post(s) semaphore_signal(*s)
 #endif
 
-#include <signal.h>
-
 #ifdef HAVE_SYS_WAIT_H
 #include <sys/wait.h>
 #endif
 
-#include <freeradius-devel/radiusd.h>
-#include <freeradius-devel/rad_assert.h>
-#include <freeradius-devel/conffile.h>
-#include <freeradius-devel/modules.h>
-
-static const char rcsid[] =
-"$Id$";
-
 #ifdef HAVE_PTHREAD_H
 
 #ifdef HAVE_OPENSSL_CRYPTO_H
@@ -67,6 +60,9 @@ static const char rcsid[] =
 #ifdef HAVE_OPENSSL_ERR_H
 #include <openssl/err.h>
 #endif
+#ifdef HAVE_OPENSSL_EVP_H
+#include <openssl/evp.h>
+#endif
 
 #define SEMAPHORE_LOCKED       (0)
 #define SEMAPHORE_UNLOCKED     (1)
@@ -75,14 +71,8 @@ static const char rcsid[] =
 #define THREAD_CANCELLED       (2)
 #define THREAD_EXITED          (3)
 
-#define NUM_FIFOS               (2)
+#define NUM_FIFOS               RAD_LISTEN_MAX
 
-/*
- *     Ordered this way because we prefer proxy, then ongoing, then
- *     start.
- */
-#define FIFO_START   (1)
-#define FIFO_PROXY   (0)
 
 /*
  *  A data structure which contains the information about
@@ -106,14 +96,6 @@ typedef struct THREAD_HANDLE {
        REQUEST              *request;
 } THREAD_HANDLE;
 
-/*
- *     For the request queue.
- */
-typedef struct request_queue_t {
-       REQUEST           *request;
-       RAD_REQUEST_FUNP  fun;
-} request_queue_t;
-
 typedef struct thread_fork_t {
        pid_t           pid;
        int             status;
@@ -143,8 +125,10 @@ typedef struct THREAD_POOL {
        int cleanup_delay;
        int spawn_flag;
 
+#ifdef WNOHANG
        pthread_mutex_t wait_mutex;
-       lrad_hash_table_t *waiters;
+       fr_hash_table_t *waiters;
+#endif
 
        /*
         *      All threads wait on this semaphore, for requests
@@ -159,13 +143,14 @@ typedef struct THREAD_POOL {
 
        int             max_queue_size;
        int             num_queued;
-       int             fifo_state;
-       lrad_fifo_t     *fifo[NUM_FIFOS];
+       fr_fifo_t       *fifo[NUM_FIFOS];
 } THREAD_POOL;
 
 static THREAD_POOL thread_pool;
 static int pool_initialized = FALSE;
+static time_t last_cleaned = 0;
 
+static void thread_pool_manage(time_t now);
 
 /*
  *     A mapping of configuration file names to internal integers
@@ -219,6 +204,13 @@ static int setup_ssl_mutexes(void)
 {
        int i;
 
+#ifdef HAVE_OPENSSL_EVP_H
+       /*
+        *      Enable all ciphers and digests.
+        */
+       OpenSSL_add_all_algorithms();
+#endif
+
        ssl_mutexes = rad_malloc(CRYPTO_num_locks() * sizeof(pthread_mutex_t));
        if (!ssl_mutexes) {
                radlog(L_ERR, "Error allocating memory for SSL mutexes!");
@@ -236,7 +228,7 @@ static int setup_ssl_mutexes(void)
 }
 #endif
 
-
+#ifdef WNOHANG
 /*
  *     We don't want to catch SIGCHLD for a host of reasons.
  *
@@ -259,24 +251,27 @@ static void reap_children(void)
        int status;
        thread_fork_t mytf, *tf;
 
-       if (lrad_hash_table_num_elements(thread_pool.waiters) == 0) return;
 
        pthread_mutex_lock(&thread_pool.wait_mutex);
 
-       while (1) {
+       do {
+       retry:
                pid = waitpid(0, &status, WNOHANG);
                if (pid <= 0) break;
 
                mytf.pid = pid;
-               tf = lrad_hash_table_finddata(thread_pool.waiters, &mytf);
-               if (!tf) continue;
-               
+               tf = fr_hash_table_finddata(thread_pool.waiters, &mytf);
+               if (!tf) goto retry;
+
                tf->status = status;
                tf->exited = 1;
-       }
+       } while (fr_hash_table_num_elements(thread_pool.waiters) > 0);
 
        pthread_mutex_unlock(&thread_pool.wait_mutex);
 }
+#else
+#define reap_children()
+#endif /* WNOHANG */
 
 /*
  *     Add a request to the list of waiting requests.
@@ -286,50 +281,29 @@ static void reap_children(void)
  */
 static int request_enqueue(REQUEST *request, RAD_REQUEST_FUNP fun)
 {
-       int fifo = FIFO_START;
-       request_queue_t *entry;
-
+       rad_assert(request->process == fun);
        pthread_mutex_lock(&thread_pool.queue_mutex);
 
        thread_pool.request_count++;
 
-       /*
-        *      FIXME: Handle proxy replies separately?
-        */
        if (thread_pool.num_queued >= thread_pool.max_queue_size) {
                pthread_mutex_unlock(&thread_pool.queue_mutex);
-               
+
                /*
                 *      Mark the request as done.
                 */
-               radlog(L_ERR|L_CONS, "!!! ERROR !!! The server is blocked: discarding new request %d", request->number);
-               request->finished = TRUE;
+               radlog(L_ERR, "!!! ERROR !!! The server is blocked: discarding new request %d", request->number);
+               request->child_state = REQUEST_DONE;
                return 0;
        }
 
        /*
-        *      Requests get handled in priority.  First, we handle
-        *      replies from a home server, to finish ongoing requests.
-        *
-        *      Then, we handle requests with State, to finish
-        *      multi-packet transactions.
-        *
-        *      Finally, we handle new requests.
+        *      Push the request onto the appropriate fifo for its priority.
         */
-       if (request->proxy_reply) {
-               fifo = FIFO_PROXY;
-       } else {
-               fifo = FIFO_START;
-       }
-
-       entry = rad_malloc(sizeof(*entry));
-       entry->request = request;
-       entry->fun = fun;
-
-       if (!lrad_fifo_push(thread_pool.fifo[fifo], entry)) {
+       if (!fr_fifo_push(thread_pool.fifo[request->priority], request)) {
                pthread_mutex_unlock(&thread_pool.queue_mutex);
                radlog(L_ERR, "!!! ERROR !!! Failed inserting request %d into the queue", request->number);
-               request->finished = TRUE;
+               request->child_state = REQUEST_DONE;
                return 0;
        }
 
@@ -354,54 +328,76 @@ static int request_enqueue(REQUEST *request, RAD_REQUEST_FUNP fun)
 /*
  *     Remove a request from the queue.
  */
-static int request_dequeue(REQUEST **request, RAD_REQUEST_FUNP *fun)
+static int request_dequeue(REQUEST **prequest, RAD_REQUEST_FUNP *fun)
 {
-       int fifo_state;
-       request_queue_t *entry;
+       RAD_LISTEN_TYPE i, start;
+       REQUEST *request = NULL;
 
        reap_children();
 
        pthread_mutex_lock(&thread_pool.queue_mutex);
 
-       fifo_state = thread_pool.fifo_state;
-
- retry:
-       do {
+       /*
+        *      Clear old requests from all queues.
+        *
+        *      We only do one pass over the queue, in order to
+        *      amortize the work across the child threads.  Since we
+        *      do N checks for one request de-queued, the old
+        *      requests will be quickly cleared.
+        */
+       for (i = 0; i < RAD_LISTEN_MAX; i++) {
+               request = fr_fifo_peek(thread_pool.fifo[i]);
+               if (!request ||
+                   (request->master_state != REQUEST_STOP_PROCESSING)) {
+                       continue;
+}
                /*
-                *      Pop an entry from the current queue, and go to
-                *      the next queue.
+                *      This entry was marked to be stopped.  Acknowledge it.
                 */
-               entry = lrad_fifo_pop(thread_pool.fifo[fifo_state]);
-               fifo_state++;
-               if (fifo_state >= NUM_FIFOS) fifo_state = 0;
-       } while ((fifo_state != thread_pool.fifo_state) && !entry);
+               request = fr_fifo_pop(thread_pool.fifo[i]);
+               rad_assert(request != NULL);
+               request->child_state = REQUEST_DONE;
+               thread_pool.num_queued--;
+       }
+
+       start = 0;
+ retry:
+       /*
+        *      Pop a request from the first non-empty queue.
+        */
+       for (i = start; i < RAD_LISTEN_MAX; i++) {
+               request = fr_fifo_pop(thread_pool.fifo[i]);
+               if (request) {
+                       start = i;
+                       break;
+               }
+       }
 
-       if (!entry) {
+       if (!request) {
                pthread_mutex_unlock(&thread_pool.queue_mutex);
-               *request = NULL;
+               *prequest = NULL;
                *fun = NULL;
                return 0;
        }
 
        rad_assert(thread_pool.num_queued > 0);
        thread_pool.num_queued--;
-       *request = entry->request;
-       *fun = entry->fun;
-       free(entry);
+       *prequest = request;
+       *fun = request->process;
 
-       rad_assert(*request != NULL);
-       rad_assert((*request)->magic == REQUEST_MAGIC);
+       rad_assert(request->magic == REQUEST_MAGIC);
        rad_assert(*fun != NULL);
 
        /*
         *      If the request has sat in the queue for too long,
         *      kill it.
         *
-        *      The main clean-up code won't delete the request from
-        *      the request list, until it's marked "finished"
+        *      The main clean-up code can't delete the request from
+        *      the queue, and therefore won't clean it up until we
+        *      have acknowledged it as "done".
         */
-       if ((*request)->options & RAD_REQUEST_OPTION_STOP_NOW) {
-               (*request)->finished = 1;
+       if (request->master_state == REQUEST_STOP_PROCESSING) {
+               request->child_state = REQUEST_DONE;
                goto retry;
        }
 
@@ -409,12 +405,9 @@ static int request_dequeue(REQUEST **request, RAD_REQUEST_FUNP *fun)
         *      The thread is currently processing a request.
         */
        thread_pool.active_threads++;
-       thread_pool.fifo_state = fifo_state;
 
        pthread_mutex_unlock(&thread_pool.queue_mutex);
 
-       rad_assert((*request)->child_pid == NO_SUCH_CHILD_PID);
-
        return 1;
 }
 
@@ -428,33 +421,11 @@ static void *request_handler_thread(void *arg)
 {
        RAD_REQUEST_FUNP  fun;
        THREAD_HANDLE     *self = (THREAD_HANDLE *) arg;
-#ifdef HAVE_PTHREAD_SIGMASK
-       sigset_t set;
-
-       /*
-        *      Block SIGHUP handling for the child threads.
-        *
-        *      This ensures that only the main server thread will
-        *      process HUP signals.
-        *
-        *      If we don't have sigprocmask, then it shouldn't be
-        *      a problem, either, as the sig_hup handler should check
-        *      for this condition.
-        */
-       sigemptyset(&set);
-       sigaddset(&set, SIGHUP);
-       sigaddset(&set, SIGINT);
-       sigaddset(&set, SIGQUIT);
-       sigaddset(&set, SIGTERM);
-       pthread_sigmask(SIG_BLOCK, &set, NULL);
-#endif
 
        /*
         *      Loop forever, until told to exit.
         */
        do {
-               int finished;
-
                /*
                 *      Wait to be signalled.
                 */
@@ -478,6 +449,13 @@ static void *request_handler_thread(void *arg)
 
                DEBUG2("Thread %d got semaphore", self->thread_num);
 
+#ifdef HAVE_OPENSSL_ERR_H
+               /*
+                *      Clear the error queue for the current thread.
+                */
+               ERR_clear_error ();
+#endif
+
                /*
                 *      Try to grab a request from the queue.
                 *
@@ -493,38 +471,12 @@ static void *request_handler_thread(void *arg)
                       self->thread_num, self->request->number,
                       self->request_count);
 
-               /*
-                *      Respond, and reset request->child_pid
-                */
-               finished = rad_respond(self->request, fun);
-               self->request = NULL;
+               radius_handle_request(self->request, fun);
 
                /*
                 *      Update the active threads.
                 */
                pthread_mutex_lock(&thread_pool.queue_mutex);
-
-               /*
-                *      We haven't replied to the client, but we HAVE
-                *      sent a proxied packet, and we have NOT
-                *      received a proxy response.  In that case, send
-                *      the proxied packet now.  Doing this in the mutex
-                *      avoids race conditions.
-                *
-                *      FIXME: this work should really depend on a
-                *      "state", and "next handler", rather than
-                *      horrid hacks like thise.
-                */
-               if (!self->request->reply->data &&
-                   self->request->proxy && self->request->proxy->data
-                   && !self->request->proxy_reply)
-                       self->request->proxy_listener->send(self->request->proxy_listener,
-                                                           (char *)self->request->proxysecret);
-
-               self->request->child_pid = NO_SUCH_CHILD_PID;
-               self->request->finished = finished;
-               self->request = NULL;
-               
                rad_assert(thread_pool.active_threads > 0);
                thread_pool.active_threads--;
                pthread_mutex_unlock(&thread_pool.queue_mutex);
@@ -544,6 +496,7 @@ static void *request_handler_thread(void *arg)
        /*
         *  Do this as the LAST thing before exiting.
         */
+       self->request = NULL;
        self->status = THREAD_EXITED;
 
        return NULL;
@@ -621,7 +574,6 @@ static THREAD_HANDLE *spawn_thread(time_t now)
        memset(handle, 0, sizeof(THREAD_HANDLE));
        handle->prev = NULL;
        handle->next = NULL;
-       handle->pthread_id = NO_SUCH_CHILD_PID;
        handle->thread_num = thread_pool.max_thread_num++;
        handle->request_count = 0;
        handle->status = THREAD_RUNNING;
@@ -647,9 +599,9 @@ static THREAD_HANDLE *spawn_thread(time_t now)
        rcode = pthread_create(&handle->pthread_id, &attr,
                        request_handler_thread, handle);
        if (rcode != 0) {
-               radlog(L_ERR|L_CONS, "FATAL: Thread create failed: %s",
+               radlog(L_ERR, "Thread create failed: %s",
                       strerror(rcode));
-               exit(1);
+               return NULL;
        }
        pthread_attr_destroy(&attr);
 
@@ -700,11 +652,12 @@ int total_active_threads(void)
 }
 
 
+#ifdef WNOHANG
 static uint32_t pid_hash(const void *data)
 {
        const thread_fork_t *tf = data;
 
-       return lrad_hash(&tf->pid, sizeof(tf->pid));
+       return fr_hash(&tf->pid, sizeof(tf->pid));
 }
 
 static int pid_cmp(const void *one, const void *two)
@@ -714,6 +667,7 @@ static int pid_cmp(const void *one, const void *two)
 
        return (a->pid - b->pid);
 }
+#endif
 
 /*
  *     Allocate the thread pool, and seed it with an initial number
@@ -721,63 +675,72 @@ static int pid_cmp(const void *one, const void *two)
  *
  *     FIXME: What to do on a SIGHUP???
  */
-int thread_pool_init(int spawn_flag)
+int thread_pool_init(CONF_SECTION *cs, int *spawn_flag)
 {
        int             i, rcode;
        CONF_SECTION    *pool_cf;
        time_t          now;
 
-       DEBUG("Initializing the thread pool...");
        now = time(NULL);
 
+       rad_assert(spawn_flag != NULL);
+       rad_assert(*spawn_flag == TRUE);
+       rad_assert(pool_initialized == FALSE); /* not called on HUP */
+
+       pool_cf = cf_subsection_find_next(cs, NULL, "thread");
+       if (!pool_cf) *spawn_flag = FALSE;
+
        /*
-        *      After a SIGHUP, we don't over-write the previous values.
+        *      Initialize the thread pool to some reasonable values.
         */
-       if (!pool_initialized) {
-               /*
-                *      Initialize the thread pool to some reasonable values.
-                */
-               memset(&thread_pool, 0, sizeof(THREAD_POOL));
-               thread_pool.head = NULL;
-               thread_pool.tail = NULL;
-               thread_pool.total_threads = 0;
-               thread_pool.max_thread_num = 1;
-               thread_pool.cleanup_delay = 5;
-               thread_pool.spawn_flag = spawn_flag;
-
-               if ((pthread_mutex_init(&thread_pool.wait_mutex,NULL) != 0)) {
-                       radlog(L_ERR, "FATAL: Failed to initialize wait mutex: %s",
-                              strerror(errno));
-                       exit(1);
-               }               
-               
-               /*
-                *      Create the hash table of child PID's
-                */
-               thread_pool.waiters = lrad_hash_table_create(pid_hash,
-                                                            pid_cmp,
-                                                            free);
-               if (!thread_pool.waiters) {
-                       radlog(L_ERR, "FATAL: Failed to set up wait hash");
-                       exit(1);
-               }
+       memset(&thread_pool, 0, sizeof(THREAD_POOL));
+       thread_pool.head = NULL;
+       thread_pool.tail = NULL;
+       thread_pool.total_threads = 0;
+       thread_pool.max_thread_num = 1;
+       thread_pool.cleanup_delay = 5;
+       thread_pool.spawn_flag = *spawn_flag;
+       
+       /*
+        *      Don't bother initializing the mutexes or
+        *      creating the hash tables.  They won't be used.
+        */
+       if (!*spawn_flag) return 0;
+       
+#ifdef WNOHANG
+       if ((pthread_mutex_init(&thread_pool.wait_mutex,NULL) != 0)) {
+               radlog(L_ERR, "FATAL: Failed to initialize wait mutex: %s",
+                      strerror(errno));
+               return -1;
        }
-
+       
        /*
-        *      We're not spawning new threads, don't do
-        *      anything.
+        *      Create the hash table of child PID's
         */
-       if (!spawn_flag) return 0;
+       thread_pool.waiters = fr_hash_table_create(pid_hash,
+                                                  pid_cmp,
+                                                  free);
+       if (!thread_pool.waiters) {
+               radlog(L_ERR, "FATAL: Failed to set up wait hash");
+               return -1;
+       }
+#endif
 
-       pool_cf = cf_section_find("thread");
-       if (pool_cf != NULL) {
-               /*
-                *      FIXME: Check for errors?
-                */
-               cf_section_parse(pool_cf, NULL, thread_config);
+       if (cf_section_parse(pool_cf, NULL, thread_config) < 0) {
+               return -1;
        }
 
        /*
+        *      Catch corner cases.
+        */
+       if (thread_pool.min_spare_threads < 1)
+               thread_pool.min_spare_threads = 1;
+       if (thread_pool.max_spare_threads < 1)
+               thread_pool.max_spare_threads = 1;
+       if (thread_pool.max_spare_threads < thread_pool.min_spare_threads)
+               thread_pool.max_spare_threads = thread_pool.min_spare_threads;
+
+       /*
         *      The pool has already been initialized.  Don't spawn
         *      new threads, and don't forget about forked children,
         */
@@ -791,26 +754,26 @@ int thread_pool_init(int spawn_flag)
        memset(&thread_pool.semaphore, 0, sizeof(thread_pool.semaphore));
        rcode = sem_init(&thread_pool.semaphore, 0, SEMAPHORE_LOCKED);
        if (rcode != 0) {
-               radlog(L_ERR|L_CONS, "FATAL: Failed to initialize semaphore: %s",
+               radlog(L_ERR, "FATAL: Failed to initialize semaphore: %s",
                       strerror(errno));
-               exit(1);
+               return -1;
        }
 
        rcode = pthread_mutex_init(&thread_pool.queue_mutex,NULL);
        if (rcode != 0) {
                radlog(L_ERR, "FATAL: Failed to initialize queue mutex: %s",
                       strerror(errno));
-               exit(1);
+               return -1;
        }
 
        /*
         *      Allocate multiple fifos.
         */
-       for (i = 0; i < NUM_FIFOS; i++) {
-               thread_pool.fifo[i] = lrad_fifo_create(65536, NULL);
+       for (i = 0; i < RAD_LISTEN_MAX; i++) {
+               thread_pool.fifo[i] = fr_fifo_create(65536, NULL);
                if (!thread_pool.fifo[i]) {
                        radlog(L_ERR, "FATAL: Failed to set up request fifo");
-                       exit(1);
+                       return -1;
                }
        }
 
@@ -821,7 +784,7 @@ int thread_pool_init(int spawn_flag)
         */
        if (!setup_ssl_mutexes()) {
                radlog(L_ERR, "FATAL: Failed to set up SSL mutexes");
-               exit(1);
+               return -1;
        }
 #endif
 
@@ -851,11 +814,24 @@ int thread_pool_init(int spawn_flag)
  */
 int thread_pool_addrequest(REQUEST *request, RAD_REQUEST_FUNP fun)
 {
+       time_t now = request->timestamp;
+
+       request->process = fun;
+
        /*
         *      We've been told not to spawn threads, so don't.
         */
        if (!thread_pool.spawn_flag) {
-               request->finished = rad_respond(request, fun);
+               radius_handle_request(request, fun);
+
+#ifdef WNOHANG
+               /*
+                *      Requests that care about child process exit
+                *      codes have already either called
+                *      rad_waitpid(), or they've given up.
+                */
+               wait(NULL);
+#endif
                return 1;
        }
 
@@ -865,22 +841,13 @@ int thread_pool_addrequest(REQUEST *request, RAD_REQUEST_FUNP fun)
        if (!request_enqueue(request, fun)) return 0;
 
        /*
-        *      If the thread pool is busy handling requests, then
-        *      try to spawn another one.  We don't acquire the mutex
-        *      before reading active_threads, so our thread count is
-        *      just an estimate.  It's fine to go ahead and spawn an
-        *      extra thread in that case.
-        *      NOTE: the log message may be in error since active_threads
-        *      is an estimate, but it's only in error about the thread
-        *      count, not about the fact that we can't create a new one.
+        *      If we haven't checked the number of child threads
+        *      in a while, OR if the thread pool appears to be full,
+        *      go manage it.
         */
-       if (thread_pool.active_threads == thread_pool.total_threads) {
-               if (spawn_thread(request->timestamp) == NULL) {
-                       radlog(L_INFO,
-                              "The maximum number of threads (%d) are active, cannot spawn new thread to handle request",
-                              thread_pool.max_threads);
-                       return 1;
-               }
+       if ((last_cleaned < now) ||
+           (thread_pool.active_threads == thread_pool.total_threads)) {
+               thread_pool_manage(now);
        }
 
        return 1;
@@ -892,28 +859,12 @@ int thread_pool_addrequest(REQUEST *request, RAD_REQUEST_FUNP fun)
  *     If there are too many or too few threads waiting, then we
  *     either create some more, or delete some.
  */
-int thread_pool_clean(time_t now)
+static void thread_pool_manage(time_t now)
 {
        int spare;
        int i, total;
        THREAD_HANDLE *handle, *next;
        int active_threads;
-       static time_t last_cleaned = 0;
-
-       /*
-        *      Loop over the thread pool deleting exited threads.
-        */
-       for (handle = thread_pool.head; handle; handle = next) {
-               next = handle->next;
-
-               /*
-                *      Maybe we've asked the thread to exit, and it
-                *      has agreed.
-                */
-               if (handle->status == THREAD_EXITED) {
-                       delete_thread(handle);
-               }
-       }
 
        /*
         *      We don't need a mutex lock here, as we're reading
@@ -937,26 +888,24 @@ int thread_pool_clean(time_t now)
        }
 
        /*
-        *      If there are too few spare threads, create some more.
+        *      If there are too few spare threads, go create some more.
         */
        if (spare < thread_pool.min_spare_threads) {
                total = thread_pool.min_spare_threads - spare;
 
                DEBUG2("Threads: Spawning %d spares", total);
+
                /*
                 *      Create a number of spare threads.
                 */
                for (i = 0; i < total; i++) {
                        handle = spawn_thread(now);
                        if (handle == NULL) {
-                               return -1;
+                               return;
                        }
                }
 
-               /*
-                *      And exit, as there can't be too many spare threads.
-                */
-               return 0;
+               return;         /* there aren't too many spare threads */
        }
 
        /*
@@ -964,17 +913,32 @@ int thread_pool_clean(time_t now)
         *      so this second.
         */
        if (now == last_cleaned) {
-               return 0;
+               return;
        }
        last_cleaned = now;
 
        /*
+        *      Loop over the thread pool, deleting exited threads.
+        */
+       for (handle = thread_pool.head; handle; handle = next) {
+               next = handle->next;
+
+               /*
+                *      Maybe we've asked the thread to exit, and it
+                *      has agreed.
+                */
+               if (handle->status == THREAD_EXITED) {
+                       delete_thread(handle);
+               }
+       }
+
+       /*
         *      Only delete the spare threads if sufficient time has
         *      passed since we last created one.  This helps to minimize
         *      the amount of create/delete cycles.
         */
        if ((now - thread_pool.time_last_spawned) < thread_pool.cleanup_delay) {
-               return 0;
+               return;
        }
 
        /*
@@ -1044,22 +1008,23 @@ int thread_pool_clean(time_t now)
         *      Otherwise everything's kosher.  There are not too few,
         *      or too many spare threads.  Exit happily.
         */
-       return 0;
+       return;
 }
 
 
+#ifdef WNOHANG
 /*
  *     Thread wrapper for fork().
  */
-pid_t rad_fork(int exec_wait)
+pid_t rad_fork(void)
 {
        pid_t child_pid;
 
-       if (exec_wait) return fork();
+       if (!pool_initialized) return fork();
 
        reap_children();        /* be nice to non-wait thingies */
 
-       if (lrad_hash_table_num_elements(thread_pool.waiters) >= 1024) {
+       if (fr_hash_table_num_elements(thread_pool.waiters) >= 1024) {
                return -1;
        }
 
@@ -1073,24 +1038,17 @@ pid_t rad_fork(int exec_wait)
 
                tf = rad_malloc(sizeof(*tf));
                memset(tf, 0, sizeof(*tf));
-               
+
                tf->pid = child_pid;
 
-               /*
-                *      Lock the mutex.
-                */
                pthread_mutex_lock(&thread_pool.wait_mutex);
-
-               rcode = lrad_hash_table_insert(thread_pool.waiters, tf);
-
-               /*
-                *      Unlock the mutex.
-                */
+               rcode = fr_hash_table_insert(thread_pool.waiters, tf);
                pthread_mutex_unlock(&thread_pool.wait_mutex);
 
                if (!rcode) {
                        radlog(L_ERR, "Failed to store PID, creating what will be a zombie process %d",
                               (int) child_pid);
+                       free(tf);
                }
        }
 
@@ -1100,59 +1058,78 @@ pid_t rad_fork(int exec_wait)
        return child_pid;
 }
 
+
 /*
- *     We may not need this any more...
+ *     Wait 10 seconds at most for a child to exit, then give up.
  */
-pid_t rad_waitpid(pid_t pid, int *status, int options)
+pid_t rad_waitpid(pid_t pid, int *status)
 {
+       int i;
        thread_fork_t mytf, *tf;
 
-       reap_children();        /* be nice to non-wait thingies */
+       if (!pool_initialized) return waitpid(pid, status, 0);
 
        if (pid <= 0) return -1;
 
-       if ((options & WNOHANG) == 0) return -1;
-
        mytf.pid = pid;
 
        pthread_mutex_lock(&thread_pool.wait_mutex);
-       tf = lrad_hash_table_finddata(thread_pool.waiters, &mytf);
+       tf = fr_hash_table_finddata(thread_pool.waiters, &mytf);
+       pthread_mutex_unlock(&thread_pool.wait_mutex);
 
-       if (!tf) {              /* not found.  It's a problem... */
-               pthread_mutex_unlock(&thread_pool.wait_mutex);
-               return waitpid(pid, status, options);
-       }
+       if (!tf) return -1;
 
-       if (tf->exited) {
-               *status = tf->status;
-               lrad_hash_table_delete(thread_pool.waiters, &mytf);
-               pthread_mutex_unlock(&thread_pool.wait_mutex);
-               return pid;
+       for (i = 0; i < 100; i++) {
+               reap_children();
+
+               if (tf->exited) {
+                       *status = tf->status;
+
+                       pthread_mutex_lock(&thread_pool.wait_mutex);
+                       fr_hash_table_delete(thread_pool.waiters, &mytf);
+                       pthread_mutex_unlock(&thread_pool.wait_mutex);
+                       return pid;
+               }
+               usleep(100000); /* sleep for 1/10 of a second */
        }
-       
+
        /*
-        *      Don't wait, and it hasn't exited.  Return.
+        *      10 seconds have passed, give up on the child.
         */
+       pthread_mutex_lock(&thread_pool.wait_mutex);
+       fr_hash_table_delete(thread_pool.waiters, &mytf);
        pthread_mutex_unlock(&thread_pool.wait_mutex);
+
        return 0;
 }
-
-#else /* HAVE_PTHREAD_H */
+#else
 /*
- *     "thread" code when we don't have threads.
+ *     No rad_fork or rad_waitpid
  */
-int thread_pool_init(int spawn_flag)
+#endif
+
+void thread_pool_lock(void)
 {
-       return 0;
+       pthread_mutex_lock(&thread_pool.queue_mutex);
 }
 
-/*
- *     call "radrespond".
- */
-int thread_pool_addrequest(REQUEST *request, RAD_REQUEST_FUNP fun)
+void thread_pool_unlock(void)
 {
-       rad_respond(request, fun);
-       return 1;
+       pthread_mutex_unlock(&thread_pool.queue_mutex);
 }
 
+void thread_pool_queue_stats(int *array)
+{
+       int i;
+
+       if (pool_initialized) {
+               for (i = 0; i < RAD_LISTEN_MAX; i++) {
+                       array[i] = fr_fifo_num_elements(thread_pool.fifo[i]);
+               }
+       } else {
+               for (i = 0; i < RAD_LISTEN_MAX; i++) {
+                       array[i] = 0;
+               }
+       }
+}
 #endif /* HAVE_PTHREAD_H */