Do waitpid() for ALL children, even if there are no threads waiting

[freeradius.git] / src / main / threads.c
diff --git a/src/main/threads.c b/src/main/threads.c

index bbc2d0d..130fe9e 100644 (file)
--- a/src/main/threads.c
+++ b/src/main/threads.c
@@ -17,21 +17,24 @@
   *   along with this program; if not, write to the Free Software
   *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
   *
- * Copyright 2000  The FreeRADIUS server project
+ * Copyright 2000,2006  The FreeRADIUS server project
   * Copyright 2000  Alan DeKok <aland@ox.org>
   */
  
-#include <freeradius-devel/autoconf.h>
+#include <freeradius-devel/ident.h>
+RCSID("$Id$")
  
-#include <stdlib.h>
-#include <string.h>
+#include <freeradius-devel/radiusd.h>
+#include <freeradius-devel/rad_assert.h>
  
  /*
   *     Other OS's have sem_init, OS X doesn't.
   */
-#ifndef DARWIN
+#ifdef HAVE_SEMAPHORE_H
  #include <semaphore.h>
-#else
+#endif
+
+#ifdef DARWIN
  #include <mach/task.h>
  #include <mach/semaphore.h>
  
@@ -45,20 +48,10 @@
  #define sem_post(s) semaphore_signal(*s)
  #endif
  
-#include <signal.h>
-
  #ifdef HAVE_SYS_WAIT_H
  #include <sys/wait.h>
  #endif
  
-#include <freeradius-devel/radiusd.h>
-#include <freeradius-devel/rad_assert.h>
-#include <freeradius-devel/conffile.h>
-#include <freeradius-devel/modules.h>
-
-static const char rcsid[] =
-"$Id$";
-
  #ifdef HAVE_PTHREAD_H
  
  #ifdef HAVE_OPENSSL_CRYPTO_H
@@ -67,6 +60,9 @@ static const char rcsid[] =
  #ifdef HAVE_OPENSSL_ERR_H
  #include <openssl/err.h>
  #endif
+#ifdef HAVE_OPENSSL_EVP_H
+#include <openssl/evp.h>
+#endif
  
  #define SEMAPHORE_LOCKED       (0)
  #define SEMAPHORE_UNLOCKED     (1)
@@ -75,14 +71,8 @@ static const char rcsid[] =
  #define THREAD_CANCELLED       (2)
  #define THREAD_EXITED          (3)
  
-#define NUM_FIFOS               (2)
+#define NUM_FIFOS               RAD_LISTEN_MAX
  
-/*
- *     Ordered this way because we prefer proxy, then ongoing, then
- *     start.
- */
-#define FIFO_START   (1)
-#define FIFO_PROXY   (0)
  
  /*
   *  A data structure which contains the information about
@@ -106,14 +96,6 @@ typedef struct THREAD_HANDLE {
         REQUEST              *request;
  } THREAD_HANDLE;
  
-/*
- *     For the request queue.
- */
-typedef struct request_queue_t {
-       REQUEST           *request;
-       RAD_REQUEST_FUNP  fun;
-} request_queue_t;
-
  typedef struct thread_fork_t {
         pid_t           pid;
         int             status;
@@ -143,8 +125,10 @@ typedef struct THREAD_POOL {
         int cleanup_delay;
         int spawn_flag;
  
+#ifdef WNOHANG
         pthread_mutex_t wait_mutex;
-       lrad_hash_table_t *waiters;
+       fr_hash_table_t *waiters;
+#endif
  
         /*
          *      All threads wait on this semaphore, for requests
@@ -159,13 +143,14 @@ typedef struct THREAD_POOL {
  
         int             max_queue_size;
         int             num_queued;
-       int             fifo_state;
-       lrad_fifo_t     *fifo[NUM_FIFOS];
+       fr_fifo_t       *fifo[NUM_FIFOS];
  } THREAD_POOL;
  
  static THREAD_POOL thread_pool;
  static int pool_initialized = FALSE;
+static time_t last_cleaned = 0;
  
+static void thread_pool_manage(time_t now);
  
  /*
   *     A mapping of configuration file names to internal integers
@@ -219,6 +204,13 @@ static int setup_ssl_mutexes(void)
  {
         int i;
  
+#ifdef HAVE_OPENSSL_EVP_H
+       /*
+        *      Enable all ciphers and digests.
+        */
+       OpenSSL_add_all_algorithms();
+#endif
+
         ssl_mutexes = rad_malloc(CRYPTO_num_locks() * sizeof(pthread_mutex_t));
         if (!ssl_mutexes) {
                 radlog(L_ERR, "Error allocating memory for SSL mutexes!");
@@ -236,7 +228,7 @@ static int setup_ssl_mutexes(void)
  }
  #endif
  
-
+#ifdef WNOHANG
  /*
   *     We don't want to catch SIGCHLD for a host of reasons.
   *
@@ -259,24 +251,27 @@ static void reap_children(void)
         int status;
         thread_fork_t mytf, *tf;
  
-       if (lrad_hash_table_num_elements(thread_pool.waiters) == 0) return;
  
         pthread_mutex_lock(&thread_pool.wait_mutex);
  
-       while (1) {
+       do {
+       retry:
                 pid = waitpid(0, &status, WNOHANG);
                 if (pid <= 0) break;
  
                 mytf.pid = pid;
-               tf = lrad_hash_table_finddata(thread_pool.waiters, &mytf);
-               if (!tf) continue;
-               
+               tf = fr_hash_table_finddata(thread_pool.waiters, &mytf);
+               if (!tf) goto retry;
+
                 tf->status = status;
                 tf->exited = 1;
-       }
+       } while (fr_hash_table_num_elements(thread_pool.waiters) > 0);
  
         pthread_mutex_unlock(&thread_pool.wait_mutex);
  }
+#else
+#define reap_children()
+#endif /* WNOHANG */
  
  /*
   *     Add a request to the list of waiting requests.
@@ -286,50 +281,29 @@ static void reap_children(void)
   */
  static int request_enqueue(REQUEST *request, RAD_REQUEST_FUNP fun)
  {
-       int fifo = FIFO_START;
-       request_queue_t *entry;
-
+       rad_assert(request->process == fun);
         pthread_mutex_lock(&thread_pool.queue_mutex);
  
         thread_pool.request_count++;
  
-       /*
-        *      FIXME: Handle proxy replies separately?
-        */
         if (thread_pool.num_queued >= thread_pool.max_queue_size) {
                 pthread_mutex_unlock(&thread_pool.queue_mutex);
-               
+
                 /*
                  *      Mark the request as done.
                  */
-               radlog(L_ERR|L_CONS, "!!! ERROR !!! The server is blocked: discarding new request %d", request->number);
-               request->finished = TRUE;
+               radlog(L_ERR, "!!! ERROR !!! The server is blocked: discarding new request %d", request->number);
+               request->child_state = REQUEST_DONE;
                 return 0;
         }
  
         /*
-        *      Requests get handled in priority.  First, we handle
-        *      replies from a home server, to finish ongoing requests.
-        *
-        *      Then, we handle requests with State, to finish
-        *      multi-packet transactions.
-        *
-        *      Finally, we handle new requests.
+        *      Push the request onto the fifo that matches its priority.
          */
-       if (request->proxy_reply) {
-               fifo = FIFO_PROXY;
-       } else {
-               fifo = FIFO_START;
-       }
-
-       entry = rad_malloc(sizeof(*entry));
-       entry->request = request;
-       entry->fun = fun;
-
-       if (!lrad_fifo_push(thread_pool.fifo[fifo], entry)) {
+       if (!fr_fifo_push(thread_pool.fifo[request->priority], request)) {
                 pthread_mutex_unlock(&thread_pool.queue_mutex);
                 radlog(L_ERR, "!!! ERROR !!! Failed inserting request %d into the queue", request->number);
-               request->finished = TRUE;
+               request->child_state = REQUEST_DONE;
                 return 0;
         }
  
@@ -354,54 +328,76 @@ static int request_enqueue(REQUEST *request, RAD_REQUEST_FUNP fun)
  /*
   *     Remove a request from the queue.
   */
-static int request_dequeue(REQUEST **request, RAD_REQUEST_FUNP *fun)
+static int request_dequeue(REQUEST **prequest, RAD_REQUEST_FUNP *fun)
  {
-       int fifo_state;
-       request_queue_t *entry;
+       RAD_LISTEN_TYPE i, start;
+       REQUEST *request = NULL;
  
         reap_children();
  
         pthread_mutex_lock(&thread_pool.queue_mutex);
  
-       fifo_state = thread_pool.fifo_state;
-
- retry:
-       do {
+       /*
+        *      Clear old requests from all queues.
+        *
+        *      We only do one pass over the queue, in order to
+        *      amortize the work across the child threads.  Since we
+        *      do N checks for one request de-queued, the old
+        *      requests will be quickly cleared.
+        */
+       for (i = 0; i < RAD_LISTEN_MAX; i++) {
+               request = fr_fifo_peek(thread_pool.fifo[i]);
+               if (!request ||
+                   (request->master_state != REQUEST_STOP_PROCESSING)) {
+                       continue;
+}
                 /*
-                *      Pop an entry from the current queue, and go to
-                *      the next queue.
+                *      This entry was marked to be stopped.  Acknowledge it.
                  */
-               entry = lrad_fifo_pop(thread_pool.fifo[fifo_state]);
-               fifo_state++;
-               if (fifo_state >= NUM_FIFOS) fifo_state = 0;
-       } while ((fifo_state != thread_pool.fifo_state) && !entry);
+               request = fr_fifo_pop(thread_pool.fifo[i]);
+               rad_assert(request != NULL);
+               request->child_state = REQUEST_DONE;
+               thread_pool.num_queued--;
+       }
+
+       start = 0;
+ retry:
+       /*
+        *      Pop the next request from the first non-empty queue.
+        */
+       for (i = start; i < RAD_LISTEN_MAX; i++) {
+               request = fr_fifo_pop(thread_pool.fifo[i]);
+               if (request) {
+                       start = i;
+                       break;
+               }
+       }
  
-       if (!entry) {
+       if (!request) {
                 pthread_mutex_unlock(&thread_pool.queue_mutex);
-               *request = NULL;
+               *prequest = NULL;
                 *fun = NULL;
                 return 0;
         }
  
         rad_assert(thread_pool.num_queued > 0);
         thread_pool.num_queued--;
-       *request = entry->request;
-       *fun = entry->fun;
-       free(entry);
+       *prequest = request;
+       *fun = request->process;
  
-       rad_assert(*request != NULL);
-       rad_assert((*request)->magic == REQUEST_MAGIC);
+       rad_assert(request->magic == REQUEST_MAGIC);
         rad_assert(*fun != NULL);
  
         /*
          *      If the request has sat in the queue for too long,
          *      kill it.
          *
-        *      The main clean-up code won't delete the request from
-        *      the request list, until it's marked "finished"
+        *      The main clean-up code can't delete the request from
+        *      the queue, and therefore won't clean it up until we
+        *      have acknowledged it as "done".
          */
-       if ((*request)->options & RAD_REQUEST_OPTION_STOP_NOW) {
-               (*request)->finished = 1;
+       if (request->master_state == REQUEST_STOP_PROCESSING) {
+               request->child_state = REQUEST_DONE;
                 goto retry;
         }
  
@@ -409,12 +405,9 @@ static int request_dequeue(REQUEST **request, RAD_REQUEST_FUNP *fun)
          *      The thread is currently processing a request.
          */
         thread_pool.active_threads++;
-       thread_pool.fifo_state = fifo_state;
  
         pthread_mutex_unlock(&thread_pool.queue_mutex);
  
-       rad_assert((*request)->child_pid == NO_SUCH_CHILD_PID);
-
         return 1;
  }
  
@@ -428,33 +421,11 @@ static void *request_handler_thread(void *arg)
  {
         RAD_REQUEST_FUNP  fun;
         THREAD_HANDLE     *self = (THREAD_HANDLE *) arg;
-#ifdef HAVE_PTHREAD_SIGMASK
-       sigset_t set;
-
-       /*
-        *      Block SIGHUP handling for the child threads.
-        *
-        *      This ensures that only the main server thread will
-        *      process HUP signals.
-        *
-        *      If we don't have sigprocmask, then it shouldn't be
-        *      a problem, either, as the sig_hup handler should check
-        *      for this condition.
-        */
-       sigemptyset(&set);
-       sigaddset(&set, SIGHUP);
-       sigaddset(&set, SIGINT);
-       sigaddset(&set, SIGQUIT);
-       sigaddset(&set, SIGTERM);
-       pthread_sigmask(SIG_BLOCK, &set, NULL);
-#endif
  
         /*
          *      Loop forever, until told to exit.
          */
         do {
-               int finished;
-
                 /*
                  *      Wait to be signalled.
                  */
@@ -478,6 +449,13 @@ static void *request_handler_thread(void *arg)
  
                 DEBUG2("Thread %d got semaphore", self->thread_num);
  
+#ifdef HAVE_OPENSSL_ERR_H
+               /*
+                *      Clear the error queue for the current thread.
+                */
+               ERR_clear_error ();
+#endif
+
                 /*
                  *      Try to grab a request from the queue.
                  *
@@ -493,38 +471,12 @@ static void *request_handler_thread(void *arg)
                        self->thread_num, self->request->number,
                        self->request_count);
  
-               /*
-                *      Respond, and reset request->child_pid
-                */
-               finished = rad_respond(self->request, fun);
-               self->request = NULL;
+               radius_handle_request(self->request, fun);
  
                 /*
                  *      Update the active threads.
                  */
                 pthread_mutex_lock(&thread_pool.queue_mutex);
-
-               /*
-                *      We haven't replied to the client, but we HAVE
-                *      sent a proxied packet, and we have NOT
-                *      received a proxy response.  In that case, send
-                *      the proxied packet now.  Doing this in the mutex
-                *      avoids race conditions.
-                *
-                *      FIXME: this work should really depend on a
-                *      "state", and "next handler", rather than
-                *      horrid hacks like thise.
-                */
-               if (!self->request->reply->data &&
-                   self->request->proxy && self->request->proxy->data
-                   && !self->request->proxy_reply)
-                       self->request->proxy_listener->send(self->request->proxy_listener,
-                                                           (char *)self->request->proxysecret);
-
-               self->request->child_pid = NO_SUCH_CHILD_PID;
-               self->request->finished = finished;
-               self->request = NULL;
-               
                 rad_assert(thread_pool.active_threads > 0);
                 thread_pool.active_threads--;
                 pthread_mutex_unlock(&thread_pool.queue_mutex);
@@ -544,6 +496,7 @@ static void *request_handler_thread(void *arg)
         /*
          *  Do this as the LAST thing before exiting.
          */
+       self->request = NULL;
         self->status = THREAD_EXITED;
  
         return NULL;
@@ -621,7 +574,6 @@ static THREAD_HANDLE *spawn_thread(time_t now)
         memset(handle, 0, sizeof(THREAD_HANDLE));
         handle->prev = NULL;
         handle->next = NULL;
-       handle->pthread_id = NO_SUCH_CHILD_PID;
         handle->thread_num = thread_pool.max_thread_num++;
         handle->request_count = 0;
         handle->status = THREAD_RUNNING;
@@ -647,9 +599,9 @@ static THREAD_HANDLE *spawn_thread(time_t now)
         rcode = pthread_create(&handle->pthread_id, &attr,
                         request_handler_thread, handle);
         if (rcode != 0) {
-               radlog(L_ERR|L_CONS, "FATAL: Thread create failed: %s",
+               radlog(L_ERR, "Thread create failed: %s",
                        strerror(rcode));
-               exit(1);
+               return NULL;
         }
         pthread_attr_destroy(&attr);
  
@@ -700,11 +652,12 @@ int total_active_threads(void)
  }
  
  
+#ifdef WNOHANG
  static uint32_t pid_hash(const void *data)
  {
         const thread_fork_t *tf = data;
  
-       return lrad_hash(&tf->pid, sizeof(tf->pid));
+       return fr_hash(&tf->pid, sizeof(tf->pid));
  }
  
  static int pid_cmp(const void *one, const void *two)
@@ -714,6 +667,7 @@ static int pid_cmp(const void *one, const void *two)
  
         return (a->pid - b->pid);
  }
+#endif
  
  /*
   *     Allocate the thread pool, and seed it with an initial number
@@ -721,63 +675,72 @@ static int pid_cmp(const void *one, const void *two)
   *
   *     FIXME: What to do on a SIGHUP???
   */
-int thread_pool_init(int spawn_flag)
+int thread_pool_init(CONF_SECTION *cs, int *spawn_flag)
  {
         int             i, rcode;
         CONF_SECTION    *pool_cf;
         time_t          now;
  
-       DEBUG("Initializing the thread pool...");
         now = time(NULL);
  
+       rad_assert(spawn_flag != NULL);
+       rad_assert(*spawn_flag == TRUE);
+       rad_assert(pool_initialized == FALSE); /* not called on HUP */
+
+       pool_cf = cf_subsection_find_next(cs, NULL, "thread");
+       if (!pool_cf) *spawn_flag = FALSE;
+
         /*
-        *      After a SIGHUP, we don't over-write the previous values.
+        *      Initialize the thread pool to some reasonable values.
          */
-       if (!pool_initialized) {
-               /*
-                *      Initialize the thread pool to some reasonable values.
-                */
-               memset(&thread_pool, 0, sizeof(THREAD_POOL));
-               thread_pool.head = NULL;
-               thread_pool.tail = NULL;
-               thread_pool.total_threads = 0;
-               thread_pool.max_thread_num = 1;
-               thread_pool.cleanup_delay = 5;
-               thread_pool.spawn_flag = spawn_flag;
-
-               if ((pthread_mutex_init(&thread_pool.wait_mutex,NULL) != 0)) {
-                       radlog(L_ERR, "FATAL: Failed to initialize wait mutex: %s",
-                              strerror(errno));
-                       exit(1);
-               }               
-               
-               /*
-                *      Create the hash table of child PID's
-                */
-               thread_pool.waiters = lrad_hash_table_create(pid_hash,
-                                                            pid_cmp,
-                                                            free);
-               if (!thread_pool.waiters) {
-                       radlog(L_ERR, "FATAL: Failed to set up wait hash");
-                       exit(1);
-               }
+       memset(&thread_pool, 0, sizeof(THREAD_POOL));
+       thread_pool.head = NULL;
+       thread_pool.tail = NULL;
+       thread_pool.total_threads = 0;
+       thread_pool.max_thread_num = 1;
+       thread_pool.cleanup_delay = 5;
+       thread_pool.spawn_flag = *spawn_flag;
+       
+       /*
+        *      Don't bother initializing the mutexes or
+        *      creating the hash tables.  They won't be used.
+        */
+       if (!*spawn_flag) return 0;
+       
+#ifdef WNOHANG
+       if ((pthread_mutex_init(&thread_pool.wait_mutex,NULL) != 0)) {
+               radlog(L_ERR, "FATAL: Failed to initialize wait mutex: %s",
+                      strerror(errno));
+               return -1;
         }
-
+       
         /*
-        *      We're not spawning new threads, don't do
-        *      anything.
+        *      Create the hash table of child PID's
          */
-       if (!spawn_flag) return 0;
+       thread_pool.waiters = fr_hash_table_create(pid_hash,
+                                                  pid_cmp,
+                                                  free);
+       if (!thread_pool.waiters) {
+               radlog(L_ERR, "FATAL: Failed to set up wait hash");
+               return -1;
+       }
+#endif
  
-       pool_cf = cf_section_find("thread");
-       if (pool_cf != NULL) {
-               /*
-                *      FIXME: Check for errors?
-                */
-               cf_section_parse(pool_cf, NULL, thread_config);
+       if (cf_section_parse(pool_cf, NULL, thread_config) < 0) {
+               return -1;
         }
  
         /*
+        *      Catch corner cases.
+        */
+       if (thread_pool.min_spare_threads < 1)
+               thread_pool.min_spare_threads = 1;
+       if (thread_pool.max_spare_threads < 1)
+               thread_pool.max_spare_threads = 1;
+       if (thread_pool.max_spare_threads < thread_pool.min_spare_threads)
+               thread_pool.max_spare_threads = thread_pool.min_spare_threads;
+
+       /*
          *      The pool has already been initialized.  Don't spawn
          *      new threads, and don't forget about forked children,
          */
@@ -791,26 +754,26 @@ int thread_pool_init(int spawn_flag)
         memset(&thread_pool.semaphore, 0, sizeof(thread_pool.semaphore));
         rcode = sem_init(&thread_pool.semaphore, 0, SEMAPHORE_LOCKED);
         if (rcode != 0) {
-               radlog(L_ERR|L_CONS, "FATAL: Failed to initialize semaphore: %s",
+               radlog(L_ERR, "FATAL: Failed to initialize semaphore: %s",
                        strerror(errno));
-               exit(1);
+               return -1;
         }
  
         rcode = pthread_mutex_init(&thread_pool.queue_mutex,NULL);
         if (rcode != 0) {
                 radlog(L_ERR, "FATAL: Failed to initialize queue mutex: %s",
                        strerror(errno));
-               exit(1);
+               return -1;
         }
  
         /*
          *      Allocate multiple fifos.
          */
-       for (i = 0; i < NUM_FIFOS; i++) {
-               thread_pool.fifo[i] = lrad_fifo_create(65536, NULL);
+       for (i = 0; i < RAD_LISTEN_MAX; i++) {
+               thread_pool.fifo[i] = fr_fifo_create(65536, NULL);
                 if (!thread_pool.fifo[i]) {
                         radlog(L_ERR, "FATAL: Failed to set up request fifo");
-                       exit(1);
+                       return -1;
                 }
         }
  
@@ -821,7 +784,7 @@ int thread_pool_init(int spawn_flag)
          */
         if (!setup_ssl_mutexes()) {
                 radlog(L_ERR, "FATAL: Failed to set up SSL mutexes");
-               exit(1);
+               return -1;
         }
  #endif
  
@@ -851,11 +814,24 @@ int thread_pool_init(int spawn_flag)
   */
  int thread_pool_addrequest(REQUEST *request, RAD_REQUEST_FUNP fun)
  {
+       time_t now = request->timestamp;
+
+       request->process = fun;
+
         /*
          *      We've been told not to spawn threads, so don't.
          */
         if (!thread_pool.spawn_flag) {
-               request->finished = rad_respond(request, fun);
+               radius_handle_request(request, fun);
+
+#ifdef WNOHANG
+               /*
+                *      Requests that care about child process exit
+                *      codes have already either called
+                *      rad_waitpid(), or they've given up.
+                */
+               wait(NULL);
+#endif
                 return 1;
         }
  
@@ -865,22 +841,13 @@ int thread_pool_addrequest(REQUEST *request, RAD_REQUEST_FUNP fun)
         if (!request_enqueue(request, fun)) return 0;
  
         /*
-        *      If the thread pool is busy handling requests, then
-        *      try to spawn another one.  We don't acquire the mutex
-        *      before reading active_threads, so our thread count is
-        *      just an estimate.  It's fine to go ahead and spawn an
-        *      extra thread in that case.
-        *      NOTE: the log message may be in error since active_threads
-        *      is an estimate, but it's only in error about the thread
-        *      count, not about the fact that we can't create a new one.
+        *      If we haven't checked the number of child threads
+        *      in a while, OR if the thread pool appears to be full,
+        *      go manage it.
          */
-       if (thread_pool.active_threads == thread_pool.total_threads) {
-               if (spawn_thread(request->timestamp) == NULL) {
-                       radlog(L_INFO,
-                              "The maximum number of threads (%d) are active, cannot spawn new thread to handle request",
-                              thread_pool.max_threads);
-                       return 1;
-               }
+       if ((last_cleaned < now) ||
+           (thread_pool.active_threads == thread_pool.total_threads)) {
+               thread_pool_manage(now);
         }
  
         return 1;
@@ -892,28 +859,12 @@ int thread_pool_addrequest(REQUEST *request, RAD_REQUEST_FUNP fun)
   *     If there are too many or too few threads waiting, then we
   *     either create some more, or delete some.
   */
-int thread_pool_clean(time_t now)
+static void thread_pool_manage(time_t now)
  {
         int spare;
         int i, total;
         THREAD_HANDLE *handle, *next;
         int active_threads;
-       static time_t last_cleaned = 0;
-
-       /*
-        *      Loop over the thread pool deleting exited threads.
-        */
-       for (handle = thread_pool.head; handle; handle = next) {
-               next = handle->next;
-
-               /*
-                *      Maybe we've asked the thread to exit, and it
-                *      has agreed.
-                */
-               if (handle->status == THREAD_EXITED) {
-                       delete_thread(handle);
-               }
-       }
  
         /*
          *      We don't need a mutex lock here, as we're reading
@@ -937,26 +888,24 @@ int thread_pool_clean(time_t now)
         }
  
         /*
-        *      If there are too few spare threads, create some more.
+        *      If there are too few spare threads, go create some more.
          */
         if (spare < thread_pool.min_spare_threads) {
                 total = thread_pool.min_spare_threads - spare;
  
                 DEBUG2("Threads: Spawning %d spares", total);
+
                 /*
                  *      Create a number of spare threads.
                  */
                 for (i = 0; i < total; i++) {
                         handle = spawn_thread(now);
                         if (handle == NULL) {
-                               return -1;
+                               return;
                         }
                 }
  
-               /*
-                *      And exit, as there can't be too many spare threads.
-                */
-               return 0;
+               return;         /* there aren't too many spare threads */
         }
  
         /*
@@ -964,17 +913,32 @@ int thread_pool_clean(time_t now)
          *      so this second.
          */
         if (now == last_cleaned) {
-               return 0;
+               return;
         }
         last_cleaned = now;
  
         /*
+        *      Loop over the thread pool, deleting exited threads.
+        */
+       for (handle = thread_pool.head; handle; handle = next) {
+               next = handle->next;
+
+               /*
+                *      Maybe we've asked the thread to exit, and it
+                *      has agreed.
+                */
+               if (handle->status == THREAD_EXITED) {
+                       delete_thread(handle);
+               }
+       }
+
+       /*
          *      Only delete the spare threads if sufficient time has
          *      passed since we last created one.  This helps to minimize
          *      the amount of create/delete cycles.
          */
         if ((now - thread_pool.time_last_spawned) < thread_pool.cleanup_delay) {
-               return 0;
+               return;
         }
  
         /*
@@ -1044,22 +1008,23 @@ int thread_pool_clean(time_t now)
          *      Otherwise everything's kosher.  There are not too few,
          *      or too many spare threads.  Exit happily.
          */
-       return 0;
+       return;
  }
  
  
+#ifdef WNOHANG
  /*
   *     Thread wrapper for fork().
   */
-pid_t rad_fork(int exec_wait)
+pid_t rad_fork(void)
  {
         pid_t child_pid;
  
-       if (exec_wait) return fork();
+       if (!pool_initialized) return fork();
  
         reap_children();        /* be nice to non-wait thingies */
  
-       if (lrad_hash_table_num_elements(thread_pool.waiters) >= 1024) {
+       if (fr_hash_table_num_elements(thread_pool.waiters) >= 1024) {
                 return -1;
         }
  
@@ -1073,24 +1038,17 @@ pid_t rad_fork(int exec_wait)
  
                 tf = rad_malloc(sizeof(*tf));
                 memset(tf, 0, sizeof(*tf));
-               
+
                 tf->pid = child_pid;
  
-               /*
-                *      Lock the mutex.
-                */
                 pthread_mutex_lock(&thread_pool.wait_mutex);
-
-               rcode = lrad_hash_table_insert(thread_pool.waiters, tf);
-
-               /*
-                *      Unlock the mutex.
-                */
+               rcode = fr_hash_table_insert(thread_pool.waiters, tf);
                 pthread_mutex_unlock(&thread_pool.wait_mutex);
  
                 if (!rcode) {
                         radlog(L_ERR, "Failed to store PID, creating what will be a zombie process %d",
                                (int) child_pid);
+                       free(tf);
                 }
         }
  
@@ -1100,59 +1058,78 @@ pid_t rad_fork(int exec_wait)
         return child_pid;
  }
  
+
  /*
- *     We may not need this any more...
+ *     Wait 10 seconds at most for a child to exit, then give up.
   */
-pid_t rad_waitpid(pid_t pid, int *status, int options)
+pid_t rad_waitpid(pid_t pid, int *status)
  {
+       int i;
         thread_fork_t mytf, *tf;
  
-       reap_children();        /* be nice to non-wait thingies */
+       if (!pool_initialized) return waitpid(pid, status, 0);
  
         if (pid <= 0) return -1;
  
-       if ((options & WNOHANG) == 0) return -1;
-
         mytf.pid = pid;
  
         pthread_mutex_lock(&thread_pool.wait_mutex);
-       tf = lrad_hash_table_finddata(thread_pool.waiters, &mytf);
+       tf = fr_hash_table_finddata(thread_pool.waiters, &mytf);
+       pthread_mutex_unlock(&thread_pool.wait_mutex);
  
-       if (!tf) {              /* not found.  It's a problem... */
-               pthread_mutex_unlock(&thread_pool.wait_mutex);
-               return waitpid(pid, status, options);
-       }
+       if (!tf) return -1;
  
-       if (tf->exited) {
-               *status = tf->status;
-               lrad_hash_table_delete(thread_pool.waiters, &mytf);
-               pthread_mutex_unlock(&thread_pool.wait_mutex);
-               return pid;
+       for (i = 0; i < 100; i++) {
+               reap_children();
+
+               if (tf->exited) {
+                       *status = tf->status;
+
+                       pthread_mutex_lock(&thread_pool.wait_mutex);
+                       fr_hash_table_delete(thread_pool.waiters, &mytf);
+                       pthread_mutex_unlock(&thread_pool.wait_mutex);
+                       return pid;
+               }
+               usleep(100000); /* sleep for 1/10 of a second */
         }
-       
+
         /*
-        *      Don't wait, and it hasn't exited.  Return.
+        *      10 seconds have passed, give up on the child.
          */
+       pthread_mutex_lock(&thread_pool.wait_mutex);
+       fr_hash_table_delete(thread_pool.waiters, &mytf);
         pthread_mutex_unlock(&thread_pool.wait_mutex);
+
         return 0;
  }
-
-#else /* HAVE_PTHREAD_H */
+#else
  /*
- *     "thread" code when we don't have threads.
+ *     No rad_fork or rad_waitpid
   */
-int thread_pool_init(int spawn_flag)
+#endif
+
+void thread_pool_lock(void)
  {
-       return 0;
+       pthread_mutex_lock(&thread_pool.queue_mutex);
  }
  
-/*
- *     call "radrespond".
- */
-int thread_pool_addrequest(REQUEST *request, RAD_REQUEST_FUNP fun)
+void thread_pool_unlock(void)
  {
-       rad_respond(request, fun);
-       return 1;
+       pthread_mutex_unlock(&thread_pool.queue_mutex);
  }
  
+void thread_pool_queue_stats(int *array)
+{
+       int i;
+
+       if (pool_initialized) {
+               for (i = 0; i < RAD_LISTEN_MAX; i++) {
+                       array[i] = fr_fifo_num_elements(thread_pool.fifo[i]);
+               }
+       } else {
+               for (i = 0; i < RAD_LISTEN_MAX; i++) {
+                       array[i] = 0;
+               }
+       }
+}
  #endif /* HAVE_PTHREAD_H */