get rid of race condition on proxying by moving the rad_send()
[freeradius.git] / src / main / threads.c
1 /*
2  * threads.c    request threading support
3  *
4  * Version:     $Id$
5  *
6  *   This program is free software; you can redistribute it and/or modify
7  *   it under the terms of the GNU General Public License as published by
8  *   the Free Software Foundation; either version 2 of the License, or
9  *   (at your option) any later version.
10  *
11  *   This program is distributed in the hope that it will be useful,
12  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *   GNU General Public License for more details.
15  *
16  *   You should have received a copy of the GNU General Public License
17  *   along with this program; if not, write to the Free Software
18  *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  *
20  * Copyright 2000  The FreeRADIUS server project
21  * Copyright 2000  Alan DeKok <aland@ox.org>
22  */
23
24 #include <freeradius-devel/autoconf.h>
25
26 #include <stdlib.h>
27 #include <string.h>
28
29 /*
30  *      Other OS's have sem_init, OS X doesn't.
31  */
32 #ifndef DARWIN
33 #include <semaphore.h>
34 #else
35 #include <mach/task.h>
36 #include <mach/semaphore.h>
37
38 #undef sem_t
39 #define sem_t semaphore_t
40 #undef sem_init
41 #define sem_init(s,p,c) semaphore_create(mach_task_self(),s,SYNC_POLICY_FIFO,c)
42 #undef sem_wait
43 #define sem_wait(s) semaphore_wait(*s)
44 #undef sem_post
45 #define sem_post(s) semaphore_signal(*s)
46 #endif
47
48 #include <signal.h>
49
50 #ifdef HAVE_SYS_WAIT_H
51 #include <sys/wait.h>
52 #endif
53
54 #include <freeradius-devel/radiusd.h>
55 #include <freeradius-devel/rad_assert.h>
56 #include <freeradius-devel/conffile.h>
57 #include <freeradius-devel/modules.h>
58
59 static const char rcsid[] =
60 "$Id$";
61
62 #ifdef HAVE_PTHREAD_H
63
64 #ifdef HAVE_OPENSSL_CRYPTO_H
65 #include <openssl/crypto.h>
66 #endif
67 #ifdef HAVE_OPENSSL_ERR_H
68 #include <openssl/err.h>
69 #endif
70
71 #define SEMAPHORE_LOCKED        (0)
72 #define SEMAPHORE_UNLOCKED      (1)
73
74 #define THREAD_RUNNING          (1)
75 #define THREAD_CANCELLED        (2)
76 #define THREAD_EXITED           (3)
77
78 #define NUM_FIFOS               (2)
79
80 /*
81  *     Ordered this way because we prefer proxy replies, then
82  *     new requests.
83  */
84 #define FIFO_START   (1)
85 #define FIFO_PROXY   (0)
86
87 /*
88  *  A data structure which contains the information about
89  *  the current thread.
90  *
91  *  pthread_id     pthread id
92  *  thread_num     server thread number, 1...number of threads
93  *  semaphore     used to block the thread until a request comes in
94  *  status        is the thread running or exited?
95  *  request_count the number of requests that this thread has handled
96  *  timestamp     when the thread started executing.
97  */
98 typedef struct THREAD_HANDLE {
99         struct THREAD_HANDLE *prev;
100         struct THREAD_HANDLE *next;
101         pthread_t            pthread_id;
102         int                  thread_num;
103         int                  status;
104         unsigned int         request_count;
105         time_t               timestamp;
106         REQUEST              *request;
107 } THREAD_HANDLE;
108
109 /*
110  *      For the request queue.
111  */
112 typedef struct request_queue_t {
113         REQUEST           *request;
114         RAD_REQUEST_FUNP  fun;
115 } request_queue_t;
116
typedef struct thread_fork_t {
        pid_t           pid;            /* child PID; the hash key */
        int             status;         /* status from waitpid() */
        int             exited;         /* set to 1 once reaped */
} thread_fork_t;
122
123
124 /*
125  *      A data structure to manage the thread pool.  There's no real
126  *      need for a data structure, but it makes things conceptually
127  *      easier.
128  */
129 typedef struct THREAD_POOL {
130         THREAD_HANDLE *head;
131         THREAD_HANDLE *tail;
132
133         int total_threads;
134         int active_threads;     /* protected by queue_mutex */
135         int max_thread_num;
136         int start_threads;
137         int max_threads;
138         int min_spare_threads;
139         int max_spare_threads;
140         unsigned int max_requests_per_thread;
141         unsigned long request_count;
142         time_t time_last_spawned;
143         int cleanup_delay;
144         int spawn_flag;
145
146         pthread_mutex_t wait_mutex;
147         lrad_hash_table_t *waiters;
148
149         /*
150          *      All threads wait on this semaphore, for requests
151          *      to enter the queue.
152          */
153         sem_t           semaphore;
154
155         /*
156          *      To ensure only one thread at a time touches the queue.
157          */
158         pthread_mutex_t queue_mutex;
159
160         int             max_queue_size;
161         int             num_queued;
162         int             fifo_state;
163         lrad_fifo_t     *fifo[NUM_FIFOS];
164 } THREAD_POOL;
165
166 static THREAD_POOL thread_pool;
167 static int pool_initialized = FALSE;
168
169
170 /*
171  *      A mapping of configuration file names to internal integers
172  */
173 static const CONF_PARSER thread_config[] = {
174         { "start_servers",           PW_TYPE_INTEGER, 0, &thread_pool.start_threads,           "5" },
175         { "max_servers",             PW_TYPE_INTEGER, 0, &thread_pool.max_threads,             "32" },
176         { "min_spare_servers",       PW_TYPE_INTEGER, 0, &thread_pool.min_spare_threads,       "3" },
177         { "max_spare_servers",       PW_TYPE_INTEGER, 0, &thread_pool.max_spare_threads,       "10" },
178         { "max_requests_per_server", PW_TYPE_INTEGER, 0, &thread_pool.max_requests_per_thread, "0" },
179         { "cleanup_delay",           PW_TYPE_INTEGER, 0, &thread_pool.cleanup_delay,           "5" },
180         { "max_queue_size",          PW_TYPE_INTEGER, 0, &thread_pool.max_queue_size,           "65536" },
181         { NULL, -1, 0, NULL, NULL }
182 };
183
184
185 #ifdef HAVE_OPENSSL_CRYPTO_H
186
187 /*
188  *      If we're linking against OpenSSL, then it is the
189  *      duty of the application, if it is multithreaded,
190  *      to provide OpenSSL with appropriate thread id
191  *      and mutex locking functions
192  *
193  *      Note: this only implements static callbacks.
194  *      OpenSSL does not use dynamic locking callbacks
195  *      right now, but may in the future, so we will have
196  *      to add them at some point.
197  */
198
199 static pthread_mutex_t *ssl_mutexes = NULL;
200
/*
 *      Thread id callback for OpenSSL: the calling thread's
 *      pthread id, cast to unsigned long.
 */
static unsigned long ssl_id_function(void)
{
        pthread_t self = pthread_self();

        return (unsigned long) self;
}
205
206 static void ssl_locking_function(int mode, int n, const char *file, int line)
207 {
208         file = file;            /* -Wunused */
209         line = line;            /* -Wunused */
210
211         if (mode & CRYPTO_LOCK) {
212                 pthread_mutex_lock(&(ssl_mutexes[n]));
213         } else {
214                 pthread_mutex_unlock(&(ssl_mutexes[n]));
215         }
216 }
217
218 static int setup_ssl_mutexes(void)
219 {
220         int i;
221
222         ssl_mutexes = rad_malloc(CRYPTO_num_locks() * sizeof(pthread_mutex_t));
223         if (!ssl_mutexes) {
224                 radlog(L_ERR, "Error allocating memory for SSL mutexes!");
225                 return 0;
226         }
227
228         for (i = 0; i < CRYPTO_num_locks(); i++) {
229                 pthread_mutex_init(&(ssl_mutexes[i]), NULL);
230         }
231
232         CRYPTO_set_id_callback(ssl_id_function);
233         CRYPTO_set_locking_callback(ssl_locking_function);
234
235         return 1;
236 }
237 #endif
238
239
240 /*
241  *      We don't want to catch SIGCHLD for a host of reasons.
242  *
243  *      - exec_wait means that someone, somewhere, somewhen, will
244  *      call waitpid(), and catch the child.
245  *
246  *      - SIGCHLD is delivered to a random thread, not the one that
247  *      forked.
248  *
249  *      - if another thread catches the child, we have to coordinate
250  *      with the thread doing the waiting.
251  *
252  *      - if we don't waitpid() for non-wait children, they'll be zombies,
253  *      and will hang around forever.
254  *
255  */
256 static void reap_children(void)
257 {
258         pid_t pid;
259         int status;
260         thread_fork_t mytf, *tf;
261
262         if (lrad_hash_table_num_elements(thread_pool.waiters) == 0) return;
263
264         pthread_mutex_lock(&thread_pool.wait_mutex);
265
266         while (1) {
267                 pid = waitpid(0, &status, WNOHANG);
268                 if (pid <= 0) break;
269
270                 mytf.pid = pid;
271                 tf = lrad_hash_table_finddata(thread_pool.waiters, &mytf);
272                 if (!tf) continue;
273                 
274                 tf->status = status;
275                 tf->exited = 1;
276         }
277
278         pthread_mutex_unlock(&thread_pool.wait_mutex);
279 }
280
281 /*
282  *      Add a request to the list of waiting requests.
283  *      This function gets called ONLY from the main handler thread...
284  *
285  *      This function should never fail.
286  */
287 static int request_enqueue(REQUEST *request, RAD_REQUEST_FUNP fun)
288 {
289         int fifo = FIFO_START;
290         request_queue_t *entry;
291
292         pthread_mutex_lock(&thread_pool.queue_mutex);
293
294         thread_pool.request_count++;
295
296         /*
297          *      FIXME: Handle proxy replies separately?
298          */
299         if (thread_pool.num_queued >= thread_pool.max_queue_size) {
300                 pthread_mutex_unlock(&thread_pool.queue_mutex);
301                 
302                 /*
303                  *      Mark the request as done.
304                  */
305                 radlog(L_ERR|L_CONS, "!!! ERROR !!! The server is blocked: discarding new request %d", request->number);
306                 request->finished = TRUE;
307                 return 0;
308         }
309
310         /*
311          *      Requests get handled in priority.  First, we handle
312          *      replies from a home server, to finish ongoing requests.
313          *
314          *      Then, we handle requests with State, to finish
315          *      multi-packet transactions.
316          *
317          *      Finally, we handle new requests.
318          */
319         if (request->proxy_reply) {
320                 fifo = FIFO_PROXY;
321         } else {
322                 fifo = FIFO_START;
323         }
324
325         entry = rad_malloc(sizeof(*entry));
326         entry->request = request;
327         entry->fun = fun;
328
329         if (!lrad_fifo_push(thread_pool.fifo[fifo], entry)) {
330                 pthread_mutex_unlock(&thread_pool.queue_mutex);
331                 radlog(L_ERR, "!!! ERROR !!! Failed inserting request %d into the queue", request->number);
332                 request->finished = TRUE;
333                 return 0;
334         }
335
336         thread_pool.num_queued++;
337
338         pthread_mutex_unlock(&thread_pool.queue_mutex);
339
340         /*
341          *      There's one more request in the queue.
342          *
343          *      Note that we're not touching the queue any more, so
344          *      the semaphore post is outside of the mutex.  This also
345          *      means that when the thread wakes up and tries to lock
346          *      the mutex, it will be unlocked, and there won't be
347          *      contention.
348          */
349         sem_post(&thread_pool.semaphore);
350
351         return 1;
352 }
353
354 /*
355  *      Remove a request from the queue.
356  */
357 static int request_dequeue(REQUEST **request, RAD_REQUEST_FUNP *fun)
358 {
359         int fifo_state;
360         request_queue_t *entry;
361
362         reap_children();
363
364         pthread_mutex_lock(&thread_pool.queue_mutex);
365
366         fifo_state = thread_pool.fifo_state;
367
368  retry:
369         do {
370                 /*
371                  *      Pop an entry from the current queue, and go to
372                  *      the next queue.
373                  */
374                 entry = lrad_fifo_pop(thread_pool.fifo[fifo_state]);
375                 fifo_state++;
376                 if (fifo_state >= NUM_FIFOS) fifo_state = 0;
377         } while ((fifo_state != thread_pool.fifo_state) && !entry);
378
379         if (!entry) {
380                 pthread_mutex_unlock(&thread_pool.queue_mutex);
381                 *request = NULL;
382                 *fun = NULL;
383                 return 0;
384         }
385
386         rad_assert(thread_pool.num_queued > 0);
387         thread_pool.num_queued--;
388         *request = entry->request;
389         *fun = entry->fun;
390         free(entry);
391
392         rad_assert(*request != NULL);
393         rad_assert((*request)->magic == REQUEST_MAGIC);
394         rad_assert(*fun != NULL);
395
396         /*
397          *      If the request has sat in the queue for too long,
398          *      kill it.
399          *
400          *      The main clean-up code won't delete the request from
401          *      the request list, until it's marked "finished"
402          */
403         if ((*request)->options & RAD_REQUEST_OPTION_STOP_NOW) {
404                 (*request)->finished = 1;
405                 goto retry;
406         }
407
408         /*
409          *      The thread is currently processing a request.
410          */
411         thread_pool.active_threads++;
412         thread_pool.fifo_state = fifo_state;
413
414         pthread_mutex_unlock(&thread_pool.queue_mutex);
415
416         rad_assert((*request)->child_pid == NO_SUCH_CHILD_PID);
417
418         return 1;
419 }
420
421
422 /*
423  *      The main thread handler for requests.
424  *
425  *      Wait on the semaphore until we have it, and process the request.
426  */
427 static void *request_handler_thread(void *arg)
428 {
429         RAD_REQUEST_FUNP  fun;
430         THREAD_HANDLE     *self = (THREAD_HANDLE *) arg;
431 #ifdef HAVE_PTHREAD_SIGMASK
432         sigset_t set;
433
434         /*
435          *      Block SIGHUP handling for the child threads.
436          *
437          *      This ensures that only the main server thread will
438          *      process HUP signals.
439          *
440          *      If we don't have sigprocmask, then it shouldn't be
441          *      a problem, either, as the sig_hup handler should check
442          *      for this condition.
443          */
444         sigemptyset(&set);
445         sigaddset(&set, SIGHUP);
446         sigaddset(&set, SIGINT);
447         sigaddset(&set, SIGQUIT);
448         sigaddset(&set, SIGTERM);
449         pthread_sigmask(SIG_BLOCK, &set, NULL);
450 #endif
451
452         /*
453          *      Loop forever, until told to exit.
454          */
455         do {
456                 int finished;
457
458                 /*
459                  *      Wait to be signalled.
460                  */
461                 DEBUG2("Thread %d waiting to be assigned a request",
462                        self->thread_num);
463         re_wait:
464                 if (sem_wait(&thread_pool.semaphore) != 0) {
465                         /*
466                          *      Interrupted system call.  Go back to
467                          *      waiting, but DON'T print out any more
468                          *      text.
469                          */
470                         if (errno == EINTR) {
471                                 DEBUG2("Re-wait %d", self->thread_num);
472                                 goto re_wait;
473                         }
474                         radlog(L_ERR, "Thread %d failed waiting for semaphore: %s: Exiting\n",
475                                self->thread_num, strerror(errno));
476                         break;
477                 }
478
479                 DEBUG2("Thread %d got semaphore", self->thread_num);
480
481                 /*
482                  *      Try to grab a request from the queue.
483                  *
484                  *      It may be empty, in which case we fail
485                  *      gracefully.
486                  */
487                 if (!request_dequeue(&self->request, &fun)) continue;
488
489                 self->request->child_pid = self->pthread_id;
490                 self->request_count++;
491
492                 DEBUG2("Thread %d handling request %d, (%d handled so far)",
493                        self->thread_num, self->request->number,
494                        self->request_count);
495
496                 /*
497                  *      Respond, and reset request->child_pid
498                  */
499                 finished = rad_respond(self->request, fun);
500                 self->request = NULL;
501
502                 /*
503                  *      Update the active threads.
504                  */
505                 pthread_mutex_lock(&thread_pool.queue_mutex);
506
507                 /*
508                  *      We haven't replied to the client, but we HAVE
509                  *      sent a proxied packet, and we have NOT
510                  *      received a proxy response.  In that case, send
511                  *      the proxied packet now.  Doing this in the mutex
512                  *      avoids race conditions.
513                  *
514                  *      FIXME: this work should really depend on a
515                  *      "state", and "next handler", rather than
516                  *      horrid hacks like thise.
517                  */
518                 if (!self->request->reply->data &&
519                     self->request->proxy && self->request->proxy->data
520                     && !self->request->proxy_reply)
521                         self->request->proxy_listener->send(self->request->proxy_listener,
522                                                             (char *)self->request->proxysecret);
523
524                 self->request->child_pid = NO_SUCH_CHILD_PID;
525                 self->request->finished = finished;
526                 self->request = NULL;
527                 
528                 rad_assert(thread_pool.active_threads > 0);
529                 thread_pool.active_threads--;
530                 pthread_mutex_unlock(&thread_pool.queue_mutex);
531         } while (self->status != THREAD_CANCELLED);
532
533         DEBUG2("Thread %d exiting...", self->thread_num);
534
535 #ifdef HAVE_OPENSSL_ERR_H
536         /*
537          *      If we linked with OpenSSL, the application
538          *      must remove the thread's error queue before
539          *      exiting to prevent memory leaks.
540          */
541         ERR_remove_state(0);
542 #endif
543
544         /*
545          *  Do this as the LAST thing before exiting.
546          */
547         self->status = THREAD_EXITED;
548
549         return NULL;
550 }
551
552 /*
553  *      Take a THREAD_HANDLE, delete it from the thread pool and
554  *      free its resources.
555  *
556  *      This function is called ONLY from the main server thread,
557  *      ONLY after the thread has exited.
558  */
559 static void delete_thread(THREAD_HANDLE *handle)
560 {
561         THREAD_HANDLE *prev;
562         THREAD_HANDLE *next;
563
564         rad_assert(handle->request == NULL);
565
566         DEBUG2("Deleting thread %d", handle->thread_num);
567
568         prev = handle->prev;
569         next = handle->next;
570         rad_assert(thread_pool.total_threads > 0);
571         thread_pool.total_threads--;
572
573         /*
574          *      Remove the handle from the list.
575          */
576         if (prev == NULL) {
577                 rad_assert(thread_pool.head == handle);
578                 thread_pool.head = next;
579         } else {
580                 prev->next = next;
581         }
582
583         if (next == NULL) {
584                 rad_assert(thread_pool.tail == handle);
585                 thread_pool.tail = prev;
586         } else {
587                 next->prev = prev;
588         }
589
590         /*
591          *      Free the handle, now that it's no longer referencable.
592          */
593         free(handle);
594 }
595
596
597 /*
598  *      Spawn a new thread, and place it in the thread pool.
599  *
600  *      The thread is started initially in the blocked state, waiting
601  *      for the semaphore.
602  */
603 static THREAD_HANDLE *spawn_thread(time_t now)
604 {
605         int rcode;
606         THREAD_HANDLE *handle;
607         pthread_attr_t attr;
608
609         /*
610          *      Ensure that we don't spawn too many threads.
611          */
612         if (thread_pool.total_threads >= thread_pool.max_threads) {
613                 DEBUG2("Thread spawn failed.  Maximum number of threads (%d) already running.", thread_pool.max_threads);
614                 return NULL;
615         }
616
617         /*
618          *      Allocate a new thread handle.
619          */
620         handle = (THREAD_HANDLE *) rad_malloc(sizeof(THREAD_HANDLE));
621         memset(handle, 0, sizeof(THREAD_HANDLE));
622         handle->prev = NULL;
623         handle->next = NULL;
624         handle->pthread_id = NO_SUCH_CHILD_PID;
625         handle->thread_num = thread_pool.max_thread_num++;
626         handle->request_count = 0;
627         handle->status = THREAD_RUNNING;
628         handle->timestamp = time(NULL);
629
630         /*
631          *      Initialize the thread's attributes to detached.
632          *
633          *      We could call pthread_detach() later, but if the thread
634          *      exits between the create & detach calls, it will need to
635          *      be joined, which will never happen.
636          */
637         pthread_attr_init(&attr);
638         pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
639
640         /*
641          *      Create the thread detached, so that it cleans up it's
642          *      own memory when it exits.
643          *
644          *      Note that the function returns non-zero on error, NOT
645          *      -1.  The return code is the error, and errno isn't set.
646          */
647         rcode = pthread_create(&handle->pthread_id, &attr,
648                         request_handler_thread, handle);
649         if (rcode != 0) {
650                 radlog(L_ERR|L_CONS, "FATAL: Thread create failed: %s",
651                        strerror(rcode));
652                 exit(1);
653         }
654         pthread_attr_destroy(&attr);
655
656         /*
657          *      One more thread to go into the list.
658          */
659         thread_pool.total_threads++;
660         DEBUG2("Thread spawned new child %d. Total threads in pool: %d",
661                         handle->thread_num, thread_pool.total_threads);
662
663         /*
664          *      Add the thread handle to the tail of the thread pool list.
665          */
666         if (thread_pool.tail) {
667                 thread_pool.tail->next = handle;
668                 handle->prev = thread_pool.tail;
669                 thread_pool.tail = handle;
670         } else {
671                 rad_assert(thread_pool.head == NULL);
672                 thread_pool.head = thread_pool.tail = handle;
673         }
674
675         /*
676          *      Update the time we last spawned a thread.
677          */
678         thread_pool.time_last_spawned = now;
679
680         /*
681          *      And return the new handle to the caller.
682          */
683         return handle;
684 }
685
686 /*
687  *      Temporary function to prevent server from executing a SIGHUP
688  *      until all threads are finished handling requests.  This returns
689  *      the number of active threads to 'radiusd.c'.
690  */
691 int total_active_threads(void)
692 {
693         /*
694          *      We don't acquire the mutex, so this is just an estimate.
695          *      We can't return with the lock held, so there's no point
696          *      in getting the guaranteed correct value; by the time
697          *      the caller sees it, it can be wrong again.
698          */
699         return thread_pool.active_threads;
700 }
701
702
703 static uint32_t pid_hash(const void *data)
704 {
705         const thread_fork_t *tf = data;
706
707         return lrad_hash(&tf->pid, sizeof(tf->pid));
708 }
709
710 static int pid_cmp(const void *one, const void *two)
711 {
712         const thread_fork_t *a = one;
713         const thread_fork_t *b = two;
714
715         return (a->pid - b->pid);
716 }
717
718 /*
719  *      Allocate the thread pool, and seed it with an initial number
720  *      of threads.
721  *
722  *      FIXME: What to do on a SIGHUP???
723  */
724 int thread_pool_init(int spawn_flag)
725 {
726         int             i, rcode;
727         CONF_SECTION    *pool_cf;
728         time_t          now;
729
730         DEBUG("Initializing the thread pool...");
731         now = time(NULL);
732
733         /*
734          *      After a SIGHUP, we don't over-write the previous values.
735          */
736         if (!pool_initialized) {
737                 /*
738                  *      Initialize the thread pool to some reasonable values.
739                  */
740                 memset(&thread_pool, 0, sizeof(THREAD_POOL));
741                 thread_pool.head = NULL;
742                 thread_pool.tail = NULL;
743                 thread_pool.total_threads = 0;
744                 thread_pool.max_thread_num = 1;
745                 thread_pool.cleanup_delay = 5;
746                 thread_pool.spawn_flag = spawn_flag;
747
748                 if ((pthread_mutex_init(&thread_pool.wait_mutex,NULL) != 0)) {
749                         radlog(L_ERR, "FATAL: Failed to initialize wait mutex: %s",
750                                strerror(errno));
751                         exit(1);
752                 }               
753                 
754                 /*
755                  *      Create the hash table of child PID's
756                  */
757                 thread_pool.waiters = lrad_hash_table_create(pid_hash,
758                                                              pid_cmp,
759                                                              free);
760                 if (!thread_pool.waiters) {
761                         radlog(L_ERR, "FATAL: Failed to set up wait hash");
762                         exit(1);
763                 }
764         }
765
766         /*
767          *      We're not spawning new threads, don't do
768          *      anything.
769          */
770         if (!spawn_flag) return 0;
771
772         pool_cf = cf_section_find("thread");
773         if (pool_cf != NULL) {
774                 /*
775                  *      FIXME: Check for errors?
776                  */
777                 cf_section_parse(pool_cf, NULL, thread_config);
778         }
779
780         /*
781          *      The pool has already been initialized.  Don't spawn
782          *      new threads, and don't forget about forked children,
783          */
784         if (pool_initialized) {
785                 return 0;
786         }
787
788         /*
789          *      Initialize the queue of requests.
790          */
791         memset(&thread_pool.semaphore, 0, sizeof(thread_pool.semaphore));
792         rcode = sem_init(&thread_pool.semaphore, 0, SEMAPHORE_LOCKED);
793         if (rcode != 0) {
794                 radlog(L_ERR|L_CONS, "FATAL: Failed to initialize semaphore: %s",
795                        strerror(errno));
796                 exit(1);
797         }
798
799         rcode = pthread_mutex_init(&thread_pool.queue_mutex,NULL);
800         if (rcode != 0) {
801                 radlog(L_ERR, "FATAL: Failed to initialize queue mutex: %s",
802                        strerror(errno));
803                 exit(1);
804         }
805
806         /*
807          *      Allocate multiple fifos.
808          */
809         for (i = 0; i < NUM_FIFOS; i++) {
810                 thread_pool.fifo[i] = lrad_fifo_create(65536, NULL);
811                 if (!thread_pool.fifo[i]) {
812                         radlog(L_ERR, "FATAL: Failed to set up request fifo");
813                         exit(1);
814                 }
815         }
816
817 #ifdef HAVE_OPENSSL_CRYPTO_H
818         /*
819          *      If we're linking with OpenSSL too, then we need
820          *      to set up the mutexes and enable the thread callbacks.
821          */
822         if (!setup_ssl_mutexes()) {
823                 radlog(L_ERR, "FATAL: Failed to set up SSL mutexes");
824                 exit(1);
825         }
826 #endif
827
828
829         /*
830          *      Create a number of waiting threads.
831          *
832          *      If we fail while creating them, do something intelligent.
833          */
834         for (i = 0; i < thread_pool.start_threads; i++) {
835                 if (spawn_thread(now) == NULL) {
836                         return -1;
837                 }
838         }
839
840         DEBUG2("Thread pool initialized");
841         pool_initialized = TRUE;
842         return 0;
843 }
844
845
846 /*
847  *      Assign a new request to a free thread.
848  *
849  *      If there isn't a free thread, then try to create a new one,
850  *      up to the configured limits.
851  */
852 int thread_pool_addrequest(REQUEST *request, RAD_REQUEST_FUNP fun)
853 {
854         /*
855          *      We've been told not to spawn threads, so don't.
856          */
857         if (!thread_pool.spawn_flag) {
858                 request->finished = rad_respond(request, fun);
859                 return 1;
860         }
861
862         /*
863          *      Add the new request to the queue.
864          */
865         if (!request_enqueue(request, fun)) return 0;
866
867         /*
868          *      If the thread pool is busy handling requests, then
869          *      try to spawn another one.  We don't acquire the mutex
870          *      before reading active_threads, so our thread count is
871          *      just an estimate.  It's fine to go ahead and spawn an
872          *      extra thread in that case.
873          *      NOTE: the log message may be in error since active_threads
874          *      is an estimate, but it's only in error about the thread
875          *      count, not about the fact that we can't create a new one.
876          */
877         if (thread_pool.active_threads == thread_pool.total_threads) {
878                 if (spawn_thread(request->timestamp) == NULL) {
879                         radlog(L_INFO,
880                                "The maximum number of threads (%d) are active, cannot spawn new thread to handle request",
881                                thread_pool.max_threads);
882                         return 1;
883                 }
884         }
885
886         return 1;
887 }
888
889 /*
890  *      Check the min_spare_threads and max_spare_threads.
891  *
892  *      If there are too many or too few threads waiting, then we
893  *      either create some more, or delete some.
894  */
895 int thread_pool_clean(time_t now)
896 {
897         int spare;
898         int i, total;
899         THREAD_HANDLE *handle, *next;
900         int active_threads;
901         static time_t last_cleaned = 0;
902
903         /*
904          *      Loop over the thread pool deleting exited threads.
905          */
906         for (handle = thread_pool.head; handle; handle = next) {
907                 next = handle->next;
908
909                 /*
910                  *      Maybe we've asked the thread to exit, and it
911                  *      has agreed.
912                  */
913                 if (handle->status == THREAD_EXITED) {
914                         delete_thread(handle);
915                 }
916         }
917
918         /*
919          *      We don't need a mutex lock here, as we're reading
920          *      active_threads, and not modifying it.  We want a close
921          *      approximation of the number of active threads, and this
922          *      is good enough.
923          */
924         active_threads = thread_pool.active_threads;
925         spare = thread_pool.total_threads - active_threads;
926         if (debug_flag) {
927                 static int old_total = -1;
928                 static int old_active = -1;
929
930                 if ((old_total != thread_pool.total_threads) ||
931                                 (old_active != active_threads)) {
932                         DEBUG2("Threads: total/active/spare threads = %d/%d/%d",
933                                         thread_pool.total_threads, active_threads, spare);
934                         old_total = thread_pool.total_threads;
935                         old_active = active_threads;
936                 }
937         }
938
939         /*
940          *      If there are too few spare threads, create some more.
941          */
942         if (spare < thread_pool.min_spare_threads) {
943                 total = thread_pool.min_spare_threads - spare;
944
945                 DEBUG2("Threads: Spawning %d spares", total);
946                 /*
947                  *      Create a number of spare threads.
948                  */
949                 for (i = 0; i < total; i++) {
950                         handle = spawn_thread(now);
951                         if (handle == NULL) {
952                                 return -1;
953                         }
954                 }
955
956                 /*
957                  *      And exit, as there can't be too many spare threads.
958                  */
959                 return 0;
960         }
961
962         /*
963          *      Only delete spare threads if we haven't already done
964          *      so this second.
965          */
966         if (now == last_cleaned) {
967                 return 0;
968         }
969         last_cleaned = now;
970
971         /*
972          *      Only delete the spare threads if sufficient time has
973          *      passed since we last created one.  This helps to minimize
974          *      the amount of create/delete cycles.
975          */
976         if ((now - thread_pool.time_last_spawned) < thread_pool.cleanup_delay) {
977                 return 0;
978         }
979
980         /*
981          *      If there are too many spare threads, delete one.
982          *
983          *      Note that we only delete ONE at a time, instead of
984          *      wiping out many.  This allows the excess servers to
985          *      be slowly reaped, just in case the load spike comes again.
986          */
987         if (spare > thread_pool.max_spare_threads) {
988
989                 spare -= thread_pool.max_spare_threads;
990
991                 DEBUG2("Threads: deleting 1 spare out of %d spares", spare);
992
993                 /*
994                  *      Walk through the thread pool, deleting the
995                  *      first idle thread we come across.
996                  */
997                 for (handle = thread_pool.head; (handle != NULL) && (spare > 0) ; handle = next) {
998                         next = handle->next;
999
1000                         /*
1001                          *      If the thread is not handling a
1002                          *      request, but still live, then tell it
1003                          *      to exit.
1004                          *
1005                          *      It will eventually wake up, and realize
1006                          *      it's been told to commit suicide.
1007                          */
1008                         if ((handle->request == NULL) &&
1009                             (handle->status == THREAD_RUNNING)) {
1010                                 handle->status = THREAD_CANCELLED;
1011                                 /*
1012                                  *      Post an extra semaphore, as a
1013                                  *      signal to wake up, and exit.
1014                                  */
1015                                 sem_post(&thread_pool.semaphore);
1016                                 spare--;
1017                                 break;
1018                         }
1019                 }
1020         }
1021
1022         /*
1023          *      If the thread has handled too many requests, then make it
1024          *      exit.
1025          */
1026         if (thread_pool.max_requests_per_thread > 0) {
1027                 for (handle = thread_pool.head; handle; handle = next) {
1028                         next = handle->next;
1029
1030                         /*
1031                          *      Not handling a request, but otherwise
1032                          *      live, we can kill it.
1033                          */
1034                         if ((handle->request == NULL) &&
1035                             (handle->status == THREAD_RUNNING) &&
1036                             (handle->request_count > thread_pool.max_requests_per_thread)) {
1037                                 handle->status = THREAD_CANCELLED;
1038                                 sem_post(&thread_pool.semaphore);
1039                         }
1040                 }
1041         }
1042
1043         /*
1044          *      Otherwise everything's kosher.  There are not too few,
1045          *      or too many spare threads.  Exit happily.
1046          */
1047         return 0;
1048 }
1049
1050
1051 /*
1052  *      Thread wrapper for fork().
1053  */
1054 pid_t rad_fork(int exec_wait)
1055 {
1056         pid_t child_pid;
1057
1058         if (exec_wait) return fork();
1059
1060         reap_children();        /* be nice to non-wait thingies */
1061
1062         if (lrad_hash_table_num_elements(thread_pool.waiters) >= 1024) {
1063                 return -1;
1064         }
1065
1066         /*
1067          *      Fork & save the PID for later reaping.
1068          */
1069         child_pid = fork();
1070         if (child_pid > 0) {
1071                 int rcode;
1072                 thread_fork_t *tf;
1073
1074                 tf = rad_malloc(sizeof(*tf));
1075                 memset(tf, 0, sizeof(*tf));
1076                 
1077                 tf->pid = child_pid;
1078
1079                 /*
1080                  *      Lock the mutex.
1081                  */
1082                 pthread_mutex_lock(&thread_pool.wait_mutex);
1083
1084                 rcode = lrad_hash_table_insert(thread_pool.waiters, tf);
1085
1086                 /*
1087                  *      Unlock the mutex.
1088                  */
1089                 pthread_mutex_unlock(&thread_pool.wait_mutex);
1090
1091                 if (!rcode) {
1092                         radlog(L_ERR, "Failed to store PID, creating what will be a zombie process %d",
1093                                (int) child_pid);
1094                 }
1095         }
1096
1097         /*
1098          *      Return whatever we were told.
1099          */
1100         return child_pid;
1101 }
1102
1103 /*
1104  *      We may not need this any more...
1105  */
1106 pid_t rad_waitpid(pid_t pid, int *status, int options)
1107 {
1108         thread_fork_t mytf, *tf;
1109
1110         reap_children();        /* be nice to non-wait thingies */
1111
1112         if (pid <= 0) return -1;
1113
1114         if ((options & WNOHANG) == 0) return -1;
1115
1116         mytf.pid = pid;
1117
1118         pthread_mutex_lock(&thread_pool.wait_mutex);
1119         tf = lrad_hash_table_finddata(thread_pool.waiters, &mytf);
1120
1121         if (!tf) {              /* not found.  It's a problem... */
1122                 pthread_mutex_unlock(&thread_pool.wait_mutex);
1123                 return waitpid(pid, status, options);
1124         }
1125
1126         if (tf->exited) {
1127                 *status = tf->status;
1128                 lrad_hash_table_delete(thread_pool.waiters, &mytf);
1129                 pthread_mutex_unlock(&thread_pool.wait_mutex);
1130                 return pid;
1131         }
1132         
1133         /*
1134          *      Don't wait, and it hasn't exited.  Return.
1135          */
1136         pthread_mutex_unlock(&thread_pool.wait_mutex);
1137         return 0;
1138 }
1139
1140 #else /* HAVE_PTHREAD_H */
/*
 *      Stand-in used when the server is built without pthreads.
 *
 *      There is no pool to create, so the argument is ignored and
 *      0 is always returned.
 */
int thread_pool_init(int spawn_flag)
{
        (void) spawn_flag;      /* unused without thread support */

        return 0;
}
1148
1149 /*
1150  *      call "radrespond".
1151  */
1152 int thread_pool_addrequest(REQUEST *request, RAD_REQUEST_FUNP fun)
1153 {
1154         rad_respond(request, fun);
1155         return 1;
1156 }
1157
1158 #endif /* HAVE_PTHREAD_H */