Be more forgiving when starting proxy sockets.
[freeradius.git] / src / main / event.c
index fffe2f5..844bdb6 100644 (file)
@@ -43,7 +43,6 @@ RCSID("$Id$")
 extern pid_t radius_pid;
 extern int dont_fork;
 extern int check_config;
-extern void force_log_reopen(void);
 extern char *debug_condition;
 
 /*
@@ -98,8 +97,11 @@ static fr_packet_list_t *proxy_list = NULL;
  *     We keep the proxy FD's here.  The RADIUS Id's are marked
  *     "allocated" per Id, via a bit per proxy FD.
  */
+static int             proxy_all_used = FALSE;
 static int             proxy_fds[32];
 static rad_listen_t    *proxy_listeners[32];
+static void check_for_zombie_home_server(REQUEST *request);
+static void remove_from_proxy_hash(REQUEST *request);
 #else
 #define remove_from_proxy_hash(foo)
 #endif
@@ -130,8 +132,8 @@ static void tv_add(struct timeval *tv, int usec_delay)
        tv->tv_usec += usec_delay;
 
        if (tv->tv_usec > USEC) {
-               tv->tv_usec -= USEC;
-               tv->tv_sec++;
+               tv->tv_sec += tv->tv_usec / USEC;
+               tv->tv_usec %= USEC;
        }
 }
 
@@ -146,6 +148,42 @@ static void remove_from_request_hash(REQUEST *request)
 }
 
 
+static void ev_request_free(REQUEST **prequest)
+{
+       REQUEST *request;
+       /* No-op when the handle, or the request it points at, is NULL. */
+       if (!prequest || !*prequest) return;
+
+       request = *prequest;
+
+#ifdef WITH_COA
+       if (request->coa) {
+               /*
+                *      Divorce the child CoA request from this parent
+                *      first, then recursively clean up the child.
+                */
+               request->coa->parent = NULL;
+               ev_request_free(&request->coa);
+       }
+
+       /*
+        *      If this request is itself a child, clear the parent's
+        *      pointer to it.  The parent is left alive.
+        */
+       if (request->parent && (request->parent->coa == request)) {
+               request->parent->coa = NULL;
+       }
+#endif
+
+       if (request->ev) fr_event_delete(el, &request->ev);
+#ifdef WITH_PROXY
+       if (request->in_proxy_hash) remove_from_proxy_hash(request);
+#endif
+       if (request->in_request_hash) remove_from_request_hash(request);
+
+       request_free(prequest);
+}
+
 #ifdef WITH_PROXY
 static REQUEST *lookup_in_proxy_hash(RADIUS_PACKET *reply)
 {
@@ -199,9 +237,24 @@ static REQUEST *lookup_in_proxy_hash(RADIUS_PACKET *reply)
 
 static void remove_from_proxy_hash(REQUEST *request)
 {
+       /*
+        *      Check this without grabbing the mutex because it's a
+        *      lot faster that way.
+        */
        if (!request->in_proxy_hash) return;
 
+       /*
+        *      The "not in hash" flag is definitive.  However, if the
+        *      flag says that it IS in the hash, there might still be
+        *      a race condition where it isn't.
+        */
        PTHREAD_MUTEX_LOCK(&proxy_mutex);
+
+       if (!request->in_proxy_hash) {
+               PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
+               return;
+       }
+
        fr_packet_list_yank(proxy_list, request->proxy);
        fr_packet_list_id_free(proxy_list, request->proxy);
 
@@ -216,47 +269,66 @@ static void remove_from_proxy_hash(REQUEST *request)
                request->home_server->currently_outstanding--;
        }
 
+       /*
+        *      Go from YES, in hash, to NO, not in hash, while we hold
+        *      the mutex.  This guarantees that when another thread
+        *      grabs the mutex, the "not in hash" flag is correct.
+        */
+       request->in_proxy_hash = FALSE;
+
        PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
+}
 
-       request->in_proxy_hash = FALSE;
+static int proxy_add_fds(rad_listen_t *proxy_listener)
+{
+       int i, proxy, found = -1;
+
+       proxy = proxy_listener->fd;
+       for (i = 0; i < 32; i++) {
+               /*
+                *      Probe the 32 slots from (fd & 0x1f), wrapping.  -1 marks
+                *      a free slot: save the socket and listener there and stop.
+                */
+               if (proxy_fds[(proxy + i) & 0x1f] == -1) {
+                       found = (proxy + i) & 0x1f;
+                       proxy_fds[found] = proxy;
+                       proxy_listeners[found] = proxy_listener;
+                       break;
+               }
+       }
+
+       return found;   /* slot index used, or -1 when no slot was free */
 }
 
 static int proxy_id_alloc(REQUEST *request, RADIUS_PACKET *packet)
 {
-       int i, proxy, found;
        rad_listen_t *proxy_listener;
 
        if (fr_packet_list_id_alloc(proxy_list, packet)) return 1;
 
+       if (proxy_all_used) return 0;
+
        /*
         *      Allocate a new proxy fd.  This function adds
         *      it to the tail of the list of listeners.  With
         *      some care, this can be thread-safe.
         */
-       proxy_listener = proxy_new_listener();
+       proxy_listener = proxy_new_listener(&packet->src_ipaddr, FALSE);
        if (!proxy_listener) {
-               RDEBUG2("ERROR: Failed to create a new socket for proxying requests.");
+               radlog(L_PROXY, "Failed to create a new socket for proxying requests.");
                return 0;
        }
        
        /*
         *      Cache it locally.
         */
-       found = -1;
-       proxy = proxy_listener->fd;
-       for (i = 0; i < 32; i++) {
-               /*
-                *      Found a free entry.  Save the socket,
-                *      and remember where we saved it.
-                */
-               if (proxy_fds[(proxy + i) & 0x1f] == -1) {
-                       found = (proxy + i) & 0x1f;
-                       proxy_fds[found] = proxy;
-                       proxy_listeners[found] = proxy_listener;
-                       break;
-               }
+       if (proxy_add_fds(proxy_listener) < 0) {
+               proxy_all_used = TRUE;
+               listen_free(&proxy_listener);
+               radlog(L_ERR, "Failed creating new proxy socket: server is too busy and home servers appear to be down");
+               return 0;
        }
-       rad_assert(found >= 0);
+
        
        if (!fr_packet_list_socket_add(proxy_list, proxy_listener->fd)) {
                        RDEBUG2("ERROR: Failed to create a new socket for proxying requests.");
@@ -295,7 +367,6 @@ static int insert_into_proxy_hash(REQUEST *request, int retransmit)
         */
        if (request->home_server) {
                request->home_server->currently_outstanding++;
-               request->home_server->stats.total_requests++;
        }
 
        if (retransmit) {
@@ -348,10 +419,12 @@ static int insert_into_proxy_hash(REQUEST *request, int retransmit)
        if (!fr_packet_list_insert(proxy_list, &request->proxy)) {
                fr_packet_list_id_free(proxy_list, request->proxy);
                PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
-               RDEBUG2("ERROR: Failed to insert entry into proxy list");
+               RDEBUG2("ERROR: Failed to insert entry into proxy list.");
                return 0;
        }
 
+       request->in_proxy_hash = TRUE;
+
        PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
 
        RDEBUG3(" proxy: allocating destination %s port %d - Id %d",
@@ -360,8 +433,6 @@ static int insert_into_proxy_hash(REQUEST *request, int retransmit)
               request->proxy->dst_port,
               request->proxy->id);
 
-       request->in_proxy_hash = TRUE;
-
        return 1;
 }
 
@@ -380,8 +451,9 @@ static void wait_for_proxy_id_to_expire(void *ctx)
        request->when = request->proxy_when;
 
 #ifdef WITH_COA
-       if ((request->proxy->code == PW_COA_REQUEST) ||
-           (request->proxy->code == PW_DISCONNECT_REQUEST)) {
+       if (((request->proxy->code == PW_COA_REQUEST) ||
+            (request->proxy->code == PW_DISCONNECT_REQUEST)) &&
+           (request->packet->code != request->proxy->code)) {
                request->when.tv_sec += request->home_server->coa_mrd;
        } else
 #endif
@@ -390,19 +462,16 @@ static void wait_for_proxy_id_to_expire(void *ctx)
        if ((request->num_proxied_requests == request->num_proxied_responses) ||
            timercmp(&now, &request->when, >)) {
                if (request->packet) {
-                       RDEBUG2("Cleaning up request %d ID %d with timestamp +%d",
+                       RDEBUG2("Cleaning up request %u ID %d with timestamp +%d",
                               request->number, request->packet->id,
                               (unsigned int) (request->timestamp - fr_start_time));
                } else {
-                       RDEBUG2("Cleaning up request %d with timestamp +%d",
+                       RDEBUG2("Cleaning up request %u with timestamp +%d",
                               request->number,
                               (unsigned int) (request->timestamp - fr_start_time));
                }
 
-               fr_event_delete(el, &request->ev);
-               remove_from_proxy_hash(request);
-               remove_from_request_hash(request);
-               request_free(&request);
+               ev_request_free(&request);
                return;
        }
 
@@ -416,20 +485,36 @@ static void wait_for_child_to_die(void *ctx)
        REQUEST *request = ctx;
 
        rad_assert(request->magic == REQUEST_MAGIC);
+       remove_from_request_hash(request);
 
-       if ((request->child_state == REQUEST_QUEUED) |
-           (request->child_state == REQUEST_RUNNING)) {
-               request->delay += (request->delay >> 1);
-               tv_add(&request->when, request->delay);
+       /*
+        *      If it's still queued (waiting for a thread to pick it
+        *      up) OR, it's running AND there's still a child thread
+        *      handling it, THEN delay some more.
+        */
+       if ((request->child_state == REQUEST_QUEUED) ||
+           ((request->child_state == REQUEST_RUNNING) &&
+            (pthread_equal(request->child_pid, NO_SUCH_CHILD_PID) == 0))) {
 
-               RDEBUG2("Child is still stuck for request %d", request->number);
+               /*
+                *      Cap delay at max_request_time.  Guard the names against NULL.
+                */
+               if (request->delay < (USEC * request->root->max_request_time)) {
+                       request->delay += (request->delay >> 1);
+                       radlog(L_INFO, "WARNING: Child is hung for request %u in component %s module %s.",
+                              request->number, request->component ? request->component : "<server core>",
+                              request->module ? request->module : "<server core>");
+               } else {
+                       request->delay = USEC * request->root->max_request_time;
+                       RDEBUG2("WARNING: Child is hung after \"max_request_time\" for request %u", request->number);
+               }
+               tv_add(&request->when, request->delay);
 
                INSERT_EVENT(wait_for_child_to_die, request);
                return;
        }
 
-       RDEBUG2("Child is finally responsive for request %d", request->number);
-       remove_from_request_hash(request);
+       RDEBUG2("Child is finally responsive for request %u", request->number);
 
 #ifdef WITH_PROXY
        if (request->proxy) {
@@ -438,7 +523,7 @@ static void wait_for_child_to_die(void *ctx)
        }
 #endif
 
-       request_free(&request);
+       ev_request_free(&request);
 }
 #endif
 
@@ -459,76 +544,15 @@ static void cleanup_delay(void *ctx)
        }
 #endif
 
-       RDEBUG2("Cleaning up request %d ID %d with timestamp +%d",
+       RDEBUG2("Cleaning up request %u ID %d with timestamp +%d",
               request->number, request->packet->id,
               (unsigned int) (request->timestamp - fr_start_time));
 
-       fr_event_delete(el, &request->ev);
-       request_free(&request);
+       ev_request_free(&request);
 }
 
 
 /*
- *     FIXME: Put into a libradius function.
- */
-#define MAX_PACKET_CODE (52)
-static const char *packet_codes[] = {
-  "",
-  "Access-Request",
-  "Access-Accept",
-  "Access-Reject",
-  "Accounting-Request",
-  "Accounting-Response",
-  "Accounting-Status",
-  "Password-Request",
-  "Password-Accept",
-  "Password-Reject",
-  "Accounting-Message",
-  "Access-Challenge",
-  "Status-Server",
-  "Status-Client",
-  "14",
-  "15",
-  "16",
-  "17",
-  "18",
-  "19",
-  "20",
-  "Resource-Free-Request",
-  "Resource-Free-Response",
-  "Resource-Query-Request",
-  "Resource-Query-Response",
-  "Alternate-Resource-Reclaim-Request",
-  "NAS-Reboot-Request",
-  "NAS-Reboot-Response",
-  "28",
-  "Next-Passcode",
-  "New-Pin",
-  "Terminate-Session",
-  "Password-Expired",
-  "Event-Request",
-  "Event-Response",
-  "35",
-  "36",
-  "37",
-  "38",
-  "39",
-  "Disconnect-Request",
-  "Disconnect-ACK",
-  "Disconnect-NAK",
-  "CoA-Request",
-  "CoA-ACK",
-  "CoA-NAK",
-  "46",
-  "47",
-  "48",
-  "49",
-  "IP-Address-Allocate",
-  "IP-Address-Release"
-};
-
-
-/*
  *     In daemon mode, AND this request has debug flags set.
  */
 #define DEBUG_PACKET if (!debug_flag && request->options && request->radlog) debug_packet
@@ -543,6 +567,8 @@ static void debug_packet(REQUEST *request, RADIUS_PACKET *packet, int direction)
 
        if (!packet) return;
 
+       rad_assert(request->radlog != NULL);
+
        if (direction == 0) {
                received = "Received";
                from = "from";  /* what else? */
@@ -562,9 +588,9 @@ static void debug_packet(REQUEST *request, RADIUS_PACKET *packet, int direction)
         *
         *      This really belongs in a utility library
         */
-       if ((packet->code > 0) && (packet->code < MAX_PACKET_CODE)) {
+       if ((packet->code > 0) && (packet->code < FR_MAX_PACKET_CODE)) {
                RDEBUG("%s %s packet %s host %s port %d, id=%d, length=%d",
-                      received, packet_codes[packet->code], from,
+                      received, fr_packet_codes[packet->code], from,
                       inet_ntop(ip->af, &ip->ipaddr, buffer, sizeof(buffer)),
                       port, packet->id, packet->data_len);
        } else {
@@ -588,7 +614,7 @@ static void reject_delay(void *ctx)
        rad_assert(request->magic == REQUEST_MAGIC);
        rad_assert(request->child_state == REQUEST_REJECT_DELAY);
 
-       RDEBUG2("Sending delayed reject for request %d", request->number);
+       RDEBUG2("Sending delayed reject for request %u", request->number);
 
        DEBUG_PACKET(request, request->reply, 1);
 
@@ -616,7 +642,7 @@ void revive_home_server(void *ctx)
         */
        if (home->ev) fr_event_delete(el, &home->ev);
 
-       radlog(L_INFO, "PROXY: Marking home server %s port %d alive again... we have no idea if it really is alive or not.",
+       radlog(L_PROXY, "Marking home server %s port %d alive again... we have no idea if it really is alive or not.",
               inet_ntop(home->ipaddr.af, &home->ipaddr.ipaddr,
                         buffer, sizeof(buffer)),
               home->port);
@@ -635,13 +661,15 @@ static void no_response_to_ping(void *ctx)
        home = request->home_server;
        home->num_received_pings = 0;
 
-       RDEBUG2("No response to status check %d from home server %s port %d",
+       radlog(L_ERR, "No response to status check %d for home server %s port %d",
               request->number,
               inet_ntop(request->proxy->dst_ipaddr.af,
                         &request->proxy->dst_ipaddr.ipaddr,
                         buffer, sizeof(buffer)),
               request->proxy->dst_port);
 
+       check_for_zombie_home_server(request);
+
        wait_for_proxy_id_to_expire(request);
 }
 
@@ -656,7 +684,7 @@ static void received_response_to_ping(REQUEST *request)
        home = request->home_server;
        home->num_received_pings++;
 
-       RDEBUG2("Received response to status check %d (%d in current sequence)",
+       radlog(L_PROXY, "Received response to status check %d (%d in current sequence)",
               request->number, home->num_received_pings);
 
        /*
@@ -665,7 +693,6 @@ static void received_response_to_ping(REQUEST *request)
        fr_event_delete(el, &request->ev);
        remove_from_proxy_hash(request);
        rad_assert(request->in_request_hash == FALSE);
-       request_free(&request);
 
        /*
         *      The control socket may have marked the home server as
@@ -691,7 +718,7 @@ static void received_response_to_ping(REQUEST *request)
                RDEBUG2("Hmm... no event for home server.  Oh well.");
        }
 
-       radlog(L_INFO, "PROXY: Marking home server %s port %d alive",
+       radlog(L_PROXY, "Marking home server %s port %d alive",
               inet_ntop(request->proxy->dst_ipaddr.af,
                         &request->proxy->dst_ipaddr.ipaddr,
                         buffer, sizeof(buffer)),
@@ -710,8 +737,9 @@ static void ping_home_server(void *ctx)
        REQUEST *request;
        VALUE_PAIR *vp;
 
-       if (home->state == HOME_STATE_ALIVE) {
-               radlog(L_INFO, "Suspicious proxy state... continuing");
+       if ((home->state == HOME_STATE_ALIVE) ||
+           (home->ping_check == HOME_PING_CHECK_NONE) ||
+           (home->ev != NULL)) {
                return;
        }
 
@@ -771,9 +799,9 @@ static void ping_home_server(void *ctx)
        rad_assert(request->proxy_listener == NULL);
 
        if (!insert_into_proxy_hash(request, FALSE)) {
-               RDEBUG2("ERROR: Failed inserting status check %d into proxy hash.  Discarding it.",
+               RDEBUG2("ERROR: Failed to insert status check %d into proxy list.  Discarding it.",
                       request->number);
-               request_free(&request);
+               ev_request_free(&request);
                return;
        }
        rad_assert(request->proxy_listener != NULL);
@@ -782,6 +810,8 @@ static void ping_home_server(void *ctx)
 
        request->next_callback = NULL;
        request->child_state = REQUEST_PROXIED;
+       gettimeofday(&request->when, NULL);
+       home->when = request->when;
        request->when.tv_sec += home->ping_timeout;;
 
        INSERT_EVENT(no_response_to_ping, request);
@@ -807,7 +837,7 @@ void mark_home_server_dead(home_server *home, struct timeval *when)
        int previous_state = home->state;
        char buffer[128];
 
-       radlog(L_INFO, "PROXY: Marking home server %s port %d as dead.",
+       radlog(L_PROXY, "Marking home server %s port %d as dead.",
               inet_ntop(home->ipaddr.af, &home->ipaddr.ipaddr,
                         buffer, sizeof(buffer)),
               home->port);
@@ -906,6 +936,8 @@ static int setup_post_proxy_fail(REQUEST *request)
        DICT_VALUE *dval = NULL;
        VALUE_PAIR *vp;
 
+       request->child_state = REQUEST_RUNNING;
+
        if (request->packet->code == PW_AUTHENTICATION_REQUEST) {
                dval = dict_valbyname(PW_POST_PROXY_TYPE, "Fail-Authentication");
 
@@ -971,7 +1003,6 @@ static void post_proxy_fail_handler(REQUEST *request)
         *      to do next.
         */
        if (!setup_post_proxy_fail(request)) {
-               request->child_state = REQUEST_RUNNING;
                request_post_handler(request);
 
        } else {
@@ -1030,45 +1061,82 @@ static void no_response_to_proxied_request(void *ctx)
                return;
        }
 
-       radlog(L_ERR, "Rejecting request %d due to lack of any response from home server %s port %d",
-              request->number,
-              inet_ntop(request->proxy->dst_ipaddr.af,
-                        &request->proxy->dst_ipaddr.ipaddr,
-                        buffer, sizeof(buffer)),
-              request->proxy->dst_port);
-
        check_for_zombie_home_server(request);
 
        home = request->home_server;
 
-       post_proxy_fail_handler(request);
-
        /*
-        *      Don't touch request due to race conditions
+        *      The default as of 2.1.7 is to allow requests to
+        *      fail-over to a backup home server when this one does
+        *      not respond.  The old behavior can be configured as
+        *      well.
         */
-       if (home->state == HOME_STATE_IS_DEAD) {
-               rad_assert(home->ev != NULL); /* or it will never wake up */
-               return;
+       if (home->no_response_fail) {
+               radlog(L_ERR, "Rejecting request %u (proxy Id %d) due to lack of any response from home server %s port %d",
+                      request->number, request->proxy->id,
+                      inet_ntop(request->proxy->dst_ipaddr.af,
+                                &request->proxy->dst_ipaddr.ipaddr,
+                                buffer, sizeof(buffer)),
+                      request->proxy->dst_port);
+
+               post_proxy_fail_handler(request);
+       } else {
+               rad_assert(request->ev == NULL);
+               request->child_state = REQUEST_RUNNING;
+               wait_a_bit(request);
        }
 
        /*
-        *      Enable the zombie period when we notice that the home
-        *      server hasn't responded.  We do NOT back-date the start
-        *      of the zombie period.
+        *      Don't touch request due to race conditions
         */
-       if (home->state == HOME_STATE_ALIVE) {
-               radlog(L_ERR, "PROXY: Marking home server %s port %d as zombie (it looks like it is dead).",
-                      inet_ntop(home->ipaddr.af, &home->ipaddr.ipaddr,
-                                buffer, sizeof(buffer)),
-                      home->port);
-               home->state = HOME_STATE_ZOMBIE;
-               home->zombie_period_start = now;
 
+       /*
+        *      If it's not alive, don't try to make it a zombie.
+        */
+       if (home->state != HOME_STATE_ALIVE) {
                /*
-                *      Start pinging the home server.
+                *      Don't check home->ev due to race conditions.
                 */
-               ping_home_server(home);
+               return;
+       }
+
+       /*
+        *      We've received a real packet recently.  Don't mark the
+        *      server as zombie until we've received NO packets for a
+        *      while.  The "1/4" of zombie period was chosen rather
+        *      arbitrarily.  It's a balance between too short, which
+        *      gives quick fail-over and fail-back, or too long,
+        *      where the proxy still sends packets to an unresponsive
+        *      home server.
+        */
+       if ((home->last_packet + ((home->zombie_period + 3) / 4)) >= now.tv_sec) {
+               return;
        }
+
+       /*
+        *      Enable the zombie period when we notice that the home
+        *      server hasn't responded for a while.  We back-date the
+        *      zombie period to when we last received a response from
+        *      the home server; the sub-second part is USEC / 2.
+        */
+       home->state = HOME_STATE_ZOMBIE;
+       
+       home->zombie_period_start.tv_sec = home->last_packet;
+       home->zombie_period_start.tv_usec = USEC / 2;
+       
+       fr_event_delete(el, &home->ev);
+       home->currently_outstanding = 0;
+       home->num_received_pings = 0;
+       
+       radlog(L_PROXY, "Marking home server %s port %d as zombie (it looks like it is dead).",
+              inet_ntop(home->ipaddr.af, &home->ipaddr.ipaddr,
+                        buffer, sizeof(buffer)),
+              home->port);
+       
+       /*
+        *      Start pinging the home server.
+        */
+       ping_home_server(home);
 }
 #endif
 
@@ -1100,6 +1168,25 @@ static void wait_a_bit(void *ctx)
        switch (request->child_state) {
        case REQUEST_QUEUED:
        case REQUEST_RUNNING:
+               /*
+                *      If we're not thread-capable, OR we're capable,
+                *      but have been told to run without threads,
+                *      complain when the request is queued for a
+                *      thread, or running in a child thread.
+                */
+#ifdef HAVE_PTHREAD_H
+               if (!have_children)
+#endif
+               {
+                       rad_assert("We do not have threads, but the request is marked as queued or running in a child thread" == NULL);
+                       break;
+               }
+
+#ifdef HAVE_PTHREAD_H
+               /*
+                *      If we have threads, wait for the child thread
+                *      to stop.
+                */
                when = request->received;
                when.tv_sec += request->root->max_request_time;
 
@@ -1116,24 +1203,18 @@ static void wait_a_bit(void *ctx)
                 *      Request still has more time.  Continue
                 *      waiting.
                 */
-               if (timercmp(&now, &when, <) ||
-                   ((request->listener->type == RAD_LISTEN_DETAIL) &&
-                    (request->child_state == REQUEST_QUEUED))) {
+               if (timercmp(&now, &when, <)) {
                        if (request->delay < (USEC / 10)) {
                                request->delay = USEC / 10;
                        }
                        request->delay += request->delay >> 1;
 
-#ifdef WITH_DETAIL
                        /*
-                        *      Cap wait at some sane value for detail
-                        *      files.
+                        *      Cap delays at something reasonable.
                         */
-                       if ((request->listener->type == RAD_LISTEN_DETAIL) &&
-                           (request->delay > (request->root->max_request_time * USEC))) {
+                       if (request->delay > (request->root->max_request_time * USEC)) {
                                request->delay = request->root->max_request_time * USEC;
                        }
-#endif
 
                        request->when = now;
                        tv_add(&request->when, request->delay);
@@ -1141,51 +1222,29 @@ static void wait_a_bit(void *ctx)
                        break;
                }
 
-#if defined(HAVE_PTHREAD_H)
+               request->master_state = REQUEST_STOP_PROCESSING;
+
                /*
                 *      A child thread MAY still be running on the
                 *      request.  Ask the thread to stop working on
                 *      the request.
                 */
-               if (have_children) {
-                       /* FIXME: kill unresponsive children? */
-
-                       /*
-                        *      Print this error message ONLY if
-                        *      there's a child currently processing
-                        *      the request.  As we don't have thread
-                        *      locks here, there may be race
-                        *      conditions on this check.  But it's
-                        *      just an error message, so that's OK.
-                        */
-                       if (!pthread_equal(request->child_pid, NO_SUCH_CHILD_PID)) {
-                               radlog(L_ERR, "WARNING: Unresponsive child for request %d, in module %s component %s",
-                                      request->number,
-                                      request->module ? request->module : "<server core>",
-                                      request->component ? request->component : "<server core>");
-                       }
+               if (have_children &&
+                   (pthread_equal(request->child_pid, NO_SUCH_CHILD_PID) == 0)) {
+                       radlog(L_ERR, "WARNING: Unresponsive child for request %u, in component %s module %s",
+                              request->number,
+                              request->component ? request->component : "<server core>",
+                              request->module ? request->module : "<server core>");
 
-                       request->master_state = REQUEST_STOP_PROCESSING;
-                       
-                       request->delay = USEC / 4;
-                       tv_add(&request->when, request->delay);
-                       callback = wait_for_child_to_die;
-                       break;
                }
+                       
+               request->delay = USEC;
+               tv_add(&request->when, request->delay);
+               callback = wait_for_child_to_die;
+               break;
 #endif
 
                /*
-                *      Else there are no child threads.  We probably
-                *      should have just marked the request as 'done'
-                *      elsewhere, like in the post-proxy-fail
-                *      handler.  But doing that would involve
-                *      checking for max_request_time in multiple
-                *      places, so this may be simplest.
-                */
-               request->child_state = REQUEST_DONE;
-               /* FALL-THROUGH */
-
-               /*
                 *      Mark the request as no longer running,
                 *      and clean it up.
                 */
@@ -1194,7 +1253,7 @@ static void wait_a_bit(void *ctx)
                request->child_pid = NO_SUCH_CHILD_PID;
 #endif
 
-#ifdef WTH_COA
+#ifdef WITH_COA
                /*
                 *      This is a CoA request.  It's been divorced
                 *      from everything else, so we clean it up now.
@@ -1207,9 +1266,7 @@ static void wait_a_bit(void *ctx)
                        /*
                         *      FIXME: Do CoA MIBs
                         */
-                       fr_event_delete(el, &request->ev);
-                       remove_from_proxy_hash(request);
-                       request_free(&request);
+                       ev_request_free(&request);
                        return;
                }
 #endif
@@ -1249,11 +1306,8 @@ static void wait_a_bit(void *ctx)
         *      mode, with no threads...
         */
        if (!callback) {
-               RDEBUG("WARNING: Internal sanity check failed in event handler for request %d: Discarding the request!", request->number);
-               fr_event_delete(el, &request->ev);
-               remove_from_proxy_hash(request);
-               remove_from_request_hash(request);
-               request_free(&request);
+               RDEBUG("WARNING: Internal sanity check failed in event handler for request %u: Discarding the request!", request->number);
+               ev_request_free(&request);
                return;
        }
 
@@ -1381,7 +1435,7 @@ static void retransmit_coa_request(void *ctx)
        
        if (update_event_timestamp(request->proxy, now.tv_sec)) {
                if (!insert_into_proxy_hash(request, TRUE)) {
-                       DEBUG("ERROR: Failed re-inserting CoA request into proxy hash.");
+                       DEBUG("ERROR: Failed to insert retransmission of CoA request into proxy list.");
                        return;
                }
 
@@ -1412,18 +1466,24 @@ static int originated_coa_request(REQUEST *request)
        rad_assert(!request->in_proxy_hash);
        rad_assert(request->proxy_reply == NULL);
 
+       /*
+        *      Check whether we want to originate one, or cancel one.
+        */
        vp = pairfind(request->config_items, PW_SEND_COA_REQUEST);
-       if (!vp && request->coa) vp = pairfind(request->coa->proxy->vps, PW_SEND_COA_REQUEST);
+       if (!vp && request->coa) {
+               vp = pairfind(request->coa->proxy->vps, PW_SEND_COA_REQUEST);
+       }
+
        if (vp) {
                if (vp->vp_integer == 0) {
-                       request_free(&request->coa);
+                       ev_request_free(&request->coa);
                        return 1;       /* success */
                }
-
-               if (!request->coa) request_alloc_coa(request);
-               if (!request->coa) return 0;
        }
 
+       if (!request->coa) request_alloc_coa(request);
+       if (!request->coa) return 0;
+
        coa = request->coa;
 
        /*
@@ -1448,7 +1508,7 @@ static int originated_coa_request(REQUEST *request)
                        RDEBUG2("WARNING: No such home_server_pool %s",
                               vp->vp_strvalue);
        fail:
-                       request_free(&request->coa);
+                       ev_request_free(&request->coa);
                        return 0;
                }
 
@@ -1569,11 +1629,19 @@ static int originated_coa_request(REQUEST *request)
        coa->proxy->dst_port = coa->home_server->port;
 
        if (!insert_into_proxy_hash(coa, FALSE)) {
-               DEBUG("ERROR: Failed inserting CoA request into proxy hash.");
+               DEBUG("ERROR: Failed to insert CoA request into proxy list.");
                goto fail;
        }
 
        /*
+        *      We CANNOT divorce the CoA request from the parent
+        *      request.  This function is running in a child thread,
+        *      and we need access to the main event loop in order to
+        *      add the timers for the CoA packet.  See
+        *      wait_a_bit().
+        */
+
+       /*
         *      Forget about the original request completely at this
         *      point.
         */
@@ -1612,7 +1680,9 @@ static int originated_coa_request(REQUEST *request)
         */
        request->num_proxied_requests = 1;
        request->num_proxied_responses = 0;
+#ifdef HAVE_PTHREAD_H
        request->child_pid = NO_SUCH_CHILD_PID;
+#endif
 
        update_event_timestamp(request->proxy, request->proxy_when.tv_sec);
 
@@ -1769,7 +1839,8 @@ static int request_pre_handler(REQUEST *request)
        }
 
        if (rcode < 0) {
-               radlog(L_ERR, "%s Dropping packet without response.", fr_strerror());
+               RDEBUG("%s Dropping packet without response.", fr_strerror());
+               request->reply->offset = -2; /* bad authenticator */
                request->child_state = REQUEST_DONE;
                return 0;
        }
@@ -1782,8 +1853,8 @@ static int request_pre_handler(REQUEST *request)
 #ifdef WITH_PROXY
        if (request->proxy) {
                return process_proxy_reply(request);
-#endif
        }
+#endif
 
        return 1;
 }
@@ -1798,13 +1869,20 @@ static int proxy_request(REQUEST *request)
        struct timeval when;
        char buffer[128];
 
+#ifdef WITH_COA
+       if (request->coa) {
+               RDEBUG("WARNING: Cannot proxy and originate CoA packets at the same time.  Cancelling CoA request");
+               ev_request_free(&request->coa);
+       }
+#endif
+
        if (request->home_server->server) {
-               RDEBUG("ERROR: Cannot perform real proxying to a virtual server.");
+               RDEBUG("ERROR: Cannot proxy to a virtual server.");
                return 0;
        }
 
        if (!insert_into_proxy_hash(request, FALSE)) {
-               RDEBUG("ERROR: Failed inserting request into proxy hash.");
+               RDEBUG("ERROR: Failed to insert entry into proxy list.");
                return 0;
        }
 
@@ -1825,7 +1903,7 @@ static int proxy_request(REQUEST *request)
        }
        request->next_callback = no_response_to_proxied_request;
 
-       RDEBUG2("Proxying request %d to home server %s port %d",
+       RDEBUG2("Proxying request %u to home server %s port %d",
               request->number,
               inet_ntop(request->proxy->dst_ipaddr.af,
                         &request->proxy->dst_ipaddr.ipaddr,
@@ -1888,21 +1966,42 @@ static int proxy_to_virtual_server(REQUEST *request)
 
        } else {
                RDEBUG2("Unknown packet type %d", request->proxy->code);
-               request_free(&fake);
+               ev_request_free(&fake);
                return 0;
        }
 
        RDEBUG2(">>> Sending proxied request internally to virtual server.");
        radius_handle_request(fake, fun);
-       RDEBUG2("<<< Received proxied response from internal virtual server.");
+       RDEBUG2("<<< Received proxied response code %d from internal virtual server.", fake->reply->code);
 
-       request->proxy_reply = fake->reply;
-       fake->reply = NULL;
+       if (fake->reply->code != 0) {
+               request->proxy_reply = fake->reply;
+               fake->reply = NULL;
+       } else {
+               /*
+                *      There was no response
+                */
+               setup_post_proxy_fail(request);
+       }
 
-       request_free(&fake);
+       ev_request_free(&fake);
 
        process_proxy_reply(request);
-       fun(request);
+
+       /*
+        *      Process it through the normal section again, but ONLY
+        *      if we received a proxy reply.
+        */
+       if (request->proxy_reply) {
+               if (request->server) RDEBUG("server %s {",
+                                           request->server != NULL ?
+                                           request->server : ""); 
+               fun(request);
+               
+               if (request->server) RDEBUG("} # server %s",
+                                           request->server != NULL ?
+                                           request->server : "");
+       }
 
        return 2;               /* success, but NOT '1' !*/
 }
@@ -1929,13 +2028,50 @@ static int successfully_proxied_request(REQUEST *request)
         *
         *      FIXME: This should really be a serious error.
         */
-       if (request->in_proxy_hash) {
+       if (request->in_proxy_hash ||
+           (request->proxy_reply && (request->proxy_reply->code != 0))) {
                return 0;
        }
 
        realmpair = pairfind(request->config_items, PW_PROXY_TO_REALM);
        if (!realmpair || (realmpair->length == 0)) {
-               return 0;
+               int pool_type;
+
+               vp = pairfind(request->config_items, PW_HOME_SERVER_POOL);
+               if (!vp) return 0;
+
+               switch (request->packet->code) {
+               case PW_AUTHENTICATION_REQUEST:
+                       pool_type = HOME_TYPE_AUTH;
+                       break;
+
+#ifdef WITH_ACCOUNTING
+               case PW_ACCOUNTING_REQUEST:
+                       pool_type = HOME_TYPE_ACCT;
+                       break;
+#endif
+
+#ifdef WITH_COA
+               case PW_COA_REQUEST:
+               case PW_DISCONNECT_REQUEST:
+                       pool_type = HOME_TYPE_COA;
+                       break;
+#endif
+
+               default:
+                       return 0;
+               }
+
+               pool = home_pool_byname(vp->vp_strvalue, pool_type);
+               if (!pool) {
+                       RDEBUG2("ERROR: Cannot proxy to unknown pool %s",
+                               vp->vp_strvalue);
+                       return 0;
+               }
+
+               realmname = NULL; /* no realms */
+               realm = NULL;
+               goto found_pool;
        }
 
        realmname = (char *) realmpair->vp_strvalue;
@@ -1957,6 +2093,12 @@ static int successfully_proxied_request(REQUEST *request)
                pool = realm->acct_pool;
 #endif
 
+#ifdef WITH_COA
+       } else if ((request->packet->code == PW_COA_REQUEST) ||
+                  (request->packet->code == PW_DISCONNECT_REQUEST)) {
+               pool = realm->acct_pool;
+#endif
+
        } else {
                rad_panic("Internal sanity check failed");
        }
@@ -1967,6 +2109,7 @@ static int successfully_proxied_request(REQUEST *request)
                return 0;
        }
 
+found_pool:
        home = home_server_ldb(realmname, pool, request);
        if (!home) {
                RDEBUG2("ERROR: Failed to find live home server for realm %s",
@@ -1975,11 +2118,18 @@ static int successfully_proxied_request(REQUEST *request)
        }
        request->home_pool = pool;
 
+#ifdef WITH_COA
+       /*
+        *      Once we've decided to proxy a request, we cannot send
+        *      a CoA packet.  So we free up any CoA packet here.
+        */
+       ev_request_free(&request->coa);
+#endif
        /*
         *      Remember that we sent the request to a Realm.
         */
-       pairadd(&request->packet->vps,
-               pairmake("Realm", realmname, T_OP_EQ));
+       if (realmname) pairadd(&request->packet->vps,
+                              pairmake("Realm", realmname, T_OP_EQ));
 
        /*
         *      Strip the name, if told to.
@@ -1987,7 +2137,7 @@ static int successfully_proxied_request(REQUEST *request)
         *      Doing it here catches the case of proxied tunneled
         *      requests.
         */
-       if (realm->striprealm == TRUE &&
+       if (realm && (realm->striprealm == TRUE) &&
           (strippedname = pairfind(request->proxy->vps, PW_STRIPPED_USER_NAME)) != NULL) {
                /*
                 *      If there's a Stripped-User-Name attribute in
@@ -2026,7 +2176,8 @@ static int successfully_proxied_request(REQUEST *request)
         *      since we can't use the request authenticator
         *      anymore - we changed it.
         */
-       if (pairfind(request->proxy->vps, PW_CHAP_PASSWORD) &&
+       if ((request->packet->code == PW_AUTHENTICATION_REQUEST) &&
+           pairfind(request->proxy->vps, PW_CHAP_PASSWORD) &&
            pairfind(request->proxy->vps, PW_CHAP_CHALLENGE) == NULL) {
                vp = radius_paircreate(request, &request->proxy->vps,
                                       PW_CHAP_CHALLENGE, PW_TYPE_OCTETS);
@@ -2109,7 +2260,7 @@ static int successfully_proxied_request(REQUEST *request)
        }
 
        if (!proxy_request(request)) {
-               RDEBUG("ERROR: Failed to proxy request %d", request->number);
+               RDEBUG("ERROR: Failed to proxy request %u", request->number);
                return -1;
        }
        
@@ -2126,7 +2277,7 @@ static void request_post_handler(REQUEST *request)
        if ((request->master_state == REQUEST_STOP_PROCESSING) ||
            (request->parent &&
             (request->parent->master_state == REQUEST_STOP_PROCESSING))) {
-               RDEBUG2("Request %d was cancelled.", request->number);
+               RDEBUG2("request %u was cancelled.", request->number);
 #ifdef HAVE_PTHREAD_H
                request->child_pid = NO_SUCH_CHILD_PID;
 #endif
@@ -2153,7 +2304,11 @@ static void request_post_handler(REQUEST *request)
        }
 #endif
 
-       if ((request->reply->code == 0) &&
+       /*
+        *      Catch Auth-Type := Reject BEFORE proxying the packet.
+        */
+       if ((request->packet->code == PW_AUTHENTICATION_REQUEST) &&
+           (request->reply->code == 0) &&
            ((vp = pairfind(request->config_items, PW_AUTH_TYPE)) != NULL) &&
            (vp->vp_integer == PW_AUTHTYPE_REJECT)) {
                request->reply->code = PW_AUTHENTICATION_REJECT;
@@ -2167,15 +2322,7 @@ static void request_post_handler(REQUEST *request)
            (request->packet->code != PW_STATUS_SERVER)) {
                int rcode = successfully_proxied_request(request);
 
-#ifdef WITH_COA
-               /*
-                *      If we proxy it, we CANNOT originate a CoA
-                *      request at the same time.
-                */
-               if (rcode != 0) request_free(&request->coa);
-#endif
-
-               if (rcode == 1) return;
+               if (rcode == 1) return; /* request is invalid */
 
                /*
                 *      Failed proxying it (dead home servers, etc.)
@@ -2197,6 +2344,13 @@ static void request_post_handler(REQUEST *request)
                 *      OR we proxied it internally to a virutal server.
                 */
        }
+
+#ifdef WITH_COA
+       else if (request->proxy && request->coa) {
+               RDEBUG("WARNING: Cannot proxy and originate CoA packets at the same time.  Cancelling CoA request");
+               ev_request_free(&request->coa);
+       }
+#endif
 #endif
 
        /*
@@ -2235,13 +2389,23 @@ static void request_post_handler(REQUEST *request)
                        vp = pairfind(request->config_items,
                                      PW_RESPONSE_PACKET_TYPE);
                        if (!vp) {
-                               RDEBUG2("There was no response configured: rejecting request %d",
+                               RDEBUG2("There was no response configured: rejecting request %u",
                                       request->number);
                                request->reply->code = PW_AUTHENTICATION_REJECT;
+
                        } else if (vp->vp_integer == 256) {
-                               RDEBUG2("Not responding to request %d",
+                               RDEBUG2("Not responding to request %u",
                                       request->number);
 
+                               /*
+                                *      Force cleanup after a long
+                                *      time, so that we don't
+                                *      re-process the packet.
+                                */
+                               request->next_when.tv_sec += request->root->max_request_time;
+                               request->next_callback = cleanup_delay;
+                               child_state = REQUEST_CLEANUP_DELAY;
+                               break;
                        } else {
                                request->reply->code = vp->vp_integer;
 
@@ -2270,7 +2434,7 @@ static void request_post_handler(REQUEST *request)
                        when.tv_sec += request->root->reject_delay;
 
                        if (timercmp(&when, &request->next_when, >)) {
-                               RDEBUG2("Delaying reject of request %d for %d seconds",
+                               RDEBUG2("Delaying reject of request %u for %d seconds",
                                       request->number,
                                       request->root->reject_delay);
                                request->next_when = when;
@@ -2283,6 +2447,10 @@ static void request_post_handler(REQUEST *request)
                        }
                }
 
+#ifdef WITH_COA
+       case PW_COA_REQUEST:
+       case PW_DISCONNECT_REQUEST:
+#endif
                request->next_when.tv_sec += request->root->cleanup_delay;
                request->next_callback = cleanup_delay;
                child_state = REQUEST_CLEANUP_DELAY;
@@ -2303,14 +2471,9 @@ static void request_post_handler(REQUEST *request)
                break;
 
        default:
-               if ((request->packet->code > 1024) &&
-                   (request->packet->code < (1024 + 254 + 1))) {
-                       request->next_callback = NULL;
-                       child_state = REQUEST_DONE;
-                       break;
-               }
-
-               radlog(L_ERR, "Unknown packet type %d", request->packet->code);
+               /*
+                *      DHCP, VMPS, etc.
+                */
                request->next_callback = NULL;
                child_state = REQUEST_DONE;
                break;
@@ -2332,10 +2495,14 @@ static void request_post_handler(REQUEST *request)
 #ifdef WITH_COA
        /*
         *      Now that we've completely processed the request,
-        *      see if we need to originate a CoA request.
-        */
-       if (request->coa ||
-           (pairfind(request->config_items, PW_SEND_COA_REQUEST) != NULL)) {
+        *      see if we need to originate a CoA request.  But ONLY
+        *      if it wasn't proxied.
+        */
+       if (!request->proxy &&
+           (request->packet->code != PW_COA_REQUEST) &&
+           (request->packet->code != PW_DISCONNECT_REQUEST) &&
+           (request->coa ||
+            (pairfind(request->config_items, PW_SEND_COA_REQUEST) != NULL))) {
                if (!originated_coa_request(request)) {
                        RDEBUG2("Do CoA Fail handler here");
                }
@@ -2378,7 +2545,7 @@ static void request_post_handler(REQUEST *request)
        }
 #endif
 
-       RDEBUG2("Finished request %d.", request->number);
+       RDEBUG2("Finished request %u.", request->number);
        rad_assert(child_state >= 0);
        request->child_state = child_state;
 
@@ -2405,7 +2572,7 @@ static void received_retransmit(REQUEST *request, const RADCLIENT *client)
        discard:
 #endif
                radlog(L_ERR, "Discarding duplicate request from "
-                      "client %s port %d - ID: %d due to unfinished request %d",
+                      "client %s port %d - ID: %d due to unfinished request %u",
                       client->shortname,
                       request->packet->src_port,request->packet->id,
                       request->number);
@@ -2450,7 +2617,7 @@ static void received_retransmit(REQUEST *request, const RADCLIENT *client)
 
                        home = home_server_ldb(NULL, request->home_pool, request);
                        if (!home) {
-                               RDEBUG2("Failed to find live home server for request %d", request->number);
+                               RDEBUG2("ERROR: Failed to find live home server for request %u", request->number);
                        no_home_servers:
                                /*
                                 *      Do post-request processing,
@@ -2484,7 +2651,7 @@ static void received_retransmit(REQUEST *request, const RADCLIENT *client)
                         *      Try to proxy the request.
                         */
                        if (!proxy_request(request)) {
-                               RDEBUG("ERROR: Failed to re-proxy request %d", request->number);
+                               RDEBUG("ERROR: Failed to re-proxy request %u", request->number);
                                goto no_home_servers;
                        }
 
@@ -2521,6 +2688,14 @@ static void received_retransmit(REQUEST *request, const RADCLIENT *client)
 
        case REQUEST_CLEANUP_DELAY:
        case REQUEST_DONE:
+               if (request->reply->code == 0) {
+                       RDEBUG2("Ignoring retransmit from client %s port %d "
+                               "- ID: %d, no reply was configured",
+                               client->shortname,
+                               request->packet->src_port, request->packet->id);
+                       return;
+               }
+
                /*
                 *      FIXME: This sends duplicate replies to
                 *      accounting requests, even if Acct-Delay-Time
@@ -2543,7 +2718,7 @@ static void received_conflicting_request(REQUEST *request,
                                         const RADCLIENT *client)
 {
        radlog(L_ERR, "Received conflicting packet from "
-              "client %s port %d - ID: %d due to unfinished request %d.  Giving up on old request.",
+              "client %s port %d - ID: %d due to unfinished request %u.  Giving up on old request.",
               client->shortname,
               request->packet->src_port, request->packet->id,
               request->number);
@@ -2572,10 +2747,21 @@ static void received_conflicting_request(REQUEST *request,
 #endif
 
                /*
+                *      Catch race conditions.  It may have switched
+                *      from running to done while this code is being
+                *      executed.
+                */
+       case REQUEST_REJECT_DELAY:
+       case REQUEST_CLEANUP_DELAY:
+       case REQUEST_DONE:
+               break;
+
+               /*
                 *      It's in some other state, and therefore also
                 *      in the event queue.  At some point, the
                 *      child will notice, and we can then delete it.
                 */
+       case REQUEST_PROXIED:
        default:
                rad_assert(request->ev != NULL);
                break;
@@ -2661,6 +2847,26 @@ int received_request(rad_listen_t *listener,
                        struct timeval when;
 
                default:
+                       /*
+                        *      Special hacks for race conditions.
+                        *      The reply is encoded, and therefore
+                        *      likely sent.  We received a *new*
+                        *      packet from the client, likely before
+                        *      the next line or two of code which
+                        *      updated the child state.  In this
+                        *      case, just accept the new request.
+                        */
+                       if ((request->reply->code != 0) &&
+                           request->reply->data) {
+                               radlog(L_INFO, "WARNING: Allowing fast client %s port %d - ID: %d for recent request %u.",
+                                      client->shortname,
+                                      packet->src_port, packet->id,
+                                      request->number);
+                               remove_from_request_hash(request);
+                               request = NULL;
+                               break;
+                       }
+
                        gettimeofday(&when, NULL);
                        when.tv_sec -= 1;
 
@@ -2675,7 +2881,7 @@ int received_request(rad_listen_t *listener,
                         */
                        if (timercmp(&when, &request->received, <)) {
                                radlog(L_ERR, "Discarding conflicting packet from "
-                                      "client %s port %d - ID: %d due to recent request %d.",
+                                      "client %s port %d - ID: %d due to recent request %u.",
                                       client->shortname,
                                       packet->src_port, packet->id,
                                       request->number);
@@ -2744,8 +2950,8 @@ int received_request(rad_listen_t *listener,
         *      Remember the request in the list.
         */
        if (!fr_packet_list_insert(pl, &request->packet)) {
-               radlog(L_ERR, "Failed to insert request %d in the list of live requests: discarding", request->number);
-               request_free(&request);
+               radlog(L_ERR, "Failed to insert request %u in the list of live requests: discarding", request->number);
+               ev_request_free(&request);
                return 0;
        }
 
@@ -2816,52 +3022,16 @@ REQUEST *received_proxy_response(RADIUS_PACKET *packet)
                return NULL;
        }
 
-       gettimeofday(&now, NULL);
-
-       request->home_server->state = HOME_STATE_ALIVE;
-       
-#ifdef WITH_COA
        /*
-        *      This is a response to a CoA packet that we originated.
-        *      It's handled differently from normal proxied packets.
-        */
-       if (request->packet->code != request->proxy->code) {
-               /*
-                *      The parent request is done, but we haven't
-                *      figured that out yet.  Separate the two
-                *      requests here, the FIRST time we process the
-                *      packet.  If there is a proxy reply already, it
-                *      gets ignored below.
-                */
-               if (!request->proxy_reply && request->parent &&
-                   (request->parent->coa == request)) {
-                       request->parent->coa = NULL;
-                       request->parent = NULL;
-               }
-
-               /*
-                *      request->reply exists, and we don't care about
-                *      it here.  So we skip the next step.
-                */
-               rad_assert(request->packet != NULL);
-               rad_assert(request->reply != NULL);
-       } else
-#endif
-
-       if (request->reply && request->reply->code != 0) {
-               RDEBUG2("We already replied to this request.  Discarding response.");
-               return NULL;
-       }
-       
-       /*
-        *      We had previously received a reply, so we
-        *      don't need to do anything here.
+        *      We haven't replied to the NAS, but we have seen an
+        *      earlier reply from the home server.  Ignore this packet,
+        *      as we're likely still processing the previous reply.
         */
        if (request->proxy_reply) {
                if (memcmp(request->proxy_reply->vector,
                           packet->vector,
                           sizeof(request->proxy_reply->vector)) == 0) {
-                       RDEBUG2("Discarding duplicate reply from host %s port %d  - ID: %d for request %d",
+                       RDEBUG2("Discarding duplicate reply from host %s port %d  - ID: %d for request %u",
                               inet_ntop(packet->src_ipaddr.af,
                                         &packet->src_ipaddr.ipaddr,
                                         buffer, sizeof(buffer)),
@@ -2870,7 +3040,7 @@ REQUEST *received_proxy_response(RADIUS_PACKET *packet)
                } else {
                        /*
                         *      ? The home server gave us a new proxy
-                        *      reply, which doesn't match the old
+                        *      reply which doesn't match the old
                         *      one.  Delete it.
                         */
                        RDEBUG2("Ignoring conflicting proxy reply");
@@ -2879,6 +3049,103 @@ REQUEST *received_proxy_response(RADIUS_PACKET *packet)
                /* assert that there's an event queued for request? */
                return NULL;
        }
+
+       /*
+        *      Verify the packet before doing ANYTHING with it.  This
+        *      means we're doing more MD5 checks in the server core.
+        *      However, we can fix that by moving to multiple threads
+        *      listening on sockets.
+        *
+        *      We do this AFTER looking the request up in the hash,
+        *      and AFTER checking if we saw a previous request.  This
+        *      helps minimize the DoS effect of people attacking us
+        *      with spoofed packets.
+        */
+       if (rad_verify(packet, request->proxy,
+                      request->home_server->secret) != 0) {
+               DEBUG("Ignoring spoofed proxy reply.  Signature is invalid");
+               return NULL;
+       }
+
+       gettimeofday(&now, NULL);
+
+       /*
+        *      "ping" packets have a different algorithm for marking
+        *      a home server alive.  They also skip all of the CoA,
+        *      etc. checks.
+        */
+       if (!request->packet) {
+               request->proxy_reply = packet;
+               received_response_to_ping(request);
+               request->proxy_reply = NULL; /* caller will free it */
+               ev_request_free(&request);
+               return NULL;
+       }
+
+       /*
+        *      Maybe move this earlier in the decision process?
+        *      Having it here means that late or duplicate proxy
+        *      replies no longer get the home server marked as
+        *      "alive".  This might be good for stability, though.
+        *
+        *      FIXME: Do we really want to do this whenever we
+        *      receive a packet?  Setting this here means that we
+        *      mark it alive on *any* packet, even if it's lost all
+        *      of the *other* packets in the last 10s.
+        *
+        *      This behavior could be configurable.
+        */
+       request->home_server->state = HOME_STATE_ALIVE;
+       request->home_server->last_packet = now.tv_sec;
+       
+#ifdef WITH_COA
+       /*
+        *      When originating CoA, the "proxy" reply is the reply
+        *      to the CoA request that we originated.  At this point,
+        *      the original request is finished, and it has a reply.
+        *
+        *      However, if we haven't separated the two requests, do
+        *      so now.  This is done so that cleaning up the original
+        *      request won't cause the CoA request to be free'd.  See
+        *      util.c, request_free()
+        */
+       if (request->parent && (request->parent->coa == request)) {
+               request->parent->coa = NULL;
+               request->parent = NULL;
+
+               /*
+                *      The proxied packet was different from the
+                *      original packet, AND the proxied packet was
+                *      a CoA: allow it.
+                */
+       } else if ((request->packet->code != request->proxy->code) &&
+                  ((request->proxy->code == PW_COA_REQUEST) ||
+                   (request->proxy->code == PW_DISCONNECT_REQUEST))) {
+         /*
+          *    It's already divorced: do nothing.
+          */
+         
+       } else
+               /*
+                *      Skip the next set of checks, as the original
+                *      reply is cached.  We want to be able to still
+                *      process the CoA reply, AND to reference the
+                *      original request/reply.
+                *
+                *      This is getting to be really quite a bit of a
+                *      hack.
+                */
+#endif
+
+       /*
+        *      If there's a reply to the NAS, ignore everything
+        *      related to proxy responses
+        */
+       if (request->reply && request->reply->code != 0) {
+               RDEBUG2("Ignoring proxy reply that arrived after we sent a reply to the NAS");
+               return NULL;
+       }
+
 #ifdef WITH_STATS
        /*
         *      The average includes our time to receive packets and
@@ -2888,7 +3155,7 @@ REQUEST *received_proxy_response(RADIUS_PACKET *packet)
         *      We update the response time only for the FIRST packet
         *      we receive.
         */
-       else if (request->home_server->ema.window > 0) {
+       if (request->home_server->ema.window > 0) {
                radius_stats_ema(&request->home_server->ema,
                                 &now, &request->proxy_when);
        }
@@ -2903,7 +3170,7 @@ REQUEST *received_proxy_response(RADIUS_PACKET *packet)
        case REQUEST_REJECT_DELAY:
        case REQUEST_CLEANUP_DELAY:
        case REQUEST_DONE:
-               radlog(L_ERR, "Reply from home server %s port %d  - ID: %d arrived too late for request %d. Try increasing 'retry_delay' or 'max_request_time'",
+               radlog(L_ERR, "Reply from home server %s port %d  - ID: %d arrived too late for request %u. Try increasing 'retry_delay' or 'max_request_time'",
                       inet_ntop(packet->src_ipaddr.af,
                                 &packet->src_ipaddr.ipaddr,
                                 buffer, sizeof(buffer)),
@@ -2954,15 +3221,6 @@ REQUEST *received_proxy_response(RADIUS_PACKET *packet)
        }
 #endif
 
-       /*
-        *      There's no incoming request, so it's a proxied packet
-        *      we originated.
-        */
-       if (!request->packet) {
-               received_response_to_ping(request);
-               return NULL;
-       }
-
        request->child_state = REQUEST_QUEUED;
        request->when = now;
        request->delay = USEC;
@@ -2981,6 +3239,8 @@ REQUEST *received_proxy_response(RADIUS_PACKET *packet)
 void event_new_fd(rad_listen_t *this)
 {
        char buffer[1024];
+
+       if (this->status == RAD_LISTEN_STATUS_KNOWN) return;
        
        this->print(this, buffer, sizeof(buffer));
        
@@ -2988,7 +3248,7 @@ void event_new_fd(rad_listen_t *this)
                if (just_started) {
                        DEBUG("Listening on %s", buffer);
                } else {
-                       DEBUG2(" ... adding new socket %s", buffer);
+                       radlog(L_INFO, " ... adding new socket %s", buffer);
                }
                if (!fr_event_fd_insert(el, 0, this->fd,
                                        event_socket_handler, this)) {
@@ -3001,7 +3261,7 @@ void event_new_fd(rad_listen_t *this)
        }
        
        if (this->status == RAD_LISTEN_STATUS_CLOSED) {
-               DEBUG2(" ... closing socket %s", buffer);
+               radlog(L_INFO, " ... closing socket %s", buffer);
                
                fr_event_fd_delete(el, 0, this->fd);
                this->status = RAD_LISTEN_STATUS_FINISH;
@@ -3036,13 +3296,14 @@ static void handle_signal_self(int flag)
                time_t when;
                static time_t last_hup = 0;
 
-               DEBUG("Received HUP signal.");
-
                when = time(NULL);
                if ((int) (when - last_hup) < 5) {
                        radlog(L_INFO, "Ignoring HUP (less than 5s since last one)");
                        return;
                }
+
+               radlog(L_INFO, "Received HUP signal.");
+
                last_hup = when;
 
                fr_event_loop_exit(el, 0x80);
@@ -3058,31 +3319,18 @@ static void handle_signal_self(int flag)
                for (this = mainconfig.listen;
                     this != NULL;
                     this = this->next) {
-                       int delay;
-                       struct timeval when;
-
                        if (this->type != RAD_LISTEN_DETAIL) continue;
-                       
-                       delay = detail_delay(this);
-                       if (!delay) continue;
-
-                       fr_event_now(el, &now);
-                       when = now;
-                       tv_add(&when, delay);
 
-                       if (delay > (USEC / 10)) {
-                               DEBUG("Delaying next detail event for %d.%01u seconds.",
-                                      delay / USEC, (delay % USEC) / 100000);
-                       }
+                       /*
+                        *      This detail listener didn't send the
+                        *      signal, so skip it.
+                        */
+                       if (!this->decode(this, NULL)) continue;
 
                        /*
-                        *      Reset the detail timer.
+                        *      Go service the interrupt.
                         */
-                       if (!fr_event_insert(el, event_poll_detail, this,
-                                            &when, NULL)) {
-                               radlog(L_ERR, "Failed remembering detail timer");
-                               exit(1);
-                       }
+                       event_poll_detail(this);
                }
        }
 #endif
@@ -3093,8 +3341,6 @@ static void handle_signal_self(int flag)
                for (this = mainconfig.listen;
                     this != NULL;
                     this = this->next) {
-                       if (this->status == RAD_LISTEN_STATUS_KNOWN) continue;
-
                        event_new_fd(this);
                }
        }
@@ -3182,21 +3428,12 @@ static void event_socket_handler(fr_event_list_t *xel, UNUSED int fd,
  */
 static void event_poll_detail(void *ctx)
 {
-       int rcode;
+       int rcode, delay;
        RAD_REQUEST_FUNP fun;
        REQUEST *request;
        rad_listen_t *this = ctx;
        struct timeval when;
-
-       fr_event_now(el, &now);
-       when = now;
-
-       /*
-        *      Set the next poll time to be 1.0 to 1.1s, to help
-        *      spread the load a bit over time.
-        */
-       when.tv_sec += 1;
-       when.tv_usec += fr_rand() % (USEC / 10);
+       listen_detail_t *detail = this->data;
 
        rad_assert(this->type == RAD_LISTEN_DETAIL);
 
@@ -3215,15 +3452,18 @@ static void event_poll_detail(void *ctx)
                }
        }
 
+       if (!fr_event_now(el, &now)) gettimeofday(&now, NULL);
+       when = now;
+
        /*
-        *      Reset the poll to fire one second from now.  If the
-        *      detail reader DOES read a packet, it will send us a
-        *      signal when it's done, and the signal handler will
-        *      reset the timer to a more appropriate (i.e. shorter)
-        *      value.
+        *      Backdoor API to get the delay until the next poll
+        *      time.
         */
+       delay = this->encode(this, NULL);
+       tv_add(&when, delay);
+
        if (!fr_event_insert(el, event_poll_detail, this,
-                            &when, NULL)) {
+                            &when, &detail->ev)) {
                radlog(L_ERR, "Failed creating handler");
                exit(1);
        }
@@ -3245,7 +3485,7 @@ static void event_status(struct timeval *wake)
        }
 
        if (!wake) {
-               DEBUG("Ready to process requests.");
+               radlog(L_INFO, "Ready to process requests.");
 
        } else if ((wake->tv_sec != 0) ||
                   (wake->tv_usec >= 100000)) {
@@ -3292,13 +3532,6 @@ int radius_event_init(CONF_SECTION *cs, int spawn_flag)
 
        request_num_counter = 0;
 
-       /*
-        *      Move all of the thread calls to this file?
-        *
-        *      It may be best for the mutexes to be in this file...
-        */
-       have_children = spawn_flag;
-
 #ifdef WITH_PROXY
        if (mainconfig.proxy_requests) {
                /*
@@ -3318,26 +3551,36 @@ int radius_event_init(CONF_SECTION *cs, int spawn_flag)
        }
 #endif
 
-       /*
-        *      Just before we spawn the child threads, force the log
-        *      subsystem to re-open the log file for every write.
-        */
-       if (spawn_flag) force_log_reopen();
-
 #ifdef HAVE_PTHREAD_H
 #ifndef __MINGW32__
        NO_SUCH_CHILD_PID = (pthread_t ) (0);
 #else
        NO_SUCH_CHILD_PID = pthread_self(); /* not a child thread */
 #endif
-       if (thread_pool_init(cs, spawn_flag) < 0) {
+       /*
+        *      Initialize the threads ONLY if we're spawning, AND
+        *      we're running normally (i.e. check_config is not set).
+        */
+       if (spawn_flag && !check_config &&
+           (thread_pool_init(cs, &spawn_flag) < 0)) {
                exit(1);
        }
 #endif
 
+       /*
+        *      Move all of the thread calls to this file?
+        *
+        *      It may be best for the mutexes to be in this file...
+        */
+       have_children = spawn_flag;
+
        if (check_config) {
                DEBUG("%s: #### Skipping IP addresses and Ports ####",
                       mainconfig.name);
+               if (listen_init(cs, &head) < 0) {
+                       fflush(NULL);
+                       exit(1);
+               }
                return 1;
        }
 
@@ -3414,33 +3657,29 @@ int radius_event_init(CONF_SECTION *cs, int spawn_flag)
 
                switch (this->type) {
 #ifdef WITH_DETAIL
-                       struct timeval when;
-
                case RAD_LISTEN_DETAIL:
                        DEBUG("Listening on %s", buffer);
 
                        /*
-                        *      Add some initial jitter to help spread
-                        *      the load a bit.
+                        *      Detail files are always known, and aren't
+                        *      put into the socket event loop.
                         */
-                       when.tv_sec = fr_start_time + 1;
-                       when.tv_usec = fr_rand() % USEC;
+                       this->status = RAD_LISTEN_STATUS_KNOWN;
 
-                       if (!fr_event_insert(el, event_poll_detail, this,
-                                            &when, NULL)) {
-                               radlog(L_ERR, "Failed creating detail poll timer");
-                               exit(1);
-                       }
+                       /*
+                        *      Set up the first poll interval.
+                        */
+                       event_poll_detail(this);
                        break;
 #endif
 
 #ifdef WITH_PROXY
                case RAD_LISTEN_PROXY:
-                       rad_assert(proxy_fds[this->fd & 0x1f] == -1);
-                       rad_assert(proxy_listeners[this->fd & 0x1f] == NULL);
-                       
-                       proxy_fds[this->fd & 0x1f] = this->fd;
-                       proxy_listeners[this->fd & 0x1f] = this;
+                       if (proxy_add_fds(this) < 0) {
+                               radlog(L_ERR, "Failed creating new proxy socket");
+                               return 0;
+                       }
+
                        if (!fr_packet_list_socket_add(proxy_list,
                                                         this->fd)) {
                                rad_assert(0 == 1);
@@ -3452,15 +3691,6 @@ int radius_event_init(CONF_SECTION *cs, int spawn_flag)
                        break;
                }
 
-               /*
-                *      The file descriptor isn't ready.  Poll for
-                *      when it will become ready.  This is for the
-                *      detail file fd's.
-                */
-               if (this->fd < 0) {
-                       continue;
-               }
-
                event_new_fd(this);
        }
 
@@ -3478,9 +3708,7 @@ static int request_hash_cb(UNUSED void *ctx, void *data)
        rad_assert(request->in_proxy_hash == FALSE);
 #endif
 
-       fr_event_delete(el, &request->ev);
-       remove_from_request_hash(request);
-       request_free(&request);
+       ev_request_free(&request);
 
        return 0;
 }
@@ -3491,13 +3719,7 @@ static int proxy_hash_cb(UNUSED void *ctx, void *data)
 {
        REQUEST *request = fr_packet2myptr(REQUEST, proxy, data);
 
-       fr_packet_list_yank(proxy_list, request->proxy);
-       request->in_proxy_hash = FALSE;
-
-       if (!request->in_request_hash) {
-               fr_event_delete(el, &request->ev);
-               request_free(&request);
-       }
+       ev_request_free(&request);
 
        return 0;
 }
@@ -3517,9 +3739,7 @@ void radius_event_free(void)
         *      referenced from anywhere else.  Remove them first.
         */
        if (proxy_list) {
-               PTHREAD_MUTEX_LOCK(&proxy_mutex);
                fr_packet_list_walk(proxy_list, NULL, proxy_hash_cb);
-               PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
                fr_packet_list_free(proxy_list);
                proxy_list = NULL;
        }
@@ -3549,11 +3769,13 @@ void radius_handle_request(REQUEST *request, RAD_REQUEST_FUNP fun)
                rad_assert(request != NULL);
                
                if (request->server) RDEBUG("server %s {",
-                                            request->server); 
+                                           request->server != NULL ?
+                                           request->server : ""); 
                fun(request);
 
                if (request->server) RDEBUG("} # server %s",
-                                            request->server);
+                                            request->server != NULL ?
+                                           request->server : "");
 
                request_post_handler(request);
        }