Be more forgiving when starting proxy sockets.
[freeradius.git] / src / main / event.c
index 08e98cb..844bdb6 100644 (file)
@@ -43,7 +43,6 @@ RCSID("$Id$")
 extern pid_t radius_pid;
 extern int dont_fork;
 extern int check_config;
-extern void force_log_reopen(void);
 extern char *debug_condition;
 
 /*
@@ -55,10 +54,15 @@ static int                  request_num_counter = 0;
 static struct timeval          now;
 time_t                         fr_start_time;
 static int                     have_children;
-static int                     has_detail_listener = FALSE;
 static int                     just_started = TRUE;
 
 #ifndef __MINGW32__
+#ifdef HAVE_PTHREAD_H
+#define WITH_SELF_PIPE (1)
+#endif
+#endif
+
+#ifdef WITH_SELF_PIPE
 static int self_pipe[2];
 #endif
 
@@ -93,8 +97,11 @@ static fr_packet_list_t *proxy_list = NULL;
  *     We keep the proxy FD's here.  The RADIUS Id's are marked
  *     "allocated" per Id, via a bit per proxy FD.
  */
+static int             proxy_all_used = FALSE;
 static int             proxy_fds[32];
 static rad_listen_t    *proxy_listeners[32];
+static void check_for_zombie_home_server(REQUEST *request);
+static void remove_from_proxy_hash(REQUEST *request);
 #else
 #define remove_from_proxy_hash(foo)
 #endif
@@ -102,6 +109,9 @@ static rad_listen_t *proxy_listeners[32];
 static void request_post_handler(REQUEST *request);
 static void wait_a_bit(void *ctx);
 static void event_socket_handler(fr_event_list_t *xel, UNUSED int fd, void *ctx);
+#ifdef WITH_DETAIL
+static void event_poll_detail(void *ctx);
+#endif
 
 static void NEVER_RETURNS _rad_panic(const char *file, unsigned int line,
                                    const char *msg)
@@ -122,8 +132,8 @@ static void tv_add(struct timeval *tv, int usec_delay)
        tv->tv_usec += usec_delay;
 
        if (tv->tv_usec > USEC) {
-               tv->tv_usec -= USEC;
-               tv->tv_sec++;
+               tv->tv_sec += tv->tv_usec / USEC;
+               tv->tv_usec %= USEC;
        }
 }
 
@@ -138,6 +148,42 @@ static void remove_from_request_hash(REQUEST *request)
 }
 
 
+/*
+ *     Release a request: drop its timer event and hash entries,
+ *     then hand it to request_free().  NULL input is a no-op.
+ */
+static void ev_request_free(REQUEST **prequest)
+{
+       REQUEST *req;
+
+       if (prequest == NULL) return;
+       req = *prequest;
+       if (req == NULL) return;
+
+#ifdef WITH_COA
+       /*
+        *      Unlink the CoA child, then clean it up the same way.
+        */
+       if (req->coa != NULL) {
+               req->coa->parent = NULL;
+               ev_request_free(&req->coa);
+       }
+
+       /*
+        *      If we are a child, clear the parent's link; the parent stays alive.
+        */
+       if ((req->parent != NULL) && (req->parent->coa == req)) {
+               req->parent->coa = NULL;
+       }
+#endif
+       if (req->ev != NULL) fr_event_delete(el, &req->ev);
+#ifdef WITH_PROXY
+       if (req->in_proxy_hash) remove_from_proxy_hash(req);
+#endif
+       if (req->in_request_hash) remove_from_request_hash(req);
+       request_free(prequest);
+}
+
 #ifdef WITH_PROXY
 static REQUEST *lookup_in_proxy_hash(RADIUS_PACKET *reply)
 {
@@ -178,6 +224,7 @@ static REQUEST *lookup_in_proxy_hash(RADIUS_PACKET *reply)
         *      responded at all.
         */
        if (!request->proxy_reply &&
+           request->home_server &&
            request->home_server->currently_outstanding) {
                request->home_server->currently_outstanding--;
        }
@@ -190,9 +237,24 @@ static REQUEST *lookup_in_proxy_hash(RADIUS_PACKET *reply)
 
 static void remove_from_proxy_hash(REQUEST *request)
 {
+       /*
+        *      Check this without grabbing the mutex because it's a
+        *      lot faster that way.
+        */
        if (!request->in_proxy_hash) return;
 
+       /*
+        *      The "not in hash" flag is definitive.  However, if the
+        *      flag says that it IS in the hash, there might still be
+        *      a race condition where it isn't.
+        */
        PTHREAD_MUTEX_LOCK(&proxy_mutex);
+
+       if (!request->in_proxy_hash) {
+               PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
+               return;
+       }
+
        fr_packet_list_yank(proxy_list, request->proxy);
        fr_packet_list_id_free(proxy_list, request->proxy);
 
@@ -202,17 +264,93 @@ static void remove_from_proxy_hash(REQUEST *request)
         *      home server.
         */
        if (!request->proxy_reply &&
+           request->home_server &&
            request->home_server->currently_outstanding) {
                request->home_server->currently_outstanding--;
        }
 
+       /*
+        *      Go from YES in hash, to NO, not in hash while we hold
+        *      the mutex.  This guarantees that when another thread
+        *      grabs the mutex, the "not in hash" flag is correct.
+        */
+       request->in_proxy_hash = FALSE;
+
        PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
+}
 
-       request->in_proxy_hash = FALSE;
+/*
+ *     Store a proxy listener in the first free slot of proxy_fds[],
+ *     probing from (fd & 0x1f).  Returns the slot, or -1 if none is free.
+ */
+static int proxy_add_fds(rad_listen_t *proxy_listener)
+{
+       int probe;
+       int fd = proxy_listener->fd;
+
+       for (probe = 0; probe < 32; probe++) {
+               int slot = (fd + probe) & 0x1f;
+
+               if (proxy_fds[slot] != -1) continue;
+               proxy_fds[slot] = fd;
+               proxy_listeners[slot] = proxy_listener;
+               return slot;
+       }
+
+       return -1;
+}
+
+/*
+ *     Allocate a proxy Id for 'packet', opening one more proxy
+ *     socket if the first attempt fails.  Returns 1 on success,
+ *     0 on failure.  'request' is not used directly here;
+ *     presumably the RDEBUG2 macro picks it up (confirm).
+ */
+static int proxy_id_alloc(REQUEST *request, RADIUS_PACKET *packet)
+{
+       rad_listen_t *sock;
+
+       if (fr_packet_list_id_alloc(proxy_list, packet)) return 1;
+
+       if (proxy_all_used) return 0;
+
+       /*
+        *      Open a new proxy fd.  proxy_new_listener() appends it
+        *      to the list of listeners.  With some care, this can
+        *      be thread-safe.
+        */
+       sock = proxy_new_listener(&packet->src_ipaddr, FALSE);
+       if (sock == NULL) {
+               radlog(L_PROXY, "Failed to create a new socket for proxying requests.");
+               return 0;
+       }
+
+       /*
+        *      No free slot in the local cache: remember that, and
+        *      release the socket via listen_free().
+        */
+       if (proxy_add_fds(sock) < 0) {
+               proxy_all_used = TRUE;
+               listen_free(&sock);
+               radlog(L_ERR, "Failed creating new proxy socket: server is too busy and home servers appear to be down");
+               return 0;
+       }
+
+       if (!fr_packet_list_socket_add(proxy_list, sock->fd) ||
+           !fr_packet_list_id_alloc(proxy_list, packet)) {
+               RDEBUG2("ERROR: Failed to create a new socket for proxying requests.");
+               return 0;
+       }
+
+       /*
+        *      Tell the main thread there is a new FD to listen on.
+        */
+       radius_signal_self(RADIUS_SIGNAL_SELF_NEW_FD);
+       return 1;
 }
 
 
-static int insert_into_proxy_hash(REQUEST *request)
+static int insert_into_proxy_hash(REQUEST *request, int retransmit)
 {
        int i, proxy;
        char buf[128];
@@ -220,66 +358,40 @@ static int insert_into_proxy_hash(REQUEST *request)
        rad_assert(request->proxy != NULL);
        rad_assert(proxy_list != NULL);
 
-       request->proxy->sockfd = -1;
-
        PTHREAD_MUTEX_LOCK(&proxy_mutex);
 
-       request->home_server->currently_outstanding++;
-
-       if (!fr_packet_list_id_alloc(proxy_list, request->proxy)) {
-               int found;
-               rad_listen_t *proxy_listener;
-
-               /*
-                *      Allocate a new proxy fd.  This function adds
-                *      it to the tail of the list of listeners.  With
-                *      some care, this can be thread-safe.
-                */
-               proxy_listener = proxy_new_listener();
-               if (!proxy_listener) {
-                       PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
-                       RDEBUG2("ERROR: Failed to create a new socket for proxying requests.");
-                       return 0;
-               }
-
-               /*
-                *      Cache it locally.
-                */
-               found = -1;
-               proxy = proxy_listener->fd;
-               for (i = 0; i < 32; i++) {
-                       /*
-                        *      Found a free entry.  Save the socket,
-                        *      and remember where we saved it.
-                        */
-                       if (proxy_fds[(proxy + i) & 0x1f] == -1) {
-                               found = (proxy + i) & 0x1f;
-                               proxy_fds[found] = proxy;
-                               proxy_listeners[found] = proxy_listener;
-                               break;
-                       }
-               }
-               rad_assert(found >= 0);
+       /*
+        *      Keep track of maximum outstanding requests to a
+        *      particular home server.  'max_outstanding' is
+        *      enforced in home_server_ldb(), in realms.c.
+        */
+       if (request->home_server) {
+               request->home_server->currently_outstanding++;
+       }
 
-               if (!fr_packet_list_socket_add(proxy_list, proxy_listener->fd)) {
-                       PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
-                       RDEBUG2("ERROR: Failed to create a new socket for proxying requests.");
-                       return 0;
+       if (retransmit) {
+               RADIUS_PACKET packet;
 
-               }
+               packet = *request->proxy;
 
-               if (!fr_packet_list_id_alloc(proxy_list, request->proxy)) {
+               if (!proxy_id_alloc(request, &packet)) {
                        PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
-                       RDEBUG2("ERROR: Failed to create a new socket for proxying requests.");
                        return 0;
                }
 
                /*
-                *      Signal the main thread to add the new FD to the list
-                *      of listening FD's.
+                *      Yank the request, free the old Id, and
+                *      remember the new Id.
                 */
-               radius_signal_self(RADIUS_SIGNAL_SELF_NEW_FD);
+               fr_packet_list_yank(proxy_list, request->proxy);
+               fr_packet_list_id_free(proxy_list, request->proxy);
+               *request->proxy = packet;
+
+       } else if (!proxy_id_alloc(request, request->proxy)) {
+               PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
+               return 0;
        }
+
        rad_assert(request->proxy->sockfd >= 0);
 
        /*
@@ -307,10 +419,12 @@ static int insert_into_proxy_hash(REQUEST *request)
        if (!fr_packet_list_insert(proxy_list, &request->proxy)) {
                fr_packet_list_id_free(proxy_list, request->proxy);
                PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
-               RDEBUG2("ERROR: Failed to insert entry into proxy list");
+               RDEBUG2("ERROR: Failed to insert entry into proxy list.");
                return 0;
        }
 
+       request->in_proxy_hash = TRUE;
+
        PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
 
        RDEBUG3(" proxy: allocating destination %s port %d - Id %d",
@@ -319,8 +433,6 @@ static int insert_into_proxy_hash(REQUEST *request)
               request->proxy->dst_port,
               request->proxy->id);
 
-       request->in_proxy_hash = TRUE;
-
        return 1;
 }
 
@@ -331,30 +443,35 @@ static int insert_into_proxy_hash(REQUEST *request)
 static void wait_for_proxy_id_to_expire(void *ctx)
 {
        REQUEST *request = ctx;
-       home_server *home = request->home_server;
 
        rad_assert(request->magic == REQUEST_MAGIC);
        rad_assert(request->proxy != NULL);
 
        if (!fr_event_now(el, &now)) gettimeofday(&now, NULL);
        request->when = request->proxy_when;
-       request->when.tv_sec += home->response_window;
+
+#ifdef WITH_COA
+       if (((request->proxy->code == PW_COA_REQUEST) ||
+            (request->proxy->code == PW_DISCONNECT_REQUEST)) &&
+           (request->packet->code != request->proxy->code)) {
+               request->when.tv_sec += request->home_server->coa_mrd;
+       } else
+#endif
+       request->when.tv_sec += request->home_server->response_window;
 
        if ((request->num_proxied_requests == request->num_proxied_responses) ||
            timercmp(&now, &request->when, >)) {
                if (request->packet) {
-                       RDEBUG2("Cleaning up request %d ID %d with timestamp +%d",
+                       RDEBUG2("Cleaning up request %u ID %d with timestamp +%d",
                               request->number, request->packet->id,
                               (unsigned int) (request->timestamp - fr_start_time));
                } else {
-                       RDEBUG2("Cleaning up request %d with timestamp +%d",
+                       RDEBUG2("Cleaning up request %u with timestamp +%d",
                               request->number,
                               (unsigned int) (request->timestamp - fr_start_time));
                }
-               fr_event_delete(el, &request->ev);
-               remove_from_proxy_hash(request);
-               remove_from_request_hash(request);
-               request_free(&request);
+
+               ev_request_free(&request);
                return;
        }
 
@@ -368,20 +485,36 @@ static void wait_for_child_to_die(void *ctx)
        REQUEST *request = ctx;
 
        rad_assert(request->magic == REQUEST_MAGIC);
+       remove_from_request_hash(request);
 
-       if ((request->child_state == REQUEST_QUEUED) |
-           (request->child_state == REQUEST_RUNNING)) {
-               request->delay += (request->delay >> 1);
-               tv_add(&request->when, request->delay);
+       /*
+        *      If it's still queued (waiting for a thread to pick it
+        *      up) OR, it's running AND there's still a child thread
+        *      handling it, THEN delay some more.
+        */
+       if ((request->child_state == REQUEST_QUEUED) ||
+           ((request->child_state == REQUEST_RUNNING) &&
+            (pthread_equal(request->child_pid, NO_SUCH_CHILD_PID) == 0))) {
 
-               RDEBUG2("Child is still stuck for request %d", request->number);
+               /*
+                *      Cap delay at max_request_time
+                */
+               if (request->delay < (USEC * request->root->max_request_time)) {
+                       request->delay += (request->delay >> 1);
+                       radlog(L_INFO, "WARNING: Child is hung for request %u in component %s module %s.",
+                              request->number, request->component, request->module);
+               } else {
+                       request->delay = USEC * request->root->max_request_time;
+                       RDEBUG2("WARNING: Child is hung after \"max_request_time\" for request %u",
+                               request->number);
+               }
+               tv_add(&request->when, request->delay);
 
                INSERT_EVENT(wait_for_child_to_die, request);
                return;
        }
 
-       RDEBUG2("Child is finally responsive for request %d", request->number);
-       remove_from_request_hash(request);
+       RDEBUG2("Child is finally responsive for request %u", request->number);
 
 #ifdef WITH_PROXY
        if (request->proxy) {
@@ -390,7 +523,7 @@ static void wait_for_child_to_die(void *ctx)
        }
 #endif
 
-       request_free(&request);
+       ev_request_free(&request);
 }
 #endif
 
@@ -411,76 +544,15 @@ static void cleanup_delay(void *ctx)
        }
 #endif
 
-       RDEBUG2("Cleaning up request %d ID %d with timestamp +%d",
+       RDEBUG2("Cleaning up request %u ID %d with timestamp +%d",
               request->number, request->packet->id,
               (unsigned int) (request->timestamp - fr_start_time));
 
-       fr_event_delete(el, &request->ev);
-       request_free(&request);
+       ev_request_free(&request);
 }
 
 
 /*
- *     FIXME: Put into a libradius function.
- */
-#define MAX_PACKET_CODE (52)
-static const char *packet_codes[] = {
-  "",
-  "Access-Request",
-  "Access-Accept",
-  "Access-Reject",
-  "Accounting-Request",
-  "Accounting-Response",
-  "Accounting-Status",
-  "Password-Request",
-  "Password-Accept",
-  "Password-Reject",
-  "Accounting-Message",
-  "Access-Challenge",
-  "Status-Server",
-  "Status-Client",
-  "14",
-  "15",
-  "16",
-  "17",
-  "18",
-  "19",
-  "20",
-  "Resource-Free-Request",
-  "Resource-Free-Response",
-  "Resource-Query-Request",
-  "Resource-Query-Response",
-  "Alternate-Resource-Reclaim-Request",
-  "NAS-Reboot-Request",
-  "NAS-Reboot-Response",
-  "28",
-  "Next-Passcode",
-  "New-Pin",
-  "Terminate-Session",
-  "Password-Expired",
-  "Event-Request",
-  "Event-Response",
-  "35",
-  "36",
-  "37",
-  "38",
-  "39",
-  "Disconnect-Request",
-  "Disconnect-ACK",
-  "Disconnect-NAK",
-  "CoA-Request",
-  "CoA-ACK",
-  "CoA-NAK",
-  "46",
-  "47",
-  "48",
-  "49",
-  "IP-Address-Allocate",
-  "IP-Address-Release"
-};
-
-
-/*
  *     In daemon mode, AND this request has debug flags set.
  */
 #define DEBUG_PACKET if (!debug_flag && request->options && request->radlog) debug_packet
@@ -495,6 +567,8 @@ static void debug_packet(REQUEST *request, RADIUS_PACKET *packet, int direction)
 
        if (!packet) return;
 
+       rad_assert(request->radlog != NULL);
+
        if (direction == 0) {
                received = "Received";
                from = "from";  /* what else? */
@@ -514,9 +588,9 @@ static void debug_packet(REQUEST *request, RADIUS_PACKET *packet, int direction)
         *
         *      This really belongs in a utility library
         */
-       if ((packet->code > 0) && (packet->code < MAX_PACKET_CODE)) {
+       if ((packet->code > 0) && (packet->code < FR_MAX_PACKET_CODE)) {
                RDEBUG("%s %s packet %s host %s port %d, id=%d, length=%d",
-                      received, packet_codes[packet->code], from,
+                      received, fr_packet_codes[packet->code], from,
                       inet_ntop(ip->af, &ip->ipaddr, buffer, sizeof(buffer)),
                       port, packet->id, packet->data_len);
        } else {
@@ -540,7 +614,7 @@ static void reject_delay(void *ctx)
        rad_assert(request->magic == REQUEST_MAGIC);
        rad_assert(request->child_state == REQUEST_REJECT_DELAY);
 
-       RDEBUG2("Sending delayed reject for request %d", request->number);
+       RDEBUG2("Sending delayed reject for request %u", request->number);
 
        DEBUG_PACKET(request, request->reply, 1);
 
@@ -568,7 +642,7 @@ void revive_home_server(void *ctx)
         */
        if (home->ev) fr_event_delete(el, &home->ev);
 
-       radlog(L_INFO, "PROXY: Marking home server %s port %d alive again... we have no idea if it really is alive or not.",
+       radlog(L_PROXY, "Marking home server %s port %d alive again... we have no idea if it really is alive or not.",
               inet_ntop(home->ipaddr.af, &home->ipaddr.ipaddr,
                         buffer, sizeof(buffer)),
               home->port);
@@ -579,30 +653,38 @@ void revive_home_server(void *ctx)
 static void no_response_to_ping(void *ctx)
 {
        REQUEST *request = ctx;
-       home_server *home = request->home_server;
+       home_server *home;
        char buffer[128];
 
+       rad_assert(request->home_server != NULL);
+
+       home = request->home_server;
        home->num_received_pings = 0;
 
-       RDEBUG2("No response to status check %d from home server %s port %d",
+       radlog(L_ERR, "No response to status check %d for home server %s port %d",
               request->number,
               inet_ntop(request->proxy->dst_ipaddr.af,
                         &request->proxy->dst_ipaddr.ipaddr,
                         buffer, sizeof(buffer)),
               request->proxy->dst_port);
 
+       check_for_zombie_home_server(request);
+
        wait_for_proxy_id_to_expire(request);
 }
 
 
 static void received_response_to_ping(REQUEST *request)
 {
-       home_server *home = request->home_server;
+       home_server *home;
        char buffer[128];
 
+       rad_assert(request->home_server != NULL);
+
+       home = request->home_server;
        home->num_received_pings++;
 
-       RDEBUG2("Received response to status check %d (%d in current sequence)",
+       radlog(L_PROXY, "Received response to status check %d (%d in current sequence)",
               request->number, home->num_received_pings);
 
        /*
@@ -611,7 +693,6 @@ static void received_response_to_ping(REQUEST *request)
        fr_event_delete(el, &request->ev);
        remove_from_proxy_hash(request);
        rad_assert(request->in_request_hash == FALSE);
-       request_free(&request);
 
        /*
         *      The control socket may have marked the home server as
@@ -637,7 +718,7 @@ static void received_response_to_ping(REQUEST *request)
                RDEBUG2("Hmm... no event for home server.  Oh well.");
        }
 
-       radlog(L_INFO, "PROXY: Marking home server %s port %d alive",
+       radlog(L_PROXY, "Marking home server %s port %d alive",
               inet_ntop(request->proxy->dst_ipaddr.af,
                         &request->proxy->dst_ipaddr.ipaddr,
                         buffer, sizeof(buffer)),
@@ -656,8 +737,9 @@ static void ping_home_server(void *ctx)
        REQUEST *request;
        VALUE_PAIR *vp;
 
-       if (home->state == HOME_STATE_ALIVE) {
-               radlog(L_INFO, "Suspicious proxy state... continuing");
+       if ((home->state == HOME_STATE_ALIVE) ||
+           (home->ping_check == HOME_PING_CHECK_NONE) ||
+           (home->ev != NULL)) {
                return;
        }
 
@@ -716,10 +798,10 @@ static void ping_home_server(void *ctx)
 
        rad_assert(request->proxy_listener == NULL);
 
-       if (!insert_into_proxy_hash(request)) {
-               RDEBUG2("ERROR: Failed inserting status check %d into proxy hash.  Discarding it.",
+       if (!insert_into_proxy_hash(request, FALSE)) {
+               RDEBUG2("ERROR: Failed to insert status check %d into proxy list.  Discarding it.",
                       request->number);
-               request_free(&request);
+               ev_request_free(&request);
                return;
        }
        rad_assert(request->proxy_listener != NULL);
@@ -728,6 +810,8 @@ static void ping_home_server(void *ctx)
 
        request->next_callback = NULL;
        request->child_state = REQUEST_PROXIED;
+       gettimeofday(&request->when, NULL);
+       home->when = request->when;
        request->when.tv_sec += home->ping_timeout;;
 
        INSERT_EVENT(no_response_to_ping, request);
@@ -753,7 +837,7 @@ void mark_home_server_dead(home_server *home, struct timeval *when)
        int previous_state = home->state;
        char buffer[128];
 
-       radlog(L_INFO, "PROXY: Marking home server %s port %d as dead.",
+       radlog(L_PROXY, "Marking home server %s port %d as dead.",
               inet_ntop(home->ipaddr.af, &home->ipaddr.ipaddr,
                         buffer, sizeof(buffer)),
               home->port);
@@ -852,12 +936,31 @@ static int setup_post_proxy_fail(REQUEST *request)
        DICT_VALUE *dval = NULL;
        VALUE_PAIR *vp;
 
+       request->child_state = REQUEST_RUNNING;
+
        if (request->packet->code == PW_AUTHENTICATION_REQUEST) {
                dval = dict_valbyname(PW_POST_PROXY_TYPE, "Fail-Authentication");
 
        } else if (request->packet->code == PW_ACCOUNTING_REQUEST) {
                dval = dict_valbyname(PW_POST_PROXY_TYPE, "Fail-Accounting");
 
+#ifdef WITH_COA
+               /*
+                *      See no_response_to_coa_request
+                */
+       } else if (((request->packet->code >> 8) & 0xff) == PW_COA_REQUEST) {
+               request->packet->code &= 0xff; /* restore it */
+
+               if (request->proxy->code == PW_COA_REQUEST) {
+                       dval = dict_valbyname(PW_POST_PROXY_TYPE, "Fail-CoA");
+
+               } else if (request->proxy->code == PW_DISCONNECT_REQUEST) {
+                       dval = dict_valbyname(PW_POST_PROXY_TYPE, "Fail-Disconnect");
+               } else {
+                       return 0;
+               }
+
+#endif
        } else {
                return 0;
        }
@@ -900,7 +1003,6 @@ static void post_proxy_fail_handler(REQUEST *request)
         *      to do next.
         */
        if (!setup_post_proxy_fail(request)) {
-               request->child_state = REQUEST_RUNNING;
                request_post_handler(request);
 
        } else {
@@ -959,45 +1061,82 @@ static void no_response_to_proxied_request(void *ctx)
                return;
        }
 
-       radlog(L_ERR, "Rejecting request %d due to lack of any response from home server %s port %d",
-              request->number,
-              inet_ntop(request->proxy->dst_ipaddr.af,
-                        &request->proxy->dst_ipaddr.ipaddr,
-                        buffer, sizeof(buffer)),
-              request->proxy->dst_port);
-
        check_for_zombie_home_server(request);
 
        home = request->home_server;
 
-       post_proxy_fail_handler(request);
-
        /*
-        *      Don't touch request due to race conditions
+        *      The default as of 2.1.7 is to allow requests to
+        *      fail-over to a backup home server when this one does
+        *      not respond.  The old behavior can be configured as
+        *      well.
         */
-       if (home->state == HOME_STATE_IS_DEAD) {
-               rad_assert(home->ev != NULL); /* or it will never wake up */
-               return;
+       if (home->no_response_fail) {
+               radlog(L_ERR, "Rejecting request %u (proxy Id %d) due to lack of any response from home server %s port %d",
+                      request->number, request->proxy->id,
+                      inet_ntop(request->proxy->dst_ipaddr.af,
+                                &request->proxy->dst_ipaddr.ipaddr,
+                                buffer, sizeof(buffer)),
+                      request->proxy->dst_port);
+
+               post_proxy_fail_handler(request);
+       } else {
+               rad_assert(request->ev == NULL);
+               request->child_state = REQUEST_RUNNING;
+               wait_a_bit(request);
        }
 
        /*
-        *      Enable the zombie period when we notice that the home
-        *      server hasn't responded.  We do NOT back-date the start
-        *      of the zombie period.
+        *      Don't touch request due to race conditions
         */
-       if (home->state == HOME_STATE_ALIVE) {
-               radlog(L_ERR, "PROXY: Marking home server %s port %d as zombie (it looks like it is dead).",
-                      inet_ntop(home->ipaddr.af, &home->ipaddr.ipaddr,
-                                buffer, sizeof(buffer)),
-                      home->port);
-               home->state = HOME_STATE_ZOMBIE;
-               home->zombie_period_start = now;
 
+       /*
+        *      If it's not alive, don't try to make it a zombie.
+        */
+       if (home->state != HOME_STATE_ALIVE) {
                /*
-                *      Start pinging the home server.
+                *      Don't check home->ev due to race conditions.
                 */
-               ping_home_server(home);
+               return;
+       }
+
+       /*
+        *      We've received a real packet recently.  Don't mark the
+        *      server as zombie until we've received NO packets for a
+        *      while.  The "1/4" of zombie period was chosen rather
+        *      arbitrarily.  It's a balance between too short, which
+        *      gives quick fail-over and fail-back, or too long,
+        *      where the proxy still sends packets to an unresponsive
+        *      home server.
+        */
+       if ((home->last_packet + ((home->zombie_period + 3) / 4)) >= now.tv_sec) {
+               return;
        }
+
+       /*
+        *      Enable the zombie period when we notice that the home
+        *      server hasn't responded for a while.  We back-date the
+        *      zombie period to when we last received a response from
+        *      the home server.
+        */
+       home->state = HOME_STATE_ZOMBIE;
+       
+       home->zombie_period_start.tv_sec = home->last_packet;
+       home->zombie_period_start.tv_sec = USEC / 2;
+       
+       fr_event_delete(el, &home->ev);
+       home->currently_outstanding = 0;
+       home->num_received_pings = 0;
+       
+       radlog(L_PROXY, "Marking home server %s port %d as zombie (it looks like it is dead).",
+              inet_ntop(home->ipaddr.af, &home->ipaddr.ipaddr,
+                        buffer, sizeof(buffer)),
+              home->port);
+       
+       /*
+        *      Start pinging the home server.
+        */
+       ping_home_server(home);
 }
 #endif
 
@@ -1009,9 +1148,45 @@ static void wait_a_bit(void *ctx)
 
        rad_assert(request->magic == REQUEST_MAGIC);
 
+#ifdef WITH_COA
+       /*
+        *      The CoA request is a new (internally generated)
+        *      request, created in a child thread.  We therefore need
+        *      some way to tie its events back into the main event
+        *      handler.
+        */
+       if (request->coa && !request->coa->proxy_reply &&
+           request->coa->next_callback) {
+               request->coa->when = request->coa->next_when;
+               INSERT_EVENT(request->coa->next_callback, request->coa);
+               request->coa->next_callback = NULL;
+               request->coa->parent = NULL;
+               request->coa = NULL;
+       }
+#endif
+
        switch (request->child_state) {
        case REQUEST_QUEUED:
        case REQUEST_RUNNING:
+               /*
+                *      If we're not thread-capable, OR we're capable,
+                *      but have been told to run without threads,
+                *      complain when the request is queued for a
+                *      thread, or running in a child thread.
+                */
+#ifdef HAVE_PTHREAD_H
+               if (!have_children)
+#endif
+               {
+                       rad_assert("We do not have threads, but the request is marked as queued or running in a child thread" == NULL);
+                       break;
+               }
+
+#ifdef HAVE_PTHREAD_H
+               /*
+                *      If we have threads, wait for the child thread
+                *      to stop.
+                */
                when = request->received;
                when.tv_sec += request->root->max_request_time;
 
@@ -1028,24 +1203,18 @@ static void wait_a_bit(void *ctx)
                 *      Request still has more time.  Continue
                 *      waiting.
                 */
-               if (timercmp(&now, &when, <) ||
-                   ((request->listener->type == RAD_LISTEN_DETAIL) &&
-                    (request->child_state == REQUEST_QUEUED))) {
+               if (timercmp(&now, &when, <)) {
                        if (request->delay < (USEC / 10)) {
                                request->delay = USEC / 10;
                        }
                        request->delay += request->delay >> 1;
 
-#ifdef WITH_DETAIL
                        /*
-                        *      Cap wait at some sane value for detail
-                        *      files.
+                        *      Cap delays at something reasonable.
                         */
-                       if ((request->listener->type == RAD_LISTEN_DETAIL) &&
-                           (request->delay > (request->root->max_request_time * USEC))) {
+                       if (request->delay > (request->root->max_request_time * USEC)) {
                                request->delay = request->root->max_request_time * USEC;
                        }
-#endif
 
                        request->when = now;
                        tv_add(&request->when, request->delay);
@@ -1053,51 +1222,29 @@ static void wait_a_bit(void *ctx)
                        break;
                }
 
-#if defined(HAVE_PTHREAD_H)
+               request->master_state = REQUEST_STOP_PROCESSING;
+
                /*
                 *      A child thread MAY still be running on the
                 *      request.  Ask the thread to stop working on
                 *      the request.
                 */
-               if (have_children) {
-                       /* FIXME: kill unresponsive children? */
-
-                       /*
-                        *      Print this error message ONLY if
-                        *      there's a child currently processing
-                        *      the request.  As we don't have thread
-                        *      locks here, there may be race
-                        *      conditions on this check.  But it's
-                        *      just an error message, so that's OK.
-                        */
-                       if (!pthread_equal(request->child_pid, NO_SUCH_CHILD_PID)) {
-                               radlog(L_ERR, "WARNING: Unresponsive child for request %d, in module %s component %s",
-                                      request->number,
-                                      request->module ? request->module : "<server core>",
-                                      request->component ? request->component : "<server core>");
-                       }
+               if (have_children &&
+                   (pthread_equal(request->child_pid, NO_SUCH_CHILD_PID) == 0)) {
+                       radlog(L_ERR, "WARNING: Unresponsive child for request %u, in component %s module %s",
+                              request->number,
+                              request->component ? request->component : "<server core>",
+                              request->module ? request->module : "<server core>");
 
-                       request->master_state = REQUEST_STOP_PROCESSING;
-                       
-                       request->delay = USEC / 4;
-                       tv_add(&request->when, request->delay);
-                       callback = wait_for_child_to_die;
-                       break;
                }
+                       
+               request->delay = USEC;
+               tv_add(&request->when, request->delay);
+               callback = wait_for_child_to_die;
+               break;
 #endif
 
                /*
-                *      Else there are no child threads.  We probably
-                *      should have just marked the request as 'done'
-                *      elsewhere, like in the post-proxy-fail
-                *      handler.  But doing that would involve
-                *      checking for max_request_time in multiple
-                *      places, so this may be simplest.
-                */
-               request->child_state = REQUEST_DONE;
-               /* FALL-THROUGH */
-
-               /*
                 *      Mark the request as no longer running,
                 *      and clean it up.
                 */
@@ -1105,6 +1252,24 @@ static void wait_a_bit(void *ctx)
 #ifdef HAVE_PTHREAD_H
                request->child_pid = NO_SUCH_CHILD_PID;
 #endif
+
+#ifdef WITH_COA
+               /*
+                *      This is a CoA request.  It's been divorced
+                *      from everything else, so we clean it up now.
+                */
+               if (!request->in_request_hash &&
+                   request->proxy &&
+                   (request->packet->code != request->proxy->code) &&
+                   ((request->proxy->code == PW_COA_REQUEST) ||
+                    (request->proxy->code == PW_DISCONNECT_REQUEST))) {
+                       /*
+                        *      FIXME: Do CoA MIBs
+                        */
+                       ev_request_free(&request);
+                       return;
+               }
+#endif
                request_stats_final(request);
                cleanup_delay(request);
                return;
@@ -1141,32 +1306,410 @@ static void wait_a_bit(void *ctx)
         *      mode, with no threads...
         */
        if (!callback) {
-               RDEBUG("WARNING: Internal sanity check failed in event handler for request %d: Discarding the request!", request->number);
-               fr_event_delete(el, &request->ev);
-               remove_from_proxy_hash(request);
-               remove_from_request_hash(request);
-               request_free(&request);
+               RDEBUG("WARNING: Internal sanity check failed in event handler for request %u: Discarding the request!", request->number);
+               ev_request_free(&request);
                return;
        }
 
        INSERT_EVENT(callback, request);
 }
 
-
-#ifdef WITH_PROXY
-static int process_proxy_reply(REQUEST *request)
+#ifdef WITH_COA
+/* Log that a CoA request got no reply, then run post_proxy_fail_handler(). */
+/* Called from retransmit_coa_request(), directly or via INSERT_EVENT(). */
+static void no_response_to_coa_request(void *ctx)
 {
-       int rcode;
-       int post_proxy_type = 0;
-       VALUE_PAIR *vp;
-       
-       /*
-        *      Delete any reply we had accumulated until now.
-        */
-       pairfree(&request->reply->vps);
-       
-       /*
-        *      Run the packet through the post-proxy stage,
+       REQUEST *request = ctx;
+       char buffer[128];
+
+       rad_assert(request->magic == REQUEST_MAGIC);
+       rad_assert(request->child_state == REQUEST_PROXIED);
+       rad_assert(request->home_server != NULL);
+       rad_assert(!request->in_request_hash);
+
+       radlog(L_ERR, "No response to CoA request sent to %s",
+              inet_ntop(request->proxy->dst_ipaddr.af,
+                        &request->proxy->dst_ipaddr.ipaddr,
+                        buffer, sizeof(buffer)));
+
+       /* Hack: OR PW_COA_REQUEST into bits 8 and up of the packet code.  NOTE(review): presumably a marker the failure handler checks -- confirm. */
+       request->packet->code |= (PW_COA_REQUEST << 8);
+       post_proxy_fail_handler(request);
+}
+
+
+/* Set the packet's Event-Timestamp attribute to "when"; returns 1 if it was updated, 0 if the attribute is absent. */
+static int update_event_timestamp(RADIUS_PACKET *packet, time_t when)
+{
+       VALUE_PAIR *stamp = pairfind(packet->vps, PW_EVENT_TIMESTAMP);
+
+       if (stamp == NULL) {
+               return 0;
+       }
+       stamp->vp_date = when;
+
+       if (packet->data != NULL) {     /* discard packet->data (presumably the stale encoded form -- confirm) */
+               free(packet->data);
+               packet->data = NULL;
+               packet->data_len = 0;
+       }
+       return 1;
+}
+
+
+/*
+ *     Event callback: no reply to a CoA request yet.  Re-send it with the
+ *     RFC 5080 back-off; give up at MRC, and schedule the give-up at MRD.
+ */
+static void retransmit_coa_request(void *ctx)
+{
+       int delay, frac;
+       struct timeval mrd;
+       REQUEST *request = ctx;
+
+       rad_assert(request->magic == REQUEST_MAGIC);
+       rad_assert(request->child_state == REQUEST_PROXIED);
+       rad_assert(request->home_server != NULL);
+       rad_assert(!request->in_request_hash);
+       rad_assert(request->parent == NULL);
+
+       fr_event_now(el, &now);
+
+       /*
+        *      Cap count at MRC, if it is non-zero.
+        */
+       if (request->home_server->coa_mrc &&
+           (request->num_coa_requests >= request->home_server->coa_mrc)) {
+               no_response_to_coa_request(request);
+               return;
+       }
+
+       /*
+        *      RFC 5080 Section 2.2.1
+        *
+        *      RT = 2*RTprev + RAND*RTprev
+        *         = 1.9 * RTprev + rand(0,.2) * RTprev
+        *         = 1.9 * RTprev + rand(0,1) * (RTprev / 5)
+        *      The 16x16 bit product is done unsigned: it can exceed INT_MAX.
+        */
+       delay = fr_rand();
+       delay ^= (delay >> 16);
+       delay &= 0xffff;
+       frac = request->delay / 5;
+       delay = ((frac >> 16) * delay) + (int) (((unsigned int) (frac & 0xffff) * (unsigned int) delay) >> 16);
+       delay += (2 * request->delay) - (request->delay / 10);
+
+       /*
+        *      Cap delay at MRT, if MRT is non-zero.
+        */
+       if (request->home_server->coa_mrt &&
+           (delay > (request->home_server->coa_mrt * USEC))) {
+               int mrt_usec = request->home_server->coa_mrt * USEC;
+               /*
+                *      delay = MRT + RAND * MRT
+                *            = 0.9 MRT + rand(0,1) * (MRT / 5)
+                */
+               delay = fr_rand();
+               delay ^= (delay >> 16);
+               delay &= 0xffff;
+               frac = mrt_usec / 5;
+               delay = ((frac >> 16) * delay) + (int) (((unsigned int) (frac & 0xffff) * (unsigned int) delay) >> 16);
+               delay += mrt_usec - (mrt_usec / 10);
+       }
+
+       request->delay = delay;
+       request->when = now;
+       tv_add(&request->when, request->delay);
+       mrd = request->proxy_when;
+       mrd.tv_sec += request->home_server->coa_mrd;
+
+       /*
+        *      Cap duration at MRD.
+        */
+       if (timercmp(&mrd, &request->when, <)) {
+               request->when = mrd;
+               INSERT_EVENT(no_response_to_coa_request, request);
+
+       } else {
+               INSERT_EVENT(retransmit_coa_request, request);
+       }
+
+       if (update_event_timestamp(request->proxy, now.tv_sec)) {
+               if (!insert_into_proxy_hash(request, TRUE)) {
+                       DEBUG("ERROR: Failed to insert retransmission of CoA request into proxy list.");
+                       return;
+               }
+
+               request->num_proxied_requests = 0;
+               request->num_proxied_responses = 0;
+       }
+
+       request->num_proxied_requests++;
+       request->num_coa_requests++; /* is NOT reset by code 3 lines above! */
+
+       request->proxy_listener->send(request->proxy_listener, request);
+}
+
+
+/*
+ *     Originate, or cancel, a CoA / Disconnect request for "request".
+ *     The original request is either DONE, or in CLEANUP_DELAY.
+ *     Returns 1 if the packet was sent or cancelled, 0 on failure.
+ */
+static int originated_coa_request(REQUEST *request)
+{
+       int delay, rcode, pre_proxy_type = 0;
+       VALUE_PAIR *vp;
+       REQUEST *coa;
+       fr_ipaddr_t ipaddr;
+       char buffer[256];
+
+       rad_assert(request->proxy == NULL);
+       rad_assert(!request->in_proxy_hash);
+       rad_assert(request->proxy_reply == NULL);
+
+       /*
+        *      Check whether we want to originate one, or cancel one.
+        */
+       vp = pairfind(request->config_items, PW_SEND_COA_REQUEST);
+       if (!vp && request->coa) {
+               vp = pairfind(request->coa->proxy->vps, PW_SEND_COA_REQUEST);
+       }
+
+       if (vp) {
+               if (vp->vp_integer == 0) {
+                       ev_request_free(&request->coa);
+                       return 1;       /* success */
+               }
+       }
+
+       if (!request->coa) request_alloc_coa(request);
+       if (!request->coa) return 0;
+
+       coa = request->coa;
+
+       /*
+        *      src_ipaddr will be set up in proxy_encode.
+        */
+       memset(&ipaddr, 0, sizeof(ipaddr));
+       vp = pairfind(coa->proxy->vps, PW_PACKET_DST_IP_ADDRESS);
+       if (vp) {
+               ipaddr.af = AF_INET;
+               ipaddr.ipaddr.ip4addr.s_addr = vp->vp_ipaddr;
+
+       } else if ((vp = pairfind(coa->proxy->vps,
+                                 PW_PACKET_DST_IPV6_ADDRESS)) != NULL) {
+               ipaddr.af = AF_INET6;
+               ipaddr.ipaddr.ip6addr = vp->vp_ipv6addr;
+
+       } else if ((vp = pairfind(coa->proxy->vps,
+                                 PW_HOME_SERVER_POOL)) != NULL) {
+               coa->home_pool = home_pool_byname(vp->vp_strvalue, HOME_TYPE_COA);
+               if (!coa->home_pool) {
+                       RDEBUG2("WARNING: No such home_server_pool %s",
+                              vp->vp_strvalue);
+       fail:
+                       ev_request_free(&request->coa);
+                       return 0;
+               }
+
+               /*
+                *      Prefer the client's CoA pool, then its CoA home server.
+                */
+       } else if (request->client->coa_pool) {
+               coa->home_pool = request->client->coa_pool;
+
+       } else if (request->client->coa_server) {
+               coa->home_server = request->client->coa_server;
+
+       } else {
+               /*
+                *      If all else fails, send it to the client that
+                *      originated this request.
+                */
+               memcpy(&ipaddr, &request->packet->src_ipaddr, sizeof(ipaddr));
+       }
+
+       /*
+        *      Use the pool, if it exists.  "vp" is NULL when the pool came from the client, so log the pool's own name.
+        */
+       if (coa->home_pool) {
+               coa->home_server = home_server_ldb(NULL, coa->home_pool, coa);
+               if (!coa->home_server) {
+                       RDEBUG("WARNING: No live home server for home_server_pool %s", coa->home_pool->name);
+                       goto fail;
+               }
+
+       } else if (!coa->home_server) {
+               int port = PW_COA_UDP_PORT;
+
+               vp = pairfind(coa->proxy->vps, PW_PACKET_DST_PORT);
+               if (vp) port = vp->vp_integer;
+
+               coa->home_server = home_server_find(&ipaddr, port);
+               if (!coa->home_server) {
+                       RDEBUG2("WARNING: Unknown destination %s:%d for CoA request.",
+                              inet_ntop(ipaddr.af, &ipaddr.ipaddr,
+                                        buffer, sizeof(buffer)), port);
+                       goto fail;
+               }
+       }
+
+       vp = pairfind(coa->proxy->vps, PW_PACKET_TYPE);
+       if (vp) {
+               switch (vp->vp_integer) {
+               case PW_COA_REQUEST:
+               case PW_DISCONNECT_REQUEST:
+                       coa->proxy->code = vp->vp_integer;
+                       break;
+
+               default:
+                       DEBUG("Cannot set CoA Packet-Type to code %d",
+                             vp->vp_integer);
+                       goto fail;
+               }
+       }
+
+       if (!coa->proxy->code) coa->proxy->code = PW_COA_REQUEST;
+
+       /*
+        *      The rest of the server code assumes that
+        *      request->packet && request->reply exist.  Copy them
+        *      from the original request.
+        */
+       rad_assert(coa->packet != NULL);
+       rad_assert(coa->packet->vps == NULL);
+       memcpy(coa->packet, request->packet, sizeof(*request->packet));
+       coa->packet->vps = paircopy(request->packet->vps);
+       coa->packet->data = NULL;
+       rad_assert(coa->reply != NULL);
+       rad_assert(coa->reply->vps == NULL);
+       memcpy(coa->reply, request->reply, sizeof(*request->reply));
+       coa->reply->vps = paircopy(request->reply->vps);
+       coa->reply->data = NULL;
+       coa->config_items = paircopy(request->config_items);
+
+       /*
+        *      Call the pre-proxy routines.
+        */
+       vp = pairfind(request->config_items, PW_PRE_PROXY_TYPE);
+       if (vp) {
+               RDEBUG2("  Found Pre-Proxy-Type %s", vp->vp_strvalue);
+               pre_proxy_type = vp->vp_integer;
+       }
+
+       if (coa->home_pool && coa->home_pool->virtual_server) {
+               const char *old_server = coa->server;
+
+               coa->server = coa->home_pool->virtual_server;
+               RDEBUG2(" server %s {", coa->server);
+               rcode = module_pre_proxy(pre_proxy_type, coa);
+               RDEBUG2(" }");
+               coa->server = old_server;
+       } else {
+               rcode = module_pre_proxy(pre_proxy_type, coa);
+       }
+       switch (rcode) {
+       default:
+               goto fail;
+
+       /*
+        *      Only send the CoA packet if the pre-proxy code succeeded.
+        */
+       case RLM_MODULE_NOOP:
+       case RLM_MODULE_OK:
+       case RLM_MODULE_UPDATED:
+               break;
+       }
+
+       /*
+        *      Source IP / port is set when the proxy socket
+        *      is chosen.
+        */
+       coa->proxy->dst_ipaddr = coa->home_server->ipaddr;
+       coa->proxy->dst_port = coa->home_server->port;
+
+       if (!insert_into_proxy_hash(coa, FALSE)) {
+               DEBUG("ERROR: Failed to insert CoA request into proxy list.");
+               goto fail;
+       }
+
+       /*
+        *      We CANNOT divorce the CoA request from the parent
+        *      request.  This function is running in a child thread,
+        *      and we need access to the main event loop in order to
+        *      add the timers for the CoA packet.  See
+        *      wait_a_bit().
+        */
+
+       /*
+        *      Forget about the original request completely at this
+        *      point.
+        */
+       request = coa;
+
+       gettimeofday(&request->proxy_when, NULL);
+       request->received = request->next_when = request->proxy_when;
+       rad_assert(request->proxy_reply == NULL);
+
+       /*
+        *      Implement re-transmit algorithm as per RFC 5080
+        *      Section 2.2.1.
+        *
+        *      We want IRT + RAND*IRT
+        *      or 0.9 IRT + rand(0,.2) IRT
+        *
+        *      2^20 ~ USEC, and we want 2.
+        *      rand(0,0.2) USEC ~ (rand(0,2^21) / 10)
+        */
+       delay = (fr_rand() & ((1 << 21) - 1)) / 10;
+       request->delay = delay * request->home_server->coa_irt;
+       delay = request->home_server->coa_irt * USEC;
+       delay -= delay / 10;
+       delay += request->delay;
+
+       request->delay = delay;
+       tv_add(&request->next_when, delay);
+       request->next_callback = retransmit_coa_request;
+
+       /*
+        *      Note that we set proxied BEFORE sending the packet.
+        *
+        *      Once we send it, the request is tainted, as
+        *      another thread may have picked it up.  Don't
+        *      touch it!
+        */
+       request->num_proxied_requests = 1;
+       request->num_proxied_responses = 0;
+#ifdef HAVE_PTHREAD_H
+       request->child_pid = NO_SUCH_CHILD_PID;
+#endif
+
+       update_event_timestamp(request->proxy, request->proxy_when.tv_sec);
+
+       request->child_state = REQUEST_PROXIED;
+
+       DEBUG_PACKET(request, request->proxy, 1);
+
+       request->proxy_listener->send(request->proxy_listener, request);
+       return 1;
+}
+#endif /* WITH_COA */
+
+#ifdef WITH_PROXY
+static int process_proxy_reply(REQUEST *request)
+{
+       int rcode;
+       int post_proxy_type = 0;
+       VALUE_PAIR *vp;
+       
+       /*
+        *      Delete any reply we had accumulated until now.
+        */
+       pairfree(&request->reply->vps);
+       
+       /*
+        *      Run the packet through the post-proxy stage,
         *      BEFORE playing games with the attributes.
         */
        vp = pairfind(request->config_items, PW_POST_PROXY_TYPE);
@@ -1175,9 +1718,7 @@ static int process_proxy_reply(REQUEST *request)
                post_proxy_type = vp->vp_integer;
        }
        
-       rad_assert(request->home_pool != NULL);
-       
-       if (request->home_pool->virtual_server) {
+       if (request->home_pool && request->home_pool->virtual_server) {
                const char *old_server = request->server;
                
                request->server = request->home_pool->virtual_server;
@@ -1188,6 +1729,15 @@ static int process_proxy_reply(REQUEST *request)
        } else {
                rcode = module_post_proxy(post_proxy_type, request);
        }
+
+#ifdef WITH_COA
+       if (request->packet->code == request->proxy->code)
+         /*
+          *    Don't run the next bit if we originated a CoA
+          *    packet, after receiving an Access-Request or
+          *    Accounting-Request.
+          */
+#endif
        
        /*
         *      There may NOT be a proxy reply, as we may be
@@ -1278,7 +1828,7 @@ static int request_pre_handler(REQUEST *request)
                                                  &my_debug, 1,
                                                  &result);
                        if (result) {
-                               request->priority = 2;
+                               request->options = 2;
                                request->radlog = radlog_request;
                        }
                }
@@ -1289,23 +1839,22 @@ static int request_pre_handler(REQUEST *request)
        }
 
        if (rcode < 0) {
-               radlog(L_ERR, "%s Dropping packet without response.", fr_strerror());
+               RDEBUG("%s Dropping packet without response.", fr_strerror());
+               request->reply->offset = -2; /* bad authenticator */
                request->child_state = REQUEST_DONE;
                return 0;
        }
 
-#ifdef WITH_PROXY
-       if (!request->proxy)
-#endif
-         {
-                 request->username = pairfind(request->packet->vps,
+       if (!request->username) {
+               request->username = pairfind(request->packet->vps,
                                             PW_USER_NAME);
+       }
 
 #ifdef WITH_PROXY
-       } else {
+       if (request->proxy) {
                return process_proxy_reply(request);
-#endif
        }
+#endif
 
        return 1;
 }
@@ -1320,13 +1869,20 @@ static int proxy_request(REQUEST *request)
        struct timeval when;
        char buffer[128];
 
+#ifdef WITH_COA
+       if (request->coa) {
+               RDEBUG("WARNING: Cannot proxy and originate CoA packets at the same time.  Cancelling CoA request");
+               ev_request_free(&request->coa);
+       }
+#endif
+
        if (request->home_server->server) {
-               RDEBUG("ERROR: Cannot perform real proxying to a virtual server.");
+               RDEBUG("ERROR: Cannot proxy to a virtual server.");
                return 0;
        }
 
-       if (!insert_into_proxy_hash(request)) {
-               RDEBUG("ERROR: Failed inserting request into proxy hash.");
+       if (!insert_into_proxy_hash(request, FALSE)) {
+               RDEBUG("ERROR: Failed to insert entry into proxy list.");
                return 0;
        }
 
@@ -1347,7 +1903,7 @@ static int proxy_request(REQUEST *request)
        }
        request->next_callback = no_response_to_proxied_request;
 
-       RDEBUG2("Proxying request %d to home server %s port %d",
+       RDEBUG2("Proxying request %u to home server %s port %d",
               request->number,
               inet_ntop(request->proxy->dst_ipaddr.af,
                         &request->proxy->dst_ipaddr.ipaddr,
@@ -1410,26 +1966,46 @@ static int proxy_to_virtual_server(REQUEST *request)
 
        } else {
                RDEBUG2("Unknown packet type %d", request->proxy->code);
-               request_free(&fake);
+               ev_request_free(&fake);
                return 0;
        }
 
        RDEBUG2(">>> Sending proxied request internally to virtual server.");
        radius_handle_request(fake, fun);
-       RDEBUG2("<<< Received proxied response from internal virtual server.");
+       RDEBUG2("<<< Received proxied response code %d from internal virtual server.", fake->reply->code);
 
-       request->proxy_reply = fake->reply;
-       fake->reply = NULL;
+       if (fake->reply->code != 0) {
+               request->proxy_reply = fake->reply;
+               fake->reply = NULL;
+       } else {
+               /*
+                *      There was no response
+                */
+               setup_post_proxy_fail(request);
+       }
 
-       request_free(&fake);
+       ev_request_free(&fake);
 
        process_proxy_reply(request);
-       fun(request);
+
+       /*
+        *      Process it through the normal section again, but ONLY
+        *      if we received a proxy reply..
+        */
+       if (request->proxy_reply) {
+               if (request->server) RDEBUG("server %s {",
+                                           request->server != NULL ?
+                                           request->server : ""); 
+               fun(request);
+               
+               if (request->server) RDEBUG("} # server %s",
+                                           request->server != NULL ?
+                                           request->server : "");
+       }
 
        return 2;               /* success, but NOT '1' !*/
 }
 
-
 /*
  *     Return 1 if we did proxy it, or the proxy attempt failed
  *     completely.  Either way, the caller doesn't touch the request
@@ -1442,7 +2018,7 @@ static int successfully_proxied_request(REQUEST *request)
        VALUE_PAIR *realmpair;
        VALUE_PAIR *strippedname;
        VALUE_PAIR *vp;
-       char *realmname;
+       char *realmname = NULL;
        home_server *home;
        REALM *realm = NULL;
        home_pool_t *pool;
@@ -1452,13 +2028,50 @@ static int successfully_proxied_request(REQUEST *request)
         *
         *      FIXME: This should really be a serious error.
         */
-       if (request->in_proxy_hash) {
+       if (request->in_proxy_hash ||
+           (request->proxy_reply && (request->proxy_reply->code != 0))) {
                return 0;
        }
 
        realmpair = pairfind(request->config_items, PW_PROXY_TO_REALM);
        if (!realmpair || (realmpair->length == 0)) {
-               return 0;
+               int pool_type;
+
+               vp = pairfind(request->config_items, PW_HOME_SERVER_POOL);
+               if (!vp) return 0;
+
+               switch (request->packet->code) {
+               case PW_AUTHENTICATION_REQUEST:
+                       pool_type = HOME_TYPE_AUTH;
+                       break;
+
+#ifdef WITH_ACCOUNTING
+               case PW_ACCOUNTING_REQUEST:
+                       pool_type = HOME_TYPE_ACCT;
+                       break;
+#endif
+
+#ifdef WITH_COA
+               case PW_COA_REQUEST:
+               case PW_DISCONNECT_REQUEST:
+                       pool_type = HOME_TYPE_COA;
+                       break;
+#endif
+
+               default:
+                       return 0;
+               }
+
+               pool = home_pool_byname(vp->vp_strvalue, pool_type);
+               if (!pool) {
+                       RDEBUG2("ERROR: Cannot proxy to unknown pool %s",
+                               vp->vp_strvalue);
+                       return 0;
+               }
+
+               realmname = NULL; /* no realms */
+               realm = NULL;
+               goto found_pool;
        }
 
        realmname = (char *) realmpair->vp_strvalue;
@@ -1480,6 +2093,12 @@ static int successfully_proxied_request(REQUEST *request)
                pool = realm->acct_pool;
 #endif
 
+#ifdef WITH_COA
+       } else if ((request->packet->code == PW_COA_REQUEST) ||
+                  (request->packet->code == PW_DISCONNECT_REQUEST)) {
+               pool = realm->acct_pool;
+#endif
+
        } else {
                rad_panic("Internal sanity check failed");
        }
@@ -1490,6 +2109,7 @@ static int successfully_proxied_request(REQUEST *request)
                return 0;
        }
 
+found_pool:
        home = home_server_ldb(realmname, pool, request);
        if (!home) {
                RDEBUG2("ERROR: Failed to find live home server for realm %s",
@@ -1498,60 +2118,18 @@ static int successfully_proxied_request(REQUEST *request)
        }
        request->home_pool = pool;
 
+#ifdef WITH_COA
        /*
-        *      Remember that we sent the request to a Realm.
+        *      Once we've decided to proxy a request, we cannot send
+        *      a CoA packet.  So we free up any CoA packet here.
         */
-       pairadd(&request->packet->vps,
-               pairmake("Realm", realmname, T_OP_EQ));
-
-#ifdef WITH_DETAIL
-       /*
-        *      We read the packet from a detail file, AND it came from
-        *      the server we're about to send it to.  Don't do that.
-        */
-       if ((request->packet->code == PW_ACCOUNTING_REQUEST) &&
-           (request->listener->type == RAD_LISTEN_DETAIL) &&
-           (home->ipaddr.af == AF_INET) &&
-           (request->packet->src_ipaddr.af == AF_INET) &&
-           (home->ipaddr.ipaddr.ip4addr.s_addr == request->packet->src_ipaddr.ipaddr.ip4addr.s_addr)) {
-               RDEBUG2("    rlm_realm: Packet came from realm %s, proxy cancelled", realmname);
-               return 0;
-       }
+       ev_request_free(&request->coa);
 #endif
-
        /*
-        *      Allocate the proxy packet, only if it wasn't already
-        *      allocated by a module.  This check is mainly to support
-        *      the proxying of EAP-TTLS and EAP-PEAP tunneled requests.
-        *
-        *      In those cases, the EAP module creates a "fake"
-        *      request, and recursively passes it through the
-        *      authentication stage of the server.  The module then
-        *      checks if the request was supposed to be proxied, and
-        *      if so, creates a proxy packet from the TUNNELED request,
-        *      and not from the EAP request outside of the tunnel.
-        *
-        *      The proxy then works like normal, except that the response
-        *      packet is "eaten" by the EAP module, and encapsulated into
-        *      an EAP packet.
+        *      Remember that we sent the request to a Realm.
         */
-       if (!request->proxy) {
-               if ((request->proxy = rad_alloc(TRUE)) == NULL) {
-                       radlog(L_ERR|L_CONS, "no memory");
-                       exit(1);
-               }
-
-               /*
-                *      Copy the request, then look up name and
-                *      plain-text password in the copy.
-                *
-                *      Note that the User-Name attribute is the
-                *      *original* as sent over by the client.  The
-                *      Stripped-User-Name attribute is the one hacked
-                *      through the 'hints' file.
-                */
-               request->proxy->vps =  paircopy(request->packet->vps);
-       }
+       if (realmname) pairadd(&request->packet->vps,
+                              pairmake("Realm", realmname, T_OP_EQ));
 
        /*
         *      Strip the name, if told to.
@@ -1559,7 +2137,7 @@ static int successfully_proxied_request(REQUEST *request)
         *      Doing it here catches the case of proxied tunneled
         *      requests.
         */
-       if (realm->striprealm == TRUE &&
+       if (realm && (realm->striprealm == TRUE) &&
           (strippedname = pairfind(request->proxy->vps, PW_STRIPPED_USER_NAME)) != NULL) {
                /*
                 *      If there's a Stripped-User-Name attribute in
@@ -1598,7 +2176,8 @@ static int successfully_proxied_request(REQUEST *request)
         *      since we can't use the request authenticator
         *      anymore - we changed it.
         */
-       if (pairfind(request->proxy->vps, PW_CHAP_PASSWORD) &&
+       if ((request->packet->code == PW_AUTHENTICATION_REQUEST) &&
+           pairfind(request->proxy->vps, PW_CHAP_PASSWORD) &&
            pairfind(request->proxy->vps, PW_CHAP_CHALLENGE) == NULL) {
                vp = radius_paircreate(request, &request->proxy->vps,
                                       PW_CHAP_CHALLENGE, PW_TYPE_OCTETS);
@@ -1621,9 +2200,6 @@ static int successfully_proxied_request(REQUEST *request)
         *      pre-proxy may use this information, or change it.
         */
        request->proxy->code = request->packet->code;
-       request->proxy->dst_ipaddr = home->ipaddr;
-       request->proxy->dst_port = home->port;
-       request->home_server = home;
 
        /*
         *      Call the pre-proxy routines.
@@ -1684,7 +2260,7 @@ static int successfully_proxied_request(REQUEST *request)
        }
 
        if (!proxy_request(request)) {
-               RDEBUG("ERROR: Failed to proxy request %d", request->number);
+               RDEBUG("ERROR: Failed to proxy request %u", request->number);
                return -1;
        }
        
@@ -1692,7 +2268,6 @@ static int successfully_proxied_request(REQUEST *request)
 }
 #endif
 
-
 static void request_post_handler(REQUEST *request)
 {
        int child_state = -1;
@@ -1702,19 +2277,38 @@ static void request_post_handler(REQUEST *request)
        if ((request->master_state == REQUEST_STOP_PROCESSING) ||
            (request->parent &&
             (request->parent->master_state == REQUEST_STOP_PROCESSING))) {
-               RDEBUG2("Request %d was cancelled.", request->number);
+               RDEBUG2("request %u was cancelled.", request->number);
 #ifdef HAVE_PTHREAD_H
                request->child_pid = NO_SUCH_CHILD_PID;
 #endif
-               request->child_state = REQUEST_DONE;
-               return;
+               child_state = REQUEST_DONE;
+               goto cleanup;
        }
 
        if (request->child_state != REQUEST_RUNNING) {
                rad_panic("Internal sanity check failed");
        }
 
-       if ((request->reply->code == 0) &&
+#ifdef WITH_COA
+       /*
+        *      If it's not in the request hash, it's a CoA request.
+        *      We hope.
+        */
+       if (!request->in_request_hash &&
+           request->proxy &&
+           ((request->proxy->code == PW_COA_REQUEST) ||
+            (request->proxy->code == PW_DISCONNECT_REQUEST))) {
+               request->next_callback = NULL;
+               child_state = REQUEST_DONE;
+               goto cleanup;
+       }
+#endif
+
+       /*
+        *      Catch Auth-Type := Reject BEFORE proxying the packet.
+        */
+       if ((request->packet->code == PW_AUTHENTICATION_REQUEST) &&
+           (request->reply->code == 0) &&
            ((vp = pairfind(request->config_items, PW_AUTH_TYPE)) != NULL) &&
            (vp->vp_integer == PW_AUTHTYPE_REJECT)) {
                request->reply->code = PW_AUTHENTICATION_REJECT;
@@ -1728,7 +2322,7 @@ static void request_post_handler(REQUEST *request)
            (request->packet->code != PW_STATUS_SERVER)) {
                int rcode = successfully_proxied_request(request);
 
-               if (rcode == 1) return;
+               if (rcode == 1) return; /* request is invalid */
 
                /*
                 *      Failed proxying it (dead home servers, etc.)
@@ -1750,6 +2344,13 @@ static void request_post_handler(REQUEST *request)
                 *      OR we proxied it internally to a virutal server.
                 */
        }
+
+#ifdef WITH_COA
+       else if (request->proxy && request->coa) {
+               RDEBUG("WARNING: Cannot proxy and originate CoA packets at the same time.  Cancelling CoA request");
+               ev_request_free(&request->coa);
+       }
+#endif
 #endif
 
        /*
@@ -1788,13 +2389,23 @@ static void request_post_handler(REQUEST *request)
                        vp = pairfind(request->config_items,
                                      PW_RESPONSE_PACKET_TYPE);
                        if (!vp) {
-                               RDEBUG2("There was no response configured: rejecting request %d",
+                               RDEBUG2("There was no response configured: rejecting request %u",
                                       request->number);
                                request->reply->code = PW_AUTHENTICATION_REJECT;
+
                        } else if (vp->vp_integer == 256) {
-                               RDEBUG2("Not responding to request %d",
+                               RDEBUG2("Not responding to request %u",
                                       request->number);
 
+                               /*
+                                *      Force cleanup after a long
+                                *      time, so that we don't
+                                *      re-process the packet.
+                                */
+                               request->next_when.tv_sec += request->root->max_request_time;
+                               request->next_callback = cleanup_delay;
+                               child_state = REQUEST_CLEANUP_DELAY;
+                               break;
                        } else {
                                request->reply->code = vp->vp_integer;
 
@@ -1823,7 +2434,7 @@ static void request_post_handler(REQUEST *request)
                        when.tv_sec += request->root->reject_delay;
 
                        if (timercmp(&when, &request->next_when, >)) {
-                               RDEBUG2("Delaying reject of request %d for %d seconds",
+                               RDEBUG2("Delaying reject of request %u for %d seconds",
                                       request->number,
                                       request->root->reject_delay);
                                request->next_when = when;
@@ -1836,6 +2447,10 @@ static void request_post_handler(REQUEST *request)
                        }
                }
 
+#ifdef WITH_COA
+       case PW_COA_REQUEST:
+       case PW_DISCONNECT_REQUEST:
+#endif
                request->next_when.tv_sec += request->root->cleanup_delay;
                request->next_callback = cleanup_delay;
                child_state = REQUEST_CLEANUP_DELAY;
@@ -1856,15 +2471,11 @@ static void request_post_handler(REQUEST *request)
                break;
 
        default:
-               if ((request->packet->code > 1024) &&
-                   (request->packet->code < (1024 + 254 + 1))) {
-                       request->next_callback = NULL;
-                       child_state = REQUEST_DONE;
-                       break;
-               }
-
-               radlog(L_ERR, "Unknown packet type %d", request->packet->code);
-               rad_panic("Unknown packet type");
+               /*
+                *      DHCP, VMPS, etc.
+                */
+               request->next_callback = NULL;
+               child_state = REQUEST_DONE;
                break;
        }
 
@@ -1881,6 +2492,25 @@ static void request_post_handler(REQUEST *request)
                request->listener->send(request->listener, request);
        }
 
+#ifdef WITH_COA
+       /*
+        *      Now that we've completely processed the request,
+        *      see if we need to originate a CoA request.  But ONLY
+        *      if it wasn't proxied.
+        */
+       if (!request->proxy &&
+           (request->packet->code != PW_COA_REQUEST) &&
+           (request->packet->code != PW_DISCONNECT_REQUEST) &&
+           (request->coa ||
+            (pairfind(request->config_items, PW_SEND_COA_REQUEST) != NULL))) {
+               if (!originated_coa_request(request)) {
+                       RDEBUG2("Do CoA Fail handler here");
+               }
+               /* request->coa is still set, so we can update events */
+       }
+#endif
+
+ cleanup:
        /*
         *      Clean up.  These are no longer needed.
         */
@@ -1915,8 +2545,8 @@ static void request_post_handler(REQUEST *request)
        }
 #endif
 
-       RDEBUG2("Finished request %d.", request->number);
-
+       RDEBUG2("Finished request %u.", request->number);
+       rad_assert(child_state >= 0);
        request->child_state = child_state;
 
        /*
@@ -1934,7 +2564,7 @@ static void received_retransmit(REQUEST *request, const RADCLIENT *client)
 
        RAD_STATS_TYPE_INC(request->listener, total_dup_requests);
        RAD_STATS_CLIENT_INC(request->listener, client, total_dup_requests);
-
+       
        switch (request->child_state) {
        case REQUEST_QUEUED:
        case REQUEST_RUNNING:
@@ -1942,7 +2572,7 @@ static void received_retransmit(REQUEST *request, const RADCLIENT *client)
        discard:
 #endif
                radlog(L_ERR, "Discarding duplicate request from "
-                      "client %s port %d - ID: %d due to unfinished request %d",
+                      "client %s port %d - ID: %d due to unfinished request %u",
                       client->shortname,
                       request->packet->src_port,request->packet->id,
                       request->number);
@@ -1961,9 +2591,15 @@ static void received_retransmit(REQUEST *request, const RADCLIENT *client)
                 *
                 *      Instead, we just discard the packet.  We may
                 *      eventually respond, or the client will send a
-                *      new accounting packet.
+                *      new accounting packet.            
+                *
+                *      The same comments go for Status-Server, and
+                *      other packet types.
+                *
+                *      FIXME: coa: when we proxy CoA && Disconnect
+                *      packets, this logic has to be fixed.
                 */
-               if (request->packet->code == PW_ACCOUNTING_REQUEST) {
+               if (request->packet->code != PW_AUTHENTICATION_REQUEST) {
                        goto discard;
                }
 
@@ -1981,7 +2617,7 @@ static void received_retransmit(REQUEST *request, const RADCLIENT *client)
 
                        home = home_server_ldb(NULL, request->home_pool, request);
                        if (!home) {
-                               RDEBUG2("Failed to find live home server for request %d", request->number);
+                               RDEBUG2("ERROR: Failed to find live home server for request %u", request->number);
                        no_home_servers:
                                /*
                                 *      Do post-request processing,
@@ -1993,9 +2629,6 @@ static void received_retransmit(REQUEST *request, const RADCLIENT *client)
                        }
 
                        request->proxy->code = request->packet->code;
-                       request->proxy->dst_ipaddr = home->ipaddr;
-                       request->proxy->dst_port = home->port;
-                       request->home_server = home;
 
                        /*
                         *      Free the old packet, to force re-encoding
@@ -2018,7 +2651,7 @@ static void received_retransmit(REQUEST *request, const RADCLIENT *client)
                         *      Try to proxy the request.
                         */
                        if (!proxy_request(request)) {
-                               RDEBUG("ERROR: Failed to re-proxy request %d", request->number);
+                               RDEBUG("ERROR: Failed to re-proxy request %u", request->number);
                                goto no_home_servers;
                        }
 
@@ -2055,6 +2688,21 @@ static void received_retransmit(REQUEST *request, const RADCLIENT *client)
 
        case REQUEST_CLEANUP_DELAY:
        case REQUEST_DONE:
+               if (request->reply->code == 0) {
+                       RDEBUG2("Ignoring retransmit from client %s port %d "
+                               "- ID: %d, no reply was configured",
+                               client->shortname,
+                               request->packet->src_port, request->packet->id);
+                       return;
+               }
+
+               /*
+                *      FIXME: This sends duplicate replies to
+                *      accounting requests, even if Acct-Delay-Time
+                *      or Event-Timestamp is in the packet.  In those
+                *      cases, the Id should be changed, and the packet
+                *      re-calculated.
+                */
                RDEBUG2("Sending duplicate reply "
                       "to client %s port %d - ID: %d",
                       client->shortname,
@@ -2070,7 +2718,7 @@ static void received_conflicting_request(REQUEST *request,
                                         const RADCLIENT *client)
 {
        radlog(L_ERR, "Received conflicting packet from "
-              "client %s port %d - ID: %d due to unfinished request %d.  Giving up on old request.",
+              "client %s port %d - ID: %d due to unfinished request %u.  Giving up on old request.",
               client->shortname,
               request->packet->src_port, request->packet->id,
               request->number);
@@ -2099,10 +2747,21 @@ static void received_conflicting_request(REQUEST *request,
 #endif
 
                /*
+                *      Catch race conditions.  It may have switched
+                *      from running to done while this code is being
+                *      executed.
+                */
+       case REQUEST_REJECT_DELAY:
+       case REQUEST_CLEANUP_DELAY:
+       case REQUEST_DONE:
+               break;
+
+               /*
                 *      It's in some other state, and therefore also
                 *      in the event queue.  At some point, the
                 *      child will notice, and we can then delete it.
                 */
+       case REQUEST_PROXIED:
        default:
                rad_assert(request->ev != NULL);
                break;
@@ -2188,6 +2847,26 @@ int received_request(rad_listen_t *listener,
                        struct timeval when;
 
                default:
+                       /*
+                        *      Special hacks for race conditions.
+                        *      The reply is encoded, and therefore
+                        *      likely sent.  We received a *new*
+                        *      packet from the client, likely before
+                        *      the next line or two of code which
+                        *      updated the child state.  In this
+                        *      case, just accept the new request.
+                        */
+                       if ((request->reply->code != 0) &&
+                           request->reply->data) {
+                               radlog(L_INFO, "WARNING: Allowing fast client %s port %d - ID: %d for recent request %u.",
+                                      client->shortname,
+                                      packet->src_port, packet->id,
+                                      request->number);
+                               remove_from_request_hash(request);
+                               request = NULL;
+                               break;
+                       }
+
                        gettimeofday(&when, NULL);
                        when.tv_sec -= 1;
 
@@ -2202,7 +2881,7 @@ int received_request(rad_listen_t *listener,
                         */
                        if (timercmp(&when, &request->received, <)) {
                                radlog(L_ERR, "Discarding conflicting packet from "
-                                      "client %s port %d - ID: %d due to recent request %d.",
+                                      "client %s port %d - ID: %d due to recent request %u.",
                                       client->shortname,
                                       packet->src_port, packet->id,
                                       request->number);
@@ -2271,8 +2950,8 @@ int received_request(rad_listen_t *listener,
         *      Remember the request in the list.
         */
        if (!fr_packet_list_insert(pl, &request->packet)) {
-               radlog(L_ERR, "Failed to insert request %d in the list of live requests: discarding", request->number);
-               request_free(&request);
+               radlog(L_ERR, "Failed to insert request %u in the list of live requests: discarding", request->number);
+               ev_request_free(&request);
                return 0;
        }
 
@@ -2327,58 +3006,32 @@ int received_request(rad_listen_t *listener,
 REQUEST *received_proxy_response(RADIUS_PACKET *packet)
 {
        char            buffer[128];
-       home_server     *home;
        REQUEST         *request;
 
-       if (!home_server_find(&packet->src_ipaddr, packet->src_port)) {
-               radlog(L_ERR, "Ignoring request from unknown home server %s port %d",
-                      inet_ntop(packet->src_ipaddr.af,
-                                &packet->src_ipaddr.ipaddr,
-                                buffer, sizeof(buffer)),
-                              packet->src_port);
-               rad_free(&packet);
-               return NULL;
-       }
-
        /*
         *      Also removes from the proxy hash if responses == requests
         */
        request = lookup_in_proxy_hash(packet);
 
        if (!request) {
-               radlog(L_PROXY, "No outstanding request was found for proxy reply from home server %s port %d - ID %d",
+               radlog(L_PROXY, "No outstanding request was found for reply from host %s port %d - ID %d",
                       inet_ntop(packet->src_ipaddr.af,
                                 &packet->src_ipaddr.ipaddr,
                                 buffer, sizeof(buffer)),
                       packet->src_port, packet->id);
-               rad_free(&packet);
-               return NULL;
-       }
-
-       home = request->home_server;
-
-       gettimeofday(&now, NULL);
-
-       /*
-        *      FIXME: mark the home server alive?
-        */
-       home->state = HOME_STATE_ALIVE;
-
-       if (request->reply && request->reply->code != 0) {
-               RDEBUG2("We already replied to this request.  Discarding response from home server.");
-               rad_free(&packet);
                return NULL;
        }
 
        /*
-        *      We had previously received a reply, so we don't need
-        *      to do anything here.
+        *      We haven't replied to the NAS, but we have seen an
+        *      earlier reply from the home server.  Ignore this packet,
+        *      as we're likely still processing the previous reply.
         */
        if (request->proxy_reply) {
                if (memcmp(request->proxy_reply->vector,
                           packet->vector,
                           sizeof(request->proxy_reply->vector)) == 0) {
-                       RDEBUG2("Discarding duplicate reply from home server %s port %d  - ID: %d for request %d",
+                       RDEBUG2("Discarding duplicate reply from host %s port %d  - ID: %d for request %u",
                               inet_ntop(packet->src_ipaddr.af,
                                         &packet->src_ipaddr.ipaddr,
                                         buffer, sizeof(buffer)),
@@ -2387,16 +3040,112 @@ REQUEST *received_proxy_response(RADIUS_PACKET *packet)
                } else {
                        /*
                         *      ? The home server gave us a new proxy
-                        *      reply, which doesn't match the old
+                        *      reply which doesn't match the old
                         *      one.  Delete it.
                         */
                        RDEBUG2("Ignoring conflicting proxy reply");
                }
-
+               
                /* assert that there's an event queued for request? */
-               rad_free(&packet);
                return NULL;
        }
+
+       /*
+        *      Verify the packet before doing ANYTHING with it.  This
+        *      means we're doing more MD5 checks in the server core.
+        *      However, we can fix that by moving to multiple threads
+        *      listening on sockets.
+        *
+        *      We do this AFTER looking the request up in the hash,
+        *      and AFTER checking if we saw a previous request.  This
+        *      helps minimize the DoS effect of people attacking us
+        *      with spoofed packets.
+        */
+       if (rad_verify(packet, request->proxy,
+                      request->home_server->secret) != 0) {
+               DEBUG("Ignoring spoofed proxy reply.  Signature is invalid");
+               return NULL;
+       }
+
+       gettimeofday(&now, NULL);
+
+       /*
+        *      "ping" packets have a different algorithm for marking
+        *      a home server alive.  They also skip all of the CoA,
+        *      etc. checks.
+        */
+       if (!request->packet) {
+               request->proxy_reply = packet;
+               received_response_to_ping(request);
+               request->proxy_reply = NULL; /* caller will free it */
+               ev_request_free(&request);
+               return NULL;
+       }
+
+       /*
+        *      Maybe move this earlier in the decision process?
+        *      Having it here means that late or duplicate proxy
+        *      replies no longer get the home server marked as
+        *      "alive".  This might be good for stability, though.
+        *
+        *      FIXME: Do we really want to do this whenever we
+        *      receive a packet?  Setting this here means that we
+        *      mark it alive on *any* packet, even if it's lost all
+        *      of the *other* packets in the last 10s.
+        *
+        *      This behavior could be configurable.
+        */
+       request->home_server->state = HOME_STATE_ALIVE;
+       request->home_server->last_packet = now.tv_sec;
+       
+#ifdef WITH_COA
+       /*
+        *      When originating CoA, the "proxy" reply is the reply
+        *      to the CoA request that we originated.  At this point,
+        *      the original request is finished, and it has a reply.
+        *
+        *      However, if we haven't separated the two requests, do
+        *      so now.  This is done so that cleaning up the original
+        *      request won't cause the CoA request to be free'd.  See
+        *      util.c, request_free()
+        */
+       if (request->parent && (request->parent->coa == request)) {
+               request->parent->coa = NULL;
+               request->parent = NULL;
+
+               /*
+                *      The proxied packet was different from the
+                *      original packet, AND the proxied packet was
+                *      a CoA: allow it.
+                */
+       } else if ((request->packet->code != request->proxy->code) &&
+                  ((request->proxy->code == PW_COA_REQUEST) ||
+                   (request->proxy->code == PW_DISCONNECT_REQUEST))) {
+         /*
+          *    It's already divorced: do nothing.
+          */
+         
+       } else
+               /*
+                *      Skip the next set of checks, as the original
+                *      reply is cached.  We want to be able to still
+                *      process the CoA reply, AND to reference the
+                *      original request/reply.
+                *
+                *      This is getting to be really quite a bit of a
+                *      hack.
+                */
+#endif
+
+       /*
+        *      If there's a reply to the NAS, ignore everything
+        *      related to proxy responses
+        */
+       if (request->reply && request->reply->code != 0) {
+               RDEBUG2("Ignoring proxy reply that arrived after we sent a reply to the NAS");
+               return NULL;
+       }
+
 #ifdef WITH_STATS
        /*
         *      The average includes our time to receive packets and
@@ -2406,29 +3155,28 @@ REQUEST *received_proxy_response(RADIUS_PACKET *packet)
         *      We update the response time only for the FIRST packet
         *      we receive.
         */
-       else if (home->ema.window > 0) {
-               radius_stats_ema(&home->ema, &now, &request->proxy_when);
+       if (request->home_server->ema.window > 0) {
+               radius_stats_ema(&request->home_server->ema,
+                                &now, &request->proxy_when);
        }
 #endif
 
-
        switch (request->child_state) {
        case REQUEST_QUEUED:
        case REQUEST_RUNNING:
-               rad_panic("Internal sanity check failed for child state");
-               break;
+               radlog(L_ERR, "Internal sanity check failed for child state");
+               /* FALL-THROUGH */
 
        case REQUEST_REJECT_DELAY:
        case REQUEST_CLEANUP_DELAY:
        case REQUEST_DONE:
-               radlog(L_ERR, "Reply from home server %s port %d  - ID: %d arrived too late for request %d. Try increasing 'retry_delay' or 'max_request_time'",
+               radlog(L_ERR, "Reply from home server %s port %d  - ID: %d arrived too late for request %u. Try increasing 'retry_delay' or 'max_request_time'",
                       inet_ntop(packet->src_ipaddr.af,
                                 &packet->src_ipaddr.ipaddr,
                                 buffer, sizeof(buffer)),
                       packet->src_port, packet->id,
                       request->number);
                /* assert that there's an event queued for request? */
-               rad_free(&packet);
                return NULL;
 
        case REQUEST_PROXIED:
@@ -2473,15 +3221,6 @@ REQUEST *received_proxy_response(RADIUS_PACKET *packet)
        }
 #endif
 
-       /*
-        *      There's no incoming request, so it's a proxied packet
-        *      we originated.
-        */
-       if (!request->packet) {
-               received_response_to_ping(request);
-               return NULL;
-       }
-
        request->child_state = REQUEST_QUEUED;
        request->when = now;
        request->delay = USEC;
@@ -2500,6 +3239,8 @@ REQUEST *received_proxy_response(RADIUS_PACKET *packet)
 void event_new_fd(rad_listen_t *this)
 {
        char buffer[1024];
+
+       if (this->status == RAD_LISTEN_STATUS_KNOWN) return;
        
        this->print(this, buffer, sizeof(buffer));
        
@@ -2507,7 +3248,7 @@ void event_new_fd(rad_listen_t *this)
                if (just_started) {
                        DEBUG("Listening on %s", buffer);
                } else {
-                       DEBUG2(" ... adding new socket %s", buffer);
+                       radlog(L_INFO, " ... adding new socket %s", buffer);
                }
                if (!fr_event_fd_insert(el, 0, this->fd,
                                        event_socket_handler, this)) {
@@ -2520,7 +3261,7 @@ void event_new_fd(rad_listen_t *this)
        }
        
        if (this->status == RAD_LISTEN_STATUS_CLOSED) {
-               DEBUG2(" ... closing socket %s", buffer);
+               radlog(L_INFO, " ... closing socket %s", buffer);
                
                fr_event_fd_delete(el, 0, this->fd);
                this->status = RAD_LISTEN_STATUS_FINISH;
@@ -2536,21 +3277,6 @@ void event_new_fd(rad_listen_t *this)
        }
 }
 
-#ifdef WITH_DETAIL
-static void event_detail_timer(void *ctx)
-{
-       rad_listen_t *listener = ctx;
-       RAD_REQUEST_FUNP fun;
-       REQUEST *request;
-
-       if (listener->recv(listener, &fun, &request)) {
-               if (!thread_pool_addrequest(request, fun)) {
-                       request->child_state = REQUEST_DONE;
-               }
-       }
-}
-#endif
-
 static void handle_signal_self(int flag)
 {
        if ((flag & (RADIUS_SIGNAL_SELF_EXIT | RADIUS_SIGNAL_SELF_TERM)) != 0) {
@@ -2570,13 +3296,14 @@ static void handle_signal_self(int flag)
                time_t when;
                static time_t last_hup = 0;
 
-               DEBUG("Received HUP signal.");
-
                when = time(NULL);
                if ((int) (when - last_hup) < 5) {
                        radlog(L_INFO, "Ignoring HUP (less than 5s since last one)");
                        return;
                }
+
+               radlog(L_INFO, "Received HUP signal.");
+
                last_hup = when;
 
                fr_event_loop_exit(el, 0x80);
@@ -2586,31 +3313,24 @@ static void handle_signal_self(int flag)
        if ((flag & RADIUS_SIGNAL_SELF_DETAIL) != 0) {
                rad_listen_t *this;
                
+               /*
+                *      FIXME: O(N) loops suck.
+                */
                for (this = mainconfig.listen;
                     this != NULL;
                     this = this->next) {
-                       int delay;
-                       struct timeval when;
-
                        if (this->type != RAD_LISTEN_DETAIL) continue;
-                       
-                       delay = detail_delay(this);
-                       if (!delay) continue;
-
-                       fr_event_now(el, &now);
-                       when = now;
-                       tv_add(&when, delay);
 
-                       if (delay > 100000) {
-                               DEBUG("Delaying next detail event for %d.%01u seconds.",
-                                      delay / USEC, (delay % USEC) / 100000);
-                       }
+                       /*
+                        *      This one didn't send the signal, skip
+                        *      it.
+                        */
+                       if (!this->decode(this, NULL)) continue;
 
-                       if (!fr_event_insert(el, event_detail_timer, this,
-                                            &when, NULL)) {
-                               radlog(L_ERR, "Failed remembering timer");
-                               exit(1);
-                       }
+                       /*
+                        *      Go service the interrupt.
+                        */
+                       event_poll_detail(this);
                }
        }
 #endif
@@ -2621,14 +3341,12 @@ static void handle_signal_self(int flag)
                for (this = mainconfig.listen;
                     this != NULL;
                     this = this->next) {
-                       if (this->status == RAD_LISTEN_STATUS_KNOWN) continue;
-
                        event_new_fd(this);
                }
        }
 }
 
-#ifdef __MINGW32__
+#ifndef WITH_SELF_PIPE
 void radius_signal_self(int flag)
 {
        handle_signal_self(flag);
@@ -2705,47 +3423,47 @@ static void event_socket_handler(fr_event_list_t *xel, UNUSED int fd,
 
 
 /*
- *     This function is called periodically to see if any FD's are
- *     available for reading.
+ *     This function is called periodically to see if this detail
+ *     file is available for reading.
  */
-static void event_poll_detail(UNUSED void *ctx)
+static void event_poll_detail(void *ctx)
 {
-       int rcode;
+       int rcode, delay;
        RAD_REQUEST_FUNP fun;
        REQUEST *request;
-       rad_listen_t *this;
+       rad_listen_t *this = ctx;
        struct timeval when;
+       listen_detail_t *detail = this->data;
 
-       fr_event_now(el, &now);
-       when = now;
-       when.tv_sec += 1;
-
-       for (this = mainconfig.listen; this != NULL; this = this->next) {
-               if (this->type != RAD_LISTEN_DETAIL) continue;
-
-               if (this->fd >= 0) continue;
+       rad_assert(this->type == RAD_LISTEN_DETAIL);
 
-               /*
-                *      Try to read something.
-                *
-                *      FIXME: This does poll AND receive.
-                */
-               rcode = this->recv(this, &fun, &request);
-               if (!rcode) continue;
-               
+       /*
+        *      Try to read something.
+        *
+        *      FIXME: This does poll AND receive.
+        */
+       rcode = this->recv(this, &fun, &request);
+       if (rcode != 0) {
                rad_assert(fun != NULL);
                rad_assert(request != NULL);
-                       
+               
                if (!thread_pool_addrequest(request, fun)) {
                        request->child_state = REQUEST_DONE;
                }
        }
 
+       if (!fr_event_now(el, &now)) gettimeofday(&now, NULL);
+       when = now;
+
        /*
-        *      Reset the poll.
+        *      Backdoor API to get the delay until the next poll
+        *      time.
         */
-       if (!fr_event_insert(el, event_poll_detail, NULL,
-                            &when, NULL)) {
+       delay = this->encode(this, NULL);
+       tv_add(&when, delay);
+
+       if (!fr_event_insert(el, event_poll_detail, this,
+                            &when, &detail->ev)) {
                radlog(L_ERR, "Failed creating handler");
                exit(1);
        }
@@ -2767,7 +3485,7 @@ static void event_status(struct timeval *wake)
        }
 
        if (!wake) {
-               DEBUG("Ready to process requests.");
+               radlog(L_INFO, "Ready to process requests.");
 
        } else if ((wake->tv_sec != 0) ||
                   (wake->tv_usec >= 100000)) {
@@ -2795,67 +3513,6 @@ static void event_status(struct timeval *wake)
 
 }
 
-#if defined(HAVE_SETRESUID) && defined (HAVE_GETRESUID)
-static void fr_suid_up(void)
-{
-       uid_t ruid, euid, suid;
-       
-       if (getresuid(&ruid, &euid, &suid) < 0) {
-               radlog(L_ERR, "Failed getting saved UID's");
-               _exit(1);
-       }
-
-       if (setresuid(-1, suid, -1) < 0) {
-               radlog(L_ERR, "Failed switching to privileged user");
-               _exit(1);
-       }
-
-       if (geteuid() != suid) {
-               radlog(L_ERR, "Switched to unknown UID");
-               _exit(1);
-       }
-}
-
-extern uid_t server_uid;
-extern int did_setuid;
-static void fr_suid_down(void)
-{
-       uid_t ruid, euid, suid;
-
-       if (!did_setuid) return;
-
-       if (getresuid(&ruid, &euid, &suid) < 0) {
-               radlog(L_ERR, "Failed getting saved UID's");
-               _exit(1);
-       }
-
-       if (setresuid(server_uid, server_uid, server_uid) < 0) {
-               radlog(L_ERR, "Failed to permanently switch UID to %u: %s",
-                      server_uid, strerror(errno));
-               _exit(1);
-       }
-
-       if (geteuid() != server_uid) {
-               radlog(L_ERR, "Switched to unknown UID");
-               _exit(1);
-       }
-
-
-       if (getresuid(&ruid, &euid, &suid) < 0) {
-               radlog(L_ERR, "Failed getting saved UID's: %s",
-                      strerror(errno));
-               _exit(1);
-       }
-}
-#else
-/*
- *     Much less secure...
- */
-#define fr_suid_up()
-#define fr_suid_down()
-#endif
-
-
 /*
  *     Externally-visibly functions.
  */
@@ -2871,17 +3528,10 @@ int radius_event_init(CONF_SECTION *cs, int spawn_flag)
        if (!el) return 0;
 
        pl = fr_packet_list_create(0);
-       if (!el) return 0;
+       if (!pl) return 0;      /* FIXME: el is leaked on this error path */
 
        request_num_counter = 0;
 
-       /*
-        *      Move all of the thread calls to this file?
-        *
-        *      It may be best for the mutexes to be in this file...
-        */
-       have_children = spawn_flag;
-
 #ifdef WITH_PROXY
        if (mainconfig.proxy_requests) {
                /*
@@ -2901,30 +3551,40 @@ int radius_event_init(CONF_SECTION *cs, int spawn_flag)
        }
 #endif
 
-       /*
-        *      Just before we spawn the child threads, force the log
-        *      subsystem to re-open the log file for every write.
-        */
-       if (spawn_flag) force_log_reopen();
-
 #ifdef HAVE_PTHREAD_H
 #ifndef __MINGW32__
        NO_SUCH_CHILD_PID = (pthread_t ) (0);
 #else
        NO_SUCH_CHILD_PID = pthread_self(); /* not a child thread */
 #endif
-       if (thread_pool_init(cs, spawn_flag) < 0) {
+       /*
+        *      Initialize the threads ONLY if we're spawning, AND
+        *      we're not just checking the configuration (check_config).
+        */
+       if (spawn_flag && !check_config &&
+           (thread_pool_init(cs, &spawn_flag) < 0)) {
                exit(1);
        }
 #endif
 
+       /*
+        *      Move all of the thread calls to this file?
+        *
+        *      It may be best for the mutexes to be in this file...
+        */
+       have_children = spawn_flag;
+
        if (check_config) {
                DEBUG("%s: #### Skipping IP addresses and Ports ####",
                       mainconfig.name);
+               if (listen_init(cs, &head) < 0) {
+                       fflush(NULL);
+                       exit(1);
+               }
                return 1;
        }
 
-#ifndef __MINGW32__
+#ifdef WITH_SELF_PIPE
        /*
         *      Child threads need a pipe to signal us, as do the
         *      signal handlers.
@@ -2950,7 +3610,7 @@ int radius_event_init(CONF_SECTION *cs, int spawn_flag)
                radlog(L_ERR, "Failed creating handler for signals");
                exit(1);
        }
-#endif
+#endif /* WITH_SELF_PIPE */
 
 #ifdef WITH_PROXY
        /*
@@ -2966,13 +3626,24 @@ int radius_event_init(CONF_SECTION *cs, int spawn_flag)
        DEBUG("%s: #### Opening IP addresses and Ports ####",
               mainconfig.name);
 
-       fr_suid_up();           /* sockets may bind to privileged ports */
-
+       /*
+       *       The server temporarily switches to an unprivileged
+       *       user very early in the bootstrapping process.
+       *       However, some sockets MAY require privileged access
+       *       (bind to device, or to port < 1024, or to raw
+       *       sockets).  Those sockets need to call suid up/down
+       *       themselves around the functions that need a privileged
+       *       uid.
+       */
        if (listen_init(cs, &head) < 0) {
                _exit(1);
        }
        
-       fr_suid_down();
+       /*
+        *      At this point, no one has any business *ever* going
+        *      back to root uid.
+        */
+       fr_suid_down_permanent();
 
        /*
         *      Add all of the sockets to the event loop.
@@ -2988,17 +3659,27 @@ int radius_event_init(CONF_SECTION *cs, int spawn_flag)
 #ifdef WITH_DETAIL
                case RAD_LISTEN_DETAIL:
                        DEBUG("Listening on %s", buffer);
-                       has_detail_listener = TRUE;
+
+                       /*
+                        *      Detail files are always known, and aren't
+                        *      put into the socket event loop.
+                        */
+                       this->status = RAD_LISTEN_STATUS_KNOWN;
+
+                       /*
+                        *      Set up the first poll interval.
+                        */
+                       event_poll_detail(this);
                        break;
 #endif
 
 #ifdef WITH_PROXY
                case RAD_LISTEN_PROXY:
-                       rad_assert(proxy_fds[this->fd & 0x1f] == -1);
-                       rad_assert(proxy_listeners[this->fd & 0x1f] == NULL);
-                       
-                       proxy_fds[this->fd & 0x1f] = this->fd;
-                       proxy_listeners[this->fd & 0x1f] = this;
+                       if (proxy_add_fds(this) < 0) {
+                               radlog(L_ERR, "Failed creating new proxy socket");
+                               return 0;
+                       }
+
                        if (!fr_packet_list_socket_add(proxy_list,
                                                         this->fd)) {
                                rad_assert(0 == 1);
@@ -3010,31 +3691,9 @@ int radius_event_init(CONF_SECTION *cs, int spawn_flag)
                        break;
                }
 
-               /*
-                *      The file descriptor isn't ready.  Poll for
-                *      when it will become ready.  This is for the
-                *      detail file fd's.
-                */
-               if (this->fd < 0) {
-                       continue;
-               }
-
                event_new_fd(this);
        }
 
-       if (has_detail_listener) {
-               struct timeval when;
-               
-               gettimeofday(&when, NULL);
-               when.tv_sec += 1;
-               
-               if (!fr_event_insert(el, event_poll_detail, NULL,
-                                    &when, NULL)) {
-                       radlog(L_ERR, "Failed creating handler");
-                       exit(1);
-               }
-       }
-
        mainconfig.listen = head;
 
        return 1;
@@ -3049,9 +3708,7 @@ static int request_hash_cb(UNUSED void *ctx, void *data)
        rad_assert(request->in_proxy_hash == FALSE);
 #endif
 
-       fr_event_delete(el, &request->ev);
-       remove_from_request_hash(request);
-       request_free(&request);
+       ev_request_free(&request);
 
        return 0;
 }
@@ -3062,13 +3719,7 @@ static int proxy_hash_cb(UNUSED void *ctx, void *data)
 {
        REQUEST *request = fr_packet2myptr(REQUEST, proxy, data);
 
-       fr_packet_list_yank(proxy_list, request->proxy);
-       request->in_proxy_hash = FALSE;
-
-       if (!request->in_request_hash) {
-               fr_event_delete(el, &request->ev);
-               request_free(&request);
-       }
+       ev_request_free(&request);
 
        return 0;
 }
@@ -3088,9 +3739,7 @@ void radius_event_free(void)
         *      referenced from anywhere else.  Remove them first.
         */
        if (proxy_list) {
-               PTHREAD_MUTEX_LOCK(&proxy_mutex);
                fr_packet_list_walk(proxy_list, NULL, proxy_hash_cb);
-               PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
                fr_packet_list_free(proxy_list);
                proxy_list = NULL;
        }
@@ -3120,11 +3769,13 @@ void radius_handle_request(REQUEST *request, RAD_REQUEST_FUNP fun)
                rad_assert(request != NULL);
                
                if (request->server) RDEBUG("server %s {",
-                                            request->server); 
+                                           request->server != NULL ?
+                                           request->server : ""); 
                fun(request);
 
                if (request->server) RDEBUG("} # server %s",
-                                            request->server);
+                                            request->server != NULL ?
+                                           request->server : "");
 
                request_post_handler(request);
        }