Fix corner case when not threaded.
[freeradius.git] / src / main / event.c
index be055ba..65fcb6b 100644 (file)
@@ -399,7 +399,7 @@ retry:
        if (!fr_packet_list_insert(proxy_list, &request->proxy)) {
                fr_packet_list_id_free(proxy_list, request->proxy);
                PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
-               radlog(L_PROXY, "Failed to insert entry into proxy list");
+               radlog(L_PROXY, "Failed to insert entry into proxy list.");
                return 0;
        }
 
@@ -500,7 +500,8 @@ static void wait_for_child_to_die(void *ctx)
                               request->component, request->module);
                } else {
                        request->delay = USEC * request->root->max_request_time;
-                       RDEBUG2("WARNING: Child is still stuck");
+                       RDEBUG2("WARNING: Child is hung after \"max_request_time\" for request %u",
+                               request->number);
                }
                tv_add(&request->when, request->delay);
 
@@ -815,7 +816,7 @@ static void ping_home_server(void *ctx)
        rad_assert(request->proxy_listener == NULL);
 
        if (!insert_into_proxy_hash(request)) {
-               radlog(L_PROXY, "Failed inserting status check %d into proxy hash.  Discarding it.",
+               radlog(L_PROXY, "Failed to insert status check %d into proxy list.  Discarding it.",
                       request->number);
                ev_request_free(&request);
                return;
@@ -1141,33 +1142,53 @@ static void no_response_to_proxied_request(void *ctx)
        }
 #endif
 
-       if (home->state == HOME_STATE_IS_DEAD) {
-               rad_assert(home->ev != NULL); /* or it will never wake up */
+       /*
+        *      If it's not alive, don't try to make it a zombie.
+        */
+       if (home->state != HOME_STATE_ALIVE) {
+               /*
+                *      Don't check home->ev due to race conditions.
+                */
                return;
        }
 
        /*
-        *      Enable the zombie period when we notice that the home
-        *      server hasn't responded.  We do NOT back-date the start
-        *      of the zombie period.
+        *      We've received a real packet recently.  Don't mark the
+        *      server as zombie until we've received NO packets for a
+        *      while.  The "1/4" of zombie period was chosen rather
+        *      arbitrarily.  It's a balance between too short, which
+        *      gives quick fail-over and fail-back, or too long,
+        *      where the proxy still sends packets to an unresponsive
+        *      home server.
         */
-       if (home->state == HOME_STATE_ALIVE) {
-               home->state = HOME_STATE_ZOMBIE;
-               home->zombie_period_start = now;        
-               fr_event_delete(el, &home->ev);
-               home->currently_outstanding = 0;
-               home->num_received_pings = 0;
-
-               radlog(L_PROXY, "Marking home server %s port %d as zombie (it looks like it is dead).",
-                      inet_ntop(home->ipaddr.af, &home->ipaddr.ipaddr,
-                                buffer, sizeof(buffer)),
-                      home->port);
-
-               /*
-                *      Start pinging the home server.
-                */
-               ping_home_server(home);
+       if ((home->last_packet + ((home->zombie_period + 3) / 4)) >= now.tv_sec) {
+               return;
        }
+
+       /*
+        *      Enable the zombie period when we notice that the home
+        *      server hasn't responded for a while.  We back-date the
+        *      zombie period to when we last received a response from
+        *      the home server.
+        */
+       home->state = HOME_STATE_ZOMBIE;
+       
+       home->zombie_period_start.tv_sec = home->last_packet;   /* back-date to the last reply seen */
+       home->zombie_period_start.tv_usec = USEC / 2;           /* was tv_sec: clobbered the line above */
+       
+       fr_event_delete(el, &home->ev);
+       home->currently_outstanding = 0;
+       home->num_received_pings = 0;
+       
+       radlog(L_PROXY, "Marking home server %s port %d as zombie (it looks like it is dead).",
+              inet_ntop(home->ipaddr.af, &home->ipaddr.ipaddr,
+                        buffer, sizeof(buffer)),
+              home->port);
+       
+       /*
+        *      Start pinging the home server.
+        */
+       ping_home_server(home);
 }
 #endif
 
@@ -1211,16 +1232,16 @@ static void wait_a_bit(void *ctx)
        case REQUEST_RUNNING:
                /*
                 *      If we're not thread-capable, OR we're capable,
-                *      but have been told to run without threads,
-                *      complain when the requests is queued for a
-                *      thread, or running in a child thread.
+                *      but have been told to run without threads, and
+                *      the request is still running.  This is usually
+                *      because the request was proxied, and the home
+                *      server didn't respond.
                 */
 #ifdef HAVE_PTHREAD_H
                if (!have_children)
 #endif
                {
-                       rad_assert("We do not have threads, but the request is marked as queued or running in a child thread" == NULL);
-                       break;
+                       goto done;
                }
 
 #ifdef HAVE_PTHREAD_H
@@ -1273,9 +1294,11 @@ static void wait_a_bit(void *ctx)
                 */
                if (have_children &&
                    (pthread_equal(request->child_pid, NO_SUCH_CHILD_PID) == 0)) {
-                       radlog(L_ERR, "WARNING: Unresponsive child in module %s component %s",
-                              request->module ? request->module : "<server core>",
-                              request->component ? request->component : "<server core>");
+                       radlog(L_ERR, "WARNING: Unresponsive child for request %u, in component %s module %s",
+                              request->number,
+                              request->component ? request->component : "<server core>",
+                              request->module ? request->module : "<server core>");
+
                }
                        
                request->delay = USEC;
@@ -1289,6 +1312,7 @@ static void wait_a_bit(void *ctx)
                 *      and clean it up.
                 */
        case REQUEST_DONE:
+       done:
 #ifdef HAVE_PTHREAD_H
                request->child_pid = NO_SUCH_CHILD_PID;
 #endif
@@ -1485,7 +1509,7 @@ static void retransmit_coa_request(void *ctx)
                 *      Don't free the old Id on error.
                 */
                if (!insert_into_proxy_hash(request)) {
-                       radlog(L_PROXY,"Failed re-inserting CoA request into proxy hash.");
+                       radlog(L_PROXY,"Failed to insert retransmission of CoA request into proxy list.");
                        return;
                }
 
@@ -1691,7 +1715,7 @@ static int originated_coa_request(REQUEST *request)
        coa->proxy->dst_port = coa->home_server->port;
 
        if (!insert_into_proxy_hash(coa)) {
-               radlog(L_PROXY, "Failed inserting CoA request into proxy hash.");
+               radlog(L_PROXY, "Failed to insert CoA request into proxy list.");
                goto fail;
        }
 
@@ -1922,8 +1946,8 @@ static int request_pre_handler(REQUEST *request)
 #ifdef WITH_PROXY
        if (request->proxy) {
                return process_proxy_reply(request);
-#endif
        }
+#endif
 
        return 1;
 }
@@ -1946,12 +1970,12 @@ static int proxy_request(REQUEST *request)
 #endif
 
        if (request->home_server->server) {
-               RDEBUG("ERROR: Cannot perform real proxying to a virtual server.");
+               RDEBUG("ERROR: Cannot proxy to a virtual server.");
                return 0;
        }
 
        if (!insert_into_proxy_hash(request)) {
-               radlog(L_PROXY, "Failed inserting request into proxy hash.");
+               radlog(L_PROXY, "Failed to insert request into proxy list.");
                return 0;
        }
 
@@ -2549,8 +2573,11 @@ static void request_post_handler(REQUEST *request)
         *      and it should re-send it.
         *      If configured, encode, sign, and send.
         */
-       if ((request->reply->code != 0) ||
-           (request->listener->type == RAD_LISTEN_DETAIL)) {
+       if ((request->reply->code != 0)
+#ifdef WITH_DETAIL
+           || (request->listener->type == RAD_LISTEN_DETAIL)
+#endif
+           ) {
                DEBUG_PACKET(request, request->reply, 1);
                request->listener->send(request->listener, request);
        }
@@ -2619,6 +2646,7 @@ static void request_post_handler(REQUEST *request)
 }
 
 
+#ifdef WITH_PROXY
 static void rad_retransmit_packet(REQUEST *request)
 {
        char buffer[256];
@@ -2664,7 +2692,7 @@ static int rad_retransmit(REQUEST *request)
                
                home = home_server_ldb(NULL, request->home_pool, request);
                if (!home) {
-                       RDEBUG2("Failed to find live home server for request");
+                       RDEBUG2("ERROR: Failed to find live home server for request");
                no_home_servers:
                        /*
                         *      Do post-request processing,
@@ -2708,7 +2736,7 @@ static int rad_retransmit(REQUEST *request)
 
        return 1;
 }
-
+#endif
 
 static void received_retransmit(REQUEST *request, const RADCLIENT *client)
 {
@@ -2991,7 +3019,10 @@ int received_request(rad_listen_t *listener,
        /*
         *      We may want to quench the new request.
         */
-       if ((listener->type != RAD_LISTEN_DETAIL) &&
+       if (
+#ifdef WITH_DETAIL
+           (listener->type != RAD_LISTEN_DETAIL) &&
+#endif
            !can_handle_new_request(packet, client, root)) {
                return 0;
        }
@@ -3198,8 +3229,11 @@ REQUEST *received_proxy_response(RADIUS_PACKET *packet)
         *      receive a packet?  Setting this here means that we
         *      mark it alive on *any* packet, even if it's lost all
         *      of the *other* packets in the last 10s.
+        *
+        *      This behavior could be configurable.
         */
        request->home_server->state = HOME_STATE_ALIVE;
+       request->home_server->last_packet = now.tv_sec;
        
 #ifdef WITH_COA
        /*
@@ -3535,7 +3569,7 @@ int event_new_fd(rad_listen_t *this)
                }               
 #ifdef WITH_PROXY
                else {
-                       int count = this->count;
+                       int count;
 
                        /*
                         *      Duplicate code
@@ -3617,7 +3651,10 @@ int event_new_fd(rad_listen_t *this)
         */
        if (this->status == RAD_LISTEN_STATUS_CLOSED) {
                int count = this->count;
+
+#ifdef WITH_DETAIL
                rad_assert(this->type != RAD_LISTEN_DETAIL);
+#endif
 
 #ifdef WITH_PROXY
                /*
@@ -3893,7 +3930,10 @@ static void event_socket_handler(fr_event_list_t *xel, UNUSED int fd,
 
        xel = xel;
 
-       if ((listener->type != RAD_LISTEN_DETAIL) &&
+       if (
+#ifdef WITH_DETAIL
+           (listener->type != RAD_LISTEN_DETAIL) &&
+#endif
            (listener->fd < 0)) {
                char buffer[256];
 
@@ -3913,7 +3953,7 @@ static void event_socket_handler(fr_event_list_t *xel, UNUSED int fd,
        thread_pool_addrequest(request, fun);
 }
 
-
+#ifdef WITH_DETAIL
 /*
  *     This function is called periodically to see if this detail
  *     file is available for reading.
@@ -3945,7 +3985,7 @@ static void event_poll_detail(void *ctx)
                exit(1);
        }
 }
-
+#endif
 
 static void event_status(struct timeval *wake)
 {