Fix corner case when not threaded.
[freeradius.git] / src / main / event.c
index 24ae441..65fcb6b 100644 (file)
@@ -399,7 +399,7 @@ retry:
        if (!fr_packet_list_insert(proxy_list, &request->proxy)) {
                fr_packet_list_id_free(proxy_list, request->proxy);
                PTHREAD_MUTEX_UNLOCK(&proxy_mutex);
-               radlog(L_PROXY, "Failed to insert entry into proxy list");
+               radlog(L_PROXY, "Failed to insert entry into proxy list.");
                return 0;
        }
 
@@ -480,6 +480,7 @@ static void wait_for_child_to_die(void *ctx)
        REQUEST *request = ctx;
 
        rad_assert(request->magic == REQUEST_MAGIC);
+       remove_from_request_hash(request);
 
        /*
         *      If it's still queued (waiting for a thread to pick it
@@ -491,14 +492,16 @@ static void wait_for_child_to_die(void *ctx)
             (pthread_equal(request->child_pid, NO_SUCH_CHILD_PID) == 0))) {
 
                /*
-                *      Cap delay at five minutes.
+                *      Cap delay at max_request_time
                 */
-               if (request->delay < (USEC * 60 * 5)) {
+               if (request->delay < (USEC * request->root->max_request_time)) {
                        request->delay += (request->delay >> 1);
                        radlog_request(L_INFO, 0, request, "WARNING: Child is hung in component %s module %s.",
                               request->component, request->module);
                } else {
-                       RDEBUG2("Child is still stuck");
+                       request->delay = USEC * request->root->max_request_time;
+                       RDEBUG2("WARNING: Child is hung after \"max_request_time\" for request %u",
+                               request->number);
                }
                tv_add(&request->when, request->delay);
 
@@ -507,7 +510,6 @@ static void wait_for_child_to_die(void *ctx)
        }
 
        RDEBUG2("Child is finally responsive");
-       remove_from_request_hash(request);
 
 #ifdef WITH_PROXY
        if (request->proxy) {
@@ -814,7 +816,7 @@ static void ping_home_server(void *ctx)
        rad_assert(request->proxy_listener == NULL);
 
        if (!insert_into_proxy_hash(request)) {
-               radlog(L_PROXY, "Failed inserting status check %d into proxy hash.  Discarding it.",
+               radlog(L_PROXY, "Failed to insert status check %d into proxy list.  Discarding it.",
                       request->number);
                ev_request_free(&request);
                return;
@@ -1140,33 +1142,53 @@ static void no_response_to_proxied_request(void *ctx)
        }
 #endif
 
-       if (home->state == HOME_STATE_IS_DEAD) {
-               rad_assert(home->ev != NULL); /* or it will never wake up */
+       /*
+        *      If it's not alive, don't try to make it a zombie.
+        */
+       if (home->state != HOME_STATE_ALIVE) {
+               /*
+                *      Don't check home->ev due to race conditions.
+                */
                return;
        }
 
        /*
-        *      Enable the zombie period when we notice that the home
-        *      server hasn't responded.  We do NOT back-date the start
-        *      of the zombie period.
+        *      We've received a real packet recently.  Don't mark the
+        *      server as zombie until we've received NO packets for a
+        *      while.  The "1/4" of zombie period was chosen rather
+        *      arbitrarily.  It's a balance between too short, which
+        *      gives quick fail-over and fail-back, or too long,
+        *      where the proxy still sends packets to an unresponsive
+        *      home server.
         */
-       if (home->state == HOME_STATE_ALIVE) {
-               home->state = HOME_STATE_ZOMBIE;
-               home->zombie_period_start = now;        
-               fr_event_delete(el, &home->ev);
-               home->currently_outstanding = 0;
-               home->num_received_pings = 0;
-
-               radlog(L_PROXY, "Marking home server %s port %d as zombie (it looks like it is dead).",
-                      inet_ntop(home->ipaddr.af, &home->ipaddr.ipaddr,
-                                buffer, sizeof(buffer)),
-                      home->port);
-
-               /*
-                *      Start pinging the home server.
-                */
-               ping_home_server(home);
+       if ((home->last_packet + ((home->zombie_period + 3) / 4)) >= now.tv_sec) {
+               return;
        }
+
+       /*
+        *      Enable the zombie period when we notice that the home
+        *      server hasn't responded for a while.  We back-date the
+        *      zombie period to when we last received a response from
+        *      the home server.
+        */
+       home->state = HOME_STATE_ZOMBIE;
+       
+       home->zombie_period_start.tv_sec = home->last_packet;
+       home->zombie_period_start.tv_usec = USEC / 2;
+       
+       fr_event_delete(el, &home->ev);
+       home->currently_outstanding = 0;
+       home->num_received_pings = 0;
+       
+       radlog(L_PROXY, "Marking home server %s port %d as zombie (it looks like it is dead).",
+              inet_ntop(home->ipaddr.af, &home->ipaddr.ipaddr,
+                        buffer, sizeof(buffer)),
+              home->port);
+       
+       /*
+        *      Start pinging the home server.
+        */
+       ping_home_server(home);
 }
 #endif
 
@@ -1178,6 +1200,7 @@ static void wait_a_bit(void *ctx)
 
        rad_assert(request->magic == REQUEST_MAGIC);
 
+#ifdef HAVE_PTHREAD_H
        /*
         *      The socket was closed.  Tell the request that
         *      there is no point in continuing.
@@ -1185,6 +1208,7 @@ static void wait_a_bit(void *ctx)
        if (request->listener->status != RAD_LISTEN_STATUS_KNOWN) {
                goto stop_processing;
        }
+#endif
 
 #ifdef WITH_COA
        /*
@@ -1206,6 +1230,25 @@ static void wait_a_bit(void *ctx)
        switch (request->child_state) {
        case REQUEST_QUEUED:
        case REQUEST_RUNNING:
+               /*
+                *      If we're not thread-capable, OR we're capable,
+                *      but have been told to run without threads, and
+                *      the request is still running.  This is usually
+                *      because the request was proxied, and the home
+                *      server didn't respond.
+                */
+#ifdef HAVE_PTHREAD_H
+               if (!have_children)
+#endif
+               {
+                       goto done;
+               }
+
+#ifdef HAVE_PTHREAD_H
+               /*
+                *      If we have threads, wait for the child thread
+                *      to stop.
+                */
                when = request->received;
                when.tv_sec += request->root->max_request_time;
 
@@ -1222,24 +1265,18 @@ static void wait_a_bit(void *ctx)
                 *      Request still has more time.  Continue
                 *      waiting.
                 */
-               if (timercmp(&now, &when, <) ||
-                   ((request->listener->type == RAD_LISTEN_DETAIL) &&
-                    (request->child_state == REQUEST_QUEUED))) {
+               if (timercmp(&now, &when, <)) {
                        if (request->delay < (USEC / 10)) {
                                request->delay = USEC / 10;
                        }
                        request->delay += request->delay >> 1;
 
-#ifdef WITH_DETAIL
                        /*
-                        *      Cap wait at some sane value for detail
-                        *      files.
+                        *      Cap delays at something reasonable.
                         */
-                       if ((request->listener->type == RAD_LISTEN_DETAIL) &&
-                           (request->delay > (request->root->max_request_time * USEC))) {
+                       if (request->delay > (request->root->max_request_time * USEC)) {
                                request->delay = request->root->max_request_time * USEC;
                        }
-#endif
 
                        request->when = now;
                        tv_add(&request->when, request->delay);
@@ -1248,7 +1285,8 @@ static void wait_a_bit(void *ctx)
                }
 
        stop_processing:
-#if defined(HAVE_PTHREAD_H)
+               request->master_state = REQUEST_STOP_PROCESSING;
+
                /*
                 *      A child thread MAY still be running on the
                 *      request.  Ask the thread to stop working on
@@ -1256,35 +1294,25 @@ static void wait_a_bit(void *ctx)
                 */
                if (have_children &&
                    (pthread_equal(request->child_pid, NO_SUCH_CHILD_PID) == 0)) {
-                       request->master_state = REQUEST_STOP_PROCESSING;
+                       radlog(L_ERR, "WARNING: Unresponsive child for request %u, in component %s module %s",
+                              request->number,
+                              request->component ? request->component : "<server core>",
+                              request->module ? request->module : "<server core>");
 
-                       radlog_request(L_ERR, 0, request, "WARNING: Unresponsive child in module %s component %s",
-                              request->module ? request->module : "<server core>",
-                              request->component ? request->component : "<server core>");
-                       
-                       request->delay = USEC / 4;
-                       tv_add(&request->when, request->delay);
-                       callback = wait_for_child_to_die;
-                       break;
                }
+                       
+               request->delay = USEC;
+               tv_add(&request->when, request->delay);
+               callback = wait_for_child_to_die;
+               break;
 #endif
 
                /*
-                *      Else no child thread is processing the
-                *      request.  We probably should have just marked
-                *      the request as 'done' elsewhere, like in the
-                *      post-proxy-fail handler.  But doing that would
-                *      involve checking for max_request_time in
-                *      multiple places, so this may be simplest.
-                */
-               request->child_state = REQUEST_DONE;
-               /* FALL-THROUGH */
-
-               /*
                 *      Mark the request as no longer running,
                 *      and clean it up.
                 */
        case REQUEST_DONE:
+       done:
 #ifdef HAVE_PTHREAD_H
                request->child_pid = NO_SUCH_CHILD_PID;
 #endif
@@ -1481,7 +1509,7 @@ static void retransmit_coa_request(void *ctx)
                 *      Don't free the old Id on error.
                 */
                if (!insert_into_proxy_hash(request)) {
-                       radlog(L_PROXY,"Failed re-inserting CoA request into proxy hash.");
+                       radlog(L_PROXY,"Failed to insert retransmission of CoA request into proxy list.");
                        return;
                }
 
@@ -1687,7 +1715,7 @@ static int originated_coa_request(REQUEST *request)
        coa->proxy->dst_port = coa->home_server->port;
 
        if (!insert_into_proxy_hash(coa)) {
-               radlog(L_PROXY, "Failed inserting CoA request into proxy hash.");
+               radlog(L_PROXY, "Failed to insert CoA request into proxy list.");
                goto fail;
        }
 
@@ -1918,8 +1946,8 @@ static int request_pre_handler(REQUEST *request)
 #ifdef WITH_PROXY
        if (request->proxy) {
                return process_proxy_reply(request);
-#endif
        }
+#endif
 
        return 1;
 }
@@ -1942,12 +1970,12 @@ static int proxy_request(REQUEST *request)
 #endif
 
        if (request->home_server->server) {
-               RDEBUG("ERROR: Cannot perform real proxying to a virtual server.");
+               RDEBUG("ERROR: Cannot proxy to a virtual server.");
                return 0;
        }
 
        if (!insert_into_proxy_hash(request)) {
-               radlog(L_PROXY, "Failed inserting request into proxy hash.");
+               radlog(L_PROXY, "Failed to insert request into proxy list.");
                return 0;
        }
 
@@ -2545,8 +2573,11 @@ static void request_post_handler(REQUEST *request)
         *      and it should re-send it.
         *      If configured, encode, sign, and send.
         */
-       if ((request->reply->code != 0) ||
-           (request->listener->type == RAD_LISTEN_DETAIL)) {
+       if ((request->reply->code != 0)
+#ifdef WITH_DETAIL
+           || (request->listener->type == RAD_LISTEN_DETAIL)
+#endif
+           ) {
                DEBUG_PACKET(request, request->reply, 1);
                request->listener->send(request->listener, request);
        }
@@ -2558,6 +2589,8 @@ static void request_post_handler(REQUEST *request)
         *      if it wasn't proxied.
         */
        if (!request->proxy &&
+           (request->packet->code != PW_COA_REQUEST) &&
+           (request->packet->code != PW_DISCONNECT_REQUEST) &&
            (request->coa ||
             (pairfind(request->config_items, PW_SEND_COA_REQUEST, 0) != NULL))) {
                if (!originated_coa_request(request)) {
@@ -2613,6 +2646,7 @@ static void request_post_handler(REQUEST *request)
 }
 
 
+#ifdef WITH_PROXY
 static void rad_retransmit_packet(REQUEST *request)
 {
        char buffer[256];
@@ -2658,7 +2692,7 @@ static int rad_retransmit(REQUEST *request)
                
                home = home_server_ldb(NULL, request->home_pool, request);
                if (!home) {
-                       RDEBUG2("Failed to find live home server for request");
+                       RDEBUG2("ERROR: Failed to find live home server for request");
                no_home_servers:
                        /*
                         *      Do post-request processing,
@@ -2702,7 +2736,7 @@ static int rad_retransmit(REQUEST *request)
 
        return 1;
 }
-
+#endif
 
 static void received_retransmit(REQUEST *request, const RADCLIENT *client)
 {
@@ -2985,7 +3019,10 @@ int received_request(rad_listen_t *listener,
        /*
         *      We may want to quench the new request.
         */
-       if ((listener->type != RAD_LISTEN_DETAIL) &&
+       if (
+#ifdef WITH_DETAIL
+           (listener->type != RAD_LISTEN_DETAIL) &&
+#endif
            !can_handle_new_request(packet, client, root)) {
                return 0;
        }
@@ -3192,8 +3229,11 @@ REQUEST *received_proxy_response(RADIUS_PACKET *packet)
         *      receive a packet?  Setting this here means that we
         *      mark it alive on *any* packet, even if it's lost all
         *      of the *other* packets in the last 10s.
+        *
+        *      This behavior could be configurable.
         */
        request->home_server->state = HOME_STATE_ALIVE;
+       request->home_server->last_packet = now.tv_sec;
        
 #ifdef WITH_COA
        /*
@@ -3529,7 +3569,7 @@ int event_new_fd(rad_listen_t *this)
                }               
 #ifdef WITH_PROXY
                else {
-                       int count = this->count;
+                       int count;
 
                        /*
                         *      Duplicate code
@@ -3611,7 +3651,10 @@ int event_new_fd(rad_listen_t *this)
         */
        if (this->status == RAD_LISTEN_STATUS_CLOSED) {
                int count = this->count;
+
+#ifdef WITH_DETAIL
                rad_assert(this->type != RAD_LISTEN_DETAIL);
+#endif
 
 #ifdef WITH_PROXY
                /*
@@ -3887,7 +3930,20 @@ static void event_socket_handler(fr_event_list_t *xel, UNUSED int fd,
 
        xel = xel;
 
-       if (listener->fd < 0) rad_panic("Socket was closed on us!");
+       if (
+#ifdef WITH_DETAIL
+           (listener->type != RAD_LISTEN_DETAIL) &&
+#endif
+           (listener->fd < 0)) {
+               char buffer[256];
+
+               listener->print(listener, buffer, sizeof(buffer));
+               radlog(L_ERR, "FATAL: Asked to read from closed socket: %s",
+                      buffer);
+       
+               rad_panic("Socket was closed on us!");
+               _exit(1);
+       }
        
        if (!listener->recv(listener, &fun, &request)) return;
 
@@ -3897,7 +3953,7 @@ static void event_socket_handler(fr_event_list_t *xel, UNUSED int fd,
        thread_pool_addrequest(request, fun);
 }
 
-
+#ifdef WITH_DETAIL
 /*
  *     This function is called periodically to see if this detail
  *     file is available for reading.
@@ -3929,7 +3985,7 @@ static void event_poll_detail(void *ctx)
                exit(1);
        }
 }
-
+#endif
 
 static void event_status(struct timeval *wake)
 {
@@ -4038,6 +4094,10 @@ int radius_event_init(CONF_SECTION *cs, int spawn_flag)
        if (check_config) {
                DEBUG("%s: #### Skipping IP addresses and Ports ####",
                       mainconfig.name);
+               if (listen_init(cs, &head) < 0) {
+                       fflush(NULL);
+                       exit(1);
+               }
                return 1;
        }