Merge pull request #76 from painless-security/jennifer/trpc_deadlock
[trust_router.git] / tr / tr_trp.c
index ed07918..9f9c558 100644 (file)
@@ -1,4 +1,38 @@
-#include <stdio.h>  /* TODO: remove this --jlr */
+/*
+ * Copyright (c) 2016, JANET(UK)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of JANET(UK) nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdio.h>
 #include <pthread.h>
 #include <fcntl.h>
 #include <event2/event.h>
 
 #include <gsscon.h>
 #include <tr.h>
+#include <tr_mq.h>
 #include <tr_rp.h>
+#include <trp_route.h>
 #include <trp_internal.h>
+#include <trp_peer.h>
 #include <trp_ptable.h>
 #include <trp_rtable.h>
 #include <tr_config.h>
@@ -39,6 +76,12 @@ static void msg_free_helper(void *p)
 {
   tr_msg_free_decoded((TR_MSG *)p);
 }
+
+static void tr_free_name_helper(void *arg)
+{
+  tr_free_name((TR_NAME *)arg); /* void* adapter so tr_free_name() can be passed as a TR_MQ_MSG payload free routine */
+}
+
 /* takes a TR_MSG and puts it in a TR_MQ_MSG for processing by the main thread */
 static TRP_RC tr_trps_msg_handler(TRPS_INSTANCE *trps,
                                   TRP_CONNECTION *conn,
@@ -50,12 +93,12 @@ static TRP_RC tr_trps_msg_handler(TRPS_INSTANCE *trps,
   /* n.b., conn is available here, but do not hold onto the reference
    * because it may be cleaned up if the originating connection goes
    * down before the message is processed */
-  mq_msg=tr_mq_msg_new(tmp_ctx, TR_MQMSG_MSG_RECEIVED);
+  mq_msg=tr_mq_msg_new(tmp_ctx, TR_MQMSG_MSG_RECEIVED, TR_MQ_PRIO_NORMAL);
   if (mq_msg==NULL) {
     return TRP_NOMEM;
   }
   tr_mq_msg_set_payload(mq_msg, (void *)tr_msg, msg_free_helper);
-  trps_mq_append(trps, mq_msg);
+  trps_mq_add(trps, mq_msg);
   talloc_free(tmp_ctx); /* cleans up the message if it did not get appended correctly */
   return TRP_SUCCESS;
 }
@@ -71,13 +114,13 @@ static int tr_trps_gss_handler(gss_name_t client_name, gss_buffer_t gss_name,
 
   tr_debug("tr_trps_gss_handler()");
 
-  if ((!client_name) || (!gss_name) || (!trps) || (!cfg_mgr)) {
+  if ((!client_name) || (!trps) || (!cfg_mgr)) {
     tr_debug("tr_trps_gss_handler: Bad parameters.");
     return -1;
   }
   
   /* look up the TRPS peer matching the GSS name */
-  if (NULL==trps_get_peer(trps, &name)) {
+  if (NULL==trps_get_peer_by_gssname(trps, &name)) {
     tr_warning("tr_trps_gss_handler: Connection attempt from unknown peer (GSS name: %.*s).", name.len, name.buf);
     return -1;
   }
@@ -101,15 +144,27 @@ static void *tr_trps_thread(void *arg)
   TR_MQ_MSG *msg=NULL;
 
   tr_debug("tr_trps_thread: started");
+  if (trps_authorize_connection(trps, conn)!=TRP_SUCCESS)
+    goto cleanup;
+
+  msg=tr_mq_msg_new(tmp_ctx, TR_MQMSG_TRPS_CONNECTED, TR_MQ_PRIO_HIGH);
+  if (msg==NULL) { /* must be checked before msg is used; also avoids duplicating a name that could never be queued */
+    tr_err("tr_trps_thread: error allocating TR_MQ_MSG");
+    goto cleanup; /* the cleanup label still attempts the TRPS_DISCONNECTED notification for conn */
+  }
+  tr_mq_msg_set_payload(msg, (void *)tr_dup_name(trp_connection_get_peer(conn)), tr_free_name_helper); /* payload: private copy of the peer name */
+  trps_mq_add(trps, msg); /* steals msg context */
+  msg=NULL;
+
   trps_handle_connection(trps, conn);
 
-  msg=tr_mq_msg_new(tmp_ctx, TR_MQMSG_TRPS_DISCONNECTED);
+cleanup:
+  msg=tr_mq_msg_new(tmp_ctx, TR_MQMSG_TRPS_DISCONNECTED, TR_MQ_PRIO_HIGH);
-  tr_mq_msg_set_payload(msg, (void *)conn, NULL); /* do not pass a free routine */
+  if (msg!=NULL) tr_mq_msg_set_payload(msg, (void *)conn, NULL); /* no free routine: the main thread releases conn via tr_trps_cleanup_conn(); skipped when allocation failed */
   if (msg==NULL)
     tr_err("tr_trps_thread: error allocating TR_MQ_MSG");
   else
-    trps_mq_append(trps, msg);
-
+    trps_mq_add(trps, msg);
   tr_debug("tr_trps_thread: exit");
   talloc_free(tmp_ctx);
   return NULL;
@@ -129,24 +184,27 @@ static void tr_trps_event_cb(int listener, short event, void *arg)
     tr_debug("tr_trps_event_cb: unexpected event on TRPS socket (event=0x%X)", event);
   } else {
     /* create a thread to handle this connection */
-    asprintf(&name, "trustrouter@%s", trps->hostname);
-    gssname=tr_new_name(name);
-    free(name); name=NULL;
+    name = talloc_asprintf(tmp_ctx, "trustrouter@%s", trps->hostname);
+    if (name == NULL)
+      goto cleanup;
+    gssname=tr_new_name(name); /* name cleaned up with tmp_ctx */
+
     conn=trp_connection_accept(tmp_ctx, listener, gssname);
     if (conn!=NULL) {
       /* need to monitor this fd and trigger events when read becomes possible */
       thread_data=talloc(conn, struct trps_thread_data);
       if (thread_data==NULL) {
         tr_err("tr_trps_event_cb: unable to allocate trps_thread_data");
-        talloc_free(tmp_ctx);
-        return;
+        goto cleanup;
       }
       thread_data->conn=conn;
       thread_data->trps=trps;
-      trps_add_connection(trps, conn); /* remember the connection */
+      trps_add_connection(trps, conn); /* remember the connection - this puts conn and the thread data in trps's talloc context */
       pthread_create(trp_connection_get_thread(conn), NULL, tr_trps_thread, thread_data);
     }
   }
+
+ cleanup:
   talloc_free(tmp_ctx);
 }
 
@@ -155,9 +213,8 @@ static void tr_trps_cleanup_conn(TRPS_INSTANCE *trps, TRP_CONNECTION *conn)
   /* everything belonging to the thread is in the TRP_CONNECTION
    * associated with it */
   tr_debug("tr_trps_cleanup_conn: freeing %p", conn);
-/*  pthread_join(*trp_connection_get_thread(conn), NULL); -- removed while debugging, put back!!! --jlr */
+  pthread_join(*trp_connection_get_thread(conn), NULL);
   trps_remove_connection(trps, conn);
-  talloc_report_full(conn, stderr);
   trp_connection_free(conn);
   tr_debug("tr_trps_cleanup_conn: deleted connection");
 }
@@ -170,45 +227,108 @@ static void tr_trps_cleanup_trpc(TRPS_INSTANCE *trps, TRPC_INSTANCE *trpc)
   tr_debug("tr_trps_cleanup_trpc: deleted connection");
 }
 
-static void tr_trps_print_route_table(TRPS_INSTANCE *trps, FILE *f)
+/**
+ * Get a dynamically allocated string with a description of the route table.
+ * Caller must free the string using talloc_free().
+ *
+ * @param memctx talloc context for the string
+ * @param trps trps instance containing the route table
+ * @return pointer to the output, or NULL on error
+ */
+static char *tr_trps_route_table_to_str(TALLOC_CTX *memctx, TRPS_INSTANCE *trps)
 {
-  char *table=trp_rtable_to_str(NULL, trps->rtable, " | ", NULL);
-  if (table==NULL)
-    fprintf(f, "Unable to print route table.\n");
-  else {
-    fprintf(f, "%s\n", table);
-    talloc_free(table);
-  }
+  return trp_rtable_to_str(memctx, trps->rtable, " | ", NULL); /* NOTE(review): " | " and NULL look like field/line separator arguments - confirm against trp_rtable_to_str() */
+}
+
+/**
+ * Get a dynamically allocated string with a description of the community table.
+ * Caller must free the string using talloc_free().
+ *
+ * @param memctx talloc context for the string
+ * @param trps trps instance containing the community table
+ * @return pointer to the output, or NULL on error
+ */
+static char *tr_trps_comm_table_to_str(TALLOC_CTX *memctx, TRPS_INSTANCE *trps)
+{
+  return tr_comm_table_to_str(memctx, trps->ctable); /* thin wrapper; callers in this file pass memctx=NULL and talloc_free() the result */
 }
 
+/**
+ * Event handler to process TRP messages from connection threads. These
+ * are added to the message queue (mq) in tr_trps_msg_handler(), which
+ * runs in the other threads.
+ *
+ * @param socket Ignored
+ * @param event Ignored
+ * @param arg Pointer to the TRPS_INSTANCE
+ */
 static void tr_trps_process_mq(int socket, short event, void *arg)
 {
   TRPS_INSTANCE *trps=talloc_get_type_abort(arg, TRPS_INSTANCE);
   TR_MQ_MSG *msg=NULL;
   const char *s=NULL;
+  TRP_PEER *peer = NULL;
+  char *tmp = NULL;
 
-  talloc_report_full(trps->mq, stderr);
   msg=trps_mq_pop(trps);
   while (msg!=NULL) {
     s=tr_mq_msg_get_message(msg);
-    if (0==strcmp(s, TR_MQMSG_TRPS_DISCONNECTED)) {
-      tr_trps_cleanup_conn(trps,
-                           talloc_get_type_abort(tr_mq_msg_get_payload(msg),
-                                                 TRP_CONNECTION));
+    if (0==strcmp(s, TR_MQMSG_TRPS_CONNECTED)) {
+      TR_NAME *peer_gssname=(TR_NAME *)tr_mq_msg_get_payload(msg);
+      peer=trps_get_peer_by_gssname(trps, peer_gssname); /* get the peer record */
+      tmp = tr_name_strdup(peer_gssname); /* get the name as a null-terminated string */
+      if (peer==NULL)
+        tr_err("tr_trps_process_mq: incoming connection from unknown peer (%s) reported.", tmp);
+      else {
+        trp_peer_set_incoming_status(peer, PEER_CONNECTED);
+        tr_notice("tr_trps_process_mq: incoming connection from %s established.", tmp);
+      }
+      free(tmp);
+    }
+    else if (0==strcmp(s, TR_MQMSG_TRPS_DISCONNECTED)) {
+      TRP_CONNECTION *conn=talloc_get_type_abort(tr_mq_msg_get_payload(msg), TRP_CONNECTION);
+      TR_NAME *peer_gssname=trp_connection_get_peer(conn);
+      peer=trps_get_peer_by_gssname(trps, peer_gssname); /* get the peer record */
+      tmp = tr_name_strdup(peer_gssname); /* get the name as a null-terminated string */
+      if (peer==NULL) { /* NOTE(review): this path never calls tr_trps_cleanup_conn(), so conn is neither joined nor freed - confirm intended */
+        tr_err("tr_trps_process_mq: incoming connection from unknown peer (%s) lost.", tmp); /* tmp is a plain C string: %s, not %.*s (which also consumes an int precision) */
+      } else {
+        trp_peer_set_incoming_status(peer, PEER_DISCONNECTED);
+        tr_trps_cleanup_conn(trps, conn); /* frees conn; tmp is an independent copy, so it stays valid for the log below */
+        tr_notice("tr_trps_process_mq: incoming connection from %s lost.", tmp);
+      }
+      free(tmp);
+    }
+    else if (0==strcmp(s, TR_MQMSG_TRPC_CONNECTED)) {
+      TR_NAME *svcname=(TR_NAME *)tr_mq_msg_get_payload(msg);
+      peer=trps_get_peer_by_servicename(trps, svcname);
+      tmp = tr_name_strdup(svcname);
+      if (peer==NULL)
+        tr_err("tr_trps_process_mq: outgoing connection to unknown peer (%s) reported.", tmp);
+      else {
+        trp_peer_set_outgoing_status(peer, PEER_CONNECTED);
+        tr_notice("tr_trps_process_mq: outgoing connection to %s established.", tmp);
+      }
+      free(tmp);
     }
     else if (0==strcmp(s, TR_MQMSG_TRPC_DISCONNECTED)) {
-      /* trpc connection died */
-      tr_trps_cleanup_trpc(trps,
-                           talloc_get_type_abort(tr_mq_msg_get_payload(msg),
-                                                 TRPC_INSTANCE));
+      TRPC_INSTANCE *trpc=talloc_get_type_abort(tr_mq_msg_get_payload(msg), TRPC_INSTANCE);
+      TR_NAME *svcname=trpc_get_gssname(trpc);
+      peer=trps_get_peer_by_servicename(trps, svcname);
+      tmp = tr_name_strdup(svcname);
+      if (peer==NULL)
+        tr_err("tr_trps_process_mq: outgoing connection to unknown peer (%s) lost.", tmp);
+      else {
+        trp_peer_set_outgoing_status(peer, PEER_DISCONNECTED);
+        tr_notice("tr_trps_process_mq: outgoing connection to %s lost.", tmp);
+        tr_trps_cleanup_trpc(trps, trpc);
+      }
+      free(tmp);
     }
 
     else if (0==strcmp(s, TR_MQMSG_MSG_RECEIVED)) {
       if (trps_handle_tr_msg(trps, tr_mq_msg_get_payload(msg))!=TRP_SUCCESS)
         tr_notice("tr_trps_process_mq: error handling message.");
-      else {
-        tr_trps_print_route_table(trps, stderr);
-      }
     }
     else
       tr_notice("tr_trps_process_mq: unknown message '%s' received.", tr_mq_msg_get_message(msg));
@@ -224,9 +344,10 @@ static void tr_trps_update(int listener, short event, void *arg)
   TRPS_INSTANCE *trps=cookie->trps;
   struct event *ev=cookie->ev;
 
-  tr_debug("tr_trps_update: sending scheduled route updates.");
+  tr_debug("tr_trps_update: sending scheduled route/community updates.");
   trps_update(trps, TRP_UPDATE_SCHEDULED);
   event_add(ev, &(trps->update_interval));
+  tr_debug("tr_trps_update: update interval=%ld", (long)trps->update_interval.tv_sec); /* tv_sec is a time_t, not an int: cast and print as long */
 }
 
 static void tr_trps_sweep(int listener, short event, void *arg)
@@ -234,10 +355,23 @@ static void tr_trps_sweep(int listener, short event, void *arg)
   struct tr_trps_event_cookie *cookie=talloc_get_type_abort(arg, struct tr_trps_event_cookie);
   TRPS_INSTANCE *trps=cookie->trps;
   struct event *ev=cookie->ev;
+  char *table_str=NULL;
 
   tr_debug("tr_trps_sweep: sweeping routes.");
   trps_sweep_routes(trps);
-  tr_trps_print_route_table(trps, stderr);
+  tr_debug("tr_trps_sweep: sweeping communities.");
+  trps_sweep_ctable(trps);
+  table_str=tr_trps_route_table_to_str(NULL, trps);
+  if (table_str!=NULL) {
+    tr_debug("%s", table_str); /* table text may contain '%' (it holds peer-supplied names): never use it as the format string */
+    talloc_free(table_str);
+  }
+
+  table_str=tr_trps_comm_table_to_str(NULL, trps);
+  if (table_str!=NULL) {
+    tr_debug("%s", table_str); /* same reason as above */
+    talloc_free(table_str);
+  }
   /* schedule the event to run again */
   event_add(ev, &(trps->sweep_interval));
 }
@@ -249,7 +383,7 @@ static void tr_connection_update(int listener, short event, void *arg)
   struct event *ev=cookie->ev;
 
   tr_debug("tr_connection_update: checking peer connections.");
-  tr_connect_to_peers(trps);
+  tr_connect_to_peers(trps, ev);
   /* schedule the event to run again */
   event_add(ev, &(trps->connect_interval));
 }
@@ -303,6 +437,7 @@ TRP_RC tr_trps_event_init(struct event_base *base, TR_INSTANCE *tr)
   struct tr_trps_event_cookie *sweep_cookie=NULL;
   struct timeval zero_time={0,0};
   TRP_RC retval=TRP_ERROR;
+  size_t ii=0;
 
   if (tr->events != NULL) {
     tr_notice("tr_trps_event_init: tr->events was not null. Freeing before reallocating..");
@@ -333,28 +468,31 @@ TRP_RC tr_trps_event_init(struct event_base *base, TR_INSTANCE *tr)
   trps_cookie->cfg_mgr=tr->cfg_mgr;
 
   /* get a trps listener */
-  listen_ev->sock_fd=trps_get_listener(tr->trps,
-                                       tr_trps_msg_handler,
-                                       tr_trps_gss_handler,
-                                       tr->cfg_mgr->active->internal->hostname,
-                                       tr->cfg_mgr->active->internal->trps_port,
-                                       (void *)trps_cookie);
-  if (listen_ev->sock_fd < 0) {
+  listen_ev->n_sock_fd=trps_get_listener(tr->trps,
+                                         tr_trps_msg_handler,
+                                         tr_trps_gss_handler,
+                                         tr->cfg_mgr->active->internal->hostname,
+                                         tr->cfg_mgr->active->internal->trps_port,
+                                         (void *)trps_cookie,
+                                         listen_ev->sock_fd,
+                                         TR_MAX_SOCKETS);
+  if (listen_ev->n_sock_fd==0) {
     tr_crit("Error opening TRP server socket.");
     retval=TRP_ERROR;
     tr_trps_events_free(tr->events);
     tr->events=NULL;
     goto cleanup;
   }
-  trps_cookie->ev=listen_ev->ev; /* in case it needs to frob the event */
-  
-  /* and its event */
-  listen_ev->ev=event_new(base,
-                          listen_ev->sock_fd,
-                          EV_READ|EV_PERSIST,
-                          tr_trps_event_cb,
-                          (void *)(tr->trps));
-  event_add(listen_ev->ev, NULL);
+
+  /* Set up events for the sockets */
+  for (ii=0; ii<listen_ev->n_sock_fd; ii++) {
+    listen_ev->ev[ii]=event_new(base,
+                                listen_ev->sock_fd[ii],
+                                EV_READ|EV_PERSIST,
+                                tr_trps_event_cb,
+                                (void *)(tr->trps));
+    event_add(listen_ev->ev[ii], NULL);
+  }
   
   /* now set up message queue processing event, only triggered by
    * tr_trps_mq_cb() */
@@ -419,29 +557,27 @@ cleanup:
   return retval;
 }
 
-
-struct trpc_notify_cb_data {
-  int msg_ready;
-  pthread_cond_t cond;
-  pthread_mutex_t mutex;
-};
-
-static void tr_trpc_mq_cb(TR_MQ *mq, void *arg)
-{
-  struct trpc_notify_cb_data *cb_data=(struct trpc_notify_cb_data *) arg;
-  pthread_mutex_lock(&(cb_data->mutex));
-  if (!cb_data->msg_ready) {
-    cb_data->msg_ready=1;
-    pthread_cond_signal(&(cb_data->cond));
-  }
-  pthread_mutex_unlock(&(cb_data->mutex));
-}
-
 /* data passed to thread */
 struct trpc_thread_data {
   TRPC_INSTANCE *trpc;
   TRPS_INSTANCE *trps;
 };
+
+/**
+ * Thread for handling TRPC (outgoing) connections
+ *
+ * Opens a connection to a peer. If successful, notifies the trps thread by
+ * posting a TR_MQMSG_TRPC_CONNECTED message to the trps message queue.
+ * It then waits for messages on trpc->mq. Normally these will be TR_MQMSG_TRPC_SEND
+ * messages, which this thread forwards to the peer. If its connection is lost or
+ * a TR_MQMSG_ABORT message is received on trpc->mq, the thread sends a
+ * TR_MQMSG_TRPC_DISCONNECTED message to the trps thread, then cleans up and
+ * terminates.
+ *
+ * The trps may continue queueing messages for this client even when the
+ * connection is down. To prevent the queue from growing endlessly, this thread
+ * should clear its queue after failed connection attempts.
+ */
 static void *tr_trpc_thread(void *arg)
 {
   TALLOC_CTX *tmp_ctx=talloc_new(NULL);
@@ -452,78 +588,94 @@ static void *tr_trpc_thread(void *arg)
   TR_MQ_MSG *msg=NULL;
   const char *msg_type=NULL;
   char *encoded_msg=NULL;
-
-  struct trpc_notify_cb_data cb_data={0,
-                                      PTHREAD_COND_INITIALIZER,
-                                      PTHREAD_MUTEX_INITIALIZER};
+  TR_NAME *peer_gssname=NULL;
+  struct timespec wait_until = {0};
+  int exit_loop=0;
 
   tr_debug("tr_trpc_thread: started");
 
-  /* set up the mq for receiving */
-  pthread_mutex_lock(&(cb_data.mutex)); /* hold this lock until we enter the main loop */
-
-  tr_mq_lock(trpc->mq);
-  tr_mq_set_notify_cb(trpc->mq, tr_trpc_mq_cb, (void *) &cb_data);
-  tr_mq_unlock(trpc->mq);
-
+  /* Try to make the outgoing connection */
   rc=trpc_connect(trpc);
-/*  talloc_report_full(trpc, stderr);*/
   if (rc!=TRP_SUCCESS) {
-    /* was tr_notice --jlr */
-    fprintf(stderr, "tr_trpc_thread: failed to initiate connection to %s:%d.",
-            trpc_get_server(trpc),
-            trpc_get_port(trpc));
-    fflush(stderr);
+    tr_notice("tr_trpc_thread: failed to initiate connection to %s:%d.",
+              trpc_get_server(trpc),
+              trpc_get_port(trpc));
+    trpc_mq_clear(trpc); /* clear the queue even though we did not connect */
   } else {
-    tr_debug("tr_trpc_thread: connected to peer %s", trpc->conn->peer->buf);
-    while (1) {
-      cb_data.msg_ready=0;
-      pthread_cond_wait(&(cb_data.cond), &(cb_data.mutex));
-      /* verify the condition */
-      if (cb_data.msg_ready) {
-        msg=trpc_mq_pop(trpc);
-        if (msg==NULL) {
-          /* no message in the queue */
-          tr_err("tr_trpc_thread: notified of msg, but queue empty");
-          break;
-        }
-
-        msg_type=tr_mq_msg_get_message(msg);
-
-        if (0==strcmp(msg_type, TR_MQMSG_ABORT)) {
-          tr_mq_msg_free(msg);
-          break; /* exit loop */
-        }
-        else if (0==strcmp(msg_type, TR_MQMSG_TRPC_SEND)) {
-          encoded_msg=tr_mq_msg_get_payload(msg);
-          if (encoded_msg==NULL)
+    /* Retrieve the GSS name used by the peer for authentication */
+    peer_gssname=trp_connection_get_peer(trpc_get_conn(trpc));
+    if (peer_gssname==NULL) {
+      tr_err("tr_trpc_thread: could not get peer_gssname.");
+      talloc_free(tmp_ctx);
+      return NULL; /* NOTE(review): this early return skips the TRPC_DISCONNECTED notification sent at the end of the thread - confirm intended */
+    }
+    tr_debug("tr_trpc_thread: connected to peer %.*s",
+             peer_gssname->len, peer_gssname->buf);
+
+    msg=tr_mq_msg_new(tmp_ctx, TR_MQMSG_TRPC_CONNECTED, TR_MQ_PRIO_HIGH);
+    if (msg==NULL) { /* must be checked before msg is used; also avoids duplicating a name that could never be queued */
+      tr_err("tr_trpc_thread: error allocating TR_MQ_MSG");
+      talloc_free(tmp_ctx);
+      return NULL;
+    }
+    tr_mq_msg_set_payload(msg, (void *)tr_dup_name(peer_gssname), tr_free_name_helper); /* payload: private copy of the peer name */
+    trps_mq_add(trps, msg); /* steals msg context */
+    msg=NULL;
+
+    /* Loop until we get an abort message or until the connection is lost. */
+    while(!exit_loop) {
+      /* Wait up to 10 minutes for a message to be queued to send to the peer.
+       * Log a warning if we go longer than that, but don't give up. */
+      if (tr_mq_pop_timeout(10 * 60, &wait_until) != 0) {
+        tr_err("tr_trpc_thread: unable to set abort timeout");
+        break; /* immediately exit the loop, don't go through cleanup */
+      }
+
+      /* Pop a message from the queue. */
+      msg = trpc_mq_pop(trpc, &wait_until);
+      if (msg) {
+        msg_type = tr_mq_msg_get_message(msg);
+        if (0 == strcmp(msg_type, TR_MQMSG_ABORT)) {
+          tr_debug("tr_trpc_thread: received abort message from main thread.");
+          exit_loop = 1;
+        } else if (0 == strcmp(msg_type, TR_MQMSG_TRPC_SEND)) {
+          encoded_msg = tr_mq_msg_get_payload(msg);
+          if (encoded_msg == NULL)
             tr_notice("tr_trpc_thread: null outgoing TRP message.");
           else {
             rc = trpc_send_msg(trpc, encoded_msg);
-            if (rc!=TRP_SUCCESS) {
+            if (rc == TRP_SUCCESS) {
+              tr_debug("tr_trpc_thread: sent message.");
+            } else {
               tr_notice("tr_trpc_thread: trpc_send_msg failed.");
-              tr_mq_msg_free(msg);
-              break;
+              /* Assume this means we lost the connection. */
+              exit_loop = 1;
             }
           }
-        }
-        else
+        } else
           tr_notice("tr_trpc_thread: unknown message '%s' received.", msg_type);
 
         tr_mq_msg_free(msg);
+      } else {
+        tr_warning("tr_trpc_thread: no outgoing messages to %.*s for 10 minutes",
+                   peer_gssname->len, peer_gssname->buf);
       }
     }
   }
 
-  tr_debug("tr_trpc_thread: exiting.");
-  msg=tr_mq_msg_new(tmp_ctx, TR_MQMSG_TRPC_DISCONNECTED);
+  /* Send a DISCONNECTED message to the main thread */
+  tr_debug("tr_trpc_thread: notifying main thread of disconnection.");
+  msg=tr_mq_msg_new(tmp_ctx, TR_MQMSG_TRPC_DISCONNECTED, TR_MQ_PRIO_NORMAL);
-  tr_mq_msg_set_payload(msg, (void *)trpc, NULL); /* do not pass a free routine */
-  if (msg==NULL)
+  if (msg==NULL) {
+    /* can't notify main thread */
     tr_err("tr_trpc_thread: error allocating TR_MQ_MSG");
-  else
-    trps_mq_append(trps, msg);
+  } else {
+    tr_mq_msg_set_payload(msg, (void *)trpc, NULL); /* do not pass a free routine; only touch msg once it is known to be non-NULL */
+    trps_mq_add(trps, msg);
+  }
 
   talloc_free(tmp_ctx);
+  tr_debug("tr_trpc_thread: thread terminating.");
   return NULL;
 }
 
@@ -534,28 +686,28 @@ static TRP_ROUTE **tr_make_local_routes(TALLOC_CTX *mem_ctx,
                                          size_t *n_routes)
 {
   TALLOC_CTX *tmp_ctx=talloc_new(NULL);
-  TR_APC *apc=NULL;
+  TR_APC *comm=NULL;
   TRP_ROUTE *new_entry=NULL;
   TRP_ROUTE **entries=NULL;
-  size_t n_apcs=0, ii=0;
+  size_t n_comms=0, ii=0;
 
   *n_routes=0;
 
-  if (realm==NULL)
+  if ((realm==NULL) || (realm->origin!=TR_REALM_LOCAL))
     goto cleanup;
 
-  /* count apcs */
-  for (apc=realm->apcs, n_apcs=0; apc!=NULL; apc=apc->next,n_apcs++) {}
+  /* count comms */
+  for (comm=realm->apcs, n_comms=0; comm!=NULL; comm=comm->next,n_comms++) {}
 
-  entries=talloc_array(tmp_ctx, TRP_ROUTE *, n_apcs);
-  for (apc=realm->apcs,ii=0; apc!=NULL; apc=apc->next, ii++) {
+  entries=talloc_array(tmp_ctx, TRP_ROUTE *, n_comms);
+  for (comm=realm->apcs,ii=0; comm!=NULL; comm=comm->next, ii++) {
     new_entry=trp_route_new(entries);
     if (new_entry==NULL) {
       tr_crit("tr_make_local_routes: unable to allocate entry.");
       talloc_free(entries);
       goto cleanup;
     }
-    trp_route_set_apc(new_entry, tr_dup_name(apc->id));
+    trp_route_set_comm(new_entry, tr_dup_name(comm->id));
     trp_route_set_realm(new_entry, tr_dup_name(realm->realm_id));
     trp_route_set_peer(new_entry, tr_new_name("")); /* no peer, it's us */
     trp_route_set_metric(new_entry, 0);
@@ -566,37 +718,27 @@ static TRP_ROUTE **tr_make_local_routes(TALLOC_CTX *mem_ctx,
   }
 
   talloc_steal(mem_ctx, entries);
-  *n_routes=n_apcs;
+  *n_routes=n_comms;
  cleanup:
   talloc_free(tmp_ctx);
   return entries;
 }
 
-struct tr_trpc_status_change_cookie {
-  TRPS_INSTANCE *trps;
-  TRPC_INSTANCE *trpc;
-  TRP_PEER *peer;
-};
-static void tr_trpc_status_change(TRP_CONNECTION *conn, void *cookie)
+void tr_peer_status_change(TRP_PEER *peer, void *cookie)
 {
-  struct tr_trpc_status_change_cookie *cook=talloc_get_type_abort(cookie, struct tr_trpc_status_change_cookie);
-  TRP_PEER *peer=cook->peer;
-  TR_NAME *gssname=trp_peer_get_gssname(peer);
+  TRPS_INSTANCE *trps=talloc_get_type_abort(cookie, TRPS_INSTANCE);
 
-  if (trp_connection_get_status(conn)==TRP_CONNECTION_UP)
-    tr_debug("tr_trpc_status_change: connection to %.*s now up.", gssname->len, gssname->buf);
-  else
-    tr_debug("tr_trpc_status_change: connection to %.*s now down.", gssname->len, gssname->buf);
+  if (TRP_SUCCESS!=trps_wildcard_route_req(trps, trp_peer_get_servicename(peer))) /* ask the peer (by service name) for a wildcard route request */
+    tr_err("tr_peer_status_change: error sending wildcard route request."); /* log prefix matches the enclosing function, as elsewhere in this file */
 }
 
 /* starts a trpc thread to connect to server:port */
-TRP_RC tr_trpc_initiate(TRPS_INSTANCE *trps, TRP_PEER *peer)
+TRP_RC tr_trpc_initiate(TRPS_INSTANCE *trps, TRP_PEER *peer, struct event *ev)
 {
   TALLOC_CTX *tmp_ctx=talloc_new(NULL);
   TRPC_INSTANCE *trpc=NULL;
   TRP_CONNECTION *conn=NULL;
   struct trpc_thread_data *thread_data=NULL;
-  struct tr_trpc_status_change_cookie *status_change_cookie=NULL;
   TRP_RC rc=TRP_ERROR;
 
   tr_debug("tr_trpc_initiate entered");
@@ -615,23 +757,10 @@ TRP_RC tr_trpc_initiate(TRPS_INSTANCE *trps, TRP_PEER *peer)
     goto cleanup;
   }
 
-  status_change_cookie=talloc(conn, struct tr_trpc_status_change_cookie);
-  if (status_change_cookie==NULL) {
-    tr_crit("tr_trpc_initiate: could not allocate connection status cookie.");
-    rc=TRP_NOMEM;
-    goto cleanup;
-  }
-  status_change_cookie->trps=trps;
-  status_change_cookie->trpc=trpc;
-  status_change_cookie->peer=peer;
-  conn->status_change_cookie=status_change_cookie;
-  status_change_cookie=NULL;
-  conn->status_change_cb=tr_trpc_status_change;
-
   trpc_set_conn(trpc, conn);
   trpc_set_server(trpc, talloc_strdup(trpc, trp_peer_get_server(peer)));
   trpc_set_port(trpc, trp_peer_get_port(peer));
-  trpc_set_gssname(trpc, trp_peer_dup_gssname(peer));
+  trpc_set_gssname(trpc, trp_peer_dup_servicename(peer));
   tr_debug("tr_trpc_initiate: allocated connection");
   
   /* start thread */
@@ -651,7 +780,6 @@ TRP_RC tr_trpc_initiate(TRPS_INSTANCE *trps, TRP_PEER *peer)
   rc=TRP_SUCCESS;
 
  cleanup:
-  talloc_report_full(tmp_ctx, stderr);
   talloc_free(tmp_ctx);
   return rc;
 }
@@ -670,7 +798,7 @@ TRP_RC tr_add_local_routes(TRPS_INSTANCE *trps, TR_CFG *cfg)
   if (trust_router_name==NULL)
     return TRP_NOMEM;
 
-  for (cur=cfg->idp_realms; cur!=NULL; cur=cur->next) {
+  for (cur=cfg->ctable->idp_realms; cur!=NULL; cur=cur->next) {
     local_routes=tr_make_local_routes(tmp_ctx, cur, trust_router_name, &n_routes);
     for (ii=0; ii<n_routes; ii++)
       trps_add_route(trps, local_routes[ii]);
@@ -691,7 +819,7 @@ static int tr_conn_attempt_due(TRPS_INSTANCE *trps, TRP_PEER *peer, struct times
 }
 
 /* open missing connections to peers */
-TRP_RC tr_connect_to_peers(TRPS_INSTANCE *trps)
+TRP_RC tr_connect_to_peers(TRPS_INSTANCE *trps, struct event *ev)
 {
   TALLOC_CTX *tmp_ctx=talloc_new(NULL);
   TRP_PTABLE_ITER *iter=trp_ptable_iter_new(tmp_ctx);
@@ -710,16 +838,17 @@ TRP_RC tr_connect_to_peers(TRPS_INSTANCE *trps)
        peer=trp_ptable_iter_next(iter))
   {
     if (trps_find_trpc(trps, peer)==NULL) {
+      TR_NAME *label=trp_peer_get_label(peer);
       tr_debug("tr_connect_to_peers: %.*s missing connection.",
-               trp_peer_get_gssname(peer)->len, trp_peer_get_gssname(peer)->buf);
+               label->len, label->buf);
       /* has it been long enough since we last tried? */
       if (tr_conn_attempt_due(trps, peer, &curtime)) {
         trp_peer_set_last_conn_attempt(peer, &curtime); /* we are trying again now */
-        if (tr_trpc_initiate(trps, peer)!=TRP_SUCCESS) {
+        if (tr_trpc_initiate(trps, peer, ev)!=TRP_SUCCESS) {
           tr_err("tr_connect_to_peers: unable to initiate TRP connection to %s:%u.",
                  trp_peer_get_server(peer),
                  trp_peer_get_port(peer));
-        }
+        } 
       }
     }
   }
@@ -738,6 +867,7 @@ void tr_config_changed(TR_CFG *new_cfg, void *cookie)
 {
   TR_INSTANCE *tr=talloc_get_type_abort(cookie, TR_INSTANCE);
   TRPS_INSTANCE *trps=tr->trps;
+  char *table_str=NULL;
 
   tr->cfgwatch->poll_interval.tv_sec=new_cfg->internal->cfg_poll_interval;
   tr->cfgwatch->poll_interval.tv_usec=0;
@@ -745,13 +875,44 @@ void tr_config_changed(TR_CFG *new_cfg, void *cookie)
   tr->cfgwatch->settling_time.tv_sec=new_cfg->internal->cfg_settling_time;
   tr->cfgwatch->settling_time.tv_usec=0;
 
+  /* These need to be updated */
+  tr->tids->hostname = new_cfg->internal->hostname;
+  tr->mons->hostname = new_cfg->internal->hostname;
+
+  /* Update the authorized monitoring gss names */
+  if (tr->mons->authorized_gss_names) {
+    tr_debug("tr_config_changed: freeing tr->mons->authorized_gss_names");
+    tr_gss_names_free(tr->mons->authorized_gss_names);
+  }
+  if (new_cfg->internal->monitoring_credentials != NULL) {
+    tr->mons->authorized_gss_names = tr_gss_names_dup(tr->mons, new_cfg->internal->monitoring_credentials);
+  } else {
+    tr->mons->authorized_gss_names = tr_gss_names_new(tr->mons);
+  }
+  if (tr->mons->authorized_gss_names == NULL) {
+    tr_err("tr_config_changed: Error configuring monitoring credentials");
+  }
+
   trps_set_connect_interval(trps, new_cfg->internal->trp_connect_interval);
   trps_set_update_interval(trps, new_cfg->internal->trp_update_interval);
   trps_set_sweep_interval(trps, new_cfg->internal->trp_sweep_interval);
+  trps_set_ctable(trps, new_cfg->ctable);
+  trps_set_ptable(trps, new_cfg->peers);
+  trps_set_peer_status_callback(trps, tr_peer_status_change, (void *)trps);
   trps_clear_rtable(trps); /* should we do this every time??? */
   tr_add_local_routes(trps, new_cfg); /* should we do this every time??? */
   trps_update_active_routes(trps); /* find new routes */
   trps_update(trps, TRP_UPDATE_TRIGGERED); /* send any triggered routes */
-  tr_trps_print_route_table(trps, stderr);
+  tr_print_config(new_cfg);
+  table_str=tr_trps_route_table_to_str(NULL, trps);
+  if (table_str!=NULL) {
+    tr_info("%s", table_str); /* table text may contain '%': never use it as the format string */
+    talloc_free(table_str);
+  }
+  table_str=tr_trps_comm_table_to_str(NULL, trps);
+  if (table_str!=NULL) {
+    tr_info("%s", table_str); /* same reason as above */
+    talloc_free(table_str);
+  }
 }