-#! /bin/sh
+#!/bin/sh
+######################################################################
#
-# radwatch Script to watch RADIUS. Sends mail to root and
-# restarts radiusd when it dies [which ofcourse
-# never happens :)]
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
#
-# WARNING! This script SHOULD NOT BE USED! It's only here for historical
-# purposes, and WILL be deleted in a future version of the
-# the server.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
#
-# If you want to watch and re-start the server, we recommend
-# reading the file ../doc/supervise-radiusd.txt
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
+# Copyright (C) 2009 Network RADIUS SARL <info@networkradius.com>
#
-# Version: $Id$
+######################################################################
#
+# radwatch - Start the radius daemon and restart upon crash.
+#
+# It also catches signals sent to it, and then re-sends those signals
+# to the radius server it is watching.
+#
+# If you want to watch and re-start the server, we recommend
+# reading the file doc/supervise-radiusd.txt
+
+#
+# This simplifies the script, and avoids most issues with (say)
+# Debian re-naming "radiusd" to "freeradius".
+#
+name=radiusd
prefix=@prefix@
exec_prefix=@exec_prefix@
sbindir=@sbindir@
localstatedir=@localstatedir@
logdir=@logdir@
-rundir=${localstatedir}/run/radius
+sysconfdir=@sysconfdir@
+
+# Run-time state lives in a directory named after the daemon.
+rundir="${localstatedir}/run/${name}"
+pid_file="${rundir}/${name}.pid"
+
+# Log file that this script appends its own messages and the
+# server's output to.
+log_file="${logdir}/${name}_safe.log"
-MAILTO=root
-RADIUSD=$sbindir/radiusd
+#
+# Work out which option syntax the local "tail" accepts.  Try the
+# "-n <count>" form on a throw-away line first; if that probe fails,
+# fall back to the "-<count>" form.  The line count is appended
+# directly to $tail wherever it is used.
+#
+tail="tail -n "
+if echo foo | ${tail}1 > /dev/null 2>&1
+then
+    :
+else
+    tail="tail -"
+fi
-exec >> $logdir/radwatch.log 2>&1
+# Server binary, and the directory holding its configuration.
+RADIUSD="${sbindir}/${name}"
+RADDBDIR="${sysconfdir}/raddb"
-# get the path to the radiusd
-if [ "$1" ] && [ -x "$1" ]
+#
+# If you want to send email, define this field to be an email address.
+# This part of the functionality hasn't been well tested, so please
+# test it before putting it into production.
+#
+# It also presumes that you have a functioning mail system on
+# the machine running RADIUS. You will need to check that the
+# "mail" command exists, and sends mail to the address below, e.g.:
+#
+# echo test | mail -s "Testing" $MAILTO
+#
+# If you receive the message, then enable MAILTO. Otherwise, fix
+# your mail system so that it delivers mail.
+#
+MAILTO=
+
+#
+# Allow "radiusd_safe -X" for testing the radiusd_safe functionality.
+#
+# All command-line arguments are joined into a single string, which is
+# later spliced into the server's command line.  "$*" is used because
+# assigning "$@" to a plain variable is not well defined across shells.
+#
+ARGS="$*"
+
+#
+# Exit quietly (status 0) when the server binary or its main
+# configuration file is not present.
+#
+test -f "$RADIUSD" || exit 0
+test -f "$RADDBDIR/radiusd.conf" || exit 0
+
+#
+# Remove the core-file size limit for this shell and the processes
+# it starts.
+#
+ulimit -c unlimited
+
+#
+# See if the PID file exists. It might have been left over after
+# a crash, or it might be because the RADIUS server is still running.
+#
+if test -f $pid_file
then
- RADIUSD=$1
- shift
+    # PID recorded in the file by whoever wrote it last.
+    PID=`cat $pid_file`
+
+    #
+    # Is that process still there, AND does its "ps" line mention
+    # the daemon name?  If so, refuse to start a second copy.
+    #
+    ps -p $PID | grep $name > /dev/null
+    if test "$?" = "0"
+    then
+        stamp=`date +'%a %b %e %H:%M:%S %Y'`
+        echo "$stamp : Fatal: A $name process already exists at PID $PID. We cannot start another one." >> $log_file
+        echo "A $name process already exists"
+        exit 1
+    fi
+
+    #
+    # No such server is running, so the PID file is stale.  Remove
+    # it, and give up if it is still there afterwards.
+    #
+    rm -f $pid_file
+    if test -f $pid_file
+    then
+        stamp=`date +'%a %b %e %H:%M:%S %Y'`
+        echo "$stamp : Fatal: Cannot remove the pid file: $pid_file" >> $log_file
+        echo "Fatal error: Cannot remove the pid file: $pid_file"
+        echo "Please remove it manually and start $0 again"
+        echo "$name daemon not started"
+        exit 1
+    fi
fi
-cd $logdir
-[ -d $logdir/radacct ] && cd $logdir/radacct
-ulimit -c unlimited
+#
+# Bookkeeping for the restart loop:
+#   started    - timestamp of the most recent start ("0" before the first)
+#   now        - hour-granularity timestamp used to rate-limit email
+#   last_email - value of $now when mail was last sent ("0" = never)
+#   restarts   - restarts counted since the last email
+#
+now=0
+last_email=0
+restarts=0
+started=0
+
+#
+# Record the PID of this watchdog script itself.
+#
+echo $$ > "${rundir}/${name}_safe.pid"
+
+#
+# Loop forever, or until we're told to exit via a signal.
+#
+while :
+do
+    #
+    # The first time around, just start the server.
+    # After that, see if we are re-starting in the same second
+    # as the last time.  If so, sleep for a second.  Otherwise,
+    # if we're not starting in the same second, then just restart
+    # the server.
+    #
+    # This helps prevent CPU spikes when something goes catastrophically
+    # wrong, and the server re-starts continuously.  (e.g. disk full, etc.)
+    #
+    now_s=`date +'%a %b %e %H:%M:%S %Y'`
+    if test "$started" != "0"
+    then
+        # Send mail when the server re-starts, if an address is configured.
+        if test "$MAILTO" != ""
+        then
+            # Don't print minutes and seconds: cheap way
+            # of sending email only once an hour.
+            now=`date +'%a %b %e %H %Y'`
+            restarts=`expr $restarts + 1`
+
+            #
+            # The message body is the here-document, which the shell
+            # attaches to "mail" directly; no "cat" is needed in front
+            # of it (it would only read this script's own stdin).
+            # $MAILTO is left unquoted so that its value is word-split
+            # exactly as before.
+            #
+            if test "$last_email" = "0"
+            then
+                # Send email the first time it restarts.
+                mail -s "ERROR - $name died, restarting.." $MAILTO <<EOF
+$name has restarted unexpectedly at $now
+
+See $log_file for details. Last 20 lines are:
+
+----------------------------------------------------------------------
+`${tail}20 "$log_file"`
+EOF
+                last_email="$now"
+                restarts=0
+            else
+                # Send email only once every hour (or so).
+                if test "$now" != "$last_email"
+                then
+                    mail -s "ERROR - $name died, restarting.." $MAILTO <<EOF
+$name has restarted $restarts times since last email at $last_email
+
+See $log_file for details. Last 100 lines are:
+
+----------------------------------------------------------------------
+`${tail}100 "$log_file"`
+EOF
+                    last_email="$now"
+                    restarts=0
+                fi
+            fi
+        fi
+
+        if test "$started" = "$now_s"
+        then
+            # Allow us to be killed while we pause: restore the
+            # default signal handling for the duration of the sleep.
+            trap - HUP INT QUIT TERM TSTP
+            sleep 1
+        fi
+    fi
+    started="$now_s"
+
+    #
+    # Remember which signal (if any) is delivered to this script while
+    # the server runs.  $mysig is tested for being non-empty below; its
+    # numeric value is only used when "wait" reports 0.
+    #
+    # NOTE(review): the numbers are hard-coded.  18 is SIGTSTP on BSD,
+    # but Linux uses 20 and Solaris 24 -- confirm for the target systems.
+    #
+    mysig=
+    trap 'mysig=1' HUP
+    trap 'mysig=2' INT
+    trap 'mysig=3' QUIT
+    trap 'mysig=15' TERM
+    trap 'mysig=18' TSTP
+
+    #
+    # Start the server in the foreground ("-f") as a background job of
+    # this shell.  Save the status of "eval" before anything else can
+    # overwrite $?: reading it after "PID=$!" would only ever see the
+    # status of that assignment.  A background launch itself reports
+    # success, so this catches a command line that could not be
+    # evaluated; a server that dies later is seen by "wait" below.
+    #
+    eval "$RADIUSD -f $ARGS < /dev/null >> $log_file 2>&1 &"
+    start_rc=$?
+    PID=$!
+
+    if test "$start_rc" != "0"
+    then
+        echo "Failed to start $name.  See $log_file for details"
+        echo "$name daemon not started"
+        exit 1
+    fi
+
+    echo $PID > "$pid_file"
+
+    #
+    # Wait for the process to exit.
+    #
+    wait "$PID"
+    code=$?
+
+    #
+    # On *BSD and Linux, sending *us* a signal results in "wait" returning
+    # with 128+sig.  On Solaris, it results in "wait" returning with "0".
+    #
+    # If this happens, we reset our expectations here so that the code
+    # below will work correctly.
+    #
+    if test "$code" = "0"
+    then
+        if test "$mysig" != ""
+        then
+            code=`expr $mysig + 128`
+        fi
+    fi
+
+    case "$code" in
+    0)
+        echo "`date +'%a %b %e %H:%M:%S %Y'` : Info: $name exited normally. Exiting" | tee -a "$log_file"
+        break
+        ;;
+
+    127)
+        echo "`date +'%a %b %e %H:%M:%S %Y'` : Info: $name exited unexpectedly. Restarting it." | tee -a "$log_file"
+        ;;
+
+    *)
+        #
+        # Below 128: the server exited of its own accord.
+        # 128 and above: a signal was involved.
+        #
+        if test "$code" -lt 128
+        then
+            echo "`date +'%a %b %e %H:%M:%S %Y'` : Info: $name exited unexpectedly on exit code $code. Restarting it." | tee -a "$log_file"
+        else
+            sig=`expr $code - 128`
+
+            #
+            # Was the signal sent to us, or to the child process?
+            # If it was ours, pass it on to the server and stop
+            # supervising; otherwise the server died and is restarted.
+            #
+            if test "$mysig" != ""
+            then
+                echo "`date +'%a %b %e %H:%M:%S %Y'` : Info: Caught signal $sig: Signalling $name to exit." | tee -a "$log_file"
+                kill -$sig "$PID"
+                break
+            else
+                echo "`date +'%a %b %e %H:%M:%S %Y'` : Info: $name exited unexpectedly on signal $sig. Restarting it." | tee -a "$log_file"
+            fi
+        fi
+        ;;
+    esac
+done
-(
- trap 'echo `date`: exit; kill `cat $rundir/radiusd.pid`; exit 0' TERM
- trap "" HUP TSTP
-
- while :
- do
- # Use `wait', otherwise the trap doesn't work.
- $RADIUSD -f $* &
- wait
- exec >> $logdir/radwatch.log 2>&1
- echo "`date`: Radius died, restarting.."
- date | mail -s "Radius died, restarting.." $MAILTO
- sleep 10
- done
-) &
-
-echo "$!" > $rundir/radwatch.pid
-
-sleep 1
+# Supervision is over: drop the server's PID file and our own.
+rm -f "$pid_file"
+rm -f "${rundir}/${name}_safe.pid"
+exit 0