#!/bin/bash
#
# NAME
# ----
# - xymon_bacula_check.sh 
#
# DESCRIPTION
# -----------
# - Simple Bash shell script to poll a Bacula backup server director
#   using the Bacula bconsole program and report back to Xymon any
#   failed jobs or jobs waiting on operator intervention. Optionally,
#   this script will also report back on any media having a status
#   of "Error"
#
# - The most current version of this script may be
#   found at http://www.revpol.com/xymon_bacula_check_script
#
# - Instructions to integrate the output of this script to be monitored
#   by a Xymon server may also be found at the above URL
#
# - If you find this script useful, I'd love to know. Send me an email!
#
# William A. Arlofski
# Reverse Polarity, LLC
# 860-824-2433 Office
# http://www.revpol.com/
# 
# HISTORY
# ------
# - 20090504 - Initial script to just monitor the last "x" jobs
#              and report any backup job failures to Xymon server
# - 20100604 - Added additional functionality to monitor for operator 
#              intervention required (eg: waiting to mount a tape etc)
# - 20100604 - Initial posting and release
# - 20100606 - Some wording changes and some changes to the grep command 
#              line to be more specific in the conditions caught
# - 20100618 - Modified the $MSG variable assignments so that custom text could
#              be prepended to the status report sent to the Xymon server
#            - Minor spelling errors corrected
# - 20100720 - Changed grep test for alternate wording when a mount was
#              required
# - 20100722 - Changed the green status report to include the last $NUMJOBS
#              listing instead of saying:
#              "The last $NUMJOBS Bacula jobs have completed successfully."
# - 20100725 - The "last $NUMJOBS run..." message was coming up two jobs short
#              Swapped the order of grep and tail in the "list jobs" command
#              to make sure we were getting only lines that begin with a "|"
#              before tailing for $NUMJOBS
#            - Modified the wording for the "Last jobs run" header
#            - Modified the wording for the "No devices waiting" header
#            - Modified wording of the bconsole test failure message
#            - Fixed the bconsole test logic. Why was I testing for an exit 
#              code of "1" in the first place. Sigh...
#            - Changed the Date/Time headers to "Start Time" to reflect the 
#              correct information gathered from the "list jobs" command
#            - Added JOBHEADER, STOREHEADER and MEDIAHEADER variables to 
#              reduce duplication and minimize formatting problems due to
#              embedded tabs
#            - Added media test and added CHK_MEDIA variable to enable or
#              disable the media tests
#            - Added ADMINMSG variable to be prepended to Xymon status report
#            - Added additional comments throughout code
#
###############################################################################
#
# Copyright (C) 2010 William A. Arlofski - waa-at-revpol-dot-com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License, version 2, as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
# or visit http://www.gnu.org/licenses/gpl.txt
#
###############################################################################
#
# Set some variables
# ------------------
# Local system binaries
# ---------------------
# Preferred absolute paths of the helper programs used by this script.
# Edit these to match your system.
SUDO="/usr/bin/sudo"
GREP="/bin/grep"
SED="/usr/bin/sed"
AWK="/usr/bin/awk"
TAIL="/usr/bin/tail"
BCONFIG="/etc/bacula/bconsole.conf"
BCONSOLE="/usr/sbin/bconsole"

# If a helper is not executable at the path configured above, fall back to
# whatever the shell finds in PATH. Without this, a missing helper makes the
# pipelines further down produce empty output, which would be reported to
# Xymon as "nothing wrong".
for tool_var in SUDO GREP SED AWK TAIL BCONSOLE; do
  tool_path="${!tool_var}"
  if [ ! -x "$tool_path" ]; then
    if tool_found=$(command -v "${tool_path##*/}"); then
      printf -v "$tool_var" '%s' "$tool_found"
    fi
  fi
done
unset tool_var tool_path tool_found

# Full bconsole command line (program plus arguments). It is expanded
# unquoted where it is used so that it splits into separate words.
BCMD="$SUDO $BCONSOLE -c $BCONFIG"

# Reporting state
# ---------------
# MSG and COLOR start out empty/green and are updated by the checks
# further down; COLUMN is the test name used in the status line
# sent to the Xymon server.
MSG=""
COLOR=green
COLUMN=Bacula

# Number of most recent jobs
# to inspect on each run
# --------------------------
NUMJOBS=10

# Set to "1" to also look for
# media with a status of "Error"
# ------------------------------
CHK_MEDIA=1

# Only used when CHK_MEDIA is "1": a space-separated
# list of the pools to check, or the single word "ALL"
# to check every pool
# ---------------------------------------------------
#POOLS="Pool_1 Pool_2 Pool_3"
POOLS=ALL

# Optional text (HTML allowed) that is placed in the
# status message sent to the Xymon server, above the
# output of the checks
# ----------------------------------------------------
ADMINMSG='<ul><li>This Bacula server is running on host.example.com</li><li>host.example.com resides in the main server room</li><li>Please contact John Doe at extention 1234 if there are devices waiting on a mount request</li></ul><br />'

# Set some headers for the list outputs
# -------------------------------------
# These two-line headers (column titles plus an underline row) are placed
# above the job, storage and media listings in the status message.
# THIS PART IS REALLY UGLY, and horrible.
# Depending on the length of your media and pool names, you
# may need to adjust these. Looking forward to the web updates
# to Xymon so clean CSS can replace tabs etc.

# Set Job list header variable
# ** NOTE ** There are embedded TABS in the header strings for formatting
#            purposes. If you cut and pasted this from a web page, then you
#            will need to play around with the formatting. Or you may just
#            download the script from the URL in the header of this script.
# ---------------------------------------------------------------------------
JOBHEADER="JobId	Job Name		Job Start Time		Job Files	Job Bytes		Status
-----	----------------------	-------------------	---------	---------------		------"

# Set storage check header variable
# (shown above the list of jobs waiting on a storage device or mount request)
# ---------------------------------------------------------------------------
STORHEADER="JobId  Level			Name			Status
-------------------------------------------------------------------------------"

# Set media test header
# (shown above the list of media having a status of Error)
# ---------------------
MEDIAHEADER="Media ID	Media Name		 	Status		Media Type	Last Written
--------	----------			-------		------------	-------------------"

#
# --------------------------------------------------------------------------------
# Nothing below this line should require any editing; please let me know if it does
# --------------------------------------------------------------------------------
#

# extract_job_rows
# ----------------
# Reads raw bconsole "list jobs" output on stdin and writes the last
# $NUMJOBS job rows to stdout as tab-separated columns (fields 2, 3, 4, 7,
# 8 and 9 of the "|"-delimited table; the last one is the job status).
# Only rows whose first column starts with a digit are kept, so the table's
# own "| JobId | Name | ..." title row is never mistaken for a job when
# fewer than $NUMJOBS jobs exist.
extract_job_rows() {
  "$GREP" "^| *[0-9]" \
    | "$TAIL" -n"$NUMJOBS" \
    | "$SED" -e 's/| /|/g' \
    | "$AWK" -F"|" '{print $2 "\t" $3 "\t" $4 "\t" $7 "\t" $8 "\t" $9}'
}

# filter_bad_jobs
# ---------------
# Reads rows produced by extract_job_rows on stdin and writes only those
# whose last (status) column is neither "T" nor "R", the two status codes
# this script treats as "completed" and "currently running".
# Only the status column is inspected, so a job whose *name* happens to end
# in "T" or "R" can no longer hide a failure. Blank input lines are dropped.
filter_bad_jobs() {
  "$AWK" -F'\t' 'NF && $NF !~ /^ *[TR] *$/'
}

# Test sudo/bconsole connectivity
# -------------------------------
# $BCMD is deliberately left unquoted everywhere below: it holds a command
# together with its arguments and must be split into words.
if ! echo "quit" | $BCMD > /dev/null 2>&1; then
  COLOR="yellow"
  MSG="${MSG}
  <HR><H3>&$COLOR Problem executing sudo, bconsole or problem connecting to the Bacula Director</H3>"
else

  # Run Bacula's bconsole program and
  # check for mount or storage requests
  # -----------------------------------
  data=$(echo "status dir" | $BCMD | $GREP -i "is waiting.*\(for a mount\|on mount\|on storage\)" | $SED -e 's/^ \+//')

  # Now we assume that the bconsole connectivity test above succeeded
  # and that the "status dir" command just above also succeeded and
  # test to see if our grep returned anything
  #
  # ** NOTE ** The continuation lines of the MSG strings start in column 0
  #            on purpose: their leading whitespace would otherwise end up
  #            in the formatted output on the Xymon test page
  # --------------------------------------------------------------------
  if [ -n "$data" ]; then
    COLOR="yellow"
    MSG="${MSG}
<HR><B>&$COLOR The following jobs are waiting on a storage device or a mount request:</B>

$STORHEADER
$data"
  else
    MSG="${MSG}<HR><B>&$COLOR No jobs are waiting on a storage device or a mount request</B>"
  fi

  # Run the media tests?
  # --------------------
  # CHK_MEDIA is quoted so that an empty or unset value simply disables
  # the media tests instead of producing a "[" syntax error
  if [ "$CHK_MEDIA" = "1" ]; then
    # Loop through all the pools defined in the POOLS
    # variable to test for media with Error status
    # -----------------------------------------------
    for pool in $POOLS; do
      POOL_COLOR="green"
      # Test for special case "ALL"
      # ---------------------------
      if [ "$POOLS" = "ALL" ]; then
        pool="All Pools"
        poolstring=""
      else
        poolstring="pool=$pool"
      fi

      # Run Bacula's bconsole program
      # to get a listing of media with
      # a status of Error
      # ------------------------------
      data=$(echo "list media $poolstring" | $BCMD | $GREP -i "^|.*error" | $SED -e 's/| \+/|/g' \
           | $AWK -F"|" '{print $2 "\t" "\t" $3 "\t" $4 "\t" $12 "\t" $13}')

      # Now we assume that the bconsole connectivity test above succeeded
      # and that the "list media" or "list media pool=XXXX" command just
      # above also succeeded and test to see if our grep returned anything
      # -----------------------------------------------------------------
      if [ -n "$data" ]; then
        COLOR="yellow"
        POOL_COLOR="yellow"
        MSG="${MSG}<HR><B>&$POOL_COLOR The following media in pool \"$pool\" have a status of Error:</B>

$MEDIAHEADER
$data"
      else
        MSG="${MSG}<HR><B>&$POOL_COLOR Pool \"$pool\" has no media with a status of Error</B>"
      fi
    done
  fi

  # Run Bacula's bconsole program
  # to get a listing of the most recent jobs
  # ----------------------------------------
  JOBS_COLOR="green"
  data=$(echo "list jobs" | $BCMD | extract_job_rows)

  # Now we assume that the bconsole connectivity test above succeeded
  # and that the "list jobs" command just above also succeeded and
  # we pick the "bad jobs" out of the job listing
  # ---------------------------------------------------------------
  data_bad_jobs=$(printf '%s\n' "$data" | filter_bad_jobs)

  if [ -n "$data_bad_jobs" ]; then
    COLOR="yellow"
    JOBS_COLOR="yellow"
    MSG="${MSG}<HR><B>&$COLOR Of the $NUMJOBS most recent Bacula jobs run, the following did not complete successfully:</B>

$JOBHEADER
$data_bad_jobs"
  fi

  MSG="${MSG}
<HR><B>&$JOBS_COLOR Last $NUMJOBS completed or currently running jobs:</B>

$JOBHEADER
$data"

fi

# Now we send the status message to Xymon
# ---------------------------------------
# BB, BBDISP and MACHINE are expected to come from the environment this
# script is launched in. If BB/BBDISP are not set, fall back to the
# XYMON/XYMSRV variable names.
xymon_cmd="${BB:-$XYMON}"
xymon_srv="${BBDISP:-$XYMSRV}"

# Without these the status text itself would be executed as a command,
# so stop with a clear error instead
if [ -z "$xymon_cmd" ] || [ -z "$xymon_srv" ] || [ -z "$MACHINE" ]; then
  echo "$0: BB/XYMON, BBDISP/XYMSRV and MACHINE must be set (run this script from the Xymon environment)" >&2
  exit 1
fi

# The message body starts in column 0 on purpose: it is sent verbatim
if ! "$xymon_cmd" "$xymon_srv" "status $MACHINE.$COLUMN $COLOR $(date)

${ADMINMSG}
${MSG}
"; then
  echo "$0: failed to send the status message to $xymon_srv" >&2
  exit 1
fi
exit 0

