#! /bin/sh
 
#    Nov 02/06 - F.Majaess
 
#id  mntrllq - Used to check the LoadLeveler batch queues on the IBM back-end(s).
 
#    AUTHOR  - F.Majaess
 
#hd  PURPOSE - "mntrllq" script is used to check the LoadLeveler batch 
#hd            queues on one or more of the IBM back-end(s) at "ri" 
#hd            seconds repeat interval and up to "nt" times.
 
#pr  PARAMETERS:
#pr 
#pr    PRIMARY
#pr 
#pr      nt     = number of times to perform the check (=500)
#pr      ri     = interval time between the checks in seconds (=20)
#pr      user   = username - used only in conjunction with the "own"
#pr               parameter below (=$USER)
#pr      target = target IBM "ha/sp/al" cluster/node to do the check on.
#pr               (="local host" if invoked from one of the IBM 
#pr                                 back-end(s),
#pr                ="hadar", otherwise)
#pr 
#pr      filter = extended regular expression ("egrep" syntax) used to select
#pr               the job lines to list; the header lines are always kept.
#pr 
#pr   SECONDARY
#pr 
#pr        own = switch to list only user's own jobs
#pr              (=no/yes)
#pr 
#pr       wide = switch to enable wide column format
#pr              (=no/yes)
#pr 
 
#ex  EXAMPLE: 
#ex 
#ex    mntrllq nt=3 ri=5 ha sp
#ex 
#ex  The above will result in checking LL queues up to 3 times
#ex  at 5 seconds time interval on hadar and spica cluster headnodes.
#ex 
#ex    mntrllq ha filter=' acrn'
#ex  
#ex  The above will result in listing the jobs filtered by " acrn"
#ex   (ie. in general listing "acrn" jobs)
#ex 
#ex    mntrllq wide own user='acrngcm acrnesm acrncbn acrnocn' sp   
#ex  
#ex  The above will just list the jobs associated with any of the 
#ex  above 4 accounts with wide column format enabled.
#ex 
#ex    mntrllq ha own user='acrngcm acrnesm' filter=' single_s | cfs_xfer '
#ex  
#ex  The above will result in listing jobs associated with, any of
#ex  the above 2 accounts, and in "single_s" or "cfs_xfer" queue.
#ex  

# Check and possibly adjust for parameters specified on the 
# script call ...

# Record one non-option command-line word "$1" in the matching global:
#   user=V / nt=V / ri=V / filter=V  set the variable of that name to V,
#   own / wide                       set the switch of that name to "yes",
#   anything else                    is appended to the "target" list.
# V is extracted with a parameter expansion instead of piping an unquoted
# "echo $1" through sed, so blanks inside or at the end of V (as in
# filter=' single_s | cfs_xfer '), backslashes and glob characters reach
# the variable unchanged.
mntrllq_take_arg()
{
  case "$1" in
    user=*)   user=${1#user=} ;;
    nt=*)     nt=${1#nt=} ;;
    ri=*)     ri=${1#ri=} ;;
    filter=*) filter=${1#filter=} ;;
    own)      own=yes ;;
    wide)     wide=yes ;;
    *)        target="$target $1" ;;
  esac
}

# Start from an empty target list, then consume every command-line word.
unset target
while [ $# -gt 0 ]
  do
    case "$1" in
      # A word starting with "-" is handed to "set" as a shell option
      # (e.g. -x).  This is done here, not in the helper, so that the
      # option is set in the script's own top-level scope.
      -*) set "$1" ;;
      *)  mntrllq_take_arg "$1" ;;
    esac
    shift
  done

# Set the defaults.
 
# Abort with status 1 unless "$2" (the value of the parameter named "$1")
# consists of decimal digits only.  Without this check a mistyped "ri"
# makes every "sleep" fail immediately, so the checks would be fired
# back-to-back, and a mistyped "nt" ends the script with only a cryptic
# message from "[".
mntrllq_need_count()
{
  case "$2" in
    ''|*[!0-9]*)
      echo "mntrllq: invalid $1='$2' (a non-negative integer is required)" >&2
      exit 1
      ;;
  esac
}

# Any parameter left unset or empty by the command line gets its default.
nt=${nt:-500}
ri=${ri:-20}
own=${own:-no}
wide=${wide:-no}
user=${user:-$USER}

mntrllq_need_count nt "$nt"
mntrllq_need_count ri "$ri"

# Build the llq option list; options are appended to whatever "Optns"
# already holds.
if [ "$wide" = 'yes' ] ; then
 Optns="$Optns -W"
fi
if [ "$own" = 'yes' ] ; then
 # "user" may hold several blank-separated account names.
 Optns="$Optns -u $user"
fi

  # Explicit column list for the listing.
  Optns="$Optns -f %id %jn %o %dq %st %p %c %nh %h"
  # A non-empty "cmd" already present takes precedence over the built command.
  cmd=${cmd:-"llq $Optns"}

  # Print the name of the cluster that back-end node "$1" belongs to,
  # judging by the first three characters of the node name:
  #   c1f/c1h/c1r/c1s -> spica,  c2f/c2h/c2r/c2s -> hadar,  c8f -> algol.
  # Any other name falls back to hadar.  (Mappings for saiph, zeta, maia
  # and rigel existed as commented-out entries and remain disabled.)
  mntrllq_cluster_of()
  {
    case "$1" in
      c1[fhrs]*) echo 'spica' ;;
      c2[fhrs]*) echo 'hadar' ;;
      c8f*)      echo 'algol' ;;
      *)         echo 'hadar' ;;
    esac
  }

  # Determine "lclhost" (compared against each cluster name later on to
  # decide whether the check can be run locally) and give "target" a default
  # when no target was specified.
  if [ "$SITE_ID" = 'Dorval' ] && [ "$OS" = 'AIX' ] ; then
     # On an IBM back-end: the local cluster is also the default target.
     lclhost=`mntrllq_cluster_of "$HOSTID"`
     target=${target:-$lclhost}
  else
     # Anywhere else the default target is hadar.
     target=${target:-hadar}
     lclhost="$HOSTID"
  fi

# Setup the list of platforms to check based on "target" setting.

  # nodes_2chk='maia naos rigel'
  # Translate every word of "target" into a full cluster name and collect
  # the names, in the order given, in "nodes_2chk".  Both the two-letter
  # abbreviation and the full name are accepted; any other word is reported
  # and left out.  (Entries for zeta, saiph and maia, and a pass-through of
  # unknown names, are disabled.)
  unset nodes_2chk
  for pltfrm in $target ; do
    case "$pltfrm" in
      ha|hadar) cluster_name='hadar' ;;
      sp|spica) cluster_name='spica' ;;
      al|algol) cluster_name='algol' ;;
      *)
        echo "Skipped invalid entry: ${pltfrm}!"
        continue
        ;;
    esac
    nodes_2chk="$nodes_2chk $cluster_name"
  done

#   ****   Task of the script...   ****

# Run the queue listing command "cmd" for cluster "$1": directly when "$1"
# is the local cluster ("lclhost"), through ssh otherwise.
mntrllq_query()
{
  if [ "$1" = "$lclhost" ] ; then
    # "cmd" is one flat string, so the shell has to split it again.
    # NOTE(review): "cmd" embeds the "user=" value given on the command
    # line; eval (and the remote shell behind ssh) will interpret any shell
    # syntax it contains.
    eval "$cmd"
  else
    ssh "$1" "$cmd"
  fi
}

# Tidy the listing read from stdin: strip the trailing run of dashes from
# each line and, when "filter" is set, keep only the header lines (those
# starting with "Step Id" or with a dashed rule) plus the lines matching
# "filter".  "grep -E" replaces the obsolescent "egrep".
mntrllq_tidy()
{
  if [ -n "$filter" ] ; then
    sed -e 's/-------------$//' | grep -E "^Step Id|^----------|$filter"
  else
    sed -e 's/-------------$//'
  fi
}

# Nothing to do without at least one valid cluster.
if [ -z "$nodes_2chk" ] ; then
 echo 'A valid target cluster needs to be specified!'
 exit 1
fi

# * Perform the check "nt" times at "ri" sec. repeat interval.

while [ "$nt" -gt 0 ]
do
  echo "" ; echo "As of $(date):" ; echo ""
  for nod in $nodes_2chk
    do
     echo "=== ${nod} check:"
     mntrllq_query "$nod" | mntrllq_tidy
     echo ""
    done

  # No point in waiting after the last check.
  if [ "$nt" -gt 1 ] ; then
    sleep "$ri"
  fi
  # expr is kept on purpose: unlike $(( )), it treats a count with a
  # leading zero (e.g. 08) as decimal.
  nt=$(expr "$nt" - 1)
  echo "" ; echo " Remaining checks: ${nt} times at ${ri} sec. interval " ; echo ""
done
