#! /bin/sh
 
#    Jun 16/14 - F.Majaess
 
#id  chkcjhm - Used to identify safely restartable local back-end jhome job(s).
 
#    AUTHOR  - F.Majaess
 
#hd  PURPOSE - "chkcjhm" script is used to check on and identify safely 
#hd            restartable local back-end jhome job(s).
#hd            NOTE: Script is limited to be run only on one of the 
#hd                  back-end clusters at CMC. 
 
#pr  PARAMETERS:
#pr 
#pr    PRIMARY
#pr 
#pr      dirlist= List of full paths of valid JHOME subdirectories on local
#pr               back-end cluster.
#pr               (='/fs/dev/crb/??d0[1-3]/data/JHOME_*')
#pr 
 
#ex  EXAMPLE: 
#ex 
#ex    chkcjhm dirlist=/fs/dev/crb/had02/data/JHOME_eer_2301m01_2310m12_20140521060253_9437216 
#ex 
#ex  The above will result in checking targeted "dirlist" provided
#ex  the script was invoked on Hadar back-end cluster.
#ex  

# ---------------------------------------------------------------------------
# chkcjhm_parse_args ARG...
#   Process the command-line arguments of the script:
#     -*          handed to "set" (e.g. -x to turn on tracing);
#     name=value  assigned to the shell variable "name";
#     other       ignored.
#
#   Each argument is handled as ONE word ("$@" is quoted), so a value may
#   contain blanks, e.g.  'dirlist=/path/JHOME_a /path/JHOME_b'.
#
#   NOTE(review): the value is assigned verbatim.  The previous
#   "eval $arg" re-parsed the whole argument as shell code, which both split
#   it on blanks and allowed arbitrary command execution.  "eval" is now only
#   given a name that has been checked to be a plain identifier, plus a
#   reference to the value; the value itself is never re-parsed.  Arguments
#   whose left-hand side is not a valid variable name are ignored.
# ---------------------------------------------------------------------------
chkcjhm_parse_args() {
  for arg in "$@"
  do
    case $arg in
      -*)
        set "$arg"
        ;;
      *=*)
        case ${arg%%=*} in
          '' | [!A-Za-z_]* | *[!A-Za-z0-9_]*)
            # Left-hand side is not a valid variable name: skip it.
            :
            ;;
          *)
            # Expands to:  name=${arg#*=}   (everything after the first '=')
            eval "${arg%%=*}=\${arg#*=}"
            ;;
        esac
        ;;
      *)
        :
        ;;
    esac
  done
}

chkcjhm_parse_args "$@"

# Refuse to run unless the environment identifies the Dorval site and the
# AIX operating system (both SITE_ID and OS must match).
# The message is written to stdout, like the other diagnostics of this script.
if [ "$SITE_ID" != 'Dorval' ] || [ "$OS" != 'AIX' ] ; then
 echo "chkcjhm: Sorry, script is restricted to be run only on one of the back-end clusters at CMC"
 exit 1
fi
# Host identification.
# HOSTID is taken from the environment when it is set (even if empty);
# otherwise it is the output of "hostname" up to the first '.'.
if [ -z "${HOSTID+set}" ] ; then
 HOSTID=$(hostname | cut -d'.' -f1)
fi
# First five characters of HOSTID.
HOSTIDf=$(echo $HOSTID | cut -c 1-5)

# Map the host name prefix to the cluster name (HOST) and to the lower-case
# name stored in hdest.  Any other prefix yields HOST='invalid' and leaves
# hdest untouched.
HOST='invalid'
case "$HOSTID" in
 c1*)
  HOST='SPICA'
  hdest='spica'
  ;;
 c2*)
  HOST='HADAR'
  hdest='hadar'
  ;;
esac

# All subsequent date handling is done in UTC.
TZ=UTC0
export TZ
# echo " ===> As of `date -u '+%Y%j%H%M%S'` ( `date -u '+%Y%m%d%H%M%S'` ; `date -u` ) on ${HOST}:" ; echo ""
# set -x
# ---------------------------------------------------------------------------
# chkcjhm_llq_tagged OWNER TAG
#   Print "YES" when the listing produced by "llq -W -f %jn -u OWNER"
#   contains at least one line matching TAG; print nothing otherwise.
#   Only lines 3 onward of the listing, up to (not including) the first
#   blank line, are examined.  TAG is used as a sed regular expression.
# ---------------------------------------------------------------------------
chkcjhm_llq_tagged() {
 # $1 is deliberately unquoted: an empty owner then contributes no argument
 # to llq, exactly as an empty variable would on a plain command line.
 llq -W -f %jn -u $1 |
  sed -n -e '3,$p' |
  sed -e '/^ *$/,$d' |
  sed -n -e "/$2/p" |
  sed -e 's/.*'"$2"'.*$/YES/g' |
  uniq |
  tail -1
}

# ---------------------------------------------------------------------------
# chkcjhm_report_dir JDIR
#   Examine one existing JHOME subdirectory and, unless jobs tagged with its
#   run id are found in the owner's llq listing, print one record:
#
#     <date> | owner | runid | date-range | status | JDIR | restart | comment
#
#   surrounded by blank lines.  Nothing is printed when auto-restart is
#   disabled for the directory or when its status is "Active".
#
#   The directory base name is split on '_': field 2 is the run id, fields
#   3 and 4 form the date range.
#
#   All working variables (JDir, ODir, RunID, RunIDF, DateRange, StaTus,
#   ActiveJobs, AutoRestart, Cmnt, Cmnt2, ...) are plain shell globals.
# ---------------------------------------------------------------------------
chkcjhm_report_dir() {
 JDir=$1

 # $AWK is not defined anywhere in this script; fall back to plain "awk"
 # when the environment does not supply it.  Used unquoted so that a value
 # carrying extra options keeps working.
 awk_cmd=${AWK:-awk}

 ODir=$(stat -c %U "$JDir")                 # owner of the directory
 RunID=$(basename "$JDir" | $awk_cmd -F '_' '{print $2}')
 RunIDF=$(echo "$RunID" | $awk_cmd '{printf "%-9s",$1;}')   # padded to 9 columns
 DateRange=$(basename "$JDir" | $awk_cmd -F '_' '{print $3 "_" $4 ; }')
 Cmnt=''
 Cmnt2=''
 StaTus="Inactive"
 ActiveJobs='NO'

 # Set "AutoRestart" ...
 #  - marker files present              -> restart disabled;
 #  - non-empty home/autorestart file   -> its last line;
 #  - otherwise                         -> most recently modified
 #    <RunID>_<digit>..._string file found under the .queue/.crawork
 #    directories that are siblings of $HOME (empty if none).
 if [ -f "$JDir/home/jhome_files_on_datapath" ] || [ -f "$JDir/home/jhome_files_complete" ] ; then
  AutoRestart='autorestart=disabled'
 elif [ -s "$JDir/home/autorestart" ] ; then
  AutoRestart=$(tail -1 "$JDir/home/autorestart")
 else
  #AutoRestart='autorestart=off'
  AutoRestart=$(ls -1dt "$HOME"/../*/.queue/.crawork/"${RunID}"_[0-9]*_string 2>> /dev/null | sed -e 's/\/acrn\/.*\.\.\//\/acrn\//g' | head -1)
 fi

 if [ "$AutoRestart" = 'autorestart=disabled' ] ; then
  return 0
 fi

 # Check llq ...
 # llq is considered usable only when the last line of its output is the
 # "... waiting, ... pending, ..." summary.
 LlqOK=$(llq -W -f %jn 2>>/dev/null | tail -1 | sed -e 's/^.*waiting,.*pending,.*$/YES/g')
 if [ "$LlqOK" = 'YES' ] ; then
  # A job is associated with this run when its name contains "<RunID>_";
  # "_<RunID>" is tried only if the first search found nothing.
  RunIdx="${RunID}"'_'
  RunIdp='_'"${RunID}"
  ActiveJobs=$(chkcjhm_llq_tagged "$ODir" "$RunIdx")
  if [ -z "$ActiveJobs" ] ; then
   ActiveJobs=$(chkcjhm_llq_tagged "$ODir" "$RunIdp")
  fi

  if [ "$ActiveJobs" = 'YES' ] ; then
   StaTus="Active  "
   Cmnt=' +++ JOB restartable however associated active jobs were detected +++ '
   if [ -n "$AutoRestart" ] ; then
    Cmnt2=" | ??? ssh ${ODir}@${hdest}  rsub resume ${AutoRestart} $hdest ???"
   else
    AutoRestart='*** Missing string file ***'
    Cmnt2=" | ??? no associated string file is found ???"
   fi
  else
   Cmnt=" --> JOB restartable ; no associated active jobs were detected <--"
   if [ -n "$AutoRestart" ] ; then
    Cmnt2=" | ssh ${ODir}@${hdest}  rsub resume ${AutoRestart} $hdest"
   else
    AutoRestart='*** Missing string file ***'
    Cmnt2=" | ??? no associated string file is found ; JHOME subdirectory needs to be deleted ???"
   fi
  fi
 else
  Cmnt=' +++ JOB likely restartable however NOT ABLE TO CHECK FOR ASSOCIATED ACTIVE LL JOBS SINCE llq NOT FULLY FUNCTIONAL +++> '
  StaTus="Unknown "
 fi

 # Only directories without detected active jobs are reported.
 if [ "$StaTus" != "Active  " ] ; then
  echo ""
  echo "$(date -u '+%Y%j%H%M%S') | $ODir | $RunIDF | $DateRange | $StaTus | $JDir | $AutoRestart | $Cmnt $Cmnt2 "
  echo ""
 fi
}

# Default "dirlist" to every JHOME_* subdirectory matched by the pattern
# below.  When nothing matches, the pattern itself is kept and the scan is
# skipped by the comparison that follows.
if [ -z "$dirlist" ] ; then
 dirlist=$(echo /fs/dev/crb/??d0[1-3]/data/JHOME_*)
fi

if [ "$dirlist" != '/fs/dev/crb/??d0[1-3]/data/JHOME_*' ] ; then
 # $dirlist is intentionally unquoted: it is a blank-separated list whose
 # entries may themselves be file name patterns.
 for JDir in $dirlist
 do
  if [ -d "$JDir/." ] ; then
   chkcjhm_report_dir "$JDir"
  elif [ "$JDir" = "$dirlist" ] ; then
   # The one and only requested entry is not a directory.
   echo "chkcjhm: Invalid $JDir specified subdirectory on ${HOST}!"
   exit 1
  fi
 done
fi
