#!/bin/sh
#=========================================================================
# Create a group of jobs to run at regular intervals         --- block ---
# $Id$
#=========================================================================
#
# Larry Solheim  ...Nov 2013
#=========================================================================
#
#     keyword :: block
# description :: run a block of jobs at regular intervals
#
#
  # Automatically export every variable assigned from here on
  set -a
  # Source betapath2 into the current shell
  # NOTE(review): presumably this defines site specific paths -- confirm
  . betapath2

#  * ........................... Parmsub Parameters ......................

  # Identity of the run. runid is also used to build nqsprfx and crawork
  uxxx='uxxx'; runid="job000"; nqsprfx="${runid}_"; nqsext=''
  crawork="${runid}_job"; username="acrnxxx"; user="XXX";

  # Job name and resource requests
  # NOTE(review): stime/gptime/time look like time limits and memory1/memory
  # like a memory limit, presumably consumed by comjcl.cdk -- confirm
  jobname=block;
  stime=1800; gptime=$stime; time=$stime
  memory1="5000mb"; memory=$memory1

  noprint=on

  # nextjob must be off, always
  # The assignment is written with eval and an escaped equals sign, the same form
  # that the execute script uses to hide assignments from cccjob variable
  # substitution
  eval nextjob\=off

  debug=off

  # Temporary directory where this script will run
  # When block_CCRNTMP is null it is assigned the current value of CCRNTMP,
  # so CCRNTMP is only changed when block_CCRNTMP has been given a value
  block_CCRNTMP=''
  CCRNTMP=${block_CCRNTMP:=$CCRNTMP}

  # RUNPATH on execution machine
  # Same pattern as above: a non null block_RUNPATH overrides RUNPATH
  block_RUNPATH=''
  RUNPATH=${block_RUNPATH:=$RUNPATH}

#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#
#=#=#   Begin execute script  #=#=#
#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#

  . comjcl.cdk
# Everything up to the end_of_script line is written to the file Execute_Script.
# The here document delimiter is quoted so nothing below is expanded at this point.
cat > Execute_Script <<'end_of_script'

  # Auto export is required for make_file_name_list
  set -a

  # ---Start_submit_ignore_code----

  # These variables are set when the job string is created
  # NotSet is only a placeholder that remains when no value was supplied
  previous_year=NotSet
  previous_month=NotSet

  current_year=NotSet
  current_month=NotSet

  next_year=NotSet
  next_month=NotSet

  run_start_year=NotSet
  run_start_month=NotSet
  run_stop_year=NotSet
  run_stop_month=NotSet

  # Indicate how cccjob should be invoked
  # Setting CCCJOB_ROOT will allow a job specific version of cccjob to be used
  CCCJOB_ROOT=''
  if [ -z "$CCCJOB_ROOT" ]; then
    # No job specific version: no env prefix is used and make_file_name_list
    # is located through PATH
    CCCJOB_ENV=''
    MAKE_FILE_NAME_LIST=make_file_name_list
    cccjob_root_def=''
  else
    # CCCJOB_ENV becomes a command prefix of the form "env CCCJOB_ROOT <equals> root",
    # make_file_name_list is taken from the bin subdir of CCCJOB_ROOT and
    # cccjob_root_def holds the same definition for use on the cccjob command line.
    # The assignments are built with eval and escaped equals signs.
    eval CCCJOB_ENV=\'env CCCJOB_ROOT\=$CCCJOB_ROOT\'
    MAKE_FILE_NAME_LIST="$CCCJOB_ROOT/bin/make_file_name_list"
    eval cccjob_root_def=\'CCCJOB_ROOT\=$CCCJOB_ROOT\'
  fi

  # Define a date string that will be used in file names etc
  # Format is YYYYMMDDhhmmss followed by an underscore and the current process id
  stamp=`date "+%Y%m%d%H%M%S_"$$`

  ToF(){
    #   usage: ToF var_name
    # purpose: Normalize the on/off style flag held in the variable named var_name
    #          If var_name is null (or holds only white space) or has a value of
    #          "off" or "no" then it is reset to "0"
    #          If var_name has a value of "on" or "yes" then it is reset to "1"
    #          White space is ignored in the comparison, so " on " counts as "on"
    #          Otherwise return with var_name unchanged
    #   notes: Exits through bail when no variable name is supplied.
    #          ToF_var and XXX are scratch variables overwritten by every call.
    #          Tracing is switched off on entry and is always switched back on
    #          (set -x) before returning.
    #          The value is never re-parsed as shell code, so a value containing
    #          blanks, semicolons or glob characters is left alone and cannot
    #          trigger the execution of a command.
    set +x
    [ -z "$1" ] && bail "ToF requires a variable name as an argument"
    # Fetch the current value of the named variable
    eval ToF_var\=\$$1
    # Remove blanks, tabs and newlines so that padded values still match below
    # The value is passed quoted to avoid file name expansion
    XXX=`printf '%s' "$ToF_var" | tr -d ' \t\n'`
    case "$XXX" in
      on|yes)    eval ToF_var\=1 ;;
      off|no|'') eval ToF_var\=0 ;;
      *)         : ;;  # not a recognized flag ...ToF_var keeps the original value
    esac
    # Assign by reference to ToF_var so the value itself is never evaluated
    eval "$1"\=\$ToF_var
    set -x
  }

  # this_host is the node name reported by uname -n, truncated at the first dot
  # (the dollar sign is escaped only because the awk program sits inside backticks)
  this_host=`uname -n|awk -F\. '{print \$1}' -`

  # this_mach will be a known alias (or possibly the actual machine name)
  this_mach=$this_host

  # Optionally define a prefix for all ssh commands
  # to reroute through a head node when required
  ROUTE_SSH=''

  # create_block_remote flags whether or not the block job string will be created
  # on this machine or on a remote (front end) machine
  create_block_remote=off

  # Map node name patterns onto a machine alias and an ssh prefix
  # Only the c1* and c2* patterns request remote creation of the block job string
  # Any other node name leaves this_mach equal to this_host with no ssh prefix
  case $this_mach in
      c1*) this_mach=spica
           ROUTE_SSH="ssh spica"
           create_block_remote=on
           ;;
      c2*) this_mach=hadar
           ROUTE_SSH="ssh hadar"
           create_block_remote=on
           ;;
     ib3*) this_mach=pollux
           ROUTE_SSH="ssh pollux"
           ;;
   joule*) this_mach=joule
           ROUTE_SSH="ssh joule"
           ;;
  esac
  # Convert the on/off flag to 1/0 for the numeric test made later
  ToF create_block_remote

  # echo_n writes its arguments without a trailing newline
  # Probe the local echo once: if "echo -n" prints a literal -n then the option
  # is not recognized and the \c terminator is used in its place
  case "X`echo -n`" in
    X-n) echo_n() { echo ${1+"$@"}'\c'; } ;;
      *) echo_n() { echo -n ${1+"$@"}; } ;;
  esac

  # Define a file name that may be used to contain error messages
  error_out="$HOME/.queue/error_block_${runid}_${this_mach}_$stamp"
  # Remove any left over file with the same name
  [ ! -z "$error_out" ] && rm -f "$error_out"

  # bail is a simple error exit routine
  #   usage: bail message ...
  # purpose: Write a time stamped message, tagged with this_host and runid, to
  #          stdout, append the same line to the file named by error_out and
  #          then exit with status 1
  #   notes: The line is built once so both copies carry the same time stamp.
  #          The redirect target is quoted so that a path containing blanks
  #          does not make the redirect fail.
  #          bail_stamp and bail_msg are scratch variables.
  bail(){
    bail_stamp=`date`
    # The unquoted echo squeezes repeated blanks in the date string
    bail_msg=`echo $bail_stamp`" $this_host  $runid --- block: $*"
    # Send error message to stdout
    echo "$bail_msg"
    # Also copy to a file in case stdout goes missing in action
    echo "$bail_msg" >>"$error_out"
    exit 1
  }

  # If either reset_start_year or reset_stop_year are set then they must be
  # of the form old_year:new_year (ie a colon separated pair of integers)
  # where the first integer is the year that needs to be changed
  # and the second integer is the year that it will be changed to.
  # These may potentially change the value of start_year or stop_year that
  # are defined after the call to make_file_name_list below
  # NOTE(review): these variables are only assigned (and auto exported) here,
  # presumably they are read by make_file_name_list -- confirm
  block_reset_start_year=''
  reset_start_year=${block_reset_start_year:=''}
  block_reset_stop_year=''
  reset_stop_year=${block_reset_stop_year:=''}
  # reset_end_year is defined here for backward compatibility
  # It is effectively equivalent to reset_stop_year.
  block_reset_end_year=''
  reset_end_year=${block_reset_end_year:=''}

  # This invocation of make_file_name_list will process the *_year and *_month
  # variables defined above and output a file containing definitions for
  # start_year, start_mon, stop_year, stop_mon
  fopts=''
  block_mon_offset=''
  if [ -n "$block_mon_offset" ]; then
    # Set a user supplied month offset
    # (eval with an escaped equals sign is used to build the option string)
    eval fopts=\"--mon_offset\=$block_mon_offset\"
  fi
  tmp_file_list="tmp_file_list_${runid}_${stamp}"
  # stdout and stderr from make_file_name_list are appended to the error file,
  # which is then removed again once the command has succeeded
  $MAKE_FILE_NAME_LIST $fopts --dates_only $tmp_file_list >>$error_out 2>&1 || \
    bail "Problem in make_file_name_list"
  rm -f $error_out

  # Verify that the output list is not empty
  [ ! -s "$tmp_file_list" ] && bail "Unable to determine start/stop dates."

  # A file list was created ...source it to define
  # start_year, start_mon, stop_year, stop_mon, days_in_job
  # in the current environment
  # NOTE(review): the leading ": ;" is a no-op, presumably it keeps this line
  # from being mistaken for an include directive at job creation time -- confirm
  : ; . $tmp_file_list
  rm -f $tmp_file_list

  # Define a variable containing a string that will identify the
  # current year/month range, for possible use in file names etc.
  ym_range="${start_year}m${start_mon}_${stop_year}m${stop_mon}"
  echo "runid = $runid     range = $ym_range"

  # logall = on turns on creation of log files for all jobs in this chunk
  # These log files will be copied to an external log directory at the end
  # of each time series chunk
  logall=on
  ToF logall
  # When logall is true, logall_def holds the definitions (logall on, noprint off)
  # that are added to the cccjob command line below, otherwise it stays null
  logall_def=''
  [ $logall -eq 1 ] && eval logall_def=\'logall\=on noprint\=off\'

  # use_jhome_run_dir = on means execute all scripts in the "run" subdir
  # of the JHOME root directory rather than in the standard CCRNTMP dir
  use_jhome_run_dir=off
  ToF use_jhome_run_dir

  # Initialize restart
  # If restart is defined and non null then the --restart= option will be added
  # to the cccjob command line below
  # restart may be reassigned by a user supplied code snippet below
  restart=''

  # Initialize block_jobdefs (the name of a file passed to the block job) and
  # next_block_jobdefs (the name of a file passed to the next_block job)
  # They will be reassigned by a user supplied code snippet below
  block_jobdefs=''
  next_block_jobdefs=''

  # User supplied shell script will be inserted here at job creation time.
  # This code snippet must define block_JOBDESC (and other variables as required)
  # and also create a local file named $block_jobdefs via a here document.
  #
  # This code snippet may optionally create a second
  # local file named $next_block_jobdefs via a here doc, that will be passed
  # to next_block if it exists
  #
  # These defs will be used below to create the desired job string
  # Some variables specific to this job (block) and/or next_block that should
  # be passed in via this user supplied code snippet are as follows.
  #   exec_post_block = off/on
  #       ...avoid running any post processing after each block of jobs completes
  #       ...this post processing is always done, by default
  #   remove_jhome_dir = off/on
  #       ...remove the temporary JHOME and JHOME_DATA dirs in which the block ran
  #       ...these dirs are always removed, by default
  # <<INSERT_BLOCK>>

  # This set of variable definitions must contain a definition for block_JOBDESC
  [ -z "$block_JOBDESC" ] && bail "Missing definition for block_JOBDESC."

  # The variable block_jobdefs must be defined and a file named
  # $block_jobdefs must be created above
  # (the file must exist in the current directory and must not be empty)
  [ -z "$block_jobdefs" ] && bail "block_jobdefs is not defined."
  [ -s "$block_jobdefs" ] || bail "The job defs file is missing or empty."

  # The name of the file containing this job at the time the job is created
  # and the name of the machine on which it was created
  ctime_file_name=''
  ctime_file_mach=''

  # A flag to determine if stale JHOME jobs should be restarted automatically
  autorestart=off

  # Identify the name of the string containing this job
  this_job_string="${HOME}/.queue/.crawork/${crawork}_string"

  # Allow the user to disallow the remote copy of the first job in the run
  # This is required in the special case where non-jhome jobs, with nextjob = on,
  # are inserted at the top of the job string
  copy_initial_block_job_from_remote=on
  ToF copy_initial_block_job_from_remote

  if [ $copy_initial_block_job_from_remote -eq 1 ]; then
    # If this is the first job in the run then copy the job string from
    # where it was created to the crawork dir on the execution machine
    # The two numeric comparisons are made as separate tests joined by &&
    # (a bare "and" inside a single test is a syntax error, which made this
    # condition always false so the copy was never attempted)
    if [ "$start_year" -eq "$run_start_year" ] && [ "$start_mon" -eq "$run_start_month" ]; then
      # Copy the job string from a possibly remote location at the beginning
      # of the entire job string
      [ -z "$ctime_file_name" ] && bail "ctime_file_name is not defined."
      [ -z "$ctime_file_mach" ] && bail "ctime_file_mach is not defined."
      # ROUTE_SSH is left unquoted on purpose: it is either null or a
      # command prefix made of several words
      $ROUTE_SSH scp ${ctime_file_mach}:$ctime_file_name $this_job_string ||
          bail "Problem copying ${ctime_file_mach}:$ctime_file_name to $this_job_string"
      # Make sure the owner can read and rewrite the local copy
      chmod u+rw $this_job_string ||
          bail "Unable to change permissions on $this_job_string"
    fi
  fi

  # Verify that this path is valid and the file is not empty before proceeding
  [ -s "$this_job_string" ] || bail "Missing or empty crawork string $this_job_string"

  # Create a string containing the current crawork definition
  # (a definition of parent_crawork with the value of crawork in this job)
  # Hide this crawork assignment from cccjob variable substitution
  eval parent_crawork_def=parent_crawork\=$crawork

  # Create a crawork value for the child job
  # (built from runid, the current year/month range and the time stamp)
  # Hide this crawork assignment from cccjob variable substitution
  eval child_crawork_def=crawork\=${runid}_${ym_range}_$stamp

  # Pass the next_block_jobdefs file to next_block on the cccjob command line
  # below if next_block_jobdefs is defined in the current environment
  # NOTE(review): the meaning of the trailing ":s" is defined by cccjob and
  # cannot be determined from this file
  if [ -n "$next_block_jobdefs" ]; then
    next_block_JOBDESC="next_block=${next_block_jobdefs}:s"
  else
    next_block_JOBDESC="next_block:s"
  fi

  # use_jhome determines if the job will run in a separate directory, or not
  # use_jhome = on  means use JHOME and JHOME_DATA
  # use_jhome = off means use the standard HOME and DATAPATH/RUNPATH
  use_jhome=on
  ToF use_jhome

  if [ $use_jhome -eq 1 ]; then

    # Run the child job from the JHOME dir and use JHOME_DATA

    # JHOME and JHOME_DATA identify where the child job will run
    [ -z "$RUNPATH" ] && bail "RUNPATH must be defined."
    [ -z "$runid" ]   && bail "runid must be defined."

    # Define a prefix to be used to locate existing jhome_root dirs
    # that were created for the same runid and date range
    # These dirs may be present if the current job has been restarted
    jhome_root_pfx=$RUNPATH/JHOME_${runid}_${ym_range}_

    # Define a common root dir in which JHOME and JHOME_DATA will live
    # Use runid, the current date range and a time stamp to uniquely identify this dir
    jhome_root=${jhome_root_pfx}${stamp}

    # Define JHOME and JHOME_DATA and add these defs to a variable to be
    # used on the following cccjob command line
    # Do not allow cccjob substitution for these variable definitions
    eval JHOME\=$jhome_root/home
    eval JHOME_DATA\=$jhome_root/data
    eval jhomedefs=\"JHOME\=$JHOME JHOME_DATA\=$JHOME_DATA\"

    if [ $use_jhome_run_dir -eq 1 ]; then
      # Also define JHOME_RUN and append its definition to jhomedefs
      eval JHOME_RUN\=$jhome_root/run
      eval jhomedefs=\"$jhomedefs JHOME_RUN\=$JHOME_RUN\"
    fi

    # Assign variables with the values of HOME and JHOME that have any
    # trailing slash removed, to be used in the following comparison
    # Abort if JHOME would be the same directory as the real HOME
    jhomenoslash=`echo $JHOME|sed 's/\/$//'`
    homenoslash=`echo $HOME|sed 's/\/$//'`
    if [ -n "$JHOME" -a x"$jhomenoslash" = x"$homenoslash" ]; then
      bail "JHOME = HOME = $JHOME"
    fi

    # JHOME_LOGD is the name of a directory used to contain stdout and error files
    # produced by runs that were done within a JHOME dir that remains on disk at the
    # time a similar (or the same) job gets resubmitted. This kind of info will
    # be lost when the JHOME dir gets deleted unless it is moved elsewhere.
    # This will be a machine specific dir that is relative to the standard DATAPATH
    # on that machine so we need to determine a value for that standard DATAPATH
    # The echo command is single quoted so DATAPATH is expanded on the remote side
    # NOTE(review): this ssh does not use the ROUTE_SSH prefix -- confirm intended
    STANDARD_DATAPATH=`ssh $this_mach 'echo $DATAPATH'`
    # Determine an absolute path name from the known location that is
    # relative to DATAPATH (ie $DATAPATH/../JHOME_ERR_LOG_DIR)
    pcmd='use Cwd qw(abs_path); print abs_path("'$STANDARD_DATAPATH/../JHOME_ERR_LOG_DIR'")'
    JHOME_LOGD=`perl -e "$pcmd"` ||
      bail "Unable to determine abs path for $STANDARD_DATAPATH/../JHOME_ERR_LOG_DIR"

    echo "  DATAPATH = $DATAPATH"
    echo "   RUNPATH = $RUNPATH"
    echo "      HOME = $HOME"
    echo "     JHOME = $JHOME"
    echo "JHOME_DATA = $JHOME_DATA"
    echo "JHOME_LOGD = $JHOME_LOGD"

    # If any JHOME root directories exist for the same run and date range
    # then remove them completely, but first copy .queue and .crawork files
    # found therein to the log directory
    # The listing runs in a subshell that always succeeds and discards stderr,
    # so found_jhome is simply null when nothing matches the prefix
    found_jhome=`(ls -1d ${jhome_root_pfx}* || : ) 2>/dev/null`
    if [ -n "$found_jhome" ]; then
      # Directories exist for the same runid and date range
      for jhd in $found_jhome; do
        # Ignore anything that is not a directory
        [ -d "$jhd" ] || continue

        # Logs are only saved when the log directory already exists,
        # the old jhome dir is removed in either case
        if [ -d "$JHOME_LOGD" ]; then
          # Copy the contents of the ".queue" and ".crawork" directories to a
          # known location to preserve output/error files from a previous run

          # Create a subdir in JHOME_LOGD with the same name
          # as the jhome dir we are about to delete
          curr_logd=`basename $jhd`
          curr_logd="$JHOME_LOGD/$curr_logd"
          if [ ! -d $curr_logd ]; then
            mkdir -m 750 $curr_logd || bail "Unable to create log dir = $curr_logd"
          fi

          # Add a listing of all contents in the previous jhome dir to the log dir
          # Failures of the listing or of the compression are ignored
          ls -laR $jhd >$curr_logd/contents-prior-to-restart || :
          bzip2 $curr_logd/contents-prior-to-restart || :

          if [ -d "$jhd/home/.queue" ]; then
            # Compress all files before moving them to the log dir
            # Preserve access/mod times, permissions and user/group IDs
            # Avoid a non-zero return status when this dir is empty
            bzip2 $jhd/home/.queue/* 2>/dev/null || :
            cp -p $jhd/home/.queue/* $curr_logd 2>/dev/null || :
            if [ -d "$jhd/home/.queue/.crawork" ]; then
              # Compress all files before moving them to the log dir
              # Preserve access/mod times, permissions and user/group IDs
              # Avoid a non-zero return status when this dir is empty
              bzip2 $jhd/home/.queue/.crawork/* 2>/dev/null || :
              cp -rp $jhd/home/.queue/.crawork $curr_logd 2>/dev/null || :
            else
              echo "** WW ** $jhd/home/.queue/.crawork is not a directory"
            fi
            echo "** WW ** Log info written to $curr_logd"
          else
            echo "** WW ** $jhd/home/.queue is not a directory"
          fi

          # Reset permissions on log files and dirs
          find $curr_logd -type d -exec chmod 1771 {} \;
          find $curr_logd -type f -exec chmod 744 {} \;
        fi

        # Remove this directory
        rm -fr $jhd || bail "Unable to remove $jhd"
        echo "Removed existing dir $jhd"

        # Give the file system ample time to catch up
        sleep 5
      done
    fi

    # Create the JHOME root dir and subdirs for use in the child job
    # that is created below
    # Note that the run subdir is created whether or not use_jhome_run_dir is set
    mkdir -m 750 $jhome_root      || bail "Unable to create $jhome_root"
    mkdir -m 750 $jhome_root/home || bail "Unable to create $jhome_root/home"
    mkdir -m 750 $jhome_root/data || bail "Unable to create $jhome_root/data"
    mkdir -m 750 $jhome_root/run  || bail "Unable to create $jhome_root/run"

    # autorestart is compared as a string here, it is not converted by ToF
    if [ -n "$autorestart" ]; then
      if [ $autorestart = "on" ]; then
        # Create a file named autorestart in $JHOME that contains a full pathname
        # for the crawork string that should be resubmitted for auto restart
        echo "$HOME/.queue/.crawork/${crawork}_string" > $JHOME/autorestart ||
          bail "Cannot create $JHOME/autorestart"
        chmod a+r $JHOME/autorestart
      fi
    fi

  else

    # Do not use JHOME
    # The child job will use the standard HOME/DATAPATH/RUNPATH
    jhomedefs=''

  fi

  # use_block_lock toggles using a lock file to ensure that the next_block job
  # does not get resubmitted until both the diagnostic and the model job
  # have completed (when using pdiag in a model string)
  use_block_lock=on
  if [ -n "$block_JOBDESC" ]; then
    has_pdiag=`echo $block_JOBDESC|sed -n '/pdiag=/p'`
    # If pdiag does not appear in the job description then dont use the lock file
    [ -z "$has_pdiag" ] && use_block_lock=off
  fi
  ToF use_block_lock

  # Define a lock file name to be used to determine when to submit the next_block job
  if [ $use_block_lock -eq 1 ]; then
    # The lock lives under JHOME when that is defined, otherwise under HOME
    block_lock_dir="${JHOME:-$HOME}/.queue/.crawork"
    # Ensure this dir exists with the right permissions
    # bail must be called from the current shell (not from within a parenthesized
    # subshell) otherwise its exit would only leave the subshell and this script
    # would carry on after a failed mkdir
    if [ ! -d "$block_lock_dir" ]; then
      mkdir -p -m 733 $block_lock_dir || bail "Cannot create $block_lock_dir"
    fi
    # mkdir -m only sets the mode of the last dir created so set the parent here
    chmod 733 ${JHOME:-$HOME}/.queue
    next_block_lock="$block_lock_dir/lock_block_${runid}_${ym_range}_${this_mach}_$stamp"
    next_block_lock_def="next_block_lock=$next_block_lock"
  else
    block_lock_dir=''
    next_block_lock=''
    next_block_lock_def=''
  fi

  if [ -n "$next_block_lock" ]; then
    # Create (or overwrite) the next_block lock file
    echo "1" > $next_block_lock
  fi

  # Define a variable containing local defs for the following cccjob command line
  locdefs="parent_job_string=$this_job_string $parent_crawork_def "
  locdefs="$locdefs use_jhome=$use_jhome $next_block_lock_def "
  locdefs="$locdefs $child_crawork_def $jhomedefs $cccjob_root_def $logall_def "

  # Allow the user to determine the execution machine for the child job
  # By default the child job will be submitted to the machine on which this job runs
  block_exec_mach=''

  # Run cccjob to create the child job and put the output into a local
  # file named $block_job
  block_job=block_${stamp}_job

  cccjob_opt=''
  # The two numeric comparisons are made as separate tests joined by &&
  # (a bare "and" inside a single test is a syntax error, which made this
  # condition always false so the restart option was never added)
  if [ "$start_year" -eq "$run_start_year" ] && [ "$start_mon" -eq "$run_start_month" ]; then
    # If this is the first job in the run and restart is defined
    # then add the restart option to the cccjob command line
    if [ -n "$restart" ]; then
      # Remove all white space from restart
      restart=`echo $restart|sed 's/ *//g'`
      cccjob_opt="--restart=$restart"
    fi
  fi

  # cccjob_opt_always_add_updates is not assigned in this section, it is only
  # acted upon when it has been defined elsewhere with a value of 1
  if [ -n "$cccjob_opt_always_add_updates" ]; then
    # Add an option to add update (if present) to every job in the string
    [ $cccjob_opt_always_add_updates -eq 1 ] && cccjob_opt="$cccjob_opt --always_add_updates"
  fi

  # Start and stop dates in the year:month form used on the cccjob command line
  start="${start_year}:${start_mon}"
  stop="${stop_year}:${stop_mon}"

  # Put the command to create the block job string into a shell variable
  create_block_cmd="$CCCJOB_ENV cccjob --out=$block_job --job='$block_JOBDESC $next_block_JOBDESC' --start=$start --stop=$stop $cccjob_opt $block_jobdefs $locdefs"

  # Replace all single quotes with double quotes to avoid conflicts with cmds below
  # (x27 is the single quote character and x22 is the double quote character)
  create_block_cmd=`echo $create_block_cmd|perl -pe 's<\x{27}><\x{22}>g'`

  if [ $create_block_remote -eq 1 ]; then
    # The block job string is to be created on a remote machine
    # Define a remote temporary dir in which the job string will be created
    # This must be a valid absolute path name on the remote machine
    # The path is single quoted so that HOME is not expanded by the local shell
    # NOTE(review): remote_tmp_dir is null when the bail message below is
    # written, so the message cannot show the offending dir
    remote_mach=joule
    remote_tmp_dir=`$ROUTE_SSH ssh $remote_mach ls -1d '$HOME/tmp'` ||
        bail "Unable to find remote dir on $remote_mach   $remote_tmp_dir"

    # Put the pwd in a variable for use below
    CWD=`pwd`

    # Write a script to create the jobstring on the remote machine
    # The here document delimiter is not quoted, so all variables in the
    # script are expanded locally at the time the file is written
    remote_cmds=remote_cmds_$$
    cat > $remote_cmds <<end_remote_cmds
      # Change to the remote working directory
      cd $remote_tmp_dir
      # Identify the current head node on stdout
      hostname
      # Copy in the file containing jobdefs
      scp ${this_mach}:$CWD/$block_jobdefs .
      # Create the job string
      $create_block_cmd
      # Move the newly created job string back to the current machine
      scp $block_job ${this_mach}:$CWD/$block_job
      # Clean up the remote dir
      rm -f $block_jobdefs $block_job
end_remote_cmds

    # Create the job on the remote machine and copy it back here
    # The script written above is fed to a remote bash on stdin
    $ROUTE_SSH ssh $remote_mach 'bash -s' -- < $remote_cmds ||
        bail "Cannot create block job string on $remote_mach"
  else
    # The block job string is to be created on the current machine
    # eval is required so that the quoted job description is parsed as one word
    eval $create_block_cmd ||
        bail "Error executing cccjob ... --job=$block_JOBDESC $next_block_JOBDESC ..."
  fi
  # Whichever way it was created, the job string must now exist locally
  [ -s "$block_job" ] || bail "Failed to create a job string."

  # Submit the job just created
  # block_exec_mach defaults to (and is assigned) this_mach when it is null
  subto=${block_exec_mach:=$this_mach}
  [ -z "$subto" ] &&
      bail "Unable to determine the name of the execution machine."
  rsub $block_job $subto || bail "Error submitting $block_job to ${subto}."

  # ---Stop_submit_ignore_code----

end_of_script

  . endjcl.cdk

#end_of_job

