#!/bin/sh
#=======================================================================
# Copy a list of files from cfs to disk after       --- load_sublist ---
# breaking it into sub lists that each contain a run time determined
# number of files such that no sub list is greater than a user defined
# size limit or number of files. Lines of the form "#break" found in a
# user supplied file containing the list of files to be loaded will
# also force a new sublist to be started.
# $Id: load_sublist_jobdef 669 2012-04-24 20:44:05Z acrnrls $
#=======================================================================
#
#     keyword :: load_sublist
# description :: load a list of files from cfs in smaller groups

 # Export every variable assigned from this point on
 set -a
 # Source the betapath2 fragment
 . betapath2

 # Job name together with the time and memory values requested for it
 jobname=load_sublist; time="1800" ; memory="900mb" ;

 # Run identifier, the names derived from it (nqsprfx, crawork) and the
 # user identification. username is later copied into load_cfsuser inside
 # Execute_Script.
 runid=job000; nqsprfx="${runid}_"; nqsext='';
 crawork="${runid}_job"; username="acrnxxx"; user="XXX";
 uxxx='uxxx'; load_sublist_uxxx=$uxxx; # memory99=1

 # noprint is also handed on to the load_list jobs through the variable list
 # written to load_defs. nextjob is not referenced again in this file.
 # NOTE(review): presumably both are read by the sourced cdk fragments -- confirm
 nextjob=on
 noprint=on

 # Temporary directory where this script will run
 # A non-empty load_sublist_CCRNTMP replaces CCRNTMP. When it is empty the
 # existing CCRNTMP is kept and, through the ":=" expansion, is also copied
 # into load_sublist_CCRNTMP.
 # NOTE(review): the empty assignment is presumably filled in when the job
 # string is created -- confirm
 load_sublist_CCRNTMP=''
 CCRNTMP=${load_sublist_CCRNTMP:=$CCRNTMP}

 # RUNPATH on execution machine
 # Same override rule as above, driven by load_sublist_RUNPATH
 load_sublist_RUNPATH=''
 RUNPATH=${load_sublist_RUNPATH:=$RUNPATH}

 # Alternate path to a directory where .queue/.crawork will be found
 JHOME=''

 # The resets below are applied only when JHOME is non-empty and differs
 # from HOME
 if [ -n "$JHOME" -a x"$JHOME" != x"$HOME" ]; then
   # Allow optional reset of DATAPATH/RUNPATH
   # A non-empty JHOME_DATA replaces both DATAPATH and RUNPATH.
   # NOTE(review): when JHOME_DATA is empty and DATAPATH is not, the first
   # ":=" expansion copies DATAPATH into JHOME_DATA, so the second expansion
   # then sets RUNPATH to DATAPATH instead of leaving RUNPATH unchanged.
   # JHOME_DATA is also passed on to load_list later. Confirm this is intended.
   JHOME_DATA=''
   DATAPATH=${JHOME_DATA:=$DATAPATH}
   RUNPATH=${JHOME_DATA:=$RUNPATH}
   # Allow optional reset of CCRNTMP
   # A non-empty JHOME_RUN replaces CCRNTMP; otherwise JHOME_RUN takes the
   # current value of CCRNTMP
   JHOME_RUN=''
   CCRNTMP=${JHOME_RUN:=$CCRNTMP}
 fi

 # BERUNPATH must be set if files are to be copied from a machine that is
 # not the default back end machine
 # An empty load_sublist_BERUNPATH leaves any existing BERUNPATH in place
 # and receives that value through the ":=" expansion
 load_sublist_BERUNPATH=''
 BERUNPATH=${load_sublist_BERUNPATH:=$BERUNPATH}

 # RMTRUNPATH is used when vic = on is specified
 # An empty load_sublist_RMTRUNPATH leaves any existing RMTRUNPATH in place
 load_sublist_RMTRUNPATH=''
 RMTRUNPATH=${load_sublist_RMTRUNPATH:=$RMTRUNPATH}

 # rtd parameters
 # PhysA = on/off     physical atmosphere variables
 # PhysO = on/off     physical ocean variables
 # CarbA = on/off     atmosphere carbon variables
 # CarbO = on/off     ocean carbon variables
 # CarbL = on/off     land carbon variables
 # All five switches are turned on here. They are not referenced again in
 # this file.
 # NOTE(review): presumably they are read by the sourced cdk fragments or by
 # downstream jobs -- confirm
 PhysA=on
 PhysO=on
 CarbA=on
 CarbO=on
 CarbL=on

 # Source the comjcl.cdk fragment before Execute_Script is written
 . comjcl.cdk

# Write the body of the job to the file Execute_Script. The here-document
# delimiter is quoted, so nothing between this line and end_of_script is
# expanded while this file runs; the text is copied verbatim and its
# variables are resolved only when Execute_Script itself is executed.
cat > Execute_Script <<'end_of_script'

  set -a

#  * ........................... Parmsub Parameters ............................

  # ---Start_submit_ignore_code----

  # bail MESSAGE
  # Error exit routine: report MESSAGE on stdout prefixed with the job name,
  # append a line holding a single blank to the file haltit in the current
  # directory, then terminate the script with exit status 1.
  bail(){
    printf '%s\n' "load_sublist: $1"
    printf ' \n' >> haltit
    exit 1
  }

  # Setting CCCJOB_ROOT will allow a job specific version
  # of make_file_name_list to be used
  CCCJOB_ROOT=''
  if [ -n "$CCCJOB_ROOT" ]; then
    # Use the copy found in the bin directory below the job specific root
    MAKE_FILE_NAME_LIST="$CCCJOB_ROOT/bin/make_file_name_list"
  else
    # No root given: rely on the command search path
    MAKE_FILE_NAME_LIST=make_file_name_list
  fi

  # Tag used to build file names that are unique to this execution: day of
  # year, hour, minute and second from date, followed by the process id of
  # this shell
  stamp=`date "+%j%H%M%S"$$`

  # These variables are set when the job string is created
  # NotSet is the placeholder value each of them carries until then
  previous_year=NotSet
  previous_month=NotSet

  current_year=NotSet
  current_month=NotSet

  next_year=NotSet
  next_month=NotSet

  run_start_year=NotSet
  run_start_month=NotSet
  run_stop_year=NotSet
  run_stop_month=NotSet

  # These parameters may be required by make_file_name_list and so must be
  # present in the environment at the time make_file_name_list is invoked
  # Each option is resolved through a chain of names, from the general one
  # (load_...) through the job specific one (load_sublist_...) to the plain
  # name; each link takes the value of the previous link.
  # cfsuser starts from username
  load_cfsuser=$username
  load_sublist_cfsuser=$load_cfsuser
  cfsuser=$load_sublist_cfsuser

  # masterdir ends up as off unless one of the links is given another value
  load_masterdir=off
  load_sublist_masterdir=${load_masterdir:=off}
  masterdir=${load_sublist_masterdir:=off}

  # shortermdir ends up as on unless one of the links is given another value
  load_shortermdir=on
  load_sublist_shortermdir=${load_shortermdir:=on}
  shortermdir=${load_sublist_shortermdir:=on}

  # If either reset_start_year or reset_stop_year are set then they must be
  # of the form old_year:new_year (ie a colon separated pair of integers)
  # where the first integer is the year that needs to be changed
  # and the second integer is the year that it will be changed to.
  # These may potentially change the value of start_year or stop_year that
  # are defined after the call to make_file_name_list below
  # The default supplied in each expansion is the empty string, so all three
  # variables stay empty unless the load_sublist_ line above them is given
  # a value.
  load_sublist_reset_start_year=''
  reset_start_year=${load_sublist_reset_start_year:=''}
  load_sublist_reset_stop_year=''
  reset_stop_year=${load_sublist_reset_stop_year:=''}
  # reset_end_year is defined here for backward compatibility
  # It is effectively equivalent to reset_stop_year.
  load_sublist_reset_end_year=''
  reset_end_year=${load_sublist_reset_end_year:=''}

  # make_file_name_list uses the variables current_year, current_month,
  # previous_year, previous_month, next_year and next_month to
  # determine start and stop dates that may be used for file name creation.

  # This invocation of make_file_name_list will process the *_year and *_months
  # variables defined above and output a file containing definitions for
  # start_year, start_mon, stop_year, stop_mon, end_year, end_mon
  fopts=''
  load_sublist_mon_offset=''
  if [ -n "$load_sublist_mon_offset" ]; then
    # Set a user supplied month offset
    eval fopts=\"--mon_offset\=$load_sublist_mon_offset\"
  fi
  # The output file is named after the run id and the time stamp and is
  # written to the current directory
  tmp_file_list="tmp_file_list_${runid}_${stamp}"
  # fopts is deliberately left unquoted so that an empty value adds no argument
  $MAKE_FILE_NAME_LIST $fopts --dates_only $tmp_file_list || \
    bail "Problem in make_file_name_list"

  # Verify that the output list is not empty
  [ ! -s "$tmp_file_list" ] && bail "Unable to create file list"

  # A file list was created ...source it
  # This will define start_year, start_mon, stop_year, stop_mon,
  # end_year and end_mon in the current environment
  # The explicit ./ prefix makes the dot command read the file that was just
  # checked in the current directory. Without a slash in the name the shell
  # looks the file up through the command search path, which fails in a
  # strictly POSIX shell unless the current directory is on that path.
  : ; . ./$tmp_file_list
  rm -f $tmp_file_list

  # Define a variable containing a string that will identify the
  # current year/month range, for possible use in file names etc.
  ym_range="${start_year}m${start_mon}_${stop_year}m${stop_mon}"

  # arclabel may be used to determine the name of the cmcarc file that will be
  # searched for files in the list of files to be loaded.
  # If set then the cmcarc filename will be ${arclabel}_${timestamp}_arc.
  # If not set then the cmcarc filename will be ${commonprefix}_${timestamp}_arc
  # where commonprefix is the common prefix of all of the individual file names
  # of the files that are loaded.
  # arclabel stays empty unless load_sublist_arclabel is given a value; it is
  # passed on to load_list through the variable list written to load_defs,
  # which skips empty values.
  load_sublist_arclabel=''
  arclabel=${load_sublist_arclabel:-''}

  # join holds the number of file names supplied with the job in the
  # variables file1, file2, ... ; zero means none were supplied.
  # NOTE(review): the marker line below is presumably replaced by those
  # definitions and a new value for join when the job string is created --
  # confirm. It is kept exactly as is.
  join=0
  # <<INSERT_LOAD_SUBLIST>>

  # Create an empty local file to contain the list of files to be processed
  local_flist="FILE_LIST_$stamp"
  rm -f $local_flist
  touch $local_flist

  # If the user has supplied a file list when the job string was created
  # then copy these file names into local_flist
  if [ $join -gt 0 ]; then
    n=0
    while [ $n -lt $join ]; do
      n=`expr $n + 1`
      # Fetch the value of the n-th file variable by name
      eval curr_file=\$file$n
      # NOTE(review): the value is echoed unquoted, so surrounding white space
      # is dropped and any wildcard characters would be expanded by the shell
      echo $curr_file >> $local_flist
      # Clear the n-th file variable once its value has been recorded
      eval file${n}=
    done
    # All supplied names have been consumed
    join=0
  fi

  # If load_file_list is defined then add the file names defined therein
  # to the list of files to be loaded. This will be in addition to any
  # provided by the user when this string was created.
  # load_file_list may contain a white space separated list of file names.
  # The files provided in load_file_list must be available via "access"
  load_file_list=''

  if [ -n "$load_file_list" ]; then
    # Load file list may be a white space separated list of file names
    # Each file in this list will contain a list of
    # file names of files to be loaded from cfs
    for add_file_list in $load_file_list; do
      # access a file containing the list of file names
      # The list is made available under a local name built from the stamp.
      # NOTE(review): access and release are defined outside this file; the
      # exit status of access is not checked here, so a failure would only
      # show up as an error from cat below -- confirm this is acceptable
      access FILE_LIST2_$stamp $add_file_list nocp=off

      # Concatenate lists
      # The result goes to a scratch file first and then replaces local_flist
      cat $local_flist FILE_LIST2_$stamp > tmp_$local_flist
      mv tmp_$local_flist $local_flist

      # The local copy is no longer needed once it has been appended
      release FILE_LIST2_$stamp
    done
  fi

  # Build the command line options for the second call to make_file_name_list
  fopts=''
  # The --size_max= option implies multi-list output
  # User specification of the maximum size of a single arcfile
  # The limit is resolved through a chain of two names, in the same way as
  # the other options in this script: each link takes the value of the
  # previous link and the last link supplies the default of 8GB. The final
  # expansion reads load_sublist_max_arcsize; reading load_sublist_maxarcsize
  # there instead would make the line in between have no effect and would
  # ignore a value given for load_sublist_max_arcsize.
  load_sublist_maxarcsize=''
  load_sublist_max_arcsize=$load_sublist_maxarcsize
  max_arcsize=${load_sublist_max_arcsize:=8GB}
  fopts="$fopts --size_max=$max_arcsize"

  # The --number_max= option implies multi-list output
  # User specification of the maximum number of files allowed in a single arcfile
  # No limit on the number of files is passed when this is left empty
  load_sublist_max_files_per_arc=''
  max_files_per_arc=${load_sublist_max_files_per_arc:=''}
  [ -n "$max_files_per_arc" ] && fopts="$fopts --number_max=$max_files_per_arc"

  # The --pattern= option supplies file name patterns that will be used by
  # make_file_name_list to add files found on disk that match the patterns.
  load_sublist_pattern=''
  pattern=$load_sublist_pattern
  [ -n "$pattern" ] && fopts="$fopts --pattern=$pattern"

  # Add the --in= command line option for make_file_name_list
  # if there is a non-empty file containing a file name list to use
  # in_opt is cleared first so that a value inherited from the environment
  # cannot be mistaken for a list supplied to this job
  in_opt=''
  [ -s "$local_flist" ] && in_opt="--in=$local_flist"

  # If neither a list of file names nor at least one file name pattern are
  # supplied then make_file_name_list will not create output file lists
  if [ -z "$in_opt" -a -z "$pattern" ]; then
    bail "Neither file name list nor file name pattern were supplied"
  fi

  # Second call to make_file_name_list: split the requested files into
  # several lists. Every list is written to its own file holding file name
  # definitions of the form file1=... and the output files are named
  # ${fname_defs}001, ${fname_defs}002, ...
  fname_defs="FNAME_DEFS_"
  # Discard lists left over from an earlier execution
  rm -f ${fname_defs}*
  if $MAKE_FILE_NAME_LIST --cfs_files $fopts $in_opt $fname_defs 2>&1; then
    :
  else
    bail "Problem in make_file_name_list"
  fi

  # Number of list files produced by the call above
  nlists=`ls -1 ${fname_defs}* | wc -l`

  # Create a file containing parmsub definitions used by load_list

  # pass bemach to load_list if it is set here
  bemach=''
  if [ -z "$bemach" ]; then
    # If bemach is not set explicitly then see if this job is executing on
    # a back end machine and use that machine name for bemach
    # The short host name is the part of the node name before the first dot;
    # a node name without a dot is used whole. cut is used rather than an awk
    # program so that no dollar sign has to be escaped in this here-document:
    # a backslash in front of the field reference is passed to awk literally
    # and is rejected by several awk implementations.
    this_mach=`uname -n | cut -d. -f1`
    on_back_end=0
    # Recognise back end hosts by node pattern, full name or two letter alias
    case $this_mach in
                     c[0-9]*) on_back_end=1 ;;
      zeta|saiph|spica|hadar) on_back_end=1 ;;
                 za|sa|sp|ha) on_back_end=1 ;;
    esac
    if [ $on_back_end -eq 1 ]; then
      # Set this_mach to its known alias
      # A name that matches none of the patterns is kept unchanged
      case $this_mach in
        ha|ha*) this_mach=hadar ;;
        sp|sp*) this_mach=spica ;;
           c1*) this_mach=spica ;;
           c2*) this_mach=hadar ;;
        za|ze*) this_mach=zeta  ;;
        sa|sa*) this_mach=saiph ;;
           c6*) this_mach=saiph ;;
           c7*) this_mach=zeta  ;;
      esac
      bemach=$this_mach
    fi
  fi

  # The default if none of sv, both, vic or svvic are set to "on" is to
  # load files from the back end machine

  # sv : off means load files from the back end
  # sv : on  means load files from pollux
  # sv is resolved through the chain load_sv, load_sublist_sv, sv and ends up
  # as off unless one of the links is given another value
  load_sv=off
  load_sublist_sv=${load_sv:=off}
  sv=${load_sublist_sv:=off}

  both=off    # both  : on means load files from pollux and the back end machine
  vic=off     # vic   : on means send files to Victoria
  svvic=off   # svvic : on means load files onto pollux and send files to Victoria

  # nolist : off means save CFSDATA* files in ~/info
  # nolist : on means do not save CFSDATA* files in ~/info
  # nolist defaults to on when load_sublist_nolist is left empty
  load_sublist_nolist=''
  nolist=${load_sublist_nolist:=on}

  # besc = on causes a tloader job to run on the back end (spica/hadar)
  # besc defaults to off when load_sublist_besc is left empty
  load_sublist_besc=''
  besc=${load_sublist_besc:=off}

  # Hand the CCRNTMP and RUNPATH overrides of this job on to load_list under
  # the names load_list_CCRNTMP and load_list_RUNPATH
  load_list_CCRNTMP=$load_sublist_CCRNTMP
  load_list_RUNPATH=$load_sublist_RUNPATH

  # months is set to 12 so that stdout from the cccjob command is minimized
  # NOTE(review): the assignment goes through eval with an escaped equals sign,
  # presumably so that the line is not treated as a plain definition when the
  # job string is processed -- confirm
  eval months\=12

  # Names of the variables whose current values are handed on to load_list
  # NOTE(review): both is given a value earlier in this script but is not in
  # this list, so it is never written to load_defs -- confirm this is intended
  varlist='load_list_CCRNTMP
           load_list_RUNPATH
           besc
           RUNPATH
           CCRNTMP
           BERUNPATH
           RMTRUNPATH
           JHOME
           JHOME_DATA
           JHOME_RUN
           months
           crawork
           arclabel
           uxxx
           runid
           noprint
           bemach
           cfsuser
           masterdir
           shortermdir
           nolist
           sv
           vic
           svvic'
  # Start from an empty load_defs file
  rm -f load_defs
  touch load_defs
  # Write one single quoted definition per variable that has a non-empty
  # value; variables that are empty or unset are left out
  for var in $varlist; do
    eval val=\$$var
    if [ -n "$val" ]; then
      echo ${var}=\'$val\' >> load_defs
    fi
  done

  # Build one load_list job per file list and collect all of them, in order,
  # in a single job string file
  job_string_to_insert="INSERT_JOB_STRING_${runid}_$stamp"
  rm -f $job_string_to_insert
  touch $job_string_to_insert
  nn=0
  for fname in `ls -1 ${fname_defs}*`; do
    nn=`expr $nn + 1`
    if [ $nn -eq $nlists ]; then
      # This is the last load_list job in the current series of jobs
      # Use load_list_keepdskcpy = off for this job
      eval keepdskcpy_opt=load_list_keepdskcpy\=off
    else
      # Use load_list_keepdskcpy = on for this job
      # keepdskcpy : on means do not remove arcfiles from disk on cfs after they
      # have been loaded. This will eliminate the need to reload the same arc file
      # on a subsequent tloader when files from the previously loaded arc file are
      # required by the subsequent tloader job.
      eval keepdskcpy_opt=load_list_keepdskcpy\=on
    fi
    # Remove the output of the previous pass first. Otherwise a cccjob call
    # that produces nothing would leave the old CURR_JOB in place and the
    # previous job would be appended a second time.
    rm -f CURR_JOB
    cccjob --out=CURR_JOB --job="load_list=${fname}:s" load_defs $keepdskcpy_opt \
           --start="${start_year}:${start_mon}" --stop="${stop_year}:${stop_mon}"
    # Verify that a job was actually written before it is appended
    [ -s CURR_JOB ] || bail "Problem in cccjob"
    cat CURR_JOB >> $job_string_to_insert

    # Show the name and the contents of the file list used for this job
    echo "====================================="
    echo "$fname"
    cat $fname
    echo "====================================="
  done

  # dry_run defaults to off when load_sublist_dry_run is left empty
  load_sublist_dry_run=''
  dry_run=${load_sublist_dry_run:=off}
  case x"$dry_run" in
    xon)
      # Dry run: only keep a copy of the new job string in the tmp directory
      # below JHOME, or below HOME when JHOME is empty
      cp $job_string_to_insert ${JHOME:-$HOME}/tmp/load_list_${runid}_$stamp
      ;;
    *)
      # Normal run: splice the new job string into the string this job came
      # from, then discard the local copy.
      # The variable "crawork" must be defined/exported in the current environment
      splice_job_string $job_string_to_insert || bail "Problem in splice_job_string"
      rm -f $job_string_to_insert
      ;;
  esac

  # ---Stop_submit_ignore_code----

end_of_script

 # Source the endjcl.cdk fragment now that Execute_Script has been written
 . endjcl.cdk

#end_of_job
