#!/bin/sh
#=======================================================================
# Copy a list of files to cfs after breaking it     --- dump_sublist ---
# into sub lists that each contain a run time determined number of files
# such that no sub list is greater than a user defined size limit or
# number of files. Lines of the form "#break" found in a user supplied
# file containing the list of files to be dumped will also force a new
# sublist to be started.
# $Id: dump_sublist_jobdef 671 2012-05-04 23:35:22Z acrnrls $
#=======================================================================
#
#     keyword :: dump_sublist
# description :: dump a list of files to cfs in groups

 # Export every variable assigned from here on, then source betapath2.
 # NOTE(review): betapath2 is not part of this file; presumably it sets up
 # the paths needed by the rest of the job -- confirm.
 set -a
 . betapath2

 # Job name plus the time and memory values for this job.
 # NOTE(review): units are not stated here -- confirm against the consumer.
 jobname=dump_sublist; time="2700" ; memory="900mb" ;

 # Run identifier, names derived from it, and user identifiers.
 # NOTE(review): job000/acrnxxx/XXX/uxxx look like placeholders that are
 # replaced when the job string is created -- TODO confirm.
 runid=job000; nqsprfx="${runid}_"; nqsext='';
 crawork="${runid}_job"; username="acrnxxx"; user="XXX";
 uxxx='uxxx'; dump_sublist_uxxx=$uxxx; # memory99=1

 # nextjob is not read anywhere else in this file; noprint appears in the
 # list of variables that are written to dump_defs further down.
 # NOTE(review): the meaning of both switches is defined elsewhere.
 nextjob=on
 noprint=on

 # Each of the four paths below is set with the same three steps:
 #   1. remember the value the variable had on entry (default_<NAME>)
 #   2. take the job specific override dump_sublist_<NAME> (empty here)
 #   3. fall back to the remembered value when the override is empty
 # NOTE(review): the empty dump_sublist_<NAME>='' assignments are presumably
 # filled in by parameter substitution when the job is created -- confirm.

 # Temporary directory where this script will run
 default_CCRNTMP=$CCRNTMP
 dump_sublist_CCRNTMP=''
 CCRNTMP=${dump_sublist_CCRNTMP:=''}
 CCRNTMP=${CCRNTMP:=$default_CCRNTMP}

 # RUNPATH on execution machine
 default_RUNPATH=$RUNPATH
 dump_sublist_RUNPATH=''
 RUNPATH=${dump_sublist_RUNPATH:=''}
 RUNPATH=${RUNPATH:=$default_RUNPATH}

 # BERUNPATH must be set when dumping files from the back end
 default_BERUNPATH=$BERUNPATH
 dump_sublist_BERUNPATH=''
 BERUNPATH=${dump_sublist_BERUNPATH:=''}
 BERUNPATH=${BERUNPATH:=$default_BERUNPATH}

 # RMTRUNPATH is used when vic = on is specified
 default_RMTRUNPATH=$RMTRUNPATH
 dump_sublist_RMTRUNPATH=''
 RMTRUNPATH=${dump_sublist_RMTRUNPATH:=''}
 RMTRUNPATH=${RMTRUNPATH:=$default_RMTRUNPATH}

 # Alternate path to a directory where .queue/.crawork will be found
 JHOME=''

 # The resets below only apply when JHOME is set to something other
 # than the invoking user's HOME.
 if [ -n "$JHOME" ] && [ x"$JHOME" != x"$HOME" ]; then
   # Allow optional reset of DATAPATH/RUNPATH.
   # The ":-" form is required here: JHOME_DATA is consulted twice and the
   # ":=" form would assign $DATAPATH to JHOME_DATA on the first use, which
   # would then overwrite RUNPATH with DATAPATH on the second use even
   # though the user never asked for a reset.
   JHOME_DATA=''
   DATAPATH=${JHOME_DATA:-$DATAPATH}
   RUNPATH=${JHOME_DATA:-$RUNPATH}
   # Allow optional reset of CCRNTMP
   # (JHOME_RUN is consulted once only, so the assigning form is harmless)
   JHOME_RUN=''
   CCRNTMP=${JHOME_RUN:=$CCRNTMP}
 fi

 # rtd parameters
 # PhysA = on/off     physical atmosphere variables
 # PhysO = on/off     physical ocean variables
 # CarbA = on/off     atmosphere carbon variables
 # CarbO = on/off     ocean carbon variables
 # CarbL = on/off     land carbon variables
 # NOTE(review): none of these five switches is read again in this file;
 # presumably they are consumed by comjcl.cdk or by later tooling -- confirm.
 PhysA=on
 PhysO=on
 CarbA=on
 CarbO=on
 CarbL=on

 # Source the common job control code.
 # NOTE(review): comjcl.cdk is not visible here -- confirm what it provides.
 . comjcl.cdk

# Everything up to the line "end_of_script" is written to the file
# Execute_Script. The delimiter is quoted, so the text is copied without
# any variable or command expansion being done at this point.
cat > Execute_Script <<'end_of_script'

  set -a

#  * ........................... Parmsub Parameters ............................

  # NOTE(review): the marker line below (and its Stop counterpart near the
  # end of the script) is presumably recognised by the submission tooling;
  # keep its text exactly as it is -- confirm.
  # ---Start_submit_ignore_code----

 # Report the directories that are in effect for this job
 echo "   CCRNTMP = $CCRNTMP"
 echo "   RUNPATH = $RUNPATH"
 echo " BERUNPATH = $BERUNPATH"
 echo "RMTRUNPATH = $RMTRUNPATH"

  # stamp is day-of-year, hour, minute and second followed by the process
  # id of this shell. It is used below to build work file names.
  stamp=`date "+%j%H%M%S"$$`

  # Identify the execution machine and whether or not it is a back end machine
  # this_host will be the original output from uname
  # this_mach will be a known alias (or possibly the actual machine name)
  # The awk call uses "." as the field separator so that only the leading
  # part of the node name is kept.
  # NOTE(review): the awk program is written with an escaped dollar sign;
  # presumably the backslash is removed before this text is executed --
  # confirm before changing the quoting.
  this_host=`uname -n|awk -F\. '{print \$1}' -`
  this_mach=$this_host
  # on_back_end becomes 1 for names starting with "c" and a digit, and for
  # the four listed machine names or their two letter short forms
  on_back_end=0
  case $this_mach in
                   c[0-9]*) on_back_end=1 ;;
    zeta|saiph|spica|hadar) on_back_end=1 ;;
               za|sa|sp|ha) on_back_end=1 ;;
  esac

  # Ensure this_mach contains a known alias
  # The first matching pattern wins; a name that matches none of the
  # patterns is left unchanged.
  case $this_mach in
     ha|ha*) this_mach=hadar  ;;
     sp|sp*) this_mach=spica  ;;
        c1*) this_mach=spica  ;;
        c2*) this_mach=hadar  ;;
     za|ze*) this_mach=zeta   ;;
     sa|sa*) this_mach=saiph  ;;
        c6*) this_mach=saiph  ;;
        c7*) this_mach=zeta   ;;
      alef*) this_mach=alef   ;;
       ib3*) this_mach=pollux ;;
  esac

  # Define a file name that may be used to emit error messages.
  # This and bail must be defined before the first use of bail below.
  error_out="${JHOME:-$HOME}/.queue/error_dump_sublist_${runid}_$stamp"
  [ ! -z "$error_out" ] && rm -f "$error_out"

  # bail is a simple error exit routine
  #   arguments : the text of the error message
  #   output    : the time stamped message is written to stdout and is
  #               also appended to the file named by error_out
  #   exit      : always terminates the script with status 1
  bail(){
    echo `date`" $this_mach  $runid --- dump_sublist: $*"
    echo `date`" $this_mach  $runid --- dump_sublist: $*" >>"$error_out"
    exit 1
  }

  # pass bemach to dump_list if it is set here
  bemach=''
  if [ -z "$bemach" ]; then
    # If bemach is not set explicitly then, if this job is executing on
    # a back end machine use that machine name for bemach
    if [ "${on_back_end:-0}" -eq 1 ]; then
      bemach=$this_mach
    fi
  fi
  if [ -n "$bemach" ]; then
    # Reset BERUNPATH if bemach is set
    # '$RUNPATH' is single quoted so that it is expanded by the shell
    # on bemach rather than by this shell
    XXX=`ssh $bemach echo '$RUNPATH' 2>/dev/null` || bail "Unable to determine BERUNPATH"
    BERUNPATH=$XXX
  fi

  # CCCJOB_ROOT may name a directory that holds a job specific version of
  # make_file_name_list, which is then taken from $CCCJOB_ROOT/bin.
  # When CCCJOB_ROOT is empty the command is referred to by its bare name.
  CCCJOB_ROOT=''
  MAKE_FILE_NAME_LIST=make_file_name_list
  if [ -n "$CCCJOB_ROOT" ]; then
    MAKE_FILE_NAME_LIST="$CCCJOB_ROOT/bin/make_file_name_list"
  fi

  # These variables are set when the job string is created
  # (the value NotSet is what remains if that never happened)
  previous_year=NotSet
  previous_month=NotSet

  current_year=NotSet
  current_month=NotSet

  next_year=NotSet
  next_month=NotSet

  run_start_year=NotSet
  run_start_month=NotSet
  run_stop_year=NotSet
  run_stop_month=NotSet

  # These parameters may be required by make_file_name_list and so must be
  # present in the environment at the time make_file_name_list is invoked
  # Each value is passed along a chain: dump_<name>, then
  # dump_sublist_<name>, then the plain <name>.
  # NOTE(review): presumably this lets either prefixed name be overridden
  # when the job string is created -- confirm.
  dump_cfsuser=$username
  dump_sublist_cfsuser=$dump_cfsuser
  cfsuser=$dump_sublist_cfsuser

  # masterdir falls back to off at every step of the chain
  dump_masterdir=off
  dump_sublist_masterdir=${dump_masterdir:=off}
  masterdir=${dump_sublist_masterdir:=off}

  # shortermdir falls back to on at every step of the chain
  dump_shortermdir=on
  dump_sublist_shortermdir=${dump_shortermdir:=on}
  shortermdir=${dump_sublist_shortermdir:=on}

  # If either reset_start_year or reset_stop_year are set then they must be
  # of the form old_year:new_year (ie a colon separated pair of integers)
  # where the first integer is the year that needs to be changed
  # and the second integer is the year that it will be changed to.
  # These may potentially change the value of start_year or stop_year that
  # are defined after the call to make_file_name_list below
  # NOTE(review): these variables are only assigned in this file; they are
  # presumably read from the environment by make_file_name_list -- confirm.
  dump_sublist_reset_start_year=''
  reset_start_year=${dump_sublist_reset_start_year:=''}
  dump_sublist_reset_stop_year=''
  reset_stop_year=${dump_sublist_reset_stop_year:=''}
  # reset_end_year is defined here for backward compatibility
  # It is effectively equivalent to reset_stop_year.
  dump_sublist_reset_end_year=''
  reset_end_year=${dump_sublist_reset_end_year:=''}

  # make_file_name_list uses the variables current_year, current_month,
  # previous_year, previous_month, next_year and next_month to
  # determine start and stop dates that may be used for file name creation.

  # This invocation of make_file_name_list will process the *_year and *_months
  # variables defined above and output a file containing definitions for
  # start_year, start_mon, stop_year, stop_mon, end_year, end_mon
  fopts=''
  dump_sublist_mon_offset=''
  if [ -n "$dump_sublist_mon_offset" ]; then
    # Set a user supplied month offset
    # NOTE(review): the assignment is made through eval with an escaped "=";
    # presumably this keeps the line from being altered by parameter
    # substitution -- confirm before simplifying.
    eval fopts=\"--mon_offset\=$dump_sublist_mon_offset\"
  fi
  # The output file name carries runid and stamp
  tmp_file_list="tmp_file_list_${runid}_${stamp}"
  $MAKE_FILE_NAME_LIST $fopts --dates_only $tmp_file_list || \
    bail "Problem in make_file_name_list"

  # Verify that the output list is not empty
  [ ! -s "$tmp_file_list" ] && bail "Unable to create file list"

  # A file list was created ...source it
  # This will define start_year, start_mon, stop_year, stop_mon,
  # end_year and end_mon in the current environment
  # NOTE(review): the leading ": ;" does nothing when executed; presumably
  # it stops the line from being treated like the ". file" lines outside
  # of this script -- confirm before removing it.
  : ; . $tmp_file_list
  rm -f $tmp_file_list

  # Define a variable containing a string that will identify the
  # current year/month range, for possible use in file names etc.
  ym_range="${start_year}m${start_mon}_${stop_year}m${stop_mon}"

  # arclabel may be used to determine the name of the cmcarc format file
  # that will be saved on the cfs tape archive. If set then the cmcarc filename
  # will be ${arclabel}_${timestamp}_arc. If not set then the cmcarc filename
  # will be ${commonprefix}_${timestamp}_arc where commonprefix is the common
  # prefix of all of the individual file names of the files that are dumped.
  # arclabel is not used in this script other than being written to
  # dump_defs further down.
  dump_sublist_arclabel=''
  arclabel=${dump_sublist_arclabel:-''}

  # join is the number of user supplied file names. The names themselves
  # are read below from the variables file1, file2, ... file$join.
  # NOTE(review): presumably the marker line that follows is replaced by
  # the definitions of join and file1... when the job string is created;
  # keep its text exactly as it is -- confirm.
  join=0
  # <<INSERT_DUMP_SUBLIST>>

  # Create an empty local file to contain the list of files to be processed
  local_flist="FILE_LIST_$stamp"
  rm -f $local_flist
  touch $local_flist

  # If the user has supplied a file list when the job string was created
  # then copy these file names into local_flist
  if [ $join -gt 0 ]; then
    n=0
    while [ $n -lt $join ]; do
      n=`expr $n + 1`
      # Fetch the value of the variable named file<n> ...
      eval curr_file=\$file$n
      echo $curr_file >> $local_flist
      # ... and clear that variable once its value has been written
      eval file${n}=
    done
    # All names have been consumed, so the count is reset
    join=0
  fi

  # If dump_file_list is defined then add the file names defined therein
  # to the list of files to be dumped. This will be in addition to any
  # provided by the user when this string was created.
  # dump_file_list may contain a white space separated list of file names.
  # The files provided in dump_file_list must be available via "access"
  dump_file_list=''

  # save_file_list flags the addition of dump_file_list files
  # to the list of files to be dumped
  save_file_list=off
  # Remove every blank from the value, then map on/yes to 1 and off/no to 0.
  # Any other value is left as it is after the blanks are removed.
  # NOTE(review): the assignments go through eval with an escaped "=";
  # presumably this keeps them from being rewritten by parameter
  # substitution -- confirm before replacing them with plain assignments.
  XXX=`echo $save_file_list|sed 's/ //g'`
  eval save_file_list\=$XXX
  [ "$save_file_list" = 'on' ]  && eval save_file_list\=1
  [ "$save_file_list" = 'off' ] && eval save_file_list\=0
  [ "$save_file_list" = 'yes' ] && eval save_file_list\=1
  [ "$save_file_list" = 'no' ]  && eval save_file_list\=0

  # dump_file_list may be a white space separated list of file names.
  # Each file in this list contains a list of file names of files to be
  # dumped to cfs. Nothing is done when dump_file_list is empty.
  for add_file_list in $dump_file_list; do
    # access a file containing the list of file names
    access FILE_LIST2_$stamp $add_file_list nocp=off

    # When requested, add the name of the file containing the file list
    # so it will be dumped along with all the other files. This name goes
    # in ahead of the names that are read from the file itself.
    if [ $save_file_list -eq 1 ]; then
      echo $add_file_list >> $local_flist
    fi

    # Append the names just accessed to the end of the running list
    cat FILE_LIST2_$stamp >> $local_flist

    release FILE_LIST2_$stamp
  done

  # Build the command line options for make_file_name_list in fopts
  fopts=''
  # The --size_max= option implies multi-list output
  # User specification of the maximum size of a single arcfile
  # (16GB is used when the user does not supply a value)
  # NOTE(review): dump_sublist_max_arcsize is assigned here but it is not
  # read anywhere in this file -- confirm whether that spelling was meant
  # to be accepted as an alternative.
  dump_sublist_maxarcsize=''
  dump_sublist_max_arcsize=$dump_sublist_maxarcsize
  max_arcsize=${dump_sublist_maxarcsize:=16GB}
  fopts="$fopts --size_max=$max_arcsize"

  # The --number_max= option implies multi-list output
  # User specification of the maximum number of files allowed in a single arcfile
  # (275 is used when the user does not supply a value)
  dump_sublist_max_files_per_arc=''
  max_files_per_arc=${dump_sublist_max_files_per_arc:=275}
  [ -n "$max_files_per_arc" ] && fopts="$fopts --number_max=$max_files_per_arc"

  # The --pattern= option supplies file name patterns that will be used by
  # make_file_name_list to add files found on disk that match the patterns.
  dump_sublist_pattern=''
  pattern=$dump_sublist_pattern
  [ -n "$pattern" ] && fopts="$fopts --pattern=$pattern"

  # Add the --in= command line option for make_file_name_list
  # if there is a non-empty file containing a file name list to use.
  # in_opt is cleared first so that a value left over in the environment
  # can neither defeat the check below nor reach the command line.
  in_opt=''
  [ -s "$local_flist" ] && in_opt="--in=$local_flist"

  # If neither a list of file names nor at least one file name pattern are
  # supplied then make_file_name_list will not create output file lists
  if [  -z "$in_opt" -a -z "$pattern" ]; then
    bail "Neither file name list nor file name pattern were supplied"
  fi

  # This invocation of make_file_name_list will create multiple files,
  # each containing a list of file name definitions of the form file1=...
  # Each file name list will be put into a file named ${fname_defs}001,
  # ${fname_defs}002, ...
  # Any such files left in the working directory are removed first.
  fname_defs="FNAME_DEFS_"
  rm -f FNAME_DEFS_*
  $MAKE_FILE_NAME_LIST $fopts $in_opt $fname_defs 2>&1 || \
    bail "Problem in make_file_name_list"

  # Create a file containing parmsub definitions used by dump_list
  # The file is named dump_defs and receives one name='value' line for
  # each variable in varlist that has a non-empty value.

  # Simply pass with_lsarc to dump_list
  dump_sublist_with_lsarc='';
  with_lsarc=${dump_sublist_with_lsarc:=on}

  # The default if none of sv, both, vic or svvic are set to "on" is to
  # dump files from the back end machine

  # sv : off means dump files from the back end
  # sv : on  means dump files from pollux
  dump_sv=off
  dump_sublist_sv=${dump_sv:=off}
  sv=${dump_sublist_sv:=off}

  both=off    # both  : on means dump files from pollux and the back end machine
  vic=off     # vic   : on means dump files from Victoria
  svvic=off   # svvic : on means dump files from pollux and Victoria

  # nolist : off means save CFSDATA* files in ~/info
  # nolist : on means do not save CFSDATA* files in ~/info
  dump_sublist_nolist=''
  nolist=${dump_sublist_nolist:=on}

  # besc = on causes a tdumper job to run on the back end (spica/hadar)
  dump_sublist_besc=''
  besc=${dump_sublist_besc:=off}

  # dpalist = on means store DATAPATH information in a local file for use by access/save/delete
  # Since this local file is never deleted, dpalist = on should only be used when access,
  # save or delete are part of a job that runs in a temporary working directory which gets
  # removed after the job completes. Most decks do this, but not all. You have been warned!
  dump_sublist_dpalist=''

  # Hand the dump_sublist_* settings on under the dump_list_* names
  dump_list_CCRNTMP=$dump_sublist_CCRNTMP
  dump_list_RUNPATH=$dump_sublist_RUNPATH
  dump_list_dpalist=$dump_sublist_dpalist
  dump_list_besc=$besc

  # months is set to 12 so that stdout from the cccjob command is minimized
  # NOTE(review): the assignment goes through eval with an escaped "=";
  # presumably this keeps it from being rewritten by parameter
  # substitution -- confirm before simplifying.
  eval months\=12

  # Names of the variables that are offered to dump_list
  varlist='dump_list_CCRNTMP
           dump_list_RUNPATH
           dump_list_dpalist
           dump_list_besc
           RMTRUNPATH
           BERUNPATH
           JHOME
           JHOME_DATA
           JHOME_RUN
           with_lsarc
           bemach
           months
           crawork
           arclabel
           uxxx
           runid
           noprint
           cfsuser
           masterdir
           shortermdir
           nolist
           vic
           svvic
           both
           sv'
  # Start from an empty dump_defs
  rm -f dump_defs
  touch dump_defs
  for var in $varlist; do
    # Look up the value of the variable whose name is held in var
    eval val=\$$var
    # If this variable is defined add it to the list
    # (the value is written inside single quotes)
    [ -n "$val" ] && echo ${var}=\'$val\' >> dump_defs
  done

  # CCRNTMP and RUNPATH are added to dump_defs only when they differ from
  # default_CCRNTMP and default_RUNPATH respectively.
  # NOTE(review): the default_* variables are not assigned inside this
  # script text; presumably they reach it through the environment --
  # confirm. Unlike the loop above, the values are written here without
  # surrounding quotes.
  if [ x"$CCRNTMP" != x"$default_CCRNTMP" ]; then
    # If CCRNTMP was changed by the user then pass it on to dump_list
    eval assg=CCRNTMP\=$CCRNTMP
    echo $assg >> dump_defs
  fi

  if [ x"$RUNPATH" != x"$default_RUNPATH" ]; then
    # If RUNPATH was changed by the user then pass it on to dump_list
    eval assg=RUNPATH\=$RUNPATH
    echo $assg >> dump_defs
  fi

  # Build one dump_list job for each file name list that was created by
  # make_file_name_list and collect all of these jobs in a single file
  job_string_to_insert="INSERT_JOB_STRING_${runid}_$stamp"
  rm -f $job_string_to_insert
  touch $job_string_to_insert
  for fname in `ls -1 ${fname_defs}*`; do
    # Remove any CURR_JOB left over from a previous pass so that a failure
    # in cccjob can never cause an old job to be appended a second time
    rm -f CURR_JOB
    cccjob --out=CURR_JOB --job="dump_list=${fname}:s" dump_defs \
      --start="${start_year}:${start_mon}" --stop="${stop_year}:${stop_mon}" || \
      bail "Problem in cccjob"

    # Verify that a job was actually written before it is appended
    [ -s CURR_JOB ] || bail "Unable to create a dump_list job for $fname"
    cat CURR_JOB >> $job_string_to_insert

    # Show the file name definitions that went into this job
    echo "====================================="
    echo "$fname"
    cat $fname
    echo "====================================="
  done

  # dry_run is off unless the user supplies another value.
  # Only the exact value "on" selects the dry run.
  dump_sublist_dry_run=''
  dry_run=${dump_sublist_dry_run:=off}
  case "$dry_run" in
    on)
      # Simply copy the job string created here to the users tmp dir
      cp $job_string_to_insert ${JHOME:-$HOME}/tmp/dump_list_${runid}_$stamp
      ;;
    *)
      # Insert the job just created into the existing string from which
      # this job came. The variable "crawork" must be defined/exported in
      # the current environment
      splice_job_string $job_string_to_insert ||\
        bail "Problem in splice_job_string"
      rm -f $job_string_to_insert
      ;;
  esac

  # ---Stop_submit_ignore_code----

end_of_script

 # Source the closing job control code.
 # NOTE(review): endjcl.cdk is not visible here; presumably it is the
 # counterpart of comjcl.cdk and acts on Execute_Script -- confirm.
 . endjcl.cdk

#end_of_job
