#!/bin/bash
set -e
#
# cccjob template for a diagnostics only job string on PPP2
#
#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#
# config file
canesm_cfg_file=canesm.cfg
#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#

# Load the run configuration into this shell
# (presumably where production and runid, used just below, come from)
source ${canesm_cfg_file}

# Run code checking and update logs
# strict_check is run in 'development' mode (-d) whenever production is not 1
if [ $production -eq 1 ]; then
  flgs=""
else
  flgs="-d"
fi
strict_check $flgs $runid create-pool-jobstring canesm.cfg make_pool_job_${runid}
if [ $? -ne 0 ]; then
  exit 1
fi

# Split start_time (year:month, presumably set in the config file sourced above)
# on colons and keep the year and the month; each defaults to 1 when absent.
# NOTE(review): start_year is assigned again, and start_time is replaced by an
# array, when "process_date_param start" is called further down.
start_list=(${start_time//:/ })
start_year=${start_list[0]:-1}
start_month=${start_list[1]:-1}

# Same split for stop_time; a missing year defaults to 1, a missing month to 12.
# NOTE(review): stop_year and stop_time are likewise reassigned later by
# "process_date_param stop".
stop_list=(${stop_time//:/ })
stop_year=${stop_list[0]:-1}
stop_month=${stop_list[1]:-12}

# Same split for start_rtdiag, giving year_rtdiag_start and month_rtdiag_start
# (both default to 1). Neither is referenced again in the visible part of this script.
start_rtdiag_time=(${start_rtdiag//:/ })
year_rtdiag_start=${start_rtdiag_time[0]:-1}
month_rtdiag_start=${start_rtdiag_time[1]:-1}

# Change any or all of the following as required

# start and stop control the length of this job string
# They are both of the form year:month or just year, where year and month are positive integers
# With the year:month format, at least one of year or month must be defined.
#   -if month is missing then it will default to 1 for start and 12 for stop
#   -if year is missing then it will default to 1 for start and start_year for stop
# With the year only format, start month will be 1 and stop month will be 12
# start and stop are validated by process_date_param below and are also passed
# unchanged to cccjob through --start and --stop
start=2004
stop=2008

# with_pool = 1 means include pooling of diagnostic files
# with_pool = 0 means do not include pooling
with_pool=1

# months=1 would normally be used here ...but it can be reset if required for special cases
months=1
# months_run is not assigned earlier in this file, so the value saved here is
# presumably the one coming from the config file.
# NOTE(review): the same pair of assignments is repeated after the month lists
# are built; by then months_run is 1, so months_gcm ends up as 1 as well.
months_gcm=$months_run # keep months_run value as months_gcm (needed for gcmpak.dk)
months_run=1

#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#
#=#=# BEGIN preamble...do not modify
# Internal definitions
# Name of this script, used to tag every bail/warn/info message.
# $0 is quoted and preceded by -- so that a path containing blanks or starting
# with a dash is still handed to basename as a single operand.
Runame=$(basename -- "$0")
# Fatal error: print one line made of the current date, the *EE* tag, the script
# name and all arguments on stdout, then terminate the script with status 1
bail(){
  # Build the word list exactly as it is printed (date words, then the message)
  set -- $(date)" --- *EE* ${Runame}: $*"
  echo "$@"
  exit 1
}
# Warning: print one line made of the current date, the *WW* tag, the script
# name and all arguments on stdout; execution continues
warn(){
  # Build the word list exactly as it is printed (date words, then the message)
  set -- $(date)" --- *WW* ${Runame}: $*"
  echo "$@"
}
# Information: print one line made of the current date, the *II* tag, the script
# name and all arguments on stdout; execution continues
info(){
  # Build the word list exactly as it is printed (date words, then the message)
  set -- $(date)" --- *II* ${Runame}: $*"
  echo "$@"
}
# A pattern to match a string of the form INT:INT (INT a positive integer or zero)
# NOTE(review): patt1 is not referenced anywhere else in the visible source
patt1='^[0-9][0-9]*:[0-9][0-9]*$'
# A pattern to match a single positive integer or zero, i.e. one or more digits.
# Used with [[ ... =~ ... ]] in process_date_param to check the year and the month.
patt2='^[0-9][0-9]*$'
# Validate the date held in the variable named by $1 and publish its parts as globals.
#
# Arguments:
#   $1  NAME of the variable to process, either "start" or "stop". Its value is
#       year:month or just year; with a colon either side may be left empty.
#       Fields after the month (e.g. year:month:day) are carried through unchecked.
# Defaults used for a missing field:
#   start -> year 1, month 1
#   stop  -> year $start_year (1 when start_year is empty), month 12
#   which is why start should be processed before stop.
# Globals read:
#   the variable named by $1, start_year, patt2
# Globals written (NAME is $1):
#   NAME_year  year as a plain integer
#   NAME_mon   month, zero padded to 2 digits
#   NAME_time  array (year month [any further fields of the input])
# Output:
#   one "process_date_param: NAME_year=...  NAME_mon=..." line on stdout, plus
#   whatever info/warn print.
# Exits through bail when $1 is missing or is not start/stop, when the value is
# empty or malformed, or when the month is outside 1..12.
process_date_param(){
  local pnam pval pyear pmon dyear dmon
  local -a Aval
  # Split and join on the default separators whatever the caller has in IFS
  local IFS=$' \t\n'

  [ -z "$1" ] && bail "${FUNCNAME[0]} $*
 *EE* ${FUNCNAME[0]} requires a variable name on the command line whose value is of the form year:month, or just year"

  # Verify that the parameter name is valid and set the default year and month
  # to be used when the year or month info is missing.
  case $1 in
    start)
      dyear=1; dmon=1
      ;;
    stop)
      dyear=${start_year:-1}; dmon=12
      # If start_year is not defined then let the user know
      if [ -z "$start_year" ]; then
        info "${FUNCNAME[0]}:
 *II* ${FUNCNAME[0]}: Normally stop is defined after start is defined.
 *II* ${FUNCNAME[0]}: You are currently defining stop before start is defined.
 *II* ${FUNCNAME[0]}: Setting default stop year to $dyear"
      fi
      ;;
    *)
      bail "${FUNCNAME[0]}: Invalid parameter name -->$1<--"
      ;;
  esac
  pnam=$1
  # Indirect read of the variable named by pnam (known to be start or stop here)
  pval=${!pnam}
  [ -z "$pval" ] && bail "${FUNCNAME[0]}: $pnam must be defined and not null"
  local msg_comm="$pnam should be of the form year:month, or just year, where month and year are positive integers."

  if [[ $pval == *:* ]]; then
    # Split on colon. read -a performs the word splitting without pathname
    # expansion, so a value such as "*:3" can never pick up file names from
    # the current directory. read returns non-zero at end of input, hence || true.
    read -r -d '' -a Aval <<< "${pval//:/ }" || true
    if [ ${#Aval[@]} -eq 0 ]; then
      # The array is empty (ie nothing but colons in pval)
      bail "${FUNCNAME[0]}: Invalid value for ${pnam}=$pval missing year and month $msg_comm"
    elif [ ${#Aval[@]} -eq 1 ]; then
      # Only 1 element in Aval, meaning the colon is either the first or last character
      if [[ ${pval:0:1} = ":" ]]; then
        # Year is missing and month is present: use the default year
        pyear=$dyear
        pmon=${Aval[0]}
        warn "${FUNCNAME[0]}: $pnam is missing a value for year. $msg_comm Using $pnam year=$pyear $pnam month=$pmon"
      else
        # Year is present but month is missing: use the default month
        pyear=${Aval[0]}
        pmon=$dmon
        warn "${FUNCNAME[0]}: $pnam is missing a value for month. $msg_comm Using $pnam year=$pyear $pnam month=$pmon"
      fi
    else
      # At least 2 elements in Aval
      pyear=${Aval[0]:-$dyear}
      pmon=${Aval[1]:-$dmon}
    fi
  elif [[ $pval =~ $patt2 ]]; then
    # The parameter value is a positive integer or zero
    # Assume it is the year and set month to the default value defined above
    pyear=$pval
    pmon=$dmon
  else
    bail "${FUNCNAME[0]}: Invalid value for ${pnam}=$pval  ...$msg_comm"
  fi

  # Verify that pyear and pmon are integers with reasonable values.
  # A digits-only year cannot be negative, so no separate sign check is needed.
  [[ $pyear =~ $patt2 ]] ||
      bail "Invalid ${pnam}=$pval ...The year is not a positive integer or zero. $msg_comm"
  [[ $pmon =~ $patt2 ]] ||
      bail "Invalid ${pnam}=$pval ...The month is not a positive integer. $msg_comm"
  if [ "$pmon" -lt 1 ] || [ "$pmon" -gt 12 ]; then
    bail "Invalid ${pnam}=${pval} ...month=$pmon is out of range. $msg_comm"
  fi

  # Format the values: plain integer year, 2 digit month.
  # 10# forces base 10 so that zero padded input such as 08 is not read as octal.
  pyear=$((10#$pyear))
  printf -v pmon '%02d' "$((10#$pmon))"

  # Define global variables ${pnam}_year, ${pnam}_mon and ${pnam}_time for use
  # elsewhere. printf -v / read -a assign by name, so no eval is involved and
  # unchecked trailing fields are never evaluated as shell code.
  printf -v "${pnam}_year" '%s' "$pyear"
  printf -v "${pnam}_mon" '%s' "$pmon"
  Aval[0]=$pyear
  Aval[1]=$pmon
  read -r -d '' -a "${pnam}_time" <<< "${Aval[*]}" || true
  echo "${FUNCNAME[0]}: ${pnam}_year=$pyear  ${pnam}_mon=$pmon"
}
#=#=# END preamble
#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#

# Validate start first: its year is the fallback year for stop.
# This publishes start_year, start_mon and start_time.
process_date_param start
printf '%s\n' "start_year=$start_year start_mon=$start_mon"

# Then validate stop, which publishes stop_year, stop_mon and stop_time
process_date_param stop
printf '%s\n' "stop_year=$stop_year stop_mon=$stop_mon"

# The chunk_size in months (must correspond to an integer number of years)
chunk_size=$(echo $start_year $start_mon $stop_year $stop_mon | awk '{printf "%d",($3-$1)*12+($4-$2)+1}')
echo chunk_size=$chunk_size

# months for GCM and restart files
# Shell arithmetic is used instead of expr: expr exits with status 1 whenever its
# result is 0, which aborts the script under "set -e". The 10# prefix forces
# base 10 because start_mon is zero padded (08 and 09 are not valid octal).
last_month=$(( 10#$start_mon + 11 ))                  # last month in a year
end_month=$(( 10#$start_mon + 10#$months_run - 1 ))   # last month in $months_run chunk
exe_month=$(( 10#$start_mon + 10#$months - 1 ))       # last month in $months chunk
# Each list is a "+" followed by comma terminated month numbers folded into 1..12
months_gs="+$(seq $start_mon $months_run $last_month | awk '{print ($1-1)%12+1}' | tr '\n' ',')"
months_rs="+$(seq $end_month $months_run $last_month | awk '{print ($1-1)%12+1}' | tr '\n' ',')"
months_ab="+$(seq $exe_month $months     $last_month | awk '{print ($1-1)%12+1}' | tr '\n' ',')"

echo months_run=$months_run
echo months_gs=$months_gs
echo months_rs=$months_rs
echo months_ab=$months_ab

# Year and month of the restart files for the first year, i.e. the month just
# before the start month
if [ "$start_mon" -eq 1 ]; then
  # A start year of 1 yields 0 here; with expr this was the case that killed the script
  start_year_rs=$(( start_year - 1 ))
  start_mon_rs=12
else
  start_year_rs=$start_year
  printf -v start_mon_rs '%02d' "$(( 10#$start_mon - 1 ))"
fi
echo start_year_rs=$start_year_rs start_mon_rs=$start_mon_rs

# months=1 would normally be used here ...but it can be reset if required for special cases
months=1
# NOTE(review): months_run was already forced to 1 in the user settings near the
# top of this script and is not changed in between, so this line overwrites the
# months_gcm saved there with 1 instead of the configured months_run value.
# Confirm which of the two assignments is the intended one.
months_gcm=$months_run # keep months_run value as months_gcm (needed for gcmpak.dk)
months_run=1 # reset months_run to 1

#==========================================================================================
# Variables coming in from ${canesm_cfg_file}, modified here
#==========================================================================================

# Two sed scripts cover every list below. Both replace the place holders
# +months_rs, +months_gs and +months_ab:
#   _month_list_subs  -> the month lists built above (these already start with "+")
#   _first_rs_subs    -> "+" followed by the month of the first restart
_month_list_subs="s/+months_rs/${months_rs}/g;s/+months_gs/${months_gs}/g;s/+months_ab/${months_ab}/g"
_first_rs_subs="s/+months_rs/+${start_mon_rs}/g;s/+months_gs/+${start_mon_rs}/g;s/+months_ab/+${start_mon_rs}/g"

xc2ppp_suffix_list=$(echo $xc2ppp_suffix_list | sed "$_month_list_subs")
xc2ppp0_suffix_list=$(echo $xc2ppp0_suffix_list | sed "$_first_rs_subs")

canesm_load_hist_suffix_list=$(echo $canesm_load_hist_suffix_list | sed "$_month_list_subs")
canesm_load_rs0_suffix_list=$(echo $canesm_load_rs0_suffix_list | sed "$_first_rs_subs")
canesm_load_rs_suffix_list=$(echo $canesm_load_rs_suffix_list | sed "$_month_list_subs")

# The second character of uxxx selects the type of pooling and/or the prefix of
# the diagnostic and time series files. Stop here unless uxxx is set and that
# character is one of a, m, c, r, d or f.
if [ -z "$uxxx" ]; then
  bail "uxxx must be defined"
fi
# ch2 = second character of the first blank separated word of uxxx
ch2=$(echo $uxxx | awk '{print substr($1,2,1)}' -)
if [ -z "$ch2" ]; then
  bail "Invalid uxxx = $uxxx"
fi
if [[ $ch2 != [amcrdf] ]]; then
  bail "Invalid prefix found in uxxx = $uxxx"
fi

# Stamp used in the scratch file names: day of year, hour, minute and second
# followed by the process id of this shell
stamp=`date "+%j%H%M%S"$$`
# Suffix of the crawork name: pooljob_<second '-' separated field of CMCFEDEST>_<stamp>
diagjob_sfx=pooljob_`echo ${CMCFEDEST} | cut -f2 -d'-'`_$stamp
# File of variable definitions that is handed to cccjob as its last argument
jobdefs=jobdefs_pooljob_$stamp
# The here document delimiter is unquoted, so every $variable is expanded while
# the file is written. Output is appended (>>); the pooling section further down
# appends more definitions to the same file.
cat >> $jobdefs << end_jobdefs
  uxxx=$uxxx
  runid=$runid
  months=$months
  months_run=$months_run
  months_gcm=$months_gcm
  chunk_size=$chunk_size
  noprint=$noprint
  debug=$debug
  crawork=${runid}_${diagjob_sfx}

  # Pooling/time series realted variables
  diag_uxxx=d$ch2
  pool_uxxx=p$ch2

  # These variables are used by the [ca]pool alias
  load_diag=off
  dump_diag=off
  delete_diag=off
  dump_pooled=off
  delete_pooled=off
  psdelete_leave_last_pool=off
  transfer_pooled=off
  pool_with_cp=off
end_jobdefs

# Print the day part of a date tag, "d" followed by the day padded to 2 digits.
#   $1  day of the month as a non-negative integer, leading zeros allowed
# The value is converted with an explicit base of 10: passed straight to
# printf %d, "08" and "09" are rejected as invalid octal numbers and the day
# would be silently left out of the tag.
# Anything that is not a plain run of digits is reported on stderr and yields
# no output; the function always returns 0.
_pool_day_suffix(){
  if [[ $1 =~ ^[0-9]+$ ]]; then
    printf 'd%2.2d' "$((10#$1))"
  else
    echo "${FUNCNAME[0]}: ignoring non numeric day -->$1<--" >&2
  fi
}

# Define the date tags used in the name of the file to contain the output job string.
# Each tag is <year>m<month> (year padded to at least 3 digits, month to 2) taken
# from the first two elements of start_time / stop_time; a missing year defaults
# to 1 and a missing month to 1 for start and 12 for stop.
date1=$(echo ${start_time[0]:-1} ${start_time[1]:-1} | awk '{printf "%3.3dm%2.2d",$1,$2}')
date2=$(echo ${stop_time[0]:-1} ${stop_time[1]:-12} | awk '{printf "%3.3dm%2.2d",$1,$2}')
# A third element, when present and not empty, is the day and is added as dDD
if [[ ${#start_time[@]} -gt 2 && -n "${start_time[2]}" ]]; then
  d1=$(_pool_day_suffix "${start_time[2]}")
  date1+="$d1"
fi
if [[ ${#stop_time[@]} -gt 2 && -n "${stop_time[2]}" ]]; then
  d2=$(_pool_day_suffix "${stop_time[2]}")
  date2+="$d2"
fi

# Pool diagnostic files
# Builds POOLING (the pooling part of the job description) and, for a chunk of
# one year or less, a list of pooled file names plus extra jobdefs entries.
pool_flist=tmp_pool_flist_$stamp
dump_list_pool_sv=''
dump_list_pool_arclabel=''
dump_list_pool_rmdskcpy=''
if [ $with_pool -eq 1 ]; then
  # [ca]pool only accepts a pooling interval in years, so chunk_size
  # (in months) must correspond to an integer number of years
  chunk_size_ok=$(echo $chunk_size | awk '{if($1%12==0){print "1"}else{print "0"}}' -)
  if [ $chunk_size_ok -eq 0 ]; then
    bail "Chunk size $chunk_size months must be an integer number of years."
  fi
  chunk_size_y=$(echo $chunk_size | awk '{printf "%d",$1/12}' -)
  if [ $chunk_size_y -gt 1 ]; then
    # More than one year: use the ?pool alias with the interval in years,
    # any blanks removed
    POOLING="${ch2}pool:${chunk_size_y}y"
    POOLING=${POOLING// /}
  else
    # The ?pool alias will not allow a pooling interval of 1 year or less
    first_djf_on_disk=1
    if [ $first_djf_on_disk -eq 1 ]; then
      # The first DJF should be on disk ... pool all seasons
      _pool_sea_codes=DMJS
      _pool_periods=(djf jja mam son ann)
    else
      # Assume the first DJF is not on disk
      _pool_sea_codes=MJS
      _pool_periods=(jja mam son ann)
    fi
    POOLING="pool_sea:${_pool_sea_codes} pool_ann:${chunk_size}m"

    # One gp and one xp file name per pooled period, written to a fresh list file
    rm -f $pool_flist
    for _pool_period in "${_pool_periods[@]}"; do
      for _pool_kind in gp xp; do
        echo "p${ch2}_${runid}_${start_year}_${_pool_period}_${_pool_kind}"
      done
    done >$pool_flist

    # Dumping and deleting of the listed files are both switched off:
    # DUMPOOL="dump_list_pool=${pool_flist}:${chunk_size}m"
    # DELPOOL="del_list_pool=${pool_flist}:${chunk_size}m"
    DUMPOOL=''
    DELPOOL=''
    POOLING="$POOLING $DUMPOOL $DELPOOL"

    # NOTE(review): chunk_ym_range is not assigned anywhere in this file; it
    # presumably comes from the config file -- confirm, otherwise the label
    # contains an empty field.
    dump_list_pool_arclabel="p${ch2}_${runid}_${chunk_ym_range}_pooled"
    # Ensure that the copy of the pooled files on the cfs disk buffer
    # gets deleted as soon as it has been written to tape
    dump_list_pool_rmdskcpy=on
    # pool_uxxx must be set explicitly for the case of pooling 1 year or less
    pool_uxxx="p$ch2"

    # Add the three definitions to the jobdefs file
    {
      echo "  dump_list_pool_arclabel=$dump_list_pool_arclabel"
      echo "  dump_list_pool_rmdskcpy=$dump_list_pool_rmdskcpy"
      echo "  pool_uxxx=$pool_uxxx"
    } >> $jobdefs
  fi
else
  POOLING=''
fi

# Job description: only the pooling part built above
JOBDESC="$POOLING"
echo JOBDESC=$JOBDESC

# Create the complete job string in <runid>_<date1>_<date2>_pool_job
range="${date1}_${date2}"
fout="${runid}_${range}_pool_job"
cccjob_args=(--out=$fout --job="$JOBDESC" --start=$start --stop=$stop $jobdefs)
cccjob "${cccjob_args[@]}"

# Remove the scratch files (only reached when cccjob succeeds, because of set -e)
rm -f $jobdefs $pool_flist
