#!/bin/bash
#
#   Usage: cplstats [letter options] runid [runid ...]
#          At least one runid must appear on the command line
#
# Purpose: Calculate timing stats from cpl_stats files associated with a given runid
#
# Options:
# All options begin with a dash (-) and must appear before any other command line arg
#   -k  ..keep timer specific files created by cplstats on disk (these files are normally deleted)
#   -h  ..display a usage message and exit
#
########################################################################
#
# Larry Solheim ...Aug 2016

# Locate this script; usage() reads the help text from its header comments.
# "type -P" searches PATH (or accepts an executable pathname). When it finds
# nothing, e.g. the script was started as "bash cplstats" from its own
# directory, fall back to $0 itself so that FULLPATH is never empty.
FULLPATH=$(type -P -- "$0") || FULLPATH=$0 # pathname of this script
Runame=${FULLPATH##*/}                     # script name without directories
# usage [-e] [message ...]
#   Write an optional "<script name>: message" line, a blank line and the help
#   text found in the header comments of $FULLPATH (from line 3 up to the
#   first line starting with ###) to stderr, then exit.
#   -e  ..exit with status 1 instead of 0
# Globals read: Runame, FULLPATH
usage() {
  # getopts tracks its position in the global OPTIND, which the caller's own
  # option loop has normally advanced already. Parse with a private copy reset
  # to 1, otherwise -e is skipped and an error exit would return status 0.
  local OPTIND=1 opt err_exit=0
  while getopts e opt; do
    case $opt in
      e) err_exit=1 ;;
    esac
  done
  shift $((OPTIND - 1))

  [ -n "$1" ] && echo >&2 "${Runame}:" "$@"
  echo >&2 " "
  # Strip the leading "# " from each header line; bare "#" lines print as blanks
  sed >&2 -n '/^###/q; s/^#$/# /; s/^ *$/# /; 3,$s/^# //p;' "$FULLPATH"
  exit "$err_exit"
}

# Time stamp for use in file names etc:
# day of year + HHMMSS, followed by the process id of this shell
stamp=$(date "+%j%H%M%S")$$

# bail message ...
#   Report a fatal error, prefixed with the script name, and exit with status 1.
#   The message goes to stderr (as usage() does) so that it is not mixed into
#   the statistics written on stdout.
# Globals read: Runame
bail(){
  echo >&2 "${Runame}: *** ERROR *** $*"
  exit 1
}

# Set defaults
runid_list=''

# keep = 1 means do not remove per timer files created below
# These files are removed by default
keep=0

# process command line options
# getopts stops by itself at "--" or at the first non-option word, so no
# case arm is needed for the end of options. For an unknown option it sets
# opt to a literal "?" after printing its own diagnostic.
while getopts hkx opt; do
  case $opt in
    k) keep=1 ;;
    h) usage  ;;
    x) set -x ;;
    \?) usage -e ;;
  esac
done
# Drop the options that were parsed; what remains are runids
shift $((OPTIND - 1))

# Process the remaining command line args
#
# collect_runids arg ...
#   Append the runids named by the args to the global, space separated,
#   runid_list. Each arg is either a bare runid or an assignment of the form
#   runid_list=value. Any other var=value assignment, or a value that is
#   empty after trimming blanks, aborts through bail.
collect_runids() {
  local arg var val
  for arg in "$@"; do
    case $arg in
      *=*) # Split at the first "=" using parameter expansion. Unlike an
           # unquoted echo, this never glob-expands or re-splits the
           # user supplied text.
           var=${arg%%=*}
           val=${arg#*=}
           # remove leading and trailing space
           var=${var#"${var%%[! ]*}"}; var=${var%"${var##*[! ]}"}
           val=${val#"${val%%[! ]*}"}; val=${val%"${val##*[! ]}"}
           [ -z "$val" ] && bail "Invalid command line arg --> $arg <-- Empty value."
           case $var in
             runid_list) # Append to the list of runids
                         runid_list+="${runid_list:+ }$val" ;;
                      *) bail "Invalid command line arg --> $arg <-- Unknown variable." ;;
           esac
           ;;
        *) # A bare word is a runid
           runid_list+="${runid_list:+ }$arg"
           ;;
    esac
  done
}
collect_runids "$@"

[ -z "$runid_list" ] && usage -e "No runids were defined"

# report_runid_stats runid
#   Print the average value of each coupler timer found in the
#   cplrs_cpl_stats files held in the coupler restart archives of runid.
#   Per timer work files named <runid>_stats_NN are created in the current
#   directory and removed again unless keep is 1.
#   Returns 0 (after a message) when no restart files are listed for runid.
#   Aborts through bail when the file listing fails or no archive could be read.
# Globals read: keep, Runame
report_runid_stats() {
  local runid=$1
  # The coupler restart contains stats files named cplrs_cpl_stats
  local stats_file=cplrs_cpl_stats
  local flist first_file last_file rs_file fname sfx curr_avg idx nfile ntimer
  local -a files timer_rex timer_file

  # Timer labels as they appear in the stats file.
  # An example of a single line from the cpl_stats file is as follows (some whitespace removed)
  #  1  2451-02-01 00:00:00 total count= 1  seconds net= 1817.269  seconds avg= 1817.269
  local -a timer_name=(
    "total"
    "transfer ATM to CPL and prepare OCN fields"
    "transfer OCN to CPL and prepare ATM fields"
    "transfer CPL to ATM"
    "transfer CPL to OCN"
    "ATM coupling step"
    "OCN coupling step"
    "prepare NEMO fields"
    "prepare ATM fields"
    "transfer ATM to CPL"
    "transfer OCN to CPL"
    "ATM stop to ATM start"
    "OCN stop to OCN start"
  )
  ntimer=${#timer_name[@]}

  # Find full pathnames for all coupler restart files associated with this runid
  # NOTE(review): lsdat is an external command whose output format is not
  # visible here. The awk filter prints the last field of each line for which
  # that field compares less than 8 -- confirm this is the intended test.
  # The status checked is that of awk, the last command of the pipeline.
  flist=$(lsdat "mc_${runid}_*_cplrs.tar" | awk '{if($NF<8){print $NF}}') ||
      bail "Problem finding file names."
  if [ -z "$flist" ]; then
    echo "    No files were found for runid $runid"
    return 0
  fi

  # One pathname per whitespace separated word (splitting is intentional)
  files=( $flist )
  first_file=$(basename "${files[0]}")
  last_file=$(basename "${files[${#files[@]}-1]}")
  # Drop a trailing 3 digit extension such as .001
  first_file=${first_file%.[0-9][0-9][0-9]}
  last_file=${last_file%.[0-9][0-9][0-9]}

  #  echo "first_file=$first_file"
  #  echo " last_file=$last_file"

  # Create one empty work file per timer plus the regular expression that
  # selects the lines of that timer: the end of the HH:MM:SS time stamp, the
  # timer label, then "count". Anchoring on "count" keeps labels that are
  # prefixes of longer labels apart.
  for (( idx=0; idx<ntimer; idx++ )); do
    printf -v sfx "%2.2d" $((idx+1))
    fname=${runid}_stats_$sfx
    rm -f "$fname"; touch "$fname"
    timer_file[idx]=$fname
    timer_rex[idx]=":[0-9][0-9]  *${timer_name[idx]}  *count"
  done

  # Group per timer info in separate files
  nfile=0
  for rs_file in "${files[@]}"; do
    echo "rs_file = $rs_file"
    # Remove any previous copy first: if the extraction fails, the stats of
    # an earlier archive must not be read a second time.
    rm -f "$stats_file"
    if ! tar xf "$rs_file" "$stats_file"; then
      echo >&2 "${Runame}: *** WARNING *** Unable to extract $stats_file from $rs_file ...skipped"
      continue
    fi
    echo "stats_file = $stats_file"
    nfile=$((nfile+1))
    # Append the lines of each timer to its own file
    for (( idx=0; idx<ntimer; idx++ )); do
      sed -n "/${timer_rex[idx]}/p" "$stats_file" >> "${timer_file[idx]}"
    done
  done
  [ $nfile -eq 0 ] && bail "No files were found."
  echo " "
  echo "    $nfile files were found for runid $runid"
  echo "    first file = $first_file"
  echo "    last  file = $last_file"

  # Coupler timer names mapped to terms used in JS Coupler Notes document
  #  ATM coupling step    ==  AGCM + A->C    (e.g. 3.6   sec per cpl time step)
  #  OCN coupling step    ==  OGCM + O->C    (e.g. 1.1   sec per cpl time step)
  #  transfer CPL to ATM  ==  C->A           (e.g. 0.001 sec per cpl time step)
  #  transfer CPL to OCN  ==  C->O           (e.g. 0.08  sec per cpl time step)
  #  prepare ATM fields   ==  O->A remap     (e.g. 0.3   sec per cpl time step)
  #  prepare NEMO fields  ==  A->O remap     (e.g. 0.4   sec per cpl time step)

  # Determine average per coupler time step from these timer specific files
  for (( idx=0; idx<ntimer; idx++ )); do
    # Ignore empty files; a non-empty file also guarantees n > 0 in awk
    if [ -s "${timer_file[idx]}" ]; then
      # The value averaged is the last field of every line
      curr_avg=$(awk 'BEGIN{x=0.0;n=0.0}{x+=$NF;n+=1}END{print x/n}' "${timer_file[idx]}")
      echo "average: ${timer_name[idx]} = $curr_avg seconds"
    fi
    [ $keep -eq 1 ] || rm -f "${timer_file[idx]}"
  done

  if [ $keep -eq 1 ]; then
    echo "Per timer files created for ${runid}:"
    ls -l "${timer_file[@]}"
  fi
}

for runid in $runid_list; do
  report_runid_stats "$runid"
done
