#!/bin/ksh -e
#
#  Author: Yanjun Jiao
#   Usage: tso.ncat runid [optional yearlist, ctablist, varslist in any order]
# Purpose: Split and concatenate NetCDF files into big chunks.
#
# The filenames must have CMIP6 standard format:
# fnamtype=${var}_${ctab}_${modid}_${expid}_${ripid}_gn_yyyy[mmddhh]-yyyy[mmddhh].nc
#----------------------------------------------------------------
# Load the run settings by sourcing tso.runinfo with this script's own
# arguments; the runid must be defined in cmip6_experiments.txt.
# "$@" is quoted so every argument is handed over exactly as it was received
# (unquoted, an argument containing blanks or glob characters would be re-split).
# NOTE(review): the rest of this script reads runid, dirwrk00, dircmip6, modid,
# expid, ripid, mipera, activity, ctablist and varslist without assigning them,
# so they are presumably set by tso.runinfo -- confirm.
. "$RUNPATH/ncconv/bin/tso.runinfo" "$@"

# limit the maximum output file size to 5GB (5 * 1024^3 bytes)
maxsize=$((5*1024*1024*1024))

# Explicit "|| exit 1": do not rely only on the -e from the shebang line, which
# is not in effect when the script is started as "ksh tso.ncat ...".
cd "$dirwrk00/$runid/$dircmip6" || exit 1

# Without an explicit table list, take every entry of the current directory
# except fx, Ofx and 3hr (-I is the GNU ls "ignore pattern" option).
[[ -n $ctablist ]] || ctablist=$(ls -I fx -I Ofx -I 3hr)    # TODO: time stamps of 3hr are wrong

# Join the NetCDF pieces of every selected variable into one file per variable.
#
# For each table in $ctablist and each entry below the table directory
# (restricted to the names in $varslist when that list is non-empty), the last
# entry that ls reports in <var>/gn is entered, and all files matching
# ${var}_${ctab}_${modid}_${expid}_${ripid}_gn_*.nc are concatenated with
# "ncrcat -h" when there are at least two of them. The output name keeps the
# start date of the first piece and the end date of the last piece. The pieces
# are deleted only after ncrcat has succeeded.
#
# Reads: ctablist varslist dirwrk00 runid dircmip6 modid expid ripid maxsize,
#        plus mipera and activity (messages only).
# Exits: status 1 when the pieces of a variable add up to more than $maxsize
#        bytes, when ncrcat fails, or when a directory cannot be entered.
#        ">>> ERROR" lines are written to stdout, where they were before.
#
# The positional parameters are used as the file list; they are local to the
# function call, so the script's own arguments are not disturbed.
tso_ncat_all() {
  for ctab in $ctablist; do
    case $ctab in
      # maximum years for each concatenated chunk in different frequencies
      # NOTE(review): freq, maxyear2D and maxyear3D are assigned here but are
      # not read anywhere in the visible script.
       *yr*) freq=yr  ; maxyear2D=200; maxyear3D=10 ;;
      *mon*) freq=mon ; maxyear2D=200; maxyear3D=10 ;;    # historical runs cover 165 years
      *day*) freq=day ; maxyear2D=200; maxyear3D=10 ;;
      *6hr*) freq=6hr ; maxyear2D=10 ; maxyear3D=1  ;;    # 6hrLev are 3D data on model levels to drive RCM, will be in 1-year chunk
      *3hr*) freq=3hr ; maxyear2D=10 ; maxyear3D=1  ;;    # all 3hr are 2D
      *1hr*) freq=1hr ; maxyear2D=10 ; maxyear3D=1  ;;
    esac

    tabdir=$dirwrk00/$runid/$dircmip6/$ctab
    cd "$tabdir" || exit 1

    for vardir in "$tabdir"/*; do
      # an empty table directory leaves the pattern unexpanded: nothing to do
      [[ -e $vardir ]] || continue
      var=${vardir##*/}

      # honour the optional variable filter
      [[ -z $varslist || " $varslist " == *" $var "* ]] || continue

      vers=$(ls -1 "$vardir/gn" | tail -1)
      cd "$vardir/gn/$vers" || exit 1
      fnamtype=${var}_${ctab}_${modid}_${expid}_${ripid}_gn_

      # file list for this chunk, in glob (sorted) order
      set -- "$fnamtype"*.nc
      # zero matches (pattern left as is) or a single file: nothing to join
      [[ $# -gt 1 ]] || continue

      # construct output filename: start date of the first piece, end date of
      # the last one
      for lastfnam in "$@"; do :; done
      date2=$(basename "$lastfnam" .nc | cut -d_ -f7 | cut -d- -f2)
      outfnam=${1%-*.nc}-${date2}.nc

      # determine the total size of output file
      # %.0f instead of %d: some awk builds clamp %d to the 32-bit range, which
      # would hide totals above 2 GiB from the size check below
      outsize=$(ls -l -- "$@" | awk '{ sum += $5 } END { printf "%.0f\n", sum }')

      if [[ $outsize -le $maxsize ]]; then #concatenate
        echo "Concatenating ... $mipera/$activity/CCCma/$modid/$expid/$ripid/$ctab/$var/gn/$vers/$outfnam"
        # An explicit if/else is required: in "ncrcat ... && rm ..." a failing
        # ncrcat is exempt from -e and the script would silently carry on.
        if ncrcat -h "$@" "$outfnam"; then
          rm -f -- "$@"
        else
          echo ">>> ERROR: ncrcat failed for $outfnam, input files are left in place"
          echo ">>> Check files in ... $mipera/$activity/CCCma/$modid/$expid/$ripid/$ctab/$var"
          exit 1
        fi
      else
        echo ">>> ERROR: the total size of output file $outsize is larger than $maxsize "
        echo ">>> Check files in ... $mipera/$activity/CCCma/$modid/$expid/$ripid/$ctab/$var"
        # a bare "exit" here would return the status of the echo above, i.e. 0
        exit 1
      fi
    done    #variable loop
  done    #ctablist loop
}

tso_ncat_all

