#!/bin/bash
set -e
# Take in the metadata contained in the given config file and populate the following
# tables to be used in the netcdf conversion of model output:
#   - the experiments table
#   - the CMOR controlled vocabulary files (if necessary)
#   - the CCCma variable tables (if necessary)

#=========================
# Define useful functions
#=========================

# Function to bail intelligently
#   Prints "<script name> ERROR: <message>" to stderr and exits with status 1.
#   Arguments: the message to print (all arguments are joined with spaces)
bail_prefix="$(basename "$0") ERROR:"
function bail(){
    # bail_prefix already ends with a colon, so only a space separates it from the message
    echo "${bail_prefix} $*" >&2
    exit 1
}

# function to make sure git has an email to use for commit messages
#   Globals: GIT_EMAIL (written) - whatever 'git config user.email' reported
function set_git_email(){
    # ask git for the configured email; '|| :' keeps a missing value from
    # counting as a failure
    GIT_EMAIL=$(git config user.email || : )

    # nothing configured: fall back to a default for this local repo. This is
    # required because when on the BE machs, git will complain because it doesn't
    # like what the auto generated email produces
    [[ -n $GIT_EMAIL ]] || git config user.email "$(whoami)@science.gc.ca"
}

# function to get the checked-out branch name of a given repo
#   Arguments: $1 - path to the repository
#   Outputs:   the branch name on stdout; an empty line when git reports no
#              symbolic ref for HEAD (e.g. a detached HEAD)
#   Returns:   non-zero only when the repository directory can't be entered
function get_branch_name(){
    local repo_dir=$1
    local branch

    # Query git from inside the repo. If the cd fails we must NOT fall through and
    # ask git about whatever repo the current directory happens to be in.
    # cd's stdout is discarded because it prints the directory when CDPATH is used.
    branch=$(
        cd "$repo_dir" > /dev/null || exit 1
        git symbolic-ref -q HEAD || :       # returns something like 'refs/heads/BRANCHNAME'
    ) || return 1

    echo "${branch#refs/heads/}"            # strip out refs/heads/
}

#==============
# Parse CL Args
#==============
# Arguments are given as var=value pairs; anything else is rejected
for arg in "$@"; do
    case $arg in
        *=*)
            # split on the FIRST '=' only, so the value may itself contain '='
            var=${arg%%=*}      # split out variable
            val=${arg#*=}       # split out variable value
            case $var in
                config) CONFIG_FILE="$val" ;; # file containing configuration params
                srcdir) CCRNSRC="$val"     ;; # location of source code
                     *) bail "Invalid command line arg --> $arg <--" ;;
            esac
            ;;
        *) bail "Invalid command line arg --> $arg <--" ;;
    esac
done

# CCRNSRC is only assigned here when 'srcdir' is given; otherwise whatever value
# it already has is used
[[ -n $CONFIG_FILE ]] || bail "You must provide a config file! ex: config=/path/to/canesm.cfg"
[[ -n $CCRNSRC ]] || bail "CCRNSRC not present in the environment! Source the environment file or set it with 'srcdir' arg"

#===================
# Setup environment
#===================
# get access to required pycmor tools
NCCONV_DIR=${CCRNSRC}/ncconv
export PATH=${NCCONV_DIR}/pycmor:$PATH

# load config file (quoted so a path containing whitespace is read as one file)
source "$CONFIG_FILE"

# make sure conda is available and try to activate the desired pycmor environment
#   Note: CONDA_AVAIL is assigned in both cases so that a value inherited from the
#         environment or the config file can't stand in for a missing conda
if type conda > /dev/null 2>&1; then
    CONDA_AVAIL=1
else
    CONDA_AVAIL=0
fi
if (( CONDA_AVAIL == 1 )) ; then
    [[ -n ${pycmor_env} ]] || bail "pycmor_env is not set! Is your environment setup correct?"
    source activate "${pycmor_env}" || bail "${pycmor_env} not available! Is your environment setup correct?"
else
    bail "conda is not available! Is your environment setup correct?"
fi

# set Hall number
# THIS SHOULD BE REMOVED AT SOME POINT - THE ENVIRONMENT SHOULD CONTAIN THIS INFORMATION
case $(hostname) in
    hpcr3-* | *ppp3* | cs3be* | xc3e* ) HALLN=3 ;;
    hpcr4-* | *ppp4* | cs4be* | xc4e* ) HALLN=4 ;;
    * )
        bail "Could not determine the hall number!"
        ;;
esac

#===============================================
# Check for required meta data and set overrides
#===============================================
# apply any overrides given for the id fields
if [[ -n $override_source_id ]]; then source_id=$override_source_id; fi
if [[ -n $override_activity_id ]]; then activity_id=$override_activity_id; fi
if [[ -n $override_experiment_id ]]; then experiment_id=$override_experiment_id; fi
if [[ -n $override_subexperiment_id ]]; then subexperiment_id=$override_subexperiment_id; fi

# every one of these fields must be non-empty - bail on the first one that isn't
[[ -n $variant_label ]]         || bail "MODEL VARIANT LABEL (rip number) MUST BE SET!"
[[ -n $source_id ]]             || bail "SOURCE ID MUST BE SET!"
[[ -n $activity_id ]]           || bail "ACTIVITY ID MUST BE SET!"
[[ -n $experiment_id ]]         || bail "EXPERIMENT ID MUST BE SET!"
[[ -n $parent_runid ]]          || bail "PARENT RUNID MUST BE SET!"
[[ -n $parent_branch_time ]]    || bail "PARENT BRANCH TIME MUST BE SET!"

# an unset or empty subexperiment defaults to "none"
: "${subexperiment_id:=none}"

#========================
# Create Experiment Entry
#========================

# define vars pertaining to the experiment table
#   ncconv_exptab is the path to the table file; its directory is the repo clone
[[ -n $ncconv_exptab ]] || bail "ncconv_exptab MUST BE SET!"
EXPTAB_DIR=$(dirname "$ncconv_exptab")
EXPTAB_FILENAME=$(basename "$ncconv_exptab")

# clone experiment table repo, if necessary
if [[ ! -d $EXPTAB_DIR ]]; then
    git clone --branch "$ncconv_exptab_cmmt" "$ncconv_exptab_repo" "$EXPTAB_DIR" ||
        bail "Failed to clone $ncconv_exptab_repo into $EXPTAB_DIR!"
fi

# everything up to the experiment table commit runs from inside the table repo
cd "$EXPTAB_DIR" || bail "Could not change into the experiment table directory $EXPTAB_DIR!"

#----------------------------------
# determine branch time information
#----------------------------------
# BTIP => branch time in parent
# BTIC => branch time in child

# The year/month fields handled here may be zero padded (e.g. month "08" or year
# "0100"). Bash arithmetic and printf both read a leading zero as octal, so every
# field is forced to base 10 with the 10# prefix before it is used.

# Print the branch time in the parent as YYYY:MM:01:00
#   Arguments: $1 - parent branch time of the form <year>_m<month>
#   Outputs:   the first day of the month FOLLOWING the given one
#   Returns:   non-zero if the year or month isn't a string of digits
function exptab_branch_time_in_parent(){
    local yr=${1%_*}
    local mnth=${1#*_m}
    [[ $yr =~ ^[0-9]+$ && $mnth =~ ^[0-9]+$ ]] || return 1
    yr=$((10#$yr))
    mnth=$((10#$mnth))
    if (( mnth == 12 )) ; then
        # roll over to first day of next year
        yr=$((yr+1))
        mnth=1
    else
        # roll over to next month
        mnth=$((mnth+1))
    fi
    printf '%04d:%02d:01:00\n' "$yr" "$mnth"    # pad to four/two digits
}

# Print the branch time in the child as YYYY:MM:01:00
#   Arguments: $1 - run start time, either <year> or <year>:<month>
#   Returns:   non-zero if the year or month isn't a string of digits
function exptab_branch_time_in_child(){
    local yr=${1%:*}
    local mnth=01       # without a month the run must have started from january
    # check if the user gave a month
    if [[ $1 == *:* ]]; then
        mnth=${1#*:}
    fi
    [[ $yr =~ ^[0-9]+$ && $mnth =~ ^[0-9]+$ ]] || return 1
    printf '%04d:%02d:01:00\n' "$((10#$yr))" "$((10#$mnth))"
}

# branch_time_in_parent
branch_time_in_parent=$(exptab_branch_time_in_parent "$parent_branch_time") ||
    bail "Could not parse parent_branch_time --> $parent_branch_time <-- ! Expected <year>_m<month>"

# branch_time_in_child
branch_time_in_child=$(exptab_branch_time_in_child "$run_start_time") ||
    bail "Could not parse run_start_time --> $run_start_time <-- ! Expected <year> or <year>:<month>"

#-----------------------------
# build experiment table entry
#-----------------------------

# One table row, columns separated by ' | '. The columns that are left empty here
# (the one after the variant label and the last two) are not filled in by this script.
printf -v NEW_ENTRY '| %s | %s | %s | %s | %s | %s | | %s | %s | %s | %s:%s | %s | hall%s | | |' \
    "$runid" "$activity_id" "$source_id" "$experiment_id" "$subexperiment_id" \
    "$variant_label" "$parent_runid" "$branch_time_in_parent" \
    "$branch_time_in_child" "${run_start_time/:/m}" "${run_stop_time/:/m}" "$USER" "$HALLN"

#-------------------------------------------------------------------------
# If this is a CMIP run confirm that the parent_runid is ALSO in the table
#-------------------------------------------------------------------------

# all runids currently in the table, one per line: the 2nd '|'-delimited column
# of every row with the surrounding whitespace removed
runids=$(awk -F'|' '{ gsub(/^[ \t]+|[ \t]+$/, "", $2); print $2 }' "$EXPTAB_FILENAME")
if (( CANESM_CMIP == 1 )); then
    # literal, whole-line comparison: a word-boundary regex would let 'abc' match
    # 'abc-01' and would treat characters in the runid as regex syntax
    grep -qxF -- "$parent_runid" <<< "$runids" ||
        bail "CANESM_CMIP==1: the parent runid MUST have an entry in the experiment table!"
fi

#----------------------------------------------------------------------------------------------
# check if there is already an entry for this runid, and ask user if they want to update, if so
#----------------------------------------------------------------------------------------------

# Print $1 with leading and trailing whitespace removed
function trim_table_field(){
    local field=$1
    field=${field#"${field%%[![:space:]]*}"}    # drop leading whitespace
    field=${field%"${field##*[![:space:]]}"}    # drop trailing whitespace
    printf '%s' "$field"
}

# Line number of the first row (heading row excluded) whose runid column - the 2nd
# '|'-delimited field, ignoring surrounding whitespace - is exactly $runid. Only
# that column is inspected, so the parent_runid column can never match. The
# comparison is a literal string one: a word-boundary regex would let 'abc' match
# 'abc-01'. The runid is handed over through the environment so awk doesn't
# interpret backslashes in it.
OLD_ENTRY_LINE=$(RUNID_TO_FIND="$runid" awk -F'|' '
    NR > 1 {
        id = $2
        gsub(/^[ \t]+|[ \t]+$/, "", id)
        if ((id "") == (ENVIRON["RUNID_TO_FIND"] "")) { print NR; exit }
    }' "$EXPTAB_FILENAME")

if [[ -n $OLD_ENTRY_LINE ]]; then

    # extract old entry
    OLD_ENTRY=$(sed -n "${OLD_ENTRY_LINE}p" "$EXPTAB_FILENAME")

    # Parse table headings and entries into arrays, where the '|' is the delimiter
    #   Note: the first entry will be empty because the table starts with a '|'
    IFS='|' read -ra HEADINGS <<< "$(head -n 1 "$EXPTAB_FILENAME")"
    IFS='|' read -ra OLD_VALUES <<< "$OLD_ENTRY"
    IFS='|' read -ra NEW_VALUES <<< "$NEW_ENTRY"

    # check if old entry is equivalent to new entry
    ENTRY_EQUIVALENT="yes"
    for ((i=1;i<${#HEADINGS[@]};++i)); do
        # trim trailing and leading whitespace
        OLD_VALUE=$(trim_table_field "${OLD_VALUES[i]}")
        NEW_VALUE=$(trim_table_field "${NEW_VALUES[i]}")
        # both sides quoted: an unquoted right-hand side is matched as a glob pattern
        if [[ "$NEW_VALUE" != "$OLD_VALUE" ]]; then
            ENTRY_EQUIVALENT="no"
            break
        fi
    done

    # if entry is different, ask user if they'd like to update it
    if [[ $ENTRY_EQUIVALENT == "no" ]]; then

        # print out old entry
        echo "An existing and different entry for $runid was found in $EXPTAB_FILENAME!"
        echo "Old entry:"
        for ((i=1;i<${#HEADINGS[@]};++i)); do
            printf "%30s : %s\n" "$(trim_table_field "${HEADINGS[i]}")" "${OLD_VALUES[i]}"
        done

        # print out new entry and ask user if they wish to replace it
        printf "\nDo you want to replace it with\n"
        for ((i=1;i<${#HEADINGS[@]};++i)); do
            printf "%30s : %s\n" "$(trim_table_field "${HEADINGS[i]}")" "${NEW_VALUES[i]}"
        done

        # don't proceed until valid input is provided
        while true; do
            # read fails once stdin is exhausted, so stop rather than guess an answer
            read -r -p "??? [Y/N] " RESPONSE ||
                bail "No response received - can't decide whether to replace the entry for $runid!"
            case $RESPONSE in
                [yY])
                    # replace OLD_ENTRY with NEW_ENTRY
                    #   The row is swapped by line number with the new text passed through
                    #   the environment, so nothing in either entry is read as regex or
                    #   replacement syntax. The table is only rewritten once awk succeeded.
                    UPDATED_TABLE=$(NEW_ENTRY_TEXT="$NEW_ENTRY" awk -v n="$OLD_ENTRY_LINE" '
                        NR == n { print ENVIRON["NEW_ENTRY_TEXT"]; next }
                        { print }' "$EXPTAB_FILENAME")
                    printf '%s\n' "$UPDATED_TABLE" > "$EXPTAB_FILENAME"
                    ADDED_OR_UPDATED="updated"
                    break
                    ;;
                [nN])
                    # keep OLD_ENTRY
                    echo "Maintaining old entry - update the metadata $CONFIG_FILE !"
                    echo "Halting..."
                    exit 1
                    ;;
                *)
                    echo "Please respond with [yY/nN]!"
                    ;;
            esac
        done
    fi
else
    # insert entry into experiment table, exactly as built
    printf '%s\n' "$NEW_ENTRY" >> "$EXPTAB_FILENAME"
    ADDED_OR_UPDATED="added"
fi

# if entry was added or updated, commit change
if [[ -n $ADDED_OR_UPDATED ]] ; then
    # if on master create a new branch named after the runid, otherwise commit to
    # the current one
    #   The current directory already is the experiment table repo, so it is
    #   queried directly - a relative EXPTAB_DIR would not resolve from in here
    CURRENT_BRANCH=$(get_branch_name "$PWD")
    if [[ $CURRENT_BRANCH == "master" ]]; then
        git checkout -b "$runid"
    fi

    # add/commit
    set_git_email
    git add "$EXPTAB_FILENAME"
    git commit -m "$ADDED_OR_UPDATED experiment entry for $runid in $EXPTAB_FILENAME"
fi

#====================================
# Query and potentially alter the CVs
#====================================

# define locations pertaining to the CVs
suppl_json_pth=${CCRNSRC}/CanESM/CONFIG/COMMON/ncconv_cccma_json_files
CMOR_TABS_DIR=${NCCONV_DIR}/cmor_tools/cmip6-cmor-tables
CMOR_CV_FILE=${CMOR_TABS_DIR}/Tables/CMIP6_CV.json
cccma_var_file="$suppl_json_pth/cccma_vars.json"

#----------------------------------------------------------------------
# if desired, first add cccma specific variables to the variable tables
#----------------------------------------------------------------------

if (( add_cccma_vars == 1 )) ; then
    # if the given source_id is just an official source id with the runid attached, alter the "true source"
    #   (the expansion leaves source_id untouched when it doesn't end in "-<runid>")
    vartab_source_id=${source_id%-"${runid}"}

    alter_cmor_tabs add2vartabs "$cccma_var_file" "$vartab_source_id" \
        -c "$NCCONV_DIR/tables/variable_tables" -C "$CMOR_TABS_DIR/Tables" ||
        bail "Failed to add CCCma variables in $cccma_var_file to the CMOR tables! Is the json syntax correct?"

    # if this is an "official" run, commit changes in the variable table so pycmor can be ran with the '--prod' flag
    if (( CANESM_CMIP == 1 )) ; then
        TMP_PROD_BRANCH="prod-${runid}"

        # update/commit cmor table submodule if necessary
        #   (this must come first: the main repo commit below records the submodule)
        cd "$CMOR_TABS_DIR" || bail "Could not change into $CMOR_TABS_DIR!"
        if [[ -n "$(git status --porcelain)" ]]; then
            CV_BRANCH=$(get_branch_name "$CMOR_TABS_DIR")
            [[ "$CV_BRANCH" == "$TMP_PROD_BRANCH" ]] || git checkout -b "$TMP_PROD_BRANCH"
            set_git_email
            git add Tables/*
            git commit -m "added CCCma variables to variable tables"
        fi

        # update/commit main repo if necessary
        cd "$NCCONV_DIR" || bail "Could not change into $NCCONV_DIR!"
        if [[ -n "$(git status --porcelain)" ]]; then
            NCCONV_BRANCH=$(get_branch_name "$NCCONV_DIR")
            [[ "$NCCONV_BRANCH" == "$TMP_PROD_BRANCH" ]] || git checkout -b "$TMP_PROD_BRANCH"
            set_git_email
            git add tables/variable_tables/*
            git add cmor_tools/cmip6-cmor-tables
            git commit -m "added CCCma variables to variable tables"
        fi
    fi
fi

#--------------------------------------------------------------------------------------------------
# if the experiment entry was added, updated, or equivalent to an older entry, check that its valid
#--------------------------------------------------------------------------------------------------
# Note: we do this for equivalent entries as a user may have entered their experiment info outside of
#       this script

if [[ -n $ADDED_OR_UPDATED ]] || [[ $ENTRY_EQUIVALENT == 'yes' ]]; then
    # start every flag from a known state so a value inherited from the environment
    # or the config file can't influence the checks
    source_not_present=0
    experiment_not_present=0
    source_still_not_present=0
    experiment_still_not_present=0

    # check for source id and experiment id
    check_CVs -q --src "$source_id" --CV "$CMOR_CV_FILE" || source_not_present=1
    check_CVs -q --exp "$experiment_id" --CV "$CMOR_CV_FILE" || experiment_not_present=1

    if (( CANESM_CMIP == 1 )) ; then
        # we won't modify the CVs, so source id and experiment id must be present
        #   NOTE(review): these messages are single quoted, so they print the variable
        #   NAME ($source_id) literally rather than its value - confirm that is intended
        if (( source_not_present == 1 )) ; then
            bail 'CANESM_CMIP == 1: $source_id MUST BE INCLUDED IN THE OFFICIAL CMOR CVs!'
        fi
        if (( experiment_not_present == 1 )) ; then
            bail 'CANESM_CMIP == 1: $experiment_id MUST BE INCLUDED IN THE OFFICIAL CMOR CVs!'
        fi
    else
        # switch to development CVs, if necessary
        #   git runs in a subshell and only if the cd succeeded, so the script's own
        #   working directory is untouched and no other repo can be checked out by mistake
        CV_BRANCH=$(get_branch_name "$CMOR_TABS_DIR")
        if [[ "$CV_BRANCH" != "cccma" ]]; then
            ( cd "$CMOR_TABS_DIR" > /dev/null && git checkout cccma > /dev/null ) ||
                bail "Failed to check out the 'cccma' branch in $CMOR_TABS_DIR!"
        fi

        # check if the CVs have already been modified
        DIRTY_FILES=$(cd "$CMOR_TABS_DIR" > /dev/null && git status --porcelain) ||
            bail "Failed to query the git status of $CMOR_TABS_DIR!"
        if [[ -z $DIRTY_FILES ]] ; then
            # CVs haven't been modified - make sure that at least the experiment id or the source id aren't contained in the official tables
            if ! (( source_not_present == 1 || experiment_not_present == 1 )) ; then
                bail_msg='CANESM_CMIP == 0: either $source_id or $experiment_id must NOT be an entry in the official CMOR CVs!'
                bail_msg="${bail_msg} This is necessary to create unique nc files names unique to those produced from OFFICIAL runs!"
                bail "$bail_msg"
            fi
        fi

        # add CCCma source and experiment ids to CV
        src_id_file=${suppl_json_pth}/cccma_source_ids.json
        exp_id_file=${suppl_json_pth}/cccma_experiments.json
        alter_cmor_tabs add2cv --src "$src_id_file" --CV "$CMOR_CV_FILE" ||
            bail "Failed to add CCCma source information in $src_id_file to cmor CVs! Is the json syntax correct?"
        alter_cmor_tabs add2cv --exp "$exp_id_file" --CV "$CMOR_CV_FILE" ||
            bail "Failed to add CCCma experiment information in $exp_id_file to cmor CVs! Is the json syntax correct?"

        # confirm that users source id and experiment id are NOW both contained in the CVs
        if (( source_not_present == 1 )) ; then
            # check if the given source id has the runid appended to it, i.e. CanESM5-$runid
            if [[ "$source_id" == *"-${runid}" ]] ; then
                # check if the "true source id" is contained in the CVs, and copy its entry if so
                true_source_id=${source_id%-"${runid}"}
                if check_CVs -q --src "$true_source_id" --CV "$CMOR_CV_FILE" ; then
                    alter_cmor_tabs cpCVent -s "$source_id" "$true_source_id" --CV "$CMOR_CV_FILE" ||
                        bail "Failed to copy the CV entry of $true_source_id to $source_id!"
                fi
            fi

            # check the updated CVs
            check_CVs -q --src "$source_id" --CV "$CMOR_CV_FILE" || source_still_not_present=1
            if (( source_still_not_present == 1 )) ; then
                bail_msg="Unsupported source_id ---> ${source_id} <--- ! source_id must be contained in $src_id_file"
                bail_msg="${bail_msg} or be one of CanESM5-runid, or CanESM5-CanOE-runid. Note that if you provided"
                bail_msg="${bail_msg} a unique experiment_id, then CanESM5 or CanESM5-CanOE are also valid."
                bail "$bail_msg"
            fi
        fi
        if (( experiment_not_present == 1 )) ; then
            # check the updated CVs
            check_CVs -q --exp "$experiment_id" --CV "$CMOR_CV_FILE" || experiment_still_not_present=1
            if (( experiment_still_not_present == 1 )) ; then
                bail_msg="Your given experiment_id --> ${experiment_id} <--- is not contained in $exp_id_file!"
                bail_msg="${bail_msg} Add an entry to the json file to allow for netcdf conversion!"
                bail "$bail_msg"
            fi
        fi
    fi

    # bail if user didn't pick supported activity or subexperiment, regardless if this is a CMIP run or not
    check_CVs -q --act "$activity_id" --CV "$CMOR_CV_FILE" || bail "$activity_id is not a supported activity_id! See 'canesm.cfg'"
    check_CVs -q --subexp "$subexperiment_id" --CV "$CMOR_CV_FILE" || bail "$subexperiment_id is not a supported subexperiment_id! See 'canesm.cfg'"
fi
