#!/usr/bin/env python
import os 
import subprocess
import shutil
import re
import json
import fnmatch
import pandas as pd
import time
import numpy as np
import argparse
import time
import sys

def check_esgf(srch_esgf_rpo="git@gitlab.science.gc.ca:rja001/search_esgf.git", silent=True):
    """
        Using the search_esgf tool, create a temporary list of all CCCma datasets on the esgf server,
        storing it in a temporary json file, which is then used to create a list of unique "conversion
        keys"

        Inputs:
            srch_esgf_rpo   (str)   : (optional) repo containing the external search_esgf tool
            silent          (bool)  : (optional) turn off superflous output

        Returns:
            conv_keys   (list of str)   : list of unique conversion keys, each of the form
                                          "source_id-variable-table_id"

        Raises:
            Exception   : if cloning, setting up, or running the search fails (message is the
                          failing command's stderr)
    """
    cwd             = os.getcwd()
    srch_dir        = "conv-key-srch"
    tmp_json_file   = "search_results.json"

    def _run(cmd, fail_msg):
        # run a command, raising (with the command's stderr as the message) on non-zero exit
        proc = subprocess.run(cmd, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if proc.returncode != 0:
            print(fail_msg)
            raise Exception(proc.stderr)

    try:
        # clone down search_esgf tool
        _run(['git', 'clone', '--branch', 'CLI-update', srch_esgf_rpo],
             "Failed to clone 'search_esgf'!")

        # setup a search
        os.chdir("search_esgf")
        _run(['./setup.py', srch_dir], "Failed to setup esgf search directory")

        # execute search (hopefully this CLI can be cleaned up at a later date)
        os.chdir(os.path.join("searches", srch_dir))
        cmd = "./search.py --search_esgf true --summarize_search_results false --do_local_inventory false --summarize_local_inventory false"
        cmd += " --check_if_already_downloaded false --retrieve_wget_scripts false --prepare_download false --examples false"
        cmd += " --filters institution_id:CCCma --store_params source_id table_id variable"
        _run(cmd.split(), "Failed to run esgf search!")

        # open stored output json file produced by the search
        with open(tmp_json_file, 'r') as f:
            dat = json.load(f)
    finally:
        # always navigate back to the original working directory, even if a step failed,
        # so the caller's process is not left inside the cloned repo
        os.chdir(cwd)

    # create list of conversion keys, one per dataset
    conv_keys = [
        "{}-{}-{}".format(info['source_id'], info['variable'], info['table_id'])
        for info in dat.values()
    ]
    if not silent:
        print("Found {} CCCma datasets!".format(len(conv_keys)))

    # get unique list of conversion keys
    conv_keys = list(set(conv_keys))

    # clean search_esgf repo (we are back in the original working directory)
    shutil.rmtree('search_esgf')
    return conv_keys

def set_frozen(conv_keys, stored_hash_file):
    """
        Take in a list of conversion keys and set the frozen status for all matching keys in the given
        hash file to true.

        Inputs:
            conv_keys       (list of strs)  : list of conversion keys, where each entry is "model-varname-CMORtable"
            stored_hash_file    (str)       : path to the stored hash file (json; each entry must
                                              carry a boolean 'frozen' field)

        Returns:
            updated_conv_keys   (list of strs)  : the subset of conv_keys for which at least one
                                                  stored entry was newly frozen
    """
    updated_conv_keys = []

    # get stored data
    with open(stored_hash_file, 'r') as f:
        stored_hash_data = json.load(f)

    for conv_key in conv_keys:
        # for every stored key of the form "<conv_key>-<suffix>", set frozen if not already so.
        # NOTE: a plain prefix test is used instead of re.match("{}-.*".format(conv_key), ...)
        # so regex metacharacters in a conversion key (e.g. '.' in a model name) cannot
        # cause false matches
        prefix = conv_key + "-"
        updated = False
        for key, entry in stored_hash_data.items():
            if key.startswith(prefix) and not entry['frozen']:
                entry['frozen'] = True
                updated = True
        if updated:
            updated_conv_keys.append(conv_key)

    # update file
    with open(stored_hash_file, 'w') as f:
        json.dump(stored_hash_data, f, sort_keys=True, indent=4)

    # return updated list
    return updated_conv_keys

if __name__ == "__main__":
    # define basic command line parser
    description = "Command line interface to inspect what CCCma data has been published on the esgf server"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-l', "--ls_convkeys", action="store_true", default=False,
                            help="List the unique conversion keys for the published datasets")
    parser.add_argument('-u', "--update_frzn_status", action="store_true", default=False,
                            help="Set frozen=true in the stored hash file, for the conversion keys matching the published datasets")
    parser.add_argument('-f', "--hashfile", action="store", 
                            help="REQUIRED with --update-frozen-status. Points to the json file containing the stored hash information")
    parser.add_argument('-q', "--quiet", action="store_true",
                            help="Supress superfluous output.")
    args = parser.parse_args()

    # arg checking
    if args.update_frzn_status and not args.hashfile:
        parser.error("--updt-frzn-status requireds a hashfile!")

    # get conversion keys of CCCma datasets
    conv_keys = check_esgf()

    # list them if desired
    if args.ls_convkeys:
        for conv_key in conv_keys: print(conv_key)

    # update hash file is desired
    if args.update_frzn_status:
        updated_pairs = set_frozen(conv_keys, args.hashfile)
        
        # check for updates
        if updated_pairs:
            if not args.quiet: 
                print("Updated {}! New frozen conversion keys:".format(args.hashfile)) 
                for pair in updated_pairs: print(pair)
            else:
                print(" ".join(updated_pairs))
        else:
            if not args.quiet: print("No updates to {}".format(args.hashfile))

