#!/usr/bin/env python
"""
    This tool was created to parse the plethora of log information that can
    come out of pycmor.py. It can be imported and then its functions used as
    desired within other python code, but its main purpose is to be called from
    the command line after a pycmor.py conversion has run.

    Clint Seinen 2019-03-15
"""
import argparse
import sys
import glob
import os
import re
import json

def summarize_driver_logs(runid, work_dir=None, dlog_prefix='pycmor-driver-log'):
    """
        Opens all driver logs in work_dir associated with the given runid and
        combines the output statistics, printing the results to stdout.

        When printing, this function first attempts to print the counts
        associated with the keys in the 'preconv_stat_keys' list below, which
        provide information on how pycmor interpreted the variable tables.
        Then, it prints the counts in 'success_stat_keys', which provide a high level
        summary of the success rate for attempted conversions. After these two
        lists are printed, it is assumed that all remaining keys count
        individual errors that occurred, which are printed last.

        Inputs:
            runid       (str) : the runid of interest.
            work_dir    (str) : (optional) the working directory containing the driver logs.
                                    Defaults to the working dir.
            dlog_prefix (str) : (optional) the common prefix across all driver logs.
                                    Defaults to 'pycmor-driver-log'.
    """
    # resolve the default at call time; a default of os.getcwd() in the
    # signature would be frozen at import time
    if work_dir is None:
        work_dir = os.getcwd()

    preconv_stat_keys = [
                        'no. of variables inspected',
                        'no. vars with undefined diag files',
                        'no. vars with undefined diag files AND w/o CCCma Check'
                        ]
    success_stat_keys = [
                        'Attempted conversions',
                        'vars converted w/o exceptions'
                        ]

    conv_stats = {}

    # get list of tables and their associated driver logs
    #   (pass work_dir through so a non-default directory is honoured)
    tables,dlogs = get_tables(work_dir=work_dir,dlog_prefix=dlog_prefix,runid=runid)

    # determine conversion stats
    for table,dlog in zip(tables,dlogs):
        # get table stats
        with open(dlog,'r') as f:
            log_txt = f.read()
        try:
            tabstats_txt = re.search(r'(?<=(pycmor stats:)).*?(?=(\n---))',log_txt,re.DOTALL).group(0)
        except AttributeError:
            # re.search returned None -> stats section missing from the log
            print("WARNING: It appears that pycmor stats weren't written into the {} driver log for {}".format(table,runid))
            print("\t Was the conversion interrupted??")
            print("{} stats won't be included in the summary".format(table))
            continue
        tabstats = parse_tabstats(tabstats_txt)

        # add stats to conversion summary
        for key,cnt in tabstats.items():
            conv_stats[key] = conv_stats.get(key,0) + cnt

    # output to stdout
    print('\nConversion Statistics for {}:\n'.format(runid))

    # preconversion stats
    for key in preconv_stat_keys:
        cnt = conv_stats.pop(key,None)
        # print any key that was present, including zero counts
        if cnt is not None:
            print('\t{:<55} : {}'.format(key,cnt))
    print("")

    # success stats
    for key in success_stat_keys:
        cnt = conv_stats.pop(key,None)
        if cnt is not None:
            print('\t{:<55} : {}'.format(key,cnt))

    # error stats (all remaining keys are assumed to be error counters)
    print("\n\tExceptions/Errors:")
    for key,cnt in conv_stats.items():
        print('\t\t{:<47} : {}'.format(key,cnt))
    print('\n')

def parse_tabstats(txt):
    """
        Parse the text and return a dictionary of the fields with their associated counts.
        Assumes that the format matches the following format:

        "counter-description1: count
         counter-description2: count
         multi-line description of
         counter3: count
         ...
        "
        Note that lines with a colon and no count will be skipped. i.e. in
        "Errors:
            optional deck pblms: 2
        "
        'Errors:' will be skipped as it is assumed it is a heading in the stats text.

        Inputs:
            txt     (str) : text containing the desired stats.

        Outputs:
            stats   (dict) : dictionary mapping counter descriptions (str) to counts (int)
    """
    stats = {}

    # strip out tabs, trailing/leading whitespace, and split on lines
    lines = txt.replace("\t","").strip().split("\n")

    # process the lines, removing empty lines and headings, and combining
    #   multiline counter descriptions
    processed_lines = []
    join = False    # True while we are inside a multiline counter description
    for l in lines:
        # skip empty lines
        if not l: continue

        # line ends with a counter, e.g. "description: 3"
        if re.match(r'.*:\s*[0-9]+$',l):
            # combine with text from previous line in lines
            if join:
                processed_lines[-1] += l
                join = False
            # add new entry in processed_lines
            else:
                processed_lines.append(l)
        # heading line or multiline cntr description
        else:
            # heading line (colon but no trailing count) -> skip
            if re.match(r'.*:\s*$',l):
                continue
            # part of multiline description
            else:
                if join:
                    processed_lines[-1] += l
                else:
                    processed_lines.append(l)
                join = True

    # create dictionary and return
    for l in processed_lines:
        # rsplit so a colon inside the description doesn't break parsing;
        # the count is always after the final colon
        key,cnt_txt = l.rsplit(':',1)
        stats[key.strip()] = int(cnt_txt.strip())
    return stats

def summarize_pycmor_logs(table, runid, work_dir=None, pycmor_log_dir='pycmor_logs',
                            cmor_log_dir='cmor_logs', dlog_prefix='pycmor-driver-log'):
    """
        For the specified table and runid, go through the detailed pycmor logs
        in pycmor_log_dir and print, to stdout, a list of all the variables
        that didn't convert and the reason why.

        Inputs:
            table           (str) : table name
            runid           (str) : runid used in the conversion of interest
            work_dir        (str) : (optional) the working directory containing the driver logs and pycmor log dir.
                                        Defaults to the working dir.
            pycmor_log_dir  (str) : (optional) the directory name that contains the detailed pycmor log files.
                                        Defaults to 'pycmor_logs'.
            cmor_log_dir    (str) : (optional) the directory name that contains the detailed log files produced by CMOR.
                                        Defaults to 'cmor_logs'.
            dlog_prefix     (str) : (optional) the common prefix across all driver logs.
                                        Defaults to 'pycmor-driver-log'.

        Raises:
            Exception : if the driver log contains no stats section (interrupted conversion).
    """
    # resolve the default at call time; a default of os.getcwd() in the
    # signature would be frozen at import time
    if work_dir is None:
        work_dir = os.getcwd()

    # Define useful vars
    dlog        = os.path.join(work_dir,"{}_{}_{}.log".format(dlog_prefix,runid,table)) # driver log
    stats_rpatt = r'(pycmor stats:.*?\n---)'
    llog_gpatt  = os.path.join(work_dir,pycmor_log_dir,runid,table,"*.log")                   # pattern for list logs
    jlog_gpatt  = os.path.join(work_dir,pycmor_log_dir,runid,table,"*.json")                  # pattern for json logs

    # Create description mapping (log filename stem -> human readable failure reason)
    descript_dict = {
                    'not_found'             : 'Input file not found',
                    'nodiag_nocheck'        : 'No CCCma diag file defined, with no CCCma sign off!',
                    'cccma_table_def_errors': 'No CCCma TS var name defined, or unknown modelling realm',
                    'cmor_table_mismatch'   : 'Variable name not found in CMIP6 CMOR tables (are var names consistent?)',
                    'optional_deck_errors'  : 'Failed to run the optional deck. See {}/{}/{}/optional_deck_errors.log for details'.format(pycmor_log_dir,runid,table),
                    'cmor_errors'           : 'CMOR threw an exception. See the associated variable log in {}/{}/{}/ for details.'.format(cmor_log_dir,runid,table),
                    'pycmor_errors'         : 'Halted by pycmor error. See {}/{}/{}/pycmor_errors.log for details'.format(pycmor_log_dir,runid,table)
                    }

    # Confirm conversion completed
    with open(dlog,'r') as f:
        dlog_txt = f.read()
    if not re.search(stats_rpatt,dlog_txt,re.DOTALL):
        # stats not found -> table_utils.convert_table() was interrupted and failed to write logs/stats
        err_txt = "Conversion interrupted in {} for {}! pycmor failed to write conversion stats!".format(table,runid)
        raise Exception(err_txt)

    # Get detailed log filenames, using json logs if possible and skipping hash delta logs
    jlogs = glob.glob(jlog_gpatt)
    llogs = [ log for log in glob.glob(llog_gpatt) if log.replace(".log",".json") not in jlogs ]
    llogs = [ log for log in llogs if "hash_deltas" not in log ]
    logs  = jlogs + llogs

    # parse logs
    fail_vars = []
    for log in logs:
        # get log name without extension, to be used to
        #   extract failure description
        log_name = os.path.basename(log).replace('.json','').replace('.log','')

        # get list of vars from file
        with open(log,'r') as f:
            # check the suffix, not the whole path, so a '.json' elsewhere
            # in the path can't misroute a list log
            if log.endswith('.json'):
                log_dat = json.load(f)
                log_vars = log_dat['vars'].keys()
            else:
                log_vars = [ v.strip() for v in f.readlines() ]

        # get description and add to failed conversion list; fall back to the
        # raw log name for unrecognized log files
        desc = descript_dict.get(log_name, log_name)
        for var in log_vars:
            fail_vars.append((var,desc))

    print("\nConversion Failures in {} for {}.\n".format(table,runid))
    for var, desc in fail_vars:
        print("\t{:^15} : {}".format(var,desc))
    print("\nNOTE: Any failed variables not found in the above list were skipped in the conversion process because")
    print("      they don't have a CCCma diag file defined AND text was found in the CCCma Check column, which we assume")
    print("      means we aren't providing these vars. If we are providing it, please provide the necessary diag file.\n")

def check_deltas(table, runid, work_dir=None, pycmor_log_dir='pycmor_logs',
                    fdelta_log='frozen_hash_deltas.log',delta_log='hash_deltas.log'):
    """
        For the specified table and runid, checks for hash deltas and returns a list of vars that experienced any.

        Inputs:
            table           (str) : table name
            runid           (str) : runid used to produce the conversion of interest.
            work_dir        (str) : (optional) the working directory containing the driver logs and pycmor log dir.
                                        Defaults to the working dir.
            pycmor_log_dir  (str) : (optional) the directory name, within work_dir that contains the detailed pycmor logs.
                                        Defaults to 'pycmor_logs'
            fdelta_log      (str) : (optional) the filename for the list of frozen vars that experienced hash updates.
                                        Defaults to 'frozen_hash_deltas.log'
            delta_log       (str) : (optional) the filename for the list of vars that experienced hash updates.
                                        Defaults to 'hash_deltas.log'

        Outputs:
            frozen_deltas       (bool)          : True if frozen hash deltas noted.
            frozen_delta_vars   (lst of strs)   : List of frozen variables that experienced hash deltas.
            deltas              (bool)          : True if any hash deltas noted.
            delta_vars          (lst of str)    : List of variables that experienced hash deltas.
    """
    # resolve the default at call time; a default of os.getcwd() in the
    # signature would be frozen at import time
    if work_dir is None:
        work_dir = os.getcwd()

    # construct paths
    fdelta_log_pth  = os.path.join(work_dir,pycmor_log_dir,runid,table,fdelta_log)
    delta_log_pth   = os.path.join(work_dir,pycmor_log_dir,runid,table,delta_log)

    # check for updates -- the presence of a delta log file means deltas occurred
    deltas      = False
    delta_vars  = []
    fdeltas     = False
    fdelta_vars = []
    if os.path.isfile(delta_log_pth):
        deltas = True
        with open(delta_log_pth,'r') as f:
            delta_vars = [ v.strip() for v in f.readlines() ]
    if os.path.isfile(fdelta_log_pth):
        fdeltas = True
        with open(fdelta_log_pth,'r') as f:
            fdelta_vars = [ v.strip() for v in f.readlines() ]
    return fdeltas,fdelta_vars,deltas,delta_vars

def get_tables(work_dir=None,dlog_prefix='pycmor-driver-log',runid=None):
    """
        Return the list of tables that produced driver logs within the specified directory.
        Assumes driver log name format like PREFIX_RUNID*_TABLENAME.log

        Inputs:
            work_dir        (str) : (optional) the working directory containing the driver logs and pycmor log dir.
                                        Defaults to the working dir.
            dlog_prefix     (str) : (optional) the common prefix across all driver logs.
                                        Defaults to 'pycmor-driver-log'.
            runid           (str) : (optional) runid used for the conversion of interest. If not given,
                                        defaults to inspecting all log files matching dlog_prefix*, else
                                        it looks for those matching dlog_prefix_runid*.

        Outputs:
            tables  (lst of str) : List of table names
            dlogs   (lst of str) : List of associated driver logs
    """
    # resolve the default at call time; a default of os.getcwd() in the
    # signature would be frozen at import time
    if work_dir is None:
        work_dir = os.getcwd()

    # get list of driver log files
    if runid:
        patt = "{}_{}*".format(os.path.join(work_dir,dlog_prefix),runid)
    else:
        patt = "{}*".format(os.path.join(work_dir,dlog_prefix))
    dlogs = glob.glob(patt)

    # parse out table name into new list
    #   (the table name is the final underscore-separated token of the filename)
    tables = [ dlog.split("_")[-1].replace(".log","") for dlog in dlogs ]
    return tables, dlogs

def get_runids(work_dir=None,dlog_prefix='pycmor-driver-log'):
    """
        Get the runids that were used in the conversion ran in the working directory

        Assumes that the driver log files use the following format:

            "${dlog_prefix}_${runid}_*"

        Inputs
        ------
            work_dir        (str) : (optional) the working directory containing the driver logs and pycmor log dir.
                                        Defaults to the working dir.
            dlog_prefix     (str) : (optional) the common prefix across all driver logs.
                                        Defaults to 'pycmor-driver-log'.
        Returns
        -------
            runids (lst of str) : unique runids found in the driver log filenames

        Raises
        ------
            Exception : if no runids are found within ``work_dir``.
    """
    # resolve the default at call time; a default of os.getcwd() in the
    # signature would be frozen at import time
    if work_dir is None:
        work_dir = os.getcwd()

    # get list of driver log files
    patt   = "{}*".format(os.path.join(work_dir,dlog_prefix))
    dlogs  = glob.glob(patt)

    # extract runids from driver log filenames
    #   (the runid is the second underscore-separated token of the basename)
    runids = [ os.path.basename(dlog).split("_")[1] for dlog in dlogs ]

    # get unique runids
    runids = list(set(runids))

    # check number of runids
    if len(runids) < 1:
        err_strg  = "No conversion logs were found with the prefix {} in {}!\n".format(dlog_prefix,work_dir)
        err_strg += "Are you sure this is a conversion directory?\n"
        raise Exception(err_strg)

    return runids

if __name__ == '__main__':
    #===========================
    # define command line parser
    #===========================
    description = ("Provide summary information to the user from the large amount of log files "+
                   "that come from pycmor. This tool assumes that it is being ran in a directory "+
                   "containing log files associated with pycmor conversions.")

    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('runid', nargs='?', default=None,
                        help=("the runid used in the conversion of interest. If not given, log parse")+
                             (" attempts to provide summary information for all runids used for conversions")+
                             (" in the current working dirctory."))

    # define flag specifying how to parse the log files (exactly one is required)
    group  = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-c', "--conversion", action="store_true", 
                        help="provide summary of entire conversion, assumed to produced the logs in this directory")
    group.add_argument('-t', "--table", metavar="TABLE", action="store",
                        help="provide summary of conversion for a single table")
    group.add_argument('-d', "--check_deltas", metavar="TABLE", action="store", 
                        help=("check for hash deltas for a given table or all tables converted in the current working "+
                              "directory, returning a non-zero exit status if "+
                              "deltas are noted for a frozen/published variable. Use '-d all' to check all tables."))
    args=parser.parse_args()

    # assume there are no frozen updates to start program
    frozen_updates = False

    if not args.runid:
        # user didn't provide a runid. provide a summary for all conversions in working dir
        runids = get_runids()
    else:
        # only provide summary information for specified runid
        runids = [args.runid]

    for runid in runids:
        if args.conversion:
            # summarize driver logs from entire conversion
            summarize_driver_logs(runid)
        elif args.table:
            # summarize pycmor_logs for specific table
            summarize_pycmor_logs(args.table,runid)
        elif args.check_deltas:    
            # provide information on hash updates
            delta_dict  = {
                            'all':    {},
                            'frozen': {}
                          } 
            if args.check_deltas == "all":
                tables,_ = get_tables(runid=runid)
            else:
                tables = [args.check_deltas]

            # loop over desired tables and check for hash deltas, storing information in delta_dict
            for tbl in tables:
                fdeltas,fdelta_vars,deltas,delta_vars = check_deltas(tbl,runid)
                if deltas:
                    delta_dict['all'][tbl] = delta_vars
                if fdeltas:
                    delta_dict['frozen'][tbl] = fdelta_vars
            
            # output delta summaries
            #   -- general hash updates
            updates = False
            if delta_dict['all']:
                updates = True
                for tbl,var_list in delta_dict['all'].items():
                    print("Hash updates in {} for {}\n".format(tbl,runid))

                    # if only one table considered, print out updated variables
                    if len(tables) == 1:
                        print("\tUpdated Vars:")
                        for v in var_list:
                            print("\t\t{}".format(v)) 
                        print("")

            #   -- frozen (published) hash updates: these warrant a loud warning
            #      and a non-zero exit status
            if delta_dict['frozen']:
                updates = True
                frozen_updates = True
                for tbl,var_list in delta_dict['frozen'].items():
                    print("**************************************")
                    print("WARNING: Frozen hash updates in {} for {}".format(tbl,runid))
                    print("**************************************\n")

                    # if only one table considered, print out updated variables
                    if len(tables) == 1:
                        print("\tUpdated Frozen Vars:")
                        for v in var_list:
                            print("\t\t{}".format(v))
                        print("")

            if not updates:
                if args.check_deltas == "all":
                    output = "No hash updates from conversion for {}\n".format(runid)
                else:
                    output = "No hash updates in {} for {}\n".format(args.check_deltas,runid)
                print(output)

    # if frozen updates were encountered for any runid, exit with non-zero exit status 
    if frozen_updates: sys.exit(1)
