#!/usr/bin/env python
"""
    This is a tool for archiving the setup and source code directories for a given run.

    .. note::

        this tool was written so that it could work on all our machines. As such, 
        some "more recent" features of python haven't been used. These considerations are:
            - subprocess.Popen was used instead of subprocess.check_output
            - subprocess.Popen(['which','hpcarchive']) was used instead of 
              distutils.spawn.find_executable('hpcarchive') or shutil.which('hpcarchive')
            - .format statements use indices when identifying fields.. i.e. "{0}".format('woo')
              instead of simply "{}".format('woo')
            - instead of simply using with tarfile.open(...) as tar:, we had to use
              with contextlib.closing(tarfile.open(...)) as tar.

"""
from __future__ import print_function # required to print to stderr
from contextlib import closing
import traceback
import glob
import argparse
import json
import shutil
import re
import os
import subprocess as sp
import sys
import distutils.spawn
import getpass
import datetime
import tarfile

def get_writeable_projects():
    """
        Get list of writeable hpcarchive projects for the current user.

        The list is parsed from the standard output of ``hpcarchive -w``: the output
        is split on '-' characters, everything before the first '-' is discarded,
        and each remaining piece is stripped of surrounding whitespace.

        .. note::

            The return code and standard error of ``hpcarchive -w`` are not inspected;
            if the command prints nothing to stdout an empty list is returned.

        Returns
        -------
            projects : list of str
                List of writeable projects.
    """
    # universal_newlines=True makes communicate() return text under both python 2
    # and python 3. Without it python 3 hands back bytes, and splitting bytes on
    # a str separator raises a TypeError.
    proc = sp.Popen(['hpcarchive','-w'], stdout=sp.PIPE, stderr=sp.PIPE,
                    universal_newlines=True)
    output, _ = proc.communicate()
    projects = [ proj.strip() for proj in output.split('-')[1:] ]
    return projects

def process_args(raw_args):
    """ 
        Turn the parsed command line namespace into a plain argument dictionary
        and validate it for the requested operation (archive or extract).

        .. note::
            
            Execution is terminated via ``sys.exit(1)`` if ``hpcarchive`` cannot be
            found with ``which``, or if the operation specific checks fail.

        Parameters
        ----------
            raw_args : ``argparse.Namespace`` object
                Namespace holding the command line arguments. The attributes read are
                ``RUNID``, ``extract``, ``delete``, ``archproj``, ``stpdir`` and 
                ``archname``.

        Returns
        -------
            args : dict
                Dictionary with the keys 'runid', 'extract', 'delete', 'archproj',
                'setup_dir' and 'archname'.

    """
    args = { 'runid' : raw_args.RUNID, 'extract' : raw_args.extract }

    # nothing below can work without hpcarchive on the PATH
    which_proc = sp.Popen(['which','hpcarchive'], stdout=sp.PIPE, stderr=sp.PIPE)
    which_proc.communicate()
    if which_proc.returncode != 0:
        print("hpcarchive not accessible! Aborting ...",file=sys.stderr)
        sys.exit(1)

    # flags that are passed through untouched
    args['delete']   = raw_args.delete
    args['archproj'] = raw_args.archproj

    # setup directory: the explicit path if given, otherwise <cwd>/<RUNID>
    args['setup_dir'] = (os.path.abspath(raw_args.stpdir) if raw_args.stpdir
                         else os.path.join(os.getcwd(), raw_args.RUNID))

    # archive name: the explicit name if given, otherwise derived from the runid
    archname = raw_args.archname
    if not archname:
        if args['extract']:
            # regular expression used to search for archives; replaced by a
            # concrete archive name inside 'check_extract_args'
            archname = '{0}_.*_archtool'.format(args['runid'])
        else:
            # name of the archive about to be created
            username = getpass.getuser()
            today    = datetime.date.today().strftime("%Y%m%d")
            archname = "{0}_{1}_{2}_archtool".format(args['runid'],username,today)
    args['archname'] = archname

    # run the checks that belong to the requested operation
    validate = check_extract_args if args['extract'] else check_archive_args
    validate(args)
    return args

def check_archive_args(args):
    """ 
        Check that the given argument dictionary will work for archiving,
        exiting with a non-zero exit status if not.

        Three things are verified, in this order: the setup directory exists,
        ``hpcarchive -Lj`` reports no results for the requested archive name in the
        requested project, and the project is listed by ``get_writeable_projects``.

        .. note::
            
            Execution is terminated via ``sys.exit(1)`` if any errors in the 
            arguments are encountered.

        Parameters
        ----------
            args : dict
                A dictionary that contains the arguments. The keys 'setup_dir',
                'archproj' and 'archname' are read.
    """
    
    # make sure the specified setup directory exists
    if not os.path.isdir(args['setup_dir']):
        err_txt  = "Run directory {0} doesn't exist!\n".format(args['setup_dir'])
        err_txt += "Please specify a valid path."
        print(err_txt,file=sys.stderr)
        sys.exit(1)

    # check if the given archive name already exists
    # Note: the command is built as a list (rather than formatting a string and
    #       splitting it) so that names containing whitespace stay a single argument,
    #       and universal_newlines=True gives text output on python 2 and 3 alike.
    hpcarch_cmd = ["hpcarchive","-Lj","-p",args['archproj'],"-c",args['archname']]
    prc         = sp.Popen(hpcarch_cmd,stdout=sp.PIPE,stderr=sp.PIPE,universal_newlines=True)
    tmp_otp,err = prc.communicate()
    if prc.returncode != 0:
        print("An error occurred while inspecting archive names!",file=sys.stderr)
        print("hpcarchive error:",file=sys.stderr)
        print(err,file=sys.stderr)
        sys.exit(1)

    # any entry in 'results' means the name is already taken
    otp = json.loads(tmp_otp)['results']
    if otp:
        print("{0} already exists! Specify a new name for your archive or delete the existing one!".format(args['archname']),file=sys.stderr)
        sys.exit(1)

    # check that the user can write to the given project
    writeable_projects = get_writeable_projects()
    if args['archproj'] not in writeable_projects:
        print("Either you do not have write access to hpcarchive project {0}".format(args['archproj']),file=sys.stderr)
        print("or the project does not exist!",file=sys.stderr)
        print("Please specify a valid project. Aborting..",file=sys.stderr)
        sys.exit(1)

def check_extract_args(args):
    """
        Check that the given argument dictionary will work for extracting,
        exiting with a non-zero exit status if not.

        The setup directory must not exist yet, and ``hpcarchive -Lxj`` must report
        at least one archive in the project matching ``^<archname>$``. If several 
        archives match, the one with the most recent 'archive_time' is selected.

        .. note::
            
            Execution is terminated via ``sys.exit(1)`` if any errors in the 
            arguments are encountered.

        Parameters
        ----------
            args : dict
                A dictionary that contains the arguments. The keys 'setup_dir',
                'archproj' and 'archname' are read.

            .. note::
                
                the 'archname' field gets modified if this function is 
                successful.
    """

    # make sure that directory doesn't exist
    if os.path.isdir(args['setup_dir']):
        err_txt  = "{0} already exists!\n".format(args['setup_dir'])
        err_txt += "Please clean this directory or specify a different one with --stpdir"
        err_txt += " before attempting to extract an archive."
        print(err_txt,file=sys.stderr)
        sys.exit(1)

    # check if archive exists, and if multiple do, pick most recent
    # Note: the command is built as a list so that names containing whitespace stay 
    #       a single argument, and universal_newlines=True gives text output on 
    #       python 2 and 3 alike.
    hpcarch_cmd = ["hpcarchive","-Lxj","-p",args['archproj'],"-c","^{0}$".format(args['archname'])]
    prc         = sp.Popen(hpcarch_cmd,stdout=sp.PIPE,stderr=sp.PIPE,universal_newlines=True)
    tmp_otp,err = prc.communicate()
    if prc.returncode != 0:
        # errors go to stderr, like every other failure reported by this tool
        print("A problem occured when searching for archives in hpcarchive",file=sys.stderr)
        print("hpcarchive output:",file=sys.stderr)
        print(err,file=sys.stderr)
        sys.exit(1)

    otp = json.loads(tmp_otp)['results']
    if not otp:
        print("\nNo archives found in project {0} matching the expected archive name : {1}".format(args['archproj'],args['archname']),file=sys.stderr)
        print("If you wish to look in a different project, use the '--archproj' flag to specify it.\n",file=sys.stderr)
        sys.exit(1)

    archive_ind = 0
    if len(otp) > 1:
        print("Multiple archives found matching the regex pattern {0}".format(args['archname']))
        print("Extracting the archive with the most recent 'archive_time'.")
        # define hpcarchive date format
        fmt = "%Y-%m-%dT%H:%M:%S.%f"

        # get most recent entry (the first one wins if several share the same time)
        arch_times  = [ datetime.datetime.strptime(entry['file']['archive_time'], fmt) for entry in otp ]
        archive_ind = arch_times.index(max(arch_times))

    # assign archive name 
    args['archname'] = otp[archive_ind]['archivename']

def get_ccrnsrc(setup_dir, env_setup_file='env_setup_file'):
    """
        Find the environment setup file in the setup directory and grab the value of
        CCRNSRC, which defines where the 'code' directory lives.

        The first line of the file containing ``CCRNSRC =`` (with optional whitespace
        before the '=') is used. That line is split on ';' and the value is taken from
        the command that starts with the assignment or, failing that, from the first
        command that contains it (e.g. ``export CCRNSRC=...``).

        Parameters
        ----------
            setup_dir : str
                path to the setup directory, which contains the environment setup file
            env_setup_file : str **optional**
                name of the environment setup file. Defaults to 'env_setup_file'

        Returns
        -------
            ccrnsrc : str
                path to the code/source directory, which is set by CCRNSRC. The text
                after the '=' is returned stripped of surrounding whitespace; quotes,
                if present, are not removed.

        Raises
        ------
            ValueError
                if the file contains no CCRNSRC definition
    """
    pth_to_file = os.path.join(setup_dir,env_setup_file)

    # pull out CCRNSRC definition line
    def_line = None
    with open(pth_to_file,'r') as f:
        for line in f:
            if re.match(r".*CCRNSRC\s*=",line):
                def_line = line.strip()
                break
    if def_line is None:
        raise ValueError("No CCRNSRC definition found in {0}".format(pth_to_file))

    # pick the command holding the definition: prefer one that starts with the
    # assignment, otherwise fall back to the first one that contains it
    cmds = def_line.split(";")
    definition = None
    for cmd in cmds:
        if re.match(r"\s*CCRNSRC\s*=",cmd):
            definition = cmd
            break
    if definition is None:
        for cmd in cmds:
            if re.search(r"CCRNSRC\s*=",cmd):
                definition = cmd
                break

    # extract the actual CCRNSRC definition, i.e. everything after the '='
    # Note: the matched line always holds 'CCRNSRC\s*=' inside a single command,
    #       since the pattern cannot span a ';', so definition is never None here
    ccrnsrc = re.search(r"CCRNSRC\s*=(.*)",definition).group(1).strip()
    return ccrnsrc

def pad_archive_files(files, minimum_mb=600.0, pad_fname='PAD_FILE'):
    """
        Take in a list of files and check if a padding file is required in order to be 
        archived on hpcarchive, which has an archive size minimum.

        If the combined size of ``files`` is below ``minimum_mb``, a file named 
        ``pad_fname`` filled with '0' characters is written to make up the difference
        and its name is appended to ``files``.

        Parameters
        ----------
            files : list of str
                list of files for which the total size is being checked. This list is
                modified in place when a padding file is added.
            minimum_mb : float **optional**
                minimum megabyte requirement (1 mb = 1.0e6 bytes). Defaults to 600.
            pad_fname : str **optional**
                name of padding file. Defaults to 'PAD_FILE'.

        .. note::

            It is not immediately clear how hpcarchive determines filesize, as calculating
            ``total_size`` below leads to a smaller size than that returned from hpcarchive.
            As a result, it is recommended that you use a value for ``minimum_mb`` that is 
            higher than the hpcarchive limit.
                
        Returns
        -------
            files : list of str
                the padded file list, with the combined size meeting the minimum requirement.
    """
    bytes2megabytes = 1.0e-6
    chunk_size      = 1024*1024

    # calculate required padding file size, in mb
    total_size = float(sum( os.path.getsize(f) for f in files ))*bytes2megabytes
    req_padd_size = minimum_mb - total_size

    if req_padd_size > 0:
        print("Padding required. Adding padding file with size {0} mb".format(req_padd_size))

        # convert to a whole number of bytes and write file
        # Note: the padding is written in chunks so that the whole file, which can
        #       be hundreds of megabytes, never has to be held in memory at once
        num_chars = int(round(req_padd_size/bytes2megabytes))
        chunk     = '0'*min(num_chars,chunk_size)
        with open(pad_fname,'w') as f:
            remaining = num_chars
            while remaining > 0:
                nwrite = min(remaining,len(chunk))
                f.write(chunk[:nwrite])
                remaining -= nwrite

        # add to filelist
        files.append(pad_fname)
    else:
        print("No padding required. Minimum size met.")
    return files

def archive_rundirs(runid, setup_dir, archname, archproj, delete=False, pad_fname='PAD_FILE'):
    """ 
        Archive the given setup directory, and its associated source/code directory, deleting
        the archived directories once completed, if desired.

        The source/code directory is the parent directory of the CCRNSRC path found in
        the setup directory's environment setup file. Both directories are tar'd into 
        ``<runid>-setup.tar`` and ``<runid>-src.tar`` in the current working directory,
        padded if necessary, and sent to hpcarchive. The temporary tar/padding files
        are removed afterwards, whether or not the archiving worked.

        .. note::
            
            Execution is terminated via ``sys.exit(1)`` if hpcarchive returns a 
            non-zero exit status. Nothing is deleted in that case.

        Parameters
        ----------
            runid : str
                runid for which to archive its setup and source/code directories
            setup_dir : str
                path to the setup directory for the run of interest
            archname : str
                name of resulting hpcarchive entry
            archproj : str
                name of the hpcarchive project to archive to
            delete : bool **optional**
                if True, delete the directories after successfully archiving them. 
                Defaults to ``False``.
            pad_fname : str **optional**
                defines the name of a 'padding file' that is created if it is required
                to meet hpcarchive's minimum archive size. Defaults to 'PAD_FILE'.
    """
    # get location of code/source directory
    ccrnsrc_dir = get_ccrnsrc(setup_dir)
    run_src_dir = os.path.dirname(ccrnsrc_dir)

    setup_tarf  = "{0}-setup.tar".format(runid)
    source_tarf = "{0}-src.tar".format(runid)
    files       = [ setup_tarf, source_tarf ]
    try:
        # tar up setup and source/code directories
        with closing(tarfile.open(setup_tarf,"w")) as tar:
            tar.add(setup_dir, arcname=os.path.basename(setup_dir))
        with closing(tarfile.open(source_tarf,"w")) as tar:
            tar.add(run_src_dir, arcname=os.path.basename(run_src_dir))

        # check size of tar files and pad archive files if necessary
        files = pad_archive_files(files,pad_fname=pad_fname) 

        # archive on hpcarchive
        # Note: universal_newlines=True gives text output on python 2 and 3 alike
        hpcarch_cmd = ["hpcarchive","-a"] + files + ["-p",archproj,"-c",archname]
        prc         = sp.Popen(hpcarch_cmd,stdout=sp.PIPE,stderr=sp.PIPE,universal_newlines=True)
        tmp_otp,err = prc.communicate()
    finally:
        # clean temporary files, even if tarring or launching hpcarchive failed,
        # so that no (potentially very large) files are left in the user's cwd
        for f in files:
            if os.path.exists(f): os.remove(f)

    # check return status
    if prc.returncode != 0:
        # errors go to stderr, like every other failure reported by this tool
        print("A problem occured when trying to archive the run directories",file=sys.stderr)
        print("hpcarhive error:",file=sys.stderr)
        print(err,file=sys.stderr)
        sys.exit(1)
    print("Successfully archived run directories for {0} in archive {1}".format(runid,archname))
    
    # remove run directories if desired
    if delete:
        shutil.rmtree(setup_dir)
        shutil.rmtree(run_src_dir)

def extract_rundirs(runid, setup_dir, archname, archproj, pad_fname='PAD_FILE'):
    """
        Extract the given run directory archive, restoring the setup directory to the 
        desired location and sending the source/code directory to its original location.

        The archive is retrieved into a scratch directory, ``arch-tool-tmp``, created in 
        the current working directory. The original location of the source/code directory
        is the parent directory of the CCRNSRC path found in the extracted setup
        directory's environment setup file. The scratch directory (and with it the tar 
        files and any padding file) is removed and the working directory restored when 
        this function finishes, whether it succeeded or not.

        .. note::
            
            Execution is terminated via ``sys.exit(1)`` if hpcarchive fails, if the
            archive doesn't hold the expected tar files, or if a directory can't be 
            moved into place. Both destinations are checked before anything is moved.

        Parameters
        ----------
            runid : str
                runid of interest (not used by the extraction itself)
            setup_dir : str
                path defining where to place the setup directory. Relative paths are
                taken relative to the working directory at the time of the call.
            archname : str
                name of the archive containing the desired run directories
            archproj : str
                name of the hpcarchive project containing ``archname``
            pad_fname : str **optional**
                defines the name of a 'padding file' that may be included in the archive.
                It is not referenced directly; any such file is deleted along with the 
                scratch directory. Defaults to 'PAD_FILE'.
    """
    # resolve paths before changing directory, otherwise a relative setup_dir would
    # end up inside the scratch space and be deleted along with it
    cwd       = os.getcwd()
    setup_dir = os.path.join(cwd, setup_dir)
    tmpdir    = os.path.join(cwd, 'arch-tool-tmp')

    # create scratch space and navigate into
    os.mkdir(tmpdir)
    try:
        os.chdir(tmpdir)

        # extract contents of archname into scratch space
        # Note: the command is built as a list so that names containing whitespace
        #       stay a single argument
        hpcarch_cmd = ["hpcarchive","-r",".","-c",archname,"-p",archproj]
        prc         = sp.Popen(hpcarch_cmd,stdout=sp.PIPE,stderr=sp.PIPE,universal_newlines=True)
        tmp_otp,err = prc.communicate()
        if prc.returncode != 0:
            print("A problem occured when trying to extract {0} from {1}".format(archname,archproj),file=sys.stderr)
            print(err,file=sys.stderr)
            sys.exit(1)
        print("{0} extracted".format(archname))

        # make sure both expected tar files came out of the archive
        setup_tarfs = glob.glob("*-setup.tar")
        src_tarfs   = glob.glob("*-src.tar")
        if not setup_tarfs or not src_tarfs:
            print("{0} does not contain the expected '*-setup.tar' and '*-src.tar' files".format(archname),file=sys.stderr)
            sys.exit(1)

        # store list of current files to allow for finding new files as they are extracted
        filelist = os.listdir('.')

        # extract setup directory
        with closing(tarfile.open(setup_tarfs[0],"r")) as f:
            f.extractall()

        # get name of extracted directory and extract the CCRNSRC location
        extrct_setup_dir = [ d for d in os.listdir('.') if d not in filelist ][0]
        ccrnsrc_dir = get_ccrnsrc(extrct_setup_dir)
        run_src_dir = os.path.join(cwd, os.path.dirname(ccrnsrc_dir))

        # refuse to touch existing destinations before anything is moved; shutil.move
        # would otherwise silently place the extracted directory *inside* them
        for dest in (setup_dir, run_src_dir):
            if os.path.exists(dest):
                print("{0} already exists!".format(dest),file=sys.stderr)
                print("Please remove it before attempting to extract an archive.",file=sys.stderr)
                sys.exit(1)

        # move the extracted setup directory to the desired location
        try:
            shutil.move(extrct_setup_dir, setup_dir)
            print("Setup directory extracted to {0}".format(setup_dir))
        except (OSError, IOError, shutil.Error):
            print("Failed to move the extracted setup directory to your specified location",file=sys.stderr)
            print("This is likely because the directory already exists...",file=sys.stderr)
            print(traceback.format_exc(),file=sys.stderr)
            sys.exit(1)
         
        # extract source/code directory
        with closing(tarfile.open(src_tarfs[0],"r")) as f:
            f.extractall()

        # get name of extracted directory and move to desired location
        # Note: the setup directory has been moved away by now, so the only new 
        #       entry is what came out of the source tar file
        extrct_run_src_dir = [ d for d in os.listdir('.') if d not in filelist ][0]
        try:
            shutil.move(extrct_run_src_dir, run_src_dir)
            print("Source directory extracted to {0}".format(run_src_dir))
        except (OSError, IOError, shutil.Error):
            print("Failed to move the extracted source directory to the original location:",file=sys.stderr)
            print("\t {0}".format(run_src_dir),file=sys.stderr)
            print("This is likely because the directory already exists...",file=sys.stderr)
            print(traceback.format_exc(),file=sys.stderr)
            sys.exit(1)
    finally:
        # navigate back into user's cwd and remove scratch space, also on failure, so
        # that a later run isn't blocked by a left over scratch directory
        os.chdir(cwd)
        shutil.rmtree(tmpdir, ignore_errors=True)

    print("Archive extracted : Check that WRK_DIR in the environment setup file is as expected! ")

if __name__ == '__main__':
    # Command line entry point: parse the arguments, validate them, and then either
    # archive or extract the run directories for the given RUNID.

    scrpt_name = os.path.basename(__file__)
    
    #====================================
    # Define command line argument parser
    #====================================
    description = ("Either archive to tape, or extract from tape, tar'd setup and source directories"+
                   " for the given RUNID using hpcarchive.")
    parser = argparse.ArgumentParser(description=description)
    
    # REQUIRED
    # Note: a space was missing between "used" and "then" in the original help text
    parser.add_argument("RUNID", action="store", 
                help=("The runid defining what dirs are to be extracted or archived. If '--stpdir' isn't used "+
                      "then {0} will look for, or create, a directory matching given runid in the ".format(scrpt_name)+
                      "current working directory."))

    # OPTIONAL
    parser.add_argument("--stpdir", metavar="SETUP_DIRECTORY", action="store", default=None,
                help=("Path to the setup directory. If not given, assumes the desired directory "+
                      "should be in the users current working directory, with the name of RUNID."))
    parser.add_argument("-d","--delete",action="store_true",default=False,
                help=("When archiving, delete the run directories after operation is successful."+
                      " No effect when extracting."))
    parser.add_argument("-e","--extract",action="store_true",default=False,
                help=("Instead of trying to archive the run directories for the given runid, extract "+
                      "the setup directory, and restore the source/code directory to its original location."))
    parser.add_argument("--archproj", metavar="ARCHIVE_PROJECT", action="store", default='crd_cccma',
                help=("The hpcarchive project to archive to, or containing the desired archive. Defaults to 'crd_cccma' if not given."))
    parser.add_argument("--archname", metavar="ARCHIVE_NAME", action="store", default=None,
                help=("The archive name to look for or create."+
                      " If not used while archiving, defaults to RUNID_USER_DATEOFARCHIVE_archtool."+
                      " If not used while extracting, defaults to looking for archives of the form RUNID_*_archtool"+
                      " and selects the one with the most recent archive date."))

    #==========================================
    # Parse arguments and perform sanity checks
    #==========================================
    raw_args = parser.parse_args()
    args = process_args(raw_args)

    # 'extract' only selects the operation; it isn't an argument of either function,
    # so it is taken out of the dictionary before it is expanded into keywords below
    extract = args.pop('extract')

    pad_fname = 'PAD_FILE'
    
    #====================
    # Archive or Extract! 
    #====================
    if extract:
        # delete flag has no effect when extracting (extract_rundirs doesn't accept it)
        if args.pop('delete'): print("Delete flag ignored when extracting..")
        extract_rundirs(pad_fname=pad_fname, **args)
    else:
        archive_rundirs(pad_fname=pad_fname, **args)

