#!/bin/bash
#
#   Usage: fdb [options] action [var=val]
#          action is of the form: keyword [options] [args]
#
# Purpose: Manipulate a CCCma RUNPATH database
#
# Global options:
# All options begin with a dash (-) and must appear before any other command line arg
#   -f  ..force an overwrite of any existing database of the same name
#   -t  ..Emit terse messages when an error is encountered (useful in scripts)
#   -v  ..increase verbosity (multiple "v" options are cumulative)
#   -h  ..display a usage message and exit
#   -x  ..echo all shell commands (used for debugging)
#
# Actions:
#     create              ..create a database (at $DATAPATH_DB), and populate it from a recursive 
#                            scan of $RUNPATH_ROOT, adding sparse entries for all files
#                            ending in three digit extensions.
#      empty              ..create an empty database (at $DATAPATH_DB)
#     detail              ..add user, time, size  information into the database (slow).
#     update              ..update existing database with a recursive scan of RUNPATH_ROOT.
#     clean               ..remove stale entries from datababase (rows where files no longer exist).
#     backup DBBAK        ..create a backup of $DATAPATH_DB into a new database at DBBAK.
#       rows              ..count the number of rows in a database
#       lsrp              ..list all files with a 3 digit numeric extension in RUNPATH_ROOT
#       save FILE         ..save FILE to the database
#     delete FILE [VER]   ..delete FILE from the database
# mdelete [opts] FILELIST ..delete files from the database in space-separated FILELIST. 
#              mdelete options:
#                  -f     ...specify files to delete in an input file, instead of a list.
#     exists FILE [VER]   ..does FILE exist in the database
#       path FILE [VER]   ..get the full pathname to FILE
#    version FILE         ..get the highest version number associated with FILE
#         ls [opts] PATT  ..list files matching PATT that are found in the database
#              ls options:
#                  -h      ...print help message.
#                  -l      ...long listing.
#                  -s      ...sort by size, largest first.
#                  -t      ...sort by time, most recent first.
#                  -r      ...reverse sort order.
#                  -u USER ...list only files owned by USER.
#
# NOTE:
#     The environment variable DATAPATH_DB must be defined and contain the full path
#     to the database being used. If creating a new database, define this variable
#     first. It should be set by default in the CCCma setup.
#
# Definitions:
#    Variables defined on the command line supercede those defined in the environment.
#    This can be useful in certain circumstances to temporarily override an
#    internal variable but should not be used in common practice.
#
# Authors:
#
# Neil Swart and Larry Solheim, Nov 2016
########################################################################

# This upper section parses arguments and does some sanity checks

FULLPATH=`type $0|awk '{print $3}'` # pathname of this script
Runame=`basename $FULLPATH`
usage() {
  err_exit=0
  while getopts e opt; do
    case $opt in
      e) err_exit=1 ;;
    esac
  done
  shift $((OPTIND-1))

  [ -n "$1" ] && echo >&2 "${Runame}:" "$@"
  if [ $terse -eq 0 ]; then
    echo >&2 " "
    sed >&2 -n '/^###/q; s/^#$/# /; s/^ *$/# /; 3,$s/^# //p;' "$FULLPATH"
  fi
  if [ $err_exit -eq 0 ]; then
    exit
  else
    exit 1
  fi
}

# Create time stamp to be used in file names etc
stamp=`date "+%j%H%M%S"$$`

# Set defaults
verbose=1
terse=0

# dbclobber = 1 means overwrite a database if create is invoked and the database file exists
#TODO not yet implimented
dbclobber=0

# process command line options
while getopts fhtvx opt
do
  case $opt in
    f) dbclobber=1 ;;
    t) terse=1 ;;
    v) verbose+=1 ;;
    h) usage  ;;
    x) set -x ;;
    -) shift; break ;; # end of options
    ?) usage -e $USAGE   ;;
  esac
done
shift $((OPTIND-1))
OPTIND=1

# Process any definitions found on the command line
declare -a A_args
for arg in "$@"; do
  case $arg in
      *=*) # add this variable definition to the current environment
           var=$(echo $arg|awk -F\= '{printf "%s",$1}' -)
           val=$(echo "$arg"|awk '{i=index($0,"=")+1;printf "%s",substr($0,i)}' -)
           [ -n "$var" ] && eval ${var}=\"\$val\"  # preserve quoted assignments
           val=$(echo $val|sed 's/^ *//; s/ *$//')  # remove leading and trailing space
           [ -z "$val" ] && { echo "Invalid command line arg --> $arg <-- Empty value."; exit 1; }
           ;;
        *) # Push anything that is not a definition onto A_args
           A_args[${#A_args[*]}]=$arg
           ;;
  esac
done

# Identify the action to be taken along with any optional args
action=${A_args[0]}
[ -z "$action" ] && usage -e "No action was specified on the command line"

# Set an array containing only the action specific args, excluding action keyword
A_opts=("${A_args[@]:1}")

# Verify that action is valid and set action options
case $action in
     create) ;;
      empty) ;;
      clean) ;;
     detail) ;;
     update) ;;
       rows) ;;
       lsrp) ;;
       save) [ -z "${A_opts[0]}" ] &&
                 usage -e "save requires a file name: --> ${A_args[*]} <--"
             ;;
     delete) [ -z "${A_opts[0]}" ] &&
                 usage -e "delete requires a file name: --> ${A_args[*]} <--"
             ;;
     mdelete) [ -z "${A_opts[0]}" ] &&
                 usage -e "mdelete requires file name(s): --> ${A_args[*]} <--"
             ;;

     delpat) [ -z "${A_opts[0]}" ] &&
                 usage -e "delpat requires a file pattern: --> ${A_args[*]} <--"
             ;;
     exists) [ -z "${A_opts[0]}" ] &&
                 usage -e "exists requires a file name: --> ${A_args[*]} <--"
             ;;
       path) [ -z "${A_opts[0]}" ] &&
                 usage -e "path requires a file name: --> ${A_args[*]} <--"
             ;;
    version) [ -z "${A_opts[0]}" ] &&
                 usage -e "version requires a file name: --> ${A_args[*]} <--"
             ;;
         ls) [ -z "${A_opts[0]}" ] &&
                 usage -e "ls requires a file name pattern: --> ${A_args[*]} <--"
             ;;
     backup) [ -z "${A_opts[0]}" ] &&
                 usage -e "backup requires a full path specifying the backup database: --> ${A_args[*]} <--"
             ;;
        *) usage -e "Invalid action found on command line: $@" ;;
esac

# RUNPATH_ROOT must be defined when using the create command
# It is ignored otherwise
# Use the command line or env definition of RUNPATH_ROOT if it was supplied
RUNPATH_ROOT=${RUNPATH_ROOT:=$RUNPATH}
export RUNPATH_ROOT

# DATAPATH_DB contains the full pathname to the database file
# This variable must be defined for any action requested below
# Use the command line or env definition for DATAPATH_DB
# If it was not supplied then issue a fatal error
DATAPATH_DB=${DATAPATH_DB:=''}
# replace colons with spaces
DATAPATH_DB=`echo $DATAPATH_DB | sed 's/:/ /g'`

# use the first element in the database list
DATAPATH_DB=`echo $DATAPATH_DB | cut -f1 -d' '`
export DATAPATH_DB

if [ -z "$DATAPATH_DB" ]; then
  case $action in
    lsrp) # Don't need to know the name of the database
          ;;
       *) # All other actions need the database location
          usage -e "DATAPATH_DB is not defined in the current environment"
          ;;
  esac
fi

#========================================================================================================
# Functions for replicating $DATAPATH with an sqlite database are defined below
export SQL_PRAGMA="PRAGMA synchronous = OFF"
export SQL_TIMEOUT=30000

create_datapath_db() {
    # Creates a new database to hold datapath-like information
    # and creates the table "datapath" with columns: 
    # date, user, filename, fullpath, ver
    #
    # Usage: create_datapath_db database-pathname.db
    #
    # where database-pathname is the full path to the new 
    # database to be created.

    NEW_DATABASE=$1

    [ -z "$NEW_DATABASE" ] &&
      { echo "create_datapath_db: Full pathname to new database required as argument" ; exit 1; }

    [ -e $NEW_DATABASE ] && 
      { echo "create_datapath_db: Database $NEW_DATABASE already exists" ; exit 1; }

    # Create the sqlite database
    local sqlcmds
    sqlcmds="CREATE TABLE datapath
             (date VARCHAR, 
              insert_date VARCHAR, 
              user VARCHAR,
              filename VARCHAR, 
              fullpath VARCHAR,
              ver VARCHAR,
              size VARCHAR,
              checksum VARCHAR,
              CONSTRAINT datapath_pk PRIMARY KEY (filename, ver)
             );"
    local sqlout
    sqlout=$(sqlite3 -init <(echo .timeout ${SQL_TIMEOUT}) $DATAPATH_DB <<< $sqlcmds 2>&1) ||
      { echo "create_datapath_db: Problem creating $DATAPATH_DB"; echo "$sqlout"; exit 1; }

    chmod g+rw $NEW_DATABASE
    chmod g+rw $(dirname $NEW_DATABASE)
    # Populate this new database with info about files from RUNPATH_ROOT
    populate_db_from_runpath
    #ls -al $(dirname $NEW_DATABASE)

    echo "Created new database file $NEW_DATABASE"

}

create_empty_datapath_db() {
    # Creates a new database to hold datapath-like information
    # and creates the table "datapath" with columns: 
    # date, user, filename, fullpath, ver
    #
    # Usage: create_datapath_db database-pathname.db
    #
    # where database-pathname is the full path to the new 
    # database to be created.

    NEW_DATABASE=$1

    [ -z "$NEW_DATABASE" ] &&
      { echo "create_datapath_db: Full pathname to new database required as argument" ; exit 1; }

    [ -e $NEW_DATABASE ] && 
      { echo "create_datapath_db: Database $NEW_DATABASE already exists" ; exit 1; }

    # Create the sqlite database
    local sqlcmds
    sqlcmds="CREATE TABLE datapath
             (date VARCHAR, 
              insert_date VARCHAR, 
              user VARCHAR,
              filename VARCHAR, 
              fullpath VARCHAR,
              ver VARCHAR,
              size VARCHAR,
              checksum VARCHAR,
              CONSTRAINT datapath_pk PRIMARY KEY (filename, ver)
             );"
    local sqlout
    sqlout=$(sqlite3 -init <(echo .timeout ${SQL_TIMEOUT}) $DATAPATH_DB <<< $sqlcmds 2>&1) ||
      { echo "create_datapath_db: Problem creating $DATAPATH_DB"; echo "$sqlout"; exit 1; }

    chmod g+rw $NEW_DATABASE
    chmod g+rw $(dirname $NEW_DATABASE)

    echo "Created new database file $NEW_DATABASE"

}

backup_db(){
    # Creates a copy of an existing database

    BACKUP_DATABASE=$1
    [ -z "$BACKUP_DATABASE" ] &&
      { echo "backup_db: Full pathname to new database required as argument" ; exit 1; }

#    [ -e $BACKUP_DATABASE ] && 
#      { echo "backup_db: Database $BACKUP_DATABASE already exists" ; exit 1; }

    [ -e "$DATAPATH_DB" ] ||
      { echo "backup_db: Database to backup $DATAPATH_DB does not exist" ; exit 1; }


    local sqlcmds
    db_status=$(sqlite3 $DATAPATH_DB "pragma quick_check;" 2>&1) ||
      { echo "backup_db: Problem verifying db integrity"; echo "$db_status"; exit 1; }

    [ "$db_status" = "ok" ] || { echo "WARNING, database $DATAPATH_DB appears corrupt, 
                                       aborting backup!"; exit 1 ; }
    sqlcmds=".backup ${BACKUP_DATABASE}"
    local sqlout
    sqlout=$(sqlite3 -init <(echo .timeout ${SQL_TIMEOUT}) $DATAPATH_DB <<< $sqlcmds 2>&1) ||
      { echo "backup_db: Problem creating backup"; echo "$sqlout"; exit 1; }
}

populate_db_from_runpath() {
    # Populate a database with data for all files with a 3 digit numeric suffix that live
    # in the directory (and recursively all subdirs) pointed to by RUNPATH_ROOT
    # RUNPATH_ROOT must be defined in the invoking environment
    # This can take hours to complete on a typical CCCma RUNPATH_ROOT
    # It takes roughly 90 minutes per 100000 files processed on hadar/spica
    #
    # Usage: populate_db_from_runpath [options]
    # Options:
    #    -u ..update an existing database
    #         If a file exists in the database then do not attemp a save

    # update flags checking if a file exists in the database before calling save
    local update=0

    # process function options
    while getopts :u opt; do
      case $opt in
        u) update=1 ;;
        :) usage -e "populate_db_from_runpath: Option $OPTARG requires and argument" ;;
        ?) usage -e "populate_db_from_runpath: Invalid option $OPTARG" ;;
      esac
    done
    shift $((OPTIND-1))
    OPTIND=1

    # RUNPATH_ROOT is a space separated list of absolute path names to directories
    # that will be searched recursively for files with 3 digit numeric suffixes
    [ -z "$RUNPATH_ROOT" ] && { echo "populate_db_from_runpath: RUNPATH_ROOT must be defined" ; exit 1; }

    local currFile fcount thisfile thisver sqlcmd
    fcount=0
    if [ $update -eq 1 ]; then
      # Check that the file exists in the database
      # If it does then do not attemp to save it again
echo "populate_db_from_runpath: The update option (-u) is not yet implimented"
exit 1
      local not_saved_file=not_saved_$$
      for currFile in $(lsrp); do
        # This assumes there will always be a 3 digit numeric suffix (guaranteed here)
        thisfile=${currFile##*/}
        thisver=${thisfile##*.}
        thisfile=${thisfile%.[0-9][0-9][0-9]}
        # Create a file containing sql commands that will output only files that
        # do not exist in the database when fed to sqlite3
        # sqlcmd="SELECT filename FROM datapath
        #         WHERE filename = '$thisfile' and ver = '$thisver'
        #         ORDER BY ver DESC LIMIT 1 OR OUTPUT fullpath;"
      done
      # Read the resulting file containing non-saved file names
      while read -r currFile; do
        save_to_db -q $currFile
        fcount=$((fcount+1))
      done < $not_saved_file
    else
      # Attempt to save every file
      # save_to_db will emit a non-fatal message if the file already exists in the database
      local sqlcmds

      # tmp files
      tnow=$(date +%Y-%m-%d%H%M%S)
      export tfile=/tmp/create_sqlcmds_${USER}_${tnow}_$$.tmp
      export tfile_lsrp=/tmp/create_lsrp_${USER}_${tnow}_$$.tmp
      export SQL_TIMEOUT 

      printf  "\n python walk \n"

time python - <<END
#!/usr/bin/python

import fnmatch
import os

RUNPATH_ROOT = os.environ['RUNPATH_ROOT']
tfile_lsrp = os.environ['tfile_lsrp']

ofile = open(tfile_lsrp, 'w+')

for RP in RUNPATH_ROOT.split():
    for root, dirs, filenames in os.walk(RP, topdown=True):
        dirs[:] = [d for d in dirs if not d.startswith('JHOME_')]

        for filename in fnmatch.filter(filenames, '*.[0-9][0-9][0-9]'):
            ofile.write(os.path.join(root, filename) + '\n')

ofile.close()
END


      ## sql init
      printf "\n python strings \n"
 
time python - <<END
#!/usr/bin/python
import os
import re

tfile=os.environ['tfile']
tfile_lsrp=os.environ['tfile_lsrp']
SQL_TIMEOUT=os.environ['SQL_TIMEOUT']

outfile = open(tfile, 'w+')

init_string = (".timeout {SQL_TIMEOUT} \n BEGIN TRANSACTION; \n")
init_string = init_string.format(SQL_TIMEOUT=SQL_TIMEOUT)
outfile.write(init_string)

with open(tfile_lsrp, 'r') as infile:
    for line in infile:
        basename = os.path.basename(line)
        filename = os.path.splitext(basename)[0]
        ver = os.path.splitext(basename)[1].replace('.', '')

        if set('[~!@#$%^&*()+{}":;\']+$').intersection(filename):
            continue
 
        s=("INSERT OR IGNORE INTO datapath (insert_date, filename, fullpath, ver) " +  
           "values(strftime('%s', 'now'), '{bn}', '{fullpath}', '{ed}');")

        s=s.format(bn=filename, fullpath=line, ed=ver).replace('\n','')
        outfile.write(s + '\n')

end_string = ("END TRANSACTION; \n")
outfile.write(end_string)

outfile.close()
END

      printf "\n sql \n"

      time sqlite3 $DATAPATH_DB < $tfile ||
                { echo "save_to_db: Problem inserting into database $DATAPATH_DB" ; exit 1 ;  }

    fi
    rm -f $tfile $tfile_lsrp
}

lsrp() {
    # List all files in RUNPATH_ROOT and subdirs that have a 3 digit numeric suffix
    # The resulting list will be sorted with non-unique entries removed
    #
    # Usage: lsrp [options]
    # Options:
    #    -s ..output file base name and version number separated by space (1 file per line)
    #         if -s is not present then the full pathname is output (1 file per line)

    # Output format ID
    local outfmt=0
    export outfmt

    # process function options
    while getopts :s opt; do
      case $opt in
        s) outfmt=1 ;;
        :) usage -e "lsrp: Option $OPTARG requires and argument" ;;
        ?) usage -e "lsrp: Invalid option $OPTARG" ;;
      esac
    done
    shift $((OPTIND-1))
    OPTIND=1

    # RUNPATH_ROOT is a space separated list of absolute path names to directories
    # that will be searched recursively for files with 3 digit numeric suffixes
    [ -z "$RUNPATH_ROOT" ] && { echo "lsrp: RUNPATH_ROOT must be defined" ; exit 1; }

    local currRoot currDir dcount
    dcount=0
    for currRoot in $RUNPATH_ROOT; do
      # Silently ignore non-directory elements found in RUNPATH_ROOT
      [ -d "$currRoot" ] || continue
      for currDir in $(find $currRoot -type d 2>/dev/null); do
        # Silently ignore JHOME directories
        [ x${currDir/\/JHOME_} != x$currDir ] && continue
        dcount=$((dcount+1))
        export currDir
        # Return a list of all files with a 3 digit numeric suffix found in the current dir
        # If a directory cannot be read then nothing will be returned but exit status will be 1
        # All file names that begin with dot "." will be ignored
        perl -e 'opendir(DIR,$ENV{currDir}) or exit 1;
                 @flist = grep { !/^\./ and /\.\d\d\d$/} readdir DIR;
                 closedir(DIR);
                 exit 0 unless scalar(@flist);
                 if ( $ENV{outfmt} == 1 ) {print map {s/\.(\d\d\d)$/ \1/;"$_\n"} @flist}
                 else {print map {"$ENV{currDir}/$_\n"} @flist}'
      done
    done 
}

save_to_db(){
    # Saves information about the user supplied file name to the database
    # defined by the environment variable $DATAPATH_DB
    #
    # usage: save_to_db fullpath-to-file
    #
    # where fullpath-to_file includes the extension, (with version number)
    # All information about the file (user, date, version) is extracted
    # using the fullpath.

    # Check that the database exists
    [ -e "$DATAPATH_DB" ] ||
      { echo "save_to_db: Database $DATAPATH_DB does not exist" ; exit 1; }

    # Process function options
    # Suppress error messages
    local quiet=0
    # Should an error in this function cause fdb to exit with a non-zero status
    local abort_on_error=0
    while getopts :aq opt; do
      case $opt in
        a) abort_on_error=1 ;;
        q) quiet=1 ;;
        :) usage -e "save_to_db: Option $OPTARG requires and argument" ;;
        ?) usage -e "save_to_db: Invalid option $OPTARG" ;;
      esac
    done
    shift $((OPTIND-1))
    OPTIND=1

    local FILE_PATH USER_NAME MTIME cmdout
    local FILE_NAME FILE_VER

    # The full path to the file
    # A missing file name is always a fatal error
    FILE_PATH=$1
    [ -z "$FILE_PATH" ] &&
      { echo "save_to_db: Full pathname to file required as first argument" ; exit 1; }

    # Get information about the file to be stored
    # USER_NAME is the owner of this file
    # MTIME is modification time as seconds since Epoch
    # FILE_SIZE is file size in bytes

    # This requires GNU find which is not available on hadar/spica
    # cmdout=$(find $FILE_PATH -printf '%u %T@ %t' 2>&1)
    cmdout=$(stat $FILE_PATH -c'%U %Y %s' 2>&1)
    # Assume an error if the output from stat begins with "stat:"
    [ x"${cmdout#stat:}" != x"$cmdout" ] &&
        { [[ $quiet = 1 ]] || echo "$cmdout"; [[ $abort_on_error = 1 ]] && exit 1; return; }
    read USER_NAME MTIME FILE_SIZE <<< $cmdout

    FILE_NAME=$(basename ${FILE_PATH})
    [[ $FILE_NAME == *[@'#'$%'&'*=+]* ]] && { echo "save_to_db: File ${FILE_NAME} contains illegal characters, aborting" ; exit 1 ; }
    BASE_NAME="${FILE_NAME%.[0-9][0-9][0-9]}"
    FILE_VER="${FILE_NAME##*.}"
    FILE_FULLPATH=`readlink -f ${FILE_PATH}`

    # Check version number sanity
    # A bad version number is always a fatal error
    [[ $FILE_VER = [0-9][0-9][0-9] ]] ||
      { echo "save_to_db: Invalid version number $FILE_VER"; exit 1 ; }

    # Write file information to the database
    # Note: Attempting to insert a record with filename and ver columns
    # that already exist in the database will result in a non-fatal error
    # When this happens a message is written to stdout (if quiet==0) and the program continues
    local sqlcmds
    sqlcmds="${SQL_PRAGMA}; INSERT INTO datapath (date, insert_date, user, filename, fullpath, ver, size)
             values('$MTIME', strftime('%s', 'now'), '$USER_NAME', '$BASE_NAME',
                    '$FILE_FULLPATH', '$FILE_VER', '$FILE_SIZE');"
    local sqlout
    sqlout=$(sqlite3 -init <(echo .timeout ${SQL_TIMEOUT}) $DATAPATH_DB <<< $sqlcmds 2>&1) ||
      { [ $quiet -eq 1 ] || echo "save_to_db: Problem saving $(basename $FILE_PATH) --- $sqlout";
        [[ $abort_on_error = 1 ]] && exit 1; }
}

del_from_db(){
    # Deletes information from the database defined by the
    #
    # usage: del_from_db [options] filename [ver]
    #
    # filename is the file to delete and ver is the optional version number
    # If no verion is given the highest version number is deleted.
    
    # Check that the database exists
    [ -e $DATAPATH_DB ] ||
      { echo "del_from_db: Database $DATAPATH_DB does not exist" ; exit 1; }

    # Option -a (delete_all=1) means delete all versions of the specified file
    local delete_all=0
    while getopts :a opt; do
      case $opt in
        a) delete_all=1 ;;
        :) usage -e "save_to_db: Option $OPTARG requires and argument" ;;
        ?) usage -e "save_to_db: Invalid option $OPTARG" ;;
      esac
    done
    shift $((OPTIND-1))
    OPTIND=1

    local FILE_NAME FILE_VER

    # Check input filename
    FILE_NAME=$1
    [ -z "$FILE_NAME" ] &&
      { echo "del_from_db: A filename required as the first argument" ; exit 1; }

    # Write the sql command for this transaction
    local sqlcmds
    if [ $delete_all -eq 1 ]; then
      # Deletes all versions of FILE_NAME
      sqlcmds="SELECT count(*) FROM datapath WHERE filename = '${FILE_NAME}';
               DELETE FROM datapath WHERE filename = '${FILE_NAME}';"
    else
      FILE_VER=$2
      if [[ -z "$FILE_VER" ]]; then
          # Delete the entry with the highest version number
          sqlcmds="SELECT count(*) FROM datapath WHERE filename = '${FILE_NAME}' AND
                   ver = (SELECT max(ver) FROM datapath WHERE filename = '${FILE_NAME}');
                   DELETE FROM datapath WHERE filename = '${FILE_NAME}' AND
                   ver = (SELECT max(ver) FROM datapath WHERE filename = '${FILE_NAME}');"
      else
          # Delete the entry for a specific version of this file
          [[ $FILE_VER = [0-9][0-9][0-9] ]] ||
              { echo 'get_fullpath_from_db: The version number is not three digits'; exit 1; }
          sqlcmds="SELECT count(*) FROM datapath WHERE filename = '${FILE_NAME}' and ver = '${FILE_VER}';
                   DELETE FROM datapath WHERE filename = '${FILE_NAME}' and ver = '${FILE_VER}';"
      fi
    fi

    # Delete the selected file or files from the database
    local sqlout
    sqlout=$(sqlite3 -init <(echo .timeout ${SQL_TIMEOUT}) $DATAPATH_DB <<< $sqlcmds 2>&1) ||
      { echo "del_from_db: Problem deleting ${FILE_NAME}.${FILE_VER} from $DATAPATH_DB"; echo "$sqlout"; exit 2; }

    # The function return value is 0 if at least 1 file was deleted and 1 otherwise
    [ -z "$sqlout" ] && return 1
    if [ -z "${sqlout//[0-9 ]}" ]; then
      # Assume the output is an integer
      [ $sqlout -eq 0 ] && return 1
      return 0
    else
      # The output is not an intger
      return 1
    fi
}

delpat(){
# Deletes all files from DB and disk using wildcard patterns.

    # Check input filename
    FILE_NAME=$1
    [ -z "$FILE_NAME" ] &&
      { echo "delpat: A filename required as the first argument" ; exit 1; }

    # replace '*' with '%' and escape '_' (as it is an sql wildcard) for sql query to match wildcard
    SQL_FILE_NAME=$(echo "$FILE_NAME" | sed 's/*/%/g;s/_/\\_/g')
   
    tnow=$(date +%Y-%m-%d%H%M%S)
    export tfile=/tmp/clean_sqlcmds_${USER}_${tnow}_$$.tmp
    export sfile=/tmp/clean_bshcmds_${USER}_${tnow}_$$.tmp
    export tfile_stat=/tmp/clean_stat_${USER}_${tnow}_$$.tmp
    
    sqlite3 $DATAPATH_DB "PRAGMA busy_timeout=${SQL_TIMEOUT}; 
                              SELECT fullpath FROM datapath WHERE filename LIKE '${SQL_FILE_NAME}' ESCAPE '\' AND USER like '${USER}';" > $tfile_stat ||
                                    { echo "delpat: failed " ; exit 1 ; }

python - <<END
#!/usr/bin/python
import os
import subprocess

tfile=os.environ['tfile']
sfile=os.environ['sfile']
tfile_stat=os.environ['tfile_stat']
SQL_TIMEOUT=os.environ['SQL_TIMEOUT']

outfile = open(tfile, 'w+')
bshfile = open(sfile, 'w+')

init_string = (".timeout {SQL_TIMEOUT} \n BEGIN TRANSACTION; \n")
init_string = init_string.format(SQL_TIMEOUT=SQL_TIMEOUT)
outfile.write(init_string)

with open(tfile_stat, 'r') as infile:
    next(infile) # skip the pragma timeout line
    for line in infile:
        line=line.replace('\n', '')
        basename = os.path.basename(line)
        filename = os.path.splitext(basename)[0]
        ver = os.path.splitext(basename)[1].replace('.', '')
       
        s="DELETE from datapath WHERE filename='{BASE_NAME}' AND ver='{FILE_VER}';"
        s=s.format(BASE_NAME=filename, FILE_VER=ver).replace('\n','')
        outfile.write(s + '\n')
       
        sdel="release {LINE}".format(LINE=line)
        print sdel
        bshfile.write(sdel + '\n')
       
end_string = ("END TRANSACTION; \n")
outfile.write(end_string)

outfile.close()
bshfile.close()
END

    if [ -s $sfile ]; then
      read -p "Do you want to proceed with deletion (y/[n])" -n1 -r 
      echo

      if  [[ $REPLY =~ ^[Yy]$ ]] ; then
         local sqlout
         sqlout=$(sqlite3  $DATAPATH_DB < $tfile) ||
                                   { echo "delpat: failed to delete files" ; echo $sqlout; exit 1 ; }

         . $sfile
      else
         echo "Aborting deletion"
      fi
    else
      echo "No files to delete"
    fi

    rm -f $tfile $sfile $tfile_stat
}

mdelete(){
    OPTIND=1         # Reset in case getopts has been used previously in the shell.
    INFILE=0
    while getopts "fh:" opt; do
        case "$opt" in
         h) echo "$lsdoc"|sed 's/^#/   /g'; exit ;;
         f) INFILE=1  # using an input file not a list
            ;;
          *) usage -e "mdelete: Invalid action found on command line: $@" ;;
        esac
    done

    shift $((OPTIND-1))
    [ "$1" = "--" ] && shift

    FILE_NAMES=$@

# Deletes all files from DB and disk using wildcard patterns.

    # Check input filename
    FILE_NAME=$1
    [ -z "$FILE_NAME" ] &&
      { echo "mdelete: A filename required as the first argument" ; exit 1; }

    # replace '*' with '%' for sql query to match wildcard
    #SQL_FILE_NAME=$(echo "$FILE_NAME" | sed 's/*/%/g')
   
    tnow=$(date +%Y-%m-%d%H%M%S)
    export tfile=/tmp/mdel_sqlcmds_${USER}_${tnow}_$$.tmp
    export sfile=/tmp/mdel_bshcmds_${USER}_${tnow}_$$.tmp
    #export tfile_stat=/tmp/mdel_stat_${USER}_${tnow}_$$.tmp
    export infile_list=/tmp/mdel_infilelist_${USER}_${tnow}_$$.tmp

    if [ "$INFILE" == 1 ] ; then
#        echo "using input file $FILE_NAMES"
        cat $FILE_NAMES > $infile_list
    else
#        echo "using list of files: $FILE_NAMES"
        touch $infile_list
        for f in $FILE_NAMES; do echo $f >> $infile_list ; done
    fi



    # Loop over all names provided in the input file.
    
#     sqlite3 $DATAPATH_DB "PRAGMA busy_timeout=${SQL_TIMEOUT}; 
#                               SELECT fullpath FROM datapath WHERE filename LIKE '${SQL_FILE_NAME}' AND USER like '${USER}';" > $tfile_stat ||
#                                     { echo "delpat: failed " ; exit 1 ; }

python - <<END
#!/usr/bin/python
import os
import subprocess

bshfile=os.environ['sfile']
tfile=os.environ['tfile']
infile_list=os.environ['infile_list']
SQL_TIMEOUT=os.environ['SQL_TIMEOUT']

sqldelcmdsfile = open(tfile, 'w+')
sqlliscmdsfile = open(tfile + '-query', 'w+')

init_string = (".timeout {SQL_TIMEOUT} \n BEGIN TRANSACTION; \n")
init_string = init_string.format(SQL_TIMEOUT=SQL_TIMEOUT)
sqldelcmdsfile.write(init_string)

init_string = (".timeout {SQL_TIMEOUT} \n.output {OFILE}\n BEGIN TRANSACTION; \n")
init_string = init_string.format(OFILE=bshfile, SQL_TIMEOUT=SQL_TIMEOUT)
sqlliscmdsfile.write(init_string)

with open(infile_list, 'r') as infile:
#    next(infile) # skip the pragma timeout line
    for line in infile:
        line=line.replace('\n', '')
        filename=line
        s="DELETE from datapath where filename='{FILE_NAME}' AND ver = (SELECT max(ver) FROM datapath WHERE filename='{FILE_NAME}') ;" 
        s=s.format(FILE_NAME=filename).replace('\n','')
        sqldelcmdsfile.write(s + '\n')

        s="select fullpath from datapath where filename='{FILE_NAME}' AND ver = (SELECT max(ver) FROM datapath WHERE filename='{FILE_NAME}') ;" 
        s=s.format(FILE_NAME=filename).replace('\n','')
        sqlliscmdsfile.write(s + '\n')
       
end_string = ("END TRANSACTION; \n")
sqldelcmdsfile.write(end_string)
sqlliscmdsfile.write(end_string)                                                                                                                                                                    
sqldelcmdsfile.close()
sqlliscmdsfile.close()
END

    if [ -s $tfile ]; then
        local sqlout
        # This creates a list of files to rm into a file names $sfile
        sqlout=$(sqlite3  $DATAPATH_DB < ${tfile}-query) ||
                                   { echo "mdelete: failed to delete files" ; echo $sqlout; exit 1 ; }
        [ -s $sfile ] || { echo "mdelete: failed to delete files" ; exit 1 ; } 
        release `cat $sfile` || { echo "mdelete: failed to delete files" ; exit 1 ; }
             
        # This removes those entries from the database
        sqlout=$(sqlite3  $DATAPATH_DB < $tfile) ||
                                   { echo "mdelete: failed to delete files" ; echo $sqlout; exit 1 ; }
    else
      echo "No files to delete"
    fi

    rm -f $tfile ${tfile}-query $sfile $infile_list
}

get_fullpath_from_db() {
    # Return the full path name on stdout given the filename and optionally a version number
    #
    # Usage: get_fullpath_from_db filename [ver]
    #
    # filename is the file to retrieve and ver is the optional version number
    # If no verion is given the highest version number in the database is used.
    #
    # No wildcards allowed since matched must be exact.
    # The fullpath is retrieved from the DATAPATH database
    # at the location defined in $DATAPATH_DB. An error
    # is returned if the file does not exist.

    local FILE_NAME FILE_VER

    # Check that the database exists and is not empty
    [ -s $DATAPATH_DB ] ||
        { echo "get_fullpath_from_db: Database $DATAPATH_DB is missing or empty" ; exit 1; }

    # Check input arguments
    FILE_NAME=$1
    [ -z "$FILE_NAME" ] &&
        { echo "get_fullpath_from_db: A filename is required as the first argument" ; exit 1; }

    # Check optional input of version number
    local sqlcmds
    FILE_VER=$2
    if [[ -z "$FILE_VER" ]]; then
        # Return the path with the highest version number
        sqlcmds="SELECT fullpath FROM datapath WHERE filename = '${FILE_NAME}' ORDER BY ver DESC LIMIT 1;"
    else
        # Return the path to a specific version of this file
        [[ $FILE_VER = [0-9][0-9][0-9] ]] ||
            { echo 'get_fullpath_from_db: The version number is not three digits'; exit 1; }
        sqlcmds="SELECT fullpath FROM datapath WHERE filename = '${FILE_NAME}' AND ver = '${FILE_VER}';"
    fi

    # The query will return an empty string if the file does not exist
    local sqlout
    sqlout=$(sqlite3 -init <(echo .timeout ${SQL_TIMEOUT}) $DATAPATH_DB <<< $sqlcmds 2>&1) ||
        { echo "get_fullpath_from_db: Failure in select fullpath for $FILE_NAME"; echo "$sqlout"; exit 2; }

    # The function return value is 0 if the file exists and 1 otherwise
    # If the file does not exist in the database (sqlout is empty) then nothing will be sent to stdout
    [ -z "$sqlout" ] && return 1
    echo $sqlout
}


exists_in_db() {
    # Returns 1 if a file exists in the database else returns 0
    #
    # Usage: exists_in_db filename [ver]
    #
    # filename is the file to retrieve and ver is the optional version number
    #

    local FILE_NAME FILE_VER

    # Check that the database exists and is not empty
    [ -s $DATAPATH_DB ] || { echo "exists_in_db: Database $DATAPATH_DB is missing or empty" ; exit 1; }

    # Check input arguments
    FILE_NAME=$1
     [ -z "$FILE_NAME" ] &&
       { echo "exists_in_db: A filename is required as the first argument" ; exit 1; }

    # Check optional input of version number
    local sqlcmds
    FILE_VER=${2}
    if [[ -z "$FILE_VER" ]]; then
        # Does any version of this file exist
        sqlcmds="SELECT EXISTS(SELECT 1 FROM datapath WHERE filename = '${FILE_NAME}');"
    else
        # Does a specific version of this file exist
        [[ $FILE_VER = [0-9][0-9][0-9] ]] || { echo 'exists_in_db: version number not three digits'; exit 1 ; }
        sqlcmds="SELECT EXISTS(SELECT 1 FROM datapath WHERE filename = '${FILE_NAME}' and ver = '${FILE_VER}'  );"
    fi

    # Query the database for existence of this file
    # If the query results in an error from sqlite then exit here with a non-zero status
    local sqlout
    sqlout=$(sqlite3 -init <(echo .timeout ${SQL_TIMEOUT}) $DATAPATH_DB <<< $sqlcmds 2>&1) ||
        { echo "exists_in_db: $sqlout"; exit 2; }

    # The function return value is 0 if the file exists and 1 otherwise
    [ x"$sqlout" = x1 ] && return 0
    return 1   
}

get_file_ver_from_db(){
    # Return the highest version on stdout of the file given the filename
    #
    # Usage: get_file_ver_from_db filename
    #
    # No wildcards allowed since matched must be exact.
    # An error is returned if the file does not exist.

    local FILE_NAME FILE_VER

    # Check that the database exists
    [ -e $DATAPATH_DB ] ||
        { echo "get_file_ver_from_db: Database $DATAPATH_DB does not exist" ; exit 1; }

    # Check input arguments
    FILE_NAME=$1
     [ -z "$FILE_NAME" ] &&
       { echo "get_file_ver_from_db: A filename is required as the first argument" ; exit 1; } 

    # Query the database for the version number
    # The query will return an empty string if the file does not exist
    local sqlcmds
    sqlcmds="SELECT ver FROM datapath WHERE filename = '${FILE_NAME}' ORDER BY ver DESC LIMIT 1;"
    local sqlout
    sqlout=$(sqlite3 -init <(echo .timeout ${SQL_TIMEOUT}) $DATAPATH_DB <<< $sqlcmds 2>&1) ||
      { echo "get_file_ver_from_db: Failure in getting version"; echo "$sqlout"; exit 1; }

    # The function return value is 0 if the file exists and 1 otherwise
    # If the file does not exist in the database (sqlout is empty) then nothing will be sent to stdout
    [ -z "$sqlout" ] && return 1
    echo $sqlout
}

lsdb() {
########################################################################
lsdoc='
#   Usage: fdb ls [options] <PATTERN> [-u <USER> ]
#
# Purpose: lists the database of RUNPATH_ROOT (DATAPATH)
#
# Options:
# All options begin with a dash (-) 
#      -h   ...print this help message.
#      -l   ...long listing.
#      -s   ...sort by size, largest first.
#      -t   ...sort by time, most recent first.
#      -r   ...reverse order.
#      -u <USER> ...list only files owned by <USER>.
#
# NOTE:
#
# The environment variable DATAPATH_DB must be defined and contain the full path
# to the database being used. It should be set by default in the CCCma setup.
#
#
# Authors:
#
# Neil Swart, Nov 2016'
########################################################################

    OPTIND=1         # Reset in case getopts has been used previously in the shell.

    order=ver
    direction=DESC
    columns=filename
    width=79
    USER_NAME='%'


    while getopts "hlstru:" opt; do
        case "$opt" in
         h) echo "$lsdoc"|sed 's/^#/   /g'; exit ;;
         l)  columns="datetime(date, 'unixepoch'), user, size, fullpath"
                    width="20 8 6 150"
            ;;
         s)  order=size
            ;;
         t)  order=date
            ;;
         r)  direction=ASC
            ;;
         u) USER_NAME="$OPTARG"
            ;;
          *) usage -e "lsdb: Invalid action found on command line: $@" ;;
        esac
    done

    shift $((OPTIND-1))
    [ "$1" = "--" ] && shift

    PATTERN=$@

    # Check that the database exists
    [ -e "$DATAPATH_DB" ] ||
        { echo "lsdb: Database $DATAPATH_DB does not exist or is not writable" ; exit 1; }

    # Check input arguments
    [ -z "$PATTERN" ] &&
       { echo "lsdb: The pattern is required as first argument" ; exit 1; }
    # replace '*' with '%' for sql query to match wildcard
    SQL_FILE_NAME=$(echo "$PATTERN" | sed 's/*/%/g;s/_/\\_/g')

    [ -z "$USER_NAME" ] &&{ USER_NAME='%' ; }

    local query_line
    query_line="SELECT "${columns}" FROM datapath WHERE filename LIKE '${SQL_FILE_NAME}' ESCAPE '\'
                AND USER like '${USER_NAME}' ORDER BY ${order} ${direction};"

    sparse_db && { query_line="SELECT ${columns} FROM datapath WHERE filename LIKE '${SQL_FILE_NAME}' ESCAPE '\';" ;
                   width=150; }

    local sqlcmds
    sqlcmds=".mode column
.width $width 
.timeout ${SQL_TIMEOUT}

$query_line
"
    local sqlout
    sqlout=$(sqlite3 $DATAPATH_DB <<< "$sqlcmds" 2>&1) ||
          { echo "lsdb: failed to get data"; echo "$sqlout"; exit 1; }

    printf "$sqlout \n"
}

rows_in_db(){
# Returns the number of rows in the database

    # Check that the database exists
    [ -e $DATAPATH_DB ] ||
        { echo "rows_in_db: Database $DATAPATH_DB does not exist" ; exit 1; }

    local sqlcmds
    sqlcmds="SELECT count(*) FROM datapath;"
    local sqlout
    sqlout=$(sqlite3 -init <(echo .timeout ${SQL_TIMEOUT}) $DATAPATH_DB <<< $sqlcmds 2>&1) ||
      { echo "rows_in_db: failed to get rows"; echo "$sqlout"; exit 1; }
    echo $sqlout
}

sparse_db() {
    # Return 0 if database contains sparse rows (no user, size, time).

  local sqlcmds
  sqlcmds="
.timeout ${SQL_TIMEOUT}
SELECT count(*) FROM datapath WHERE user IS NULL OR user = '';"

   empty_cols=$(sqlite3  $DATAPATH_DB <<< "$sqlcmds" 2>&1) ||
                                       { echo "sparse_db: failed "; echo $empty_cols ; exit 1 ; }
    [ "$empty_cols" -gt "0" ] && return 0
    return 1
}

clean_db(){
# Checks if fullpath exists on disk, deletes row if not.

    # Check that the database exists
    [ -e $DATAPATH_DB ] ||
        { echo "$DATAPATH_DB does not exist or is not writable" ; exit 1; }
 
    local sqlcmds
    sqlcmds="PRAGMA busy_timeout=${SQL_TIMEOUT};
             SELECT fullpath FROM datapath ORDER BY date DESC;"

    [ sparse_db ] && sqlcmds="PRAGMA busy_timeout=${SQL_TIMEOUT};
                              SELECT fullpath FROM datapath ORDER BY rowid DESC;"

    tnow=$(date +%Y-%m-%d%H%M%S)
    export tfile=/tmp/clean_sqlcmds_${USER}_${tnow}_$$.tmp
    export tfile_stat=/tmp/clean_stat_${USER}_${tnow}_$$.tmp
    
    sqlite3 $DATAPATH_DB "PRAGMA busy_timeout=${SQL_TIMEOUT}; 
                              SELECT fullpath FROM datapath;" > $tfile_stat ||
                                    { echo "clean_detail_db: failed " ; exit 1 ; }

python - <<END
#!/usr/bin/python
import os
import subprocess

tfile=os.environ['tfile']
tfile_stat=os.environ['tfile_stat']
SQL_TIMEOUT=os.environ['SQL_TIMEOUT']

outfile = open(tfile, 'w+')

init_string = (".timeout {SQL_TIMEOUT} \n BEGIN TRANSACTION; \n")
init_string = init_string.format(SQL_TIMEOUT=SQL_TIMEOUT)
outfile.write(init_string)

with open(tfile_stat, 'r') as infile:
    for line in infile:
       
        line=line.replace('\n', '')
        basename = os.path.basename(line)
        filename = os.path.splitext(basename)[0]
        ver = os.path.splitext(basename)[1].replace('.', '')
       
        if subprocess.call("[ -e " + line + " ]", shell=True):
            s="DELETE from datapath WHERE filename='{BASE_NAME}' AND ver='{FILE_VER}';"
            s=s.format(BASE_NAME=filename, FILE_VER=ver).replace('\n','')
            outfile.write(s + '\n')

end_string = ("END TRANSACTION; \n")
outfile.write(end_string)

outfile.close()
END

    local sqlout
    sqlout=$(sqlite3  $DATAPATH_DB < $tfile) ||
                              { echo "clean_db: failed to clean database" ; echo $sqlout; exit 1 ; }

    rm -f $tfile $tfile_stat
}

add_detail_db(){
# Adds owner, modified time, file size (and optionally sha1 checksum for files)

    # Check that the database exists
    [ -e $DATAPATH_DB ] ||
        { echo "$DATAPATH_DB does not exist or is not writable" ; exit 1; }
 
    #local sqlinit
    #sqlinit="PRAGMA busy_timeout=${SQL_TIMEOUT};"
    # Since this is a very expensive operation, break it into manageable chunks
    chunk_size=10000
    num_sparse=$(sqlite3 $DATAPATH_DB "SELECT count(*) FROM datapath WHERE user IS NULL OR user = '';")
    num_iters=$((num_sparse / ${chunk_size} + 2))

    for i in $(seq 1 $num_iters); do
        tnow=$(date +%Y-%m-%d%H%M%S)
        export tfile=/tmp/detail_sqlcmds_${USER}_${tnow}_$$.tmp
        export tfile_stat=/tmp/detail_stat_${USER}_${tnow}_$$.tmp
        t_db=/tmp/detail_$USER_$tnow

        sqlite3 $DATAPATH_DB "PRAGMA busy_timeout=${SQL_TIMEOUT}; 
                              SELECT fullpath FROM datapath WHERE user 
                              IS NULL OR user = '' LIMIT $chunk_size;" > $tfile_stat ||
                                    { echo "add_detail_db: failed " ; exit 1 ; }

python - <<END
#!/usr/bin/python
import os
import subprocess
from pwd import getpwuid

tfile=os.environ['tfile']
tfile_stat=os.environ['tfile_stat']
SQL_TIMEOUT=os.environ['SQL_TIMEOUT']

outfile = open(tfile, 'w+')

init_string = (".timeout {SQL_TIMEOUT} \n BEGIN TRANSACTION; \n")
init_string = init_string.format(SQL_TIMEOUT=SQL_TIMEOUT)
outfile.write(init_string)

with open(tfile_stat, 'r') as infile:
    for line in infile:
        line=line.replace('\n', '')
        basename = os.path.basename(line)
        filename = os.path.splitext(basename)[0]
        ver = os.path.splitext(basename)[1].replace('.', '')

        if not subprocess.call("[ -e " + line + " ]", shell=True):
            try:
                fstat=os.stat(line)
                USER_NAME=getpwuid(fstat.st_uid).pw_name         
                FILE_SIZE=fstat.st_size
                MTIME=fstat.st_mtime
            except:
                USER_NAME='FAILED'
                FILE_SIZE=999999
                MTIME=0
            
            s=("UPDATE datapath SET date='{MTIME}', user='{USER_NAME}', size='{FILE_SIZE}' " + 
               "WHERE filename='{BASE_NAME}' AND ver='{FILE_VER}';") 
            s=s.format(MTIME=MTIME, USER_NAME=USER_NAME, FILE_SIZE=FILE_SIZE, BASE_NAME=filename, 
                       FILE_VER=ver).replace('\n','')
        else:
            s="DELETE from datapath WHERE filename='{BASE_NAME}' AND ver='{FILE_VER}';"
            s=s.format(BASE_NAME=filename, FILE_VER=ver).replace('\n','')

        outfile.write(s + '\n')

end_string = ("END TRANSACTION; \n")
outfile.write(end_string)

outfile.close()
END

        sqlite3  $DATAPATH_DB < $tfile ||
                              { echo "add_detail_db: failed to add details (size,user, time) to database" ; }

         rm -f $tfile $tfile_stat
     done
}

#========================================================================================================

# Here we actually call the functions above to do the requested action

case $action in
     create) create_datapath_db $DATAPATH_DB ;;
      empty) create_empty_datapath_db $DATAPATH_DB ;;
     update) populate_db_from_runpath "${A_opts[@]}" ;;
     backup) backup_db   "${A_opts[@]}" ;;
       rows) rows_in_db  "${A_opts[@]}";;
       save) save_to_db  "${A_opts[@]}" ;;
     delete) del_from_db "${A_opts[@]}" ;;
    mdelete) mdelete     "${A_opts[@]}" ;;
     delpat) delpat      "${A_opts[@]}" ;;
     exists) exists_in_db "${A_opts[@]}" ;;
       path) get_fullpath_from_db "${A_opts[@]}" ;;
    version) get_file_ver_from_db "${A_opts[@]}" ;;
         ls) lsdb "${A_opts[@]}" ;;
       lsrp) lsrp "${A_opts[@]}" ;;
      clean) clean_db ;;
     detail) add_detail_db ;;
          *) usage -e "Invalid action $action" ;;
esac

