#! /bin/sh

#    Jun 07/2018 - F.Majaess

#id archive - archives file(s) onto the archive server.

#   AUTHOR  - F.Majaess

#hd PURPOSE - "archive" code is used to handle creating subdirectory
#hd           to archive via "hpcarchive" with collected individual 
#hd           files in it.
#hd           Note: The script is not intended to be called directly by 
#hd                 the user, instead it's to be called by higher level
#hd                 scripts.
#hd                 Current default is to calculate md5sum checksum for 
#hd                 archived files. This can be altered via "nochksum" 
#hd                 option.

#pr PARAMETERS:
#pr
#pr   POSITIONAL
#pr
#pr     f1 ... fn  = list of n file names to be archived.
#pr                  (filenames may contain path information)
#pr
#pr   PRIMARY
#pr
#pr       arclabel = A substring prefix to use in the "archive name" of the
#pr                  constructed subdirectory.
#pr                  If it is not specified, the "archive name" will
#pr                  be constructed by incorporating the maximum number of 
#pr                  shared fields (if applicable) among the individual 
#pr                  filenames, where the fields are identified based on 
#pr                  '_' separator between them.
#pr
#pr   SECONDARY
#pr
#pr        shorterm= switch used to target the short term "archive project"
#pr                  for archiving the files on the archive server.
#pr                  (=no/yes)
#pr
#pr        nochksum= optional switch to alter default md5sum checksum
#pr                  creation.
#pr                  (=no/yes)

#ex EXAMPLE:
#ex   
#ex     archive filea.009 /path/fileb.005 filen.001 filen.002 filex*
#ex  
#ex     The above will result in collecting the targeted files into a 
#ex     subdirectory which will be archived via "hpcarchive" on the archive 
#ex     server.

#  * Pin the field separator to a single blank, so that the word splitting
#  * done on the blank separated lists handled in this script does not
#  * depend on whatever IFS value the parent process happened to define.

IFS=' ' ; export IFS
# set -x

#  * "awk" version to be used: a non-empty AWK value is kept as is,
#  * otherwise plain 'awk' is selected.

: "${AWK:=awk}"

#  * Walk through the command line arguments: shell options are handed to
#  * "set", "name=value" arguments are evaluated as assignments, the
#  * "shorterm" and "nochksum" keywords turn the matching switch on and
#  * anything else is collected as a file to be archived.

arg_list=$@
for arg in $arg_list ; do
  case "$arg" in
    -*)
      set $arg
      ;;
    *=*)
      # NOTE(review): the argument is executed as shell code; callers are
      # assumed to be trusted higher level scripts.
      eval "$arg"
      ;;
    shorterm)
      shorterm='yes'
      ;;
    nochksum)
      nochksum='yes'
      ;;
    *)
      # First file starts the list; later ones are appended and the list
      # is passed through "clnstrng".
      case "$file_list" in
        '') file_list="$arg" ;;
        *)  file_list=`clnstrng "$file_list $arg" ' '` ;;
      esac
      ;;
  esac
done

# Archive server settings: host name used with ssh, root path probed on
# the server and the expected first characters of the server's hostname.

arcvmid='hpnls'
ArcRootPath='/archive/eccc/crd'
ArcSrvr_prfx='hpnl'

# Normalize "force_output_return", which controls creating the
# $Diagdir/.force_output_return file in case of a severe issue:
# unset/empty and 'on' become 'yes', 'off' becomes 'no', and any other
# value is left untouched.

case "$force_output_return" in
  '' | on) force_output_return='yes' ;;
  off)     force_output_return='no'  ;;
esac

# Ensure the "hpcarchive" setup is in place: the command has to be known
# to the shell, otherwise give up right away. HpcStat keeps the status
# returned by the lookup.

if type hpcarchive > /dev/null 2>&1 ; then
 HpcStat=0
else
 HpcStat=$?
 echo ""
 echo " archive: Sorry, not able to locate hpcarchive, possibly due to lack of proper setup!"
 exit 1
fi

#  * Test for access to the archive server.
#  * The server is taken as accessible when the first four characters of
#  * the hostname reported through ssh match $ArcSrvr_prfx and "hsmls" run
#  * on the server returns ${ArcRootPath}/ccrn. Otherwise a time stamped
#  * message is issued and the test is repeated after a 5 minute pause,
#  * for as long as it takes.

while :
 do
  unset hstnm
  hstnm=`ssh $arcvmid hostname 2>/dev/null | cut -c 1-4 `
  hststrgtdirck=`ssh $arcvmid hsmls -1d ${ArcRootPath}/ccrn`
  if [ "$hstnm" = "${ArcSrvr_prfx}" ] && [ "$hststrgtdirck" = "${ArcRootPath}/ccrn" ] ; then
   break
  fi
  Tmstmp=`date`
  # The message carries this script's own name as its tag.
  echo "" ; echo "" ; echo "  archive: archive server is not accessible ~ $Tmstmp ! " ; echo ""
  sleep 300
 done
#

# hpcarchive project name to use.
# "shorterm=yes" selects the short term project. Otherwise a value already
# held by "hpcarc_project" is kept and, failing that, the project is
# picked according to the group name returned by "id -ng".

: "${shorterm:=no}"
case "$shorterm" in
 yes) hpcarc_project='crd_short_term' ;;
esac

if [ -z "$hpcarc_project" ] ; then
 GrpNm=$(id -ng)
 case "$GrpNm" in
  eccc_ccrn_shr | eccc_ccrn_dat)
   hpcarc_project='crd_cccma' ;;
  eccc_ccmr)
   hpcarc_project='crd_ccmr' ;;
  eccc_ccrm)
   hpcarc_project='crd_cdas' ;;
  eccc_ccrp)
   hpcarc_project='crd_cps' ;;
  eccc_ccdp)
   hpcarc_project='crd_ccdp' ;;
 esac
fi

# Nothing can be archived without a project name.
[ -n "$hpcarc_project" ] || {
 echo ""
 echo " archive: Sorry, not able to configure needed hpcarchive project name !"
 exit 1
}

#  * Check whether the account is able to write into the selected
#  * hpcarchive project: the "hpcarchive -w" output is scanned for a
#  * "- <project name>" line. A failed check is reported only; archiving
#  * is attempted regardless.

HpCnW=$(hpcarchive -w | sed -n -e "s/^- ${hpcarc_project} *$/Yes/gp")
case "$HpCnW" in
 Yes)
  :
  ;;
 *)
  #echo "" ; echo "  archive: Sorry, $hpcarc_project is not a valid project name to target writing into!"
  #exit 1
  echo ""
  echo "  archive: Will attempt archiving despite not able to validate $hpcarc_project project name!"
  ;;
esac

#  * If necessary, adjust hpcarchive options to use.
#  * Default is to calculate checksum for archived files.
#  *
#  * On entry "hpcarc_aoptns" may hold caller supplied hpcarchive options
#  * and "nochksum" may be 'on' or 'yes'. On exit "hpcarc_aoptns" always
#  * carries "-v" and, unless checksum creation is switched off, "-k"
#  * (with "-n" filtered out).

# _arc_strip_flag OPTIONS FLAG
# Print OPTIONS, re-joined with single blanks, with FLAG blanked out: the
# first "FLAG " occurrence and a FLAG ending the string are each replaced
# by a blank.
# "printf" is used instead of "echo": an option string starting with "-n"
# (or "-e") would be taken by "echo" as its own option and silently vanish
# from the result.
_arc_strip_flag() {
 _arc_flag=$2
 # Deliberately unquoted: re-split OPTIONS into words (the positional
 # parameters changed here are those of the function only).
 set -- $1
 printf '%s\n' "$*" | sed -e "s/${_arc_flag} / /" -e "s/${_arc_flag}\$/ /"
}

# hpcarc_aoptns=${hpcarc_aoptns:-'-v'}
# Drop caller supplied "-v" and "-k"; they get re-added below as needed.
if [ -n "$hpcarc_aoptns" ] ; then
 hpcarc_aoptns=`_arc_strip_flag "$hpcarc_aoptns" '-v'`
 hpcarc_aoptns=`_arc_strip_flag "$hpcarc_aoptns" '-k'`
fi
hpcarc_aoptns="$hpcarc_aoptns -v "

case "$nochksum" in
 on|yes)
  # Checksum creation is switched off: the options are left untouched.
  :
  ;;
 *)
  # "-n" & "-k" combination is problematic. Filter out "-n" when "-k" is to be used.
  hpcarc_aoptns=`_arc_strip_flag "$hpcarc_aoptns" '-n'`
  hpcarc_aoptns="$hpcarc_aoptns -k "
  ;;
esac

#  * A non-empty list of files to archive is mandatory.

case "$file_list" in
 '')
  echo ""
  echo "  archive: no archive file list is found!"
  exit 1
  ;;
esac

# echo "file_list=$file_list"

# Set needed parameters:
#   Lgname   - account name; kept when already set, else taken from "whoami"
#   ArcSzlmt - upper limit of the archive directory size in MB (90GB)
#   Arcmnsz  - lower limit of the archive directory size in MB (500MB)
#   TmnLog   - 'yes' (default) to record the start time used in the log entry

: "${Lgname:=$(whoami)}"
# ArcSzlmt=20480 # <-- 20GB in MB
ArcSzlmt=92160
Arcmnsz=500
: "${TmnLog:=yes}"
case "$TmnLog" in
 yes) Strt_Time=$(date) ;;
esac

# Count the entries of the file list and issue a note in case no more
# than a single individual file is specified.

Nfiles=0
for fname in $file_list ; do
 Nfiles=$(( Nfiles + 1 ))
done

[ "$Nfiles" -gt 1 ] || echo "archive: Note: only one file - $file_list - is specified!"

# Proceed with creating the "archive" subdirectory with individual files.

# Decide on the "archive name" to use: an already set "hpcarc_name" wins,
# then "arclabel". With neither, the name is worked out by "arc_shrd" from
# the lowercased file names, with 'misc' as the fallback.

hpcarc_name=${hpcarc_name:-${arclabel}}
if [ -z "$hpcarc_name" ] ; then
 unset Tlist
 # set -x
 for fname in $file_list ; do

   name=$(basename $fname)

   #  * Name under which the file gets saved on the archive server: the
   #  * part preceding the last '.' followed by three digits (the whole
   #  * name when there is no such part), with '*' turned into '_' and
   #  * upper case letters lowered.

   rname=$(expr "$name" : '\(.*\)\.[0-9][0-9][0-9]')
   : "${rname:=$name}"
   rname=$(echo $rname | tr '\*' '_')
   rname=$(echo $rname | tr '[A-Z]' '[a-z]')

   Tlist="$Tlist $rname"
 done

 if [ -z "$Tlist" ] ; then
  echo "archive: Not able to construct valid list of files!"
  exit 1
 fi
 Tlist=$(clnstrng "$Tlist" ' ')
 hpcarc_name=$(arc_shrd $Tlist)
 : "${hpcarc_name:=misc}"
fi
hpcarc_name=$(echo $hpcarc_name | tr '[A-Z]' '[a-z]')

# Create the temporary "archive" subdirectory in the current directory and
# move into it. Its name, which is also the final archive name, is the
# archive name followed by '_' and a UTC stamp (year, day of the year,
# hours, minutes, seconds).

Date=$(date -u '+%Y%j')
Time=$(date -u '+%H%M%S')
# Tdatetime=`date -u '+%Y%j%H%M%S'`
Tdatetime="${Date}${Time}"
hpcarc_name="${hpcarc_name}_${Tdatetime}"
Tmphpcarcdir="$hpcarc_name"
Prevwd=$(pwd)

mkdir -m 755 $Tmphpcarcdir
[ -d "$Tmphpcarcdir/." ] || {
 echo "archive: Not able to create temporary archive subdirectory!"
 exit 1
}
cd $Tmphpcarcdir

# Populate the archive subdirectory (the current directory at this point)
# with a copy of every file of the list, and collect in "Tlist" the names
# under which the files are stored.

# _arc_set_srcpth FILE DIR
# Set "SrcPth" to the path FILE is to be copied from: FILE itself when it
# starts with '/', otherwise DIR/FILE. The name itself is tested, so that
# a relative path whose directory part is literally "yes" is not mistaken
# for an absolute one.
_arc_set_srcpth() {
  case "$1" in
    /*) SrcPth=$1 ;;
    *)  SrcPth=$2/$1 ;;
  esac
}

unset Tlist
# set -x
for fname in $file_list
do

  name=`basename $fname`

  #  * Name under which the file gets saved on the archive server: the
  #  * part preceding the last '.' followed by three digits (the whole
  #  * name when there is no such part), with '*' turned into '_' and
  #  * upper case letters lowered.

  rname=`expr "$name" : '\(.*\)\.[0-9][0-9][0-9]'`
  rname=${rname:=$name}
  rname=`echo $rname | tr '\*' '_'`
  rname=`echo $rname | tr '[A-Z]' '[a-z]'`

  # Relative names are taken from the directory the script started in.
  _arc_set_srcpth "$fname" "$Prevwd"

  # A writable copy is made (symbolic/hard links were tried: no luck).
  # NOTE(review): a failed copy is not detected here and the name still
  # ends up in Tlist.
  cp -p $SrcPth $rname ; chmod u+w $rname

  # Tlist=`clnstrng "$Tlist $rname" ' '`
  Tlist="$Tlist $rname"
done
Tlist=`clnstrng "$Tlist" ' '`

# Proceed with the archival provided minimum archive size
# is satisfied. May enforce upper size limit as well.
#
# Exit status 2 is returned, after deleting the temporary subdirectory,
# when the size is out of range or when hpcarchive fails. In both cases a
# "SEVERE" entry is appended to the hpcarc_low_file_size_log file (only if
# that file already exists) and ${Diagdir}/.force_output_return is created
# unless "force_output_return" is 'no'.

# Back to the directory holding the temporary archive subdirectory.
cd $Prevwd

# Show, with the commands traced, the subdirectory and its content.
(set -x ; \ls -ld $Tmphpcarcdir ; \ls -al $Tmphpcarcdir ) ; echo ""
# Size of the subdirectory in MB, symbolic links being followed (du -L);
# the command is traced as well.
set -x
ArcSzchk=`du -smL $Tmphpcarcdir | $AWK '{ print $1 ; }'` 
set +x
# No size obtained counts as 0, which fails the minimum size test below.
ArcSzchk=${ArcSzchk:-0}
if [ "$ArcSzchk" -ge "$Arcmnsz" -a "$ArcSzchk" -le "$ArcSzlmt" ] ; then
 # Size within [Arcmnsz, ArcSzlmt]: archive. Date_bfr/Date_aftr bracket
 # the hpcarchive call for the log entry written at the end of the script.
 Date_bfr=`date` 
 set -x
 # if [ -n "$nochksum" -a \( "$nochksum" = 'on' -o "$nochksum" = 'yes' \)  ] ; then
 #  time hpcarchive $hpcarc_aoptns -p $hpcarc_project -n -a $hpcarc_name -c $hpcarc_name && HpcStat="$?" || HpcStat="$?"
 # else
   # Timed hpcarchive call with the selected project (-p) and the
   # subdirectory name given to both -a and -c; HpcStat receives the exit
   # status on success as well as on failure.
   time hpcarchive $hpcarc_aoptns -p $hpcarc_project -a $hpcarc_name -c $hpcarc_name && HpcStat="$?" || HpcStat="$?"
 # fi
 set +x
 if [ "$HpcStat" -ne 0 ] ; then
  # hpcarchive failed: report, log, flag the issue, clean up and abort.
  echo ""
  echo "archive: Sorry abnormal hpcarchive exit of $HpcStat is encountered!"
  if [ -f "$CCRNSRC/arc_dir/log/hpcarc_low_file_size_log" ] ; then
   echo " SEVERE: $Lgname `date -u` Nfiles=$Nfiles hpcarc_name=$hpcarc_name ; Abnormal hpcarchive exit of $HpcStat is encountered!"  >> $CCRNSRC/arc_dir/log/hpcarc_low_file_size_log
  fi 
  if [ -n "${Diagdir}" -a -d "${Diagdir}/." -a "${force_output_return}" != 'no' ] ; then touch ${Diagdir}/.force_output_return ; fi 
  rm -r -f $Tmphpcarcdir 
  exit 2
 fi
 Date_aftr=`date` 
else
 # Size out of range: report, log, flag the issue, clean up and abort
 # without calling hpcarchive.
 echo ""
 echo "archive: Sorry, $Tmphpcarcdir archive directory size (in MB) of $ArcSzchk is outside $Arcmnsz - $ArcSzlmt range!" 
 if [ -f "$CCRNSRC/arc_dir/log/hpcarc_low_file_size_log" ] ; then
  echo " archive: SEVERE - $Lgname `date -u` Nfiles=$Nfiles ; `pwd`/$Tmphpcarcdir archive directory size (in MB) of $ArcSzchk is outside $Arcmnsz - $ArcSzlmt range!"  >> $CCRNSRC/arc_dir/log/hpcarc_low_file_size_log
 fi 
 if [ -n "${Diagdir}" -a -d "${Diagdir}/." -a "${force_output_return}" != 'no' ] ; then touch ${Diagdir}/.force_output_return ; fi 
 rm -r -f $Tmphpcarcdir 
 exit 2
fi 
#######

# Produce list of archived files.
# One fixed format record per archived file is appended to
# $Prevwd/.archive.list, holding: file name, date and time stamp of the
# archive, archive name, sequence number of the file within the archive,
# hpcarchive project, hpcarchive version and file size.

if [ -s "$Prevwd/.archive.list" ] ; then
 # (\rm -f $Prevwd/.archive.list || : )
 echo "" ; echo "archive: Note - will append archive info. to existing $Prevwd/.archive.list file" ; echo ""
fi
touch $Prevwd/.archive.list
# Version taken from the 2nd field of the "HPCArchive ..." line of the
# hpcarchive help output ("grep -E" stands in for the obsolescent "egrep").
hpcarchive_ver=`hpcarchive -h | grep -E '^HPCArchive ' | $AWK '{ print $2 ;}'`
fseq=0
for prmtr in $Tlist
 do
  fseq=$(( fseq + 1 ))
  # File size: 5th field of the long listing, symbolic links followed.
  Sz=`ls -ldL $Tmphpcarcdir/$prmtr | $AWK '{ print \$5 ; } '`
  lprmtr=`echo $prmtr | tr '[A-Z]' '[a-z]'`
  echo "$lprmtr $Date $Time $hpcarc_name $fseq $hpcarc_project $hpcarchive_ver $Sz" | $AWK '{printf "%-40s%1s%8d%6.6d%1s%-40s%1s%4.3d%1s%-20s%1s%4s%1s%12s\n",$1," ",$2,$3," ",$4," ",$5," ",$6," ",$7," ",$8}' >> $Prevwd/.archive.list
 done

# Back in the starting directory, delete the temporary subdirectory and
# then report the file list as specified along with the list of names
# stored under the archive name.

cd $Prevwd
rm -r -f $Tmphpcarcdir

echo ""
echo "archive: Original set of files to archive consisted of:"
echo ""
echo " $file_list"

echo ""
echo "archive: Revised set of archived files under $hpcarc_name ARCHIVE name consisted of:"
echo ""
echo " $Tlist"
echo ""

# If asked for (TmnLog=yes) and provided the hpnls_archive_log file is
# already there, append an entry with the account, number of files,
# archive size, hpcarchive options/project/name, runid, host and the
# recorded time stamps.

if [ "$TmnLog" = 'yes' ] && [ -f "$CCRNSRC/arc_dir/log/hpnls_archive_log" ] ; then
 echo "archive: $(whoami),Nfiles=$Nfiles; ArcSzchk=$ArcSzchk ; hpcarc_aoptns=$hpcarc_aoptns ; hpcarc_project=$hpcarc_project ; hpcarc_name=$hpcarc_name ; runid=$runid,$(hostname), Start: ${Strt_Time}; Bfr_hpcarchive: ${Date_bfr}; Aftr_hpcarchive: ${Date_aftr}; End: $(date) ($(pwd)/$hpcarc_name) " >> $CCRNSRC/arc_dir/log/hpnls_archive_log
fi
