#!/usr/bin/env perl
########################################################################
# Insert a job string into another job string.
#
# Larry Solheim Mar 2010
#
# $Id: splice_job_string 654 2011-07-06 17:10:41Z acrnrls $
########################################################################

require 5;
use Getopt::Long;
use Text::Tabs;

# Declare global variables
use vars qw(%env $verbose);

# Identify this script by name: $0 with any leading directory part removed.
# This is done in Perl rather than with `basename $0` so that a script path
# containing whitespace or shell metacharacters is not mangled by the shell.
($Runame = $0) =~ s{^.*/}{}s;

# Define a unique stamp for file name id etc.
# The stamp is the day of the year (001-366), the hour, minute and second of
# the local time, followed by the process id of this script. This is the
# string that `date "+%j%H%M%S"` followed by the pid would produce, built
# without starting an external command.
{
  my ($sec, $min, $hour, $yday) = (localtime)[0, 1, 2, 7];
  $stamp = sprintf("%03d%02d%02d%02d%d", $yday + 1, $hour, $min, $sec, $$);
}

# The name of a file containing an existing job string.
# This name is either supplied by the user on the command line
# or determined internally.
$orig_job_string = "";

# The name of a file containing a job string to be inserted into
# the existing job string.
$extra_job_string = "";

# The new string will be inserted after the $position job found in
# the original job string.
# A job is delimited at the end by the line "#end_of_job".
$position = 1;

# replace_job is a boolean flag to determine whether or not the original
# job string is overwritten by the modified version
$replace_job = 1;

# force_create is a boolean flag to determine whether or not the crawork file
# that the user supplied job will be inserted into should be created if it
# does not exist
$force_create = 0;

# Indicate the degree of verbosity on stdout
$verbose = 2;

# cwHOME will be the name of a dir in which .queue/.crawork lives.
# It must also contain a "tmp" subdir or allow the invoking user to create one.
# This is typically the user's home dir but may be different if JHOME
# is present in the current environment (JHOME takes precedence over HOME).
$cwHOME = $ENV{HOME};
$cwHOME = $ENV{JHOME} if $ENV{JHOME};
die "${Runame}: Unable to determine a cwHOME directory.\n" unless $cwHOME;

# Keep a copy of the command line as it was given, for use in the usage message
@ARGVin = @ARGV;

# Define a usage function
# Called as &$Usage() or &$Usage($msg). When a (true) message is supplied it
# is printed first, prefixed with the script name. The original command line
# and the usage text are then written to stdout and the script is terminated
# with die "\n" (this function never returns).
$Usage = sub {
  my ($msg) = @_;
  print "${Runame}: $msg\n" if $msg;
  print "ARGV: @ARGVin\n\n";
  print <<EOR;
Usage: $Runame [options] extra_job_string || rmlock=name [crawork=value]

Purpose: Insert an extra job into an existing job string.

Options:
  --position=Int    ...Specify the location in the original job string at which
                       the extra job is to be inserted. Position is the number of
                       jobs from the beginning of the string, counting the first job
                       as number 1. The extra job is inserted after the "position" job.
                       position=0 means insert before any other jobs in the string.
                       position=-1 means insert at the end of the string.
                       (default position=1)
  --job=file        ...The name of a file containing the original job string, into
                       which the extra job is to be inserted. A definition for crawork
                       is required when this option is not supplied.
                       (default is "~/.queue/.crawork/\${crawork}_string")
  --[no]replace_job ...[do not] replace the original job with the modified version
                       With --noreplace_job a copy of the modified script will be
                       left in a temporary directory (~/tmp).
                       (default is to replace the original job)
  --[no]force_create...[do not] create the file containing the original job string
                       if it does not exist or is empty
                       (default is not to create it)
  --verbose         ...increase verbosity (additive)
  --help            ...show this usage info

Comments:
  Either a single file name of a file containing the extra job string or the assignment
  rmlock=name where name is the name of a lock file to delete is required on the
  command line.

  If a file name is provided then the contents of that file will be inserted at the
  specified position or position 1 if --position is not supplied.

  If rmlock=name is supplied then the script will create a job whose sole purpose is
  to delete the lock file "name" and insert it at the specified position.

  A definition for crawork will also be required if the --job option is not used or
  rmlock=name is specified on the command line.
EOR
  die "\n";
};

# Process command line arguments
$Getopt::Long::ignorecase = 0;
$Getopt::Long::order = $PERMUTE;

# --help only records the request while the command line is parsed and the
# usage function is invoked after GetOptions returns. Getopt::Long passes the
# option name and value to an option handler and traps any die raised inside
# it, so calling the usage function directly as the handler printed a stray
# "help" message line and made --help end with "Error on command line."
my $help_requested = 0;

# Anything that is not an option (a file name or a var=value assignment) is
# collected in @NonOpt by the "<>" handler.
&GetOptions("help"            => \$help_requested,
            "verbose"         => sub {$verbose++},
            "position=i"      => \$position,
            "job=s"           => \$orig_job_string,
            "replace_job!"    => \$replace_job,
            "force_create!"   => \$force_create,
            "<>"              => sub {push @NonOpt,$_[0]})
    or die "${Runame}: Error on command line.\n";

# Show the usage info (this terminates the script) when --help was given
&$Usage() if $help_requested;

# Classify every non-option command line argument. An argument containing
# "word=" is a variable assignment of the form "var=value" and is stored in
# %env; anything else is taken to be the name of the file that contains the
# extra job string.
for my $arg (@NonOpt) {
  next unless $arg;

  # Strip any enclosing single or double quotes
  $arg =~ s/^'(.*?)'$/$1/;
  $arg =~ s/^"(.*?)"$/$1/;

  if ($arg !~ /\w+=/) {
    # Not a variable assignment, so this is the input file name.
    # Only one such name is allowed on the command line.
    die "${Runame}: Attempting to redefine extra job string file name $extra_job_string as $arg.\n"
      if $extra_job_string;
    $extra_job_string = $arg;
    next;
  }

  # Split the assignment at the first "=" into a name and a value
  my ($name, $value) = $arg =~ /^\s*(.*?)=(.*)/;

  # Strip quotes from the value, if any
  $value =~ s/^\s*"(.*)"\s*$/$1/;
  $value =~ s/^\s*'(.*)'\s*$/$1/;

  # Add variable defs found on the command line to the env hash
  $env{$name} = "$value";

  if ($name eq "crawork") {
    # The value of crawork is used with all whitespace removed
    $env{$name} =~ s/\s+//g;
    $crawork = $env{$name};
  }
  elsif ($name eq "rmlock") {
    # The user has requested that a single job that will delete the named
    # lock file be inserted. This job is defined at the end of this script.
    # The lock file name is used with all whitespace removed.
    $env{$name} =~ s/\s+//g;

    # rmlock=name cannot be combined with an extra job string file name
    die "${Runame}: The extra job string file name is not allowed with --> $arg <--\n"
      if $extra_job_string;
    $extra_job_string = "REMOVE_LOCK_FILE";
  }
}

# At high verbosity echo what was gathered from the command line
if ($verbose > 2) {
  print "Input variable definitions:\n";
  print "extra_job_string=$extra_job_string\n";
  for my $name (sort keys %env) {
    next unless defined $env{$name};
    print "$name = $env{$name}\n";
  }
  print "\n";
}

# Without an extra job (a file name or rmlock=name) there is nothing to do
unless ($extra_job_string) {
  $Usage->("The name of a file containing the extra job string is required on the command line.\n");
}

# If crawork was not defined on the command line then check the current env
if (not $crawork) {
  $crawork = $ENV{crawork};
  $crawork =~ s/\s+//g if $crawork;
  $env{crawork} = $crawork;
}

# Assign values for crawork and certain other parameters that are required
# by the internally created job "rmlock". A (true) value given on the command
# line is preferred over the value found in the current environment and all
# whitespace is removed from whichever value is used.
for my $parm (qw(crawork noprint masterdir shortermdir cfsuser arclabel
                 check_cfs_arcfile uxxx runid)) {
  my $value = $env{$parm} || $ENV{$parm};
  $value =~ s/\s+//g if $value;
  $rmlock_parms{$parm} = $value;
}

# The file name of the file containing the original job string may be supplied
# on the command line or set internally.
# Any name supplied explicitly on the command line will take precedence over
# the internal definition, which is built from cwHOME and crawork.
if (not $orig_job_string) {
  die "${Runame}: A value for crawork is required.\n" unless $crawork;
  die "${Runame}: Unable to determine a home directory.\n" unless $cwHOME;
  $orig_job_string = join('/', $cwHOME, ".queue", ".crawork", "${crawork}_string");
}

# Read the extra job string into memory
# $extra_job accumulates the text of the job that is to be inserted. It is
# filled in either from the "rmlock" job template stored after __END__ in this
# file or from the file named on the command line.
my $extra_job = '';
if ($extra_job_string =~ /^\s*REMOVE_LOCK_FILE\s*$/) {

  # First check that the original job string does not contain
  # an "rmlock" job as the second job in the string
  my @OJOBS = read_job_string($orig_job_string, 2);
  if (scalar(@OJOBS)>1 and $OJOBS[1] =~ /jobname=rmlock/) {
    # The original string already contains an rmlock job
    # Do nothing
    print "An rmlock job exists in the string $orig_job_string\n";
    exit 0;
  }

  # Read from internal data
  # Only the lines that follow a "########## JOB: rmlock" header line are kept
  my $readnow = 0;
  while (<DATA>) {
    if (/^##########\s*JOB:/) {
      my ($job) = /^##########\s*JOB:\s*(\w+)/;
      $readnow = 0;
      $readnow = 1 if $job eq "rmlock";
      next;
    }
    next unless $readnow;
    # Expand tabs as each line is read
    $extra_job .= expand($_);
  }

  # Put the name of the lock file into the job
  $extra_job =~ s/LOCK_FILE/$env{rmlock}/g;
  die "${Runame}: A value for crawork is required.\n" unless $crawork;

  foreach my $parm (keys %rmlock_parms) {
    my $value = $rmlock_parms{$parm};
    next unless $value;
    # Replace any (possibly quoted) words on the RHS of the assignment
    # for the current variable with the new variable value.
    # A quoted value may be of any length. The \b keeps the pattern from
    # matching inside a longer variable name (e.g. runid inside xrunid).
    $extra_job =~ s/\b\Q$parm\E=(?:`[^`]*`|'[^']*'|"[^"]*"|\w+)/$parm=$value/g;
    # Check for empty values (e.g. var= ; or var= \n) and replace
    $extra_job =~ s/\b\Q$parm\E=[ \t]*(\n|;)/$parm=$value$1/g;
  }
} else {
  # Read from a user supplied file
  open (my $extra_fh, '<', $extra_job_string)
    or die "${Runame}: cannot open $extra_job_string for input: $!\n";
  while (my $text = <$extra_fh>) {
    # Expand tabs as each line is read
    $extra_job .= expand($text);
  }
  close $extra_fh;

  # Ensure that the value of crawork used in the user supplied extra job is the same
  # as the crawork value used in the original job string
  die "${Runame}: A value for crawork is required.\n" unless $crawork;
  # Replace any (possibly quoted) words on the RHS of the crawork assignment
  # with the crawork value from the original job string.
  # A quoted value may be of any length, so that e.g. a back quoted command
  # containing blanks is replaced as a whole.
  $extra_job =~ s/crawork=(?:`[^`]*`|'[^']*'|"[^"]*"|[^;#\s]+)/crawork=$crawork/g;
  # Check for empty values (e.g. var= ; or var= \n) and replace
  $extra_job =~ s/crawork=[ \t]*(\n|;)/crawork=$crawork$1/g;
}

# Make sure we have a valid job with no leading or trailing whitespace
die "${Runame}: Extra job string is empty.\n" if $extra_job =~ /^\s*$/s;
# Strip any leading or trailing white space from the extra job
$extra_job =~ s/^\s*(\S.*?)\s*$/$1/s;
# Ensure a final "#end_of_job" line
$extra_job .= "\n#end_of_job\n" unless $extra_job =~ /\n\s*#\s*end_of_job\s*$/si;
# Ensure a single trailing newline on the extra job
$extra_job =~ s/\s*$/\n/s;
if ($verbose > 3) {
  print "Extra Job:\n$extra_job";
}

# Determine a temporary file name to hold the interim job string
die "${Runame}: Unable to determine a home directory.\n" unless $cwHOME;
$tmpd = "${cwHOME}/tmp";
unless (-d $tmpd) {
  # Attempt to create this dir
  system("mkdir","$tmpd") == 0
    or die "Unable to create tmp dir $tmpd. $?\n";
}
$tmp_job_string="$tmpd/tmp_splice_$stamp";

# Read the original job string line by line and write the original job string
# with the extra job string inserted to a temporary file
if ( $force_create ) {
  # If the original job string does not exist (or is empty) then create it
  unless ( -s "$orig_job_string" ) {
    system("touch","$orig_job_string") == 0 or die "Cannot touch $orig_job_string. $?\n";
  }
}
open (ORIG, '<', $orig_job_string)
  or die "${Runame}: cannot open $orig_job_string for input: $!\n";
open (NEW, '>', $tmp_job_string)
  or die "${Runame}: cannot open $tmp_job_string for output: $!\n";

# $inserted becomes true once the extra job has been written.
# $ends_with_nl is false while the last line copied from the original job
# string lacks a trailing newline; a newline is then written ahead of the
# extra job so that it always starts on a line of its own.
my $inserted = 0;
my $ends_with_nl = 1;
my $insert_extra_job = sub {
  print NEW "\n" unless $ends_with_nl;
  print NEW $extra_job;
  $ends_with_nl = 1;
  $inserted = 1;
};

if ($position == 0) {
  # Insert the extra job first
  &$insert_extra_job();
}
$curr_position = 0;
while (my $line = <ORIG>) {
  print NEW $line;
  $ends_with_nl = ($line =~ /\n\z/) ? 1 : 0;
  # Every "#end_of_job" line terminates one job of the original string
  next unless $line =~ /^\s*#\s*end_of_job\s*$/mi;
  $curr_position++;
  if ($curr_position == $position) {
    # Insert the extra job
    &$insert_extra_job();
  }
}
unless ($inserted) {
  # Insert the extra job last. This is what position=-1 asks for. It is also
  # done, with a warning, when the requested position was never reached
  # (e.g. an empty or newly created job string) so that the extra job is not
  # silently dropped.
  warn "${Runame}: Only $curr_position job(s) found in $orig_job_string."
      ." The extra job was appended at the end instead of position $position.\n"
    unless $position == -1;
  &$insert_extra_job();
}
close ORIG;
# Buffered write errors (e.g. a full disk) only show up at close. Stop here
# rather than replace the original job string with an incomplete copy.
close (NEW)
  or die "${Runame}: error writing $tmp_job_string: $!\n";

if ($replace_job) {
  # mv -f replaces the destination by itself. The original job string is
  # deliberately not removed beforehand so that it is still in place if the
  # move fails.
  system("mv","-f","$tmp_job_string","$orig_job_string") == 0
    or die "Unable to overwrite $orig_job_string. $?\n";
} else {
  print "Created $tmp_job_string\n";
}

exit 0;
########################################################
##################### End of main ######################
########################################################

sub read_job_string {
  # Read (at most) $njobs jobs from a string of jobs in a file.
  # A job is delimited at the end by a line of the form "#end_of_job"
  # (case insensitive, white space allowed around the "#" and the keyword).
  #
  # Usage:   read_job_string($fname [, $njobs])
  #   $fname ...name of the file containing the job string (required)
  #   $njobs ...maximum number of jobs to read, an integer > 0 (default 2)
  #
  # Returns: in list context the list of jobs read, each one being the text
  #          of the job up to and including its "#end_of_job" line;
  #          in scalar context the number of jobs read.
  #          Text that follows the last "#end_of_job" line is not returned.
  #
  # Dies when the file name is missing, when njobs is not a positive integer
  # or when the file cannot be opened.
  use strict;
  use File::Basename;
  my $fname = shift;
  my $njobs = shift;

  # Create a tag to use with error diagnostics
  my ($file, $subname) = (caller(0))[1, 3];
  my $errID = basename($file)."::${subname}:";

  die "$errID requires a file name as an argument\n" unless $fname;
  $njobs = 2 unless defined $njobs;
  die "$errID njobs must be an integer>0. njobs=$njobs\n"
    unless $njobs =~ /^\d+$/ and $njobs > 0;

  # Use a lexical file handle and the 3 arg form of open so that the file
  # name is never interpreted as part of the open mode
  open (my $job_fh, '<', $fname)
    or die "$errID Cannot open $fname for input: $!";

  # Read line by line and stop as soon as enough jobs were found, so that
  # no more of the file than necessary is read
  my @JOBS = ();
  my $job = '';
  while (my $text = <$job_fh>) {
    $job .= $text;
    next unless $text =~ /^\s*#\s*end_of_job\s*$/mi;
    push @JOBS, $job;
    $job = '';
    last if scalar(@JOBS) == $njobs;
  }
  close $job_fh;

  # Return a list of jobs read from the file, up to a maximum of $njobs
  return wantarray ? @JOBS : scalar(@JOBS);
}

__END__

########## JOB: rmlock

#!/bin/sh
#=======================================================================
# Remove lock file LOCK_FILE
# Optionally check the corresponding arcfile on cfs and abort if
# permissions are not set correctly.
#=======================================================================
 set -a
 . betapath2

#  * ........................... Parmsub Parameters ............................

 crawork=''
 nextjob=on
 noprint=on

 uxxx=''
 runid=''
 masterdir=off
 shortermdir=on
 cfsuser=''
 arclabel=''

 check_cfs_arcfile=on

 jobname=rmlock; time="600"; memory="100mb";

 . comjcl.cdk

cat > Execute_Script <<'end_of_script'

  # Use -e option if recognized by echo
  if [ "X`echo -e`" = "X-e" ]; then
    echo_e() { echo ${1+"$@"}; }
  else
    echo_e() { echo -e ${1+"$@"}; }
  fi

  bail(){
    echo_e `date`" --- rmlock: $*"
    exit 1
  }

  if [ x"$check_cfs_arcfile" = x"on" ]; then
    # Ensure that permissions on the arcfile just written are read only
    # for owner and group

    # Require arclabel to be set
    [ x"$arclabel" = x ] && \
      bail "Unable to check arcfile on cfs. Arclabel is not defined."

    # USR is the last 3 letters of the users login name
    # This should be the user that owns the arcfile
    if [ -n "$cfsuser" ]; then
      # Assume cfsuser is the owner if cfsuser is defined
      USR=`echo $cfsuser|awk '{print substr($0,length($0)-2)}' -`
    else
      # Otherwise the invoking user must be the owner
      USR=`whoami|awk '{print substr($0,length($0)-2)}' -`
    fi

    # Determine the directory on cfs in which the arcfile resides
    if [ x"$masterdir" = x"on" ]; then
      # runid must be the second underscore (_) separated field
      # in arclabel when masterdir is "on"
      xrunid=`echo $arclabel|awk -F_ '{print $2;exit}' -`
      # The first 2 characters of arclabel, together with the runid,
      # are used to determine the subdir in which the arc file lives
      ch1=`echo $arclabel|awk '{print substr($1,1,1)}' -`
      ch2=`echo $arclabel|awk '{print substr($1,2,1)}' -`
      [ -z "$ch1" ] && bail "Invalid arclabel prefix. arclabel = $arclabel"
      [ -z "$ch2" ] && bail "Invalid arclabel prefix. arclabel = $arclabel"
      ARCDIRX=/home/cfs_ccrd/ccrn/offcl_data/$ch2/$xrunid/$ch1
    else
      if [ x"$shortermdir" = x"on" ]; then
        ARCDIRX=/home/cfs_ccrd/ccrd_short_term_archive/$USR
      else
        ARCDIRX=/home/cfs_ccrd/ccrd_user_archive/$USR
      fi
    fi

    # Get a listing for the arcfile from cfs
    if [ x"$masterdir" = x"on" ]; then
      lsarc=`ssh cfs2 ls -lL $ARCDIRX/${arclabel}_\*_arc 2>&1` ||\
        bail "Problem listing arcfile.\n$lsarc"
###      # If successful then determine the number of lines that matched
###      lsline=`ssh cfs ls -lL $ARCDIRX/${arclabel}_\*_arc|wc -l`
###      if [ $? -ne 0 ]; then
###        # Try once more
###        lsline=`ssh cfs ls -lL $ARCDIRX/${arclabel}_\*_arc|wc -l` ||\
###          bail "Problem listing arcfile.\nlsline = $lsline"
###      fi
    else
      # User arcfiles begin with uxxx_ where "xxx" is the
      # last 3 letters in the users account name
      lsarc=`ssh cfs2 ls -lL $ARCDIRX/u${USR}_${arclabel}_\*_arc 2>&1` ||\
        bail "Problem listing arcfile.\n$lsarc"
###      lsline=`ssh cfs ls -lL $ARCDIRX/u${USR}_${arclabel}_\*_arc|wc -l`
###      if [ $? -ne 0 ]; then
###        lsline=`ssh cfs ls -lL $ARCDIRX/u${USR}_${arclabel}_\*_arc|wc -l` ||\
###          bail "Problem listing arcfile.\nlsline = $lsline"
###      fi
    fi
###    if [ "$lsline" -ne 1 ]; then
###      bail "More than one arcfile matches the label ${arclabel}.\n$lsarc"
###    fi
    perm=`echo $lsarc|awk '{print $1;exit}' -`
    owner=`echo $lsarc|awk '{print $3;exit}' -`
    if [ x`echo $perm|sed -n '/.r--r-----/p'` = x ]; then
      bail "Permissions on arcfile are incorrect.\n$lsarc"
    fi
    if [ x"$masterdir" = x"on" ]; then
      [ x"$owner" != "xacrnsrc" ] && \
        bail "Owner of arcfile should be acrnsrc when masterdir=on.\n$lsarc"
    fi
  fi

  # Remove the lock file, if any
  rm -f LOCK_FILE

end_of_script

 . endjcl.cdk

#end_of_job

