#!/usr/bin/perl -w
########################################################################
# Check for running jobs, compare files on cfs and DATAPATH and look
# in crawork and .queue dirs for job related files.
#
# Larry Solheim Feb 2010
#
# $Id: check_jobs 668 2012-04-19 01:12:42Z acrnrls $
########################################################################

require 5;
use File::Basename;
use Getopt::Long;
use AnyDBM_File;

# Identify this script by name.
# File::Basename (loaded above) is used instead of spawning `basename`
# in a shell, so a script path containing spaces or shell metacharacters
# is handled correctly and no extra process is started.
$Runame = basename($0);

# define a unique stamp for file name id etc
# chomp($stamp = `date "+%j"$$`);

# verbose controls the amount of info written to stdout
$verbose = 0;

# Store all non-option command line args in NonOpt
@NonOpt = ();

# Account names to try when the owner of a run cannot be determined,
# and the runids given on the command line
@owners = ();
@runids = ();

# Look at batch queues on the back end to see if jobs are running
$check_queues = 1;

# Compare files on cfs with those on DATAPATH
$check_cfs = 1;

# Compare files in the masterdir info file with those on DATAPATH
$check_mdinfo = 0;

# Look in crawork dir for run related files
$check_crawork=1;

# Look in .queue dir for run related files
$check_qdir=0;

# Look in both crawork and .queue dirs for run related files
$check_jobdirs=0;

# This is the long term user data directory on cfs
$CFSLONG = "/home/cfs_ccrd/ccrd_user_archive";

# This is the short term user data directory on cfs
$CFSSHORT = "/home/cfs_ccrd/ccrd_short_term_archive";

# This is the official data dir on cfs
$CFSOFFICIAL = "/home/cfs_ccrd/ccrn/offcl_data";

# This may hold the cfs audit info for masterdir
%CFSDB = ();

# Define a usage function.
# Prints an optional message (prefixed with the script name) followed by
# the usage text on stdout, then terminates the script via die with an
# empty message, so the exit status is non-zero.
#   $msg ...optional text to show before the usage info
$Usage = sub {
  my ($msg) = @_;
  print "${Runame}: $msg\n" if $msg;
  # The option list below mirrors the GetOptions call; the stated defaults
  # are the initial values of the corresponding $check_* variables.
  print <<EOR;
  Usage: $Runame [options] runid [runid ...]
Purpose: Check on runs given their runids
Options:
  --noqueues      ...do not check queues for running jobs
                     (default is to check queues)
  --nocfs         ...do not check files on the cfs and DATAPATH
                     (default is to check files)
  --mdinfo        ...compare files listed in the masterdir info file with
                     those on DATAPATH (default is not to do this)
  --nodotc        ...do not check the "crawork" dir for runid related files
                     (default is to check crawork)
  --dotq          ...check the ".queue" dir for runid related files
                     (default is not to check .queue)
  --jobdirs       ...check both "crawork" and ".queue" dirs for runid related files
                     (default is to check only crawork)
  --owners=LIST   ...supply a comma separated list of account names (e.g. ocn,gcm,cbn)
                     These will be used when the script is unable to determine the
                     owner of any of the runids supplied on the command line.
                     The default owners list is the invoking users account name.
  --verbose       ...increase verbosity (additive)
  --help          ...show this usage info
EOR
  die "\n";
};

# Process command line arguments.
# Option names are case sensitive and options may be mixed with the
# non-option arguments ($PERMUTE is exported by Getopt::Long).
$Getopt::Long::ignorecase = 0;
$Getopt::Long::order = $PERMUTE;

# --help only sets a flag here. Calling $Usage directly as the option
# handler would hand it the option name as its message (printing
# "<script>: help"), and Getopt::Long traps a die raised inside a handler
# and reports it as a command line error.
my $show_help = 0;
my $opts_ok = GetOptions(
            "help"            => \$show_help,
            "verbose"         => sub {$verbose++},
            "cfs!"            => \$check_cfs,
            "mdinfo!"         => \$check_mdinfo,
            "queues!"         => \$check_queues,
            "dotc!"           => \$check_crawork,
            "dotq!"           => \$check_qdir,
            "jobdirs!"        => \$check_jobdirs,
            # Accept a comma separated list; white space is ignored
            "owners=s@"       => sub {
                                   (my $list = $_[1]) =~ s/\s+//g;
                                   push @owners, split(/,/, $list);
                                 },
            # Collect everything that is not an option
            "<>"              => sub {push @NonOpt,$_[0]});

# A request for help wins over any other problem on the command line
$Usage->() if $show_help;
die "${Runame}: Error on command line.\n" unless $opts_ok;

# Sort the non-option command line args into two groups: anything of
# the form "var=value" is recorded in %env, everything else is taken
# to be a runid
%env = ();
for my $arg (@NonOpt) {
  next if !$arg;
  # Remove one layer of enclosing single or double quotes
  $arg =~ s/^'(.*?)'$/$1/;
  $arg =~ s/^"(.*?)"$/$1/;
  if ($arg =~ /\w+=/) {
    # A variable assignment: split at the first "="
    my ($name, $value) = $arg =~ /^\s*(.*?)=(.*)/;
    # The value itself may still be wrapped in quotes
    $value =~ s/^\s*"(.*)"\s*$/$1/;
    $value =~ s/^\s*'(.*)'\s*$/$1/;
    # A later definition of the same variable replaces an earlier one
    $env{$name} = "$value";
  } else {
    # Not an assignment, so it must be a runid
    push @runids, $arg;
  }
}

if (@owners) {
  # Tidy up each user supplied account name and make sure
  # that it carries the "acrn" prefix
  for my $acct (@owners) {
    $acct =~ s/^\s*(.*?)\s*$/$1/;
    $acct = "acrn" . $acct unless $acct =~ /^acrn/;
  }
} else {
  # No accounts were given on the command line, so fall back
  # to the account of the user who invoked this script
  push @owners, (getpwuid($<))[0];
}

# --jobdirs is shorthand for checking both crawork and .queue
($check_crawork, $check_qdir) = (1, 1) if $check_jobdirs;

# Echo any var=value definitions found on the command line
if ($verbose > 2) {
  print "Input variable definitions:\n";
  for my $name (sort keys %env) {
    next unless defined $env{$name};
    print "$name = $env{$name}\n";
  }
  print "\n";
}

# There is nothing to do without a runid
unless (@runids) {
  $Usage->("At least one runid is required on the command line\n");
}

# Show what will be worked on
if ($verbose > 2) {
  print "owners: ", join(" ", @owners), "\n";
  print "runids: ", join(" ", @runids), "\n";
}

# Show the cfs directories known to this script
if ($verbose > 10) {
  print "Long  term user data directory on cfs: $CFSLONG\n",
        "Short term user data directory on cfs: $CFSSHORT\n",
        "       Official data directory on cfs: $CFSOFFICIAL\n";
}

# Month name => month number (1..12)
@mlist{qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC)} = (1 .. 12);

# Zero padded month key ("M01" .. "M12") => month name
foreach my $mname (keys %mlist) {
  $xmlist{sprintf("M%02d", $mlist{$mname})} = $mname;
}

foreach $runid (@runids) {
  # Every line of output for this run starts with the upper case runid
  $PFX = uc($runid) . ": ";

  # Start with an empty job record for this runid.
  # owner/mach are filled in when jobs are found in the back end queues,
  # feowner/femach when jobs are found in the front end queues.
  %JOB = (
    owner   => '',
    mach    => '',
    feowner => '',
    femach  => '',
  );

  if ($check_queues) {
    # Look through the batch queue listing of one machine for jobs that
    # belong to the current runid and report any that are found.
    #   $host   ...machine to query (via ssh)
    #   $qcmd   ...queue listing command to run on that machine
    #   $is_job ...regex identifying the job lines of the listing
    #   $ijob   ...index of the white space separated job name field
    #   $iown   ...index of the white space separated owner field
    #   $wanted ...regex a job name must match to belong to this runid
    # Returns the owner taken from the first matching line, or an empty
    # string when no job for this runid was found.
    my $scan_queue = sub {
      my ($host, $qcmd, $is_job, $ijob, $iown, $wanted) = @_;
      my @queue = `ssh -x $host $qcmd 2>&1`;
      my @found = ();
      my $found_owner = '';
      foreach my $line (@queue) {
        next unless $line =~ $is_job;
        my ($jobowner, $jobname) = (split /\s+/, $line)[$iown, $ijob];
        # Short or malformed lines do not have these fields
        next unless defined $jobowner and defined $jobname;
        # Ignore any lines not referring to acrn accounts
        next unless $jobowner =~ /^acrn/;
        next unless $jobname =~ $wanted;
        # The owner is taken from the first matching line
        $found_owner = $jobowner unless @found;
        push @found, $line;
      }
      return '' unless @found;

      print "${PFX}Run ",uc($runid)," appears to be running on $host.\n";
      print "${PFX}\n";
      # The first two lines of the listing are shown above the jobs found
      # (presumably the column headings of the queue command)
      foreach my $heading (grep { defined } @queue[0,1]) {
        print "${PFX}$heading";
      }
      print "${PFX}",join("${PFX}",@found),"\n";
      return $found_owner;
    };

    # The runid is matched literally, never as a regular expression
    my $be_name = qr/\Q$runid\E_/;
    my $fe_name = qr/\Q$runid\E/;

    # Check the back end (llq) queues.
    # saiph and zeta were checked in exactly the same way in earlier
    # versions of this script but are disabled; adding them to this
    # list would re-enable those checks.
    my $llq = q('llq -f %id %jn %o %dq %dd %st %nh %c');
    my $on_back_end = 0;
    foreach my $host (qw(hadar spica)) {
      # llq job lines start with "c"; job name is field 1, owner is field 2
      my $jobowner = $scan_queue->($host, $llq, qr/^c/, 1, 2, $be_name);
      next unless $jobowner;
      $on_back_end++;
      $JOB{mach}  = $host;
      $JOB{owner} = $jobowner;
    }
    unless ($on_back_end) {
      print "${PFX}Run ",uc($runid)," does not appear to be running on the back end.\n\n";
    }

    # Check the front end (qstat) queues
    my $on_front_end = 0;
    foreach my $host (qw(pollux alef)) {
      # qstat job lines start with a digit; job name is field 2, owner is field 3
      my $jobowner = $scan_queue->($host, 'qstat', qr/^\d/, 2, 3, $fe_name);
      next unless $jobowner;
      $on_front_end++;
      $JOB{femach}  = $host;
      $JOB{feowner} = $jobowner;
    }
    unless ($on_front_end) {
      print "${PFX}Run ",uc($runid)," does not appear to be running on the front end.\n\n";
    }
  }

  if ($check_crawork) {
    # List ~/.queue/.crawork on alef and report the entries that mention
    # this runid. The account of a job found in the queues is used when
    # there is one, otherwise every account in the owners list is tried.
    my $job_owner = $JOB{owner} || $JOB{feowner};
    my @accounts = ();
    if ($job_owner) {
      @accounts = ($job_owner);
    } else {
      warn "${PFX}Unable to determine the owner of job ",uc($runid),".\n";
      warn "${PFX}Will look through user supplied accounts (via --owners command line option).\n";
      @accounts = @owners;
    }

    foreach my $acct (@accounts) {
      print "${PFX}Checking crawork in account $acct for ",uc($runid)," related files.\n";
      my @cwlist = `ssh -x $acct\@alef 'ls -lrt ./.queue/.crawork' 2>&1`;
      # The runid is matched literally, not as a regular expression
      my @cw = grep { /\Q$runid\E/ } @cwlist;
      if (@cw) {
        # Get the date from the remote machine for comparison with dates in this listing
        chomp(my $rmdate = `ssh -x alef 'date' 2>&1`);
        print "${PFX}The current date on the remote machine is $rmdate.\n";
        print "${PFX}",join("${PFX}",@cw),"\n";
      } else {
        print "${PFX}No files related to run ",uc($runid)," found in ~$acct/.queue/.crawork\n\n";
      }
    }
  }

  if ($check_qdir) {
    # List ~/.queue on alef and report the entries that mention this
    # runid. The account of a job found in the queues is used when there
    # is one, otherwise every account in the owners list is tried.
    my $job_owner = $JOB{owner} || $JOB{feowner};
    my @accounts = ();
    if ($job_owner) {
      @accounts = ($job_owner);
    } else {
      # The crawork check has already issued this warning when it is active
      warn "${PFX}Unable to determine the owner of job ",uc($runid),".\n" unless $check_crawork;
      warn "${PFX}Will look through user supplied accounts (via --owners command line option).\n";
      @accounts = @owners;
    }

    foreach my $acct (@accounts) {
      print "${PFX}Checking .queue in account $acct for ",uc($runid)," related files.\n";
      my @qlist = `ssh -x $acct\@alef 'ls -lrt ./.queue/' 2>&1`;
      # The runid is matched literally, not as a regular expression
      my @hits = grep { /\Q$runid\E/ } @qlist;
      if (@hits) {
        # Get the date from the remote machine for comparison with dates in this listing
        chomp(my $rmdate = `ssh -x alef 'date' 2>&1`);
        print "${PFX}The current date on the remote machine is $rmdate.\n";
        print "${PFX}",join("${PFX}",@hits),"\n";
      } else {
        print "${PFX}No files related to run ",uc($runid)," found in ~$acct/.queue\n\n";
      }
    }
  }

  if ($check_mdinfo) {
    # Compare the history files recorded in the CFS masterdir info file
    # (~acrnsrc/info/adtcfs) with the files found on the back end DATAPATH
    # and report any months for which no file is recorded.
    print "${PFX}Processing CFS masterdir info in ~acrnsrc/info/adtcfs\n\n";
    undef @files_be;
    undef %CFSDB;

    # The info file is 600MB and takes a long time to copy/read, so only
    # the "USER:" lines and the lines that mention the current runid are
    # extracted from it (remotely, on alef).
    print "${PFX}Loading CFS file info ... ";
    my $timein = `date "+%s.%N"`;
    @mdinfo = `ssh -x alef \'egrep -i \\^\\ \\*USER\\:\\|$runid \$CCRNSRC/info/adtcfs\' 2>&1`;
    my $cpusec = `date "+%s.%N"` - $timein;
    print "$cpusec seconds to load.\n";

    # Walk through the extracted lines, remembering the most recent
    # user and arc file so that each regular file can be tagged with them
    my $curr_owner   = "UNKNOWN";
    my $curr_arcname = "UNKNOWN";
    my $curr_dir     = "UNKNOWN";
    foreach my $line (@mdinfo) {
      # The item of interest is always the last word on the line
      my $last_word = (split /\s+/, $line)[-1];
      next unless defined $last_word;
      if ($line =~ /^\s*USER:/) {
        # The current user is identified on lines of the form "USER: acrnxyz"
        $curr_owner = $last_word;
      } elsif ($line =~ /^\s*-/) {
        # This line contains an arcfile name (with its full path)
        $curr_arcname = (split(/\//, $last_word))[-1];
        ($curr_dir)   = $last_word =~ /^(.*)\//;
      } elsif ($line =~ /^\s+\^+\>+ /) {
        # This line contains a regular file name
        my ($year, $mon) = $last_word =~ /_(\d\d\d\d?)_m(\d\d)_/;
        $CFSDB{$last_word} = {
          arcfile => $curr_arcname,
          dir     => $curr_dir,
          owner   => $curr_owner,
          # -1 flags a file name from which no year/month could be read
          year    => defined $year ? $year : -1,
          month   => defined $mon  ? $mon  : -1,
        };
      }
    }

    # Count the files found for each year and month of this run.
    # %CFS is keyed by "Y<year>" and then by month name.
    undef @curr_files;
    undef %CFS;
    undef %MISSING;
    foreach my $file (keys %CFSDB) {
      next unless $file =~ /^mc_\Q$runid\E_/;
      push @curr_files, $file;

      my $year  = $CFSDB{$file}{year};
      my $mname = $xmlist{"M$CFSDB{$file}{month}"};
      # Ignore this file unless it has a usable year and a month in 01..12
      unless ($year > 0 and defined $mname) {
        if ($verbose > 0) {
          print "${PFX}Unable to determine year/month range from file name $file\n";
        }
        next;
      }

      my $ykey = "Y$year";
      unless (exists $CFS{$ykey}) {
        # Populate this year with keys for each month, initialized to 0
        $CFS{$ykey}{$_} = 0 foreach keys %mlist;
      }

      # Increment the count for this year and month
      # Note: There may (will) be multiple files for each year/month
      #       (e.g. .._gs, .._ss, .._gz)
      $CFS{$ykey}{$mname}++;
    }

    # Get a list of all files on the back end that contain
    # the current runid as part of their file name
    my $rmcmd = q('ls -lLrt $DATAPATH/*_) . ${runid} . q(_*' 2>&1);
    foreach my $host (qw(hadar spica)) {
      my @listing = `ssh -x $host $rmcmd`;
      push @files_be, @listing;
      next unless $verbose == 0;
      # List the tail of DATAPATH files that were found, if any
      if (@listing) {
        my $n1 = $#listing >= 10 ? $#listing - 10 : 0;
        print "${PFX}The most recent files found on $host back end DATAPATH for runid ",uc($runid)," are:\n";
        print "${PFX}$_" foreach @listing[$n1 .. $#listing];
        print "\n";
      } else {
        print "${PFX}No files for runid ",uc($runid)," were found on $host DATAPATH.\n";
      }
    }

    # Month names in calendar order, and the years found in numeric order
    # (a plain sort would place Y1000 before Y999)
    my @months = sort { $mlist{$a} <=> $mlist{$b} } keys %mlist;
    my @ykeys  = sort { substr($a,1) <=> substr($b,1) } keys %CFS;

    if (!@ykeys) {
      # Without any dated file there is no range to analyse
      print "${PFX}No history files for run ",uc($runid),
            " were found in ~acrnsrc/info/adtcfs.\n";
    } else {
      # Determine first and last year and month for all files found.
      # A year only has an entry in %CFS when at least one of its months
      # was counted, so both month lookups always succeed.
      my $first_year  = substr($ykeys[0], 1);
      my $last_year   = substr($ykeys[-1], 1);
      my ($first_mon) = grep { $CFS{$ykeys[0]}{$_}  > 0 } @months;
      my ($last_mon)  = grep { $CFS{$ykeys[-1]}{$_} > 0 } reverse @months;
      print "${PFX}History files found in ~acrnsrc/info/adtcfs for run ",uc($runid),
            " range from $first_mon, $first_year to $last_mon, $last_year.\n";

      # Determine if any months are missing in the files found
      my $ycheck = $first_year;
      foreach my $ykey (@ykeys) {
        my $year = substr($ykey, 1);
        # Every year skipped since the previous entry is missing entirely
        while ($ycheck < $year) {
          push @{$MISSING{"Y$ycheck"}}, @months;
          $ycheck++;
        }
        $ycheck = $year + 1;

        # Months without any file, restricted to the range covered by the
        # run. Both limits apply when the run covers a single year.
        my @missing = grep { $CFS{$ykey}{$_} == 0 } @months;
        if ($year == $first_year) {
          @missing = grep { $mlist{$_} >= $mlist{$first_mon} } @missing;
        }
        if ($year == $last_year) {
          @missing = grep { $mlist{$_} <= $mlist{$last_mon} } @missing;
        }
        # Keep track of any missing months
        push @{$MISSING{$ykey}}, @missing if @missing;
      }

      my @missing_years = sort { substr($a,1) <=> substr($b,1) } keys %MISSING;

      # Print a message about any missing months
      foreach my $ykey (@missing_years) {
        my $year = substr($ykey, 1);
        my @miss = @{$MISSING{$ykey}};
        if (scalar(@miss) == 12) {
          print "${PFX}    Run ",uc($runid)," is missing the entire year of $year\n";
        } else {
          print "${PFX}    Run ",uc($runid)," is missing months in ${year}:   ",
                join(' ',@miss),"\n";
        }
      }

      # Attempt to determine if any of the missing files are still on disk
      foreach my $ykey (@missing_years) {
        my $year = substr($ykey, 1);
        foreach my $mname (@{$MISSING{$ykey}}) {
          my $str = sprintf("%s_%3.3d_m%2.2d",$runid,$year,$mlist{$mname});
          my @hits = grep { /\Q$str\E/ } @files_be;
          if (@hits) {
            print "${PFX}    Some or all of the missing files for $mname, $year may be found on the back end.\n";
            print "$PFX",join(${PFX},@hits),"\n";
          } else {
            print "${PFX}    The missing files for $mname, $year do not appear to be on the back end.\n";
          }
        }
      }

      unless (@missing_years) {
        # No missing file detected
        print "${PFX}There appear to be no missing files on CFS for run ",uc($runid),
              " in this range.\n";
      }
    }

    print "\n";
  }

  if ($check_cfs) {
    # Check files on cfs for official runs: work out which year/month
    # ranges are covered by the arc files, report missing or repeated
    # months and look for the missing ones on the back end DATAPATH.
    # my @cfsdirs = ("${CFSOFFICIAL}/c/${runid}/m/", "${CFSOFFICIAL}/c/${runid}/d/");
    my @cfsdirs = ("${CFSOFFICIAL}/c/${runid}/m/");
    # Month names in calendar order
    my @months = sort { $mlist{$a} <=> $mlist{$b} } keys %mlist;
    my $ndir = 0;
    undef @files_be;
    # Combined size (GB) of the files found on all back end machines
    $fsize_be = 0.0;
    foreach my $cfsdir (@cfsdirs) {
      $ndir++;
      # print "${PFX}Processing CFS dir $cfsdir\n\n";
      # Get a list of files for the current runid from the current CFS dir
      my @cfs = `ssh -x cfs ls -l $cfsdir 2>&1`;
      undef %CFS;
      undef %MISSING;
      undef %REPEATED;
      my $ncfs    = 0;   # regular files listed in this dir
      my $ncfs_ym = 0;   # ...of which this many carry a year/month range
      foreach my $entry (@cfs) {
        next unless $entry =~ /^-/;
        $ncfs++;
        # Extract arc file name
        my $arcname = (split /\s+/, $entry)[-1];
        # Extract a year/month range from the arc file name
        my ($y1,$m1,$y2,$m2) = $arcname =~ /_(\d\d\d\d?)m(\d\d)_(\d\d\d\d?)m(\d\d)_/;
        # Ignore this arcfile unless we can determine a year/month range
        unless (defined $y1) {
          if ($verbose > 0) {
            print "${PFX}Unable to determine year/month range from arc file name $arcname\n";
          }
          next;
        }

        $ncfs_ym++;
        # Only arc files that stay within a single year are supported
        die "year $y1 != year $y2 is not yet implimented\n" unless $y1 == $y2;
        my $ykey = "Y$y1";

        unless (exists $CFS{$ykey}) {
          # Populate this year with keys for each month, initialized to 0
          $CFS{$ykey}{$_} = 0 foreach @months;
        }

        # Count this arc file once for every month in its range
        foreach my $mname (@months) {
          $CFS{$ykey}{$mname}++ if $mlist{$mname} >= $m1 and $mlist{$mname} <= $m2;
        }
      }

      if ($ndir == 1) {
        # Get a list of all files on the back end that contain
        # the current runid as part of their file name
        my $rmcmd = q('ls -lLrt $DATAPATH/*_) . ${runid} . q(_*' 2>&1);
        foreach my $host (qw(hadar spica)) {
          my @listing = `ssh -x $host $rmcmd`;
          push @files_be, @listing;

          # Add up the sizes (field 4 of ls -l) of the regular files
          my $bytes = 0.0;
          foreach my $entry (@listing) {
            next unless $entry =~ /^-/;
            $bytes += 1.0 * (split /\s+/, $entry)[4];
          }
          my $fsize_host = $bytes/(1024.0*1024.0*1024.0);
          $fsize_be += $fsize_host;

          next unless $verbose == 0;
          # List the tail of DATAPATH files that were found, if any
          if (@listing) {
            print "${PFX}Total size of all ",uc($runid)," files on $host back end is $fsize_host GB\n";
            my $n1 = $#listing >= 10 ? $#listing - 10 : 0;
            print "${PFX}The most recent files found on $host back end DATAPATH for runid ",uc($runid)," are:\n";
            print "${PFX}$_" foreach @listing[$n1 .. $#listing];
            print "\n";
          } else {
            print "${PFX}No files for runid ",uc($runid)," were found on $host DATAPATH.\n";
          }
        }
      }

      if ($ncfs_ym == 0) {
        if ($ncfs == 0) {
          # No file for this runid were found on cfs
          if ($ndir == 1) {
            print "${PFX}No history files for runid ",uc($runid)," were found on CFS.\n";
          } elsif ($ndir == 2) {
            print "${PFX}No diagnostic files for runid ",uc($runid)," were found on CFS.\n";
          }
        } else {
          # There were files found but no year/month ranges could
          # be determined from the existing arc file names
          print "${PFX}No year/month info available for runid ",uc($runid)," on CFS.\n";
        }
        next;
      }

      if ($verbose > 0) {
        print "${PFX}#=#=#=#=# ",uc($runid)," files on cfs\n";
        print "${PFX}$_" foreach grep { /^-/ } @cfs;
        print "${PFX}#=#=#=#=# ",uc($runid)," files on the back end\n";
        # This is the combined size for all back end machines listed below
        print "${PFX}Total size of all ",uc($runid)," files on back end is $fsize_be GB\n";
        my @regular = grep { /^-/ } @files_be;
        print "${PFX}$_" foreach @regular;
        unless (@regular) {
          print "${PFX}No files for runid ",uc($runid)," were found of the back end.\n";
        }
      }

      # Years found on cfs in numeric order
      # (a plain sort would place Y1000 before Y999)
      my @ykeys = sort { substr($a,1) <=> substr($b,1) } keys %CFS;

      # Determine first and last year and month for all files found on cfs.
      # A year only has an entry in %CFS when an arc file was counted for
      # it; $ncfs_ym > 0 guarantees at least one such year here.
      my $first_year  = substr($ykeys[0], 1);
      my $last_year   = substr($ykeys[-1], 1);
      my ($first_mon) = grep { $CFS{$ykeys[0]}{$_}  > 0 } @months;
      my ($last_mon)  = grep { $CFS{$ykeys[-1]}{$_} > 0 } reverse @months;
      # An arc file whose month range is empty or invalid counts no month
      $first_mon = $months[0]  unless defined $first_mon;
      $last_mon  = $months[-1] unless defined $last_mon;
      print "${PFX}History files found on CFS for run ",uc($runid),
            " range from $first_mon, $first_year to $last_mon, $last_year.\n";

      # Determine if any months are missing or repeated in the files on cfs
      my $ycheck = $first_year;
      foreach my $ykey (@ykeys) {
        my $year = substr($ykey, 1);
        # Every year skipped since the previous entry is missing entirely
        while ($ycheck < $year) {
          push @{$MISSING{"Y$ycheck"}}, @months;
          $ycheck++;
        }
        $ycheck = $year + 1;

        # Months without any file, restricted to the range covered by the
        # run. Both limits apply when the run covers a single year.
        my @missing = grep { $CFS{$ykey}{$_} == 0 } @months;
        if ($year == $first_year) {
          @missing = grep { $mlist{$_} >= $mlist{$first_mon} } @missing;
        }
        if ($year == $last_year) {
          @missing = grep { $mlist{$_} <= $mlist{$last_mon} } @missing;
        }
        # Keep track of any missing months
        push @{$MISSING{$ykey}}, @missing if @missing;

        # Determine any months with multiple files on cfs. No range
        # restriction is needed: months outside the first/last month
        # have a count of 0 and can never be repeated.
        my @repeated = grep { $CFS{$ykey}{$_} > 1 } @months;
        # Keep track of any repeated months
        push @{$REPEATED{$ykey}}, @repeated if @repeated;
      }

      my @missing_years  = sort { substr($a,1) <=> substr($b,1) } keys %MISSING;
      my @repeated_years = sort { substr($a,1) <=> substr($b,1) } keys %REPEATED;

      # Print a message about any missing months
      foreach my $ykey (@missing_years) {
        my $year = substr($ykey, 1);
        my @miss = @{$MISSING{$ykey}};
        if (scalar(@miss) == 12) {
          print "${PFX}    Run ",uc($runid)," is missing the entire year of $year\n";
        } else {
          print "${PFX}    Run ",uc($runid)," is missing months in ${year}:   ",
                join(' ',@miss),"\n";
        }
      }

      # Print a message about any repeated months
      foreach my $ykey (@repeated_years) {
        my $year = substr($ykey, 1);
        print "${PFX}    Run ",uc($runid)," has repeated months in ${year}:   ",
              join(' ',@{$REPEATED{$ykey}}),"\n";
      }

      # Attempt to determine if any of the missing files are still on disk
      foreach my $ykey (@missing_years) {
        my $year = substr($ykey, 1);
        foreach my $mname (@{$MISSING{$ykey}}) {
          my $str = sprintf("%s_%3.3d_m%2.2d",$runid,$year,$mlist{$mname});
          my @hits = grep { /\Q$str\E/ } @files_be;
          if (@hits) {
            print "${PFX}    Some or all of the missing files for $mname, $year may be found on the back end.\n";
            print "$PFX",join(${PFX},@hits),"\n";
          } else {
            print "${PFX}    The missing files for $mname, $year do not appear to be on the back end.\n";
          }
        }
      }

      unless (@missing_years) {
        # No missing file detected
        print "${PFX}There appear to be no missing files on CFS for run ",uc($runid),
              " in this range.\n";
      }

      print "\n";
    }
  }

  # Close the report for this runid with a double rule and a blank line
  my $rule = ("#=" x 60) . "#\n";
  print $rule, $rule, "\n";
}

exit 0;
########################################################
##################### End of main ######################
########################################################
