#!/usr/bin/env perl
########################################################################
# Create a file or files containing a list of file names using info
# from the environment and/or command line to construct these file names.
# The output list(s) will be placed in text file(s) suitable for being
# "sourced" in a shell script and will contain lines of the form
#
# file1=first_file
# file2=second_file
# ...
# file24=twenty_fourth_file
# join=24
#
# These lines may be preceded by lines containing other variable defs
# for variables that may be used in the file1, file2,... definitions or
# elsewhere in the invoking script.
#
# There are two modes of operation.
#   1) File names are created internally for each month in a range of
#      months. Unless a file name template is supplied by the user (via the
#      environment variable prefix_list or command line definition thereof),
#      these file names are all of a specific form, namely
#      ${prefix}_${runid}_${year}_m${month}_${suffix}
#      The prefix and suffix are user specified strings, possibly
#      indicating multiple (pre|suf)fixes that will generate multiple
#      file name combinations. The user also supplies runid and a
#      start/stop year and month that will be used to generate a sequence
#      of months for this month range.
#   2) File names are read from a user supplied external file.
#      This external file must be available to this script at execution
#      time and therefore must be in the execution directory or be
#      provided on the command line as a valid path name.
#      The file should contain a single file name per line, except for
#      blank lines or comment lines (whose first non-whitespace
#      character is "#").
#
# Larry Solheim Feb 2010
#
# $Id: make_file_name_list 658 2012-02-23 23:33:53Z acrnrls $
########################################################################

require 5;
use File::Basename;
use File::Copy;
use Getopt::Long;

# Declare global variables
use vars qw(%env $verbose $cfs_files %cfs_file_size);

# Identify this script by name.
# File::Basename::basename is used rather than shelling out to `basename $0`
# so that no subprocess is spawned and a script path that contains whitespace
# or shell metacharacters is still handled correctly.
$Runame = basename($0);

# The output file name
# This name may have numeric suffixes (001,002,...) appended
# if multi-list output is requested
$fname_out = "";

# The input file name
$fname_in = "";

# @pattern may hold a user supplied list of patterns used to generate file names.
# These patterns must be strings, possibly containing valid shell wild cards
# (e.g. *?[]) that will be used with the shell command "ls $DATAPATH/$pattern"
# to create lists of file names of files currently on disk.
@pattern = ();

# A boolean flag to determine if the input file name is to be added
# to the output file list
$add_fin = 0;

# verbose controls the amount of info written to stdout
$verbose = 0;

# Store all non-option command line args in NonOpt
@NonOpt = ();

# The start and stop arrays will hold year/month in elements 0/1
@start = ();
@stop = ();

# mon_offset may be set by the user to modify the start/stop year/month
$mon_offset = 0;

# A boolean flag to determine if start and stop year/month are the only
# variables that are written to the output files.
$dates_only = 0;

# A boolean flag to determine if multi list output is allowed.
# This is on by default but the user may turn it off via the command line
# option --nomulti_list, in which case the program will abort if any command
# line option that would turn on multi list output is used.
# This is useful when this program is used in a script and it is desirable
# to have a guarantee that multi list output will not be generated.
$allow_multi_list = 1;

# join_offset is used as the starting value (minus 1) of the counter
# "join" which is incremented each time a new fileN is added to the
# output list of files. This may be set by the user.
$join_offset = 0;

# A boolean flag to determine if a comment line of the form "#break" found
# in the input file (containing a list of file names) will result in a
# special fileN definition of the form "fileN=:break" being added to the
# output file list
$with_break = 0;

# A boolean flag to determine if a comment line of the form "#stop" found
# in the input file (containing a list of file names) will cause processing
# of that file to stop at that point.
$with_stop = 0;

# A boolean flag to determine if whitespace is to be kept in each line of
# the output file list (when false, the default, whitespace is removed)
$with_space = 0;

# Boolean flags to determine if the prefix and/or suffix are to undergo
# variable name expansion in the same way a file name template would.
$expand_prefix = 1;
$expand_suffix = 1;

# If any of $list_months_max, $list_size_max or $list_number_max are greater
# than 0 then there will be (potentially) multiple output files, each of which
# is constrained by a number of months, a file size limit and/or a maximum
# number of files.
$list_months_max = 0;
$list_size_max   = 0;
$list_number_max = 0;

# A boolean flag to indicate that the requested files are found on cfs.
# In this case the individual file size of file pattern match must be determined
# using "lsarc" rather than "ls -Ll $DATAPATH/..."
$cfs_files = 0;

# cfs_file_size will contain a list of files found on the cfs and their sizes
# when $cfs_files is true.
undef %cfs_file_size;

# A boolean flag to indicate that the "others" dir on cfs is to be searched for
# files that match a pattern. This is only used when $cfs_files is true and
# a file name pattern is input.
$use_others_dir = 0;

# These arrays will hold lists of prefixes and suffixes to use in
# the output list of file names
# Each element of the @prefix or @suffix array is a white space separated list.
# Any prefix/suffix in this white space separated list may be
# modified by appending a + followed by a comma separated list of
# numbers (no white space is allowed within this modifier). Each
# number within the modifier list will correspond to a month (1-12)
# for which a file with this suffix is to be included. If the
# modifier exists for a particular suffix then only those months
# indicated in the modifier will be added to the file list.
@prefix = ();
@suffix = ();

# Seed the env hash from a fixed set of "special" environment variables.
# A variable is copied only when it is defined, is not the empty string,
# and does not hold the placeholder value "NotSet".
@envlist_in = (
  qw( current_year     current_month   ),
  qw( previous_year    previous_month  ),
  qw( next_year        next_month      ),
  qw( run_start_year   run_start_month ),
  qw( run_stop_year    run_stop_month  ),
  qw( reset_start_year reset_end_year reset_stop_year ),
  qw( runid uxxx suffix_list prefix_list ),
  qw( cfsuser masterdir shortermdir others ),
);
ENVVAR: for my $name ( @envlist_in ) {
  my $value = $ENV{$name};

  # Not present in the environment at all
  next ENVVAR unless defined $value;

  # Present but null. A value made up solely of zeros is false in perl
  # but is still a legitimate setting, so it is explicitly let through.
  next ENVVAR unless ( $value or $value =~ /^0+$/ );

  # The special value "NotSet" (any case, optional surrounding white space)
  # means the variable is to be ignored
  next ENVVAR if $value =~ /^\s*NotSet\s*$/i;

  $env{$name} = $value;
}

# Default the start/stop year and month from the environment.
# This requires current_year/current_month plus one neighbouring date:
#   previous_* present  -> range runs from "previous" to "current"
#   otherwise next_*    -> range runs from "current"  to "next"
# If neither neighbour is fully defined nothing is initialized here.
if ( defined $env{current_year} and defined $env{current_month} ) {
  my @range;
  if ( defined $env{previous_year} and defined $env{previous_month} ) {
    @range = qw(previous current);
  }
  elsif ( defined $env{next_year} and defined $env{next_month} ) {
    @range = qw(current next);
  }

  if ( @range ) {
    my ($from, $to) = @range;
    ($start_year, $start_mon) = @env{ "${from}_year", "${from}_month" };
    ($stop_year,  $stop_mon)  = @env{ "${to}_year",   "${to}_month"   };
    @start = ($start_year, $start_mon);
    @stop  = ($stop_year,  $stop_mon);
  }
}

# uxxx, when set, supplies a single default prefix.
# It is overridden below whenever prefix_list is also set.
if ( $env{uxxx} ) {
  @prefix = ( $env{uxxx} );
}

# prefix_list and suffix_list are strings in which colons and white space
# both act as list delimiters, at two different levels:
#   - Here the string is split on colons (surrounding white space is
#     discarded) and the pieces are stored in @prefix / @suffix.
#   - Later on each element of @prefix / @suffix is itself split on white
#     space, giving the individual prefixes or suffixes that are iterated
#     over to form the desired set of file names.
# An individual prefix or suffix may carry a modifier consisting of a "+"
# followed by a comma separated list of integers in the range 1-12 (no white
# space is allowed within the modifier). When the modifier is present only
# the months it names will have a file with that prefix/suffix added to the
# file list.
for my $spec ( [ 'prefix_list', \@prefix ], [ 'suffix_list', \@suffix ] ) {
  my ($name, $dest) = @$spec;
  next unless $env{$name};
  @$dest = split /\s*:\s*/, $env{$name};
}

## TODO ##
# Define variables to indicate the number of months so far and total number of
# months since run_start_(year/month) (see pdump for descriptions)
$months_to_date = 0;
$months_in_run  = 0;
$is_last_month  = 0;
print $months_to_date if 0;
print $months_in_run  if 0;
print $is_last_month  if 0;
## TODO ##

# Define a usage function.
#
# Called as &$Usage($msg) or &$Usage().
#   $msg ...optional diagnostic; when true it is printed first, prefixed
#           with the script name.
# The usage text is written to stdout and the function then dies with a
# bare newline, so it never returns to the caller and the script terminates
# with a failure status unless the die is trapped.
#
# Every option accepted by the GetOptions call in this script is listed,
# including --[no]add_fin and --[no]others.
$Usage = sub {
  my ($msg)=@_;
  if ($msg) {print "${Runame}: $msg\n"};
  print <<EOR;
  Usage: $Runame [options] Output_File_Name [var=value ...]
Purpose: Create a file containing a list of file names that are
         constructed from user supplied info such as runid,
         start/stop year/months, prefix, suffix, etc.
Options:
  --in=fname           ...specify an input file containing the file list
  --[no]add_fin        ...[do not] add the input file name to the output file list
                          (not added by default)
  --pattern='patt'     ...specify a file name pattern, possibly containing wild cards
  --cfs_files          ...the files reside on cfs
                          This is meaningful only in conjunction with multi-lists or patterns
  --[no]others         ...[do not] search the "others" dir on cfs for files matching a pattern
                          This is meaningful only in conjunction with --cfs_files and --pattern
  --start=Y:M          ...specify the starting year and month
  --stop=Y:M           ...specify the stopping year and month
  --mon_offset=Integer ...specify a month offset for start and stop dates (default 0)
  --months_max=Integer ...output file lists will be constrained to have a maximum
                          number of months per list.  This implies multi-list output.
  --size_max=Nbytes    ...output file lists will be constrained to have a maximum total
                          size of Nbytes. This implies multi-list output.
  --number_max=Integer ...output file lists will be constrained to have a maximum
                          number of files per list. This implies multi-list output.
  --dates_only         ...only write definitions for start_year, start_mon, stop_year
                          and stop_mon to the output file list. No file names are written.
  --nomulti_list       ...abort if multi list output is requested
  --join=Integer       ...specify an offset for "join" (default 0)
  --with_break         ...process comment lines of the form "#break" found
                          in the input file, if any (on by default with multi-list output)
  --with_stop          ...process comment lines of the form "#stop" found
                          in the input file, if any (on by default with multi-list output)
  --with_space         ...do not remove white space from lines in the output
                          file list (default is to remove white space)
  --[no]expand_prefix  ...[do not] expand variable names in prefix internally
                          (expand prefix by default)
  --[no]expand_suffix  ...[do not] expand variable names in suffix internally
                          (expand suffix by default)
  --verbose            ...increase verbosity (additive)
  --help               ...show this usage info
EOR
  die "\n";
};

# Process command line arguments
#
# Option names are case sensitive and options may be freely mixed with
# non-option arguments ($PERMUTE is exported by Getopt::Long). Every
# non-option argument is collected, in order, in @NonOpt via the "<>" handler.
#
# Getopt::Long invokes a code ref handler with the option name and the option
# value as arguments. $Usage treats its first argument as a diagnostic
# message, so it must not be installed directly as the --help handler
# (that would print a spurious "<script>: help" line). It is wrapped in a
# closure that calls it with an empty argument list instead.
$Getopt::Long::ignorecase = 0;
$Getopt::Long::order = $PERMUTE;
GetOptions("help"           => sub { &$Usage() },
           "verbose"        => sub {$verbose++},
           "in=s"           => \$fname_in,
           # A single --pattern value may hold several colon separated patterns
           "pattern=s@"     => sub {push @pattern,split(/:/,$_[1])},
           "add_fin!"       => \$add_fin,
           "join=i"         => \$join_offset,
           "mon_offset=i"   => \$mon_offset,
           "months_max=i"   => \$list_months_max,
           # size_max is a string because it may carry a units suffix
           "size_max=s"     => \$list_size_max,
           "number_max=i"   => \$list_number_max,
           "dates_only!"    => \$dates_only,
           "multi_list!"    => \$allow_multi_list,
           "with_break!"    => \$with_break,
           "with_stop!"     => \$with_stop,
           "with_space!"    => \$with_space,
           "expand_prefix!" => \$expand_prefix,
           "expand_suffix!" => \$expand_suffix,
           "cfs_files!"     => \$cfs_files,
           "others!"        => \$use_others_dir,
           # start/stop are given as Y:M and stored as (year, month)
           "start=s"        => sub {@start = split /\s*:\s*/,$_[1]},
           "stop=s"         => sub {@stop  = split /\s*:\s*/,$_[1]},
           "<>"             => sub {push @NonOpt,$_[0]})
    or die "${Runame}: Error on command line.\n";

# At high verbosity show what was picked up from the environment
if ($verbose > 2) {
  print "Variables read from the environment:\n";
  foreach (sort keys %env) {
    print "$_ = $env{$_}\n" if defined $env{$_};
  }
  print "\n";
}

# Each non-option command line arg is either the output file name or a
# variable assignment of the form "var=value". Anything containing a word
# character followed by "=" is taken to be an assignment.
undef $fname_out;
ARG: for my $arg (@NonOpt) {
  next ARG unless $arg;

  # Remove one layer of enclosing single or double quotes, if present
  $arg =~ s/^'(.*?)'$/$1/;
  $arg =~ s/^"(.*?)"$/$1/;

  if ($arg !~ /\w+=/) {
    # Not an assignment, so this must be the output file name.
    # Only one such name is permitted on the command line.
    die "${Runame}: Attempting to redefine output file name $fname_out as $arg.\n"
      if $fname_out;
    $fname_out = $arg;
    next ARG;
  }

  # Split the assignment at the first "=" and unquote the value
  my ($name, $value) = $arg =~ /^\s*(.*?)=(.*)/;
  $value =~ s/^\s*"(.*)"\s*$/$1/;
  $value =~ s/^\s*'(.*)'\s*$/$1/;

  # A command line definition always replaces a definition of the same
  # name that came from the environment
  $env{$name} = "$value";

  # Certain variables need extra treatment.
  # Both uxxx and prefix_list reassign @prefix, so when both are present on
  # the command line (or one of them is repeated) the last occurrence wins.
  if ($name eq "runid") {
    # runid may not contain any white space
    $env{$name} =~ s/\s+//g;
  }
  elsif ($name eq "uxxx") {
    # A single prefix replaces the whole prefix list
    @prefix = ($value);
  }
  elsif ($name eq "prefix_list") {
    @prefix = split /\s*:\s*/,$value;
  }
  elsif ($name eq "suffix_list") {
    @suffix = split /\s*:\s*/,$value;
  }
}

# At high verbosity show the merged set of variable definitions
if ($verbose > 2) {
  print "Input variable definitions:\n";
  print "fname_out=$fname_out\n";
  for my $key (grep { defined $env{$_} } sort keys %env) {
    print "$key = $env{$key}\n";
  }
  print "\n";
}

# Nothing can be done without somewhere to write the list
unless ($fname_out) {
  &$Usage("An output file name is required on the command line\n");
}

# Reset the start or stop year according to the value of reset_start_year,
# reset_end_year or reset_stop_year supplied by the user.
#
# A reset variable must be of the form old_year:new_year (ie a colon
# separated pair of integers). The year in question is replaced by new_year,
# but only when it is currently equal to old_year.
# reset_end_year is a synonym for reset_stop_year; when both are given
# reset_stop_year takes precedence.
{
  # True when a value is defined and non-null. A string consisting solely
  # of zeros is false in perl but still counts as a value here.
  my $has_value = sub {
    my ($value) = @_;
    return 0 unless defined $value;
    return ( $value or $value =~ /^0+$/ ) ? 1 : 0;
  };

  # Apply the reset held in $env{$var} to the year stored in $date->[0].
  #   $var   ...name of the env hash entry holding old_year:new_year
  #   $date  ...reference to the (year, month) array to be modified
  #   $label ...variable name(s) to quote in the error message
  # Dies when the reset value is not a colon separated pair of integers.
  my $apply_reset = sub {
    my ($var, $date, $label) = @_;
    return unless $has_value->( $env{$var} );
    return unless $has_value->( $date->[0] );

    # White space is not significant anywhere in the reset value
    $env{$var} =~ s/\s+//g;
    my ($from_year, $to_year) = split /\s*:\s*/, $env{$var};
    die "${Runame}: Invalid value for $label. --> $env{$var} <--\n"
      unless ($from_year =~ /^[-+]?\d+$/ and $to_year  =~ /^[-+]?\d+$/);

    # Multiplying by 1 forces the new year to be stored as a number
    if ($date->[0] == $from_year) {
      $date->[0] = 1 * $to_year;
    }
  };

  $apply_reset->( 'reset_start_year', \@start, 'reset_start_year' );

  # Ensure that both reset_stop_year and reset_end_year have the same value.
  # reset_stop_year is used for both whenever it is set, otherwise
  # reset_end_year (if set) is used for both.
  if ( $env{reset_stop_year} ) {
    $env{reset_end_year}  = $env{reset_stop_year};
  } elsif ( $env{reset_end_year} ) {
    $env{reset_stop_year} = $env{reset_end_year};
  }

  $apply_reset->( 'reset_stop_year', \@stop, 'reset_stop_year or reset_end_year' );
}

# --dates_only writes nothing but dates to the output file, so an input
# file list given via --in=fname_in is meaningless. Warn and ignore it.
if ($dates_only and $fname_in) {
  for my $complaint ("Incompatible command line options --in=$fname_in and --dates_only found",
                     "Ignoring --in=$fname_in") {
    warn "${Runame}: $complaint\n";
  }
  $fname_in = '';
}

# Checks on size and number limits for multi-list output files
unless ($list_months_max =~ /^\d+$/) {
  die "${Runame}: Invalid value for command line option --months_max=$list_months_max.\n";
}
unless ($list_number_max =~ /^\d+$/) {
  die "${Runame}: Invalid value for command line option --number_max=$list_number_max.\n";
}
unless ($list_size_max =~ /^\s*\d+\s*(k|m|g|t|p)?b?\s*$/i) {
  die "${Runame}: Invalid value for command line option --size_max=$list_size_max\n";
}

# Convert list_size_max from a string of the form "<integer>[units]" to a
# floating point number of bytes. Units are binary multiples (1k = 1024).
{
  # Split the option value into its integer part and its (optional) units
  ($size_mult, $size_units) = $list_size_max =~ /^\s*(\d+)\s*(.*?)\s*$/;

  # The numeric part of the --size_max option value must be integer
  die "${Runame}: Invalid list_size_max=$list_size_max\n" unless $size_mult =~ /^\d+$/;

  # Recognized units, each paired with the number of bytes in one unit
  my @bytes_per_unit = (
    [ qr/^\s*b\s*$/i,      1.0                                  ],
    [ qr/^\s*(k|kb)\s*$/i, 1024.0                               ],
    [ qr/^\s*(m|mb)\s*$/i, 1024.0*1024.0                        ],
    [ qr/^\s*(g|gb)\s*$/i, 1024.0*1024.0*1024.0                 ],
    [ qr/^\s*(t|tb)\s*$/i, 1024.0*1024.0*1024.0*1024.0          ],
    [ qr/^\s*(p|pb)\s*$/i, 1024.0*1024.0*1024.0*1024.0*1024.0   ],
  );

  if ($size_units) {
    my ($unit) = grep { $size_units =~ $_->[0] } @bytes_per_unit;
    die "${Runame}: Invalid units on command line option --size_max=$list_size_max\n"
      unless $unit;
    $list_size_max = $unit->[1] * $size_mult;
  } else {
    # No units were supplied so the value is already in bytes
    $list_size_max = 1.0*$size_mult;
  }
}

# These were only needed for the conversion above
undef $size_mult;
undef $size_units;

# Decide whether the output list is to be divided into sub lists.
#
# Multi-list output is requested by any of the command line options
# --months_max=... --size_max=... --number_max=... (with a value greater
# than zero) or by --pattern=... (unless multi-list output was disallowed).
# With multi-list output the output file names will be of the form
# ${fname_out}001, ${fname_out}002, ... with each output list limited by
# the months_max, size_max or number_max constraints.
{
  my $limit_given = ( $list_months_max > 0 or
                      $list_size_max   > 0 or
                      $list_number_max > 0 ) ? 1 : 0;
  my $pattern_given = ( scalar(@pattern) and $allow_multi_list ) ? 1 : 0;
  my $multi_wanted  = ( $limit_given or $pattern_given ) ? 1 : 0;

  # Abort if the user has disallowed multi list output.
  if ( $multi_wanted and not $allow_multi_list ) {
    die "${Runame}: Multi list output is not allowed.\n"
  }

  $multi_list_output = $multi_wanted;

  if ( $multi_list_output ) {
    # With multi-list output "#break" and "#stop" comments found in any
    # input file are always treated as special commands.
    $with_break = 1;
    $with_stop = 1;
  }
}

# flist will contain a list of lines for the output file
@flist = ();
my %uniq_flist;

if ($fname_in or scalar(@pattern)) {
  if ($fname_in) {
    # An input file name was supplied on the command line
    # Attempt to open this file and read its contents, from which the
    # output file list will be defined.

    open(IFILE, "<$fname_in")
      or die "${Runame}: Cannot open $fname_in for reading.\n";
    IREAD: while (<IFILE>) {
      chomp;
      next if /^\s*$/;
      if (/^\s*#/) {
        # This is a comment line
        # These lines are ignored except under the following conditions
        if ($with_break and /^\s*#\s*break\s*$/i) {
          # Add the special file name ":break"
          push @flist, ":break";
        }
        elsif ($with_stop and /^\s*#\s*stop\s*$/i) {
          # Stop appending file names to flist
          last IREAD;
        }
      } else {
        # Use the first white space separated word in this line as the file name
        my ($curr_fname) = /^\s*(\S+)/;
        unless ( $uniq_flist{$curr_fname} ) {
          # Filter out all duplicate file names from the user input list
          $uniq_flist{$curr_fname} = 1;
          push @flist, $curr_fname;
        }
      }
    }
    close (IFILE);

    if ( $cfs_files ) {
      # Initialize the cfs_file_size hash
      chomp(my $cfsuser = `whoami`);
      $cfsuser = $env{cfsuser} if defined $env{cfsuser};
      $cfsuser =~ s/\s*//g;
      my $masterdir = "off";
      $masterdir = $env{masterdir} if defined $env{masterdir};

      # Determine arguments to lsarc according to the input values
      # of masterdir, runid and cfsuser
      chomp(my $stamp = `date "+%j%H%M%S"`);
      my $hitcount = "hits_count_$stamp";
      my $lsarc = "lsarc --env --count_file=$hitcount";
      if ( defined $env{runid} ) {
        # We have a value for runid
        if ( $masterdir =~ /^\s*on\s*$/i ) {
          # Look for files associated with runid in the masterdir
          $lsarc .= " runid=$env{runid}";
        } else {
          # Look for files belonging to cfsuser
          $lsarc .= " --user=$cfsuser";
        }
      } else {
        # runid is not defined
        if ( $masterdir =~ /^\s*on\s*$/i ) {
          # Look for files associated with any runid in the masterdir
          # This is VERY expensive
          $lsarc .= " --global_master";
        } else {
          # Look for files belonging to cfsuser
          $lsarc .= " --user=$cfsuser";
        }
      }

      # Define every file found in flist in the current ENV as
      # file1=$flist[0], file2=$flist[1], ...
      my $nfile_in = 0;
      foreach (@flist) {
        $nfile_in++;
        my $key = sprintf("file%d",$nfile_in);
        $ENV{$key} = $_;
      }
      # Ensure that the next fileN has a null value to ensure that lsarc
      # will stop processing fileN values at the right time.
      # It is possible that the parent env for this script contains fileN
      # definitions for values of N greater than the length of the input list
      $nfile_in++;
      my $key = sprintf("file%d",$nfile_in);
      $ENV{$key} = '';

      # Run lsarc and process its output to get file sizes (in bytes)
      # for each file in flist that is found in the cfs database
      my @lsarc_out = `$lsarc 2>/dev/null`;
      chomp @lsarc_out;
      foreach (@lsarc_out) {
        my @line = split;
        next unless scalar(@line) >= 7;
        my $fname = $line[6];
        my $size  = $line[5];
        $cfs_file_size{$fname} = $size;
      }

      # The above lsarc command will have created a file containing the
      # number of hits found on cfs. This should be the same as the number
      # of files in flist
      my $hits = `cat $hitcount`;
      unlink $hitcount;
      # When there are duplicate file names in the user input file list this
      # condition will be true even when all (unique) files appear on cfs
      # To avoid this problem all duplicate file names were filtered from
      # the user input file list above (when the list is read in)
      if ( $hits != scalar(@flist) ) {
        # Abort if any requested files are missing from cfs
        my $n = 0;
        foreach ( @flist ) {
          $n++;
          if ($cfs_file_size{$_}) {
            printf "%5d Requested %-30s   ...found\n",$n,$_;
          } else {
            printf "%5d Requested %-30s   ...NOT FOUND\n",$n,$_;
          }
        }
        my $req = scalar(@flist);
        die "Unable to find all files requested. Requested $req  Found $hits \n";
      }
    }
  }

  if (scalar(@pattern)) {
    # The user has supplied one or more patterns
    if ( $cfs_files ) {
      # Match patterns against files that are found on the cfs
      foreach my $patt (@pattern) {
        chomp(my $cfsuser = `whoami`);
        $cfsuser = $env{cfsuser} if defined $env{cfsuser};
        $cfsuser =~ s/\s*//g;
        my $masterdir = "off";
        $masterdir = $env{masterdir} if defined $env{masterdir};

        # Determine arguments to lsarc according to the input values
        # of masterdir, runid and cfsuser
        my $lsarc = "lsarc";
        if ( defined $env{runid} ) {
          # We have a value for runid
          if ( $masterdir =~ /^\s*on\s*$/i ) {
            # Look for files associated with runid in the masterdir
            $lsarc .= " runid=$env{runid}";
          } else {
            # Look for files belonging to cfsuser
            $lsarc .= " --user=$cfsuser";
          }
        } else {
          # runid is not defined
          if ( $masterdir =~ /^\s*on\s*$/i ) {
            # Look for files associated with any runid in the masterdir
            # This is VERY expensive
            $lsarc .= " --global_master";
          } else {
            # Look for files belonging to cfsuser
            $lsarc .= " --user=$cfsuser";
          }
        }

        if ( defined $env{others} or $use_others_dir ) {
          # Look in the "others" dir on cfs
            $lsarc .= " --others";
        }

        # Add the current pattern to the lsarc command
        # Always anchor the pattern to the start of the file name and to the end
        # of the file name so that it appears more "ls" like
        $lsarc .= " ^$patt\$";

        if ($verbose > 10) {
          print "$lsarc\n";
        }

        # Run lsarc and process its output to get file names that match the current
        # pattern then push them onto flist and assign to the cfs_file_size hash
        my @lsarc_out = `$lsarc 2>/dev/null`;
        chomp @lsarc_out;
        foreach (@lsarc_out) {
          if ($verbose > 10) {
            print "$_\n";
          }
          my @line = split;
          next unless scalar(@line) >= 7;
          # The file size will be the 6th space separated field
          # The file name will be the 7th space separated field
          my $size  = $line[5];
          my $fname = $line[6];
          unless ( defined $cfs_file_size{$fname} ) {
            # Do not add duplicate names to flist
            push @flist,  $fname;
            $cfs_file_size{$fname} = $size;
          }
        }
      }
    } else {
      # Match patterns against files that are currently on disk
      # DATAPATH is a literal directory name, not a regular expression.
      # Escape any regex metacharacters it contains (e.g. "+", "(" or "[")
      # so that the path is matched literally below. Without this, files in
      # such a directory would be silently dropped or the match would fail
      # to compile.
      my $datapath_re = quotemeta( defined $ENV{DATAPATH} ? $ENV{DATAPATH} : '' );

      foreach my $patt (@pattern) {
        # Only files carrying a 3 digit numeric extension are considered.
        # stderr is discarded so a pattern with no match yields empty output.
        my $lsout = `ls -1 $ENV{DATAPATH}/${patt}.[0-9][0-9][0-9] 2>/dev/null`;
        if ($lsout) {
          # Collect names in a hash so that a file name present with several
          # different numeric extensions is only listed once
          my %seen;
          foreach my $path (split /\s+/,$lsout) {
            # Ignore anything that does not begin with DATAPATH, otherwise
            # strip the leading DATAPATH/ and the trailing .NNN extension
            next unless $path =~ m!^$datapath_re/?(.*?)(?:\.\d\d\d)?$!;
            $seen{$1} = 1;
          }
          push @flist, sort keys %seen;
        } else {
          # Report patterns that match nothing unless verbosity is negative
          if ($verbose > -1) {
            warn "No files match the pattern --> $patt <--\n";
          }
        }
      }
    }
  }

  if ($verbose > 10) {
    print join("\n",@flist),"\n";
  }

} else {

  # Generate the file list internally

  # Set defaults for start/stop months (but no default for start/stop years)
  $start[1] = 1 unless $start[1];
  $stop[1] = 12 unless $stop[1];

  # Sanity checks for start and stop
  die "${Runame}: Start year is not set.\n"  unless defined $start[0];
  die "${Runame}: Start month is not set.\n" unless defined $start[1];
  die "${Runame}: Stop year is not set.\n"   unless defined $stop[0];
  die "${Runame}: Stop month is not set.\n"  unless defined $stop[1];
  foreach (@start) {
    next if /^\d+$/;
    die "${Runame}: Non integer start year or month. start = ",join(':',@start),"\n";
  }
  foreach (@stop) {
    next if /^\d+$/;
    die "${Runame}: Non integer stop year or month. stop = ",join(':',@stop),"\n";
  }
  if ($start[0] > $stop[0]) {
    die "${Runame}: Start year > stop year. ($start[0] > $stop[0])\n";
  }
  if ($start[1]<1 or $start[1]>12) {
    die "${Runame}: Start month is out of range. $start[1]\n";
  }
  if ($stop[1]<1  or $stop[1]>12) {
    die "${Runame}: Stop month is out of range. $stop[1]\n";
  }
  if ($start[0] == $stop[0] and $start[1] > $stop[1] ) {
    die "${Runame}: Start month > stop month. ($start[1] > $stop[1] in year $start[0])\n";
  }

  # Ensure mon_offset is an integer
  die "${Runame}: mon_offset is not an integer.\n" unless $mon_offset =~ /^[+-]?\d+$/;

  # Add mon_offset to start and stop dates if requested
  if ($mon_offset != 0) {
    if ($verbose > 2) {print "start dates:\n"}
    my $subOPTS = {VERBOSE => ($verbose > 2)?1:0};
    @start = new_year_mon($start[0], $start[1], $mon_offset, $subOPTS);
    if ($verbose > 2) {print "stop dates:\n"}
    @stop  = new_year_mon($stop[0],  $stop[1],  $mon_offset, $subOPTS);
  }

  # Create formatted version of start/stop year/month and add these
  # to the env hash so they get expanded as nessecary and so that
  # they are available to the subroutine write_dates
  $start_year  = sprintf("%3.3d",$start[0]);
  $start_mon = sprintf("%2.2d",$start[1]);
  $stop_year   = sprintf("%3.3d",$stop[0]);
  $stop_mon  = sprintf("%2.2d",$stop[1]);
  $env{start_year} = $start_year;
  $env{start_mon}  = $start_mon;
  $env{stop_year}  = $stop_year;
  $env{stop_mon}   = $stop_mon;

  # Define days_in_job from the values of (start|stop)_(year|mon)
  $env{days_in_job} = def_days_in_job();

  if ($dates_only) {
    # The only thing that will go into the output file is
    # start_year, start_mon, stop_year and stop_mon
    # Simply write these dates to the output file and quit.
    open(OFILE, ">$fname_out")
      or die "${Runame}: Cannot open $fname_out for output\n";
      write_dates(*OFILE);
    close(OFILE);
    exit;
  }

  # Sanity checks for prefix and suffix
  unless (scalar(@prefix)) {
    print "prefix_list = ",join(':',@prefix),"\n";
    print "suffix_list = ",join(':',@suffix),"\n";
    die "${Runame}: Empty prefix list.\n";
  }

  # Determine if the prefix is composed entirely of templates
  $template_only = 1;
  foreach (@prefix) {
    my $pfx_list = $_;
    # Strip leading and trailing whitespace
    $pfx_list =~ s/^\s*//;
    $pfx_list =~ s/\s*$//;
    die "Empty prefix list\n" unless $pfx_list;
    # Split each prefix list on whitespace
    foreach (split /\s+/,$pfx_list) {
      $template_only = 0 unless /^\s*%/;
    }
  }

  # Do not enforce the following if the prefix contains nothing but
  # file name templates because the suffix is never used in this case
  unless ($template_only) {
    unless (scalar(@suffix)) {
      print "prefix_list = ",join(':',@prefix),"\n";
      print "suffix_list = ",join(':',@suffix),"\n";
      die "${Runame}: Empty suffix list.\n";
    }
    unless (scalar(@prefix) == scalar(@suffix)) {
      print "prefix_list = ",join(':',@prefix),"\n";
      print "suffix_list = ",join(':',@suffix),"\n";
      die "${Runame}: Incompatible prefix and suffix list lengths.\n";
    }
  }

  die "${Runame}: A value for runid is required.\n" unless $env{runid};

  if ($verbose > 1) {
    print "uxxx=$env{uxxx};\n" if $env{uxxx};
    print "runid=$env{runid};\n";
    print "start = ",join(':',@start),"\n" if scalar(@start);
    print "stop  = ",join(':',@stop),"\n" if scalar(@stop);
    print "prefix_list = ",join(':',@prefix),"\n" if scalar(@prefix);
    print "suffix_list = ",join(':',@suffix),"\n" if scalar(@suffix);
  }

  # Create the file list

  $months_total = 0;
  for ($year = $start_year; $year <= $stop_year; $year += 1) {
    my $yfmt= sprintf("%3.3d",$year);
    if ($year == $start_year) {
      $m1 = $start_mon;
    } else {
      $m1 = 1;
    }
    if ($year == $stop_year) {
      $m2 = $stop_mon;
    } else {
      $m2 = 12;
    }
    for ($mon = $m1; $mon <= $m2; $mon += 1) {
      my $mfmt= sprintf("%2.2d",$mon);
      # Count the total number of months
      $months_total += 1;
      if ($list_months_max > 0) {
        if ($months_total > 1) {
          if ($list_months_max == 1 or $months_total % $list_months_max == 1) {
            # Insert a ":break" to start a new list
            push @flist, ":break";
          }
        }
      }
      # Loop over prefix lists found in @prefix.
      # For each prefix list found in @prefix there must be a corresponding suffix
      # list found in @suffix (e.g. prefix[N] is used with suffix[N])
      my $npfx = -1;
      foreach (@prefix) {
        $npfx++;
        my $pfx_list = $_;
        # Strip leading and trailing whitespace
        $pfx_list =~ s/^\s*//;
        $pfx_list =~ s/\s*$//;
        die "Empty prefix list\n" unless $pfx_list;
        # Split each prefix list found in prefix on whitespace
        PREFIX: foreach (split /\s+/,$pfx_list) {
          # Do not overwrite the value in the prefix list
          my $pfx = $_;
          # Strip any month list from pfx
          my ($pfx_mlist) = $pfx =~ /^.*?[+](.*)/;
          my @pfx_mlist = split /\s*,\s*/,$pfx_mlist if $pfx_mlist;
          $pfx =~ s/^(.*?)[+].*/$1/;
          if (scalar @pfx_mlist) {
            # Only a subset of months will be used
            my $use_month = 0;
            foreach (@pfx_mlist) {
  	    if ($_ == $mon) {
                $use_month = 1;
                last;
              }
            }
            next PREFIX unless $use_month;
          }

          # If the prefix begins with the char "%" then it is assumed to be
          # a file name template and is used as the entire file name
          if ($pfx =~ s/^\s*%(.*)$/$1/) {
            # Use the prefix as a file name template
            die "Empty prefix template found in --> $pfx_list <--\n" unless $pfx;
            # Substitute values for certain variables in this template
            # Replace "year" and "mon" with values defined in this loop
            # Replace any variables defined on the command line or exported
            # from the invoking scripts environment with their respective values
            $name = expand_vars($pfx, $yfmt, $mfmt);
            push @flist, $name;
            if ($verbose > 3) {
              print "pfx=$pfx  name=$name\n";
            }
            next PREFIX;
	  }

          if ($expand_prefix) {
            # Note that start_year, start_mon, stop_year, stop_mon will
            # always be expanded when the output file name is sourced
            $pfx = expand_vars($pfx, $yfmt, $mfmt);
          }

          # Use the corresponding suffix list in suffix
          my $sfx_list = $suffix[$npfx];
          # Strip leading and trailing whitespace
          $sfx_list =~ s/^\s*//;
          $sfx_list =~ s/\s*$//;
          die "Empty suffix list\n" unless $sfx_list;
          # Split this suffix list on whitespace
          SUFFIX: foreach (split /\s+/,$sfx_list) {
            # Do not overwrite the value in the suffix list
            my $sfx = $_;
            # Strip any month list from sfx
            my ($sfx_mlist) = $sfx =~ /^.*?[+](.*)/;
            my @sfx_mlist = split /\s*,\s*/,$sfx_mlist if $sfx_mlist;
            $sfx =~ s/^(.*?)[+].*/$1/;
            if ($verbose > 3) {
              print "year=$year  mon=$mon  pfx=$pfx  sfx=$sfx   sfx_mlist=@sfx_mlist\n";
            }
            if (scalar @sfx_mlist) {
              # Only a subset of months will be used
              my $use_month = 0;
              foreach (@sfx_mlist) {
  	      if ($_ == $mon) {
                  $use_month = 1;
                  last;
                }
              }
              next SUFFIX unless $use_month;
            }

            if ($expand_suffix) {
              # Note that start_year, start_mon, stop_year, stop_mon will
              # always be expanded when the output file name is sourced
              $sfx = expand_vars($sfx, $yfmt, $mfmt);
            }

            my  $name = "${pfx}_$env{runid}_${yfmt}_m${mfmt}_$sfx";
            push @flist, $name;
            if ($verbose > 3) {
              print "Added name $name\n";
            }
          }
        }
      }
    }
  }
}

# At this point there should be at least 1 element in @flist
die "Unable to create the output file list.\n" unless scalar(@flist);

# Create a single file containing the file list created above or multiple
# files, each of which contains a subset of this file list.
# Assume the file(s) will be "sourced" in a shell script so that the shell
# variables file1, file2, ... define all the files in this list
#
# Every output file has the same layout:
#   - the date variable definitions emitted by write_dates
#   - one "fileN=name" line per file, N counting up from 1 (or from
#     join_offset+1 in the single file case)
#   - a final "join=N" line holding the number of the last file

if ($multi_list_output) {
  # Break @flist into sub lists such that the total size of all files in each
  # sublist is less than or equal to $list_size_max (in bytes) and/or the total
  # number of files in each sub list is less than or equal to $list_number_max
  #
  # Entries of @flist that begin with ":" are embedded commands rather than
  # file names. The only command recognised here is ":break", which forces
  # the start of a new sub list.

  # A hash containing the output file name and the list of files to put into
  # that file, for each sublist generated, is added to the @sublists array.
  my @sublists = ();
  # curr_size    ... size of the file most recently examined
  # sublist_size ... cumulative size of the files in the current sub list
  # join         ... number of "fileN=" lines in the current sub list
  # nlist        ... sequence number used as the suffix of the output file name
  $curr_size = 0.0;
  $sublist_size=0.0;
  $join=0;
  $nlist = 1;
  $sublist = {};
  $sublist->{file_name} = sprintf("%s%3.3d",$fname_out,$nlist);
  # Assigning an empty list in scalar context leaves file_list undefined.
  # The array is created by the first push onto @{$sublist->{file_list}}.
  $sublist->{file_list} = ();
  $sublist->{list_size} = 0.0;
  if ($add_fin and $fname_in) {
    # Insert the input file name as the first file in the first list
    # Note that $fname_in must not be a local file name but rather the file name
    # under which it was "save"d, since it will most likely be "access"ed by the
    # same name when the file list is finally processed by the invoking script.
    # It will also be "access"ed here when $list_size_max > 0.
    $join++;
    $line = sprintf("file%d=%s",$join,$fname_in);
    # Remove all whitespace from the line unless the user asked to keep it
    $line =~ s/\s+//g unless $with_space;
    push @{$sublist->{file_list}}, $line;
    if ($list_size_max > 0) {
      # Add file size for $fname_in
      $curr_size = get_file_size($fname_in);
      $sublist_size += $curr_size;
    }
  }
  foreach $curr_file (@flist) {
    # Tentatively add the current entry to the current sub list. It is
    # removed again below if it turns out to be an embedded command or
    # if it pushes the sub list over the size limit.
    $join++;
    $line = sprintf("file%d=%s",$join,$curr_file);
    $line =~ s/\s+//g unless $with_space;
    push @{$sublist->{file_list}}, $line;
    if ($list_size_max > 0) {
      # Determine cumulative file size for this sub list
      unless ($curr_file =~ /^\s*:/) {
        # Do not attempt to find the size if this is an embedded command
        $curr_size = get_file_size($curr_file);
        $sublist_size += $curr_size;
      }
    } else {
      # Sizes are not tracked when there is no size limit
      $curr_size = 0.0;
      $sublist_size=0.0;
    }
    if ($verbose > 10) {
      print "$join  curr_size=$curr_size  sublist_size=$sublist_size";
      print "  list_size_max=$list_size_max  file=$curr_file\n";
    }
    # start_new_sublist records why (if at all) the current sub list must be
    # closed: 1 = size limit exceeded, 2 = file count limit reached,
    # 3 = embedded ":break" command. The tests below are applied in order,
    # so a later test overrides an earlier one (size has the final say).
    $start_new_sublist = 0;
    if ($list_number_max > 0 and $join == $list_number_max) {
      # The current sub list now holds the maximum number of files
      $start_new_sublist = 2;
    }
    if ($curr_file =~ /^\s*:break/) {
      # The user has inserted an embedded break command
      $start_new_sublist = 3;
    } elsif ($curr_file =~ /^\s*:/) {
      # The user has inserted an unknown embedded command
      die "${Runame}: Invalid embedded command --> $curr_file <--\n";
    }
    if ($list_size_max > 0 and $sublist_size > $list_size_max) {
      # Exceeded maximum total size of files
      $start_new_sublist = 1;
    }
    if ($start_new_sublist) {
      # Store the current sub list and start a new sub list
      if ($start_new_sublist == 1) {
        # The maximum size has been exceeded
        # pop the last line of the current list and use it as the first
        # line of the next sub list
        $last_line = pop @{$sublist->{file_list}};
        # Decrease sublist_size and join to reflect the removal of the last
        # file name from the current list
        $sublist_size -= $curr_size;
        $join--;
      } elsif ($start_new_sublist == 3) {
        # The user has inserted an embedded break command
        # Pop off the last entry containing the command string (e.g. ":break")
        pop @{$sublist->{file_list}};
        # Decrement join to reflect the removal of the last line
        $join--;
        $last_line = '';
      } else {
        # File count limit reached: the current entry stays in this sub list
        $last_line = '';
      }
      # Close the current sub list by recording its size and appending
      # the "join=N" line
      $sublist->{list_size} = $sublist_size;
      $line = sprintf("join=%d",$join);
      $line =~ s/\s+//g unless $with_space;
      push @{$sublist->{file_list}}, $line;
      if ($join > 0) {
        push @sublists, $sublist;
      } else {
        # This can happen when $list_size_max is less than the
        # size of the first file in @flist
        # The empty sub list is discarded and its sequence number is reused
        # (nlist is incremented again just below).
        $nlist--;
      }
      # Reset the counters and create an empty sub list with the next
      # sequence number
      $sublist_size=0.0;
      $join = 0;
      $nlist++;
      undef $sublist;
      $sublist = {};
      $sublist->{file_name} = sprintf("%s%3.3d",$fname_out,$nlist);
      $sublist->{file_list} = ();
      $sublist->{list_size} = 0.0;
      if ($last_line) {
        # Start the next sub list with the last line of the previous list
        # The line is rebuilt from $curr_file, rather than reusing $last_line,
        # so that the file is renumbered as file1 in the new sub list.
        $join = 1;
        $sublist_size = $curr_size;
        $line = sprintf("file%d=%s",$join,$curr_file);
        $line =~ s/\s+//g unless $with_space;
        push @{$sublist->{file_list}}, $line;
      }
    }
  }
  if ($sublist->{file_list} and scalar(@{$sublist->{file_list}})) {
    # There is a partial sub list that has not been added to @sublists
    if ($list_size_max > 0) {
      $sublist->{list_size} = $sublist_size;
    }
    # The [-1] subscript selects the last line of the current sub list
    unless (@{$sublist->{file_list}}[-1] =~ /^\s*join=/) {
      # Append a "join=..." line to the current sub list
      # if it does not already contain one
      $line = sprintf("join=%d",$join);
      $line =~ s/\s+//g unless $with_space;
      push @{$sublist->{file_list}}, $line;
    }
    push @sublists, $sublist;
  }

  if ($verbose > 4) {
    # Dump every sub list before anything is written to disk
    foreach (@sublists) {
      print "file_name = ",$_->{file_name},"\n";
      print "list_size = ",$_->{list_size},"\n";
      print "file_list =\n",join("\n",@{$_->{file_list}}),"\n";
    }
  }

  # Create the output files
  foreach my $list (@sublists) {
    my $fout = $list->{file_name};
    my $list_size = $list->{list_size};
    # Convert bytes to GB (1 GB = 1024**3 = 1073741824 bytes)
    $list_size_gb = sprintf("%6.4g", (1.0*$list_size)/1073741824.0);
    open(OFILE, ">$fout") or die "${Runame}: Cannot open $fout for output\n";
      # Add variable defs for start_year, start_mon, stop_year, stop_mon, ...
      write_dates(*OFILE);
      # Write the file list variable defs
      print OFILE join("\n",@{$list->{file_list}}),"\n";
      # The trailing size comment is only written when the size is non-zero
      print OFILE "# Total size of all files in this list: $list_size  ($list_size_gb GB )\n" if $list_size;
    close(OFILE);
    if ($verbose > 0) {
      print "$fout";
      if ($list_size_max > 0) {
        print " contains $list->{list_size} bytes\n";
      } else {
        print "\n";
      }
      # Echo the file that was just written
      print `cat $fout`,"\n";
    }
  }

} else {

  # A single output file will be created that contains all files
  # in the global file list @flist
  open(OFILE, ">$fname_out")
    or die "${Runame}: Cannot open $fname_out for output\n";

  # Add variable defs for start_year,start_mon,stop_year,stop_mon, ...
  write_dates(*OFILE);

  my $line = '';
  # File numbering starts at join_offset+1 in this branch
  # (the multi list branch above always starts at 1)
  $join=$join_offset;
  if ($add_fin and $fname_in) {
    # Insert the input file name as the first file in the list
    # Note that $fname_in must not be a local file name but rather the file name
    # under which it was "save"d, since it will most likely be "access"ed by the
    # same name when the file list is finally processed by the invoking script.
    $join++;
    $line = sprintf("file%d=%s",$join,$fname_in);
    # Remove all whitespace from the line unless the user asked to keep it
    $line =~ s/\s+//g unless $with_space;
    print OFILE "$line\n";
  }
  foreach (@flist) {
    # Ignore file names that begin with the char ":"
    # These are intended to mark embedded commands (e.g. ":break") related to
    # subdividing the list (done in the opposite branch of this if block)
    next if /^\s*:/;
    $join++;
    $line = sprintf("file%d=%s",$join,$_);
    $line =~ s/\s+//g unless $with_space;
    print OFILE "$line\n";
  }
  # Finish with the number of the last file written
  $line = sprintf("join=%d",$join);
  $line =~ s/\s+//g unless $with_space;
  print OFILE "$line\n";
  close(OFILE);

  if ($verbose > 0) {
    # Echo the file that was just written
    print `cat $fname_out`,"\n";
  }
}

exit 0;
########################################################
##################### End of main ######################
########################################################

sub expand_vars {
  # Expand variable references found in a file name template.
  #
  # Arguments (positional):
  #   name_template ... string that may contain "$var" or "${var}" references
  #   yfmt          ... value substituted for the variable "year"
  #   mfmt          ... value substituted for the variable "mon"
  #
  # The value used for each referenced variable is taken from the first
  # of the following sources that applies:
  #   year, mon         ... the yfmt and mfmt arguments
  #   end_year, end_mon ... stop_year and stop_mon from the env hash
  #                         (these names exist for backward compatibility)
  #   any other name    ... the env hash, which may hold values modified by
  #                         this script or supplied on the command line,
  #                         and then the environment of the invoking script
  #
  # A variable that is found in none of these places is left in the
  # template "as is". It is assumed that the user will supply its value
  # before the output file is "sourced". Such variables should be written
  # as "${var}" rather than "$var". If "$var" is immediately followed by
  # a variable that does get expanded, the result is "$varXX" (XX being the
  # value of the second variable) which the shell will most likely read
  # as a reference to "varXX".
  #
  # Dies if a referenced variable name begins with a digit.
  # Returns the template with all known variables expanded.
  use strict;
  use File::Basename;
  my ($name_template, $yfmt, $mfmt) = @_;

  # Create a tag to use with error diagnostics
  my (undef, $file, undef, $subname) = caller(0);
  my $errID = basename($file)."::${subname}:";

  my $expanded = $name_template;

  # Each reference found by the pattern contributes one capture for the
  # "${var}" form and one for the "$var" form. The form that did not
  # match yields an empty entry, which is dropped here.
  my @refs = grep { $_ } $expanded =~ /\$\{(\w+)\}|\$(\w+)/g;

  REF: foreach my $ref (@refs) {
    die "$errID Invalid variable name \"$ref\" found in $name_template\n"
      if $ref =~ /^\d/;

    # Decide which value, if any, replaces this variable
    my $value;
    if    ($ref eq "year")     { $value = $yfmt }
    elsif ($ref eq "mon")      { $value = $mfmt }
    elsif ($ref eq "end_year") { $value = $env{stop_year} }
    elsif ($ref eq "end_mon")  { $value = $env{stop_mon} }
    elsif (exists $env{$ref} and defined $env{$ref}) { $value = $env{$ref} }
    elsif (exists $ENV{$ref} and defined $ENV{$ref}) { $value = $ENV{$ref} }
    else {
      # Unknown variable, leave every reference to it untouched
      next REF;
    }

    # Replace both "${var}" and "$var". The lookahead stops the bare form
    # from matching the leading part of a longer variable name.
    $expanded =~ s/\$(\{$ref\}|$ref(?!\w))/$value/g;
  }

  # Return the input file name template with variables expanded.
  return $expanded;
}


sub new_year_mon {
  # Apply an offset, in months, to a year and month.
  #
  # Arguments (positional):
  #   year       ... string of digits
  #   mon        ... string of digits with a value in the range 1 to 12
  #   mon_offset ... integer number of months, optionally signed
  #   OPTS       ... optional hash ref; a defined VERBOSE entry with a value
  #                  greater than 0 prints the input and output dates
  #
  # Dies if year, mon or mon_offset fail the checks described above.
  # Returns the list (new_year, new_mon) in list context and undef in
  # scalar context.
  use strict;
  use File::Basename;
  my ($year, $mon, $mon_offset, $OPTS) = @_;

  my $verbose = defined $OPTS->{VERBOSE} ? $OPTS->{VERBOSE} : 0;

  # error checking
  my (undef, $file, undef, $subname) = caller(0);
  my $errID = basename($file)."::${subname}:";
  $year =~ /^\d+$/
    or die "$errID Year is not a positive integer.\n";
  $mon =~ /^\d+$/
    or die "$errID Month is not a positive integer.\n";
  ($mon >= 1 and $mon <= 12)
    or die "$errID Month=$mon is out of range.\n";
  $mon_offset =~ /^[+-]?\d+$/
    or die "$errID Month offset is not an integer.\n";

  # Count months from January of the input year, with January itself
  # being month 0, so that whole years are multiples of 12
  my $shifted = $mon - 1 + $mon_offset;

  # With a positive right operand the % operator never returns a negative
  # value, so $wrapped is always in the range 0 to 11 and the difference
  # $shifted - $wrapped is an exact multiple of 12, even for offsets that
  # reach back before the input year
  my $wrapped  = $shifted % 12;
  my $new_mon  = $wrapped + 1;
  my $new_year = $year + ($shifted - $wrapped)/12;

  if ($verbose > 0) {
    printf "  mon_offset=%2d\n",$mon_offset;
    printf "      year=%4d      mon=%2d\n",$year,$mon;
    printf "  new_year=%4d  new_mon=%2d\n",$new_year,$new_mon;
  }

  # Return the new year and month (nothing useful in scalar context)
  return ($new_year, $new_mon) if wantarray;
  return;
}

sub what_som_eom {
  # Determine the day of the year at the start and at the end of a month.
  #
  # Argument:
  #   mon ... month number, compared numerically against 1 to 12
  #
  # The day numbers are those of a 365 day year (February ends on day 59
  # and December ends on day 365).
  #
  # Dies if mon is not numerically equal to one of 1, 2, ..., 12.
  # Returns the list (som, eom) in list context and the string "som:eom"
  # in scalar context.
  use strict;
  use File::Basename;
  my ($mon) = @_;

  # Day of the year on which each month ends, January first
  my @last_day = (31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365);

  foreach my $idx (0 .. $#last_day) {
    next unless $mon == $idx + 1;
    my $eom = $last_day[$idx];
    # A month starts on the day after the previous month ends
    my $som = $idx ? $last_day[$idx - 1] + 1 : 1;
    # Return start and end day of the year for the current month
    return wantarray ? ($som, $eom) : "${som}:${eom}";
  }

  # None of the twelve months matched
  my (undef, $file, undef, $subname) = caller(0);
  my $errID = basename($file)."::${subname}:";
  die "$errID month=$mon is out of range.\n";
}

sub def_days_in_job {
  # Determine the total number of days from the start of the earlier of
  # start_year/start_mon and stop_year/stop_mon to the end of the later
  # of the two. All four values are read from the env hash.
  #
  # Every year is counted as 365 days and the day numbers within a year
  # are those returned by what_som_eom.
  #
  # Argument:
  #   OPTS ... optional hash ref; a defined VERBOSE entry with a value
  #            greater than 0 prints the result
  #
  # Dies if any of the four env hash entries is undefined.
  # Returns the number of days.
  use strict;
  use File::Basename;

  my ($OPTS) = @_;
  my $verbose = defined $OPTS->{VERBOSE} ? $OPTS->{VERBOSE} : 0;

  # error checking
  my (undef, $file, undef, $subname) = caller(0);
  my $errID = basename($file)."::${subname}:";
  foreach my $key (qw(start_year start_mon stop_year stop_mon)) {
    die "$errID $key is not defined.\n"  unless defined $env{$key};
  }

  # The first and last day of the year for both start and stop months
  my ($som_start, $eom_start) = what_som_eom($env{start_mon});
  my ($som_stop, $eom_stop)   = what_som_eom($env{stop_mon});

  # The start date may be later than the stop date, in which case the
  # two dates swap roles
  my $start_is_later =
       $env{start_year} > $env{stop_year}
    || (    $env{start_year} == $env{stop_year}
         && $env{start_mon}  >  $env{stop_mon} );

  # Whole years between the two dates, plus the days from the first day
  # of the earlier month to the last day of the later month
  my $days = $start_is_later
    ? 365*($env{start_year} - $env{stop_year}) + $eom_start - $som_stop + 1
    : 365*($env{stop_year} - $env{start_year}) + $eom_stop - $som_start + 1;

  if ($verbose > 0) {
    printf "  days=%2d\n",$days;
  }

  # Return the number of days
  return $days;
}

sub write_dates {
  # Write shell variable definitions for the year/month start and stop
  # times, and for days_in_job, to an output file handle. Values are taken
  # from the env hash and a group of definitions is skipped when the env
  # hash entries it needs are not defined.
  #
  # Argument:
  #   outfh ... file handle that is open for output
  #
  # Lines are written in this order
  #   start_year, start_mon      if start_year and start_mon are defined
  #   stop_year, stop_mon,
  #   end_year, end_mon          if stop_year and stop_mon are defined
  #   days_in_job                if days_in_job is defined
  # where end_year and end_mon repeat the stop values for backward
  # compatibility.
  #
  # Dies if a line cannot be written. Returns nothing.
  use strict;
  my ($outfh) = @_;

  # Create a tag to use with error diagnostics
  my (undef, $file, undef, $subname) = caller(0);
  my $errID = basename($file)."::${subname}:";

  # Collect one entry for each line to be written. Each entry holds the
  # name of the variable being defined, the env hash key that supplies
  # the value and the format used to create the line.
  my @defs = ();
  if (defined $env{start_year} and defined $env{start_mon}) {
    push @defs,
      { name => "start_year", key => "start_year", fmt => "start_year=%3.3d" },
      { name => "start_mon",  key => "start_mon",  fmt => "start_mon=%2.2d"  };
  }
  if (defined $env{stop_year} and defined $env{stop_mon}) {
    push @defs,
      { name => "stop_year", key => "stop_year", fmt => "stop_year=%3.3d" },
      { name => "stop_mon",  key => "stop_mon",  fmt => "stop_mon=%2.2d"  },
      # Also add end_year and end_mon for backward compatibility
      { name => "end_year",  key => "stop_year", fmt => "end_year=%3.3d"  },
      { name => "end_mon",   key => "stop_mon",  fmt => "end_mon=%2.2d"   };
  }
  if (defined $env{days_in_job}) {
    push @defs,
      { name => "days_in_job", key => "days_in_job", fmt => "days_in_job=%d" };
  }

  foreach my $def (@defs) {
    my $value = $env{$def->{key}};
    my $text  = sprintf($def->{fmt}, $value);
    print $outfh "$text\n"
        or die "$errID WRITE ERROR $def->{name}=$value\n";
  }

  # Return void
  return;
}

sub get_file_size {
  # Determine the size, in bytes, of a file that is saved on DATAPATH
  # or, when cfs_files is set, of a file found on cfs.
  #
  # Argument:
  #   fname ... name of the file, without any edition number
  #
  # Dies (after issuing a warning) if the directory listing used to find
  # the size of a file on DATAPATH returns nothing.
  # Returns the file size as a number.
  use strict;
  my ($fname) = @_;

  # Create a tag to use with error diagnostics
  my (undef, $file, undef, $subname) = caller(0);
  my $errID = basename($file)."::${subname}:";

  my $size;
  if ( !$cfs_files ) {
    # List all versions (same file but different edition numbers) of $fname
    # that are found in DATAPATH. The -L option makes ls follow links so
    # that the size reported is that of the link target.
    # stderr from this ls is dumped to /dev/null. Errors may result when
    # the file is not on DATAPATH or link targets are missing. These are
    # trapped by checking whether anything at all was returned, which
    # avoids extraneous output.
    my @listing = `ls -Ll $ENV{DATAPATH}/${fname}.[0-9][0-9][0-9] 2>/dev/null`;
    unless (scalar(@listing)) {
      warn "$errID $fname is missing from $ENV{DATAPATH} or is empty\n";
      die "$errID Unable to determine size of $fname\n";
    }
    if ($verbose > 10) {
      print "lsline:\n",join("\n",@listing),"\n";
    }
    chomp @listing;
    # The size is the fifth whitespace separated field of the last line
    # NOTE(review): the last line is presumably the highest edition
    # number, since ls sorts its output by name - confirm
    my @fields = split(/\s+/, $listing[-1]);
    $size = 1.0 * $fields[4];
  } else {
    # Get file size for files found on cfs from a previously defined hash
    $size = 1.0 * $cfs_file_size{$fname};
  }

  if ($verbose > 10) {
    print "fname=$fname   size=$size\n";
  }

  # Return file size in bytes
  return $size;
}
