#!/usr/bin/perl

use Fcntl;

# ---------------------------------------------------------------------------
# Global configuration and shared state (package globals; the subs below read
# and update them directly).
# ---------------------------------------------------------------------------

$nb_max_cmds = 1;   # Maximum number of commands run in parallel (-n overrides it)
$exit_status = 0;   # Exit code of the script; set to 1 when a file fails

# Bookkeeping filled in by get_arguments():
%filenames = ();        # path printed by true_path.cfsrip (2nd field) => name (1st field)
%retrieve_status = ();  # same key => status text ("Retrieved" or an error message)
%file_list_split = ();  # chunk id => space-separated list of files for one remote call
%file_list_host = ();   # chunk id => host the chunk is sent to

# Alternative kept for reference: maximum length a command may have
# (see systune, ncargs=20480).
#$max_cmd_length = 19000;
$max_cmd_length = 9500; # Maximum length an rsh command may have (10K)

# Remote shell used to reach the CFS hosts.
# $shell_cmd="rsh";
$shell_cmd = "ssh -o NoneSwitch=yes";

# Host on which true_path.cfsrip is run.
$cfsrip_host = "cfsrip";

# Host serving each file system.  The key is the first path component of a
# file (after an optional leading "home" component, see get_arguments()).
# Values are quoted strings: barewords warn under -w and fail under strict.
%file_servers = (
	"cfs_cmo"    => "cfs1",
	"logs"       => "cfs1",
	"cfs_cmis"   => "cfs1",
	"cfs_torard" => "cfs1",
	"cfs_ccrd"   => "cfs2",
	"backup"     => "cfs2",
	"cfs_cmd"    => "cfs2",
	"cfs_dorard" => "cfs2",
	);

# ---------------------------------------------------------------------------
# Main flow: parse the command line, start the command manager, then run
# cfsripd remotely once per chunk of files and react to each message it
# prints.  Files reported as available are handed to execute_command();
# failures are recorded in %retrieve_status and reported at the end.
# ---------------------------------------------------------------------------
get_arguments();

setup_manager();

# Status text recorded for the failure messages that carry the path in <...>.
my %failure_text_for = (
   "FS0054" => "No such file or directory.",
   "FS0040" => "No such file or directory.",
   "FS0042" => "User does not have access to file or directory.",
   "FS0098" => "Pathname specified is a directory, file must be specified.",
);

# Chunk ids are integers: sort them numerically so that chunks are processed
# in the order they were built even when there are ten or more of them.
foreach my $chunk_id (sort { $a <=> $b } keys %file_list_split) {
   # The command string is deliberately given to the shell as a whole.
   open(my $cfsripd, "$shell_cmd $file_list_host{$chunk_id} -n cfsripd $file_list_split{$chunk_id} |") || die "Unable to retrieve files using cfsripd command: $!\n";

   # Files (messages) are picked up as soon as they are ready to be
   # processed (i.e. as soon as they have a copy on disk).
   # The messages look like this:
   #
   # 1) File has been reloaded on disk:
   # FS0347 29 526237 fsretrieve interim: File ABSOLUTE_FILE_PATH has been retrieved.
   # 2) File is already present on disk:
   # FS0005 29 526241 fsretrieve interim: No retrieve needed, the data is already present on the disk for file ABSOLUTE_FILE_PATH.
   # 3) File is not under FileServ control (but exists):
   # FS0046 07 00210 fsretrieve interim: I/O error received from system call. <ABSOLUTE_FILE_PATH>
   # 4) File exists but is empty (so there is no copy on tape):
   # FS0066 07 00212 fsretrieve interim: Request to retrieve file with length of zero bytes. <ABSOLUTE_FILE_PATH>
   # 5) File does not exist:
   # FS0054 29 526244 fsretrieve interim: File does not exist. <ABSOLUTE_FILE_PATH>
   # or
   # FS0040 19 00335 fsretrieve interim: Directory pathname specified does not exist. <ABSOLUTE_FILE_PATH>
   # 6) File is not readable:
   # FS0042 19 00392 fsretrieve interim: User does not have access to file or directory. <ABSOLUTE_FILE_PATH>
   # 7) The given file is actually a directory (it must be a "real file"):
   # FS0098 04 13197529 fsretrieve interim: Pathname specified is a directory, file must be specified. <ABSOLUTE_FILE_PATH>
   # 8) Not enough free space:
   # FS0004 23 10572646 fsretrieve interim: Not enough disk space available to retrieve file <ABSOLUTE_FILE_PATH>.
   #
   while (my $line = <$cfsripd>) {

     if ( $line =~ /^FS0347.*\s(\S+)\shas been retrieved/m ) {
        execute_command($1);
     } elsif ( $line =~ /^FS0005.*\s(\S+)\./m ) {
        execute_command($1);
     } elsif ( $line =~ /^FS00(?:46|66).*<(\S+)>/m ) {
        # Cases 3 and 4: the file is usable as it is.
        execute_command($1);
     } elsif ( $line =~ /^(FS00(?:54|40|42|98)).*<(\S+)>/m ) {
        $retrieve_status{$2} = $failure_text_for{$1};
        $exit_status = 1;
     } elsif ( $line =~ /^FS0004.*Not enough disk space available to retrieve file (\S+)\./m ) {
        my $path = $1;
        # The message format documented above wraps the path in <...>;
        # drop the brackets (when present) so that the status is filed
        # under the same key as every other message for that file.
        $path =~ s/^<(.*)>$/$1/;
        $retrieve_status{$path} = "Not enough disk space available to retrieve file.";
        $exit_status = 1;
     } else { # unrecognised message: treat it as an error
        die "$line\n";
     }
   }
   close($cfsripd);

}

# No more commands: closing the pipe ends the manager's input.
close(PIPE);

# Reap any remaining child process.
while ( wait() != -1 ) {
}

# Report every file that did not end up in the "Retrieved" state.  Such a
# file is a failure even when no explicit error message was seen for it, so
# it is reflected in the exit status as well.
foreach my $path (sort keys %retrieve_status) {
  next if $retrieve_status{$path} =~ /Retrieved/;
  # Fall back on the path itself when it is not a known file name.
  my $shown_name = defined($filenames{$path}) ? $filenames{$path} : $path;
  print STDERR "$shown_name : $retrieve_status{$path}\n";
  $exit_status = 1;
}

exit($exit_status);



# -----------------------------------------------------------------------
# Build the command line for one file and send it through PIPE, i.e. to the
# process that runs the commands and limits how many run in parallel.
#
# Argument: the key of the file in %filenames / %retrieve_status.
# Reads the globals $cmd_str and %filenames; marks the file as "Retrieved"
# in %retrieve_status.
#
sub execute_command {

  my ($filename) = @_;

  # Name substituted for "{}", and its last path component for "basename{}".
  my $shown_name = $filenames{$filename};
  my @path_parts = split(/\//, $shown_name);
  my $leaf_name = $path_parts[-1];

  $retrieve_status{$filename} = "Retrieved";

  # "basename{}" has to be handled first: it contains "{}" itself.
  my $command_line = $cmd_str;
  $command_line =~ s/basename\s*{}/$leaf_name/g;
  $command_line =~ s/{}/$shown_name/g;

  print PIPE "$command_line\n";
}



# -----------------------------------------------------------------------
# Start a child process (the "manager") that executes the commands and
# limits how many of them run in parallel ($nb_max_cmds).
# A pipe links the two processes: the parent writes one command per line to
# PIPE and the manager reads them from its STDIN.
#
# In the parent this returns once PIPE is open and unbuffered.  The manager
# never returns from this sub: it exits when its input is exhausted and all
# commands have finished.  Dies if the manager cannot be forked.
#
sub setup_manager {
  my $nb_running_proc = 0;

  # Forking open: returns the child's pid in the parent, 0 in the child and
  # undef when the fork failed.
  my $pid = open(PIPE, "|-");
  defined($pid) || die "Unable to start the command manager: $!\n";

  if ($pid == 0) {  # In the child process (manager)
    while (defined(my $cmd = <STDIN>)) { # Read the commands from the pipe
      chomp $cmd;
      if ($nb_running_proc >= $nb_max_cmds) { # Limit the number of commands
        wait();
        $nb_running_proc--;
      }

      # Each command runs in its own child process so that several of them
      # can run in parallel.
      my $cmd_pid = fork;
      if (!defined($cmd_pid)) {
        # Without this check the manager itself would exec the command.
        warn "Unable to fork for command '$cmd': $!\n";
        next;
      }
      if ($cmd_pid == 0) {
        # Close-on-exec on STDIN: the command must not inherit the pipe
        # that carries the command list as its standard input.
        fcntl(STDIN, F_SETFD, FD_CLOEXEC) or die "Can't set close-on-exec flag: $!\n";
        # exec only returns on failure; leave at once so that this process
        # does not fall back into the manager's read loop.
        exec($cmd) or warn "Unable to execute '$cmd': $!\n";
        exit 127;
      }
      $nb_running_proc++;
    }
    # Clean up the defunct processes
    while ( wait() != -1 ) { }

    # The manager has finished its work, so this process ends here
    exit 0;
  }

  # In the parent (main): flush after each write to PIPE, then restore the
  # previously selected handle so that a bare print elsewhere cannot end up
  # in the manager's command stream.
  my $previous_handle = select(PIPE);
  $| = 1;
  select($previous_handle);
}




# -----------------------------------------------------------------------
# Parse the command-line arguments: extract the list of files, the optional
# -n / -p options and the command to execute for each file (everything after
# "-exec").  The file names are then resolved by running true_path.cfsrip
# remotely and grouped into chunks short enough for one remote command.
#
# Sets the globals $cmd_str, $nb_max_cmds, $shell_cmd, %filenames,
# %retrieve_status, %file_list_split and %file_list_host.
# Calls usage() (which exits) when the command line cannot be parsed, and
# dies when true_path.cfsrip cannot be started.
#
sub get_arguments {

  my @file_list = ();

  my $arg_list_str = join(' ', @ARGV);

  # Separate the list of files (plus the options, if any) from the command
  # to execute.
  $arg_list_str =~ /(.*) -exec (.*)/ || usage();
  my $file_list_n_opt = $1;
  $cmd_str = $2;

  # Separate the list of files from the options: -n (number of commands in
  # parallel) and -p (remote shell).
  my $file_list_str;
  if ( $file_list_n_opt =~ /(.*?) (-[np].*)/m ) {
     $file_list_str = $1;
     my $other_opts_str = $2;
     if ($other_opts_str =~ /-n(.*)/) {
       my $nb_cmds_str = $1;
       if ( $nb_cmds_str =~ /\A\D*(\d+)\D*\Z/m ) {
          $nb_max_cmds = $1;
       } else { usage(); }
     }
     if ($other_opts_str =~ /-p\s+(\S+)/m) {
       my $shell_cmd_str = $1;
       # Only ssh is recognised; any other value keeps the current $shell_cmd.
       if ($shell_cmd_str =~ /ssh/m) {
         # $shell_cmd="ssh -o noneswitch=yes";
         $shell_cmd = "ssh";
       }
     }
  } else {
     $file_list_str = $file_list_n_opt;
  }

  if ($file_list_str eq "-") {
    # Build the list of files from STDIN.  split(' ') already discards the
    # line terminator, so chomp is enough (chop could eat a real character
    # on a last line without newline).
    while (defined(my $line = <STDIN>)) {
      chomp $line;
      push(@file_list, split(' ', $line));
    }
  } else {
    # The list of files is given on the command line
    @file_list = split(' ', $file_list_str);
  }

  # First pass: split the raw list so that one command does not exceed
  # $max_cmd_length characters.  These chunks are temporary: patterns in the
  # names are only expanded on the CFS side.
  my %pending_chunk = ();
  my $i = 1;
  $pending_chunk{$i} = "";
  # defined(): a file named "0" must not end the loop.
  while (defined(my $this_file = shift(@file_list))) {
     if ((length($pending_chunk{$i}) + length($this_file)) > $max_cmd_length) {
        $i++;
        $pending_chunk{$i} = "";
     }
     $pending_chunk{$i} .= " $this_file";
  }

  # Second pass: resolve the names remotely and rebuild the final chunks.
  $i = 0;
  $file_list_split{$i} = "";
  # Numeric sort keeps the chunks in input order beyond nine chunks.
  foreach my $chunk_id (sort { $a <=> $b } keys %pending_chunk) {
     next if $pending_chunk{$chunk_id} eq "";   # nothing to resolve

     # The command string is deliberately given to the shell as a whole.
     open(my $true_path, "$shell_cmd $cfsrip_host true_path.cfsrip $pending_chunk{$chunk_id} |") || die "Unable to get absolute path names using true_path.cfsrip command: $!\n";
     while (defined(my $line = <$true_path>)) {
       chomp $line;
       # Expected line: "<name> <absolute path>"; anything else is skipped
       # instead of being recorded under an empty key.
       my ($rel_file, $abs_file) = $line =~ /(\S+) (\S+)/;
       next unless defined($abs_file);

       $abs_file =~ s/\/+/\//g;   # collapse every run of slashes
       $filenames{$abs_file} = $rel_file;
       $retrieve_status{$abs_file} = "Not retrieved on disk, internal problem.";

       # Rebuild "file_list_split" so that one command does not exceed
       # $max_cmd_length characters.  NB: the names are now expanded.
       if ((length($file_list_split{$i}) + length($rel_file)) > $max_cmd_length) {
          $i++;
          $file_list_split{$i} = "";
          $file_list_host{$i} = "";
       }

       # Identify the host serving the files: decided once per chunk, from
       # the first file of the chunk.
       if (!defined($file_list_host{$i}) || $file_list_host{$i} eq "") {
          my @abs_path_breaks = split(/\//, $abs_file);
          shift(@abs_path_breaks);   # field before the leading "/" (assumes an absolute path)
          shift(@abs_path_breaks) if (@abs_path_breaks && $abs_path_breaks[0] =~ /^home$/i);

          my $filesystem = $abs_path_breaks[0];
          my $host = defined($filesystem) ? $file_servers{$filesystem} : undef;

          # Default to "cfs" if the file does not reside on cfs1 or cfs2
          $host = "cfs" if (!defined($host) || $host eq "");
          $file_list_host{$i} = $host;

          # Replace "cfs" in the command with the host serving the file
          # system.  The resolved host is used, so that an unknown file
          # system leaves "cfs" in place instead of an empty host name.
          # (No /g: at most one occurrence of each form is rewritten here.)
          $cmd_str =~ s/ cfs / $host /;
          $cmd_str =~ s/cfs:/$host:/;
       }
       $file_list_split{$i} .= " $rel_file";

     }
     close($true_path);
  }

  # Drop chunks that received no file, so that no remote command is issued
  # with an empty file list (and no host).
  foreach my $chunk_id (keys %file_list_split) {
     delete $file_list_split{$chunk_id} if $file_list_split{$chunk_id} eq "";
  }
}




# ----------------------------------------------------------------------------
# Print the usage message on STDERR and exit with status 1 (never returns).
# The remote_shell paragraph describes what the option parsing actually
# does: only ssh is recognised and the default is not rsh.
#
sub usage {
    print STDERR <<'END';
 -----------------------------------------------------------------------------
  USAGE:
  cfsrip file... [-n nb_cmds] [-p remote_shell] -exec command

        file	: File (or list of files) to retrieve from the CFS
                  NB: If name of the file is -, cfsrip will take the list of
                      files from the standard input.
        nb_cmds	: Maximum number of commands to run in parallel (at any
                  given time). Default is 1 command at a time.
	remote_shell : Remote shell used to connect to cfs.  A value
		  containing "ssh" selects plain ssh; any other value keeps
		  the default, which is "ssh -o NoneSwitch=yes".
        command	: Command to execute for each file retrieved.
		  The command argument "{}" is replaced by the current
		  filename (similar to the "find" command).
		  Also, the string "basename{}" is replaced by the filename
		  only (without the preceding directory path).
		  STDIN should be closed for better use; thus if command starts
		  with "rsh", then it should be invoked as "rsh -n cfs ...".
		  As soon as a file is retrieved from tape to disk (on the CFS),
                  the command, along with its arguments, is executed as a child
                  process (in order to have the "commands" executed in parallel).

 -----------------------------------------------------------------------------

END
    exit 1;
}
