#!/bin/bash
#
#   Usage: gitrip [options] commit file_path [file_path ...] [var=val]
#          gitrip -q [options] file_path [file_path ...] [var=val]
#          Quantities in square brackets are optional
#
# Purpose: Extract files from a particular commit of a git repository
#          Output files are placed in the invoking directory
#
# Options:
# All options begin with a dash (-)
#   -c  ...Use the default coupler repository
#   -n  ...Use the default NEMO repository
#   -j  ...Use the default cccjob repository
#   -q  ...Query the user for the value of commit
#          In this case all non-option command line args are file names
#   -h  ...show this usage message
#
# Definitions:
#   repo=PATH      ...full path to the git repository
#   commit=COMMIT  ...the commit from which the files are taken
#
# Examples:
#   To extract the files sbccpl.F90 and sbc_ice.F90 from the tip of
#   the master branch in the default NEMO repository try
#     gitrip -n master sbccpl.F90 sbc_ice.F90
#
#   To extract the file cpl_prelude from commit e2187a0 in the default
#   coupler repository try
#     gitrip -c e2187a0 cpl_prelude
#
#   To browse all commits in a particular branch of the default NEMO
#   repository, then select a particular commit from which the file
#   xxyyzz will be extracted use the following command
#     gitrip -n -q xxyyzz
#   First select the branch you want from the list provided. Each commit
#   will appear on a separate line and the first field of each line will
#   be the abbreviated sha1 hash, a commit string you can feed to gitrip
#   when it prompts for the commit after you exit the pager.
#
#   To extract the files f1 and f2 from commit a1b2c3 of a user supplied
#   git repository try
#     gitrip a1b2c3 f1 f2 repo=/path/to/local/git/repo
#
########################################################################
#
# Larry Solheim ...Dec 2016

# Locate this script so that usage() can read its header comment.
# "type -P" resolves a bare command name through PATH and returns an explicit
# path unchanged when that path is executable. When it finds nothing (e.g. the
# script was started as "bash gitrip") fall back to $0 itself so that
# FULLPATH is never empty.
FULLPATH=$(type -P -- "$0" 2>/dev/null)
[[ -n "$FULLPATH" ]] || FULLPATH=$0
Runame=${FULLPATH##*/} # file name of this script, used as a message prefix
# Print an optional message followed by the usage text held in the header
# comment of this script, then exit.
#   usage [-e] [message ...]
#     -e  ...exit with status 1 (without -e the exit status is 0)
# All output goes to stderr. Reads the globals FULLPATH (file whose header is
# printed) and Runame (prefix for the message). This function never returns.
usage() {
  # getopts tracks its position in OPTIND. Use a local copy starting at 1 so
  # the parse does not depend on any earlier getopts activity in this shell.
  local OPTIND=1 opt err_exit=0
  while getopts e opt; do
    case $opt in
      e) err_exit=1 ;;
    esac
  done
  shift $((OPTIND - 1))

  [[ -n "$1" ]] && echo >&2 "${Runame}:" "$@"
  echo >&2 " "
  # Print the header from line 3 up to (not including) the first line that
  # starts with ###, stripping the leading "# " from each line. Bare "#" and
  # blank lines are first padded so that they come out as empty lines.
  sed >&2 -n '/^###/q; s/^#$/# /; s/^ *$/# /; 3,$s/^# //p;' "$FULLPATH"
  exit "$err_exit"
}

# Write the arguments, separated by single spaces, without a trailing newline.
# printf is used instead of probing whether echo understands -n: it behaves
# the same everywhere and never treats an argument as an option or as an
# escape sequence.
echo_n() {
  local IFS=' ' # "$*" joins on the first character of IFS
  printf '%s' "$*"
}

# Suffix for scratch file names: day of year, time of day (HHMMSS) and the
# process id of this shell
stamp="$(date '+%j%H%M%S')$$"

# Defaults for values that may be set from the command line

# query: 1 requests an interactive selection of the commit, in which case
# every non-option, non-definition command line arg is taken as a file name
query=0

# commit: the commit in repo from which the requested files are taken
commit=

# repo: the git repository from which the requested files are extracted
repo=

# Process command line arguments
#   cl_args   ...non-option, non-definition args in the order they were given
#   file_list ...declared here, filled in later from cl_args
declare -a cl_args file_list

# Classify every argument as an option (-x), a definition (var=val) or a
# positional argument.
#   Options      set the globals repo or query, or show the usage message.
#   Definitions  assign val to the shell variable var (e.g. repo, commit).
#                var must be a valid identifier and val must not be blank.
#   Anything else is appended to the global array cl_args.
# Any invalid argument results in a message and exit status 1.
parse_cl_args() {
  local arg opt var val
  local -r name_re='^[A-Za-z_][A-Za-z0-9_]*$'
  for arg in "$@"; do
    case $arg in
      -*) # An option
        opt=${arg#-}
        case $opt in
          h) usage ;;
          a) echo "No AGCM default repo is available"; exit 1 ;;
          c|n|j)
            # The default repositories all live below $CCRNSRC
            [[ -n "$CCRNSRC" ]] ||
              { echo "$(basename "$0"): CCRNSRC must be defined with the -$opt option"; exit 1; }
            case $opt in
              c) repo=$CCRNSRC/coupler_dir/coupler.git ;;
              n) repo=$CCRNSRC/ocean_dir/nemo-CCCma.git ;;
              j) repo=$CCRNSRC/cccjob_dir/cccjob.git ;;
            esac
            ;;
          q) query=1 ;;
          *) echo "Unknown option -$opt"; exit 1 ;;
        esac
        ;;
      *=*) # A definition: split at the first "="
        var=${arg%%=*}
        val=${arg#*=}
        # Refuse anything that is not a plain identifier. The name comes
        # straight from the command line and must never be evaluated as code.
        [[ "$var" =~ $name_re ]] ||
          { echo "Invalid command line arg --> $arg <-- Not a valid variable name."; exit 1; }
        # A value consisting only of white space counts as empty
        [[ -n "${val//[[:space:]]/}" ]] ||
          { echo "Invalid command line arg --> $arg <-- Empty value."; exit 1; }
        # Assign without eval so the value is stored exactly as given
        printf -v "$var" '%s' "$val"
        ;;
      *) # Push anything else onto cl_args
        cl_args+=("$arg")
        ;;
    esac
  done
}
parse_cl_args "$@"

# Ask the user to pick a branch and then a commit, and store the answer in
# the global variable commit.
# Globals:  repo (read) ...directory of the git repository, assumed valid
#           commit (written)
# Uses echo_n for the prompts. Does nothing unless stdin is a terminal or a
# pipe. Exits with status 1 when the user enters an empty branch or commit.
read_commit() {
  [[ -t 0 || -p /dev/stdin ]] || return 0

  local branch
  # List the branches in this repo. A failure to list them is not fatal,
  # the user may still know which branch to ask for.
  echo "Select a branch from the following list to see all commits therein."
  (cd "$repo" && git branch) || :
  echo_n "Choose a branch: "
  read -r branch
  [[ -n "$branch" ]] || { echo "$(basename "$0"): You must choose a branch"; exit 1; }

  # One line per commit, starting with the abbreviated hash. The trailing --
  # makes git treat the answer as a revision rather than a path.
  (cd "$repo" && git log --format="%h %ci%d --- %s" "$branch" --) || :
  commit=''
  echo_n "Choose a commit: "
  read -r commit
  [[ -n "$commit" ]] || { echo "$(basename "$0"): You must choose a commit"; exit 1; }
}

# Sanity checks
[[ -n "$repo" ]] ||
    usage -e "No repository has been provided. Try adding repo=... to the command line."

# Currently, the repository must be a local directory
[[ -d "$repo" ]] || usage -e "$repo is not a directory"

if [[ $query == 1 ]]; then
  # In this case all non-option, non-definition command line args should be file names
  (( ${#cl_args[@]} > 0 )) ||
    usage -e "No file names were provided on the command line"
  # Invoke a dialogue with the user that will result in a definition for commit
  read_commit
fi

# Counts are compared arithmetically; inside [[ ]] the > operator compares strings
if [[ -z "$commit" ]]; then
  # Set the first non-option non-definition command line arg to be the commit
  (( ${#cl_args[@]} > 0 )) && commit=${cl_args[0]}
  # All remaining command line args should be file names
  (( ${#cl_args[@]} > 1 )) && file_list=("${cl_args[@]:1}")
else
  # If commit is supplied via a definition on the command line or via read_commit
  # then all non-option, non-definition command line args should be file names
  (( ${#cl_args[@]} > 0 )) && file_list=("${cl_args[@]}")
fi

if [[ -z "$commit" ]]; then
  echo "$(basename "$0"): No commit has been provided."
  exit 1
fi

# Verify that this names exactly one object in the repository and replace it
# with the full hash. The result goes to a separate variable first so that
# the error message can still show what the user asked for.
resolved_commit=$(cd "$repo" && git rev-parse --verify "$commit") ||
    { echo "$(basename "$0"): Commit $commit is not present in repository $repo"; exit 1; }
commit=$resolved_commit

(( ${#file_list[@]} > 0 )) ||
    usage -e "No file names were provided on the command line"

# Use a temporary file for output to avoid overwriting a local file if
# git-cat-file results in an error
tmp_file=tmp_gitrip_$stamp

# Read paths from stdin, one per line, and print those whose last component
# is exactly $1. The name is compared literally, so characters such as "."
# or "*" in it have no special meaning. Always returns 0.
match_basename() {
  local name=$1 candidate
  while IFS= read -r candidate; do
    if [[ "${candidate##*/}" == "$name" ]]; then
      printf '%s\n' "$candidate"
    fi
  done
  return 0
}

# Process each entry in file_list in the order provided on the command line
for file in "${file_list[@]}"; do
  # Determine a path name for this file relative to the repository root
  if [[ $file == */* ]]; then
    # Assume this is already a path relative to the repository root
    path=$file
  else
    # Look for a file by this name anywhere in the tree of this commit.
    # git-ls-tree prints one path per line, so collect them line by line.
    matches=()
    while IFS= read -r candidate; do
      matches+=("$candidate")
    done < <(cd "$repo" && git ls-tree -r --name-only "$commit" | match_basename "$file")

    if (( ${#matches[@]} == 0 )); then
      echo "$(basename "$0"): Unable to find $file for commit $commit in $repo"
      exit 1
    fi
    if (( ${#matches[@]} > 1 )); then
      # The name is ambiguous. This is a fatal error for now.
      echo "$(basename "$0"): More than 1 file was found matching --> $file <--"
      printf '  %s\n' "${matches[@]}"
      exit 1
    fi
    path=${matches[0]}
  fi

  # Identify the object given the commit and path relative to repo root
  object=${commit}:$path

  # Full pathname to the output file containing the requested object
  output_file=$PWD/${file##*/}

  # Write this file to the output directory
  (cd "$repo" && git cat-file -p "$object") > "$tmp_file" ||
      { echo "$(basename "$0"): git-cat-file failed for object $object"; rm -f "$tmp_file"; exit 1; }
  mv "$tmp_file" "$output_file" ||
      { echo "$(basename "$0"): Unable to create $output_file"; rm -f "$tmp_file"; exit 1; }

  if [[ -t 0 || -p /dev/stdin ]]; then
    # stdin is a terminal or a pipe, so report progress
    echo "Created ${output_file##*/}"
  fi
done
