# # (gH)   -_-  preLefse.pl  ;  TimeStamp (unix) : 21 Octobre 2016 vers 16:03

#################################################################################
#
# this script helps to insert and delete lignes in a file
#
#################################################################################

print "\n preLefse Management script (gH) version 2.07\n\n" ;

use strict ;
use warnings ;
use POSIX qw/ceil/;
use Getopt::Long qw(GetOptions);

# check if the parameter dataFile is present

if ($#ARGV==-1) {
  print " MANDATORY PARAMETER MISSING.\n\n" ;
  print " Syntax: perl preLefse.pl INPUT_FILE Action Parameter\n" ;
  print "         where Action    is -exp -ins -del -stat \n" ;
  print "         and   Parameter is -out -fic \n\n" ;
  print " Each action except -stat requires a number or an interval of numbers and the -out parameter.\n" ;
  print " Each parameter has to be followed by a file name.\n\n" ;
  print " Parameter -fic is required only for the -ins action and has to be found before the -out parameter.\n\n" ;
  &examples() ;
  print " Help: perl preLefse.pl -h or perl preLefse.pl --help\n\n" ;
  print " The associated help web page is:\n\n" ;
  print "       http://forge.info.univ-angers.fr/~gh/Metaseed/preLefse.php\n\n" ;
  exit(-1) ;
} ; # end if

# detailed help

if (($ARGV[0] eq "-h") or ($ARGV[0] eq "--help")) { &help() ; } ;

# check if the input file is present

my $dataFile = $ARGV[0] ;
my @lesArgs  = @ARGV ;

&checkFile("Data file ",$dataFile) ;

my $stats   ;
my $insert  ;
my $export  ;
my $delete  ;
my $insFile = "" ;
my $outFile = "" ;

GetOptions(
    '-stat'   => \$stats,
    '-exp=s'  => \$export,
    '-del=s'  => \$delete,
    '-ins=s'  => \$insert,
    '-out=s' => \$outFile,
    '-fic=s' => \$insFile,
) or die("Invalid or uncomplete syntax. Use option -h to read the help.\n\n") ;

# action to be executed

my $actionOk = 0 ;
my $nbl      = (split(" ",`wc -l $dataFile`))[0] ; # number of lines in the dataFile
if (defined $outFile) { system("rm -f $outFile") ; } ;

# check that only one action is used

my $nbactions = 0 ;
if (defined $stats)  { $nbactions++ ; } ;
if (defined $export) { $nbactions++ ; } ;
if (defined $delete) { $nbactions++ ; } ;
if (defined $insert) { $nbactions++ ; } ;

if ($nbactions>1) {
   print " Only one action can be executed at a time. STOP.\n\n" ;
   exit(-2) ;
} ; # fin si

if (defined $stats) {
    $actionOk = 1 ;
    print" Statistics for the file $dataFile:\n" ;
    my ($nbl,$nbc,$nbv,$minc,$maxc) = &stats($dataFile) ;
    print "  $nbl lines, ".sprintf("%0.1f",$nbc)." columns; " ;
    print "$nbv empty lines; " ;
    if ($nbv>0) { print "for non empty lines, " ; } ;
    print "min colums: $minc, max columns: $maxc." ;
    print "\n\n" ;
    exit(0) ;
} ; # fin de si

if (defined $export) {
   if ($export =~ /^[0-9]+/) {
       $actionOk = 1  ;
       my ($from,$to) = &checkInterval($export,"-exp") ;
       &checkFileName($outFile,"-exp","-out") ;
       system("touch $outFile") ;
       if ($to==$from) {
         print " Exporting line $from to $outFile.\n" ;
       } else {
         print " Exporting lines $from-$to to $outFile.\n" ;
       } ; # fin de si
       if ($from > $nbl) { &tooMuchFor($dataFile,$nbl,$from,"export") ; } ;
       $to   = &checkLineNumber($to,  $nbl,$dataFile) ;
       $from = &checkLineNumber($from,$nbl,$dataFile) ;
       &export($dataFile,$from,$to,$outFile,">") ;
   } else {
     &invalid("export",$export) ;
   } ; # fin de si
   &tellEmptyOutfile($outFile) ;
   print "\n" ;
   exit(0) ;
} ; # fin de si

if (defined $delete) {
   if ($delete =~ /^[0-9]+/) {
       $actionOk = 1  ;
       my ($from,$to) = &checkInterval($delete,"-del") ;
       &checkFileName($outFile,"-del","-out") ;
       system("touch $outFile") ;
       if ($to==$from) {
         print " Deleting line $from, the rest of the file is sent to $outFile.\n" ;
       } else {
         print " Deleting lines $from-$to, the rest of the file is sent to $outFile.\n" ;
       } ; # fin de si
       if ($from > $nbl) { &tooMuchFor($dataFile,$nbl,$from,"delete") ; } ;
       my $n1 = 1 ;
       my $n2 = $from - 1 ;
       my $n3 = $to + 1 ;
       my $n4 = (split(" ",`wc -l $dataFile`))[0] ;
       if ($n2==0) {
         if ($n3<=$nbl) {
           &export($dataFile,$n3,$n4,$outFile,">") ;
         } ; # fin de si
       } else {
         &export($dataFile,$n1,$n2,$outFile,">") ;
         if ($n3<=$nbl) {
           &export($dataFile,$n3,$n4,$outFile,">>") ;
         } ; # fin de si
       } ; # fin de si
   } else {
        &invalid("delete",$delete) ;
   } ; # fin de si
   &tellEmptyOutfile($outFile) ;
   print "\n" ;
   exit(0) ;
} ; # fin de si

if (defined $insert) {
   if ($insert =~ /^[0-9]+$/) {
       $actionOk = 1  ;
       my ($from,$to) = &checkInterval($insert,"-ins") ;
       &checkFileName($insFile,"-ins","-fic") ;
       &checkFile("File to insert",$insFile) ;
       &checkFileName($outFile,"-ins","-out") ;
       if ($to>$nbl) {
          my $newTo = $nbl + 1 ;
          print " File $dataFile has only $nbl lines. Value $to will be reduced to $newTo.\n" ;
          $to = $newTo ;
       } ; # fin de si
       print " Inserting file $insFile as the new line $to in $dataFile, result file is $outFile.\n" ;
       my $n1 = 1 ;
       my $n2 = $to -1 ;
       my $n3 = $to ;
       my $n4 = $nbl ;
       my $n5 = (split(" ",`wc -l $insFile`))[0] ;
       $n4 = &checkLineNumber($n4,$nbl) ;
       if ($n2==0) {
         &export($insFile,   1,$n5,$outFile,">") ;
         &export($dataFile,$n3,$n4,$outFile,">>") ;
       } else {
         &export($dataFile,$n1,$n2,$outFile,">") ;
         &export($insFile,   1,$n5,$outFile,">>") ;
         if ($n3<=$nbl) {
            &export($dataFile,$n3,$n4,$outFile,">>") ;
         } ; # fin de si
       } ; # fin de si
   } else {
        &invalid("insert",$insert) ;
   } ; # fin de si
   &tellEmptyOutfile($outFile) ;
   print "\n" ;
   exit(0) ;
} ; # fin de si

if ($actionOk==0) {
  print " No action to execute for file $dataFile.\n" ;
  print " Please use -stat, -del, -ins or -exp.\n" ;
} ; # fin si

print "\n" ;

exit(0) ;

####################################################### subs

sub help {

    print " The script preLefse.pl helps to delete or insert lines in a data file for LEfSE analysis.\n\n" ;
    print " First give the name of the file to be processed and an action to process:\n\n" ;
    print "   -stat to check the number of lines and columns.\n" ;
    print "   -exp  to export (copy) one line or a block of lines to the output file.\n" ;
    print "   -del  to delete one line or a block of lines.\n" ;
    print "   -ins  to insert a file at a specific position in the data file.\n\n" ;
    print " The number of lines to delete or to export is a single integer such as 3 or an interval line 1:7.\n\n" ;
    print " After the action -ins there must a parameter -fic to tell the name of the file that will be inserted.\n" ;
    print " For all the actions except -stat, there must a parameter -out to tell where the data should be written.\n\n" ;
    &examples() ;
    print " There is a web page that gives more details (in French) just open:\n\n" ;
    print "       http://forge.info.univ-angers.fr/~gh/Metaseed/preLefse.php\n\n" ;
    exit(-2) ;

} ; # fin de sub help

#######################################################

sub trim { my $s = shift; $s =~ s/^\s+|\s+$//g; return $s };

#######################################################

sub examples {

    print " Examples: perl preLefse.pl myDatat.txt -stat\n" ;
    print "           perl preLefse.pl example.txt -exp 1   -out oxygen.txt \n" ;
    print "           perl preLefse.pl example.txt -del 1:3 -out example2.txt\n" ;
    print "           perl preLefse.pl example.txt -ins 1   -fic example3.txt -out example4.txt\n" ;
    print "\n" ;

} ; # fin de sub examples

#######################################################

sub checkFile {

    my ($text,$file) = @_ ;
    if (!-e $file) {
      print " $text $file not found. STOP.\n\n" ;
      exit(-3) ;
    } ; # end if

} ; # fin de sub checkFile

#######################################################

sub stats {

    my $filen = $_[0] ;
    my $nbli  = 0  ; # lines
    my $nbco  = 0  ; # columns (fields)
    my $nbvi  = 0  ; # empty lines
    my $minco = 123456789  ; # minimum number of columns
    my $maxco =         0  ; # maximum number of columns

    open(FIC ,"<$filen") || die "\n impossible to open the file $filen \n\n" ;
    while (my $lig=<FIC>) {
       $nbli++ ;
       $lig = trim($lig) ;
       if ($lig eq "") {
         $nbvi++ ;
       } else {
         my @words= split(" ",$lig) ;
         my $nbwo = $#words ;
         $nbco   += $nbwo ;
         if ($nbwo>$maxco) { $maxco = $nbwo ; } ;
         if ($nbwo<$minco) { $minco = $nbwo ; } ;
       } ; #fin si
    } ; # fin de tant que
    close(FIC) ;

    if ($nbli>0) { $nbco /= $nbli ; } else { $minco = 0 ; } ;
    return ($nbli,$nbco,$nbvi,$minco,$maxco) ;

} ; # fin de sub stats

#######################################################

sub invalid {

    my ($act,$parm) = @_ ;
    print " Expecting a positive integer or a non empty interval of positive integers after $act, got $parm.\n" ;
    print " STOP.\n\n" ;
    exit(-4) ;

} ; # fin de sub invalid

#######################################################

sub checkInterval {

    my ($int,$act) = @_ ;
    my $vfrom = 0 ;
    my $vto   = 0 ;
    if ($int  =~ /^[0-9]+$/)     { $vfrom = $int ; $vto = $vfrom } ;
    if ($int  =~ /^(.*?):(.*)$/) { $vfrom = $1      ; $vto = $2    } ;
    if (($vfrom !~/[1-9][0-9]*$/) or ($vto !~/[1-9][0-9]*$/) or ($vfrom<=0) or ($vto<$vfrom) or ($vto<=0)) {
       &invalid($act,$int) ;
    } ; # fin de si
    return ($vfrom,$vto) ;

} ; # fin de sub checkInterval

#######################################################

sub checkFileName {

    my ($outf,$act,$parmn) = @_ ;
    if ($outf eq "") {
       print " For action $act you need to give the name of a file after the $parmn parameter.\n" ;
       print " STOP.\n\n" ;
       exit(-5) ;
    } ; # fin si

} ; # fin de sub checkFileName

#######################################################

sub tooMuchFor {

    my ($datafile,$fnbl,$vfrom,$act) = @_ ;
    print " File $datafile has only $fnbl lines. Value $vfrom is invalid. Nothing to $act.\n" ;
    print " STOP.\n\n" ;
    exit(-6) ;

} ; # fin de sub tooMuchFor

#######################################################

sub export {

    my ($input,$nfrom,$nto,$output,$wmode) = @_ ;
    my $nbe = 1 + $nto - $nfrom ;
    my $cmd = "head -n $nto $input | tail -n $nbe $wmode $output " ;
    system($cmd) ;

} ; # fin de sub export

#######################################################

sub checkLineNumber {

    my ($ln,$limit,$datafile) = @_ ;
    my $rv = $ln ;
    if ($ln>$limit) {
        print " Line number $ln is greater than real number of lines ($limit) in $datafile. Reducing it to $limit.\n" ;
        $rv = $limit ;
    } ;
    return( $rv ) ;

} ; # fin de sub checkLineNumber

#######################################################

sub tellEmptyOutfile {

    my $nf = $_[0] ;
    my $nl = (split(" ",`wc -l $nf`))[0] ;
    if ($nl==0) {
        print " Warning: the output file $nf is empty (0) lines.\n\n" ;
    } ; # fin si

} ; # fin de sub tellEmptyOutfile

############ normal end of script #######################################
