#!/usr/bin/perl -w

my $helpstr = <<EOM;

getrundata - get data for experiment database from run files

Usave:  getrundata [directory ...]

Recursively descends specified directories, or current directory by default,
to find and catalogue all run files it finds.

- uses find to recursively descend directories looking for run files (*.frm)
- uses dumprun to get lengths and descriptions and to summarise tr & wf names
- outputs lines in format:
  "exptname","runname","path","seconds","descriptions","traces","waveforms"
- if available, these additional fields will appear after the waveforms field:
  "series","exptr","age","level","rec","stim","purp","nb","barriers","ttags"
  - seconds and nb are numbers
  - if are no barriers, nb is 0, and barriers is empty
  - rec, stim, barriers and ttags are multi-entry lists with
    ASCII GS (hex 1D) as entry separator
  - all other fields are text strings
  "date"
  - if available, this field will appear after all the others above
    it is available if it can be obtained from an ABF file via the
    axon2run utility, if the run has a start time recorded in its
    header, or if the date is obviously part of the exptname
  "start time"
  - if requested by -t option, this field is shown after all others
    (it is the time shown by lsrun -t, or axon2run -d for an ABF file
    if lsrun only estimates the time)

Copyright (c) 2003-2017, Gilles Detillieux, Spinal Cord Research Centre,
University of Manitoba.  All Rights Reserved.

EOM

# This is a rewrite of the original getrundata script from a shell script
# to Perl, for increased speed and reliability.

use strict;
use Cwd qw(cwd);
use POSIX qw(strftime);

my $times = 0;
while (@ARGV && $ARGV[0] =~ m/^-/ && (my $a = $ARGV[0])) {
   shift;
   while (($a = substr($a, 1)) && (($_) = ($a =~ /^(.)/))) {
      if (m/t/i) {
         ++$times;
      } else {
         die $helpstr;
      }
   }
}
for (my $i = 0; $i < @ARGV; ++$i) {
   # Ugly hack: make sure special characters in file/directory names are escaped
   $ARGV[$i] =~ s/([ \\\'\"\|\;\(\)\$\&])/\\$1/g;
}

#open(my $in, "find @ARGV -name '*.frm' -print0 | xargs -0 dumprun |") ||
open(my $in, "find @ARGV -name '*.frm' -print | sort | tr '\\012' '\\000' | xargs -0 dumprun |") ||
  die "$0: can't open pipe from find command\n";

my $wd = cwd;
my $gs = "\x1D";
my $state = '0';
my $name = "";
my $length = "";
my $descr = "";
my $wfs = "";
my $nwfs = 0;
my $run = "";
my $path = "";
my $expt = "";
my $prevexpt = "";
my $t = "";
my $series = "";
my $exptr = "";
my $age = "";
my $level = "";
my $rec = "";
my $stim = "";
my $purp = "";
my $barriers = "";
my $ttags = "";
my $date = "";
my $sttime = "";
my $mtime = 0;
my $mt = 0;
while (<$in>) {
   chomp;
   s/\s+$//;
   if (m/^Run: (.*)/) {
      $state = 'R';
      $name = $1;
      $name =~ s/\.frm$//;
      $run = $name;
      $run =~ s/^.*\///;
      $path = $wd . "/" . $name;
      $path =~ s/$wd\/\//\//;
      $path =~ s/\/\.\//\// while $path =~ m/\/\.\//;
      $path =~ s/\/[^\/.][^\/]*\/\.\.\//\//
          while $path =~ m/\/[^\/.][^\/]*\/\.\.\//;
      $path =~ s/\.\/[^\/][^\/]*\/\.\.\//\//
          while $path =~ m/\.\/[^\/][^\/]*\/\.\.\//;
      $path =~ s/\/*[^\/]*$//;
      $expt = $path;
      $expt =~ s/^.*\///;
      $prevexpt = "" if $name =~ m/.*000$/;
      if ($expt ne $prevexpt) {
         $prevexpt = $expt;
         $series = "";
         $exptr = "";
         $age = "";
         $level = "";
         $rec = "";
         $stim = "";
         $purp = "";
         $barriers = "";
      }
      $length = "";
      $descr = "";
      $wfs = "";
      $ttags = "";
      $date = "";
      $sttime = "";
      $mtime = 0;
   } elsif (m/^Header size:.*Magic/) {
      $state = 'H';
   } elsif (m/^Run length:.*\(([-+Ee.0-9]*) sec\)/) {
      $state = 'L';
      $length = $1;
   } elsif (m/^Start time:\t* *([-0-9]*) ([:0-9]*)/) {
      $state = 'D';
      $date = $1;
      $sttime = $2;
   } elsif (m/^Tr .* Name/) {
      $state = 'T';
      $nwfs = 0;
   } elsif (m/^WF .* Name/) {
      $state = 'W';
      $nwfs = 0;
      $wfs .= '","';
      if ($sttime eq "") {
         my @suff = (".frm", ".rhd", ".txt", ".abf", ".ABF", ".smr", ".SMR");
         $mt = 0;
         for (my $i = 0; $i < @suff; ++$i) {
            $t = $name . $suff[$i];
            $mt = (stat($t))[9] if -r $t;
            $mtime = $mt if $mt > 0 && ($mtime == 0 || $mtime > $mt);
         }
      }
   } elsif (m/^?.*$/ && $state eq 'R') {
      s/"/'/g;
      s/\\$//;		# final backslash likely a typo (next to Enter key)
      s/\\/\//g;
      $descr = $_;
   } elsif ((($t) = ($_ =~ m/^[- 0-9]* uV  (.*)$/)) && $state =~ m/[TW]/) {
      $t =~ s/ *\(.*=.*\)//;
      #$t =~ s/ *\(.*\)//;		# add for backwards compatibility?
      $t =~ s/,* *[\<\>\\\/] [0-9].* Hz//;
      $t =~ s/ /-/g;
      $t =~ s/"/'/g;
      $wfs .= " " if ++$nwfs > 1;
      $wfs .= $t;
      if ($sttime eq "" && $state eq 'W' &&
            (($t) = ($_ =~ m/^ *([0-9]+)[ 0-9]* uV/))) {
         $t = sprintf("%s.w%02d", $name, $t);
         if (-r $t) {
            $mt = (stat($t))[9];
            $mtime = $mt if $mt > 0 && ($mtime == 0 || $mtime > $mt);
         }
      }
   } elsif (m/^ *$/ && $state eq 'W') {
      $state = '0';
      if (open(my $ttin, "<$name.txt")) {
         my $purpln = 0;
         my $purpprev = "";
         while (<$ttin>) {
            chomp;
            s/\s+$//;
            last if m/^"INFORMATION"/;	# skip Spike2 text export file
            last if $. > 4000;		# whoah, too many lines, ttags unlikely!
            s/"/'/g;			# remove for backwards compatibility?
            s/\\$//;			# final backslash, likely typo
            s/\\/\//g;
            if (m/^.*[^a-z]ser[ies]*=([^;]*[^ ;]).*/i) {
               $t = $1;
               $t =~ s/, *[-=a-z0-9]*//gi;
               $series = $t if $t ne "";
            }
            if (m/^.*[^a-z]e[xp]*=([^;]*[^ ;]).*/i) {
               $t = $1;
               $t =~ s/,[, ]*[a-z0-9]*=.*//i;
               $t =~ s/, *,.*//;
               $t =~ s/, *(rd[0-9]+),.*//i;
               $exptr = $t if $t ne "";
            }
            if (m/^.*[^a-z]([^;]*[^ ;])=exp.*/i) {
               $t = $1;
               $t =~ s/,[, ]*[a-z0-9]*=.*//i;
               $t =~ s/, *,.*//;
               $t =~ s/, *(rd[0-9]+),.*//i;
               $exptr = $t if $t ne "";
            }
            if (m/^Tag.* s: ([ekb][zcs]) *$/i) {
               $t = $1;
               $exptr = $t if $t ne "" && $exptr eq "";
            }
            if (m/^.*[^a-z]age=([^;]*[^ ;]).*/i) {
               $t = $1;
               $t =~ s/, *.*//;
               $age = $t if $t ne "";
            }
            if (m/^Tag.* s: (|[-a-z0-9=, \/]*, *)(rd[0-9]+) *(|, *[-a-z0-9=, \/]*)$/i) {
               $t = $2;
               $age = $t if $t ne "";
            }
            if (m/^Tag.* s: day *([0-9]*) *rat *(|;.*)$/i) {
               $t = "RD" . $1;
               $age = $t if $t ne "";
            }
            if (m/^.*[^a-z]lev[els]*=([^;]*[^ ;]).*/i) {
               $t = $1;
               $t =~ s/,[, ]*[a-z0-9]*=.*//i;
               $t =~ s/, *,.*//;
               $level = $t if $t ne "";
            }
            if (m/^Tag.* s: ((|[-a-z0-9 ]*)(|trans)(ection|lesion)(| [a-z0-9 ]*[^ ])) *$/i) {
               $t = $1;
               $level = $t if $t ne "" && $level eq "";
            }
            if (m/^.*[^a-z]rec[ordings]*=([^;]*[^ ;]).*/i) {
               $t = $1;
               $t =~ s/, */$gs/g;
               $rec = $t if $t ne "";
            }
            if (m/^Tag.* s: ([vei][emngvric, ]*[rgc]) *$/i) {
               $t = $1;
               $t =~ s/, */$gs/g;
               $rec = $t if $t ne "" && $rec eq "";
            }
            if (m/^.*[^a-z]str*im[ulationreds]*=([^;]*[^ ;]).*/i) {
               $t = $1;
               $t =~ s/, */$gs/g;
               $stim = $t if $t ne "";
            }
            if (m/^Tag.* s: (|[-a-z0-9=, \/]*, *)([bse][-bspelchtina, ]*[lht])(|=stim.*) *$/i) {
               $t = $2;
               $t =~ s/, */$gs/g;
               $stim = $t if $t ne "" && $stim eq "";
            }
            if (m/^.*[^a-z]pur[pose]*=([^;]*[^ ;]).*/i) {
               $t = $1;
               $t =~ s/,[, ]*[a-z0-9]*=.*//i;
               if ($t ne "" && $t ne $purpprev) {
                  $purpprev = $t;
                  $t = $purp . " " . $t if ++$purpln > 1;
                  #$t = $purp . "|" . $t if ++$purpln > 1; # for backward comp.?
                  $purp = $t;
               }
            }
            if (m/^Tag.* s: (test.*[^ ]|does.*[^ ]|can .*[^ ]) *$/i) {
               $t = $1;
               $purp = $t if $t ne "" && $purp eq "";
            }
            if (m/^.*[^a-z0-9]([0-9]+) *b[aries]*=([^;]*[^ ;]).*/i) {
               $barriers = $1;
               $t = $2;
               $t =~ s/, */$gs/g;
               $barriers .= '","' . $t;
            }
            if (m/^Tag *[0-9]+, Episode *[0-9]+ *\@ *([-+Ee.0-9]+ *s:.*)/) {
               $t = $1;
               $t =~ s/  *$//;
               $ttags .= $gs if $ttags ne "";
               $ttags .= $t;
            }
         }
         close($ttin);
      }
      $t = "$series$exptr$age$level$rec$stim$purp$barriers$ttags";
      if ($t ne "" || $times) {
         if ($sttime eq "") {
            my $dt = "";
            if (-r "$name.abf") {
               $dt = `axon2run -d '$name'.abf 2>/dev/null`;
               chomp $dt;
               $dt =~ s/: .*//;
               $sttime = $dt;
               $sttime =~ s/.* //;
            }
            if ($sttime eq "" && $mtime > 0) {
               # have to subtract rounded-up run length from mtime
               my $rl = int($length);
               ++$rl if $length > $rl;
               $mtime -= $rl;
               #my @lt = localtime($mtime);
               #$sttime = sprintf("%02d:%02d:%02d~", $lt[2], $lt[1], $lt[0]);
               $sttime = strftime("%H:%M:%S~", localtime($mtime));
            }
            if ($dt eq "" &&
                  $expt =~ m/(\d\d)(jan|feb|mar|apr|may|ju[nl]|aug|sep|oct|nov|dec)(\d\d)/i) {
               $dt = "20$1-$2-$3";
               $dt =~ s/^209/199/;
               $dt =~ s/jan/01/i; $dt =~ s/feb/02/i; $dt =~ s/mar/03/i;
               $dt =~ s/apr/04/i; $dt =~ s/may/05/i; $dt =~ s/jun/06/i;
               $dt =~ s/jul/07/i; $dt =~ s/aug/08/i; $dt =~ s/sep/09/i;
               $dt =~ s/oct/10/i; $dt =~ s/nov/11/i; $dt =~ s/dec/12/i;
            }
            if ($dt eq "" && $mtime > 0) {
               #my @lt = localtime($mtime);
               #$dt = sprintf("%04d-%02d-%02d", 1900+$lt[5], $lt[4]+1, $lt[3]);
               $dt = strftime("%Y-%m-%d", localtime($mtime));
            }
            $date = $dt;
            $date =~ s/ .*//;
         }
      }
      if ($t ne "") {
         $t = '","';
         $barriers = "0$t" if $barriers eq "";
         $t = "$series$t$exptr$t$age$t$level$t$rec$t$stim$t$purp$t$barriers$t$ttags";
         $wfs .= '","' . $t;
         $wfs .= '","' . $date if $date ne "";
      }
      $wfs .= '","' . $date . " " . $sttime if $times && $sttime ne "";
      $t = '","';
      $t = "\"$expt$t$run$t$path$t$length$t$descr$t$wfs\"";
      print "$t\n";
   }
}
close($in);

