#!/bin/sh
#
# getttags -- get time tags from exp't protocol file and store in run.txt files
#
# Usage:  getttags [-v] protocol-file [directory ...]
#
# Matches times in protocol file to start times of runs in specified
# directories, or current directory by default, to find entries written
# during each run's capture, and stores them as time-tag entries in the
# run description files (runname.txt).
#
#	- protocol file can be CSV format, or an Excel or OpenOffice
#	  spreadsheet file (.xls, .xlsx, .ods)
#	- uses lsrun to get names, lengths and start times
#	- uses unoconv to convert Excel or OpenOffice spreadsheets to CSV
#	- looks for lines that start with a time of day in first sheet, or
#	  sheet labelled "Monitoring", matching these with run start times
#	- when an entry occurs during the time a run is captured, it gets
#	  appended to that run's .txt file
#	- this should be done before running getrundata, so that it can read
#	  the time tags from the .txt file
#
# The -v option causes more verbose output on screen, showing tags as read
#
# Copyright (c) 2016, Gilles Detillieux, Spinal Cord Research Centre,
# University of Manitoba.  All Rights Reserved.
#

# Regular-expression building blocks shared by the sed and awk code below.
#
#   tt      a time of day: one or more hour digits, then :MM:SS
#   maintt  a "keyword=" entry (ser=, exp=, age=, lev=, rec=, stim=, purp=,
#           bar=, ...)
#   gtt     /regex/ address matching a time that may be preceded by quotes
#           and blanks
#   gttp    sed script printing every line that holds a time, plus every
#           other line that holds a keyword= entry
tt='[0-9][0-9]*:[0-5][0-9]:[0-5][0-9]'
maintt='[SsEeAaLlRrPpBb][EeXxGgTtUuAa]*[RrPpEeVvSsCcIi]*[A-Za-z]*='
gtt="/\"* *${tt}/"
gttp=${gtt}'p;'${gtt}'!{/'${maintt}'/p;}'

# Consume leading option arguments.  Parsing stops at the first argument
# that is not a recognized option, leaving it (the protocol file) as $1.
#times=
verbose=
while [ "$#" -gt 0 ]
do
	case "$1" in
	-v|-V)
		# verbose: show tags and runs on screen as they are read
		verbose=y
		shift
		;;
#	-t|-T)	times=y ; shift ;;
	-\?|-help|--help)
		# the detailed help is this file's own header comment
		sed -n '3,/^# Univ/s/^#/ /p' "$0"
		exit
		;;
	*)
		break
		;;
	esac
done

# The protocol file argument is mandatory: without it, print a brief usage
# summary on stderr and stop.
if [ "$#" -eq 0 ]
then
	printf '%s\n' \
		"Usage:  getttags [-v] protocol-file [directory ...]" \
		"	or  getttags --help	for detailed usage information" >&2
	exit 1
fi

# unoconv does not fail gracefully on an unreadable input file, so that case
# is intercepted here.  Attempting the redirection makes the shell itself
# print the appropriate error message for the file before we exit.
test -r "$1" || {
	read x < "$1"
	exit 1
}

# Collect the candidate lines (timed entries and keyword= entries) from the
# protocol file into $ttags, choosing the reader by file name extension.
case "$1" in
*.xls|*.xlsx|*.ods)
	# Spreadsheet: convert to CSV with unoconv and filter the result.
	# If that yields nothing, retry asking for the "Monitoring" sheet.
	ttags=$(unoconv --stdout -f csv "$1" | sed -n -e "$gttp")
	if test -z "$ttags"
	then
		ttags=$(unoconv --stdout -f csv -S Monitoring "$1" | sed -n -e "$gttp")
	fi
	;;
*.asc|*.txt|*.csv)
	# Plain text: filter the file directly.
	ttags=$(sed -n -e "$gttp" "$1")
	;;
*)
	echo "getttags: protocol file must be CSV text or XLS/ODS spreadsheet" >&2
	exit 1
	;;
esac

# Nothing usable was extracted from the protocol file: report it and stop.
if test -z "$ttags"
then
	echo "getttags: unable to find time tags in spreadsheet $1" >&2
	exit 1
fi

# Drop the protocol file name; any remaining arguments are the directories
# handed on to lsrun.
shift

# Feed the extracted protocol lines, followed by the run listing produced by
# lsrun, through one awk program.  The protocol lines come first, so all tags
# are already stored in tag[]/tval[] when the first lsrun line is seen; each
# lsrun line is then compared against the stored tags and every tag falling
# inside that run is appended to <run>.txt.
#
# lsrun lines are recognized by their " (<seconds> s), " marker and are
# expected to carry "YYYY-MM-DD H:M:S" right after it (inferred from the
# patterns below -- confirm against lsrun's output format).
#
# printf is used rather than echo so that backslashes in the protocol text
# reach awk untouched (some /bin/sh echo builtins expand them).
# NOTE: the awk program is single-quoted shell text; never put an apostrophe
# in its comments.
(printf '%s\n' "$ttags"; lsrun -t -l ${@+"$@"}) |
    awk -v verbose="$verbose" "$gtt"' && $0 !~ / \([0-9][0-9.]* s\), / {
		# ---- timed protocol entry (holds a time, is not an lsrun line) ----
		# lead = text preceding the time on this line
		lead = $0;
		sub(/"* *'"$tt"'.*/, "", lead);
		# hms = the time field: from the time up to the first comma
		hms = substr($0, length(lead)+1);
		sub(/,.*/, "", hms);
		# split the field into hours, minutes and seconds
		h = hms; m = hms; s = hms;
		sub(/^[ "]*/, "", h); sub(/:.*/, "", h);
		sub(/^[ "0-9]*:/, "", m); sub(/:.*/, "", m);
		sub(/^[ "0-9:]*:/, "", s); sub(/[ APMapm]*$/, "", s);
		h += 0; m += 0; s += 0;
		if (hms ~ /[Aa][Mm]/) {
			if (h == 12) h = 0;		# 12 AM is hour 0
		} else if (hms ~ /[Pp][Mm]/) {
			if (h != 12) h += 12;		# 1-11 PM become 13-23
		} else if (h > 23) {
			# hours of 24 and up wrap around into the next day
			if (verbose == "y")
				print "Converting hour " h " to " h%24
			h = h % 24;
		}
		tt = substr($0, length(lead)+1);
		# remove time field, together with an AM/PM suffix and closing
		# quote if present (the hour parsing above accepts both forms)
		sub(/^"* *'"$tt"' *([AaPp][Mm])? *"* *, */, "", tt);
		# remove quotes and leading/trailing field separators:
		gsub(/"/, "", tt);
		sub(/[, ][, ]*$/, "", tt);
		sub(/^[, ][, ]*/, "", tt);
		tag[++n] = tt;
		tval[n] = h*3600 + m*60 + s;	# seconds since midnight
		if (verbose == "y")
			print "Got time tag " n " at " h ":" m ":" s " (" tval[n] " s): " tag[n];
	}
	$0 !~ '"$gtt"' && $0 ~ /'"$maintt"'/ {
		# ---- untimed entry: ser=, exp=, age=, lev=, rec=, stim=, purp=, bar=... ----
		tt=$0;
		gsub(/"/, "", tt);
		sub(/[, ][, ]*$/, "", tt);
		sub(/^[, ][, ]*/, "", tt);
		sub(/, *[Pp]urpose /, ",purp=", tt);
		tag[++n] = tt;
		tval[n] = 0;
		# flagged explicitly so that a real tag at 0:00:00 is not
		# mistaken for an untimed one
		untimed[n] = 1;
		if (verbose == "y")
			print "Got non-timed tag: " tag[n];
	}
	/ \([0-9][0-9.]* s\), / {
		# ---- lsrun line: name, length and start time of one run ----
		# start time of day follows the date after the length marker
		hms = $0;
		sub(/^.* \([0-9.]* s\), ....-..-.. /, "", hms);
		sub(/: .*$/, "", hms); sub(/~$/, "", hms);
		h = hms; m = hms; s = hms;
		sub(/^[ "]*/, "", h); sub(/:.*/, "", h);
		sub(/^[ "0-9]*:/, "", m); sub(/:.*/, "", m);
		sub(/^[ "0-9:]*:/, "", s);
		stval = h*3600 + m*60 + s;
		# run length is the number inside the parentheses, rounded up
		es = $0;
		sub(/ s\), .*/, "", es); sub(/.* \(/, "", es);
		es = int(es + 0.999);
		enval = stval + es;
		# run name is everything before the length marker
		run = $0;
		sub(/ \([0-9.]* s\), .*/, "", run);
		if (verbose == "y")
			print "Got run " ++nr " at " h ":" m ":" s " (" stval " s, length " es "s): " run;
		day = 24*3600;
		tn = 0;
		# target kept in a variable: an unparenthesized concatenation
		# after >> is not portable across awk implementations
		outfile = run ".txt";
		for (i = 1; i <= n; ++i) {
		    if (untimed[i] && fn == 0)
			tval[i] = stval;	# untimed tags go to the first run
		    # tag lies inside the run, also when the run crosses midnight
		    if (tval[i] >= stval && tval[i] <= enval ||
			  enval >= day &&
			      tval[i] >= stval-day && tval[i] <= enval-day) {
			s = tval[i]-stval;
			if (s < 0) s += day;
			printf("Tag %2d, Episode  1 @ %5d s: %s\n", ++tn, s, tag[i]) >> outfile;
			if (verbose == "y")
			  printf("Matched %d in %d-%d: Tag %2d, Episode  1 @ %5d s: %s\n", tval[i], stval, enval, tn, s, tag[i]);
		    }
		}
		# release the descriptor so that long run lists cannot exhaust
		# the open-file limit; harmless if nothing was written
		close(outfile);
		++fn;
	}'
