#!/bin/sh
#
# crosscorr - calculate Pearson's cross-correlation from time-series data
#
# Usage:  crosscorr [-x colnum] [-y colnum] [-l numlags] csvfile > output.csv
#
# where:  -x colnum	specifies the column number for the first variable
#			(default is 1)
#	  -y colnum	specifies the column number for the second variable
#			(default is 2)
#	  -l numlags	specifies number of lags to be calculated in either
#			direction (default is 150)
#	  csvfile	specifies the file name of an ASCII file using comma
#			separated values (CSV), holding at least two columns
#			(no default, file must be specified, "-" for std input)
#
# Copyright (c) 2005, Gilles Detillieux, Spinal Cord Research Centre,
# University of Manitoba.  All Rights Reserved.
#

# Parse leading command-line options.
#
# Sets:   xcol  column number of the first variable  (default 1)
#         ycol  column number of the second variable (default 2)
#         nlag  number of lags in either direction   (default 150)
# Leaves: the remaining operands (the CSV file name, or "-") in "$@".
#
# Each option accepts its value either as the next word ("-x 3") or attached
# ("-x3").  An unknown option, or an option whose value is missing, clears
# the positional parameters so that the operand-count check that follows the
# loop reports the usage message.
xcol=1 ycol=2 nlag=150
while :
do
	case "$1" in
	-\?|-help|--help)
		# The help text is this script's own header comment: line 3
		# through the line starting "# Univ", with the "#" blanked.
		sed -n '3,/^# Univ/s/^#/ /p' "$0"; exit ;;
	-x|-y|-l)
		# Value is the next word.  Check that it exists before
		# shifting: shifting past the end of the parameters is a
		# fatal error in some shells (e.g. dash), which would abort
		# with a cryptic message instead of the usage text.
		if [ "$#" -lt 2 ]
		then
			set --; break
		fi
		case "$1" in
		-x)	xcol="$2" ;;
		-y)	ycol="$2" ;;
		-l)	nlag="$2" ;;
		esac
		shift; shift ;;
	-x[0-9]*)	xcol=`expr x"$1" : 'x-x\(.*\)'`; shift ;;
	-y[0-9]*)	ycol=`expr x"$1" : 'x-y\(.*\)'`; shift ;;
	-l[0-9]*)	nlag=`expr x"$1" : 'x-l\(.*\)'`; shift ;;
	-)	break ;;		# "-" is an operand: read standard input
	-*)	set --; break ;;	# unknown option: force the usage message
	*)	break ;;		# first operand (or no arguments left)
	esac
done

# No operands left (none were given, or option parsing discarded them):
# print the synopsis on standard error and fail.
if [ "$#" -eq 0 ]
then
	echo "Usage:  crosscorr [-x colnum] [-y colnum] [-l numlags] csvfile > output.csv
	or crosscorr --help	for detailed usage information" >&2
	exit 1
fi

# Validate the option values: each must be a non-empty string consisting
# solely of decimal digits.  Checking every character (not just the first)
# matters because these values are handed to awk further down; something
# like "1abc" must be rejected here rather than reach the awk program.
# On failure a diagnostic naming the offending option is written to
# standard error and the script exits with status 1.

case "$xcol" in
''|*[!0-9]*)
	printf '%s\n' "$0: Invalid column number for first variable: -x $xcol" >&2
	exit 1
	;;
esac

case "$ycol" in
''|*[!0-9]*)
	printf '%s\n' "$0: Invalid column number for second variable: -y $ycol" >&2
	exit 1
	;;
esac

case "$nlag" in
''|*[!0-9]*)
	printf '%s\n' "$0: Invalid number of lags: -l $nlag" >&2
	exit 1
	;;
esac

# Read the CSV input and print one "lag,r" line for every lag from -nlag to
# +nlag.  Only lines whose first non-blank character is a digit, "-" or "."
# are treated as data, so header lines are skipped.
#
# The option values and the script name are passed with -v instead of being
# spliced into the program text, so they can never be parsed as awk code.
# The exit status is 1 when the input holds fewer than two samples or when
# either series is constant (the correlation is undefined in both cases).
awk -F, -v xcol="$xcol" -v ycol="$ycol" -v nlag="$nlag" -v prog="$0" '
	BEGIN {
	    # Force the -v values to numbers so that field selection and
	    # comparisons are numeric whatever text was passed in.
	    xcol = int(xcol); ycol = int(ycol); nlag = int(nlag); nd = 0;
	}
	/^[ 	]*[-0-9.]/ { xd[nd] = $(xcol); yd[nd] = $(ycol); nd++; }
	END {
	    if (nd < 2) {
		print prog ": Too few data samples in input: " nd > "/dev/stderr";
		exit 1;
	    }
	    if (nd-2 < nlag) {
		# Not enough samples for the requested range: clamp it.
		nlag = nd-2;
		print prog ": Too few data samples in input for specified lags: " nd " - using " nlag " lags" > "/dev/stderr";
	    }
	    # For calculation of r based on method explained in:
	    # http://astronomy.swin.edu.au/~pbourke/other/correlate/
	    # This technique is equivalent to the one described in:
	    # http://www.raunvis.hi.is/~kgm/Papers/RH-18-2003.pdf
	    # (without "pre-whitening")
	    #
	    # The means and the normalising factor are computed once over
	    # the full series.  Earlier attempts that normalised each lag
	    # over only its overlapping samples yielded a ccf that was a
	    # bit too high.
	    mx = 0.0; my = 0.0;
	    for (i = 0; i < nd; i++) {
		mx += xd[i]; my += yd[i];
	    }
	    mx /= nd; my /= nd;
	    sum_dx2 = 0.0; sum_dy2 = 0.0;
	    for (i = 0; i < nd; i++) {
		# Keep the deviations from the mean for reuse at every lag.
		dx[i] = xd[i] - mx; dy[i] = yd[i] - my;
		sum_dx2 += dx[i] * dx[i];
		sum_dy2 += dy[i] * dy[i];
	    }
	    sqsdxy = sqrt(sum_dx2 * sum_dy2);
	    if (sqsdxy == 0) {
		# A constant series has no variance; r would be a
		# division by zero.
		print prog ": Zero variance in input data, correlation is undefined" > "/dev/stderr";
		exit 1;
	    }
	    for (l = -nlag; l <= nlag; l++) {
		# Sum only over the samples where x[i] and y[i+l] both
		# exist, i.e. 0 <= i < nd and 0 <= i+l < nd.  Pairs falling
		# outside the series are skipped, not padded or wrapped.
		lo = (l < 0) ? -l : 0;
		hi = (l > 0) ? nd - l : nd;
		sum_dxy = 0.0;
		for (i = lo; i < hi; i++)
		    sum_dxy += dx[i] * dy[i+l];
		r = sum_dxy / sqsdxy;
		printf("%d,%g\n", l, r);
	    }
	}' ${@+"$@"}
