#!/bin/sh
#
# npra - calculate n-point rolling average of data in ASCII (or CSV) file
#
# Usage:  npra [-c 'columns'] [-n npoints] [-d] [-s] file ...
#
# where:  -c 'columns'	specifies the column number(s) to be averaged,
#			using a comma-separated list of numbers
#			(default is last column only)
#	  -n npoints	specifies the number of points to be averaged
#			(default is 5)
#	  -d		specifies that columns to be averaged are duplicated,
#			i.e. both original and smoothed values will be output
#			(default is to replace original with smoothed values)
#	  -s		specifies a strict n-point average, where the number
#			of output samples is less than the input so the 'n'
#			is constant throughout
#			(default is to reduce 'n' at start and end of list)
#	  file		specifies one or more ASCII file names to be processed
#			(no default, file must be specified, - for std input)
#
# Smooths data points in one or more columns of input by performing an
# n-point rolling average.  Input and output are in ASCII format, with fields
# separated by space, tab or comma.
#
# Copyright (c) 2006, Gilles Detillieux, Spinal Cord Research Centre,
# University of Manitoba.  All Rights Reserved.
#

# Option defaults:
#   copt  column list for -c (empty means "last column only")
#   nopt  number of points per average for -n
#   dopt  1 when -d is given (output original and smoothed values)
#   sopt  1 when -s is given (strict n-point average)
copt=
nopt=5
dopt=0
sopt=0

# Consume leading options and stop at the first file operand (or a lone "-").
# An unknown option, or -c/-n without a value, empties the argument list so
# that the usage check following this loop reports the problem.
while :
do
	case "$1" in
	-\?|-help|--help)
		# The header comment (line 3 through the copyright line) is the help text.
		sed -n '3,/^# Univ/s/^#/ /p' "$0"
		exit
		;;
	-c[0-9]*)
		# Value attached to the option, e.g. -c2,3
		copt=${1#-c}
		shift
		;;
	-c)
		# Never shift past the end of the arguments: some shells abort on that.
		[ "$#" -ge 2 ] || { set --; break; }
		copt=$2
		shift 2
		;;
	-n[0-9]*)
		# Value attached to the option, e.g. -n7
		nopt=${1#-n}
		shift
		;;
	-n)
		[ "$#" -ge 2 ] || { set --; break; }
		nopt=$2
		shift 2
		;;
	-d)	dopt=1; shift ;;
	-s)	sopt=1; shift ;;
	-)	break ;;
	-*)	set --; break ;;
	*)	break ;;
	esac
done

# At least one file operand (or "-") must be left once the options have been
# consumed; otherwise print the short usage summary on stderr and fail.
if [ "$#" -eq 0 ]
then
	echo "Usage:  npra [-c 'columns'] [-n npoints] [-d] [-s] file ...
	or npra --help	for detailed usage information" >&2
	exit 1
fi

# The -n value must be a positive decimal integer written without a leading
# zero.  Anything else is rejected here: an empty value, zero (it would be used
# as a divisor and modulus), fractions, exponents, or digits followed by other
# text (the value ends up inside the awk stage of this script).
case "$nopt" in
''|0*|*[!0-9]*)
	echo "$0: Invalid number of points to average: -n $nopt" >&2
	exit 1
	;;
esac

# Stage 1 (sed): print the text that follows the first number on line 1 (the
# field separator in use) as an extra leading line, then change every comma to
# a space so that default awk field splitting handles space-, tab- and
# comma-separated input alike.
# Stage 2 (awk): the extra leading line becomes the output separator; each
# later line is a data row pushed through a ring buffer holding the last
# npoints rows.  Shell values are handed over with -v so they are always data
# and never part of the awk program text.
sed -e '1{h;s/^[ 	]*[-0-9.e+]*\([^-0-9.]*\).*/\1/p;g;}; s/,/ /g' ${@+"$@"} |
 awk -v dupcols="$dopt" -v strictn="$sopt" -v npoints="$nopt" \
     -v columns="$copt" -v prog="$0" '
      # Report msg on stderr, flag the failure for the END rule, and stop.
      function fail(msg) {
          print prog ": " msg | "cat 1>&2";
          failed = 1;
          exit 1;
      }
      BEGIN {
          # Force the flags to numbers so that "0" and "" both test false.
          dupcols += 0;
          strictn += 0;
          # npoints is used as divisor and ring-buffer modulus: must be >= 1.
          if (int(npoints) < 1)
              fail("Invalid number of points to average: -n " npoints);
          npoints = int(npoints);
          # A row is written once nlag further rows have been read.
          nlag = int((npoints+1)/2);
          colsep = ",";
          ncols = 0;
          nrows = 0;
          navgcols = 0;
          lastcol = 0;
          # Parse the column list: numbers separated by comma or blank, with
          # "-" or ":" before a number extending a range from the previous one.
          spec = columns;
          sub(/^[ \t][ \t]*/, "", columns);
          while (columns != "") {
              col = columns;
              if (col ~ /^[-:]/) {
                  sub(/^[-:]/, "", col);
                  sub(/[-:, \t].*/, "", col);
                  c = int(col);
                  # Mark the columns strictly between the previous one and c.
                  while (++lastcol < c) {
                      avgcol[lastcol] = 1;
                      ++navgcols;
                  }
              }
              sub(/[-:, \t].*/, "", col);
              if (col ~ /^[0-9]/) {
                  lastcol = int(col);
                  avgcol[lastcol] = 1;
                  ++navgcols;
              } else
                  fail("invalid column number " col " in " spec);
              # Drop the item just handled plus any separators after it.
              sub(/^[-:]/, "", columns);
              sub(/^[0-9]*/, "", columns);
              sub(/^[, \t]*/, "", columns);
          }
      }
      # Line 1 is the separator line added by sed; keep "," if it is empty.
      NR == 1 && /^..*$/ {
          colsep = $0;
      }
      NR > 1 {
          # Without -c, average the last column of the first data row.
          if (navgcols == 0) {
              avgcol[NF] = 1;
              ++navgcols;
          }
          # The column count is fixed by the first data row.
          if (ncols == 0) {
              ncols = NF;
              for (i = 1; i <= ncols; i++)
                  sums[i] = 0.0;
          }
          # full: sums[] covers exactly npoints rows.
          # emit: row (nrows - nlag) can be written now, either from a full
          #       window or, unless -s was given, from the shorter window
          #       available at the start of the data.
          full = (nrows >= npoints);
          emit = (full || !strictn && nrows >= nlag);
          for (i = 1; i <= ncols; i++) {
              if (emit) {
                  if (i > 1)
                      printf("%s", colsep);
                  lagged = data[(nrows-nlag)%npoints, i];
                  if (avgcol[i]) {
                      if (dupcols)
                          printf("%s%s", lagged, colsep);
                      printf("%g", sums[i] / (full ? npoints : nrows));
                  } else
                      printf("%s", lagged);
              }
              if (avgcol[i]) {
                  # Slide the window: drop the oldest row once it is full.
                  if (full)
                      sums[i] -= data[nrows%npoints, i];
                  sums[i] += $(i);
              }
              data[nrows%npoints, i] = $(i);
          }
          if (emit)
              printf("\n");
          ++nrows;
      }
      END {
          # exit inside BEGIN may still run END; keep the failure status.
          if (failed)
              exit 1;
          # Fewer rows than npoints: shrink the window to what was read.
          if (npoints > nrows && !strictn)
              npoints = nrows;
          n = nrows;
          m = npoints;
          if (m > nrows)
              m = nrows;
          if (nlag > nrows)
              nlag = nrows;
          # Flush the rows still held back, shrinking the window by one row
          # each time; with -s only a full npoints window is written.
          while (nrows-nlag < n && m > 0) {
              if (strictn && m < npoints)
                  break;
              for (i = 1; i <= ncols; i++) {
                  if (i > 1)
                      printf("%s", colsep);
                  if (avgcol[i]) {
                      if (dupcols)
                          printf("%s%s", data[(nrows-nlag)%npoints, i], colsep);
                      printf("%g", sums[i]/m);
                      sums[i] -= data[nrows%npoints, i];
                  } else {
                      printf("%s", data[(nrows-nlag)%npoints, i]);
                  }
              }
              printf("\n");
              ++nrows;
              --m;
          }
      }'
