#!/bin/sh
#
# normalize - normalize data in ASCII (or CSV) file
#
# Usage:  normalize [-c 'columns'] [-m mult] [-d] file ...
#
# where:  -c 'columns'	specifies the column number(s) to be normalized,
#			using a comma-separated list of numbers
#			(default is last column only)
#	  -m mult	specifies the multiplier for scaling normalized data
#			(default is 1, to normalize to +/- 1)
#	  -d		specifies that columns to be normalized are duplicated,
#			i.e. both original and normalized values will be output
#			(default is to replace original with normalized values)
#	  file		specifies one or more ASCII file names to be processed
#			(no default, file must be specified, - for std input)
#
# Normalizes data points in one or more columns of input by dividing by the
# maximum absolute value in each column, and optionally scaling by a given
# multiplication factor.  Input and output are in ASCII format, with columns
# separated by space, tab or comma.
#
# Copyright (c) 2006, Gilles Detillieux, Spinal Cord Research Centre,
# University of Manitoba.  All Rights Reserved.
#

# Option defaults: normalize the last column only, scale to +/- 1, and
# replace (rather than duplicate) the normalized columns.
copt= mopt=1
dopt=0

# parse_options ARG...
#
# Scans the leading options in ARG... and records them in the globals
# copt (column list), mopt (multiplier) and dopt (duplicate flag).
# Scanning stops at the first non-option argument or at a lone "-"
# (standard input).  On return:
#   nconsumed - number of leading arguments that were options/values
#   badopt    - 1 if an unknown option was seen or -c/-m lacked its value
# Option values may be attached (-c1,3  -m.5  -m-2) or given as the next
# argument (-c 1,3  -m .5).  The help options print the header comment of
# this script and exit.
parse_options() {
	nargs=$#
	badopt=0
	while [ "$#" -gt 0 ]
	do
		case "$1" in
		-\?|-help|--help)
			sed -n '3,/^# Univ/s/^#/ /p' "$0"; exit ;;
		-c[0-9]*|-c[-:][0-9]*)
			copt=`expr "x$1" : 'x-c\(.*\)'`; shift ;;
		-m[.0-9]*|-m-[.0-9]*)
			mopt=`expr "x$1" : 'x-m\(.*\)'`; shift ;;
		-c|-m)
			# The value must be present; a bare trailing -c or -m
			# is reported as a usage error instead of shifting
			# past the end of the argument list.
			if [ "$#" -lt 2 ]
			then
				badopt=1
				break
			fi
			case "$1" in
			-c)	copt=$2 ;;
			-m)	mopt=$2 ;;
			esac
			shift; shift ;;
		-d)	dopt=1; shift ;;
		-)	break ;;
		-*)	badopt=1; break ;;
		*)	break ;;
		esac
	done
	nconsumed=`expr "$nargs" - "$#"`
}

# A function cannot shift the positional parameters of the script, so the
# result is applied here: drop the parsed options, or drop everything on an
# error so that the argument-count check that follows prints the usage text.
parse_options ${1+"$@"}
if [ "$badopt" -ne 0 ]
then
	set --
elif [ "$nconsumed" -gt 0 ]
then
	shift "$nconsumed"
fi

# At least one file argument must remain at this point; otherwise print the
# short usage summary on standard error and fail.
if [ "$#" -eq 0 ]
then
	echo "Usage:  normalize [-c 'columns'] [-m mult] [-d] file ...
	or normalize --help	for detailed usage information" >&2
	exit 1
fi

# is_valid_mult VALUE
#
# Succeeds only when VALUE is a plain decimal number: optional leading "-",
# digits with an optional decimal point (or a decimal point followed by
# digits), and an optional exponent.  The whole string is checked, so text
# that merely starts like a number (1abc, 1;..., a lone ".") is rejected.
is_valid_mult() {
	awk -v m="$1" 'BEGIN {
		exit !(m ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$/)
	}' </dev/null
}

# Reject a malformed scaling factor before it reaches the awk stage.
# (mopt is normally always set; the default only covers the unset case.)
is_valid_mult "${mopt-1}" || {
	echo "$0: Invalid number for scaling factor: -m $mopt" >&2
	exit 1
}

# normalize_files FILE...
#
# Reads the named files as a single stream and writes the normalized table
# to standard output.  Uses the globals set by the option parser:
#   copt - column list to normalize ("" means the last column only)
#   mopt - multiplier applied after dividing by the column maximum
#   dopt - 1 to print the original value before each normalized value
# Each selected column is divided by the largest absolute value found in it.
# A column whose maximum is zero is printed as 0 instead of dividing by zero.
# Returns the exit status of awk, which is non-zero when the column list is
# invalid (the message goes to standard error).
normalize_files() {
	# sed: on line 1 only, save the line, reduce it to whatever follows
	# the first number (the column separator), print that as an extra
	# leading line, then restore the line.  After that every comma becomes
	# a space so that default awk field splitting handles space, tab and
	# comma separated input alike.  "--" protects file names that start
	# with a dash.
	sed -e '1{h;s/^[[:blank:]]*[-0-9.e+]*\([^-0-9.]*\).*/\1/p;g;}; s/,/ /g' -- "$@" |
	awk -v dupcols="$dopt" -v mult="$mopt" -v columns="$copt" -v prog="$0" '
	BEGIN {
		# Settings arrive through -v rather than being pasted into the
		# program text; force the numeric ones to numbers.
		dupcols += 0;
		mult += 0;
		colspec = columns;	# untouched copy for the error message
		colsep = ",";
		ncols = 0;
		nrows = 0;
		nnormcols = 0;
		lastcol = 0;
		sub(/^[ \t][ \t]*/, "", columns);
		# Consume the list one item at a time.  An item is either N,
		# or -N / :N which also selects every column between the
		# previous item and N.
		while (columns != "") {
			col = columns;
			if (col ~ /^[-:]/) {
				sub(/^[-:]/, "", col);
				sub(/[-:, \t].*/, "", col);
				c = int(col);
				while (++lastcol < c) {
					normcol[lastcol] = 1;
					++nnormcols;
				}
			}
			sub(/[-:, \t].*/, "", col);
			if (col ~ /^[0-9]/) {
				lastcol = int(col);
				normcol[lastcol] = 1;
				++nnormcols;
			} else {
				msg = prog ": invalid column number " col " in " colspec;
				print msg | "cat 1>&2";
				exit 1;
			}
			# Drop the item just handled plus any separators.
			sub(/^[-:]/, "", columns);
			sub(/^[0-9]*/, "", columns);
			sub(/^[, \t]*/, "", columns);
		}
	}
	# The first record is the separator line inserted by sed; when it is
	# empty (single-column first line) the default "," is kept.
	NR == 1 && /^..*$/ {
		colsep = $0;
	}
	# Every later record is a data row.
	NR > 1 {
		if (nnormcols == 0) {
			# No -c list: normalize the last column of the first row.
			normcol[NF] = 1;
			++nnormcols;
		}
		if (ncols == 0) {
			# The first data row fixes the number of columns.
			ncols = NF;
			for (i = 1; i <= ncols; i++)
				maxabs[i] = 0.0;
		}
		for (i = 1; i <= ncols; i++) {
			if (normcol[i]) {
				# "+ 0" forces a numeric comparison even when
				# the cell is not a number (e.g. a header).
				x = $(i) + 0;
				if (x < 0)
					x = -x;
				if (maxabs[i] < x)
					maxabs[i] = x;
			}
			data[nrows, i] = $(i);
		}
		++nrows;
	}
	# All maxima are known only after the last row, so output happens here.
	END {
		for (n = 0; n < nrows; ++n) {
			for (i = 1; i <= ncols; i++) {
				if (i > 1)
					printf("%s", colsep);
				if (normcol[i]) {
					if (dupcols)
						printf("%s%s", data[n, i], colsep);
					if (maxabs[i] == 0) {
						# Nothing to scale by; avoid a
						# fatal division by zero.
						printf("0");
					} else {
						printf("%g", data[n, i] * mult / maxabs[i]);
					}
				} else {
					printf("%s", data[n, i]);
				}
			}
			printf("\n");
		}
	}'
}

# The usage check earlier in the script guarantees at least one file; the
# guard only ensures the pipeline never falls back to reading a terminal.
if [ "$#" -gt 0 ]
then
	normalize_files "$@"
fi
