#!/usr/bin/perl
# vim:filetype=perl
#
# Copyright 2004-2006 Charles P. Wright
# Copyright 2004-2006 Erez Zadok
# Copyright 2004-2006 The Research Foundation of SUNY
# Copyright 2004-2006 Stony Brook University


# Read the info file understand how getstats works.

use strict;
use Text::CSV;
use Statistics::Descriptive;
use Statistics::PointEstimation;
use Statistics::Distributions;
use Data::Dumper;
use Getopt::Long qw(:config no_ignore_case pass_through require_order);
use Statistics::LineFit;
use POSIX qw(strtod);
use File::Glob qw(:globally :case);
use Pod::Usage;
use Errno;

use Digest::MD5 qw(md5_hex);

# Should we use the standard transformations, or clear them?
our @TRANSFORMS = ();
our $stdtransforms = 1;

# The list of parsers
our @PARSERS = ();
# The global variables for our "getstats" program
our %globals = ();
$globals{"confidencelevel"} = 95;

# Should we include columns for a process's own time?
$globals{"selftime"} = 0;

# Presentation options
#  spacing	How much space to put things?
#  delimiter
#  low		The expression to evaluate for the "LOW" column
#  high		The expression to evaluate for the "HIGH" column
$globals{"spacing"} = 1;
$globals{"delimiter"} = '';
$globals{"newline"} = "\n";
$globals{"overhead"} = 1;
$globals{"precision"} = 2;
$globals{'outcols'} = ["NAME", "COUNT", "MEAN", "MEDIAN", "LOW", "HIGH", "MIN", "MAX", "SDEV%", "HW%"];
$globals{'colexpr'} = {
"NAME" => '"$name"',
"SDEV" => 'if (defined($stdev)) { return $stdev; } else { return "undef"; }',
"HW" => 'if (defined($delta)) { return $delta; } else { return "undef"; }',
"SDEV%" => 'if (defined($stdev) && $mean) { return eval("100.0 * $stdev / $mean"); } else { return "undef"; }',
"HW%" => 'if (defined($delta) && $mean) { return eval("100.0 * $delta / $mean"); } else { return "undef"; }',
"COUNT" => '$count',
"MEAN" => '$mean',
"MEDIAN" => '$median',
"1Q" => '$percentile_25',
"2Q" => '$percentile_50',
"3Q" => '$percentile_75',
"MODE" => '$mode',
"LOW" => '$mean - $delta',
"HIGH" => '$mean + $delta',
"MIN" => '$min',
"MAX" => '$max',
"O/H" => 'my $basemean = undef; my $basestats = $globals{"stathashes"}->{"$baseline"}{"$name"}; if (defined($basestats)) { $basemean = $basestats->mean(); } if(defined($basemean) && $basemean) { return eval("100.0 * ($mean - $basemean) / $basemean"); } else { return "undef"; }',
};

$globals{'agg_threads'} = {
	"epoch" => "equal",
	"command" => "discard",
	"thread" => "disjoint",
	"elapsed" => "\$max",
	"sys" => "\$sum",
	"user" => "\$sum",
	"selfsys" => "\$sum",
	"selfuser" => "\$sum",
	"procdiff" => "\$max",
	"_" => "explode",
};

$globals{'agg_commands'} = {
	"epoch" => "equal",
	"thread" => "equal",
	"command" => "disjoint",
	"elapsed" => "\$sum",
	"sys" => "\$sum",
	"user" => "\$sum",
	"selfsys" => "\$sum",
	"selfuser" => "\$sum",
	"procdiff" => "\$sum",
	"_" => "explode",
};
$globals{'agg_mean'} = { "_" => "\$mean", };
$globals{'agg_median'} = { "_" => "\$median", };
$globals{'agg_sum'} = { "_" => "\$sum", };
$globals{'agg_max'} = { "_" => "\$max", };
$globals{'agg_min'} = { "_" => "\$min", };


$globals{'twosampledelta'} = 0;

# Where we stuff the statistics and the values
my %stathash = ();
my %linrhash = ();
my %valuehash = ();
# What order the files came in in.
my %order = ();

# How much debug information should we print?
my $debug = 0;
my $indent = "";

# Things that control the summary
my @summarytable = ();

# This is used by foreachcol to control do_replace
my $fec_index = (undef);
my $fec_labels = (undef);
sub globalcol {
	if (defined($fec_index)) {
		return ($fec_index, $fec_labels);
	} else {
		return undef;
	}
}
# This is used by foreachrow to control do_replace
my $fer_ar = (undef);
my $fer_labels = (undef);
sub globalrow {
	if (defined($fer_ar)) {
		return ($fer_ar, $fer_labels);
	} else {
		return undef;
	}
}

sub readarray {
	my ($fname) = @_;
	my @lines;
	my $doglob = 0;

	if ($fname eq "-") {
		@lines = <STDIN>;
	} else {
		if (open(FH, "<:crlf", $fname)) {
			@lines = <FH>;
			close(FH);
		} elsif ($!{ENOENT}) {
			$doglob = 1;
		} else {
			die "Can not open $fname: $!";
		}
	}

	foreach my $parser (sort { ${$a}[2] <=> ${$b}[2] } (@PARSERS)) {
		my ($name, $extension, $prio, $check, $parse) = @{$parser};

		if (&{$check}($fname, @lines)) {
			return &{$parse}($fname, @lines);
		}
	}

	if ($doglob) {
		die "Can not open $fname: No such file or directory\n";
	}

	print STDERR "$fname is not a recognized file format.\n";
	print STDERR "Enabled parsers (lower priorities are used first):\n";
	unshift(@PARSERS, ["Name", "Extension", "Priority"]);
	writedump(\*STDERR, \@PARSERS, 2);
	die;
}

sub writecsv {
	my ($FH, $aref) = @_;

	my $csv = Text::CSV->new();

	for (my $i = 0; $i <= $#$aref; $i++) {
		my $ar = $aref->[$i];
		$csv->combine(@{$ar});
		print $csv->string() . "\n";
	}
}

sub writedump {
	my ($FH, $aref, $limit, $prefix) = @_;
	my @colwidths = ();

	$prefix = "" if (!defined($prefix));

	if ($globals{'spacing'}) {
		for (my $i = 0; $i <= $#$aref; $i++) {
			my $ar = $aref->[$i];
			next if (ref($ar) ne ("ARRAY"));
			my $thislim;
			if (defined($limit)) { $thislim = $limit } else { $thislim = $#$ar; }
			for (my $j = 0; $j <= $thislim; $j++) {
				my $prt = ${$ar}[$j];

				if (!($prt =~ /^-?[0-9]+$/) && is_numeric($prt)) {
					$prt = sprintf("%.0" . $globals{'precision'} . "f", $prt);
				}

				if ($colwidths[$j] < length($prt) + $globals{'spacing'}) {
					$colwidths[$j] = length($prt) + $globals{'spacing'};
				}
			}
		}
	}

	for (my $i = 0; $i <= $#$aref; $i++) {
		print $FH "$prefix";
		my $ar = $aref->[$i];
		if (ref($ar) ne ("ARRAY")) {
			print $FH "$ar\n";
			next;
		}
		my $thislim;
		if (defined($limit)) { $thislim = $limit } else { $thislim = $#$ar; }
		for (my $j = 0; $j <= $thislim; $j++) {
			my $width = "";
			my $prt = ${$ar}[$j];


			if ($globals{'spacing'}) {
				$width = $colwidths[$j];
			}

			if (!($prt =~ /^-?[0-9]+$/) && is_numeric($prt)) {
				printf $FH "%-" . $width . ".0" . $globals{'precision'} . "f%s", $prt, ($j < $thislim ? $globals{'delimiter'} : "");
			} else {
				printf $FH "%-" . $width . "s%s", $prt, ($j < $thislim ? $globals{'delimiter'} : "");
			}
		}
		printf $FH $globals{'newline'};
	}
}

sub remove_col {
	my ($colname, $aref) = @_;
	my $labels = $aref->[0];

	my $name;
	my $pos = 0;

	foreach $name (@{$labels}) {
		if ($name eq $colname) {
			last;
		} else {
			$pos++;
		}
	}

	if ($pos > $#$labels) {
		die "Can not remove $colname because it doesn't exist!\n";
	}

	for (my $i = 0; $i <= $#$aref; $i++) {
		my $ar = $aref->[$i];
		splice(@{$ar}, $pos, 1);
	}

	delete($stathash{$colname});
	delete($linrhash{$colname});
}

sub move_col {
	my ($colname, $newpos, $aref) = @_;
	my $labels = $aref->[0];

	my $name;
	my $pos = 0;


	foreach $name (@{$labels}) {
		if ($name eq $colname) {
			last;
		} else {
			$pos++;
		}
	}

	if ($pos > $#$labels) {
		die "Can not move $colname because it isn't there!\n";
	}

	if ($pos == $newpos) {
		return;
	}

	for (my $i = 0; $i <= $#$aref; $i++) {
		my $ar = $aref->[$i];
		my $val = splice(@{$ar}, $pos, 1);
		splice(@{$ar}, $newpos, 0, $val);
	}
}

sub rename_col {
	my ($oldname, $newname, $aref) = @_;
	my $labels = $aref->[0];

	my $name;
	my $pos = 0;

	foreach $name (@{$labels}) {
		if ($name eq $newname) {
			last;
		} else {
			$pos++;
		}
	}
	if ($pos <= $#$labels) {
		die "Can not rename $oldname to $newname because $newname already exist!\n";
	}

	$pos = 0;
	foreach $name (@{$labels}) {
		if ($name eq $oldname) {
			last;
		} else {
			$pos++;
		}
	}
	if ($pos > $#$labels) {
		die "Can not rename $oldname because it doesn't exist!\n";
	}

	${$labels}[$pos] = $newname;
}


sub add_col {
	my ($colname, $expr, $aref) = @_;
	my $labels = $aref->[0];

	my $name;
	my $pos = 0;

	foreach $name (@{$labels}) {
		if ($name eq $colname) {
			last;
		} else {
			$pos++;
		}
	}

	if ($pos <= $#$labels) {
		die "Can not add $colname because already exists at $pos!\n";
	}

	push(@{$labels}, $colname);

	for (my $i = 1; $i <= $#$aref; $i++) {
		my $ar = $aref->[$i];
		my $newval = do_replace($expr, undef, $labels, $ar);

		my $addval = eval($newval);
		if (!defined($addval)) {
			die "add_col: Could not evaluate:$expr\n => $newval\n$@\n";
		}

		push(@{$ar}, $addval);
	}
}

sub transpose {
	my ($aref) = @_;
	my $labels = $aref->[0];
	my @newarray = ();

	for (my $j = 0; $j <= $#$labels; $j++) {
		my @newrow = ();
		for (my $i = 0; $i <= $#$aref; $i++) {
			my $ar = $aref->[$i];
			push(@newrow, ${$ar}[$j]);
		}
		push(@newarray, [@newrow]);
	}

	@{$aref} = (@newarray);
}

sub col_exists {
	my ($colname, $aref) = @_;
	my $labels = $aref->[0];

	my $name;
	my $pos = 0;

	foreach $name (@{$labels}) {
		if ($name eq $colname) {
			last;
		} else {
			$pos++;
		}
	}

	return (!($pos > $#$labels));
}

sub update_col {
	my ($colname, $expr, $aref) = @_;
	my $labels = $aref->[0];

	my $name;
	my $pos = 0;

	foreach $name (@{$labels}) {
		if ($name eq $colname) {
			last;
		} else {
			$pos++;
		}
	}

	if ($pos > $#$labels) {
		die "Can not update $colname because it doesn't exist!\n";
	}

	for (my $i = 1; $i <= $#$aref; $i++) {
		my $ar = $aref->[$i];
		my $newval = do_replace($expr, undef, $labels, $ar);

		my $upval = eval($newval);
		if (!defined($upval)) {
			die "update: Could not evaluate:$expr\n => $newval\n$@\n";
		}

		${$ar}[$pos] = $upval;
	}
}

sub select_row {
	my ($expr, $aref) = @_;
	my $labels = $aref->[0];
	my @keep = ();

	for (my $i = 1; $i <= $#$aref; $i++) {
		my $ar = $aref->[$i];
		my $newval = do_replace($expr, undef, $labels, $ar);
		my $test = eval($newval);
		if (!defined($test)) {
			die "select: Could not evaluate:$expr\n => $newval\n$@\n";
		}

		if ($test) {
			push(@keep, $ar);
		}
	}

	splice(@{$aref}, 1, $#$aref, @keep);
}

sub group_array {
	my %arhash;
	my ($colname, $aref) = @_;
	my $labels = $aref->[0];

	my $pos = 0;
	my $name;

	foreach $name (@{$labels}) {
		if ($name eq $colname) {
			last;
		} else {
			$pos++;
		}
	}

	if ($pos > $#$labels) {
		print "Can not group on $colname because it doesn't exist.\n";
		return ();
	}

	if ($#$aref == 0) {
		$arhash{""} = [[@{$labels}]];
		return %arhash;
	}

	for (my $i = 1; $i <= $#$aref; $i++) {
		my $ar = $aref->[$i];
		my $key = $ar->[$pos];

		if (!defined($arhash{$key})) {
			$arhash{$key} = [[@{$labels}]];
		}
		push(@{$arhash{$key}}, $ar);
	}

	return %arhash;
}

sub do_aggregate {
	my ($keyhash, $aref) = @_;
	my $labels = $aref->[0];
	my @newlabels = ();
	my @row = ();

	my $pos = 0;
	my $name;
	my $j = 0;

	summary($aref);

	foreach $name (@{$labels}) {
		my $method;
		$method = $keyhash->{$name};
		if (!defined($method)) {
			$method = $keyhash->{"_"};
		}

		if ($method eq "explode") {
			push(@newlabels, $name . "_max");
			push(@newlabels, $name . "_min");
			push(@newlabels, $name . "_mean");
			push(@newlabels, $name . "_sum");
			push(@row, $stathash{${$labels}[$j]}->max());
			push(@row, $stathash{${$labels}[$j]}->min());
			push(@row, $stathash{${$labels}[$j]}->mean());
			push(@row, $stathash{${$labels}[$j]}->sum());
		} elsif ($method eq "equal") {
			push(@newlabels, $name);

			my $ar = $aref->[1];
			my $firstval = ${$ar}[$j];

			for (my $i = 2; $i <= $#$aref; $i++) {
				my $ar = $aref->[$i];
				my $curval = ${$ar}[$j];
				if ($firstval ne $curval) {
					die "Aggregate: $name must be equal, but row $i is not.\n";
				}
			}

			push(@row, $firstval);
		} elsif ($method eq "discard") {
			# Nothing here
		} elsif ($method eq "disjoint") {
			for (my $i = 1; $i <= $#$aref; $i++) {
				for (my $k = $i + 1; $k <= $#$aref; $k++) {
					my $ar = $aref->[$i];
					my $ar2 = $aref->[$k];
					if (${$ar}[$j] eq ${$ar2}[$j]) {
						die "Aggregate: $name must be disjoint, but row $i is equal to row $k.\n";
					}
				}
			}
		} else {
			my $val = do_replace($method, $j, $labels);
			push(@newlabels, $name);
			push(@row, $val);
		}

		$j++;
	}

	@{$aref} = ([@newlabels] , [@row]);
}

sub do_warn {
	my ($expr, $text, $aref) = @_;

	my $newval = do_replace($expr);

	my %ptemp;
	my $test = eval($newval);
	if ($@) {
		die "do_warn: Could not evaluate:$expr\n => $newval\n$@\n";
	}

	if (defined($test) && $test) {
		print STDERR do_replace($text, undef, undef, undef, \%ptemp) . "\n";
		$globals{'wraised'}++;
	}
}

sub predicate {
	my ($expr, $aref) = @_;
	my $labels = $aref->[0];
	my @truth;
	my $istrue;

	summary($aref);

	for (my $i = 1; $i <= $#$aref; $i++) {
		my $ar = $aref->[$i];

		for (my $j = 0; $j <= $#$ar; $j++) {
			my $newval = do_replace($expr, $j, $labels, $ar);

			my $test = eval($newval);
			if (!defined($test)) {
				die "predicate: Could not evaluate:$expr\n => $newval\n$@\n";
			}

			if ($test) {
				push(@truth, $test);
				$istrue++;
			}
		}
	}

	if ($istrue) {
		return @truth;
	} else {
		return 0;
	}
}

sub eval_sub {
	my ($expr,$aref) = @_;
	my $newval = do_replace($expr);
	eval($newval);
	if ($@) {
		die "eval_sub: Could not evaluate $expr ($newval): $@\n";
	}
}

# This now is both a column-wise and row-wise replacement function
sub do_replace {
	my ($newtext, $column, $labels, $row, $ptemp) = @_;
	my ($stat, $linreg);

	if (!defined($column) && defined(globalcol())) {
		($column, $labels) = globalcol();
	}
	if (defined($column)) {
		die if (!defined($labels));
		$stat = $stathash{${$labels}[$column]};
		$linreg = $linrhash{${$labels}[$column]};
	}

	if (!defined($row) && defined(globalrow())) {
		($row, $labels) = globalrow();
	}

	my %reps = ();
	if (defined($row) && defined($column)) {
		$reps{'val'} = ${$row}[$column];
	} else {
		$reps{'val'} = 'undef';
	}
	if (defined($labels)) {
		$reps{'name'} = ${$labels}[$column];
	} else {
		$reps{'name'} = 'undef';
	}

	my $parsetext = $newtext;

	if ($parsetext =~ /\$(\w+\b|\{[^\}]+})/) {
		$newtext = "";

		while ($parsetext =~ /^(.*?)\$((\w+)\b|\{([^\}]+)\})(.*)$/) {
			my ($start, $wholeid, $simpleid, $complexid, $end) = ($1, $2, $3, $4, $5);
			my $retry = 0;
			my $id = $wholeid;

			die if ($simpleid && $complexid);

			if ($simpleid) {
				die if ($id ne $simpleid);
			} elsif ($complexid) {
				die if ($id ne "{" . $complexid . "}");
				$id = $complexid;
			} else {
				die "I didn't find an identifer, but I expected to!\n";
			}

			print STDERR "ID: $id -> " if ($debug >= 5);

			if (defined($reps{$id})) {
				$id = $reps{$id};
			} elsif (defined($ptemp) && defined(${$ptemp}{$id})) {
				$id = ${$ptemp}{$id};
			} elsif (defined($globals{$id})) {
				$id = $reps{$id} = $globals{$id}
			} else {
				if (defined($row)) {
					for (my $j = 0; $j <= $#$row; $j++) {
						$reps{${$labels}[$j]} = ${$row}[$j];
					}
				}

				my @lrreps = ("slope", "intercept", "lr_rSquared", "lr_durbinWatson", "lr_meanSqError", "lr_sigma");
				if (scalar(grep { $_ eq $id; } @lrreps) > 0) {
					if (defined($linreg)) {
						my ($intercept, $slope) = $linreg->coefficients();

						$reps{'slope'} = $slope;
						$reps{'intercept'} = $intercept;
						$reps{'lr_rSquared'} = $linreg->rSquared();
						$reps{'lr_durbinWatson'} = $linreg->durbinWatson();
						$reps{'lr_meanSqError'} = $linreg->meanSqError();
						$reps{'lr_sigma'} = $linreg->sigma();
					} else {
						foreach (@lrreps) {
							$reps{$_} = "undef";
						}
					}
				}
				my @statreps = ("mean", "stdev", "median", "mode", "variance", "delta", "min", "max", "sum", "count");
				if (scalar(grep { $_ eq $id; } (@statreps)) > 0) {
					if (defined($stat)) {
						if ($id eq "mode") {
							$reps{"mode"} = $stat->mode();
						} elsif ($id eq "median") {
							$reps{"median"} = $stat->median();
						} elsif ($id eq "delta") {
							my $pointestimation = new Statistics::PointEstimation::Sufficient;
							$pointestimation->set_significance($globals{"confidencelevel"});
							$pointestimation->load_data($stat->count(),$stat->mean(),$stat->variance());
							if (defined($pointestimation->delta())) {
								$reps{"delta"} = $pointestimation->delta();
							} else {
								$reps{"delta"} = 'undef';
							}
						} else {
							if (defined($stat->standard_deviation())) {
								$reps{'stdev'} = $stat->standard_deviation();
							} else {
								$reps{'stdev'} = 'undef';
							}
							$reps{"mean"} = $stat->mean();
							$reps{"variance"} = $stat->variance();
							$reps{"min"} = $stat->min();
							$reps{"max"} = $stat->max();
							$reps{"sum"} = $stat->sum();
							$reps{"count"} = $stat->count();
						}
						$reps{$id} = "undef" if (!defined($reps{$id}));
					} else {
						foreach (@statreps) {
							$reps{$_} = "undef";
						}
					}
				}

				if ($id =~ /percentile_(\d+)/) {
					if (defined($stat)) {
						$reps{$id} = $stat->percentile($1);
					}
					$reps{$id} = "undef" if (!defined($reps{$id}));
				} elsif ($id =~ /tmean_(\d\.+)_(\d\.+)/) {
					if (defined($stat)) {
						$reps{$id} = $stat->trimmed_mean($1/10000, $2/10000);
					} else {
						$reps{$id} = "undef";
					}
				} elsif ($id =~ /tmean_(\d+)/) {
					if (defined($stat)) {
						$reps{$id} = $stat->trimmed_mean($1/10000);
					} else {
						$reps{$id} = "undef";
					}
				}

				if (!defined($reps{$id})) {
					$reps{$id} = "\$$wholeid";
				}
				$id = $reps{$id}
			}
			print STDERR "$id\n" if ($debug >= 5);
			$newtext .= $start . $id;
			$parsetext = $end;
		}
		$newtext .= $parsetext;
	}

	return $newtext;
}

sub summary {
	my ($aref) = @_;
	my $labels = $aref->[0];
	my $column;

	%stathash = ();
	%linrhash = ();

	for $column (@{$labels}) {
		$stathash{$column} = new Statistics::Descriptive::Full;
		$linrhash{$column} = new Statistics::LineFit;
	}

	for (my $j = 0; $j <= $#$labels; $j++) {
		my $curname = ${$labels}[$j];
		my @y = ();

		for (my $i = 1; $i <= $#$aref; $i++) {
			my $ar = $aref->[$i];
			my $curval = ${$ar}[$j];

			if (!is_numeric($curval)) {
				next;
			}
			push(@y, $curval);
			$stathash{$curname}->add_data($curval);
		}

		my $i = 1;
		my @x = map { $i++; } @y;
		if ($#y > 1) {
			$linrhash{$curname}->setData(\@x, \@y);
		} else {
			delete($linrhash{$curname});
		}
	}
}

sub describe {
	my ($aref) = $_[0];
	shift;

	# Pick sensible columns by default, but allow them to be overridden
	my @outcols;
	if ($#_ >= 0) {
		if ($#_ == 0 && ref($_[0]) eq "REF") {
			@outcols = @{${$_[0]}};
		} else {
			@outcols = @_;
		}
	}

	my $labels = $aref->[0];
	my $labelwidth = 0;
	my $column;

	summary($aref);

	# First put the columns in the summary
	push(@summarytable, [@outcols]);

	for (my $i = 0; $i <= $#$labels; $i++) {
		my @thisrow = ();
		for (my $j = 0; $j <= $#outcols; $j++) {
			my $expr = ${$globals{'colexpr'}}{$outcols[$j]};
			if (!defined($expr)) {
				die "I don't know how to print " . $outcols[$j] . "\n";
			}

			my $newexpr = do_replace($expr, $i, $labels);
			my $val = eval($newexpr);
			if ($@) {
				die $outcols[$j] . ": Could not evaluate: " . $expr . "\n => $newexpr\n$@\n";
			}
			if (!defined($val)) {
				$val = "undef";
			}
			push(@thisrow, $val);
		}
		push(@summarytable, [@thisrow]);
	}
}

sub is_numeric {
	my $str = shift;
	$str =~ s/^\s+//;
	$str =~ s/\s+$//;
	$! = 0;
	my($num, $unparsed) = strtod($str);
	return (($str ne '') && ($unparsed == 0) && !$!);
}

###############################################################################
# The default getstats transformations begin here.                            #
###############################################################################

# How do we aggregate threads?

sub savestats {
	my ($tref, $aref) = @_;
	if ($#$tref != 0) {
		die "savestats takes no arguments.";
	}
	summary($aref);
	if (!defined($globals{"stathashes"})) {
		my %tmphash = ();
		$globals{"stathashes"} = \%tmphash;
	}
	my %tmphash2 = ();
	$globals{"stathashes"}->{$globals{"file"}} = \%tmphash2;
	for my $key (keys(%stathash)) {
		$globals{"stathashes"}->{$globals{"file"}}->{$key} = $stathash{$key};
	}
}


# This defines the standard transform "library."
our %functions = (
# This should set or unset a global variable
"set" => sub {
	my ($tref, $aref) = @_;
	if ($#$tref > 2 || $#$tref <= 0) {
		die "set takes only one or two argument.";
	} else {
		if ($#$tref == 1) {
			$globals{${$tref}[1]} = 1;
		} else {
			$globals{${$tref}[1]} = ${$tref}[2];
		}
	}
},

"unset" => sub {
	my ($tref, $aref) = @_;
	if ($#$tref != 1) {
		die "unset takes only one or two argument.";
	}
	$globals{${$tref}[1]} = 0;
},

"seteval" => sub {
	my ($tref, $aref) = @_;
	if ($#$tref != 2) {
		die "set takes only one or two argument.";
	}
	my($name, $expr) = (${$tref}[1], ${$tref}[2]);
	$globals{$name} = eval($expr);
},
"setexpr" => sub {
	my ($tref, $aref) = @_;
	if ($#$tref != 2) {
		die "set takes only one or two argument.";
	}
	my($name, $expr) = (${$tref}[1], ${$tref}[2]);
	$globals{$name} = do_replace($expr);
},

"push" => sub {
	my ($tref, $aref) = @_;
	if ($#$tref != 2) {
		die "push takes two arguments.";
	} else {
		if (!defined($globals{${$tref}[1]})) {
			@{$globals{${$tref}[1]}} = ('');
		}
		push(@{$globals{${$tref}[1]}}, ${$tref}[2]);
	}
},
"pusheval" => sub {
	my ($tref, $aref) = @_;
	if ($#$tref != 2) {
		die "pusheval: takes two arguments.";
	} else {
		if (!defined($globals{${$tref}[1]})) {
			$globals{${$tref}[1]} = ('');
		}
		if (!defined($globals{${$tref}[1]})) {
			@{$globals{${$tref}[1]}} = ('');
		}
		my $val = eval(${$tref}[2]);
		if (!defined($val)) {
			print "pusheval: Can not evaluate: " . ${$tref}[2] . ": $@\n";
		}
		push(@{$globals{${$tref}[1]}}, $val);
	}
},
"pop" => sub {
	my ($tref, $aref) = @_;
	if ($#$tref != 1) {
		die "pop takes one arguments.";
	}
	if (defined($globals{${$tref}[1]})) {
		pop(@{$globals{${$tref}[1]}});
	}
},

# Rename this relation
"rename_relation" => sub {
	my ($tref, $aref) = @_;
	if ($#$tref != 1) {
		die "rename_relation takes one argument.";
	}
	my $expr = ${$tref}[1];
	my ($new) = eval(do_replace($expr));
	if ($@) {
		die "rename_Relation: Could not evaluate:$expr\n => $new\n$@\n";
	}
	my ($old) = do_replace('$file');
	$valuehash{$new} = $valuehash{$old};
	$order{$new} = $order{$old};
	delete($valuehash{$old});
	delete($order{$old});
},
# Use rename relation to create a basename
"basename" => ["rename_relation", 'use File::Basename; my @suffixes = map { ${$_}[1]; } (@PARSERS); return basename("$file", @suffixes);'],

# This stores the baseline statistics.
"savestats" => \&savestats,

# Split array into one array for each command
"splitcommand" => ["die", "Split command not yet implemented."],

# Remove everything but the exec phases (i.e., don't count setup and cleanup)
"execonly" =>
["ifexist", "apmode", ["block", 
	["select", "'\$apmode' ne 'SETUP' && '\$apmode' ne 'CLEANUP' && '\$apmode' ne 'PRESETUP' && '\$apmode' ne 'POSTCLEANUP'"],
	["warnrow", "'\$apmode' ne 'EXEC'", "\$file: Mode is \$apmode for epoch \$epoch."],
	["remove", "apmode"]]
],

# Combine multiple commands into one command
"unifycommand" =>
["group", "epoch",
	["group", "thread",
		[ "if", "\$count > 1",
			[ "aggregate", $globals{"agg_commands"} ]
		]
	]
],

# Aggregate threads
"aggthreads" =>
["group", "epoch",
	["if", "\$count > 1", 
		["aggregate", $globals{"agg_threads"} ],
# We should just remove thread if there is only one possible answer.
		[ "remove", "thread" ]
	]
],

# Various useful aggregates
"aggmean" => ["aggregate", $globals{"agg_mean"} ],
"aggmedian" => ["aggregate", $globals{"agg_median"} ],
"aggsum" => ["aggregate", $globals{"agg_sum"} ],
"aggmax" => ["aggregate", $globals{"agg_max"} ],
"aggmin" => ["aggregate", $globals{"agg_min"} ],

# Setup the procdiff column
"procdiff" => ["ifexist", "procdiff",
["block",
	["update", "procdiff", "if (\$procdiff) { return 100.0 * eval(\"(\$procdiff - \$user - \$sys) / (\$procdiff)\") } else { return 0; }"],
	["rename", "procdiff", "Excess CPU%"],
]],

# Remove the self times
"selfsys" => ["ifexist", "selfsys",
	["if", "\$selftime;",
		["block",
			["rename", "selfsys", "System (Self)"],
			["rename", "selfuser", "User (Self)"],
		],
		["block",
			["remove", "selfsys"],
			["remove", "selfuser"],
		]
	]
],

"onlycol" => sub {
	my ($tref, $aref) = @_;

	die "onlycol takes zero or one arguments." if ($#$tref != 1);
	my $col = ${$tref}[1];

	my $c = scalar(grep(/$col/, @{$aref->[0]}));
	die "Could not find column $col.\n" if ($c != 1);

	map { do_transform(['remove', $_], $aref) if (!/$col/); } (@{$aref->[0]});
},

# Add a summary line
"addsummary" => sub { my ($tref, $aref) = @_; my @trcpy = @{$tref}; shift(@trcpy); foreach (@trcpy) { push(@summarytable, do_replace($_)); } },

# PASS0: Read in the file, and turn it into something pretty
"readpass" => ["block",
["ifexist", "elapsed", ["block",
["exitfail"],
["remove", "status"],

["execonly"],

["unifycommand"],
["ifexist", "command", ["remove", "command"]],
["aggthreads"],

["add", "io", "\$elapsed - \$user - \$sys"],
["add", "cpu", "100.0 * (\$user + \$sys) / \$elapsed"],
["add", "energy_mc_j", "\$energy_mc * 3600"],
["add", "energy_disk_j", "\$energy_disk * 3600"],
["add", "total_energy", "\$energy_mc_j + \$energy_disk_j"],
["add", "ops/joule", "\$t_ops/\$total_energy"],
["procdiff"],
["selfsys"],
]]],

# This is PASS1, which produces warnings and reorders the columns
"warnpass" => ["block",
["ifexist", "elapsed", ["block",
["otherexec"],
["negio"],
["zscore"],

["remove", "epoch"],
["warnregress"],
["warndelta"],

# Use the canonical order
["move", "elapsed", 0],
["move", "sys", 1],
["move", "user", 2],
["move", "io", 3],
["move", "cpu", 4],
["rename", "user", "User"],
["rename", "sys", "System"],
["rename", "elapsed", "Elapsed"],
["rename", "io", "Wait"],
["rename", "cpu", "CPU%"],
]]],

# This is PASS2, which saves stuff for computing overhead later
"ohpass" => ["block",
["savestats"],
["if", "\$fileno == 0", ["setexpr", "baseline", "\$file"]]
],

# This is PASS3, which creates the nice summary
"summary" =>  ["block",
["descprehook"],
["addsummary", "\$file"],
["if", "\$fileno > 0 && \$overhead", ["push", "outcols", "O/H"] ],
["describe", \$globals{"outcols"} ],
["if", "\$fileno > 0 && \$overhead", ["pop", "outcols"] ],
["addsummary", ""],
["descposthook"],
["if", "\$fileno == (\$filecount - 1)", ["eval", "writedump(\\*STDOUT, \\\@summarytable);"]],
],
# If you want to hook into outcols for the summary, this is the place to do it
"descprehook" => ["noop"],
"descposthook" => ["noop"],

# Warning primitives
"warncol" => sub {
	my ($tref, $aref) = @_;

	if ($#$tref != 2) {
		die "Warncol takes two arguments.";
	}

	do_transform(["foreachcol", ["warn", ${$tref}[1], ${$tref}[2]]], $aref);
},
"warnrow" => sub {
	my ($tref, $aref) = @_;

	if ($#$tref != 2) {
		die "Warnrow takes two arguments.";
	}

	do_transform(["foreachrow", ["warn", ${$tref}[1], ${$tref}[2]]], $aref);
},
"warnval" => sub {
	my ($tref, $aref) = @_;

	if ($#$tref != 2) {
		die "Warnval takes two arguments.";
	}

	do_transform(["foreachcol", ["foreachrow", ["warn", ${$tref}[1], ${$tref}[2]]]], $aref);
},

# Warn if the zscore is too high.
"zscore" => ["block",
["eval", "if (!defined(\$globals{'warn'})) { \$globals{'warn'} = 1; } ; return 1"],
["eval", "if (!defined(\$globals{'zscore-thresh'})) { \$globals{'zscore-thresh'} = 2; } ; return 1"],
["if", "\$warn;", 
	["warnval", "if(\$stdev) { \$ptemp{'zscore'} = eval \"(abs(\$mean - \$val) / \$stdev)\"; \$globals{'pzscore'} = sprintf('%.' . \$globals{'precision'} . 'f', \$ptemp{'zscore'});return ((\$globals{'zscore-thresh'} > 0) && \$ptemp{'zscore'} > \$globals{'zscore-thresh'}); }", "\$file: High z-score of \$pzscore for \$name in epoch \$epoch."],
]],
# Warn if I/O is negative.
"negio" => ["block",
["eval", "if (!defined(\$globals{'warn'})) { \$globals{'warn'} = 1; } ; return 1"],
["if", "\$warn;", 
	["warnrow", "if (\$io < 0) { \$ptemp{'pio'} = sprintf('%.' . \$globals{'precision'} . 'f', \$io); return 1; } return 0;", "\$file: Negative Wait Time, \$io, for epoch \$epoch."],
]],
# Warn if something else used lots of CPU (greater than 5% of what we did)
"otherexec" => ["ifexist", "procdiff", ["block",
	["eval", "if (!defined(\$globals{'warn'})) { \$globals{'warn'} = 1; } ; return 1"],
["eval", "if (!defined(\$globals{'otherexec-thresh'})) { \$globals{'otherexec-thresh'} = 2; } ; return 1"],
["if", "\$warn;", 
	["warnrow", "\$ptemp{'prtprocdiff'} = sprintf('%.3f', \$procdiff); return (\$procdiff > 5);", "\$file: Excess CPU% is \$prtprocdiff\% for epoch \$epoch."],
]]],
# Warn if the slope of linear regression is too high.
"warnregress" => ["block",
["eval", "if (!defined(\$globals{'warn'})) { \$globals{'warn'} = 1; } ; return 1"],
["eval", "if (!defined(\$globals{'regressthresh'})) { \$globals{'regressthresh'} = 1; } ; return 1"],
["if", "\$warn;", 
	["warncol", "if (\$mean && defined(\$slope)) { \$ptemp{'slopepct'} = sprintf('%0.3f', eval(\"(100 * \$slope) / \$mean\")); } else { \$ptemp{'slopepct'} =  'undef'; } return(abs(\$slope) > ((\$regressthresh / 100) * \$mean))", "\$file: Linear regression slope for \$name is: \$slopepct%."],
]],
# Warn if the half-width is greater than 5%.
"warndelta" => ["block",
["eval", "if (!defined(\$globals{'warn'})) { \$globals{'warn'} = 1; } ; return 1"],
["eval", "if (!defined(\$globals{'deltathresh'})) { \$globals{'deltathresh'} = 5; } ; return 1"],
["if", "\$warn;", 
	["warncol", "if (\$mean && defined(\$delta)) { \$ptemp{'deltapct'} = sprintf('%0.3f', eval(\"(100 * \$delta) / \$mean\")); } else { \$ptemp{'deltapct'} =  'undef'; } return(abs(\$delta) > ((\$deltathresh / 100) * \$mean))", "\$file: Half-width for \$name is: \$deltapct%."],
]],
# We don't want our tests to fail, but don't warn if warn is unset.
"exitfail" => ["block",
	["eval", "if (!defined(\$globals{'warn'})) { \$globals{'warn'} = 1; } ; return 1"],
	["if", "\$warn;", 
		["warnrow", "\$status != 0", "\$file: Failure for \$command on epoch \$epoch, thread \$thread, exit status = \$status."]
]],

# Predicate mode
"predicate" => sub {
	my ($tref, $aref) = @_;

	if ($#$tref != 1) {
		die "predicate takes only one argument.";
	}
	$globals{"predicate_expression"} = ${$tref}[1];

	do_transform(["block", ["set", "wraised", 0], ["warncol", "!(" . ${$tref}[1] . ")", "Predicate failed for \$name: " . ${$tref}[1] ], ["if", "\$wraised > 0", ["die", "\$file: predicate \$predicate_expression failed \$wraised times."]]], $aref);
},

# Two sample confidence intervals should be printed afterwards, so on the first
# file just set the baseline, savestats, and then push our new transform onto
# the stack.  For subsequent files do nothing.
"twosamplet" => ["if", "\$fileno == 0",
	["block",
		["setexpr", "baseline", "\$file"],
		["savestats"],
		["eval", 'push(@TRANSFORMS, ["__twosamplet"]);']
	],
],
# This should be run *after* the other transformations so the output is pretty
"__twosamplet" => ["if", "\$fileno != 0",
	["block",
		["echo", "Comparing \$file (Sample 1) to \$baseline (Sample 2)."],
		["savestats"],
		["foreachcol", ["__twosamplet__"]]
	],
],

# Two sample confidence intervals should be printed afterwards, so on the first
# file just push our new transform onto the stack.
"pairwiset" => ["if", "\$fileno == 0",
	["block",
		["set", "pairwisebase", "0"],
		["savestats"],
		["eval", 'push(@TRANSFORMS, ["__pairwiset"]);']
	],
],
# This should be run *after* the other transformations so the output is pretty
"__pairwiset" => ["if", "\$fileno > \$pairwisebase",
	["block",
		["echo", "Comparing \$file (Sample 1) to \$baseline (Sample 2)."],
		["savestats"],
		["foreachcol", ["__twosamplet__"]],
		["if", "\$fileno == \$filecount - 1", 
			["block",
				["setexpr", "pairwisebase", "\$pairwisebase + 1"],
				["eval", 'push(@TRANSFORMS, ["__pairwiset"]);']
			]
		]
	],
# else we aren't bigger than pairwisebase
	["if", "\$fileno == \$pairwisebase", ["setexpr", "baseline", "\$file"]]	
],

# The actual code for the two sample t-test
"__twosamplet__" => sub {
	my $baseline = $globals{"baseline"};
	my $file = $globals{"file"};
	my $name = do_replace("\$name");

	if (defined($globals{"wrscolumns"})) {
		my %cols = ();
		map { $cols{$_} = 1; } split(/,/, $globals{"wrscolumns"});
		return if (!$cols{$name});
	}

	my $twosampledelta = $globals{"twosampledelta"};
	my $basestats = $globals{"stathashes"}->{$baseline}{$name};
	if (!defined($basestats)) { print "$name: No baseline for hypothesis test.\n"; return; }
	my $curstats = $globals{"stathashes"}->{$file}{$name};
	my $cimid = $curstats->mean() - $basestats->mean();
	my ($n1, $n2) = ($curstats->count(), $basestats->count());
	my ($s1, $s2) = ($curstats->standard_deviation(), $basestats->standard_deviation());
	my $df = $n1 + $n2 - 2;
	if ($df == 0) { print "$name: Not enough samples to run a hypothesis test.\n"; return; }
	my $sp = sqrt(((($n1 - 1) * $s1 * $s1) + (($n2 - 1) * $s2 * $s2)) / $df);
	if ($sp == 0) { print "$name: Can not run test when pooled variance is zero.\n"; return; }
	my $tval = Statistics::Distributions::tdistr($df, (1 - $globals{"confidencelevel"}/100)/2);
	my $cidelta = $tval * $sp * sqrt((1 / $n1) + (1 / $n2));
	my $cilow = $cimid - $cidelta;
	my $cihigh = $cimid + $cidelta;
	$globals{"rejects"} = 0;

	my $t = (($cimid) - $twosampledelta)/($sp*(sqrt((1/$n1) + (1/$n2))));

	my @tests;
	push(@tests, ["Null Hyp.", "Alt. Hyp.", "P-value", "Result"]);

	my ($h0, $ha);

	if ($twosampledelta) { 	
		$h0 = sprintf "u1 - u2 <= %.0" . $globals{"precision"} . "f", $twosampledelta;
		$ha = sprintf "u1 - u2 >  %.0" . $globals{"precision"} . "f", $twosampledelta;
	} else {
		$h0 = "u1 <= u2";
		$ha = "u1 >  u2";
	}
	if ($globals{"verbosettest"}) {
		$h0 =~ s/u1/$file/;
		$h0 =~ s/u2/$baseline/;
		$ha =~ s/u1/$file/;
		$ha =~ s/u2/$baseline/;
	}
	my $p = (Statistics::Distributions::tprob($df, $t));
	my $thresh = 1 - ($globals{"confidencelevel"}/100);
	if (!$globals{"rejectonly"} || ($p < $thresh))  {
		push(@tests, [$h0, $ha, $p, ($p < $thresh) ? "REJECT H_0" : "ACCEPT H_0"]);
		$globals{"rejects"}++;
	}
	
	if ($twosampledelta) { 	
		$h0 = sprintf "u1 - u2 >= %.0" . $globals{"precision"} . "f", $twosampledelta;
		$ha = sprintf "u1 - u2 <  %.0" . $globals{"precision"} . "f", $twosampledelta;
	} else {
		$h0 = "u1 >= u2";
		$ha = "u1 <  u2";
	}
	if ($globals{"verbosettest"}) {
		$h0 =~ s/u1/$file/;
		$h0 =~ s/u2/$baseline/;
		$ha =~ s/u1/$file/;
		$ha =~ s/u2/$baseline/;
	}
	my $p = (1 - Statistics::Distributions::tprob($df, $t));
	if (!$globals{"rejectonly"} || ($p < $thresh))  {
		push(@tests, [$h0, $ha, $p, ($p < $thresh) ? "REJECT H_0" : "ACCEPT H_0"]);
		$globals{"rejects"}++;
	}

	if ($twosampledelta) {
		$h0 = sprintf "u1 - u2 == %.0" . $globals{"precision"} . "f", $twosampledelta;
		$ha = sprintf "u1 - u2 != %.0" . $globals{"precision"} . "f", $twosampledelta;
	} else {
		$h0 = "u1 == u2";
		$ha = "u1 != u2";
	}
	if ($globals{"verbosettest"}) {
		$h0 =~ s/u1/$file/;
		$h0 =~ s/u2/$baseline/;
		$ha =~ s/u1/$file/;
		$ha =~ s/u2/$baseline/;
	}
	my $p = 2 *  Statistics::Distributions::tprob($df, abs($t));
	my $thresh = 1 - ($globals{"confidencelevel"}/100);
	if (!$globals{"rejectonly"} || ($p < $thresh))  {
		push(@tests, [$h0, $ha, $p, ($p < $thresh) ? "REJECT H_0" : "ACCEPT H_0"]);
		$globals{"rejects"}++;
	}
	push(@tests, "");
	if (!$globals{"rejectonly"} || ($globals{"rejects"} > 0)) {
		printf "%s: %d%%CI for %s - %s = (%.0" . $globals{"precision"} . "f, %.0" . $globals{"precision"} . "f)\n", "$name", $globals{"confidencelevel"}, "$file", "$baseline", $cilow, $cihigh;
		writedump(\*STDOUT, \@tests);
	}
},

# This is equivalent to two samplet, but for the Wilcoxon Rank-sum Test
"wrs" => ["if", "\$fileno == 0",
	["block",
		["setexpr", "baseline", "\$file"],
		["savestats"],
		["eval", 'push(@TRANSFORMS, ["__wrs"]);']
	],
],
"__wrs" => ["if", "\$fileno != 0",
	["block",
		["echo", "Comparing \$baseline (Sample 1) to \$file (Sample 2)."],
		["savestats"],
		["foreachcol", ["__wrs__"]]
	],
],
"__wrs__" => sub {
	my $baseline = $globals{"baseline"};
	my $file = $globals{"file"};
	my $name = do_replace("\$name");

	if (defined($globals{"ttestcolumns"})) {
		my %cols = ();
		map { $cols{$_} = 1; } split(/,/, $globals{"ttestcolumns"});
		return if (!$cols{$name});
	}

	my $basestats = $globals{"stathashes"}->{$baseline}{$name};
	if (!defined($basestats)) { print "$name: No baseline for hypothesis test.\n"; return; }
	my $curstats = $globals{"stathashes"}->{$file}{$name};

	my @ranks = ();
	foreach my $val ($basestats->get_data()) {
		push(@ranks, [$val, 0]);
	}
	foreach my $val ($curstats->get_data()) {
		push(@ranks, [$val, 1]);
	}
	my @ranks = sort { return ($$a[0] <=> $$b[0]); } (@ranks);

	my $n1 = $basestats->count();
	my $n2 = $curstats->count();
	my $R1 = 0;
	my $R2 = 0;

	for (my $baserank = 1; $baserank <= $#ranks; $baserank++) {
		my $val1 = ${$ranks[$baserank - 1]}[0];
		my $val2 = ${$ranks[$baserank]}[0];
		if ($val1 != $val2) {
			push(@{$ranks[$baserank - 1]}, $baserank);
		} else {
			my $maxrank = $baserank;
			while (($maxrank <= $#ranks) && (${$ranks[$maxrank]}[0] == $val1)) {
				$maxrank++;
			}
			# We purposefully overshoot by one (because the rank in the array is one less)
			# The mean is min + (max - min)/2
			my $userank = $baserank + (($maxrank - $baserank) / 2);
			for (my $i = $baserank - 1; $i < $maxrank; $i++) {
				push(@{$ranks[$i]}, $userank);
			}
			$baserank = $maxrank;
		}
	}

	foreach (@ranks) {
		my $val = $$_[0];
		my $set = $$_[1];
		my $rank = $$_[2];

		if ($set == 0) {
			$R1 += $rank;
		} else {
			$R2 += $rank;
		}
	}

	my $mu_R = ($n1 * ($n1 + $n2 + 1))/2;
	my $sigma_R = sqrt(($n1 * $n2 * ($n1 + $n2 + 1)) / 12);
	my $z = ($R1 - $mu_R) / $sigma_R;

	# Now do the output for all of the three possible tests
	$globals{"rejects"} = 0;

	my @tests;
	push(@tests, ["Null Hyp.", "Alt. Hyp.", "P-value", "Result"]);

	my ($h0, $ha);

	$h0 = "u1 <= u2";
	$ha = "u1 >  u2";
	if ($globals{"verbosettest"}) {
		$h0 =~ s/u1/$baseline/;
		$h0 =~ s/u2/$file/;
		$ha =~ s/u1/$baseline/;
		$ha =~ s/u2/$file/;
	}

	my $p = (Statistics::Distributions::uprob($z));
	my $thresh = 1 - ($globals{"confidencelevel"}/100);
	if (!$globals{"rejectonly"} || ($p < $thresh))  {
		push(@tests, [$h0, $ha, $p, ($p < $thresh) ? "REJECT H_0" : "ACCEPT H_0"]);
		$globals{"rejects"}++;
	}

	$h0 = "u1 >= u2";
	$ha = "u1 <  u2";
	if ($globals{"verbosettest"}) {
		$h0 =~ s/u1/$baseline/;
		$h0 =~ s/u2/$file/;
		$ha =~ s/u1/$baseline/;
		$ha =~ s/u2/$file/;
	}

	my $p = (1 - Statistics::Distributions::uprob($z));
	if (!$globals{"rejectonly"} || ($p < $thresh)) {
		push(@tests, [$h0, $ha, $p, ($p < $thresh) ? "REJECT H_0" : "ACCEPT H_0"]);
		$globals{"rejects"}++;
	}

	$h0 = "u1 == u2";
	$ha = "u1 != u2";
	if ($globals{"verbosettest"}) {
		$h0 =~ s/u1/$baseline/;
		$h0 =~ s/u2/$file/;
		$ha =~ s/u1/$baseline/;
		$ha =~ s/u2/$file/;
	}

	my $p = 2 *  Statistics::Distributions::uprob(abs($z));
	my $thresh = 1 - ($globals{"confidencelevel"}/100);
	if (!$globals{"rejectonly"} || ($p < $thresh)) {
		push(@tests, [$h0, $ha, $p, ($p < $thresh) ? "REJECT H_0" : "ACCEPT H_0"]);
		$globals{"rejects"}++;
	}
	push(@tests, "");
	if (!$globals{"rejectonly"} || ($globals{"rejects"} > 0)) {
		printf "%s: (R1=%d, R2=%d)\n", $name, $R1, $R2;
		writedump(\*STDOUT, \@tests);
	}
},

);

# These are our command line options
our %options;

###############################################################################
# The default getstats transformations end here.                              #
###############################################################################

our %argcounts = (
# This is how we know what the builtins are, if a builtin is not described here
# then you can't specify it on the command line
"remove" => 1,
"add" => 2,
"warn" => 2,
"select" => 1,
"update" => 2,
"rename" => 2,
"move" => 2,
"eval" => 1,
"describe" => -1,
"noop" => -1,
"echo" => -1,
"die" => -1,
"csv" => 0,
"dump" => 0,

# These are standard functions
"pop" => 1,
"savestats" => 0,
"exitfail" => 0,
"aggthreads" => 0,
"unifycommands" => 0,
"execonly" => 0,
"negio" => 0,
"otherexec" => 0,
"warnregress" => 0,
"warndelta" => 0,
"procdiff" => 0,
"zscore" => 0,
"predicate" => 1,
"warnrow" => 2,
"warncol" => 2,
"warnval" => 2,
"rename_relation" => 1,
"onlycol" => 1,

# These are the default passes
"readpass" => 0,
"warnpass" => 0,
"ohpass" => 0,
"summary" => 0
);

sub do_transform {
	my ($transform, $values) = @_;

	my $cmd = ${$transform}[0];

	if ($debug == 1) {
		print STDERR "EXECUTING: $indent $cmd\n";
	} elsif ($debug >= 2) {
		my $VAR1;
		my @friendlytfm = ();
		if ($cmd eq "block") {
			push(@friendlytfm, "block");
			push(@friendlytfm, "...");
		} elsif ($cmd eq "if" || $cmd eq "ifexist" || $cmd eq "group") {
			my $i = 0;
			foreach (@{$transform}) {
				if ($i++ <= 1) {
					eval(Dumper($_));
					push(@friendlytfm, $VAR1);
				} else {
					push(@friendlytfm, "...");
				}
			}
		} else {
			foreach (@{$transform}) {
				eval(Dumper($_));
				push(@friendlytfm, $VAR1);
			}
		}
		print STDERR "EXECUTING: $indent [" . join(", ", @friendlytfm) . "]\n";

		if ($debug >= 10) {
			print STDERR "RELATION:  $indent " . $globals{"file"} . "\n";
			writedump(\*STDERR, $values, undef, "RELATION:  $indent ");
		}
	}

	if ($cmd eq "remove") {
		die "Invalid remove transform!" if not ($#$transform == 1);
		remove_col(${$transform}[1], $values);
	} elsif ($cmd eq "add") {
		die "Invalid add transform!" if not ($#$transform == 2);
		add_col(${$transform}[1], ${$transform}[2], $values);
	} elsif ($cmd eq "warn") {
		die "Invalid warn transform!" if not ($#$transform == 2);
		do_warn(${$transform}[1], ${$transform}[2], $values);
	} elsif ($cmd eq "select") {
		die "Invalid select transform!" if not ($#$transform == 1);
		select_row(${$transform}[1], $values);
	} elsif ($cmd eq "update") {
		die "Invalid add transform!" if not ($#$transform == 2);
		update_col(${$transform}[1], ${$transform}[2], $values);
	} elsif ($cmd eq "rename") {
		die "Invalid rename transform!" if not ($#$transform == 2);
		rename_col(${$transform}[1], ${$transform}[2], $values);
	} elsif ($cmd eq "move") {
		die "Invalid move transform!" if not ($#$transform == 2);
		move_col(${$transform}[1], ${$transform}[2], $values);
	} elsif ($cmd eq "ifexist") {
		die "Invalid ifexist transform!" if not ($#$transform == 2 || $#$transform == 3);
		$indent .= " ";
		if (col_exists(${$transform}[1], $values)) {
			do_transform(${$transform}[2], $values);
		} elsif ($#$transform == 3) {
			do_transform(${$transform}[3], $values);
		}
		chop($indent);
	} elsif ($cmd eq "eval") {
		die "Invalid if transform!" if not ($#$transform == 1);
		eval_sub(${$transform}[1], $values);
	} elsif ($cmd eq "if") {
		die "Invalid if transform!" if not ($#$transform == 2 || $#$transform == 3);
		$indent .= " ";
		if (predicate(${$transform}[1], $values)) {
			do_transform(${$transform}[2], $values);
		} elsif ($#$transform == 3) {
			do_transform(${$transform}[3], $values);
		}
		chop($indent);
	} elsif ($cmd eq "foreachcol") {
		$fec_labels = $values->[0];
		summary($values);

		$indent .= " ";
		for ($fec_index = 0; $fec_index <= $#$fec_labels; $fec_index++) {
			do_transform(${$transform}[1], $values);
		}
		chop($indent);
		undef($fec_index);
		undef($fec_labels);
	} elsif ($cmd eq "foreachrow") {
		die "Invalid foreachrow transform!" if not ($#$transform == 1);
		summary($values);

		$fer_labels = $values->[0];

		$indent .= " ";
		for (my $fer_index = 1; $fer_index <= $#$values; $fer_index++) {
			$fer_ar = $values->[$fer_index];
			do_transform(${$transform}[1], $values);
		}
		chop($indent);
		undef($fer_ar);
		undef($fer_labels);
	} elsif ($cmd eq "foreachcolsafe") {
		my $tlabelsleft = $values->[0];
		my @labelsleft = @{$tlabelsleft};

		do
		{
			my $curlabel = shift(@labelsleft);

			$fec_labels = $values->[0];
			summary($values);

			$indent .= " ";

			for ($fec_index = 0; $fec_index <= $#$fec_labels; $fec_index++) {
				if ($$fec_labels[$fec_index] == $curlabel) {
					do_transform(${$transform}[1], $values);
				}
			}

			chop($indent);

			undef($fec_index);
			undef($fec_labels);
		}
		while ($#labelsleft >= 0);
	} elsif ($cmd eq "block") {
		$indent .= " ";
		for (my $i = 1; $i <= $#$transform; $i++) {
			do_transform(${$transform}[$i], $values);
		}
		chop($indent);
	} elsif ($cmd eq "group") {
		die "Invalid group transform!" if not ($#$transform == 2);
		my %grouped = group_array(${$transform}[1], $values);
		my $key;

		$indent .= " ";
		foreach $key (sort { $grouped{$a} <=> $grouped{$b} } (keys(%grouped))) {
			my $ars = $grouped{$key};
			do_transform(${$transform}[2], $ars);
		}
		chop($indent);

		@{$values} = ();

		my $labeled = 0;

		foreach $key (sort { $a <=> $b } (keys(%grouped))) {
			my $ars = $grouped{$key};

			my $labels = shift(@{$ars});
			if (!$labeled) {
				push(@{$values}, $labels);
				$labeled = 1;
			}

			while(my $r = shift(@{$ars})) {
				push(@{$values}, $r);
			}
		}
	} elsif ($cmd eq "aggregate") {
		die "Invalid group transform!" if not ($#$transform == 1);
		do_aggregate(${$transform}[1], $values);
	} elsif ($cmd eq "describe") {
		my @trcpy = @{$transform};
		shift @trcpy;
		describe($values,  @trcpy);
	} elsif ($cmd eq "csv") {
		writecsv(\*STDOUT, $values);
	} elsif ($cmd eq "dump") {
		writedump(\*STDOUT, $values);
	} elsif ($cmd eq "noop") {
		return;
	} elsif ($cmd eq "echo") {
		for (my $i = 1; $i <= $#$transform; $i++) {
			print do_replace(${$transform}[$i]);
		}
		print "\n";
	} elsif ($cmd eq "die") {
		my $diestr = "";
		for (my $i = 1; $i <= $#$transform; $i++) {
			$diestr .= sprintf "%s", do_replace(${$transform}[$i]);
		}
		die "$diestr\n";
	} else {
		if (!defined($functions{${$transform}[0]})) {
			die "Unknown transform: \"" . ${$transform}[0] . "\"\n";
		}
		my $fxn = $functions{${$transform}[0]};
		if (ref($fxn) eq "CODE") {
			&{$fxn}($transform, $values);
		} elsif (ref($fxn) eq "ARRAY") {
			do_transform($fxn, $values);
		}
	}
}

# We read the parsers in down here so that we can insert transforms and
# functions.
my @md5list = ();
push(@INC, "/usr/local/libexec");
foreach my $prefix (@INC) {
	if (-d "$prefix") {
		my @parsesrc = glob("$prefix/gs_parse_*");
		foreach (@parsesrc) {
			next if /.*~$/;
			open(MD5IT, $_);
			my $md5 = md5_hex(<MD5IT>);
			close(MD5IT);
			next if (scalar(grep { $_ eq $md5; } (@md5list)));
			push(@md5list, $md5);
			require;
		}
	}
}
if ($#PARSERS == -1) {
	print STDERR "There are no getstats parsers available.\n";
	print STDERR "Parsers were installed in /usr/local/libexec (determined by ./configure)\n";
	print STDERR "The parser include path is:\n\t" . join("\n\t", @INC) . "\n";
	exit 1;
}

my @md5list = ();
foreach my $prefix (@INC) {
	if (-d "$prefix") {
		my @tfmsrc = glob("$prefix/gs_transform_*");
		foreach (@tfmsrc) {
			next if /.*~$/;
			open(MD5IT, $_);
			my $md5 = md5_hex(<MD5IT>);
			close(MD5IT);
			next if (scalar(grep { $_ eq $md5; } (@md5list)));
			push(@md5list, $md5);
			require;
		}
	}
}

sub versioninfo {
	print <<END
Auto-pilot 2.4.

Copyright 2001-2006 Charles P. Wright
Copyright 2001-2006 Erez Zadok
Copyright 2001-2006 Stony Brook University
Copyright 2001-2006 Research Foundation of SUNY

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.

END
;
	exit 0;
}

###############################################################################
# The rest of the file takes care of reading data and running the             #
# transformations on it                                                       #
###############################################################################

if ($#ARGV == -1 ) {
	push (@ARGV, '-');

}
# This is the first transform to be run
push(@TRANSFORMS, ["readpass"]);
if (!$stdtransforms) {
	@TRANSFORMS = ();
}

my $output;
my @FILES = ();
my $catfiles = 0;
my $shift = 0;

sub do_source {
	if ($#_ != 1) {
		die "source requires exactly one argument.";
	}
	require $_[1] || die "Can not require: " . $_[1] . "$!\n";
}

sub do_output {
	if ($#_ != 1) {
		die "output requires exactly one argument.";
	}
	$output = $_[1];
	open(STDOUT, ">$output") || die "Can not open $output: $!\n";

}

$SIG{__DIE__} = sub {
	die @_ if $^S;
	if (defined($output) && $output) {
		unlink($output);
	}
};

# Read the options for custom transformations
while (1)
{
	my %tmp = ("d+" => \$debug,
	"debug=i" => \$debug,
	"h|help" => sub { pod2usage(-exitval => 0, -verbose => 2); },
	"shift" => sub { $shift = 1; },
	"catfiles!" => \$catfiles,
	"stdtransforms!" => sub { $stdtransforms = $_[1]; if(!$_[1]) { @TRANSFORMS = ()} },
	"set=s" => \%globals,
	"seteval=s" => sub { my ($name, $val) = split(/=/, $_[1], 2); my $evaled = eval($val); if ($@) { die "Could not evaluate $val: $@"; } $globals{$name} = $evaled; },
	"push=s" => sub { my ($name, $val) = split(/=/, $_[1], 2); push(@{$globals{$name}}, $val); },
	"pusheval=s" => sub { my ($name, $val) = split(/=/, $_[1], 2); my $evaled = eval($val); if ($@) { die "Could not evaluate $val: $@"; } push(@{$globals{$name}}, $evaled); },
	"transform=s" => sub { my (@transform) = eval($_[1]); if ($@) { die "Could not evaluate: " . $_[1] . ": $@\n"; } push (@TRANSFORMS, @transform)},
	"shiftform=s" => sub { my (@transform) = eval($_[1]); if ($@) { die "Could not evaluate: " . $_[1] . ": $@\n"; } unshift (@TRANSFORMS, @transform)},
	"source=s" => \&do_source,
	"o|output=s" => \&do_output,
	"v|version" => \&versioninfo,
	);
	while (my ($k, $v) = each(%tmp)) {
		$options{$k} = $v if (!exists($options{$k}));
	}
	GetOptions(%options);
	if ($#ARGV < 0) {
		last;
	}
	if ($ARGV[0] eq "--") {
		last
	} elsif ($ARGV[0] =~ /^--(\w+)$/) {
		my $cmd = $1;
		shift(@ARGV);
		my $builtin = 0;

		if (defined($functions{$cmd}) || defined($argcounts{$cmd})) {
			my @transform = ();
			my $max = $argcounts{$cmd};

			push(@transform, $cmd);

			while(($max-- != 0) && defined(my $arg = shift(@ARGV))) {
				if (!($arg =~ /^--/)) {
					push(@transform, $arg);
				} else {
					unshift(@ARGV, $arg);
					last;
				}
			}
			if ($shift) {
				$shift = 0;
				unshift(@TRANSFORMS, [@transform]);
			} else {
				push(@TRANSFORMS, [@transform]);
			}
		} else {
			die "Unknown function: $cmd\n";
		}
	} else {
		push(@FILES, shift(@ARGV))
	}
}

if ($shift) {
	if (defined($output) && $output) {
		unlink($output);
	}
	pod2usage("Shift specified on the command line, but no transform follows!\n");
}

foreach (@ARGV) {
	push(@FILES);
}

if ($stdtransforms) {
	push(@TRANSFORMS, ["warnpass"]);
	push(@TRANSFORMS, ["ohpass"]);
	push(@TRANSFORMS, ["summary"]);
}


if ($#FILES == -1) {
	if (defined($output) && $output) {
		unlink($output);
	}
	pod2usage("getstats requires at least one input file.\n");
}

my $filenumber = 0;
for my $file (@FILES) {
	if (defined($order{$file})) {
		next;
	}
	$order{$file} = $filenumber++;
	$valuehash{$file} = [readarray($file)];
}
$globals{'filecount'} = $filenumber;

# Handle concatenation
if ($catfiles) {
	my @return;
	my $labels;
	my %colhash = ();

	for my $file (sort { $order{$a} <=> $order{$b} } keys(%valuehash)) {
		my $aref = $valuehash{$file};
		if (!defined($labels)) {
			$labels = $aref->[0];
			if ($#$labels == -1) {
				die "$file: You must have at least one column.";
			}
			my $colorder = 0;
			foreach (@{$labels}) {
				$colhash{$_} = $colorder++;
			}
			foreach (@{$aref}) {
				push(@return, $_);
			}
		} else {
			my $l2 = $aref->[0];
			my %colmap = ();
			my %revmap = ();
			my $cnt = 0;
			foreach (@{$l2}) {
				if (!defined($colhash{$_})) {
					die "$file: $_ is not defined in all files.";
				} else {
					$colmap{$_} = 1;
					$revmap{$cnt++} = $_;
				}
			}
			foreach (keys(%colhash)) {
				die "$file: $_ is not defined in all files." if (!defined($colmap{$_}));
			}
			my @row;
			for (my $i = 1; $i <= $#$aref; $i++) {
				my $inrow = $aref->[$i];
				my $cnt = 0;
				foreach (@{$inrow}) {
					$row[$colhash{$revmap{$cnt++}}] = $_;
				}
				push(@return, [@row]);
			}
		}
	}

	undef(%valuehash);
	undef(%order);
	$valuehash{'Concatenated Results'} = \@return;
	$order{'Concatenated Results'} = 0;
}

# Now run it through however many passes it takes to do all of the transformations.
for (my $pass = 0; $pass <= $#TRANSFORMS; $pass++) {
	for my $file (sort { $order{$a} <=> $order{$b} } keys(%valuehash)) {
		$globals{'pass'} = $pass;
		$globals{'file'} = $file;
		$globals{'fileno'} = $order{$file};

		my @curpass = $TRANSFORMS[$pass];
		foreach my $transform (@curpass) {
			do_transform($transform, $valuehash{$file});
		}
	}
}

exit(0);
__END__
# The rest of this is for pod2usage to print out a nice helpful usage synopsis/manual page
__END__

=head1 NAME

getstats - The Auto-pilot data analysis tool

=head1 SYNOPSIS

B<getstats> [options] [transformations] (I<results file>)+

=head1 DESCRIPTION

Getstats transforms the input and produces tabular reports from Auto-pilot
results files, CSV files, and concatenated GNU time files.  Getstats is a
very flexible transformation engine, with several built-in functions that
can be executed over two-dimensional relations.  For more information see
the full users manual.

=head1 OPTIONS

=over 4

=item B<--[no]catfiles>

Whether or not to concatenate input files into one larger relation.  If  this
option is specified, getstats reads in each individual results file, then
creates a larger relation.  To concatenate two (or more) relations they must
have exactly the same columns.

=item B<--set> I<var>=I<val>

=item B<--seteval> I<var>=I<val>

B<--set> sets a global replacement variable, I<var>, to I<val>.  B<--seteval>
sets a global replacement variable, I<var>, to eval(I<val>). This can be used
to set variables to complex objects like arrays or hashes.

=item B<-o> I<file>

=item B<--output> I<file>

Standard out is redirected to I<file>.  If Getstats fails, then the file is
removed.  This is useful for use in Makefiles.

=item B<--push> I<var>=I<val>

=item B<--pusheval> I<var>=I<val>

B<--push> pushes I<val> onto the global replacement variable I<var>.
B<--pusheval> is Like push, but eval I<val> first.

=item B<--[no]stdtransforms>

Whether or not to use the standard transforms library.
The default is yes.

"readpass" is executed before any other transforms on the command line.
"warnpass", "ohpass" and "summary" are executed after other
transforms.

=item B<--source>

Load in a Perl source file.  This can be used to override functions in the
functions hash, push several complex transforms onto @code{@@TRANSFORMS}, or
whatever your heart desires (and Perl lets you do).

=item B<--transform>

=item B<--shiftform>

B<--transforms> adds a transform between "readpass" and "ohpass".  The
transform needs to be a valid Perl representation of a transform.

B<--shiftform> adds a transform before "readpass".  This transformation is the
first transformation executed, so they go in reverse order on the
command line.  The transform needs to be a valid Perl representation of
a transform.

=head1 TRANSFORMS

Any other option that is treated as a getstats transformation (e.g.,
"--foo") calls the Getstats transformation foo after "readpass".  If you
want to run the transformation before "readpass" specify --shift before
the transform (e.g., --shift --select '"\$command" eq "./configure"').

Not all builtin transformations are supported, specifically the control
transformations "if", "ifexist", "block", and "group" are not supported
(essentially anything that requires another transformation as its
argument).

All library functions, and other builtin transformations are supported.

If you use a function that takes many arguments, like describe then you must
terminate the arguments with -- or another option.  If you don't terminate it
then Getstats assumes your file names are input to that transform.

Getstats knows the number of arguments for the following transformations, if
you define your own, then Getstats assumes it has many arguments.

=head1 REQUIRES

Statistics::Descriptive, Statistics::PointEstimation, Statistics::Distributions,
Statistics::LineFit, Text::CSV, Getopt::Long, File::Glob, Errno

=head1 AUTHORS

Charles P. Wright <cwright@cs.sunysb.edu>,
Erez Zadok <ezk@cs.sunysb.edu>

=cut
