158 lines
4.6 KiB
Perl
Executable file
158 lines
4.6 KiB
Perl
Executable file
#!/usr/bin/env perl
|
|
|
|
# Copyright J.M.P. Alves 2008-2011 (jmalves@vcu.edu)
|
|
# This software is licensed under the GNU General Public License v. 3
|
|
# Please see http://www.fsf.org/licensing/licenses/gpl.html for details
|
|
|
|
# first version 0.2, 2008-03-24, by J.
|
|
# Last update 0.7 2011-07-29, by J.
|
|
|
|
use strict;
use warnings;
use Getopt::Long;
use Scalar::Util qw(looks_like_number);
|
|
Getopt::Long::Configure ("bundling");
|
|
|
|
# Command-line switches (see the help text in the DATA section):
#   -a mean only, -s mean and standard deviation, -t total, -l long format,
#   -m median, -n minimum, -x maximum, -e scientific notation,
#   -h help, -v version, -d <int> decimal places, -c <int> column number.
my($A,$s,$t,$l,$m,$h,$v,$d,$x,$n,$E,$c);

# GetOptions returns false when the command line could not be parsed
# (unknown switch, missing or non-integer argument). Stop instead of
# silently computing statistics with options the user did not intend.
GetOptions(
    'a'   => \$A,
    's'   => \$s,
    't'   => \$t,
    'l'   => \$l,
    'x'   => \$x,
    'e'   => \$E,
    'n'   => \$n,
    'm'   => \$m,
    'h'   => \$h,
    'v'   => \$v,
    'd=i' => \$d,
    'c=i' => \$c,
) or do {
    print STDERR "ERROR: Invalid command line options. Use -h for help.\n";
    exit 1;
};

# Numeric values collected from the input, filled by the reading loop below.
my @vals;
my $version = "0.7.1";

if ($h) { print "Version: $version\n\n"; Help(); }
if ($v) { print "$version\n"; exit; }

# A negative precision would yield an invalid printf conversion such as
# "%.-1f", so reject it up front.
if (defined $d && $d < 0) {
    print STDERR "ERROR: Number of decimal places (-d) cannot be negative.\n";
    exit 1;
}

# A negative column would index the fields from the end of the line (and be
# off by one after the conversion below), which is never what -c documents.
if (defined $c && $c < 0) {
    print STDERR "ERROR: Column number (-c) cannot be negative.\n";
    exit 1;
}

# printf conversion letter: "E" for scientific notation, "f" otherwise.
my $type = $E ? "E" : "f";

# Convert the 1-based column number to a 0-based array index; an absent
# (or zero) -c selects the first column.
$c = $c ? $c - 1 : 0;

# $d becomes the precision plus conversion letter, e.g. "2f", and is later
# interpolated after "%." in every printf format.
$d = (defined $d ? $d : 2) . $type;
|
|
|
|
# Read every input line (files named on the command line, or STDIN), pick
# the field at index $c and store it in @vals when it is a number.
while (my $line = <>) {
    next if $line =~ /^\s*$/;    # skip blank lines

    chomp $line;
    $line =~ s/^\s+//;           # leading whitespace is not a column

    # split ' ' splits on runs of whitespace, so repeated blanks or tabs
    # count as a single separator.
    my @fields = split ' ', $line;
    my $value  = $fields[$c];

    # Lines with fewer columns than requested have nothing to contribute.
    next unless defined $value;

    # Silently skip fields that do not even look numeric (headers, labels):
    # only signs, digits, dots and an exponent marker are allowed.
    next unless $value =~ /^[+\-\d.e]+$/i;

    # Fields built from the right characters but malformed (e.g. "1e",
    # "--", "1.2.3") are reported and ignored. The diagnostic goes to
    # STDERR so it cannot be mistaken for part of the numeric result.
    unless (looks_like_number($value)) {
        print STDERR "WARNING: Possible non-numeric value found, ignored: $line\n";
        next;
    }

    push @vals, $value;
}
|
|
|
|
# Statistics need at least two values. Both failure paths report on STDERR
# and exit with a non-zero status so that calling scripts can detect them.
unless (@vals) {
    print STDERR "ERROR: No numerical values found. I quit.\n";
    exit 1;
}

if (@vals == 1) {
    print STDERR "ERROR: Only one numerical value (@vals) found. Nothing to do, so I quit.\n";
    exit 1;
}

my($sum, $av, $sd, $median, $min, $max);

($sum, $av)           = avrg(@vals);
$sd                   = stddev($av, @vals);
($median, $min, $max) = median(@vals);

# Long format: requested explicitly with -l, or implied when no statistic
# was selected. It takes precedence over every other selector.
if ($l || !($A || $s || $t || $m || $x || $n)) {
    printf "%.$d +/- %.$d, total %.$d, median %.$d, minimum %.$d, maximum %.$d, n = %d\n", $av, $sd, $sum, $median, $min, $max, scalar(@vals);
    exit;
}

# Single-statistic output; the first matching selector wins.
if    ($A && !$s) { printf "%.$d\n", $av; }
elsif ($s)        { printf "%.$d\t%.$d\n", $av, $sd; }
elsif ($t)        { printf "%.$d\n", $sum; }
elsif ($m)        { printf "%.$d\n", $median; }
elsif ($n)        { printf "%.$d\n", $min; }
elsif ($x)        { printf "%.$d\n", $max; }

exit;
|
|
|
|
##############################
|
|
|
|
# avrg(@numbers)
#
# Returns a two-element list: the sum of @numbers and their arithmetic mean.
# For an empty list both elements are 0, so callers never receive undef.
sub avrg {
    my $size = scalar(@_);
    my $sum  = 0;    # start from 0 so an empty list yields a defined total

    for my $Valor (@_) { $sum += $Valor; }

    # Guard against division by zero when no numbers were supplied.
    my $med = $size ? $sum / $size : 0;

    return $sum, $med;
}
|
|
|
|
##############################
|
|
|
|
# stddev($mean, @numbers)
#
# Returns the sample standard deviation of @numbers around $mean, i.e. the
# square root of the summed squared deviations divided by (count - 1).
# With fewer than two numbers that divisor would be zero (or negative), so
# 0 is returned instead of dying with a division-by-zero error.
sub stddev {
    my ($media, @Lista) = @_;

    my $count = scalar(@Lista);
    return 0 if $count < 2;

    my $sum = 0;
    for my $valor (@Lista) {
        $sum += ($valor - $media) ** 2;
    }

    return sqrt($sum / ($count - 1));
}
|
|
|
|
##############################
|
|
|
|
# median(@numbers)
#
# Sorts @numbers numerically and returns a three-element list:
# (median, smallest value, largest value). For an odd count the median is
# the middle element; for an even count it is the mean of the two middle
# elements, computed with avrg().
sub median {
    my @sorted = sort { $a <=> $b } @_;
    my $count  = scalar(@sorted);
    my $middle;

    if ($count % 2) {
        # Odd count: a single middle element exists.
        $middle = $sorted[ int($count / 2) ];
    }
    else {
        # Even count: average the two elements that straddle the centre.
        my $lower = $count / 2 - 1;
        (undef, $middle) = avrg($sorted[$lower], $sorted[$lower + 1]);
    }

    return $middle, $sorted[0], $sorted[$#sorted];
}
|
|
|
|
##############################
|
|
|
|
# Help()
#
# Prints the help text stored in the DATA section of this file to standard
# output and terminates the program.
sub Help {
    my @help_lines = <DATA>;
    print @help_lines;
    exit;
}
|
|
|
|
##############################
|
|
|
|
__DATA__
|
|
average
|
|
-------
|
|
|
|
Usage:
|
|
average [options]
|
|
|
|
Synopsis:
|
|
Takes a series of numbers and calculates simple statistics: average (arithmetic
|
|
mean), standard deviation, median, total sum, and minimum and maximum values
|
|
present. For version 0.6 and later, also works with scientific notation numbers.
|
|
|
|
Numbers can be in a file or presented from standard input (press control-d
|
|
to end number input after last number). Output is to standard output.
|
|
|
|
Input can also have more than one column, in which case the column to use
|
|
in calculations can be determined using the -c option. Otherwise, the first
|
|
column is used (leading spaces are ignored; repeated whitespace is considered
|
|
as one).
|
|
|
|
Options:
|
|
-d Number of decimal places to show (default: 2);
|
|
-c Column to use for calculations (default: 1);
|
|
-e Output in scientific notation (e.g. 1E12);
|
|
-a Shows only the arithmetic mean;
|
|
-s Shows arithmetic mean and the standard deviation;
|
|
-t Shows only the total sum of the numbers;
|
|
-m Shows only the median;
|
|
-n Shows only the minimum value;
|
|
-x Shows only the maximum value;
|
|
-l Long format, presenting all of the above (default);
|
|
-v Prints program version and exits;
|
|
-h Prints this help message and exits.
|
|
|
|
* Options listed first have precedence over the ones below; e.g. if the user
uses both -t and -n, only -t will have an effect (total sum only will be shown).
The exception is -l, which overrides -a, -s, -t, -m, -n and -x when given.
|
|
* If average is used without any options, all statistics are shown (same as -l).
|
|
|
|
Copyright J.M.P. Alves 2008-2011 (jmalves@vcu.edu)
|
|
This software is licensed under the GNU General Public License v. 3.
|
|
Please see http://www.fsf.org/licensing/licenses/gpl.html for details.
|
|
|