From d258cf9c8906d1d941604463e8fe01cfc16ced91 Mon Sep 17 00:00:00 2001
From: Barak Michener
Date: Fri, 21 Dec 2012 15:53:29 -0800
Subject: [PATCH] add average to stdbin

---
 bin/average | 171 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 171 insertions(+)
 create mode 100755 bin/average

diff --git a/bin/average b/bin/average
new file mode 100755
index 0000000..3a7988f
--- /dev/null
+++ b/bin/average
@@ -0,0 +1,171 @@
+#!/usr/bin/env perl
+
+# Copyright J.M.P. Alves 2008-2011 (jmalves@vcu.edu)
+# This software is licensed under the GNU General Public License v. 3
+# Please see http://www.fsf.org/licensing/licenses/gpl.html for details
+
+# first version 0.2, 2008-03-24, by J.
+# Last update 0.7 2011-07-29, by J.
+
+# Reads numbers (one per line, or one column of a whitespace-separated table)
+# from the files named on the command line, or from standard input, and prints
+# their mean, standard deviation, sum, median, minimum and maximum.
+# Run with -h for the full option list.
+
+use strict;
+use warnings;
+use Getopt::Long;
+use Scalar::Util qw(looks_like_number);
+Getopt::Long::Configure ("bundling");
+
+my($A,$s,$t,$l,$m,$h,$v,$d,$x,$n,$E,$c);
+
+GetOptions ('a' => \$A, 's' => \$s, 't' => \$t, 'l' => \$l, 'x' => \$x, 'e' => \$E,
+            'n' => \$n, 'm' => \$m, 'h' => \$h, 'v' => \$v, 'd=i' => \$d, 'c=i' => \$c);
+
+my @vals;
+my $version = "0.7.1";
+if($h) { print "Version: $version\n\n"; Help(); }
+if($v) { print "$version\n"; exit; }
+
+# printf conversion letter: E (scientific notation, -e) or f (fixed point).
+my $type = $E ? "E" : "f";
+# -c is 1-based on the command line; turn it into a 0-based array index.
+$c = $c ? $c - 1 : 0;
+# $d ends up as the precision plus conversion used after "%." below, e.g. "2f".
+$d = (defined $d ? $d : 2) . $type;
+
+while(<>) {
+    next if $_ =~ /^\s*$/;
+    chomp;
+    $_ =~ s/^\s*//;
+    my @tmp = split ' ', $_;
+    my $val = $tmp[$c];
+    # A line with fewer columns than requested has nothing to contribute.
+    next unless defined $val;
+    # Silently skip fields not made of number characters (headers, labels...).
+    next unless $val =~ /^[+\-\d.e]+$/i;
+    # Right characters but not a number (e.g. "1.2.3"): report on STDERR, so
+    # the statistics on STDOUT stay clean, and skip.
+    unless(looks_like_number($val)) {
+        print STDERR "WARNING: Possible non-numeric value found, ignored: $_\n";
+        next;
+    }
+    push @vals, $val;
+}
+
+# Too little data is an error: report it and exit with a non-zero status.
+unless(scalar(@vals)) { print STDERR "ERROR: No numerical values found. I quit.\n"; exit 1; }
+if(scalar(@vals) == 1) { print STDERR "ERROR: Only one numerical value (@vals) found. Nothing to do, so I quit.\n"; exit 1; }
+
+my($sum, $av, $sd, $median, $min, $max);
+
+($sum, $av) = avrg(@vals);
+$sd = stddev($av, @vals);
+($median,$min,$max) = median(@vals);
+
+# Output selection: -l, or no selector at all, prints everything; otherwise
+# the first match among -s, -a, -t, -m, -n, -x decides what is printed.
+if($l || !($A || $s || $t || $m || $x || $n)) {
+    printf "%.$d +/- %.$d, total %.$d, median %.$d, minimum %.$d, maximum %.$d, n = %d\n", $av, $sd, $sum, $median, $min, $max, scalar(@vals);
+    exit;
+}
+if($A && !$s) { printf "%.$d\n", $av; exit; }
+if($s) { printf "%.$d\t%.$d\n", $av, $sd; exit; }
+if($t) { printf "%.$d\n", $sum; exit; }
+if($m) { printf "%.$d\n", $median; exit; }
+if($n) { printf "%.$d\n", $min; exit; }
+if($x) { printf "%.$d\n", $max; exit; }
+
+exit;
+
+##############################
+
+# avrg(LIST): returns (sum, arithmetic mean) of LIST, or (0, 0) if LIST is empty.
+sub avrg {
+    my $size = scalar(@_);
+    my $sum = 0;
+    for my $Valor (@_) { $sum += $Valor; }
+    my $med = $size ? $sum/$size : 0;
+    return $sum, $med;
+}
+
+##############################
+
+# stddev(MEAN, LIST): sample standard deviation (n-1 denominator) of LIST around
+# MEAN. Returns 0 for fewer than two values, where it is undefined.
+sub stddev {
+    my($media, @Lista) = @_;
+    my $count = scalar(@Lista);
+    return 0 if $count < 2;
+    my $sum = 0;
+    # Iterate with a lexical variable; the package variable $a is reserved for
+    # sort blocks and must not double as a loop counter.
+    for my $valor (@Lista) { $sum += ($valor - $media) ** 2; }
+    return sqrt($sum/($count-1));
+}
+
+##############################
+
+# median(LIST): returns (median, minimum, maximum) of LIST.
+sub median {
+    my @list = sort {$a<=>$b} @_;
+    my $mid = int(scalar(@list)/2);
+    # Odd count: the middle element. Even count: mean of the two middle ones.
+    my $median = scalar(@list) % 2 ? $list[$mid] : ($list[$mid-1] + $list[$mid])/2;
+    return $median, $list[0], $list[$#list];
+}
+
+##############################
+
+# Help(): prints the usage text stored after __DATA__ and exits.
+sub Help {
+    my (@stuff) = <DATA>;
+    print @stuff;
+    exit;
+}
+
+##############################
+
+__DATA__
+average
+-------
+
+Usage:
+  average [options] [file ...]
+
+Synopsis:
+  Takes a series of numbers and calculates simple statistics: average (arithmetic
+  mean), standard deviation, median, total sum, and minimum and maximum values
+  present. For version 0.6 and later, also works with scientific notation numbers.
+
+  Numbers can be in a file or presented from standard input (press control-d
+  to end number input after last number). Output is to standard output.
+
+  Input can also have more than one column, in which case the column to use
+  in calculations can be determined using the -c option. Otherwise, the first
+  column is used (leading spaces are ignored; repeated whitespace is considered
+  as one).
+
+Options:
+  -d <n>  Number of decimal places to show (default: 2);
+  -c <n>  Column to use for calculations (default: 1);
+  -e      Output in scientific notation (e.g. 1E12);
+  -a      Shows only the arithmetic mean;
+  -s      Shows arithmetic mean and the standard deviation;
+  -t      Shows only the total sum of the numbers;
+  -m      Shows only the median;
+  -n      Shows only the minimum value;
+  -x      Shows only the maximum value;
+  -l      Long format, presenting all of the above (default);
+  -v      Prints program version and exits;
+  -h      Prints this help message and exits.
+
+  * Options listed first have precedence over the ones below; e.g. if the user
+    uses both -t and -n, only -t will have an effect (total sum only will be shown).
+  * If average is used without any options, all statistics are shown (same as -l).
+
+Copyright J.M.P. Alves 2008-2011 (jmalves@vcu.edu)
+This software is licensed under the GNU General Public License v. 3.
+Please see http://www.fsf.org/licensing/licenses/gpl.html for details.
+