Beefy Boxes and Bandwidth Generously Provided by pair Networks
The stupid question is the question not asked
 
PerlMonks  

Statistics::Descriptive::ByColumns

by ZZamboni (Curate)
on Jun 12, 2001 at 00:20 UTC ( #87640=sourcecode: print w/ replies, xml ) Need Help??

Category: Math and Statistics
Author/Contact Info Diego Zamboni (zzamboni@perlmonk.org)
Description: This module is a wrapper around Statistics::Descriptive to make it easier to store and get statistics on diferent columns of data. Each column is considered as a separate data set, and its values are added to a different Statistics::Descriptive object that can later be obtained and queried.

Still missing proper POD documentation. See sample program after __END__. Sample data:

93 97 84 70 52 30 20 54 68 2 38 33 26 33 10 45 2 39 96 18 34 79 52 70 86 11 32 67 46 19
#
# Statistics::Descriptive::ByColumns module
# Compute statistics on columns of data.
#
# Idea based on a program by Benjamin Kuperman
# Rewritten in module form by Diego Zamboni
#
# June 6, 2001.

package Statistics::Descriptive::ByColumns;

use strict;
use Carp;
use Statistics::Descriptive;
use vars qw(%default_params);

=head1 NAME

ByColumns - Compute statistic values on columns of data

=head1 SYNOPSIS

  $s=Statistics::Descriptive::ByColumns->new(type => 'Full', columns =
+> 5);
  $s->read_file("values.dat");
  $s->add_row(5, 10, 23, 81, 150);
  $s->add_line("35 12 7 3.2 81");
  $s->add_to_row(1, 38, 25, 41, 12, 99);
  $s->print;
  @objs=$s->get_objects;
  $c1=$s->get_object(1);

=head1 DESCRIPTION

This module is a wrapper around C<Statistics::Descriptive> to make
it easier to store and get statistics on diferent columns of data.
Each column is considered as a separate data set, and its values
are added to a different C<Statistics::Descriptive> object that
can later be obtained and queried.

=cut

%default_params = (
                   type => 'Full',
                   columns => 3,
                  );

# Create a new object, with the specified (or default) parameters.
sub new {
  my $what=shift;
  my $class=ref($what)||$what;
  my $self={@_};
  foreach (keys %default_params) {
    $self->{$_} = $default_params{$_} unless exists($self->{$_});
  }
  # Check parameters
  croak "Invalid type parameter '$self->{type}'"
    unless $self->{type} eq 'Full' || $self->{type} eq 'Sparse';
  croak "Invalid number of columns: $self->{columns}"
    if $self->{columns}<1;
  # Create the objects
  $self->{objs}=[
                 map {
                   ($self->{type} eq 'Full')?
                     Statistics::Descriptive::Full->new():
                     Statistics::Descriptive::Sparse->new();
                 } 1 .. $self->{columns}
                ];
  bless $self, $class;
  return $self;
}

# Add a row of values.
sub add_row {
  my $self=shift;
  my @vals=@_;
  if (@vals != $self->{columns}) {
    carp "Invalid number of columns: got ".scalar(@vals).
      ", need $self->{columns}";
    return;
  }
  foreach my $o (@{$self->{objs}}) {
    $o->add_data(shift @vals);
  }
  return $self;
}

# Split (on whitespace) and add lines of text.
sub add_line {
  my $self=shift;
  my @lines=@_;
  foreach (@lines) {
    chomp;
    my @vals=split;
    $self->add_row(@vals)
      or carp "Invalid line, ignored: $_\n";
  }
  return $self;
}

# Add data to a specific column
sub add_to_column {
  my $self=shift;
  my $col=shift;
  my @vals=@_;
  if ($col<0 || $col>=$self->{columns}) {
    carp "Invalid column number $col. Valid range: 0-".($self->{column
+s}-1);
    return;
  }
  $self->{objs}->[$col]->add_data(@vals);
  return $self;
}

# Read data from a file. Argument is a filename or a handle.
sub read_file {
  my $self=shift;
  my $fname=shift;
  my $h;
  if (!ref($fname)) {
    # It's a filename
    open IN, "<$fname"
      or do { carp "Error opening file '$fname': $!"; return };
    $h=\*IN;
  }
  else {
    # It's a handle
    $h=$fname;
  }
  while (<$h>) {
    chomp;
    my @vals=split;
    $self->add_row(@vals)
      or carp "Invalid line $., ignored: $_\n";
  }
  close IN;
  return $self;
}

# Get the Statistics::Descriptive objects
sub get_objects {
  my $self=shift;
  return @{$self->{objs}};
}

# Get a specific object
sub get_object {
  my $self=shift;
  my $col=shift;
  if ($col<0 || $col>=$self->{columns}) {
    carp "Invalid column number $col. Valid range: 0-$self->{columns}"
+;
    return;
  }
  return $self->{objs}->[$col];
}

# Print the currently accumulated values.
# Can be passed a handle to print to and a header
sub print {
  my $self=shift;
  my $h=shift || \*STDOUT;
  my $hdr=shift;
  my $c=0;
  my $f=($self->{type} eq 'Full');

  if ($hdr) {
    print "#" x length $hdr;
    print "####\n";
    print "# $hdr #\n";
    print "#" x length $hdr;
    print "####\n";
  }
  foreach my $o (@{$self->{objs}}) {
    print $h "Column $c:\n";
    printf $h "\t   Number of items: %d\n", $o->count();
    printf $h "\t              Mean: %f\n", $o->mean();
    printf $h "\t            Median: %f\n", $o->median() if $f;
    printf $h "\t              Mode: %f\n", ($o->mode()||0) if $f;
    printf $h "\t          variance: %f\n", $o->variance();
    printf $h "\tstandard_deviation: %f\n", $o->standard_deviation();
    printf $h "\t               min: %f\n", $o->min();
    printf $h "\t               max: %f\n", $o->max();
    printf $h "\t   Middle 50%% mean: %f\n", $o->trimmed_mean(.25,.25)
+ if $f;
    $c++;
  }
}

1;

__END__

#!perl -w
# Usage: col-stats.pl numcols [file ...]

use Statistics::Descriptive::ByColumns;

my $n=shift @ARGV
  or die "Usage: $0 numcols [file ...]\n";

unless (@ARGV) {
        @ARGV=(\*STDIN);
}
foreach my $f (@ARGV) {
  my $s=Statistics::Descriptive::ByColumns->new(columns => $n);
  $s->read_file($f);
  $s->print(undef, ref($f)?undef:$f);
}

Comment on Statistics::Descriptive::ByColumns
Download Code

Back to Code Catacombs

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://87640]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others wandering the Monastery: (19)
As of 2015-07-02 16:37 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (44 votes), past polls