home *** CD-ROM | disk | FTP | other *** search
- Newsgroups: comp.lang.perl
- Path: sparky!uunet!caen!uvaarpa!mmdf
- From: Dov Grobgeld <dov@menora.weizmann.ac.il>
- Subject: Re: Statistic routines ?
- Message-ID: <1992Dec14.075602.5410@uvaarpa.Virginia.EDU>
- Sender: mmdf@uvaarpa.Virginia.EDU (Mail System)
- Reply-To: dov@menora.weizmann.ac.il
- Organization: The Internet
- Date: Mon, 14 Dec 1992 07:56:02 GMT
- Lines: 270
-
- For all it's worth. Here are a couple of routines that I have written
- that I find very useful in my data manipulations.
-
- <<File: histbin>>
- #!/usr/local/bin/perl
- #############################################################################
- # histbin
- #
- # version 0.1
- #
- # Bin data for a histogram.
- #
- # Dov Grobgeld
- # Department of Chemical Physics
- # The Weizmann Institute of Science
- # Rehovot, Israel
- # Email: dov@menora.weizmann.ac.il
- #
- # 1992-06-16
- #############################################################################
-
- # Get options
- while (($_ = $ARGV[0]) =~ /^-/) {
- shift;
- if (/-nBins$/i) {
- $nBins= $ARGV[0];
- shift;
- }
- elsif (/-intv$/i) {
- $binMin= $ARGV[0]; shift;
- $binMax= $ARGV[0]; shift;
- }
- elsif (/-n(orm)?/i) {
- $normalize++;
- }
- elsif (/-h(elp)?$|-u(sage)?$/i) {
- print<<EOH
- histbin - Bins data according to user specifications
-
- Syntax:
-
- histbin [-nBins n] [-intv min max] [-norm] <file>
-
- where <file> is a text file containing a column of values to bin.
-
- Options:
-
- -nBins n Number of bins. Default is n=100.
- -intv min max Interval for binning. Default is min=0 and max=1.
- -norm Normalize the histogram.
- EOH
- ;
- exit;
- }
- }
-
-
- # Set default values
- $binMin=0.0 unless defined $binMin;
- $binMax=1.0 unless defined $binMax;
- $nBins=100 unless defined $nBins;
-
- # Construct index array...
- for $i (0..$nBins-1) {
- $xBins[$i]= $binMin + (1.0 * $i / $nBins) * ($binMax - $binMin);
- }
-
- # Clear the bin array
- for $i (0..$nBins-1) {
- $binCount[$i]=0;
- }
-
- # Read the data file
- while (<>) {
- next if (/^#/); # Skip comments
- ($x=$_) =~ s/^\s+//; $x=~ s/\n//;
- next unless $x=~ /^\d+(\.\d*)?([de][\+\-]?\d+)?$/o;
- if (++$numEntries % 100 == 0) {printf STDERR ("%5d\r",$numEntries); }
- $binIdx= int(1.0 * ($x-$binMin) * $nBins / ($binMax - $binMin));
- $binCount[$binIdx]++ unless $binIdx < 0;
- if ($binIdx >= $nBins || $binIdx < 0) {
- printf STDERR "Warning! Value $x outside of binning range\n";
- }
- }
-
- # print the data
- for $i (0..$nBins-1) {
- if ($normalize) {$binCount[$i] = $binCount[$i]/$numEntries; }
- printf ("%12.8f %15.8g\n",$xBins[$i], $binCount[$i]);
- }
-
- print STDERR "Done binning $numEntries values!\n";
-
-
- <<File: histrebin>>
- #!/usr/local/bin/perl
- ##########################################################################
- # histrebin
- #
- # version 0.1
- #
- # Reads a file of bin values and frequency count and rebins it by
- # a user defined factor. Optionally normalizes the histogram.
- #
- # Dov Grobgeld
- # Department of Chemical Physics
- # The Weizmann Institute of Science
- # Rehovot, Israel
- # Email: dov@menora.weizmann.ac.il
- #
- # 1992-06-16
- ##########################################################################
-
- # Get options
- while (($_ = $ARGV[0]) =~ /^-/) {
- shift;
- if (/-n(orm)?$/) {
- $normalize++;
- }
- elsif (/-s(trip)?$/) {
- $stripComment++;
- }
- elsif (/-h(elp)?$|-u(sage)$/) {
- print <<EOHELP
- histrebin - rebin a histogram
-
- Syntax:
-
- histrebin [-n] [-s] <file> <factor>
-
- where <file> is the file to rebin by a a factor of <factor>. <factor>
- will default to 1, if left out.
-
- histrebin will sum the values of each <factor> bins and create one
- new bin with the accumulated sum.
-
- Options:
-
- -n will cause the histogram to be normalized.
- -s will strip comments.
-
- Examples:
-
- 1. Rebin a histogram by a factor of 4. This would e.g. create 25
- bins instead of 100.
-
- histrebin binned100.dat 4 > binned25.dat
-
- 2. Sum all the 100 bins in the file binned100.dat
-
- histrebin binned100.dat 100
-
- 3. Normalize the histogram in binned100.dat
-
- histrebin -norm binned100.dat > binned100norm.dat
-
- EOHELP
- ; exit;
- }
- else {
- die "Unknown option $_!\n";
- }
- }
-
- $infile=shift;
- open (IN, $infile) || die "No such file $infile!\n";
- ($factor=shift) || ($factor=1);
- if ($factor==0) { die "Invalid factor!\n"; }
-
- # If normalize then go through the whole file and sum the histogram...
- $totalsum=0.0;
- if ($normalize) {
- while (<IN>) {
- next if (/^#/);
- if (/^\s*(\S+)\s+(\S+)/) { $totalsum += $2; }
- }
- close(IN);
- open(IN, $infile);
- }
-
- $bunchindex=0; # Number of bins summed into the bunch
- $bunchsum=0; # Sum of all entries in the bin
- $binlow=0; # Lower limit of the bin
- while (<IN>) {
- if (/^#/) {
- print unless ($stripComment);
- }
- elsif (/^\s*(\S+)\s+(\S+)/) { # Assume naively that these are numbers...
- $bunchindex++;
- $binlow = 0.0 + $1 if ($bunchindex == 1); # Convert to floating point
- $bunchsum += $2;
- if ($bunchindex == $factor) {
- $bunchsum= 1.0 * $bunchsum/$totalsum if $normalize;
- printf("%8.5f %10.8g\n", $binlow, $bunchsum);
- $bunchsum=0;
- $bunchindex=0;
- }
- }
- }
-
- <<File: cutcol>>
- #!/usr/local/bin/perl
- ##############################################################################
- # cutcol
- #
- # Version 0.1
- #
- # Cut columns out of a data file. Great as a prefilter for xgraph.
- #
- # Dov Grobgeld
- # Email: dov@menora.weizmann.ac.il
- # 1992-06-16
- ###############################################################################
- while (($_= $ARGV[0]) =~ /^-/) {
- shift;
- if (/-c$/) { # Other columns than default ones
- $col1= $ARGV[0]; shift;
- $col2= $ARGV[0]; shift;
- }
- elsif (/-s$/) {
- $strip++;
- }
- elsif (/^-h(elp)?$|^-u(sage)?$/) {
- print <<EOH
- cutcol - Cuts 2 columns out of a file
-
- Syntax:
-
- cutcol [-c n1 n2] [-h] [-s]
-
- Options:
-
- -c n1 n2 Specify columns to cut out other than the default n1=1, n2=2
-
- -h This help file
-
- -s Strip comments
- EOH
- ;
- exit;
- }
- }
-
- # Default values
-
- $col1=1 unless defined $col1;
- $col2=2 unless defined $col2;
-
- $col1--; $col2--; # Normalize to zero
-
- while(<>) {
- if (/^\#/) {
- print unless $strip;
- next;
- }
- s/^\s+//; # Strip leading spaces
- @columns= split;
- print "$columns[$col1] $columns[$col2]\n";
- }
-
-
-
- --
- ___ ___
- / o \ o \
- Dov Grobgeld ( o o ) o |
- The Weizmann Institute of Science, Israel \ o /o o /
- "Where the tree of wisdom carries oranges" | | | |
- _| |_ _| |_
-
-