Beefy Boxes and Bandwidth Generously Provided by pair Networks
We don't bite newbies here... much
 
PerlMonks  

Data Regression Utility

by ~~David~~ (Hermit)
on Sep 08, 2004 at 21:59 UTC ( #389485=CUFP: print w/ replies, xml ) Need Help??

Here is a program that can do data regression and plots the information in Tk. You can also save the data to an excel file and it will chart it for you automatically.
#!c:\perl\bin\wperl.exe -w use Win32::OLE qw(in with); use Win32::OLE::Const 'Microsoft Excel'; use Win32::OLE::Variant; use Win32::OLE::NLS qw(:LOCALE :DATE); $Win32::OLE::Warn = 3; # Die on Errors. require Tk; use Tk; use Tk::Font; use Tk::Text; use Tk::ROText; use strict; #Creating the Main Window my $mw = MainWindow->new; $mw -> title("X Y Plotting Window"); #$mw -> geometry("800x600"); #Now I need to create the frames to house the widgets my $menuframe = $mw -> Frame( -relief => "groove", -borderwidth => 2 )->pack( -side => "top", -fill => "x", ); my $midhold = $mw -> Frame() -> pack( -side => "top", -padx => 5, -pady => 5, -fill => "x" ); my $xyframe = $midhold -> Frame() -> pack( -side => "left", -padx => 5, -pady => 5 ); my $plotframe = $midhold -> Frame() -> pack( -side => "right", -padx => 5, -pady => 5 ); my $statusframe = $mw -> Frame() -> pack( -side => "bottom", -padx => 5, -pady => 5, -fill => "x" ); my $calcframe = $mw -> Frame() -> pack( -side => "bottom", -padx => 5, -pady => 5, -fill => "x" ); ############################################################ #Create widgets for Menu my $file_mw = $menuframe -> Menubutton ( -text => "File", -activeforeground => "grey" ) -> pack (-side => 'left'); $file_mw -> command( -label => "Save", -command => sub{&SAVE} ); $file_mw -> command( -label => "Exit", -command => sub{$mw -> destroy} ); my $help_mw = $menuframe -> Menubutton ( -text => "Help", -activeforeground => "grey" ) -> pack (-side => 'right'); $help_mw -> command( -label => "Assumptions", -command => sub{&ASSUME}); $help_mw -> separator(); $help_mw -> command( -label => "About", -command => sub{&ABOUT}); ############################################################ ############################################################ #Create widgets and labels for XY window my $xlabel = $xyframe -> Label( -text => "Temp (X)", -relief => "raised", -padx => 5 ) -> grid(-column => 2, -row => 1, -padx => 5); my $ylabel = $xyframe -> Label( -text => "Etch Rate (Y)", -relief => "raised", -padx => 5 ) -> grid(-column => 3, -row => 1, -padx => 5); my @xinit = qw(-- -- -- -- -- -- -- -- -- --); our ($isLast, @radio, @x, @y); our (@xbox, @ybox); our $i =0; for (@xinit) { my $rowlabel = $i+1; my $row = $i+2; $xyframe->Label(-text=>$rowlabel)->grid(-row=>$row,-column=>1) +; $xbox[$i] = $xyframe->Entry(-bg=>'white', -width => 10, -textv +ariable => \$x[$i], -justify => "center")->grid(-row=>$row,-column=>2 +, -padx => 5, -pady => 5); $ybox[$i] = $xyframe->Entry(-bg=>'white', -width => 10, -textv +ariable => \$y[$i], -justify => "center")->grid(-row=>$row,-column=>3 +, -padx => 5, -pady => 5); $i++; } ############################################################ ############################################################ #Create widgets and labels for Plotting Frame our $cv = $plotframe -> Canvas( -width => 350, -height => 350, -bg => "white", ) -> pack(); our ($m, $b, $rsqrd); my $slopetext = $plotframe -> Label( -text => "Slope = " ) -> pack(-side => "left"); my $slopeentry = $plotframe -> Entry( -textvariable => \$m, -width => 6, -justify => "center" ) -> pack(-side => "left", -padx => 5, -pady => 5); my $btext = $plotframe -> Label( -text => "Intercept = " ) -> pack(-side => "left"); my $bentry = $plotframe -> Entry( -textvariable => \$b, -width => 6, -justify => "center" ) -> pack(-side => "left", -padx => 5, -pady => 5); my $rtext = $plotframe -> Label( -text => "Rsqrd = ", ) -> pack(-side => "left"); my $rentry = $plotframe -> Entry( -textvariable => \$rsqrd, -width => 6, -justify => "center" ) -> pack(-side => "left", -padx => 5, -pady => 5); my $xaxis = $cv -> createLine( 35, 310, 340, 310); my $yaxis = $cv -> createLine( 40, 10, 40, 315); my $xaxistext = $cv -> createText (175, 335, -text => "X-Axis", -tag = +> "xaxis"); my $yaxistext = $cv -> createText (20, 175, -text => "Y-Axis", -tag => + "yaxis"); ############################################################ ############################################################ #Create widgets and labels for Calculate Frame my $calcbutton = $calcframe -> Button( -width => 9, -height => 1, -text => "Calculate", -command => \&CALC ) -> pack(-side => "left", -padx => 30); our $linorArh; my $arhchoice = $calcframe -> Radiobutton( -value => "A", -variable => \$linorArh, ) -> pack (-side => "right"); my $arhlabel = $calcframe -> Label( -text => "Arrhenius" ) -> pack (-side => "right"); my $linearchoice = $calcframe -> Radiobutton( -value => "L", -variable => \$linorArh, ) -> pack (-side => "right"); my $linlabel = $calcframe -> Label( -text => "Linear" ) -> pack (-side => "right"); my $choicetext = $calcframe -> Label( -text => "Data Fit Type: ", -font => '{arial bold} 10' )-> pack (-side => "right"); ############################################################ ############################################################ #Create widgets and labels for Statusbar Frame our $statustext; my $statuslabel = $statusframe -> Label( -text => "Status: " ) -> pack(-side => "left"); my $status =$statusframe -> Entry( -textvariable => \$statustext, -foreground => 'red', -width => 80 ) -> pack(-side => "left"); MainLoop(); sub CALC { our (@x, @y, $linorArh, @xs, @ys, $m, $b, $rsqrd, $cv); my $i; @xs = (0); #did this to keep the @xs and @ys arrays always starti +ng out with one value @ys = (0); for ($i = 0; $i <= $#x; $i++){ if (defined($x[$i])){ if ($x[$i] =~ m/[\d]/){ push (@xs, $x[$i]) } } } for ($i = 0; $i <= $#y; $i++){ if (defined($y[$i])){ if ($y[$i] =~ m/[\d]/){ push (@ys, $y[$i]) } } } shift (@xs); #need to remove the 0 we started with shift (@ys); if (defined($linorArh)){ my ($xref, $yref); if ($#xs > 0){ if ($#xs == $#ys){ #make sure the arrays are the same siz +e if ($linorArh eq "L"){ $xref = \@xs; #generate array refs to dump into + linearfit sub $yref = \@ys; ($m, $b, $rsqrd) = linearfit($xref, $yref); } elsif($linorArh eq "A") { @xs = map { 1 / $x[$_]} 0..$#xs; #convert x and +y to arrhenius @ys = map {log $y[$_]} 0..$#ys; $xref = \@xs; $yref = \@ys; ($m, $b, $rsqrd) = linearfit($xref, $yref); } $m = sprintf("%.3f", $m); $b = sprintf("%.3f", $b); $rsqrd = sprintf("%.3f", $rsqrd); $statustext = "Calculation Complete"; our $iexist; if ($iexist == 0){ my $evalbutton = $calcframe -> Button( -width => 9, -height => 1, -text => "Evaluate", -command => \&EVAL ) -> pack(-side => "left", -padx => 30); $iexist = 1; } } else { $statustext = "Your x and y columns need to be the sam +e size"; } } else { $statustext = "You need more than one value to perform reg +ressions"; } } else { $statustext = "You need to select Linear or Arhenius Data Type +"; } my ($xmax, $ymax, $xpoint1, $xpoint2, $ypoint1, $ypoint2, $xmin, $ +ymin); if (defined($m)){ $xmax = max(@xs); $ymax = max(@ys); $xmin = min(@xs); $ymin = min(@ys); $ypoint1 = ($m * $xmin) + $b; #the next points are the coordi +nates of the regression line $ypoint2 = ($m * $xmax) + $b; $xpoint1 = $xmin; $xpoint2 = $xmax; #here we need to scale all of the values to fit within our can +vas area. my $mx = 300 / ($xmax - $xmin); my $bx = 40 - ($mx * $xmin); my $my = -300 / ($ymax - $ymin); my $by = 310 - ($my * $ymin); our @xcoords = map{($_ * $mx) + $bx}@xs; our @ycoords = map{($_ * $my) + $by}@ys; my (@point, $regressline); $cv -> delete("points", "line"); #need to delete all of the c +anvas items from previous click for ($i=0; $i <= $#xs; $i++){ $point[$i] = $cv -> createText( $xcoords[$i], $ycoords[$i] +, -text => "X", -fill => "red" ,-tag => "points") } my $xregmin = ($xpoint1 * $mx) + $bx; my $xregmax = ($xpoint2 * $mx) + $bx; my $yregmin = ($ypoint1 * $my) + $by; my $yregmax = ($ypoint2 * $my) + $by; #Now We will create the best fit regression line, and update t +he x and y axis labels $regressline = $cv -> createLine($xregmin, $yregmin, $xregmax, + $yregmax, -tag => "line", -fill => "blue", -dash => "."); if ($linorArh eq "A"){ $cv -> delete("xaxis", "yaxis"); my $xaxisupdate = $cv -> createText (175, 335, -text => "1 + / X", -tag => "xaxis"); my $yaxisupdate = $cv -> createText (20, 175, -text => "Ln +(Y)", -tag => "yaxis"); } elsif($linorArh eq "L"){ $cv -> delete("xaxis", "yaxis"); my $xaxistext = $cv -> createText (175, 335, -text => "X-A +xis", -tag => "xaxis"); my $yaxistext = $cv -> createText (20, 175, -text => "Y-Ax +is", -tag => "yaxis"); } } } ###################################################################### +###### ###################################################################### +###### # This window is the about box for all windows # # MAIN -> ABOUT ###################################################################### +##### sub ABOUT #The about box in the help menu! { my $aboutwin = MainWindow -> new; $aboutwin -> title("About"); $aboutwin -> geometry("+210+210"); my $aboutleftframe = $aboutwin -> Frame() -> pack(-side => "left") +; my $aboutrightframe = $aboutwin -> Frame() -> pack(-side => "left" +); my $exitButton = $aboutrightframe -> Button( -text => "Close", -command => sub{destroy $aboutwin} ) -> pack(-side => 'right', -padx => '10', -pady => '10'); my $words = qq( Data Regression Tool Written by David Daycock. September 2004 Send bugs to: david\@netboise.com); my $aboutText = $aboutleftframe->Scrolled('ROText', -height => '10', -width => '20', -wrap => "word", -scrollbars => 'osoe', ); $aboutText -> insert('end', $words); $aboutText -> pack(-side => "left", -padx => '10', -pady => '10'); } sub average { #this subroutine averages the results of an array passed to it. R +eturns a single value. my $sum = 0; foreach $_ (@_) { $sum += $_; } my $avg = $sum / ($#_ + 1); return $avg; } sub linearfit { #this subroutine takes as arguments two array references containin +g linear x and y #it returns the linear best fit regression and r-squared correlati +on. #Usage is @foo = linearfit($xref, $yref) where $xref is a referenc +e to an array holding all values of #x and $yref is a reference to an array holding all values of y. +The index of x and y must be equal #in order for this to work correctly. The function returns an arr +ay in the following order: # @results = (slope of line, intercept of line, rsquared correlati +on) # in order fo this function to work, you need the sum, average and + sqr functions in the file. my @xs = @{$_[0]}; #turns a reference into an array! my @ys = @{$_[1]}; my @xsqrd = sqr(@xs); my @xy = map {$xs[$_] * $ys[$_]} 0..$#xs; my $sumX = sum(@xs); my $sumY = sum(@ys); my $sumXY = sum(@xy); my $sumXsqr = sum(@xsqrd); my $n = $#xs; my $b = (($sumY * $sumXsqr) - ($sumX * $sumXY)) / ((($n+1) * $sumX +sqr) - ($sumX * $sumX)); my $m = ((($n+1) * $sumXY) - ($sumX * $sumY)) / ((($n+1) * $sumXsq +r) - ($sumX * $sumX)); my $w; my @predY; for ($w = 0; $w <= $#xs; $w++) { $predY[$w] = ($xs[$w] * $m ) + $b; } my $yAvg = average(@ys); my $k; my @predYErr; for ($k = 0; $k <= $#predY; $k++) { $predYErr[$k] = $predY[$k] - $yAvg; } my @ssPred = sqr(@predYErr); my $l; my @Err; for ($l = 0; $l <= $#xs; $l++) { $Err[$l] = $ys[$l] - $yAvg; } my @ssTotal = sqr(@Err); my $sumSSpred = sum(@ssPred); my $sumSStotal = sum(@ssTotal); my $rsqrd; if ($sumSStotal == 0) { $rsqrd = 0; } else { $rsqrd = $sumSSpred / $sumSStotal; $rsqrd = sprintf("%.4f", $rsqrd); } my @ans = ($m, $b, $rsqrd); return @ans; } sub sum { #this subroutine sums the numbers passed to it. Returns a single +value. my $sum = 0; foreach my $line (@_) { $sum += $line; } return $sum; } sub sqr { #Calculates the square of any array passed to it, returns an array + with the squares. my @xs = @_; my @result = map {$xs[$_] * $xs[$_]} 0..$#xs; #multiply each line +of an array together. return @result; } sub max { my @array = @_; my @sorted = sort{$a <=> $b}@array; my $result = pop (@sorted); return $result; } sub min { my @array = @_; my @sorted = sort{$a <=> $b}@array; my $result = shift (@sorted); return $result; } ###################################################################### +###### ###################################################################### +###### # This is the Assumptions window from the top main window ###################################################################### +##### sub ASSUME { my $assumewin = MainWindow -> new; $assumewin -> title("Assumptions"); $assumewin -> geometry("+210+210"); my $assumeframe = $assumewin -> Frame() -> pack(-side => "left"); my $words = qq( R-squared value is non-weighted. ); my $assumeText = $assumeframe->Scrolled('ROText', -height => '25', -width => '50', -wrap => "word", -scrollbars => 'osoe', ); $assumeText -> insert('end', $words); $assumeText -> pack(-side => "left", -padx => '10', -pady => '10'); } sub SAVE { our (@xs, @ys); my $Excel = Win32::OLE->GetActiveObject('Excel.Application') | +| Win32::OLE->new('Excel.Application', 'Quit'); $Excel->{DisplayAlerts}=0; my $Book = $Excel->Workbooks->Add(); my $Sheet = $Book->Worksheets("Sheet1"); $Sheet->Activate(); $Sheet->{Name} = "EtchData"; $Sheet->Range("a1")->{Value} = "Temperature"; $Sheet->Range("b1")->{Value} = "Etch Rate"; $Sheet->Columns("A")->AutoFit(); $Sheet->Columns("B")->AutoFit(); foreach my $x (0..$#xs) { my $xindex = $x+2; my $range = 'a'.$xindex; $Sheet->Range($range)->{Value} = $xs[$x]; } foreach my $y(0..$#ys) { my $yindex = $y+2; my $range = 'b'.$yindex; $Sheet->Range($range)->{Value} = $ys[$y]; } my $Chart; unless ($Book->Charts->Count) { $Chart = $Book->Charts->Add({After => $Sheet});# $Shee +t is my data sheet $Chart->{Name} = "Graph"; } else { $Chart = $Book->Charts("Graph"); } my $newnum = $#ys + 2; my $newstr = "B"."$newnum"; $Chart->{ChartType} = xlXYScatter; # xlChartType; $Chart->SetSourceData($Sheet->Range('A2',$newstr),xlColumns); +# Range, xlRowCol $Chart->{HasLegend} = 0; $Chart->{HasTitle} = 1; $Chart->{ChartTitle}->{Characters}->{Text} = "Data Plot"; $Chart->PlotArea->Interior->{ColorIndex} = 2; $Chart->Activate(); # Axes my $Xaxes = $Chart->Axes(xlCategory, xlPrimary); $Xaxes->{HasTitle} = 1; $Xaxes->{AxisTitle}->{Characters}->{Text} = "Temperature"; $Xaxes->{HasMajorGridLines} = 0; my $Yaxes = $Chart->Axes(xlValue, xlPrimary); $Yaxes->{HasTitle} = 1; $Yaxes->{HasMajorGridlines} = 0; $Yaxes->{AxisTitle}->{Characters}->{Text} = "Etch Rate"; $Yaxes->{HasMajorGridLines} = 0; my $excelfile = $file_mw->getSaveFile(-defaultextension=>".xls +"); $Book->SaveAs($excelfile); } sub EVAL #subroutine to evaluate results from regression { our ($yeval, $xeval); #Creating the Main Window my $ew = MainWindow->new; $ew -> title("Evaluation Window"); my $ytext = $ew -> Label(-text => "Y(") -> pack(-side => "left", - +pady => 5, -padx => 2); my $xent = $ew -> Entry (-textvariable => \$xeval, -width => 10, - +justify => "center")-> pack(-side => "left", -pady => 5, -padx => 2); my $yend = $ew -> Label(-text => ") = ") -> pack(-side => "left", +-pady => 5, -padx => 2); my $yent = $ew -> Entry (-textvariable => \$yeval, -width => 10, - +justify => "center")-> pack(-side => "left", -pady => 5, -padx => 2); my $button = $ew -> Button (-text => "Go", -command => \&EVALUATEM +E, -height => 1, -width => 2) -> pack(-side => "left", -pady => 5, -p +adx => 2) } sub EVALUATEME { our ($m, $b, $yeval, $xeval, $statustext, $xold, $yold); if (defined($yeval) == 1 && defined($xeval) == 0){ $xeval = ($yeval - $b) / $m; $xeval = sprintf("%.3f", $xeval); $yeval = sprintf("%.3f", $yeval); $xold = $xeval; $yold = $yeval; } elsif(defined($xeval) == 1 && defined($yeval) == 0){ $yeval = ($m * $xeval) + $b; $xeval = sprintf("%.3f", $xeval); $yeval = sprintf("%.3f", $yeval); $yold = $yeval; $xold = $xeval; } elsif (defined ($xeval) && defined($yeval)){ if ($xold != $xeval){ $yeval = ($m * $xeval) + $b; $xeval = sprintf("%.3f", $xeval); $yeval = sprintf("%.3f", $yeval); $yold = $yeval; $xold = $xeval; } elsif($yold != $yeval){ $xeval = ($yeval - $b) / $m; $xeval = sprintf("%.3f", $xeval); $yeval = sprintf("%.3f", $yeval); $xold = $xeval; $yold = $yeval; } } else { $xeval = "Err"; $yeval = "Err"; } }
~~David~~

Comment on Data Regression Utility
Download Code
Re: Data Regression Utility
by water (Deacon) on Sep 09, 2004 at 04:01 UTC
    Just a suggestion -- you might consider writing your OLS in matrix notation, and then using PDL to sling the matrices, rather than handling all the messy algebra scalar-wise. The matrix approach would also give you multivariate OLS for free, vs. just the single dimensional y=mx+b.

    water

      I was looking for a way to do the work in matrix notation, but was not aware of a method to go about writing it. I will investigate PDL. Thanks,
      ~~David~~

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: CUFP [id://389485]
Approved by SciDude
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others chilling in the Monastery: (4)
As of 2014-12-19 06:37 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    Is guessing a good strategy for surviving in the IT business?





    Results (71 votes), past polls