Beefy Boxes and Bandwidth Generously Provided by pair Networks
The stupid question is the question not asked
 
PerlMonks  

Data Regression Utility

by ~~David~~ (Hermit)
on Sep 08, 2004 at 21:59 UTC ( #389485=CUFP: print w/ replies, xml ) Need Help??

Here is a program that can do data regression and plots the information in Tk. You can also save the data to an excel file and it will chart it for you automatically.
#!c:\perl\bin\wperl.exe -w use Win32::OLE qw(in with); use Win32::OLE::Const 'Microsoft Excel'; use Win32::OLE::Variant; use Win32::OLE::NLS qw(:LOCALE :DATE); $Win32::OLE::Warn = 3; # Die on Errors. require Tk; use Tk; use Tk::Font; use Tk::Text; use Tk::ROText; use strict; #Creating the Main Window my $mw = MainWindow->new; $mw -> title("X Y Plotting Window"); #$mw -> geometry("800x600"); #Now I need to create the frames to house the widgets my $menuframe = $mw -> Frame( -relief => "groove", -borderwidth => 2 )->pack( -side => "top", -fill => "x", ); my $midhold = $mw -> Frame() -> pack( -side => "top", -padx => 5, -pady => 5, -fill => "x" ); my $xyframe = $midhold -> Frame() -> pack( -side => "left", -padx => 5, -pady => 5 ); my $plotframe = $midhold -> Frame() -> pack( -side => "right", -padx => 5, -pady => 5 ); my $statusframe = $mw -> Frame() -> pack( -side => "bottom", -padx => 5, -pady => 5, -fill => "x" ); my $calcframe = $mw -> Frame() -> pack( -side => "bottom", -padx => 5, -pady => 5, -fill => "x" ); ############################################################ #Create widgets for Menu my $file_mw = $menuframe -> Menubutton ( -text => "File", -activeforeground => "grey" ) -> pack (-side => 'left'); $file_mw -> command( -label => "Save", -command => sub{&SAVE} ); $file_mw -> command( -label => "Exit", -command => sub{$mw -> destroy} ); my $help_mw = $menuframe -> Menubutton ( -text => "Help", -activeforeground => "grey" ) -> pack (-side => 'right'); $help_mw -> command( -label => "Assumptions", -command => sub{&ASSUME}); $help_mw -> separator(); $help_mw -> command( -label => "About", -command => sub{&ABOUT}); ############################################################ ############################################################ #Create widgets and labels for XY window my $xlabel = $xyframe -> Label( -text => "Temp (X)", -relief => "raised", -padx => 5 ) -> grid(-column => 2, -row => 1, -padx => 5); my $ylabel = $xyframe -> Label( -text => "Etch Rate (Y)", -relief => "raised", -padx => 5 ) -> grid(-column => 3, -row => 1, -padx => 5); my @xinit = qw(-- -- -- -- -- -- -- -- -- --); our ($isLast, @radio, @x, @y); our (@xbox, @ybox); our $i =0; for (@xinit) { my $rowlabel = $i+1; my $row = $i+2; $xyframe->Label(-text=>$rowlabel)->grid(-row=>$row,-column=>1) +; $xbox[$i] = $xyframe->Entry(-bg=>'white', -width => 10, -textv +ariable => \$x[$i], -justify => "center")->grid(-row=>$row,-column=>2 +, -padx => 5, -pady => 5); $ybox[$i] = $xyframe->Entry(-bg=>'white', -width => 10, -textv +ariable => \$y[$i], -justify => "center")->grid(-row=>$row,-column=>3 +, -padx => 5, -pady => 5); $i++; } ############################################################ ############################################################ #Create widgets and labels for Plotting Frame our $cv = $plotframe -> Canvas( -width => 350, -height => 350, -bg => "white", ) -> pack(); our ($m, $b, $rsqrd); my $slopetext = $plotframe -> Label( -text => "Slope = " ) -> pack(-side => "left"); my $slopeentry = $plotframe -> Entry( -textvariable => \$m, -width => 6, -justify => "center" ) -> pack(-side => "left", -padx => 5, -pady => 5); my $btext = $plotframe -> Label( -text => "Intercept = " ) -> pack(-side => "left"); my $bentry = $plotframe -> Entry( -textvariable => \$b, -width => 6, -justify => "center" ) -> pack(-side => "left", -padx => 5, -pady => 5); my $rtext = $plotframe -> Label( -text => "Rsqrd = ", ) -> pack(-side => "left"); my $rentry = $plotframe -> Entry( -textvariable => \$rsqrd, -width => 6, -justify => "center" ) -> pack(-side => "left", -padx => 5, -pady => 5); my $xaxis = $cv -> createLine( 35, 310, 340, 310); my $yaxis = $cv -> createLine( 40, 10, 40, 315); my $xaxistext = $cv -> createText (175, 335, -text => "X-Axis", -tag = +> "xaxis"); my $yaxistext = $cv -> createText (20, 175, -text => "Y-Axis", -tag => + "yaxis"); ############################################################ ############################################################ #Create widgets and labels for Calculate Frame my $calcbutton = $calcframe -> Button( -width => 9, -height => 1, -text => "Calculate", -command => \&CALC ) -> pack(-side => "left", -padx => 30); our $linorArh; my $arhchoice = $calcframe -> Radiobutton( -value => "A", -variable => \$linorArh, ) -> pack (-side => "right"); my $arhlabel = $calcframe -> Label( -text => "Arrhenius" ) -> pack (-side => "right"); my $linearchoice = $calcframe -> Radiobutton( -value => "L", -variable => \$linorArh, ) -> pack (-side => "right"); my $linlabel = $calcframe -> Label( -text => "Linear" ) -> pack (-side => "right"); my $choicetext = $calcframe -> Label( -text => "Data Fit Type: ", -font => '{arial bold} 10' )-> pack (-side => "right"); ############################################################ ############################################################ #Create widgets and labels for Statusbar Frame our $statustext; my $statuslabel = $statusframe -> Label( -text => "Status: " ) -> pack(-side => "left"); my $status =$statusframe -> Entry( -textvariable => \$statustext, -foreground => 'red', -width => 80 ) -> pack(-side => "left"); MainLoop(); sub CALC { our (@x, @y, $linorArh, @xs, @ys, $m, $b, $rsqrd, $cv); my $i; @xs = (0); #did this to keep the @xs and @ys arrays always starti +ng out with one value @ys = (0); for ($i = 0; $i <= $#x; $i++){ if (defined($x[$i])){ if ($x[$i] =~ m/[\d]/){ push (@xs, $x[$i]) } } } for ($i = 0; $i <= $#y; $i++){ if (defined($y[$i])){ if ($y[$i] =~ m/[\d]/){ push (@ys, $y[$i]) } } } shift (@xs); #need to remove the 0 we started with shift (@ys); if (defined($linorArh)){ my ($xref, $yref); if ($#xs > 0){ if ($#xs == $#ys){ #make sure the arrays are the same siz +e if ($linorArh eq "L"){ $xref = \@xs; #generate array refs to dump into + linearfit sub $yref = \@ys; ($m, $b, $rsqrd) = linearfit($xref, $yref); } elsif($linorArh eq "A") { @xs = map { 1 / $x[$_]} 0..$#xs; #convert x and +y to arrhenius @ys = map {log $y[$_]} 0..$#ys; $xref = \@xs; $yref = \@ys; ($m, $b, $rsqrd) = linearfit($xref, $yref); } $m = sprintf("%.3f", $m); $b = sprintf("%.3f", $b); $rsqrd = sprintf("%.3f", $rsqrd); $statustext = "Calculation Complete"; our $iexist; if ($iexist == 0){ my $evalbutton = $calcframe -> Button( -width => 9, -height => 1, -text => "Evaluate", -command => \&EVAL ) -> pack(-side => "left", -padx => 30); $iexist = 1; } } else { $statustext = "Your x and y columns need to be the sam +e size"; } } else { $statustext = "You need more than one value to perform reg +ressions"; } } else { $statustext = "You need to select Linear or Arhenius Data Type +"; } my ($xmax, $ymax, $xpoint1, $xpoint2, $ypoint1, $ypoint2, $xmin, $ +ymin); if (defined($m)){ $xmax = max(@xs); $ymax = max(@ys); $xmin = min(@xs); $ymin = min(@ys); $ypoint1 = ($m * $xmin) + $b; #the next points are the coordi +nates of the regression line $ypoint2 = ($m * $xmax) + $b; $xpoint1 = $xmin; $xpoint2 = $xmax; #here we need to scale all of the values to fit within our can +vas area. my $mx = 300 / ($xmax - $xmin); my $bx = 40 - ($mx * $xmin); my $my = -300 / ($ymax - $ymin); my $by = 310 - ($my * $ymin); our @xcoords = map{($_ * $mx) + $bx}@xs; our @ycoords = map{($_ * $my) + $by}@ys; my (@point, $regressline); $cv -> delete("points", "line"); #need to delete all of the c +anvas items from previous click for ($i=0; $i <= $#xs; $i++){ $point[$i] = $cv -> createText( $xcoords[$i], $ycoords[$i] +, -text => "X", -fill => "red" ,-tag => "points") } my $xregmin = ($xpoint1 * $mx) + $bx; my $xregmax = ($xpoint2 * $mx) + $bx; my $yregmin = ($ypoint1 * $my) + $by; my $yregmax = ($ypoint2 * $my) + $by; #Now We will create the best fit regression line, and update t +he x and y axis labels $regressline = $cv -> createLine($xregmin, $yregmin, $xregmax, + $yregmax, -tag => "line", -fill => "blue", -dash => "."); if ($linorArh eq "A"){ $cv -> delete("xaxis", "yaxis"); my $xaxisupdate = $cv -> createText (175, 335, -text => "1 + / X", -tag => "xaxis"); my $yaxisupdate = $cv -> createText (20, 175, -text => "Ln +(Y)", -tag => "yaxis"); } elsif($linorArh eq "L"){ $cv -> delete("xaxis", "yaxis"); my $xaxistext = $cv -> createText (175, 335, -text => "X-A +xis", -tag => "xaxis"); my $yaxistext = $cv -> createText (20, 175, -text => "Y-Ax +is", -tag => "yaxis"); } } } ###################################################################### +###### ###################################################################### +###### # This window is the about box for all windows # # MAIN -> ABOUT ###################################################################### +##### sub ABOUT #The about box in the help menu! { my $aboutwin = MainWindow -> new; $aboutwin -> title("About"); $aboutwin -> geometry("+210+210"); my $aboutleftframe = $aboutwin -> Frame() -> pack(-side => "left") +; my $aboutrightframe = $aboutwin -> Frame() -> pack(-side => "left" +); my $exitButton = $aboutrightframe -> Button( -text => "Close", -command => sub{destroy $aboutwin} ) -> pack(-side => 'right', -padx => '10', -pady => '10'); my $words = qq( Data Regression Tool Written by David Daycock. September 2004 Send bugs to: david\@netboise.com); my $aboutText = $aboutleftframe->Scrolled('ROText', -height => '10', -width => '20', -wrap => "word", -scrollbars => 'osoe', ); $aboutText -> insert('end', $words); $aboutText -> pack(-side => "left", -padx => '10', -pady => '10'); } sub average { #this subroutine averages the results of an array passed to it. R +eturns a single value. my $sum = 0; foreach $_ (@_) { $sum += $_; } my $avg = $sum / ($#_ + 1); return $avg; } sub linearfit { #this subroutine takes as arguments two array references containin +g linear x and y #it returns the linear best fit regression and r-squared correlati +on. #Usage is @foo = linearfit($xref, $yref) where $xref is a referenc +e to an array holding all values of #x and $yref is a reference to an array holding all values of y. +The index of x and y must be equal #in order for this to work correctly. The function returns an arr +ay in the following order: # @results = (slope of line, intercept of line, rsquared correlati +on) # in order fo this function to work, you need the sum, average and + sqr functions in the file. my @xs = @{$_[0]}; #turns a reference into an array! my @ys = @{$_[1]}; my @xsqrd = sqr(@xs); my @xy = map {$xs[$_] * $ys[$_]} 0..$#xs; my $sumX = sum(@xs); my $sumY = sum(@ys); my $sumXY = sum(@xy); my $sumXsqr = sum(@xsqrd); my $n = $#xs; my $b = (($sumY * $sumXsqr) - ($sumX * $sumXY)) / ((($n+1) * $sumX +sqr) - ($sumX * $sumX)); my $m = ((($n+1) * $sumXY) - ($sumX * $sumY)) / ((($n+1) * $sumXsq +r) - ($sumX * $sumX)); my $w; my @predY; for ($w = 0; $w <= $#xs; $w++) { $predY[$w] = ($xs[$w] * $m ) + $b; } my $yAvg = average(@ys); my $k; my @predYErr; for ($k = 0; $k <= $#predY; $k++) { $predYErr[$k] = $predY[$k] - $yAvg; } my @ssPred = sqr(@predYErr); my $l; my @Err; for ($l = 0; $l <= $#xs; $l++) { $Err[$l] = $ys[$l] - $yAvg; } my @ssTotal = sqr(@Err); my $sumSSpred = sum(@ssPred); my $sumSStotal = sum(@ssTotal); my $rsqrd; if ($sumSStotal == 0) { $rsqrd = 0; } else { $rsqrd = $sumSSpred / $sumSStotal; $rsqrd = sprintf("%.4f", $rsqrd); } my @ans = ($m, $b, $rsqrd); return @ans; } sub sum { #this subroutine sums the numbers passed to it. Returns a single +value. my $sum = 0; foreach my $line (@_) { $sum += $line; } return $sum; } sub sqr { #Calculates the square of any array passed to it, returns an array + with the squares. my @xs = @_; my @result = map {$xs[$_] * $xs[$_]} 0..$#xs; #multiply each line +of an array together. return @result; } sub max { my @array = @_; my @sorted = sort{$a <=> $b}@array; my $result = pop (@sorted); return $result; } sub min { my @array = @_; my @sorted = sort{$a <=> $b}@array; my $result = shift (@sorted); return $result; } ###################################################################### +###### ###################################################################### +###### # This is the Assumptions window from the top main window ###################################################################### +##### sub ASSUME { my $assumewin = MainWindow -> new; $assumewin -> title("Assumptions"); $assumewin -> geometry("+210+210"); my $assumeframe = $assumewin -> Frame() -> pack(-side => "left"); my $words = qq( R-squared value is non-weighted. ); my $assumeText = $assumeframe->Scrolled('ROText', -height => '25', -width => '50', -wrap => "word", -scrollbars => 'osoe', ); $assumeText -> insert('end', $words); $assumeText -> pack(-side => "left", -padx => '10', -pady => '10'); } sub SAVE { our (@xs, @ys); my $Excel = Win32::OLE->GetActiveObject('Excel.Application') | +| Win32::OLE->new('Excel.Application', 'Quit'); $Excel->{DisplayAlerts}=0; my $Book = $Excel->Workbooks->Add(); my $Sheet = $Book->Worksheets("Sheet1"); $Sheet->Activate(); $Sheet->{Name} = "EtchData"; $Sheet->Range("a1")->{Value} = "Temperature"; $Sheet->Range("b1")->{Value} = "Etch Rate"; $Sheet->Columns("A")->AutoFit(); $Sheet->Columns("B")->AutoFit(); foreach my $x (0..$#xs) { my $xindex = $x+2; my $range = 'a'.$xindex; $Sheet->Range($range)->{Value} = $xs[$x]; } foreach my $y(0..$#ys) { my $yindex = $y+2; my $range = 'b'.$yindex; $Sheet->Range($range)->{Value} = $ys[$y]; } my $Chart; unless ($Book->Charts->Count) { $Chart = $Book->Charts->Add({After => $Sheet});# $Shee +t is my data sheet $Chart->{Name} = "Graph"; } else { $Chart = $Book->Charts("Graph"); } my $newnum = $#ys + 2; my $newstr = "B"."$newnum"; $Chart->{ChartType} = xlXYScatter; # xlChartType; $Chart->SetSourceData($Sheet->Range('A2',$newstr),xlColumns); +# Range, xlRowCol $Chart->{HasLegend} = 0; $Chart->{HasTitle} = 1; $Chart->{ChartTitle}->{Characters}->{Text} = "Data Plot"; $Chart->PlotArea->Interior->{ColorIndex} = 2; $Chart->Activate(); # Axes my $Xaxes = $Chart->Axes(xlCategory, xlPrimary); $Xaxes->{HasTitle} = 1; $Xaxes->{AxisTitle}->{Characters}->{Text} = "Temperature"; $Xaxes->{HasMajorGridLines} = 0; my $Yaxes = $Chart->Axes(xlValue, xlPrimary); $Yaxes->{HasTitle} = 1; $Yaxes->{HasMajorGridlines} = 0; $Yaxes->{AxisTitle}->{Characters}->{Text} = "Etch Rate"; $Yaxes->{HasMajorGridLines} = 0; my $excelfile = $file_mw->getSaveFile(-defaultextension=>".xls +"); $Book->SaveAs($excelfile); } sub EVAL #subroutine to evaluate results from regression { our ($yeval, $xeval); #Creating the Main Window my $ew = MainWindow->new; $ew -> title("Evaluation Window"); my $ytext = $ew -> Label(-text => "Y(") -> pack(-side => "left", - +pady => 5, -padx => 2); my $xent = $ew -> Entry (-textvariable => \$xeval, -width => 10, - +justify => "center")-> pack(-side => "left", -pady => 5, -padx => 2); my $yend = $ew -> Label(-text => ") = ") -> pack(-side => "left", +-pady => 5, -padx => 2); my $yent = $ew -> Entry (-textvariable => \$yeval, -width => 10, - +justify => "center")-> pack(-side => "left", -pady => 5, -padx => 2); my $button = $ew -> Button (-text => "Go", -command => \&EVALUATEM +E, -height => 1, -width => 2) -> pack(-side => "left", -pady => 5, -p +adx => 2) } sub EVALUATEME { our ($m, $b, $yeval, $xeval, $statustext, $xold, $yold); if (defined($yeval) == 1 && defined($xeval) == 0){ $xeval = ($yeval - $b) / $m; $xeval = sprintf("%.3f", $xeval); $yeval = sprintf("%.3f", $yeval); $xold = $xeval; $yold = $yeval; } elsif(defined($xeval) == 1 && defined($yeval) == 0){ $yeval = ($m * $xeval) + $b; $xeval = sprintf("%.3f", $xeval); $yeval = sprintf("%.3f", $yeval); $yold = $yeval; $xold = $xeval; } elsif (defined ($xeval) && defined($yeval)){ if ($xold != $xeval){ $yeval = ($m * $xeval) + $b; $xeval = sprintf("%.3f", $xeval); $yeval = sprintf("%.3f", $yeval); $yold = $yeval; $xold = $xeval; } elsif($yold != $yeval){ $xeval = ($yeval - $b) / $m; $xeval = sprintf("%.3f", $xeval); $yeval = sprintf("%.3f", $yeval); $xold = $xeval; $yold = $yeval; } } else { $xeval = "Err"; $yeval = "Err"; } }
~~David~~

Comment on Data Regression Utility
Download Code
Replies are listed 'Best First'.
Re: Data Regression Utility
by water (Deacon) on Sep 09, 2004 at 04:01 UTC
    Just a suggestion -- you might consider writing your OLS in matrix notation, and then using PDL to sling the matrices, rather than handling all the messy algebra scalar-wise. The matrix approach would also give you multivariate OLS for free, vs. just the single dimensional y=mx+b.

    water

      I was looking for a way to do the work in matrix notation, but was not aware of a method to go about writing it. I will investigate PDL. Thanks,
      ~~David~~

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: CUFP [id://389485]
Approved by SciDude
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others pondering the Monastery: (10)
As of 2015-07-29 07:08 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (260 votes), past polls