Beefy Boxes and Bandwidth Generously Provided by pair Networks
Problems? Is your data what you think it is?
 
PerlMonks  

smanicka's scratchpad

by smanicka (Scribe)
on Dec 04, 2008 at 18:24 UTC ( [id://728050]=scratchpad: print w/replies, xml ) Need Help??

#!/usr/bin/perl ###################################################################### +########################## # QUINLANS ALGORITHM FOR DECISION TREE # Author : Sandhya Manickavasagam email- smanicak@syr.edu ###################################################################### +########################## use DBI; use Tree::DAG_Node; use AI::DecisionTree; use Graph::Easy; $table ="income"; my $file_to_append = "C:\\Documents and Settings\\smanicka\\Desktop\\q +uinlans\\output_quinlans.txt"; open (MYFILE,">$file_to_append")or warn "Can't open file to append"; print MYFILE "\t\t\tCSE 787 - ANALYTICAL DATA MINING - PROJECT 2\n==== +===================================================================== +=======\n\n"; print MYFILE "\n\n\t\t\tQUINLAN'S DECISION TREE ALGORITHM\n----------- +--------------------------------------------------------------------- +-\n"; print MYFILE "\n\t\t\tName :\t Sandhya Manickavasagam\n\t\t\tSU-ID:660 +185882\n============================================================= +===================\n\n"; my $dtree = new AI::DecisionTree(noise_mode=>'pick_best'); $dbh = DBI->connect('dbi:ODBC:driver=microsoft access driver (*.mdb);d +bq=C:\Documents and Settings\smanicka\Desktop\quinlans\sample.mdb') o +r warn("Sorry,Cant connect to the table\n$DBI::errstr \n") or warn " +Cannot connect to database"; my %data; $query = qq(select count(*) from $table); $sth = $dbh->prepare($query) or print "cannot prepare"; $sth ->execute(); $count_row = $sth->fetchrow_array(); my $query = "select * from $table"; print MYFILE "\nExecuting $query \n"; $sth = $dbh->prepare($query) or print "cannot prepare"; $sth ->execute(); while (@rows=$sth->fetchrow_array()) { $count_col =0; foreach $row (@rows){ # print "$row,"; $count_col ++; } # print "\n"; } print MYFILE "There are $count_row rows of data in this table\n"; print MYFILE "\nThere are $sth->{NUM_OF_FIELDS} columns\n\nAssuming th +at the first column is the transaction number and hence not a part of + the data used for mining : \n\nThe columns under consideration are : +\n\n\t$sth->{NAME}->[1]\n\t$sth->{NAME}->[2]\n\t$sth->{NAME}->[3]\n\t +$sth->{NAME}->[4]\n\t$sth->{NAME}->[5]\n"; for (my $i=1;$i<$count_col;$i++){ push (@columns_list,"$sth->{NAME}->[$i]"); } print MYFILE "\nThe Distinct entries are : \n "; foreach $c (@columns_list){ print MYFILE "\n$c\n================\n"; $query = "select distinct $c from $table"; #print "$query\n"; $sth = $dbh->prepare($query) or print "cannot prepare"; $sth ->execute(); while (@rows = $sth->fetchrow_array()) { foreach $row (@rows){ print MYFILE "$row\n"; push((@{"$c"}),$row); } } } print MYFILE "\n"; foreach $c (@columns_list){ foreach $k ( @{"$c"}) { #print "$k \n"; $occ = 0; $query = qq(select $c from $table where $c = '$k'); $stmnt = qq(select $c from $table where $c = $k); $sth = $dbh->prepare($query) or print "cannot prepare\n"; $sth ->execute() or (($sth = $dbh->prepare($stmnt) or print "cannot pr +epare") and $sth ->execute()); while (@rows = $sth->fetchrow_array()) { $occ++; } print MYFILE "$k occurs $occ times\n"; $ratio = $occ/$count_row; #${$c}{"$k"} = $ratio ; $data{"$k"} = $ratio; } print MYFILE "\n"; } # print "\n"; #foreach $c (@columns_list){ #print "\n$c\n"; #foreach $r (@{"$c"}){ # print "$r \t"; #} #} print MYFILE "\nStoring the entries into a hash table\n--------------- +---------------------------------------\n"; while (($key, $value) = each(%data)){ print MYFILE $key.", ".$value."\n"; } # Calculate ENTROPY sub log2 { my $n = $_; return log($n)/log(2); } #gain of each column should be calculated and column with max gain sho +uld become root #my $root = Tree::DAG_Node->new(); #$root->name("Outlook"); #$new_daughter = Tree::DAG_Node->new(); # $new_daughter->name(""); # $root->add_daughter($new_daughter); # A set of training data for deciding whether to play $query = qq(select * from $table); print MYFILE "\nExecuting $query \n"; $sth = $dbh->prepare($query) or print "cannot prepare"; $sth ->execute(); while(@rows = $sth->fetchrow_array()){ #if($rows[4] eq 1){ #$rows[4] = "yes"; #} #else{ #$rows[4] = "no"; #} foreach $r (@rows){ print "$r \t\t"; } print "\n"; #} ###################################################################### +########## # For the Most part, the entire program will work for any given data.I +f a new # # table needs to be used, please change the mappings below to reflect +the new # # table values. + # ###################################################################### +########## $dtree->add_instance (attributes=> {Income=> qq($rows[1]), Student=> qq ($rows[2]), Credit_Rating=> qq($rows[3]) }, result => qq($rows[4])); } $dtree ->train(); @rules = $dtree->rule_statements(); print MYFILE "\n Mining the Data \n=================================== +=======\n"; $i =0; print MYFILE "\n\nDecision Rules \n=================================== +=============\n"; foreach $rule (@rules){ print MYFILE "$rule \n"; $i = $i + 1; @{"arr".$i} = split (/\s+/,$rule); foreach $a (@{"arr".$i}){ #print "$a \t"; if ($a eq "if") { next; } if ($a eq "and"){ last; } else{ if ($a =~ m/=/){ @ar = split (/=/,$a); $root_of_tree = "<root>$ar[0]</root> \n"; } } } } print MYFILE "\n\n DECISION TREE |N=================================== +====\n\n$root_of_tree\n"; print "\n$root_of_tree\n"; @rules = $dtree->rule_statements(); $k = 0; foreach $rule (@rules){ $k ++; } foreach $rules (@rules){ @rule_arr = split (/\s+/,$rules); for($t =0;$t< $#rule_arr;$t++){ my @node; if ($rule_arr[$t] =~m/=/){ @leaf = split (/\=/,$rule_arr[$t]); push(@node," <branch node = \"$leaf[0]\" attribute =\"$leaf[1]\" \/>") +; } if ($rule_arr[$t] =~ m/\-\>/){ print "\n <Class value>\"$rule_arr[$t+1]\"</Class value>\n\n"; print MYFILE "\n <Class value>\"$rule_arr[$t+1]\"</Class value>\n\n"; } foreach $n (@node){ print "$n \n"; print MYFILE "$n \n"; } } } #if ($rule_arr[$t] =~ m/\=/){ # @leaf = split (/\=/,$rule_arr); # print "Leaf node : $leaf[0]\n"; # } #} #}
1 	high 	no 	fair      No 		
2 	high 	no 	excellent No 		
3 	high 	no 	fair 	  Yes 		
4 	medium 	no 	fair 	  Yes 		
5 	low 	yes 	fair 	  Yes 		
6 	low 	yes 	excellent No 		
7 	low 	yes 	excellent Yes 		
8 	medium 	no 	fair 	  No 		
9 	low 	yes 	fair 	  Yes 		
10 	medium 	yes 	fair 	  Yes 		
11 	medium 	yes 	excellent Yes 		
12 	medium 	no 	excellent Yes 		
13 	high 	yes 	fair 	  Yes 		
14 	medium 	no 	excellent No 

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others sharing their wisdom with the Monastery: (4)
As of 2025-05-25 02:44 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found

    Notices?
    erzuuliAnonymous Monks are no longer allowed to use Super Search, due to an excessive use of this resource by robots.