# Using Info from AI class implement a classifier that learns using Naive Bayes with Laplace # Smoothing to guess whether a title is a book song or movie. # Store wrong issues as training data in books.txt, movies.txt, and songs.txt use strict; use Data::Dumper; import bag; import Dictionary; # training Data my $bookbag = new bag(); $bookbag->addItem("Small Business Kit For Dummies"); $bookbag->addItem("S-Corporation: Small Business Start-Up Kit"); $bookbag->addItem("Home Business Tax Deductions"); $bookbag->addItem("Essential Technical Analysis"); $bookbag->addItem("Technical Analysis of the Markets"); $bookbag->addItem("Introduction to probability and statistics"); $bookbag->addItem("Differential Equations"); $bookbag->addItem("Principles of Physics"); $bookbag->addItem("Perl Testing, A developer's notebook"); $bookbag->addItem("BreakAWAY Careers"); $bookbag->addItem("A Game of Thrones"); $bookbag->addItem("The Cleric Quintet"); $bookbag->addItem("Exile's Song"); $bookbag->addItem("Honor's War"); my $moviebag = new bag(); $moviebag->addItem("Star Wars Episode IV: A New Hope"); $moviebag->addItem("Shrek"); $moviebag->addItem("Rocky"); $moviebag->addItem("SHrek II"); $moviebag->addItem("Rocky II"); $moviebag->addItem("Star Trek"); $moviebag->addItem("Star Trek II: The Wrath of Khan"); $moviebag->addItem("Independence Day"); $moviebag->addItem("I know what you did last summer"); $moviebag->addItem("A Nightmare on Elm Street"); $moviebag->addItem("Transformers"); $moviebag->addItem("Pirates of The Carribean II: The Black Pearl"); $moviebag->addItem("A Cinderella Story"); $moviebag->addItem("The Never Ending Story"); my $songbag = new bag(); $songbag->addItem("Short Skirt /Long Jacket"); $songbag->addItem("My name is Jonas"); $songbag->addItem("Wild Wild Life"); $songbag->addItem("You can call Me Al"); $songbag->addItem("If you only Knew"); $songbag->addItem("Sweet Sacrifice"); $songbag->addItem("Tear Away"); my $dictionary = new Dictionary( $bookbag, $moviebag, $songbag ); #printf " Size of dictionary is :%d \n", $dictionary->size(); #print Dumper $dictionary; #die; #### Laplace Smoothing Co-efficient. our $k = 7; our $input =''; my $bookprob = base_probability( $k, $bookbag, $moviebag, $songbag); my $movieprob = base_probability( $k, $moviebag, $songbag, $bookbag); my $songprob = base_probability( $k, $songbag, $moviebag, $bookbag); prompt(); $input = ; chomp ($input); while ( $input ne 'q') { ####DO my %choice = (); # calc Probability $choice{MOVIE}= my $m = titleprob($input, $moviebag, $movieprob, $k, $dictionary); $choice{BOOK} = my $bk = titleprob($input, $bookbag, $movieprob, $k, $dictionary); $choice{SONG} = my $s = titleprob($input, $songbag, $movieprob, $k, $dictionary); $choice{MOVIE} =$m / ( $m + $bk + $s ); $choice{BOOK} = $bk / ( $m + $bk + $s ); $choice{SONG} = $s / ( $m + $bk + $s ); #select the most likely my @sortedchoice = sort {$choice{$b} <=>$choice{$a} }( keys %choice ); printf "\n\nThat title is probaby a %s with a %d %% confidence \n", $sortedchoice[0], $choice{$sortedchoice[0]} *100; CHECK: print "Correct? enter b if it should be a book, m if it should be a movie and s if it should be a song\n"; my $check = ; chomp($check); if ($check =~ m/[bms]/i){ print " Noted correction\n"; # add title to appropriate bag switch: { $check eq 'b' && do { $bookbag->addItem($input); last switch;}; $check eq 'm' && do { $moviebag->addItem($input);last switch;}; $check eq 's' && do { $songbag->addItem($input);last switch;}; } # re-create dictionary #print Dumper $bookbag; $dictionary = new Dictionary( $moviebag, $songbag, $bookbag ); #print Dumper $dictionary; #recalculate base probs $bookprob = base_probability( $k, $bookbag, $moviebag, $songbag); $movieprob = base_probability( $k, $moviebag, $songbag, $bookbag); $songprob = base_probability( $k, $songbag, $moviebag, $bookbag); } elsif ($check=~m/yes|y/i){ print "Cool!\n" } else{ goto CHECK; } ####Stuff prompt(); $input = ; chomp ($input); last if ( $input eq 'q' ); } ###################################################################### sub final_probability { # ( $title, moviebag, bookbag,songbag, k, dictionary } sub base_probability() {# ( $k,@listofbags ) { # return base probability for class represented as the first bag # this is a number between 0 and 1 my $k = shift; my @listofbags = @_; my $total = 0; foreach my $bag (@listofbags) { $total += $bag->itemCount(); } my $probability = ( $listofbags[0]->itemCount() + $k)/ ( $total + $k * @listofbags ); return $probability; } sub wordprob {#( $word, $bag, $dictionary, $k) { #Give probability of a particular word in a bag my $word = shift; my $bag = shift; my $prob = shift; my $k = shift; my $prob = ($bag->instanceCount($word) + $k )/ ($bag->bagSize() + $k*$dictionary->size() ); return $prob; } sub titleprob{ #( $title, $bag , $baseprobability, $k, $dictionary){ # Given a base probability and a bag calculate the particular prob of the title. # my $title = shift; my $bag = shift; my $bprob = shift; my $k = shift; my $dictionary = shift; #split up title into words my @words = split / /, $title; my $prob = $bprob; foreach my $word (@words) { $prob *= wordprob($word, $bag, $dictionary, $k); } return $prob; } sub prompt { printf"movie %.4f book %.4f song %.4f \n", $movieprob, $bookprob, $songprob; printf "There are %d words in the dictionary\n", $dictionary->size(); print "Enter a title. (Enter 'q' to quit) :"; } ############################################################################## #### Data structures package bag; BEGIN{ use strict; $| = 1; #unbuffered screen output. } ## Bags sub new { my $class = shift; my $title = shift; my $self = {}; bless $self, $class; if ( defined $title ) { addItem ($title);} return $self; } ###Ops sub addItem(){ # addItem ('Title') # Accept a title, and put words into bag my $self = shift; my $item = shift; $self->{TITLECOUNT} +=1; #chomp it chomp($item); #clear punctuation $item =~ s/[-:?.]|\// /g; my @wordcollection = split /\s+/,$item; foreach my $word (@wordcollection) { $self->{BAGSIZE} +=1; $self->{BAG}{uc ($word)} +=1; } } sub instanceCount(){ # instanceCount('Word') # returns the number of times a word occurs in a bag. my $self = shift; my $word = shift; return $self->{BAG}{uc($word)}; } sub setSize(){ # setSize() # Returns the number of unique elements in bag. my $self = shift; return scalar keys (%{$self->{BAG}}); } sub bagSize(){ # bagSize() # Returns the number of words in bag counting all duplicates. my $self = shift; return $self->{BAGSIZE}; } sub itemCount() { #return how many titles were added to the bag. my $self = shift; return $self->{TITLECOUNT}; } 1; ############################################################################## package Dictionary; use Data::Dumper; import bag; BEGIN{ use strict; $| = 1; #unbuffered screen output. } sub new () { my $class = shift; my $self = {}; bless $self, $class; my @bags = @_; if (defined @bags) { $self->addBags(@bags); } bless $self,$class; } ## Dictionary sub addBags( @bagName ){ my $self = shift; my @bags = @_; my %dictionary = (); #sort by set size @bags = sort { $b->setSize()<=> $a->setSize()} @bags; foreach my $bag (@bags) { %dictionary = ( %{$bag->{BAG}}, %dictionary); #print Dumper %dictionary; } $self->{DICTIONARY} = \%dictionary; } sub size(){ # setSize() # Returns the number of unique elements in bag. my $self = shift; my %dictionary = %{$self->{DICTIONARY}}; # remove empty string key created by concat operation #delete $self->{DICTIONARY}{''}; return scalar keys (%dictionary); } 1;