Beefy Boxes and Bandwidth Generously Provided by pair Networks
We don't bite newbies here... much
 
PerlMonks  

A little cleanup ...

by pablopelos (Initiate)
on Nov 04, 2011 at 06:09 UTC ( #935837=note: print w/ replies, xml ) Need Help??


in reply to Naive Bayes Classifier Using Laplacian Smoothing

A little code cleanup goes a long way. When your were adding to your dictionary it wasn't adding the total count of each word it was just over-riding with the new count. Put in the quicker way to calculate the dictionary size. Also the word count on a bag was not returning a zero if the word did not occur in that bag leading to uninitialized errors.

#!/usr/local/bin/perl -w # Using Info from AI class implement a classifier that learns using Na +ive Bayes with Laplace # Smoothing to guess whether a title is a book song or movie. # Store wrong issues as training data in books.txt, movies.txt, and so +ngs.txt use strict; use Data::Dumper; import bag; import Dictionary; # training Data my $bookbag = new bag(); $bookbag->addItem("Small Business Kit For Fools"); $bookbag->addItem("Small Business Kit For Dummies"); $bookbag->addItem("S-Corporation: Small Business Start-Up Kit"); $bookbag->addItem("Home Business Tax Deductions"); $bookbag->addItem("Essential Technical Analysis"); $bookbag->addItem("Technical Analysis of the Markets"); $bookbag->addItem("Introduction to probability and statistics"); $bookbag->addItem("Differential Equations"); $bookbag->addItem("Principles of Physics"); $bookbag->addItem("Perl Testing, A developer's notebook"); $bookbag->addItem("BreakAWAY Careers"); $bookbag->addItem("A Game of Thrones"); $bookbag->addItem("The Cleric Quintet"); $bookbag->addItem("Exile's Song"); $bookbag->addItem("Honor's War"); my $moviebag = new bag(); $moviebag->addItem("Star Wars Episode IV: A New Hope"); $moviebag->addItem("Shrek"); $moviebag->addItem("Rocky"); $moviebag->addItem("Shrek II"); $moviebag->addItem("Rocky II"); $moviebag->addItem("Star Trek"); $moviebag->addItem("Star Trek II: The Wrath of Khan"); $moviebag->addItem("Independence Day"); $moviebag->addItem("I know what you did last summer"); $moviebag->addItem("A Nightmare on Elm Street"); $moviebag->addItem("Transformers"); $moviebag->addItem("Pirates of The Carribean II: The Black Pearl"); $moviebag->addItem("A Cinderella Story"); $moviebag->addItem("The Never Ending Story"); my $songbag = new bag(); $songbag->addItem("Short Skirt /Long Jacket"); $songbag->addItem("My name is Jonas"); $songbag->addItem("Wild Wild Life"); $songbag->addItem("You can call Me Al"); $songbag->addItem("If you only Knew"); $songbag->addItem("Sweet Sacrifice"); $songbag->addItem("Tear Away"); $songbag->addItem("Taking Care of Business Business"); my $dictionary = new Dictionary( $bookbag, $moviebag, $songbag ); #printf " Size of dictionary is :%d \n", $dictionary->size(); #print Dumper $dictionary; #die; #### Laplace Smoothing Co-efficient. our $k = 7; our $input =''; my $bookprob = base_probability( $k, $bookbag, $moviebag, $songb +ag); my $movieprob = base_probability( $k, $moviebag, $songbag, $bookb +ag); my $songprob = base_probability( $k, $songbag, $moviebag, $bookb +ag); prompt(); $input = <STDIN>; chomp ($input); while ( $input ne 'q') { ####DO my %choice = (); # calc Probability my $m = titleprob($input, $moviebag, $movieprob, $k, $dictionary) +; my $bk = titleprob($input, $bookbag, $movieprob, $k, $dictionary); my $s = titleprob($input, $songbag, $movieprob, $k, $dictionary); $choice{MOVIE} = $m / ( $m + $bk + $s ); $choice{BOOK} = $bk / ( $m + $bk + $s ); $choice{SONG} = $s / ( $m + $bk + $s ); #select the most likely my @sortedchoice = sort {$choice{$b} <=> $choice{$a} }( keys %cho +ice ); printf "\n\nThat title is probaby a %s with a %d %% confidence \n +", $sortedchoice[0], $choice{$sortedchoice[0]} *100; CHECK: print "Correct? (y/yes) \nif not enter b if it should be a book,\ +n\t m if it should be a movie and\n\t s if it should be a son +g\n"; my $check = <STDIN>; chomp($check); if ($check =~ m/[bms]/i){ print " Noted correction\n"; # add title to appropriate bag switch: { $check eq 'b' && do { $bookbag->addItem($input); last swi +tch;}; $check eq 'm' && do { $moviebag->addItem($input);last swi +tch;}; $check eq 's' && do { $songbag->addItem($input);last swit +ch;}; } # re-create dictionary #print Dumper $bookbag; $dictionary = new Dictionary( $moviebag, $songbag, $bookbag ); #print Dumper $dictionary; #recalculate base probs $bookprob = base_probability( $k, $bookbag, $moviebag, $ +songbag); $movieprob = base_probability( $k, $moviebag, $songbag, $ +bookbag); $songprob = base_probability( $k, $songbag, $moviebag, $ +bookbag); } elsif ($check =~ m/yes|y/i){ print "Cool!\n" } else{ goto CHECK; } ####Stuff prompt(); $input = <STDIN>; chomp ($input); last if ( $input eq 'q' ); } ###################################################################### sub final_probability { # ( $title, moviebag, bookbag,songbag, k, dict +ionary } sub base_probability { # return base probability for class represented as the first bag # this is a number between 0 and 1 my ($k, @listofbags) = @_; my $total = 0; foreach my $bag (@listofbags) { $total += $bag->itemCount(); } my $probability = ( $listofbags[0]->itemCount() + $k)/ ( $total + +$k * @listofbags ); return $probability; } sub wordprob { # Give probability of a particular word in a bag my ($word, $bag, $dictionary, $k) = @_; my $prob = ($bag->instanceCount($word) + $k ) / ($bag->bagSize() + + $k*$dictionary->size() ); return $prob; } sub titleprob { # Given a base probability and a bag calculate the particular prob + of the title. my ($title, $bag, $bprob, $k, $dictionary) = @_; #split up title into words my @words = split(/\s/,$title); my $prob = $bprob; foreach my $word (@words) { $prob *= wordprob($word, $bag, $dictionary, $k); } return $prob; } sub prompt { printf("movie %.4f book %.4f song %.4f \n", $movieprob, $bookpro +b, $songprob); printf("There are %d words in the dictionary\n", $dictionary->size +()); print "Enter a title. (Enter 'q' to quit) :"; } ###################################################################### +######## #### Data structures package bag; BEGIN{ use strict; $| = 1; #unbuffered screen output. } # Bag sub new { my $class = shift; my $title = shift; my $self = {}; bless $self, $class; if ( defined $title ) { addItem ($title);} return $self; } # Bag Ops sub addItem { # addItem ('Title') # Accept a title, and put words into bag my $self = shift; my $item = shift; $self->{TITLECOUNT} +=1; #chomp it chomp($item); #clear punctuation $item =~ s/[-:?.]|\// /g; my @wordcollection = split /\s+/,$item; foreach my $word (@wordcollection) { $self->{BAGSIZE} +=1; $self->{BAG}{uc($word)} +=1; } } sub instanceCount { # instanceCount('Word') # returns the number of times a word occurs in a bag. my $self = shift; my $word = shift; return $self->{BAG}{uc($word)} || 0 ; } sub setSize { # setSize() # Returns the number of unique elements in bag. my $self = shift; return scalar keys (%{$self->{BAG}}); } sub bagSize { # bagSize() # Returns the number of words in bag counting all duplicates. my $self = shift; return $self->{BAGSIZE}; } sub itemCount { #return how many titles were added to the bag. my $self = shift; return $self->{TITLECOUNT}; } 1; ###################################################################### +######## package Dictionary; use Data::Dumper; import bag; BEGIN{ use strict; $| = 1; #unbuffered screen output. } # Dictionary sub new { my $class = shift; my $self = {}; bless $self, $class; # bags are @_ if (@_) { $self->addBags(@_); } bless $self,$class; } # Dictionary Ops sub addBags { my $self = shift; my @bags = @_; my %dictionary = (); #sort by set size @bags = sort { $b->setSize()<=> $a->setSize()} @bags; foreach my $bag (@bags) { foreach (keys %{$bag->{BAG}}) { $dictionary{$_} += $bag->{BAG}->{$_}; } #print Dumper(\%dictionary); } $self->{DICTIONARY} = \%dictionary; } sub size { #my $self = shift; return scalar keys %{shift()->{DICTIONARY}}; } 1;


Comment on A little cleanup ...
Download Code

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://935837]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others taking refuge in the Monastery: (12)
As of 2014-10-01 21:48 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    What is your favourite meta-syntactic variable name?














    Results (39 votes), past polls