#!/usr/bin/perl -w
use strict;
my $dir = 'C:\Users\ZB\Desktop\Text Files';
opendir (DIR, $dir) or die "cannot opendir $dir";
foreach my $file (readdir(DIR))
{
next if $file eq '.';
next if $file eq '..';
process_file ($file);
}
sub process_file
{
my $fname=join("/",@_);
open FIN,"< $fname" or die "$fname:$!";
my @lines=<FIN>;
chomp @lines;
close FIN;
my $count=0;
foreach my $line(@lines)
{
map { $count++ } split (/[\s\t\n]+/,$line);
}
printf "There are %d words in %s\n",$count,$fname;
}
closedir (DIR);
it works just fine, but when I try using it in a bigger program it gives me the ":No such file or directory found at lingo4.pl line 508" error?
#!/usr/bin/perl -w
use strict;
open (INPUT2, "<korpusENG.txt") or die "can't open";
open (INPUT3, "<korpusAFR.txt") or die "can't open";
open (INPUT4, "<korpusNDB.txt") or die "can't open";
open (INPUT5, "<korpusSEP.txt") or die "can't open";
open (INPUT6, "<korpusSIS.txt") or die "can't open";
open (INPUT7, "<korpusSOT.txt") or die "can't open";
open (INPUT8, "<korpusTSO.txt") or die "can't open";
open (INPUT9, "<korpusTSW.txt") or die "can't open";
open (INPUT10, "<korpusVEN.txt") or die "can't open";
open (INPUT11, "<korpusXHO.txt") or die "can't open";
open (INPUT12, "<korpusZUL.txt") or die "can't open";
################################################################
my %EN;
my %AFR;
my %NDB;
my %SEP;
my %SIS;
my %SOT;
my %TSO;
my %TSW;
my %VEN;
my %XHO;
my %ZUL;
my $dir = 'C:\Users\ZB\Desktop\Text Files';
opendir (DIR, $dir) or die "cannot opendir $dir";
while (<INPUT2>) #sit Eng korpus in hash
{
my $word = $_;
chomp $word;
$EN{$word}++;
}
while (<INPUT3>) #sit Afr korpus in hash
{
my $word = $_;
chomp $word;
$AFR{$word}++;
}
while (<INPUT4>) #sit ndbele korpus in hash
{
my $word = $_;
chomp $word;
$NDB{$word}++;
}
while (<INPUT5>) #sit sepedi korpus in hash
{
my $word = $_;
chomp $word;
$SEP{$word}++;
}
while (<INPUT6>) #sit siswati korpus in hash
{
my $word = $_;
chomp $word;
$SIS{$word}++;
}
while (<INPUT7>) #sit sotho korpus in hash
{
my $word = $_;
chomp $word;
$SOT{$word}++;
}
while (<INPUT8>) #sit tsonga korpus in hash
{
my $word = $_;
chomp $word;
$TSO{$word}++;
}
while (<INPUT9>) #sit tswana korpus in hash
{
my $word = $_;
chomp $word;
$TSW{$word}++;
}
while (<INPUT10>) #sit venda korpus in hash
{
my $word = $_;
chomp $word;
$VEN{$word}++;
}
while (<INPUT11>) #sit xhosa korpus in hash
{
my $word = $_;
chomp $word;
$XHO{$word}++;
}
while (<INPUT12>) #sit zulu korpus in hash
{
my $word = $_;
chomp $word;
$ZUL{$word}++;
}
#############################################################
print "To classify a single file, press 1\nTo work within a directory,
+ press 2\n";
my $input = <STDIN>;
chomp($input);
while(&validate($input)==0)
{
print "Incorrect, please try again\n";
$input = <STDIN>;
chomp($input);
}
if ($input == 1)
{
&doc();
}
else
{
print "To classify each document within the directory, press 1\nTo
+ search for all the files of a specific language wihtin the directory
+, press 2\n";
my $input2 = <STDIN>;
chomp($input2);
while(&validate($input2)==0)
{
print "Incorrect, please try again\n";
$input2 = <STDIN>;
chomp($input2);
}
if ($input2 == 1)
{
foreach my $file (readdir(DIR))
{
next if $file eq '.';
next if $file eq '..';
&folder($file);
while(&folder()== 1)
{
print "The $file is ENGLISH";
}
while(folder()== 2)
{
print "The $file is AFRIKAANS";
}
while(folder()== 3)
{
print "The $file is NDEBELE";
}
while(folder()== 4)
{
print "The $file is SEPEDI";
}
while(folder()== 5)
{
print "The $file is SISWATI";
}
while(folder()== 6)
{
print "The $file is SOTHO";
}
while(folder()== 7)
{
print "The $file is TSONGA";
}
while(folder()== 8)
{
print "The $file is TSWANA";
}
while(folder()== 9)
{
print "The $file is VENDA";
}
while(folder()== 10)
{
print "The $file is XHOSA";
}
while(folder()== 11)
{
print "The $file is ZULU";
}
}
}
else
{
print "Search for\n\n1)English\n2)Afrikaans\n3)Ndebele\n4)Sepe
+di\n5)Siswati\n6)Sotho\n7)Tsonga\n8)Tswana\n9)Venda\n10)Xhosa\n11)Zul
+u\n\nPress the number\n";
my $input3 = <STDIN>;
chomp($input3);
while(&validate2($input3)==0)
{
print "Incorrect, please try again\n";
$input3 = <STDIN>;
chomp($input3);
}
}
}
############################################################# sub1
sub doc
{
print "Please type the name of the file you wish to classify\n";
my $filename = <STDIN>;
chomp ($filename);
open (INPUT, "<$filename") or die "can't open";
my @words; #array met input in
my @words2; #array met korrekte input in
my %hash;
while (<INPUT>)
{
my $word = $_;
chomp $word;
push @words, split(/ /, $word);
}
foreach my $correct (@words) #sit net woorde in @words2, niks ander
+ funny goed nie
{
if ($correct =~ /^\w+$/)
{
push (@words2, lc $correct);
}
}
foreach my $val (@words2) #sit input vanaf 2 array in hash in (fre
+kwensie lys)
{
$hash{$val}++;
}
#################################################### #aantal wo
+orde in hash
my $total = 0;
foreach my $keys (keys %hash)
{
$total = $total + $hash{$keys};
}
###################################################################
+ENGELS
my $varEN = 0;
foreach my $keys (keys %hash)
{
foreach my $key (keys %EN)
{
if ($keys eq $key)
{
$varEN = $varEN + $hash{$keys};
}
}
}
my $percentileEN = ($varEN/$total)*100;
###################################################################
+AFRIKAANS
my $varAFR = 0;
foreach my $keys (keys %hash)
{
foreach my $afr (keys %AFR)
{
if ($keys eq $afr)
{
$varAFR = $varAFR + $hash{$keys};
}
}
}
my $percentileAFR = ($varAFR/$total)*100;
####################################################################
+ NDEBELE
my $varNDB = 0;
foreach my $keys (keys %hash)
{
foreach my $ndb (keys %NDB)
{
if ($keys eq $ndb)
{
$varNDB = $varNDB + $hash{$keys};
}
}
}
my $percentileNDB = ($varNDB/$total)*100;
####################################################################
+SEPEDI
my $varSEP = 0;
foreach my $keys (keys %hash)
{
foreach my $sep (keys %SEP)
{
if ($keys eq $sep)
{
$varSEP = $varSEP + $hash{$keys};
}
}
}
my $percentileSEP = ($varSEP/$total)*100;
####################################################################
+SISWATI
my $varSIS = 0;
foreach my $keys (keys %hash)
{
foreach my $sis (keys %SIS)
{
if ($keys eq $sis)
{
$varSIS = $varSIS + $hash{$keys};
}
}
}
my $percentileSIS = ($varSIS/$total)*100;
####################################################################
+SOTHO
my $varSOT = 0;
foreach my $keys (keys %hash)
{
foreach my $sot (keys %SOT)
{
if ($keys eq $sot)
{
$varSOT = $varSOT + $hash{$keys};
}
}
}
my $percentileSOT = ($varSOT/$total)*100;
##################################################################
+TSONGA
my $varTSO = 0;
foreach my $keys (keys %hash)
{
foreach my $tso (keys %TSO)
{
if ($keys eq $tso)
{
$varTSO = $varTSO + $hash{$keys};
}
}
}
my $percentileTSO = ($varTSO/$total)*100;
##################################################################
+TSWANA
my $varTSW = 0;
foreach my $keys (keys %hash)
{
foreach my $tsw (keys %TSW)
{
if ($keys eq $tsw)
{
$varTSW = $varTSW + $hash{$keys};
}
}
}
my $percentileTSW = ($varTSW/$total)*100;
##################################################################
+VENDA
my $varVEN = 0;
foreach my $keys (keys %hash)
{
foreach my $ven (keys %VEN)
{
if ($keys eq $ven)
{
$varVEN = $varVEN + $hash{$keys};
}
}
}
my $percentileVEN = ($varVEN/$total)*100;
##################################################################
+XHOSA
my $varXHO = 0;
foreach my $keys (keys %hash)
{
foreach my $xho (keys %XHO)
{
if ($keys eq $xho)
{
$varXHO = $varXHO + $hash{$keys};
}
}
}
my $percentileXHO = ($varXHO/$total)*100;
###################################################################
+ZULU
my $varZUL = 0;
foreach my $keys (keys %hash)
{
foreach my $zul (keys %ZUL)
{
if ($keys eq $zul)
{
$varZUL = $varZUL + $hash{$keys};
}
}
}
my $percentileZUL = ($varZUL/$total)*100;
########################################################## Deter
+mine lingo through percentiles
my $highest = $percentileEN;
if ($percentileAFR > $highest)
{
$highest = $percentileAFR;
}
if ($percentileNDB > $highest)
{
$highest = $percentileNDB;
}
if ($percentileSEP > $highest)
{
$highest = $percentileSEP;
}
if ($percentileSIS > $highest)
{
$highest = $percentileSIS;
}
if ($percentileSOT > $highest)
{
$highest = $percentileSOT;
}
if ($percentileTSO > $highest)
{
$highest = $percentileTSO;
}
if ($percentileTSW > $highest)
{
$highest = $percentileTSW;
}
if ($percentileVEN > $highest)
{
$highest = $percentileVEN;
}
if ($percentileXHO > $highest)
{
$highest = $percentileXHO;
}
if ($percentileZUL > $highest)
{
$highest = $percentileZUL;
}
#############################################
if ($highest == $percentileEN)
{
print "This document is ENGLISH\n";
}
elsif ($highest == $percentileAFR)
{
print "This document is AFRIKAANS\n";
}
elsif ($highest == $percentileNDB)
{
print "This document is NDEBELE\n";
}
elsif ($highest == $percentileSEP)
{
print "This document is SEPEDI\n";
}
elsif ($highest == $percentileSIS)
{
print "This document is SISWATI\n";
}
elsif ($highest == $percentileSOT)
{
print "This document is SOTHO\n";
}
elsif ($highest == $percentileTSO)
{
print "This document is TSONGA\n";
}
elsif ($highest == $percentileTSW)
{
print "This document is TSWANA\n";
}
elsif ($highest == $percentileVEN)
{
print "This document is VENDA\n";
}
elsif ($highest == $percentileXHO)
{
print "This document is XHOSA\n";
}
elsif ($highest == $percentileZUL)
{
print "This document is ZULU\n";
}
}
######################################################################
+############################
sub folder
{
my $fname = join("/",@_);
open FIN,"< $fname" or die "$fname:$!";
my @words =<FIN>;
chomp @words;
close FIN;
my @words2;
my @words3;
my %hash;
foreach my $line(@words)
{
@words2 = split(/[\s\t\n]+/,$line);
}
foreach my $correct(@words2)
{
if ($correct =~ /^\w+$/)
{
push (@words3, lc $correct);
}
}
foreach my $val (@words3) #sit input vanaf 2 array in hash in (fre
+kwensie lys)
{
$hash{$val}++;
}
######################################################
my $total = 0;
foreach my $keys (keys %hash)
{
$total = $total + $hash{$keys};
}
###################################################################
+ENGELS
my $varEN = 0;
foreach my $keys (keys %hash)
{
foreach my $key (keys %EN)
{
if ($keys eq $key)
{
$varEN = $varEN + $hash{$keys};
}
}
}
my $percentileEN = ($varEN/$total)*100;
###################################################################
+AFRIKAANS
my $varAFR = 0;
foreach my $keys (keys %hash)
{
foreach my $afr (keys %AFR)
{
if ($keys eq $afr)
{
$varAFR = $varAFR + $hash{$keys};
}
}
}
my $percentileAFR = ($varAFR/$total)*100;
####################################################################
+ NDEBELE
my $varNDB = 0;
foreach my $keys (keys %hash)
{
foreach my $ndb (keys %NDB)
{
if ($keys eq $ndb)
{
$varNDB = $varNDB + $hash{$keys};
}
}
}
my $percentileNDB = ($varNDB/$total)*100;
####################################################################
+SEPEDI
my $varSEP = 0;
foreach my $keys (keys %hash)
{
foreach my $sep (keys %SEP)
{
if ($keys eq $sep)
{
$varSEP = $varSEP + $hash{$keys};
}
}
}
my $percentileSEP = ($varSEP/$total)*100;
####################################################################
+SISWATI
my $varSIS = 0;
foreach my $keys (keys %hash)
{
foreach my $sis (keys %SIS)
{
if ($keys eq $sis)
{
$varSIS = $varSIS + $hash{$keys};
}
}
}
my $percentileSIS = ($varSIS/$total)*100;
####################################################################
+SOTHO
my $varSOT = 0;
foreach my $keys (keys %hash)
{
foreach my $sot (keys %SOT)
{
if ($keys eq $sot)
{
$varSOT = $varSOT + $hash{$keys};
}
}
}
my $percentileSOT = ($varSOT/$total)*100;
##################################################################
+TSONGA
my $varTSO = 0;
foreach my $keys (keys %hash)
{
foreach my $tso (keys %TSO)
{
if ($keys eq $tso)
{
$varTSO = $varTSO + $hash{$keys};
}
}
}
my $percentileTSO = ($varTSO/$total)*100;
##################################################################
+TSWANA
my $varTSW = 0;
foreach my $keys (keys %hash)
{
foreach my $tsw (keys %TSW)
{
if ($keys eq $tsw)
{
$varTSW = $varTSW + $hash{$keys};
}
}
}
my $percentileTSW = ($varTSW/$total)*100;
##################################################################
+VENDA
my $varVEN = 0;
foreach my $keys (keys %hash)
{
foreach my $ven (keys %VEN)
{
if ($keys eq $ven)
{
$varVEN = $varVEN + $hash{$keys};
}
}
}
my $percentileVEN = ($varVEN/$total)*100;
##################################################################
+XHOSA
my $varXHO = 0;
foreach my $keys (keys %hash)
{
foreach my $xho (keys %XHO)
{
if ($keys eq $xho)
{
$varXHO = $varXHO + $hash{$keys};
}
}
}
my $percentileXHO = ($varXHO/$total)*100;
###################################################################
+ZULU
my $varZUL = 0;
foreach my $keys (keys %hash)
{
foreach my $zul (keys %ZUL)
{
if ($keys eq $zul)
{
$varZUL = $varZUL + $hash{$keys};
}
}
}
my $percentileZUL = ($varZUL/$total)*100;
########################################################## Deter
+mine lingo through percentiles
my $highest = $percentileEN;
if ($percentileAFR > $highest)
{
$highest = $percentileAFR;
}
if ($percentileNDB > $highest)
{
$highest = $percentileNDB;
}
if ($percentileSEP > $highest)
{
$highest = $percentileSEP;
}
if ($percentileSIS > $highest)
{
$highest = $percentileSIS;
}
if ($percentileSOT > $highest)
{
$highest = $percentileSOT;
}
if ($percentileTSO > $highest)
{
$highest = $percentileTSO;
}
if ($percentileTSW > $highest)
{
$highest = $percentileTSW;
}
if ($percentileVEN > $highest)
{
$highest = $percentileVEN;
}
if ($percentileXHO > $highest)
{
$highest = $percentileXHO;
}
if ($percentileZUL > $highest)
{
$highest = $percentileZUL;
}
if ($highest == $percentileEN)
{
return 1;
}
elsif ($highest == $percentileAFR)
{
return 2;
}
elsif ($highest == $percentileNDB)
{
return 3;
}
elsif ($highest == $percentileSEP)
{
return 4;
}
elsif ($highest == $percentileSIS)
{
return 5;
}
elsif ($highest == $percentileSOT)
{
return 6;
}
elsif ($highest == $percentileTSO)
{
return 7;
}
elsif ($highest == $percentileTSW)
{
return 8;
}
elsif ($highest == $percentileVEN)
{
return 9;
}
elsif ($highest == $percentileXHO)
{
return 10;
}
elsif ($highest == $percentileZUL)
{
return 11;
}
}
######################################################################
+########################################
sub validate
{
my $num = $_[0];
{
if ($num == 1 || $num == 2)
{
return 1;
}
return 0;
}
}
sub validate2
{
my $num = $_[0];
{
if ($num =~ /1-9/)
{
return 1;
}
return 0;
}
}
close (DIR);