Re^3: arabic alphabet ... how to deal with?

in reply to Re^2: arabic alphabet ... how to deal with?
in thread arabic alphabet ... how to deal with?

What about the infile? That needs to be opened UTF-8 as well.

-derby

Comment on Re^3: arabic alphabet ... how to deal with?

Replies are listed 'Best First'.
Re^4: arabic alphabet ... how to deal with? by Anonymous Monk on Feb 12, 2009 at 16:37 UTC
It is UTF-8 as well ... That does not change the results, and it also gives some errors regarding wide characters ... even if I change my code to this: #!/usr/bin/perl use Lingua::AR::Word::Encode; use Encode::Arabic; open (STOPWORDS, $ARGV[1]) \|\| die "Error opening the stopwords file\n" +; $count = 0; while ($word = <STOPWORDS>) { $word=Lingua::AR::Word::encode($word); chop($word); $stopword[$count] = lc($word); $count++; } close(STOPWORDS); open (INFILE , $ARGV[0]) \|\| die "Error opening the input file\n"; while ($line = <INFILE>) { $line=Lingua::AR::Word::encode($line); chop($line); @entry = split(/ /, $line); $i = 0; while ($entry[$i]) { $found = 0; $j = 0; while (($j<=$count) && ($found==0)) { if (lc($entry[$i]) eq $stopword[$j]) { $found = 1; } $j++; } if ($found == 0) { print "$entry[$i]\n "; } $i++; } } close(INFILE); [download] It still does not work; it does not remove my stop words :(	[reply] [d/l]

Replies are listed 'Best First'.

Re^4: arabic alphabet ... how to deal with?
by Anonymous Monk on Feb 12, 2009 at 16:37 UTC

#!/usr/bin/perl
# Stop-word filter.
#
# Usage: <script> INPUT_FILE STOPWORDS_FILE
#
# Reads STOPWORDS_FILE (one stop word per line), then prints every
# space-separated token of INPUT_FILE whose lower-cased form is not in the
# stop-word list, one token per output line.  Each line of both files is
# passed through Lingua::AR::Word::encode() before any comparison, so stop
# words and tokens are compared in the same representation.
#
# Dies with "Error opening the stopwords file" / "Error opening the input
# file" when the respective file cannot be opened.
use strict;
use warnings;
use Lingua::AR::Word::Encode;
use Encode::Arabic;

my ($input_path, $stopwords_path) = @ARGV;

# Load the stop words into a hash so each token lookup is a single probe
# instead of a scan over the whole list.
my %is_stopword;
open(my $stop_fh, '<', $stopwords_path)
    or die "Error opening the stopwords file\n";
while (my $word = <$stop_fh>) {
    $word = Lingua::AR::Word::encode($word);

    # Strip only a real line ending (LF or CRLF).  chop() would remove the
    # last character unconditionally, corrupting a final line that has no
    # newline and leaving a stray "\r" on CRLF files.
    $word =~ s/\r?\n\z//;

    next if $word eq '';    # a blank line is not a stop word
    $is_stopword{ lc $word } = 1;
}
close($stop_fh);

open(my $in_fh, '<', $input_path)
    or die "Error opening the input file\n";
while (my $line = <$in_fh>) {
    $line = Lingua::AR::Word::encode($line);
    $line =~ s/\r?\n\z//;

    for my $token (split / /, $line) {
        # Consecutive spaces produce empty fields; skip them rather than
        # abandoning the rest of the line (a truthiness test on the token
        # would also stop at a literal "0").
        next if $token eq '';
        next if $is_stopword{ lc $token };

        # NOTE(review): the space after the newline is kept from the
        # original output format; it looks unintended -- confirm.
        print "$token\n ";
    }
}
close($in_fh);
[download]

[reply]
[d/l]

In Section Seekers of Perl Wisdom