Beefy Boxes and Bandwidth Generously Provided by pair Networks
Your skill will accomplish
what the force of many cannot
 
PerlMonks  

Spam Filter

by ncw (Friar)
on Apr 20, 2001 at 03:02 UTC ( #74023=sourcecode: print w/ replies, xml ) Need Help??

Category: E-Mail Programs
Author/Contact Info Nick Craig-Wood <ncw@axis.demon.co.uk>
Description: A utility which combined with a procmail filter will ditch spam for you using the Realtime Blackhole List or any compatible service.

This code provides a nice example of how to do simultaneous lookups with Net::DNS.

Note that there is a CPAN module to do part of this Mail::RBL but it doesn't parse mail messages for you and it doesn't do multiple IP lookups simultaneously.

See the start of the code for some more details and a procmail recipe.

Note that you need to be online when this program is running which you probably will be if you use fetchmail or sendmail directly.

#!/usr/bin/perl
#
# Take a mail message on STDIN or as a file argument and parse out all
# IP addresses in Received: headers.  These are then looked up in the
# Realtime Black Hole spam filter (or similar service) and if any are
# found then the programs prints a 1 to stdout otherwise it returns a
# 0
#
# Use this in a procmail .procmailrc file like this to filter all
# spam into a mailbox
#
# ISSPAM=`/path/to/rbl-test.pl`
#
# :0
# * ISSPAM ?? [1-9]
# {
#     # Add a spam detected header
#     :0 fw
#     | formail -A "X-Spam: $ISSPAM"
#
#     :0:
#     spambox
# }
#
# Takes -d to print extra stuff when debugging
#
# by Nick Craig-Wood <ncw@axis.demon.co.uk>

use strict;
use Net::DNS;
use Getopt::Std;
$^W=1;                          # use this instead of -w to silence wa
+rnings from Net::DNS

# You can put other servers in here as desired
#
# The commented out ones are too vicious for my taste - experiment by
# all means!

my @servers = (
    'blackholes.mail-abuse.org',# reported spammers
#   'dialups.mail-abuse.org',   # dialup users
    'relays.mail-abuse.org',    # open relays
#   'inputs.orbs.org',          # single stage relay filtering 
#   'outputs.orbs.org',         # immediate filtering of multihop rela
+ys.
#   'delayed-outputs.orbs.org', # delayed immediate filtering of multi
+hop relays.
);

# Ip addresses we should ignore here - the private ones
my $IGNORE = qr{
    ^(?:
        (?: 1 \. ) |
        (?: 10 \. ) |
        (?: 172 \. (?:1[6-9]|2\d|3[01]) \. ) |
        (?: 192 \. 168 \. ) |
        (?: 0 \. 0 \. 0 \. 0 $ ) |
        (?: 127 \. 0 \. 0 \. 1 $ )
    )
}x;

my $opt = {};
getopts("d", $opt);
my $DEBUG = $opt->{d};

# Parse the header out of the email joining continued header lines as
# necessary and stopping at the end of the header

my $header = "";
while (<>)
{
    chomp;
    last if $_ eq "";           # end of header
    $header .= "\n" unless /^\s+/;
    $header .= $_;
}
$header .= "\n";

# Parse the IP addresses out of the header

my $octet = qr{(?:\d|(?:[1-9]|1\d|2[0-4])\d|25[0-5])};
my $ip_addr = qr{$octet\.$octet\.$octet\.$octet};
my %ips;

while ($header =~ m/^Received:\s*(.*)$/mg)
{
    my $received = $1;
    while ($received =~ /\b($ip_addr)(?=\b)/og)
    {
        my $ip = $1;
        if ($ip =~ /$IGNORE/)
        {
            print "Ignoring ip '$ip'\n" if $DEBUG;
        }
        else
        {
            print "Found ip: '$ip'\n" if $DEBUG;
            $ips{$ip}++;
        }
    }
}

# Now test the ip addresses

my @blocked = query_ip_addresses(sort keys %ips);
if (@blocked)
{
    print "Blacklisted IP addresses found\n" if $DEBUG;
    print join(", ", @blocked), "\n";
}
else
{
    print "No bad IPs found - all OK\n" if $DEBUG;
    print "0\n";
}

exit(0);


############################################################
# Query the list of IP addresses in parallel
# This speeds up the checker greatly
############################################################

sub query_ip_addresses
{
   my (@ip_addresses) = @_;
   my ($RETRIES) = 2;           # try sending each packet this many ti
+mes
   my ($TIMEOUT) = 5;           # max time for all queries to come bac
+k
   my ($DTIMEOUT) = 0.1;        # time to poll for each query
   my ($retry_interval) = $TIMEOUT / $RETRIES;
   my ($i);
   my (@sock);
   my @blocked;

   # Produce a list of input names to look up
   my (@input) = map
   {
       my $revip = join(".", reverse split /\./, $_);
       map { "$revip.$_." } @servers;
   } @ip_addresses;
   my (@desc) = map
   {
       my $ip = $_;
       map { "$ip in $_" } @servers;
   } @ip_addresses;
   print "querying:\n", map { "  $_\n" } @input if $DEBUG;

   my ($dns) = new Net::DNS::Resolver;
   $dns->recurse(1);            # Do recurse
   $dns->dnsrch(0);             # Ignore the search list
   $dns->defnames(0);           # Don't append stuff to the end if no 
+trailing .
   #$dns->debug(1);

   # Create the background queries
   @sock = map { $dns->bgsend($_, "ANY") } @input;

   my ($retry_at) = $retry_interval;
   for (my $timeout = 0; $timeout < $TIMEOUT && scalar(grep { $_ } @so
+ck); $timeout += $DTIMEOUT)
   {
       print "{TRY}\n" if $DEBUG;
       select(undef, undef, undef, $DTIMEOUT); # sleep for a short tim
+e

       if ($timeout > $retry_at)
       {
           # destroy the sockets and remake them
           for ($i = 0; $i < @sock; $i++)
           {
               next unless $sock[$i];
               $sock[$i] = undef;       # destroy socket
               $sock[$i] = $dns->bgsend($input[$i], "ANY");
               print "{RETRY $input[$i]}\n" if $DEBUG;
           }
           $retry_at += $retry_interval; # reset the retry timeer
       }

       for ($i = 0; $i < @sock; $i++)
       {
           my $sock = $sock[$i];
           my $input = $input[$i];
           next unless $sock && $dns->bgisready($sock);
           my $query = $dns->bgread($sock);
           $sock[$i] = undef;   # destroy the socket

           if ($query)
           {
               print "$input[$i] answer received\n" if $DEBUG;
               foreach my $rr ($query->answer)
               {
                   next unless $rr->type eq "A";
                   print "**** Blacklisted IP found $desc[$i] (", $rr-
+>type, " => ", $rr->address ,")\n" if $DEBUG;
                   push @blocked, $desc[$i];
               }
           }
           else
           {
               print "query failed: ", $dns->errorstring, "\n" if $DEB
+UG;
           }
       }
   }
   
   # destroy any unused sockets - these are timeouts
   for ($i = 0; $i < @sock; $i++)
   {
       next unless $sock[$i];
       print "Timeout on: $input[$i]\n" if $DEBUG;
       $sock[$i] = undef;       # destroy socket
   }
   
   return @blocked;
}

Comment on Spam Filter
Download Code
Re: Spam Filter
by Anonymous Monk on Apr 20, 2001 at 21:47 UTC

    You want Mail::Audit, you do.

      Mail::Audit looks interesting but it would mean ditching procmail which I'd personally find unacceptable.

      I know procmail works, it integrates nicely with sendmail and it never loses mail. Wheras Mail::Audit seems to require being put in a .forward filter which isn't ideal (I think your mail will bounce if your program dies).

      Actually you could run Mail::Audit from procmail fairly easily which would probably give the best of both worlds - procmail's facility as a local delivery agent for sendmail with Mail::Audit's perl user friendliness!

      I know procmail's syntax is a little arcane - but hey this is Perlmonks - we're used to arcane Syntax!

Back to Code Catacombs

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://74023]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others scrutinizing the Monastery: (17)
As of 2014-12-18 18:52 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    Is guessing a good strategy for surviving in the IT business?





    Results (59 votes), past polls