#!/usr/bin/env perl 

use strict;
use warnings;
use utf8;
use Text::CSV;
use Carp;
use LWP::Simple qw(get);
use Text::Unidecode qw(unidecode);
use HTML::TreeBuilder::XPath;

# Notification e-mail: comma-separated recipient list and subject line.
my %email = (
    subject => 'New ECDS reviews found',
    to      => 'me@example.com,you@example.com',
);

# One entry per review site. Each entry names the site, the subroutine that
# extracts the newest review date from that site's parsed page, and the page
# to fetch for each campus code.
my $review_sites = [
    {
        site         => 'Yelp',
        sub          => \&yelp_checker,
        review_pages => {
            EB => 'http://www.yelp.com/biz/encinitas-country-day-school-encinitas?sort_by=date_desc',
            MA => 'http://www.yelp.com/biz/encinitas-country-day-school-encinitas-2?sort_by=date_desc',
        },
    },
    {
        site         => 'GreatSchools',
        sub          => \&gs_checker,
        review_pages => {
            MA => 'http://www.greatschools.org/california/encinitas/9670-Encinitas-Country-Day-School/?tab=reviews',
        },
    },
    {
        site         => 'PrivateSchoolReview',
        sub          => \&psr_checker,
        review_pages => {
            MA => 'http://www.privateschoolreview.com/school_ov/school_id/2039',
        },
    },
    {
        site         => 'Kudzu',
        sub          => \&kudzu_checker,
        review_pages => {
            MA => 'http://www.kudzu.com/m/Encinitas-Country-Day-School-13571675',
        },
    },
    {
        site         => 'MerchantCircle',
        sub          => \&mc_checker,
        review_pages => {
            MA => 'http://www.merchantcircle.com/business/Encinitas.Country.Day.School.760-942-1111?sort=created&dir=desc',
        },
    },
];

# Placeholder the checkers return when a page yields no review date.
my $default_date = '00-00-0000';

# English month name => two-digit month number ('01' .. '12').
my @month_names = qw(
  January February March     April   May      June
  July    August   September October November December
);
my %month;
@month{@month_names} =
  map { sprintf '%02d', $_ } 1 .. scalar @month_names;

# CSV file holding the dates seen by the previous run.
my $reviews_filepath = 'reviews.txt';

# File that receives the text of the latest alert message.
my $msg_filepath = 'msg.txt';

# Dates recorded by the previous run, keyed by "<campus>_<site>".
my $old_reviews = hash_from_csv($reviews_filepath);

# Results of this run: "<campus>_<site>" => [ newest review date, page URL ].
my %new_reviews;

# Fetch every configured page and ask the site's checker for the date of the
# most recent review on it.
for my $review_site (@$review_sites) {
    my $pages = $review_site->{review_pages};

    # Sorted so the pages are always fetched in the same order.
    for my $campus ( sort keys %$pages ) {
        my $url = $pages->{$campus};

        # get() reports failure only through its return value, so $! carries
        # no information about what went wrong and is left out of the message.
        my $html = get($url) or croak("Can't reach $url\n");

        # Transliterate every run of non-ASCII characters to plain ASCII.
        $html =~ s/([^[:ascii:]]+)/unidecode($1)/ge;

        my $tree = HTML::TreeBuilder::XPath->new;
        $tree->parse($html);
        $tree->eof;    # parse() only feeds the parser; eof() finishes the tree

        my ($date) = $review_site->{'sub'}->($tree);
        $tree->delete;    # the date is a plain string by now; free the tree

        # Never hand an undefined date on to the comparison or the CSV file.
        $date = $default_date unless defined $date;

        # create hash keys from campus and review site names
        my $campus_site = $campus . '_' . $review_site->{site};
        $new_reviews{$campus_site} = [ $date, $url ];
    }
}

# Build the alert text: one entry for every page whose newest review date
# differs from the date recorded by the previous run. Entries are sorted by
# key so the message reads the same way on every run.
my $msg = '';
for my $item ( sort keys %new_reviews ) {
    my $data = $new_reviews{$item};

    # A page without an earlier record is compared against the default date.
    # This avoids comparing against undef (and creating an empty entry in the
    # old data) and keeps a page on which no review was found from being
    # reported as new.
    my $old_date =
      ( exists( $old_reviews->{$item} ) && defined( $old_reviews->{$item}[0] ) )
      ? $old_reviews->{$item}[0]
      : $default_date;
    my $new_date = defined( $data->[0] ) ? $data->[0] : $default_date;

    next if $old_date eq $new_date;
    $msg .= "New review on $new_date: \n    $$data[1]\n";
}

# Save the message (possibly empty) so the latest alert text is always on
# disk, replacing whatever an earlier run left there.
open my $fh, ">:encoding(utf8)", $msg_filepath
  or croak("cannot open $msg_filepath: $!");
print {$fh} $msg or croak("Can't print message:\n$msg\n$!");

# Buffered write errors only surface when the handle is closed.
close $fh or croak("cannot close $msg_filepath: $!");

# Write new review data to file.
hash_to_csv( \%new_reviews, $reviews_filepath );

# Email message if exists
send_email($msg) if length($msg);

######## SUBROUTINES #######

# Load the data saved by an earlier run from a CSV file.
#
# Takes the path of the CSV file. The first field of every row becomes a hash
# key; the remaining fields of that row (as an array ref) become its value.
# Returns a reference to that hash, which is empty when the file does not
# exist. Croaks when an existing file cannot be opened.
sub hash_from_csv {
    my $filepath = shift;
    my %hash;

    # A file that is not there yet simply means there is no old data; treating
    # that as an error would make it impossible to start from scratch.
    return \%hash unless -e $filepath;

    open my $fh, "<:encoding(utf8)", $filepath
      or croak("cannot open  $filepath: $!");
    my $csv  = Text::CSV->new( { binary => 1 } );
    my $rows = $csv->getline_all($fh) || [];
    close $fh;

    for my $row (@$rows) {
        my $key = shift @$row;

        # Rows without a usable key (e.g. blank lines) carry no data.
        next unless defined $key && length $key;
        $hash{$key} = $row;
    }
    return \%hash;
}

# Save the collected data to a CSV file, replacing its previous contents.
#
# Takes a hash ref whose values are array refs, and the path of the file to
# write. One row is written per key: the key followed by the first element of
# its array. Rows are written in sorted key order so the file does not change
# between runs when the data does not. Croaks when the file cannot be opened,
# written or closed. Returns nothing.
sub hash_to_csv {
    my ( $hash, $filepath ) = @_;
    open my $fh, ">:encoding(utf8)", $filepath
      or croak("cannot open  $filepath: $!");
    my $csv = Text::CSV->new( { binary => 1, eol => "\n" } );

    for my $key ( sort keys %$hash ) {
        $csv->print( $fh, [ $key, $hash->{$key}[0] ] )
          or croak( "cannot write $key to $filepath: "
              . ( $csv->error_diag || $! ) );
    }

    # Buffered write errors only surface when the handle is closed.
    close $fh or croak("cannot close $filepath: $!");
    return;
}

# Mail the alert text by piping it into mailx.
#
# Takes the message body. The subject and the recipient list come from the
# file-level %email settings. Croaks when the pipe cannot be opened or when
# mailx finishes with a non-zero status. Returns nothing.
sub send_email {
    my $body = shift;

    # List form: the arguments go to mailx directly, without a shell.
    my @command = ( '/usr/bin/mailx', '-s', $email{subject}, $email{to} );
    open( my $mailer, '|-', @command )
      or croak("can't open pipe to mailx: $!\n");

    print {$mailer} $body;

    # Closing the pipe waits for mailx and stores its exit status in $?.
    close $mailer;
    if ( $? != 0 ) {
        croak("mailx exited with a non-zero status: $?\n");
    }
    return;
}

# Extract the date of the most recent review from a parsed GreatSchools page.
#
# Takes the page's tree object (its findnodes() method is called with an XPath
# expression). The page shows dates as 'Month dd, yyyy'; a date in that form
# with a known month name is returned as year-month-day. Text in any other
# form is returned unchanged, and $default_date is returned when the page has
# no date at all.
sub gs_checker {
    my $tree = shift;
    my $xpath =
'//div[contains(@class,"media mbs")]/div[(@class="author small make-999999 fl pbn mbn")]';
    my $dates = $tree->findnodes($xpath);

    # No matching node: there is nothing to convert, so do not run the
    # pattern match against an undefined value.
    my $node = $dates->[0] or return $default_date;

    my $date = $node->as_trimmed_text();

    # Convert only when the month name is one we can translate; otherwise the
    # result would contain an empty month.
    if (   defined $date
        && $date =~ /(\w{3,9})\s+(\d{1,2}),\s+(\d{4})/
        && exists $month{$1} )
    {
        # The day is kept exactly as captured (not zero-padded) so the result
        # stays comparable with dates saved by earlier runs.
        $date = $3 . '-' . $month{$1} . '-' . $2;
    }
    return ( $date || $default_date );
}

# Extract the date of the most recent review from a parsed Yelp page.
#
# Takes the page's tree object (its findnodes() method is called with an XPath
# expression). Returns the 'content' attribute of the first datePublished
# <meta> element, which the page gives as 'yyyy-mm-dd'. Returns $default_date
# when there is no such element or its 'content' attribute is missing or
# empty, so the caller never receives an undefined date.
sub yelp_checker {
    my $tree  = shift;
    my $xpath = '//meta[@itemprop="datePublished"][1]';
    my $dates = $tree->findnodes($xpath);

    my $node = $dates->[0] or return $default_date;
    my $date = $node->attr('content');
    return ( $date || $default_date );
}

# Extract the date of the most recent review from a parsed
# PrivateSchoolReview page.
#
# Takes the page's tree object (its findnodes() method is called with an XPath
# expression). Returns the 'content' attribute of the first datePublished
# <meta> element, which the page gives as 'yyyy-mm-dd'. Returns $default_date
# when there is no such element or its 'content' attribute is missing or
# empty, so the caller never receives an undefined date.
sub psr_checker {
    my $tree  = shift;
    my $xpath = '//meta[@itemprop="datePublished"][1]';
    my $dates = $tree->findnodes($xpath);

    my $node = $dates->[0] or return $default_date;
    my $date = $node->attr('content');
    return ( $date || $default_date );
}

# Extract the date of the most recent review from a parsed Kudzu page.
#
# Takes the page's tree object (its findnodes() method is called with an XPath
# expression). The page shows dates as 'mm/dd/yyyy'; a date in that form is
# returned as year-month-day. Text in any other form is returned unchanged,
# and $default_date is returned when the page has no date at all.
sub kudzu_checker {
    my $tree  = shift;
    my $xpath = '//div[@class="review_post_date"]/p/span[@class="rp-date"]';
    my $dates = $tree->findnodes($xpath);

    # No matching node: there is nothing to convert, so do not run the
    # pattern match against an undefined value.
    my $node = $dates->[0] or return $default_date;

    my $date = $node->as_trimmed_text();
    if ( defined $date && $date =~ /(\d{1,2})\/(\d{1,2})\/(\d{4})/ ) {

        # Month and day are kept exactly as captured (not zero-padded) so the
        # result stays comparable with dates saved by earlier runs.
        $date = $3 . '-' . $1 . '-' . $2;
    }
    return ( $date || $default_date );
}

# Extract the date of the most recent review from a parsed MerchantCircle
# page.
#
# Takes the page's tree object (its findnodes() method is called with an XPath
# expression). The page shows dates as 'Month dd, yyyy at hh:mm PM'; a date in
# that form with a known month name is returned as year-month-day. Text in any
# other form is returned unchanged, and $default_date is returned when the
# page has no date at all.
sub mc_checker {
    my $tree  = shift;
    my $xpath = '//span[@itemprop="datePublished"][1]';
    my $dates = $tree->findnodes($xpath);

    # No matching node: there is nothing to convert, so do not run the
    # pattern match against an undefined value.
    my $node = $dates->[0] or return $default_date;

    my $date = $node->as_trimmed_text();

    # Convert only when the month name is one we can translate; otherwise the
    # result would contain an empty month.
    if (
        defined $date
        && $date =~
        /\s*(\w{3,9})\s*(\d{1,2})\s*\,\s*(\d{4})\s+at\s+\d{1,2}\:\d{2}\s+[AP]M/
        && exists $month{$1}
      )
    {
        # The day is kept exactly as captured (not zero-padded) so the result
        # stays comparable with dates saved by earlier runs.
        $date = $3 . '-' . $month{$1} . '-' . $2;
    }
    return ( $date || $default_date );
}