#!/usr/local/bin/perl
use strict;
use warnings;
use lib "$ENV{HOME}/mylib/lib/perl5";
use HTML::TreeBuilder;
use LWP::Simple;

# Program Name: top_speakers.pl
# Author: XXXXX
# Purpose: Parses the page
#   http://perlcourse.ecorp.net/conf-mirror/conferences.oreillynet.com/speakers.html
#   and finds the speakers who had the most sessions and/or tutorials.
#   The original code only counted sessions or tutorials; it was adjusted to
#   count Sessions, Tutorials, BOFs and Panels to match the expected output
#   in the project specification.

# Debugging switch - set to a positive integer to enable diagnostic output.
my $DEBUG_FLAG = 0;

# URL of the speakers page to parse.
my $URL = 'http://perlcourse.ecorp.net/conf-mirror/conferences.oreillynet.com/speakers.html';

# How many of the highest-ranked speakers to print.
my $TOP_SPEAKER_LIMIT = 3;

# Fetch the page. LWP::Simple::get returns undef on any failure, so stop here
# with a clear message instead of feeding undef to the parser.
my $html = get($URL);
die "Unable to fetch $URL\n" unless defined $html;

# Build the tree. parse() only feeds a chunk of text to the parser; eof() must
# be called afterwards so TreeBuilder finishes the tree before it is examined.
my $tree = HTML::TreeBuilder->new;
$tree->parse($html);
$tree->eof;

# Speaker name => number of Sessions/Tutorials/BOFs/Panels found for them.
my %speakers;

# The speaker most recently seen by find_speakers; the engagement links that
# follow are credited to this name.
my $current_speaker;

# look_down must be called in list context: in scalar context it returns at
# the first match and the remaining links would never reach find_speakers.
# The returned nodes themselves are not needed; the callback does the counting.
my @nodes = $tree->look_down( _tag => "a", \&find_speakers );

# In debug mode, print every speaker together with their total.
if ($DEBUG_FLAG) {
    foreach my $name ( sort keys %speakers ) {
        print "$name = ($speakers{$name})\n";
    }
}

# Print the speakers with the highest totals, highest first, and stop once
# $TOP_SPEAKER_LIMIT of them have been displayed.
# Technically, speakers with the same number of engagements should be weighted
# equally (equal third etc.), but this was not in the project requirements.
my $counter = 0;
foreach my $key ( sort hashValueDescending keys %speakers ) {
    print "$key\t($speakers{$key})\n";
    $counter++;
    last if $counter == $TOP_SPEAKER_LIMIT;
}

# Delete the tree object to free up the memory (best practice).
$tree->delete;

# find_speakers - look_down callback, invoked once for every <a> element.
#
# A link whose parent is a <span> is treated as a speaker name: the name is
# registered in %speakers (if not already there) and becomes the current
# speaker. A link whose parent is a <b> element with text containing Session,
# Tutorial, BOF or Panel adds one to the current speaker's total.
#
# Parameter: the HTML::Element for the link.
# Returns:   1 when the link was a speaker or a counted engagement,
#            0 otherwise.
sub find_speakers {
    my ($element) = @_;

    my $parent = $element->parent;
    return 0 unless defined $parent;

    my $text       = $element->as_text;
    my $parent_tag = $parent->tag;

    # A <span> parent was consistent for delineating the speakers throughout
    # the document.
    if ( $parent_tag eq 'span' ) {
        print "Speaker = $text\n" if $DEBUG_FLAG;

        # Only initialise the total the first time a name is seen, so a
        # speaker link that appears more than once does not wipe out the
        # engagements already counted for that speaker.
        $speakers{$text} = 0 unless exists $speakers{$text};

        $current_speaker = $text;
        return 1;
    }

    # A bold parent whose text names one of the engagement types.
    if (   $parent_tag eq 'b'
        && $parent->as_text =~ /(Session|Tutorial|BOF|Panel)/ )
    {
        my $kind = $1;
        print "$kind = $text\n" if $DEBUG_FLAG;

        # An engagement link that precedes every speaker link has nobody to
        # be credited to; skip it rather than index the hash with undef.
        return 0 unless defined $current_speaker;

        $speakers{$current_speaker}++;
        return 1;
    }

    return 0;
}

# hashValueDescending - sort comparator (uses the sort variables $a and $b).
# Orders speaker names by their total in %speakers, highest first. Equal
# totals fall back to alphabetical order by name so the output is the same on
# every run regardless of hash ordering.
sub hashValueDescending {
    return $speakers{$b} <=> $speakers{$a} || $a cmp $b;
}