#!/usr/bin/env perl

use strict;
use warnings;

use HTML::TableExtract; # qw(tree);
use HTML::ElementTable;
use Data::Dumper;
use FindBin;
use File::Util;

# This script is intended to parse the Membership Management page
# in the mailman administrative interface in order to harvest
# the name and email address of each subscriber.
#
# It looks in the directory containing this script for saved HTML
# pages whose names match the pattern below, and prints one line per
# table row found in each page.

my ($f) = File::Util->new();

# '--with-paths' makes list_dir return full paths, so the files can be
# opened no matter which directory the script is started from.
my (@html_files) = $f->list_dir(
    "$FindBin::Bin",
    '--files-only',
    '--with-paths',
    '--pattern=05\.html',
);

foreach my $html_file (@html_files) {
    # Lexical filehandle, three-argument open; report the OS error on failure.
    open( my $html_fh, '<', $html_file )
        or die "Unable to open $html_file: $!\n";

    # Slurp the whole page in one read; $/ is localized to this block only.
    my $html = do { local $/; <$html_fh> };
    close($html_fh);

    parse_subscriber_list($html);
}

# parse_subscriber_list( $html )
#
# Extracts every table in $html whose header row matches the column
# labels listed below and prints one line per row to STDOUT, using the
# second extracted column ('member') as the email value.
#
#   $html - string holding the HTML of a saved membership page
#
# Returns nothing useful; the result is the printed output.
sub parse_subscriber_list {
    my $html = shift;

    my $te = HTML::TableExtract->new(
        headers => [
            'unsub',  'member', 'mod',       'hide',
            'nomail', 'ack',    'not metoo', 'nodupes',
            'digest', 'plain',  'language',
        ],
    );

    $te->parse($html);

    foreach my $ts ( $te->tables ) {
        foreach my $row ( $ts->rows ) {
            # chomp( @{$row} );
            # NOTE(review): the "name:" label is printed without a value;
            # only the 'member' column is emitted. Confirm whether the
            # subscriber name should be extracted from that cell as well.
            print "name: email: $row->[1] \n";
        }
    }

    return;
}

exit;

__DATA__
hesco@example.net