I am trying to find a way to get the first table header on the below HTML code. If I use look_down and search for td and HeaderTitle, it will also get the second table. Any suggestion?
Expected output:
$VAR1 = [
'Status',
'Results',
'Schedule Start',
'Actual Start',
'Schedule Name',
'Node Name',
'Domain Name',
];
Current output:
$VAR1 = [
'á',
'Status',
'Results',
'Schedule Start',
'Actual Start',
'Schedule Name',
'Node Name',
'Domain Name',
'á',
'Node Name',
'Node Version',
'OS Platform',
'OS Version',
'Activity',
'Bytes Transferred'
];
Code:
use HTML::TreeBuilder;
use Data::Dumper;
use 5.16.0;
my $h = HTML::TreeBuilder->new;
$h->parse_content( do{ local $/; <DATA> } );
my @headers =
map @{ $_->content },
($h->look_down
(
_tag => 'td', class => qr/HeaderTitle\b?/ ,
)
)
;
print Dumper \@headers;
__DATA__
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1
+">
<meta name="GENERATOR" content="TSM Operational Reporting">
<meta name="ProgId" content="FrontPage.Editor.Document">
<title>TSM Operational Reporting</title>
</head>
<DIV class=HeaderBar>Daily Report TSM 24 hour Report for TSM2T generat
+ed at 2016-01-18 09:00:14 on NJ covering 2016-01-17 09:00:14 to 2016-
+01-18 09:00:13</DIV>
<body>
<table border="0" width="100%%">
<DIV class=FooterBar>Server name: <a href="http://10.1.2.2:1980"> TSM<
+/a>, platform: Linux/ppc64, version: 7.1.3.0, date/time: 01/18/2016 0
+8:55:01</DIV>
<tr><td width="100%"><p>
<DIV class=HeaderBar>Client Schedules</DIV>
<TABLE class=HeaderFrame height=100 cellSpacing=0 cols=3 cellPadding=0
+ width="100%" border=0 align="left">
<TR vAlign=top height=100>
<TD vAlign=top width="100%" height="100">
<DIV style="overflow: auto; width: "100%"; height: 200; valign:
+top">
<TABLE cellSpacing=0 cols=4 cellPadding=0 width="100%" border=0
+height="100">
<TR height=25 nowrap>
<TD class=HeaderTitleNoVLine height="14" width="10"> </
+TD>
<TD class=HeaderTitle noWrap align=left height="14">Status</
+TD>
<TD class=HeaderTitle noWrap align=left height="14">Results<
+/TD>
<TD class=HeaderTitle noWrap align=left height="14">Schedule
+ Start</TD>
<TD class=HeaderTitle noWrap align=left height="14">Actual S
+tart</TD>
<TD class=HeaderTitle noWrap align=left height="14">Schedule
+ Name</TD>
<TD class=HeaderTitle noWrap align=left height="14">Node Nam
+e</TD>
<TD class=HeaderTitle noWrap align=left height="14">Domain N
+ame</TD></TR>
<TR class=AltLight height=22>
<TD class=AltLightNoVline align=middle height="17" width="10
+"> </TD>
<TD class=AltLight align=left height="17">Completed</TD>
<TD class=AltLight align=left height="17">Successful</TD>
<TD class=AltLight align=left height="17">2016-01-17-17.00</
+TD>
<TD class=AltLight align=left height="17">2016-01-17-17.09</
+TD>
<TD class=AltLight align=left height="17">NJDLYBACKUP_5PM</T
+D>
<TD class=AltLight align=left height="17">APX23</TD>
<TD class=AltLight align=left height="17">ST15_DOMAIN</TD></
+TR>
<TR class=AltDark height=22>
<TD class=AltLightNoVline align=middle height="17" width="10">
+ </TD>
<TD class=AltLight align=left height="17">Missed</TD>
<TD class=AltLight align=left height="17">Successful</TD>
<TD class=AltLight align=left height="17">2016-01-17-17.00</
+TD>
<TD class=AltLight align=left height="17">2016-01-17-17.09</
+TD>
<TD class=AltLight align=left height="17">NJDLYBACKUP_5PM</T
+D>
<TD class=AltLight align=left height="17">APX24</TD>
<TD class=AltLight align=left height="17">ST15_DOMAIN</TD></
+TR>
</TABLE> </DIV></TD>
</TR></TABLE>
</td>
</tr>
<tr><td width="100%"><p>
<DIV class=HeaderBar>Node Activity Summary</DIV>
<TABLE class=HeaderFrame height=100 cellSpacing=0 cols=3 cellPadding=0
+ width="100%" border=0 align="left">
<TR vAlign=top height=100>
<TD vAlign=top width="100%" height="100">
<DIV style="overflow: auto; width: "100%"; height: 200; valign:
+top">
<TABLE cellSpacing=0 cols=4 cellPadding=0 width="100%" border=0
+height="100">
<TR height=25 nowrap>
<TD class=HeaderTitleNoVLine height="14" width="10"> </
+TD>
<TD class=HeaderTitle noWrap align=left height="14">Node Nam
+e</TD>
<TD class=HeaderTitle noWrap align=left height="14">Node Ver
+sion</TD>
<TD class=HeaderTitle noWrap align=left height="14">OS Platf
+orm</TD>
<TD class=HeaderTitle noWrap align=left height="14">OS Versi
+on</TD>
<TD class=HeaderTitle noWrap align=left height="14">Activity
+</TD>
<TD class=HeaderTitle noWrap align=left height="14">Bytes Tr
+ansferred</TD></TR>
<TR class=AltLight height=22>
<TD class=AltLightNoVline align=middle height="17" width="10
+"> </TD>
<TD class=AltLight align=left height="17">RDFXDB11</TD>
<TD class=AltLight align=left height="17">7.1.0.0</TD>
<TD class=AltLight align=left height="17">WinNT</TD>
<TD class=AltLight align=left height="17">6.01</TD>
<TD class=AltLight align=left height="17">BACKUP</TD>
<TD class=AltLight align=left height="17">105,806,011,655</T
+D></TR>
<TR class=AltDark height=22>
</TABLE> </DIV></TD>
</TR></TABLE>