#!/usr/bin/perl
#
# Parses the document stored after __DATA__ as HTML and, for every
# <div id="bodyContent">, prints on separate lines:
#   1. the value of the "Key words" preface field,
#   2. the value of the "Actualised" preface field,
#   3. the text between the 1st and 2nd <h2> (heading text included),
#   4. the text between the 2nd and 3rd <h2> (heading text included),
#   5. the text between the 3rd and 4th <h2> (heading text included).
# Missing fields or headings produce an empty line instead of warnings.
use strict;
use warnings;
use LWP::Simple;    # NOTE(review): not referenced by the code below - confirm before removing
use HTML::TreeBuilder::XPath;

# Slurp the whole embedded document in a single read.
my $page = do { local $/; <DATA> };
$page = '' unless defined $page;

my $tree = HTML::TreeBuilder::XPath->new_from_content($page);

for my $content ( $tree->findnodes('//div[@id="bodyContent"]') ) {

    # Everything in front of the first <h2> is the preface.
    my @preface      = $content->findvalues('./h2[1]/preceding-sibling::*');
    my $preface_text = join '', @preface;

    my $keyword    = '';
    my $actualised = '';
    for my $entry (@preface) {
        $keyword    = field_value($entry) if $entry =~ /^\s*Key words/;
        $actualised = field_value($entry) if $entry =~ /^\s*Actualised/;
    }
    print $keyword,    "\n";
    print $actualised, "\n";

    # 'preceding-sibling' always yields *all* text in front of a heading,
    # so each section is obtained by cutting off what was already consumed
    # (the preface plus every earlier section).
    my $consumed = length $preface_text;
    for my $heading_index ( 2 .. 4 ) {
        my $text_so_far = text_before_heading( $content, $heading_index );

        # When the heading does not exist there is nothing to cut from.
        my $section
            = $consumed <= length $text_so_far
            ? substr( $text_so_far, $consumed )
            : '';

        print $section, "\n";
        $consumed += length $section;
    }
}

# Release the parse tree explicitly, as HTML::TreeBuilder recommends.
$tree->delete;

# Returns the part of $line after its first colon, without the whitespace
# that directly follows the colon; returns '' when $line has no colon.
sub field_value {
    my ($line) = @_;
    my ( undef, $value ) = split /:\s*/, $line, 2;
    return defined $value ? $value : '';
}

# Returns the concatenated string values of all element siblings that
# precede the $index-th <h2> child of $node, or '' when no such heading
# exists.
sub text_before_heading {
    my ( $node, $index ) = @_;
    return join '', $node->findvalues("./h2[$index]/preceding-sibling::*");
}

__DATA__

Key words: Some words.

Date: 2012-01-16

Actualised: 2008-01-08

Commented: 05.06.2007

Encoded: Some code.

Problem

Problem description.

Another description.

Solution 1

Solution description.

Solution 2

Solution description.

Comment.

Text of the comment.