use strict;
use warnings;
use Data::Dumper;

# Sort hash keys so the dump is reproducible from run to run.
$Data::Dumper::Sortkeys = 1;

# Slurp the sample document stored after __DATA__, parse it, and dump the
# resulting nested structure.
my $structure = main(do { local $/; <DATA> });
print Dumper($structure);

# main($text) -> \%pages
#
# Parses every "START PAGE [name] ... END PAGE" section found in $text.
# Returns a hash ref keyed by page name (or by a 1-based counter for pages
# that carry no name); each value is the hash ref returned by page().
sub main {
    my ($text) = @_;
    return _parse_sections($text, 'PAGE', \&page);
}

# page($text) -> \%questions
#
# Parses every "START QUESTION [name] ... END QUESTION" section found in the
# body of one page. Returns a hash ref keyed by question name (or by a
# 1-based counter for unnamed questions); each value is the hash ref
# returned by question().
sub page {
    my ($text) = @_;
    return _parse_sections($text, 'QUESTION', \&question);
}

# question($text) -> \%question
#
# Parses the body of one question. The returned hash ref holds:
#   label   - the rest of the line following "LABEL " (undef when absent)
#   choices - array ref of [code, text] pairs, one per line between
#             START CHOICES and END CHOICES; the key is omitted when the
#             question has no CHOICES section.
sub question {
    my ($text) = @_;
    my %question;

    # List assignment: a failed match leaves the label undef.
    ($question{label}) = $text =~ /LABEL (.*)/;

    # Only read $1 after a successful match. Otherwise it still holds the
    # capture of an earlier match (the label) and bogus choices are built.
    if ($text =~ /START CHOICES\s+(.*?)\s+END CHOICES/s) {
        my $choice_block = $1;
        for my $line (split / *\n */, $choice_block) {
            # Limit 2: the first word is the code, the remainder the text.
            push @{ $question{choices} }, [ split / /, $line, 2 ];
        }
    }

    return \%question;
}

# _parse_sections($text, $kind, $parse_body) -> \%section_for
#
# Shared worker for main() and page(). Finds each
# "START $kind [name] <body> END $kind" section in $text and stores
# $parse_body->(<body>) under the section name. Unnamed sections are keyed
# by a counter starting at 1.
sub _parse_sections {
    my ($text, $kind, $parse_body) = @_;
    my %section_for;
    return \%section_for unless defined $text;

    # Compiled once per call so nested calls with a different $kind cannot
    # disturb the pattern driving this loop.
    my $section_re = qr/START \Q$kind\E(?: (\w+))?\s+(.*?)\s+END \Q$kind\E/s;

    my $auto_id = 0;
    while ($text =~ /$section_re/g) {
        # Copy the captures before calling the body parser.
        my ($name, $body) = ($1, $2);

        # Test definedness, not truth, so a section named "0" keeps its name.
        my $key = defined $name ? $name : ++$auto_id;
        $section_for{$key} = $parse_body->($body);
    }

    return \%section_for;
}

# Sample input, read by the top-level code through the DATA handle. The
# parser is line-sensitive: the LABEL text ends at the newline, and each
# choice must sit on its own line.
__DATA__
START PAGE p1
START QUESTION 4B
LABEL Do you like your pie with ice cream?
START CHOICES
1 Yes
2 No
END CHOICES
END QUESTION
START QUESTION 4C
LABEL Do you like your pie with whipped cream?
START CHOICES
1 Yes
2 No
END CHOICES
END QUESTION
END PAGE