use strict;
use warnings;
use Data::Dumper;

# Build a hash of arrays (HoA) from the sample records in the DATA section:
# each ">data set NAME" header starts a new data set, and every following
# record line ("<digits>,...") is appended to the list stored under NAME.
#
# Desired result for the sample data below:
#
#   $HoA = {
#       'yst02' => [
#           '0,-1566,quxquxquxqux,9',
#           '0,-1545,bembembem,9',
#           '0,-1394,birbirbirbir,9'
#       ],
#       'dm01' => [
#           '0,-869,foofoofoofoofoofoofoofoo,28',
#           '0,-853,barbarbarbarbar,14'
#       ],
#       'mus03' => [
#           '0,-1274,tingtingtingting,11',
#           '0,-1220,tongtongtong,11',
#           '0,-475,bubububu,11',
#           '0,-459,catcatcatcatcat,11',
#       ],
#   }

my %hoa = %{ parse_datasets( \*DATA ) };

print Dumper \%hoa;

# parse_datasets($fh)
#
# Reads lines from the filehandle $fh and groups record lines by data set.
#
#   $fh     - readable filehandle (the script passes \*DATA)
#   returns - hash reference: data set name => array ref of record lines,
#             in the order the records were read
#
# Lines that are neither a ">data set NAME" header nor a record line
# (for example ">instances") are ignored.
sub parse_datasets {
    my ($fh) = @_;

    my %records_for;

    # Declared outside the loop on purpose: the current data set name has to
    # survive from its header line to the record lines that follow it.
    my $current_set;

    while ( my $line = <$fh> ) {
        chomp $line;

        if ( $line =~ /^>data set\s+(\S+)/ ) {
            # Keep only the name (e.g. "dm01"), not the whole header line.
            $current_set = $1;
        }
        elsif ( $line =~ /^[0-9]+,/ ) {
            # A record seen before any header has no data set to belong to.
            next unless defined $current_set;
            push @{ $records_for{$current_set} }, $line;
        }
    }

    return \%records_for;
}

#There are many datasets, here I just take 3 of them
__DATA__
>data set dm01
>instances
0,-869,foofoofoofoofoofoofoofoo,28
0,-853,barbarbarbarbar,14
>data set yst02
>instances
0,-1566,quxquxquxqux,9
0,-1545,bembembem,9
0,-1394,birbirbirbir,9
>data set mus03
>instances
0,-1274,tingtingtingting,11
0,-1220,tongtongtong,11
0,-475,bubububu,11
0,-459,catcatcatcatcat,11