use Modern::Perl;
use Data::Dump qw/dump/;

# Scan the Stockholm-style records embedded after __DATA__ and collect, for
# each sequence-name prefix, the family accessions it was listed under.
#
# A "#=GF AC <accession>" line sets the current accession; every following
# "#=GS <name>..." line files that accession under the part of <name> that
# precedes the first underscore. All other lines are ignored.

my %data;    # name prefix (text before the first "_") => [ accessions ]
my $ac;      # accession from the most recent "#=GF AC" line

# Read the embedded data one line at a time. The loop must read from the
# DATA handle: an empty while() condition is always true and never reads
# any input, so it would spin forever.
while ( my $line = <DATA> ) {
    if ( $line =~ /^#=GF AC\s+(.*)$/ ) {
        $ac = $1;
        next;
    }
    if ( $line =~ /^#=GS ([^_]*)/ ) {
        push @{ $data{$1} }, $ac;
        next;
    }
}

# %data is evaluated in list context here, so dump() receives a flat
# key/value list rather than a hash reference.
say dump(%data);

__DATA__
# STOCKHOLM 1.0
#=GF ID 1-cysPrx_C
#=GF AC PF10417.4
#=GF DE C-terminal domain of 1-Cys peroxiredoxin
#=GF AU Finn RD, Coggill PC
#=GF SE Gene3D, pdb_1prx
...
#=GS A3EU39_9BACT/160-195 AC A3EU39.1
#=GS Q7VQB3_BLOFL/159-194 AC Q7VQB3.1
#=GS Q057V5_BUCCC/160-195 AC Q057V5.1
#=GS A5CDZ8_ORITB/160-195 AC A5CDZ8.1
...
//
# LONDON 1.0
#=GF ID 1-cysPrx_C
#=GF AC PF10000.3
#=GF DE C-terminal domain of 1-Cys peroxiredoxin
#=GF AU Finn RD, Coggill PC
#=GF SE Gene3D, pdb_1prx
...
#=GS A3EU39_9BACT/160-195 AC A3EU39.1
#=GS Q7VQB8_BLOFL/159-194 AC Q7VQB3.1
#=GS Q057V5_BUCCC/160-195 AC Q057V5.1
#=GS A5CDZ8_ORITB/160-195 AC A5CDZ8.1
//
# AMSTERDAM 1.0
#=GF ID 1-cysPrx_C
#=GF AC PF10999.3
#=GF DE C-terminal domain of 1-Cys peroxiredoxin
#=GF AU Finn RD, Coggill PC
#=GF SE Gene3D, pdb_1prx
...
#=GS A3EU39_9BACT/160-195 AC A3EU39.1
#=GS Q7VQB8_BLOFL/159-194 AC Q7VQB3.1
#=GS Q057V5_BUCCC/160-195 AC Q057V5.1
#=GS A5CDZ8_ORITB/160-195 AC A5CDZ8.1