# Print the FASTA record for every unique accession listed in an ids file.
#
# Usage: perl SCRIPT IDS_FILE SEQUENCE_FILE
#
# The unique ids file looks like this (one accession per line, repeats
# allowed):
#
#   gi|11995001:156374-156649
#   dbj|BA000040|:2701685-2702539
#   dbj|BA000040|:c8987046-8986282
#   gi|13488050:58289-58570
#   gi|13470324:5721573-5721854
#
# The corresponding sequence file looks like this (FASTA; every header
# starts with the accession, optionally followed by an annotation):
#
#   >gi|11995001:156374-156649, SMa0002
#   ATGGAGGCTGTTCCCATGAATGTAGACCTCTCACGGCGCAGCTTTTTGAAGCTGGCTGGAGCAGGGGCTG
#   CGGCAACGTCACTCGGTGCGATGGGGTTTGGTGAGGCTGAGGCGGCGGTCGTCGCGCATGTCCGGCCTCA
#   >dbj|BA000040|:2701685-2702539
#   GAAGGAGCCGATCTGGTCACCTTTTCCGGCGACAAGCTGCTGGGCGGTCCGCAGGCGGGTTTCATCGTCG
#   GGCGCAGGGACCTGATCGCCGA
#
# Every unique id is expected to have a corresponding record in the sequence
# file; ids without one are reported on STDERR instead of being dropped
# silently.

use strict;
use warnings;

# Accession at the start of a line, e.g. "gi|11995001:156374-156649" or
# "dbj|BA000040|:c8987046-8986282".  The same pattern is applied to the ids
# file and to FASTA headers so both sides are keyed identically.
my $ACCESSION_RE = qr{
    \A
    (
        \w+ \| \w+      # database tag, bar, identifier
        \.? \d?         # optional dot and single version digit
        \|? :           # optional closing bar, then the range separator
        c? \d+ - \d+    # optional "c" prefix, then start-end coordinates
    )
}x;

# Return the accessions found at the start of the given lines, in
# first-seen order and without repeats.  Lines that do not start with an
# accession are ignored.
sub unique_accessions {
    my @lines = @_;

    my %seen;
    my @unique;
    for my $line (@lines) {
        my ($accession) = $line =~ $ACCESSION_RE;
        next if !defined $accession;
        push @unique, $accession if !$seen{$accession}++;
    }
    return @unique;
}

# Read FASTA text from the filehandle $fh and return a hash reference that
# maps each accession to its full record (header line plus sequence lines,
# every line terminated by "\n").  When an accession occurs more than once
# the first record wins; records whose header does not start with an
# accession are skipped.
sub read_fasta_records {
    my ($fh) = @_;

    my %record_for;
    my $current;    # accession being collected; undef while skipping a record

    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;    # normalise LF / CRLF / missing final newline

        if (index($line, '>') == 0) {
            my ($accession) = substr($line, 1) =~ $ACCESSION_RE;
            if (defined $accession && !exists $record_for{$accession}) {
                $current = $accession;
                $record_for{$current} = "$line\n";
            }
            else {
                undef $current;
            }
        }
        elsif (defined $current) {
            $record_for{$current} .= "$line\n";
        }
    }
    return \%record_for;
}

# Entry point: takes (IDS_FILE, SEQUENCE_FILE), prints the record of every
# unique id to STDOUT in the order the ids first appear, and warns about ids
# that have no record.  Dies if the arguments are wrong or a file cannot be
# opened.  Returns 0 for use as the exit status.
sub main {
    my @args = @_;
    die "usage: $0 IDS_FILE SEQUENCE_FILE\n" if @args != 2;
    my ($ids_file, $genes_file) = @args;

    open my $ids_fh, '<', $ids_file
        or die "unable to open file $ids_file: $!\n";
    my @ids = unique_accessions(<$ids_fh>);
    close $ids_fh;

    open my $genes_fh, '<', $genes_file
        or die "unable to open file $genes_file: $!\n";
    my $record_for = read_fasta_records($genes_fh);
    close $genes_fh;

    # Exact hash lookup: ids contain regex metacharacters ("|"), and a
    # prefix match would confuse "...-20" with "...-200".
    for my $id (@ids) {
        if (exists $record_for->{$id}) {
            print $record_for->{$id};
        }
        else {
            warn "no sequence found for $id\n";
        }
    }
    return 0;
}

# Run only when executed as a script, so the subs can be loaded by tests.
exit main(@ARGV) unless caller;