#!/usr/bin/env perl
#
# Collapse duplicate FASTA records read from the embedded DATA section.
#
# Records are keyed by the first whitespace-delimited token after '>'.
# When an ID occurs more than once, the longest sequence is kept (on a
# tie the later record wins).  The result is printed to STDOUT as one
# ">ID SEQUENCE" line per ID, sorted by ID.
#
# Placeholder text such as "Sequence unavailable" is stored like any
# other sequence; it is only displaced by a record of equal or greater
# length for the same ID.

use Modern::Perl;

my %seqs;    # ID => longest sequence seen so far for that ID

{
    # Read one FASTA record per iteration by splitting the input on '>'
    # instead of newline.  Localised so the change cannot leak into any
    # other read.
    local $/ = '>';

    RECORD:
    while ( my $record = <DATA> ) {
        chomp $record;    # remove the trailing '>' separator, if any

        # Skip the empty chunk before the first '>' and any record that
        # contains nothing but whitespace.
        next RECORD unless $record =~ /\S/;

        # First token is the ID, everything after it is the sequence.
        # split ' ' (awk mode) ignores leading whitespace, so "> ID"
        # still yields a usable ID.
        my ( $id, $seq ) = split ' ', $record, 2;

        # A header with no sequence lines leaves $seq undefined.
        $seq //= '';

        # Join the wrapped sequence lines: drop CR and LF characters.
        $seq =~ tr/\r\n//d;

        # Keep it if the ID is new or this sequence is at least as long
        # as the one already stored.
        my $best = $seqs{$id};
        if ( !defined $best || length($seq) >= length($best) ) {
            $seqs{$id} = $seq;
        }
    }
}

# NOTE(review): ID and sequence are emitted on a single line separated by
# a space.  If downstream tools expect two-line FASTA, the separator
# should be "\n" instead -- confirm the intended output format.
for my $id ( sort keys %seqs ) {
    say ">$id $seqs{$id}";
}

__DATA__
>ENSG00000010072
MDDDLMLALRLQEEWNLQEAERDHAQESLSLVDASWELVDPTPDLQALFVQFNDQFFWGQ
LEAVEVKWSVRMTLCAGICSYEGKGGMCSIRLSEPLLKLRPRKDLVEVYHTFHDEVDEYR
RHWWRCNGPCQHRPPYYGYVKRATNREPSAHDYWWAEHQKTCGGTYIKIKEPENYSKKGK
GKAKLGKEPVLAAENKGTFVYILLIFM*
>ENSG00000067082
Sequence unavailable
>ENSG00000147724
MSEIQGTVEFSVELHKFYNVDLFQRGYYQIRVTLKVSSRIPHRLSASIAGQTESSSLHSA
CVHDSTVHSRVFQILYRNEEVPINDAVVFRVHLLLGGERMEDALSEVDFQLKVDLHFTDS
EQQLRDVAGAPMVSSRTLGLHFHPRNGLHHQVP
>ENSG00000010072
MDDDLMLALRLQEEWNLQEAERDHAQESLSLVDASWELVDPTPDLQALFVQFNDQFFWGQ
LEAVEVKWSVRMTLCAGICSYEGKGGMCSIRLSEPLLKLRPRKDLVETLLHEMIHAYLFV
TNNDKDREGHGPEFCKHMHRINSLTGANITVYHTFHDEVDEYRRHWWRCNGPCQHRPPYY
GYVKRATNREPSAHDYWWAEHQKTCGGTYIKIKEPENYSKKGKGKAKLGKEPVLAAENKD
KPNRGEAQLVIPFSGKGYVLGETSNLPSPGKLITSHAINKTQDLLNQNHSANAVRPNSKI
KVKFEQNGSSKNSHLVSPAVSNSHQNVLSNYFPRVSFANQKAFRGVNGSPRISVTVGNIP
KNSVSSSSQRRVSSSKISLRNSSKVTESASVMPSQDVSGSEDTFPNKRPRLEDKTVFDNF
FIKKEQIKSSGNDPKYSTTTAQNSSSSSSQSKMVNCPVCQNEVLESQINEHLDWCLEGDS
IKVKSEESL*