use strict;
use warnings;

# For every record in the __DATA__ section, print the leading
# colon-delimited field and the arrow token ("=>" or "<=>") that appears
# later on the same line, formatted as "<field> - <arrow>".
#
# NOTE(review): the records look like reaction/pathway/compound IDs;
# that reading is inferred from the data, not established by the code.
while ( my $line = <DATA> ) {

    # List-context match: returns the two captures on success and an
    # empty list on failure, so we never print stale $1/$2 values left
    # over from an earlier successful match.
    my ( $record_id, $arrow ) = $line =~ m{
        ([^:]+)     # first run of non-colon characters
        .+ \s+      # anything, then whitespace before the arrow
        ([<=>]+)    # the arrow itself, built from the characters < = >
        \s+         # whitespace must follow the arrow
    }x;

    unless ( defined $arrow ) {
        # The line already ends in a newline, so warn() prints it as-is.
        warn "Skipping unparseable line $.: $line";
        next;
    }

    print "$record_id - $arrow\n";
}

__DATA__
R00005: 00330: C01010 => C00011
R00005: 00791: C01010 => C00011
R00005: 01100: C01010 <=> C00011
R00006: 00770: C00022 => C00900
R00008: 00362: C06033 => C00022
R00008: 00660: C00022 => C06033
R00010: 00500: C01083 => C00031
R00013: 00630: C00048 => C01146
R00013: 01100: C00048 <=> C01146