#!/usr/bin/perl
use strict;
use warnings;

# Splits records of the form
#
#     NAME [PREPOSITION] [REMARK]
#
# into their three parts and prints them as "NAME == PREPOSITION == REMARK".
# A REMARK is trailing text whose first character is not an ASCII letter
# (e.g. "230", "<240>", "(4X115PJ)").  Sample records live in the __DATA__
# section at the end of this file.

# Known name prepositions.  Longer alternatives come first so that
# "VAN DER" is preferred over "VAN DE" and "VAN".
my @Prepositions = ( 'VAN DER', 'VAN DE', 'DEN', 'DE', 'VAN' );

# Alternation matching any one preposition; every entry is quoted so it is
# matched literally.
my $Prep_Re = do {
    my $alternatives = join '|', map { quotemeta } @Prepositions;
    qr/(?:$alternatives)/;
};

# normalize_space($text)
#
# Returns a copy of $text with leading and trailing whitespace removed and
# every internal run of whitespace (including a single tab or a newline)
# replaced by one space.  An undefined argument yields the empty string.
sub normalize_space {
    my ($text) = @_;
    return '' if !defined $text;

    $text =~ s/\A\s+//;
    $text =~ s/\s+\z//;
    $text =~ s/\s+/ /g;

    return $text;
}

# split_record($line)
#
# Parses one record and returns the list ($name, $prep, $remark).  Parts that
# are absent are returned as empty strings, never undef.  Returns an empty
# list when $line is undefined, empty or whitespace only.
#
# All patterns are anchored at both ends, so no text is silently dropped and
# a preposition is only recognised as a whole word ("JAN DEKKER" is a plain
# name, not "JAN" + "DE").
sub split_record {
    my ($line) = @_;

    my $record = normalize_space($line);
    return if $record eq '';

    # NAME PREPOSITION [REMARK]: the name is matched lazily so the first
    # usable preposition wins; the space before the remark is optional.
    if ( $record =~ m{\A (.+?) [ ] ($Prep_Re) (?: [ ]? ([^A-Za-z].*) )? \z}x ) {
        my ( $name, $prep, $remark ) = ( $1, $2, $3 );
        $remark = '' if !defined $remark;
        return ( $name, $prep, $remark );
    }

    # NAME REMARK: the name is matched greedily, so the remark starts at the
    # last space that is followed by a non-letter.
    if ( $record =~ m{\A (.+) [ ] ([^A-Za-z].*) \z}x ) {
        my ( $name, $remark ) = ( $1, $2 );
        return ( $name, '', $remark );
    }

    # Anything else is a bare name without preposition or remark.
    return ( $record, '', '' );
}

# main()
#
# Reads the records from the DATA handle and prints one result line per
# record.  Records that cannot be parsed (blank lines) are reported with
# "No idea for ...".
sub main {
    while ( my $line = <DATA> ) {
        my ( $name, $prep, $remark ) = split_record($line);

        if ( defined $name ) {
            print "$name == $prep == $remark\n";
        }
        else {
            my $shown = normalize_space($line);
            print "No idea for $shown\n";
        }
    }

    return;
}

# Run only when executed as a script, so the subs above can be loaded and
# exercised on their own.
main() unless caller;

1;

__DATA__
WINTER DE
ZANDEN VAN DER ŤAť
JENSEN 230
WOODHEAD BRINK 130,-
HEYDIER DEN <240>
SMITSER (4X115PJ)
LINDEN VAN DER
MOTEL GOLDEN LEEUW