# Tokenise $text from the left: each pass of the loop strips one leading
# token and prints the name of the character class that matched it.
#
# Alternation order matters. Perl takes the first alternative that matches
# at a given position, and \w also matches digits, so \d+ has to be tried
# before \w+ or the digit branch can never fire.
# Capture groups: 1 = digits, 2 = word, 3 = whitespace, 4 = anything else.
$regex = '(\d+)|(\w+)|(\s+)|([^\w\s]+)';
$text  = 'The world is foo 2!';

# \A(?:...) anchors every alternative to the start of the string; a bare ^
# in front of the interpolated pattern would bind to the first one only.
# The loop stops once $text is empty, because every alternative needs at
# least one character to match.
while ( $text =~ s/\A(?:$regex)// ) {
    # Use defined() rather than truthiness: the token "0" is a valid match
    # but evaluates to false. Groups that did not take part are undef.
    # $/ is the input record separator, "\n" unless it was changed elsewhere.
    print '\d+',        $/ if defined $1;
    print '\w+',        $/ if defined $2;
    print '\s+',        $/ if defined $3;
    print '[^\w\d\s]+', $/ if defined $4;
}