use strict;
use warnings;

# Count how many atoms of each element a chemical formula contains.
#
# Parameters:
#   $formula - formula text such as 'Mo(P(H)3)4(CO)'. Element symbols are one
#              uppercase letter optionally followed by one lowercase letter;
#              an element or a parenthesised group may carry a decimal
#              multiplier. A missing (or zero) multiplier counts as 1.
# Returns:
#   A hash reference mapping element symbol => number of atoms.
# Dies:
#   If the parentheses in the formula are not balanced.
sub count_atoms {
    my ($formula) = @_;
    my $expanded = $formula;

    # The greedy (.*) pushes the match to the rightmost opening parenthesis,
    # and the lazy (.*?) stops at the first closing one after it, so each
    # pass expands one innermost group in place. Looping on the substitution
    # itself (instead of on "a '(' is still present") guarantees termination
    # even when a '(' has no matching ')'.
    1 while $expanded =~ s/(.*)\((.*?)\)(\d*)/$1 . $2 x ($3 ? $3 : 1)/e;

    # Any parenthesis still present could not be paired up.
    die "Unbalanced parentheses in formula '$formula'"
        if $expanded =~ /[()]/;

    # The formula is now flat: tally every element symbol with its multiplier.
    my %count;
    while ($expanded =~ /([A-Z](?:[a-z])?)(\d*)/g) {
        $count{$1} += $2 ? $2 : 1;
    }

    return \%count;
}

# Added gratuitous parentheses for the sake of testing embedded formulas.
my $formula = 'Mo(P(H)3)4(CO)(NH2C2(H)5)';

my $count_of = count_atoms($formula);
printf "%-2s %3d\n", $_, $count_of->{$_} for sort keys %{$count_of};