# Per-element summary of how compound counts are distributed over the
# element combinations ("motifs") each element takes part in.
#
# Requires GNU awk 4.0 or later: the script relies on arrays of arrays and
# on PROCINFO["sorted_in"], neither of which exists in POSIX awk.
#
# Input (tab-separated fields):
#   $1  formula whose components are separated by "|"; digits are stripped,
#       so only the component symbols are kept
#   $2  number of compounds recorded for that formula
#
# Output (one line per element, elements in descending order of total):
#   1. running index of the element in that ordering
#   2. element symbol
#   3. count-weighted mean rank of its combinations, divided by the number
#      of distinct combinations
#   4. total number of compounds for the element
#   5. number of distinct combinations containing the element

BEGIN {
    FS = "\t"
}

{
    # Work on a copy so the input record itself is left untouched.
    combo = $1
    gsub(/[0-9]/, "", combo)    # Delete numbers from the formula.
    gsub(/\|/, " ", combo)      # Replace "|" by a space.

    # A " " separator makes split() break on runs of whitespace and ignore
    # leading/trailing blanks.
    n_elem = split(combo, elem, " ")

    for (i = 1; i <= n_elem; i++) {
        num_comp[elem[i]][combo] += $2   # Compounds per (element, combination).
        tot_comp[elem[i]] += $2          # Compounds per element.
    }
}

END {
    # Traverse every array by numeric value, largest first. This applies to
    # the outer loop over elements and to the inner loop over combinations.
    PROCINFO["sorted_in"] = "@val_num_desc"

    for (element in tot_comp) {
        rank++
        total = tot_comp[element]
        n_comb = length(num_comp[element])

        pos = 0
        expected = 0
        # A zero total makes the weights undefined (and would abort gawk
        # with a division-by-zero error), so such elements report 0.
        if (total != 0) {
            # Combinations arrive from most to least populated, so pos is
            # the 1-based rank of the combination within this element.
            for (c in num_comp[element]) {
                expected += ++pos * num_comp[element][c] / total
            }
        }

        print rank, element, expected / n_comb, total, n_comb
    }
}