use strict;
use warnings;

# Builds a lookup table from a tab-separated file of 3-D points.
#
# For every unordered triplet of points (i < j < k) the nine coordinates of
# the triplet are used as a 3x3 matrix.  Every point of the input is then
# transformed with that matrix, each transformed coordinate is divided by
# GRID and truncated with int(), and the three resulting integers joined by
# "_" form a key.  Under that key the script records the matrix itself,
# formatted as nine "%.3f" numbers each followed by "_".
#
# Because the table grows with (number of triplets) x (number of points),
# it is flushed to numbered temporary files ("tmp_hash1", "tmp_hash2", ...)
# in the current directory every BLOCK_SIZE triplets, and the final output
# is assembled by re-reading those files once per key.
#
# Usage: perl <this script> <input_file> <output_file>
#
# Output: one line per key, "<key>=><values>", where values recorded within
# one temporary file are separated by a single space.

use constant {
    GRID       => 2,           # bin width used to quantise coordinates
    BLOCK_SIZE => 10000,       # triplets processed between flushes to disk
    TMP_PREFIX => 'tmp_hash',  # temporary files are TMP_PREFIX . block number
};

# Run only when executed as a program, so the subs can also be loaded
# (require/do) without side effects.
main(@ARGV) unless caller;

# Entry point.
#   $input  - path of the tab-separated point file (x, y, z per line)
#   $output - path of the table file to write
# Dies on missing arguments or on any I/O failure.
sub main {
    my ($input, $output) = @_;

    die "usage: $0 <input_file> <output_file>\n"
        unless defined $input && defined $output;

    my @points = read_points($input);
    my ($keys, $file_count) = build_blocks(\@points);
    merge_blocks($output, $keys, $file_count);

    return;
}

# Reads the point file and returns a list of [x, y, z] array references.
# Blank lines are skipped; only the first three columns of a line are kept
# so that extra columns cannot shift the matrix arguments of transform().
# Dies if the file cannot be opened or a line has fewer than three columns.
sub read_points {
    my ($path) = @_;

    open(my $in, '<', $path) or die "cannot open file $path!\n";

    my @points;
    while (my $line = <$in>) {
        chomp $line;
        $line =~ s/\r\z//;          # tolerate CRLF line endings
        next if $line !~ /\S/;      # skip blank lines

        my @fields = split /\t/, $line;
        die "line $. of $path has fewer than 3 tab-separated columns\n"
            if @fields < 3;

        push @points, [ @fields[0 .. 2] ];
    }
    close($in);

    return @points;
}

# Enumerates all point triplets, fills the in-memory table and flushes it
# to a temporary file every BLOCK_SIZE triplets, plus once more at the end
# for the remaining partial block (previously those entries were lost).
#
# Returns (\%seen_keys, $file_count): every key generated, and the number
# of temporary files written (numbered 1 .. $file_count).
sub build_blocks {
    my ($points) = @_;

    my %seen_keys;          # every key ever generated (values unused)
    my %block;              # key => [ matrix strings ] for the current block
    my $step       = 0;     # triplets processed so far
    my $file_count = 0;     # temporary files written so far
    my $last       = $#{$points};

    for my $i (0 .. $last) {
        for my $j ($i + 1 .. $last) {
            for my $k ($j + 1 .. $last) {
                $step++;

                # The three points, flattened, act as a row-major 3x3 matrix.
                my @matrix = (
                    @{ $points->[$i] },
                    @{ $points->[$j] },
                    @{ $points->[$k] },
                );

                # Format a copy once per triplet.  The matrix itself is left
                # untouched so every point is transformed with the same,
                # unrounded coefficients (the rounding used to be applied in
                # place after the first point had been transformed).
                my $value = join '', map { sprintf('%.3f', $_) . '_' } @matrix;

                for my $point (@{$points}) {
                    # NOTE(review): int() truncates toward zero, so the bin
                    # around 0 is twice as wide as the others - confirm this
                    # is intended before switching to floor().
                    my @bins = map { int($_ / GRID) }
                               transform(@{$point}, @matrix);
                    my $key = join '_', @bins;

                    $seen_keys{$key} = 0;
                    push @{ $block{$key} }, $value;
                }

                if ($step % BLOCK_SIZE == 0) {
                    write_block(++$file_count, \%block);
                    %block = ();    # free memory
                }
            }
        }
    }

    # Flush whatever is left after the last full block.
    write_block(++$file_count, \%block) if %block;

    return (\%seen_keys, $file_count);
}

# Writes one block of the table to TMP_PREFIX . $block_no, one line per key:
# "<key>\t<value> <value> ...\n".  Dies if the file cannot be written.
sub write_block {
    my ($block_no, $block) = @_;

    my $tmp_hash_file = TMP_PREFIX . $block_no;
    open(my $out, '>', $tmp_hash_file)
        or die "cannot write to file $tmp_hash_file!\n";

    for my $key (keys %{$block}) {
        print {$out} "$key\t@{ $block->{$key} }\n";
    }

    close($out) or die "cannot write to file $tmp_hash_file!\n";

    return;
}

# Assembles the final table: for each key, the value lists found for that
# key in temporary files 1 .. $file_count are written after "<key>=>".
# Value lists coming from different files are concatenated without a
# separator, as the output format has always been.
#
# Every temporary file is re-read for every key; this keeps memory use low
# at the price of run time.  The temporary files are left on disk.
sub merge_blocks {
    my ($output, $keys, $file_count) = @_;

    open(my $out, '>', $output) or die "cannot write to file $output!\n";

    for my $key (keys %{$keys}) {
        print {$out} $key . "=>";

        for my $file_no (1 .. $file_count) {
            my $hash_file = TMP_PREFIX . $file_no;
            open(my $in, '<', $hash_file)
                or die "cannot open file $hash_file!\n";

            while (my $line = <$in>) {
                chomp $line;
                my ($file_key, $values) = split /\t/, $line;
                next unless $file_key eq $key;

                print {$out} $values;
                last;               # a key occurs at most once per file
            }
            close($in);
        }

        print {$out} "\n";
    }

    close($out) or die "cannot write to file $output!\n";

    return;
}

# Multiplies the row vector ($x, $y, $z) by the 3x3 matrix given row-major
# in @t (nine numbers) and returns the resulting (x, y, z) list.
sub transform {
    my ($x, $y, $z, @t) = @_;

    my $new_x = $x * $t[0] + $y * $t[3] + $z * $t[6];
    my $new_y = $x * $t[1] + $y * $t[4] + $z * $t[7];
    my $new_z = $x * $t[2] + $y * $t[5] + $z * $t[8];

    return ($new_x, $new_y, $new_z);
}