in reply to
Re^3: how to merge many files of sorted hashes?
in thread how to merge many files of sorted hashes?
Sorry, I should have made this clearer. My actual hash looks like this:
3_2_-1 => -44.368_23.583_-218.345_0.983_-0.012_0.005_-0.382_0.041_0.205_-0.538_-0.876_0.100 -56.368_2.583_-28.745_0.883_-0.012_0.005_-0.382_0.041_0.205_-0.538_-0.876_0.100 ...
Each element in the array of values holds the entries of a 3*4 matrix, and each key can point to a few hundred such 'matrices'.
Here is how I construct a key-value pair: given a defined 3*4 transformation matrix (translation + rotation), I apply it to the coordinates of each of the 100 points and obtain the new coordinates of that point, say (3.01,1.98,-0.87), which are discretized to (3,2,-1). Then (3,2,-1) is used as a key that points to that transformation matrix.
Here's an example script with simplified calculations.
use strict;
use warnings;

# Build a table that maps binned, transformed coordinates to the "matrices"
# that produced them.  The table is spilled to numbered temp files
# (tmp_hash1, tmp_hash2, ...) every $block_size point triples so it never has
# to fit in memory all at once; afterwards the temp files are merged into a
# single output file with one "key=>values" line per distinct key.
#
# Usage: perl <script> <points_file> <output_file>
#   points_file  one point per line, tab-separated; the first three fields
#                are used as x, y and z.
#   output_file  receives the merged table.
my $input  = $ARGV[0];
my $output = $ARGV[1];
( defined $input && defined $output )
    or die "usage: $0 <points_file> <output_file>\n";

my %total_hash_keys = ();    # every key generated so far, across all blocks
my %tri_hash        = ();    # key => [value strings], current block only

#set bin width
my $grid       = 2;
my $step       = 0;          # number of point triples processed
my $block_size = 10000;      # triples per temp file
my $block_no   = 0;          # number of temp files written so far

# Read the points.  The newline is stripped so it cannot leak into the last
# coordinate, and blank lines are skipped instead of becoming bogus points.
open( my $points_fh, '<', $input ) or die "cannot open file $input!\n";
my @points;
while ( my $line = <$points_fh> ) {
    chomp $line;
    next if $line =~ /^\s*$/;
    push @points, [ split /\t/, $line ];
}
close($points_fh);

# Write the current block to the next temp file as "key<TAB>v1 v2 ...\n"
# lines, then empty %tri_hash to free memory.
my $write_block = sub {
    $block_no++;
    my $tmp_hash_file = "tmp_hash" . $block_no;
    open( my $tmp_fh, '>', $tmp_hash_file )
        or die "cannot write to file $tmp_hash_file!\n";
    for my $key ( keys %tri_hash ) {
        print {$tmp_fh} $key, "\t", join( ' ', @{ $tri_hash{$key} } ), "\n";
    }
    # Buffered write errors only surface at close, so check it.
    close($tmp_fh) or die "cannot write to file $tmp_hash_file!\n";
    %tri_hash = ();
};

#construct hash table
for my $i ( 0 .. $#points ) {
    for my $j ( $i + 1 .. $#points ) {
        for my $k ( $j + 1 .. $#points ) {
            $step++;

            #simplified calculation for the value of the hash;
            my @matrix = (
                @{ $points[$i] }[ 0 .. 2 ],
                @{ $points[$j] }[ 0 .. 2 ],
                @{ $points[$k] }[ 0 .. 2 ],
            );

            # Format the value once per triple, from a copy: @matrix itself
            # must keep full precision because it is reused for every point
            # below.  Each entry is followed by "_" (including the last).
            my $value = join '', map { sprintf( '%.3f_', $_ ) } @matrix;

            for my $point (@points) {
                #transform coor, and bin the new coor as a generated key
                # Only x, y, z are passed, so extra input columns cannot
                # shift the matrix arguments inside transform().
                my @new_xyz = transform( @{$point}[ 0 .. 2 ], @matrix );

                # int() truncates toward zero.  Adding 0 is meant to fold a
                # possible negative zero into plain 0 so that "-0" and "0"
                # never become two different keys.
                # NOTE(review): truncation makes the bin around zero twice
                # as wide as the others - confirm this is intended.
                my $key = join '_', map { int( $_ / $grid ) + 0 } @new_xyz;

                $total_hash_keys{$key} = 0;
                push @{ $tri_hash{$key} }, $value;
            }

            #write to disk file
            $write_block->() if $step % $block_size == 0;
        }    #for k
    }    #for j
}    #for i

# Flush the final, partial block; without this every triple after the last
# multiple of $block_size would be missing from the output.
$write_block->() if %tri_hash;
my $total_file_no = $block_no;

# Merge: for every key, collect its value list from each temp file.
# NOTE: this rescans every temp file once per key, which is slow for large
# tables; writing the temp files sorted by key would allow a single-pass
# merge instead.
open( my $out_fh, '>', $output ) or die "cannot write to file $output!\n";
while ( my ($my_key) = each %total_hash_keys ) {
    my @chunks;
    for my $file_no ( 1 .. $total_file_no ) {
        my $hash_file = "tmp_hash" . $file_no;
        open( my $hash_fh, '<', $hash_file )
            or die "cannot open file $hash_file!\n";
        while ( my $line = <$hash_fh> ) {
            chomp $line;
            my ( $key, $values ) = split /\t/, $line, 2;
            next unless defined $key && $key eq $my_key;
            push @chunks, $values;
            last;    # a key appears at most once per temp file
        }
        close($hash_fh);
    }

    # Join with a space so values from different temp files stay separated.
    print {$out_fh} $my_key, "=>", join( ' ', @chunks ), "\n";
}
close($out_fh) or die "cannot write to file $output!\n";
# Apply a 3x3 linear map to a point.
#
# Arguments: $x, $y, $z followed by a flat list @t; only $t[0] .. $t[8] are
# read, any further entries are ignored.
# Output component c (0, 1, 2) is  x*t[c] + y*t[c+3] + z*t[c+6].
# Returns the three new coordinates as a list.
sub transform {
    my ($x, $y, $z, @t) = @_;
    my @moved = map { $x * $t[$_] + $y * $t[ $_ + 3 ] + $z * $t[ $_ + 6 ] } 0 .. 2;

    # Returned as a slice so that a scalar-context call still yields the
    # last coordinate, exactly like returning a literal three-element list.
    return @moved[ 0 .. 2 ];
}