if ($#ARGV != 2){die "usage $0 filenamein filenameout\n";} open( INFILE, "< $ARGV[0]" ) || die "Cannot open $ARGV[0] $!\n"; open( OUTFILE, "> $ARGV[1]" ) || die "Cannot open $ARGV[1] $!\n"; $nopat=0; $lookup =""; while () { $line=$_; if($line=~m/^Patient ID.*?(\d+)/) { $ID=$1; $nopat=$nopat+1; $blank=""; print $ID."\n"; %hash = (); } if($line=~m/^NAD.*?(\d+)/) { chomp; $nhs=$1; } if($line=~m/^Date Of Birth/) { $dob=$line; $dob=~s/Date Of Birth ://; chomp $dob; } if($line=~m/^Sex/) { $sex=$line; $sex=~s/Sex ://; chomp $sex; } if($line=~m/^Post/) { $postcode=$line; $postcode=~s/Postcode :(\w+)/$1/; chomp $postcode; $lookup=$lookup.$ID.",".$nhs.",".$postcode.",".$dob.",".$sex."\n"; #$age = ; # pause #print $lookup; } if ($line=~m/^\d\d\/\d\d.*?/) { #print "$line"; @names = split(',',$line); $date=@names[0]; $index=@names[1]; $nonsense=@names[2]; $val=@names[3]; $test=@names[3]; $orgtest=@names[3]; $orgindex=@names[1]; chomp $orgtest; chomp $val; chomp $test; #print "$val\n"; #if ($index=~m/O\/E -/) { #$index=~s/\d.*//; #search for digits and deletes everything after it #} $test=~s/\s$//; #substitute all blank spaces at end of line with nothing $test=~s/\s$//; #substitute all blank spaces at end of line with nothing #$index=~s/(\.\d+)|(\d+\.\d+)//;#delete number.number $index=~s/(\d)(\.00)(\s)/$1.$3/; #delet .00s $index=~s/(\d\.\d)(0)(\s)/$1.$3/; #delet .x0s $index=~s/(\d)(\.0)(\s)/$1.$3/; #delet e.0s $index=~s/\s(\.\d)/0.$1/; #add a 0 in front of a period This is so that the units match the old units and that replace works $index=~s/\<|(\)|\()|\/|\^|\.|\*|//g; #substitute all operators $test=~s/\<|(\)|\()|\/|\^|\.|\*|//g; #substitute all operators $test=~s/\s0//; #$test=~s\s0$\\; $testother="$test"."0"; $index=~s/\s$testother\s.*//; $index=~s/\s$test\s.*//; $presub=$index; $index=~s/$testother//; $index=~s/$test//; #search for last #$index=~s/(\.\d+)|(\d+\.\d+)//;#delete number.number #print "-$val- substituted -$index- \n"; #index=~s/x109l|x109 L|109L|x1012L|gdL|iul|iuL|gdl|ugL|uL|gL|mmolL|mmoll| Ul|1012L| gl |mmh|mm|.0|IUL|Uml|ngmL| UL//; $index=~s/\s+$//; #substitute all blank spaces at end of line with nothing $index=~s/\.$//; #substitute all . spaces at end of line with nothing $index=~s/\s$//; #substitute all blank spaces at end of line with nothing $index=~s/(OE -\w*)\d/$1/; #substitute all blank spaces at end of line with nothing $index=~s/\s\d+\scm$//; #$index=~ s/10\*.*//; #Search for 10*whatever and delete it #$index=~s/x109l|x109 L|109L|x1012L//; $val=~ s/mL\/min\/1.73m2//; $val=~ s/10\*.*//; #Search for 10*whatever and delete it $val=~s/109L//; $val=~tr/[0-9][.]//cd; #$index=~s/$val//; #and re dof or final clear up $index=~s/\s+$//; #substitute all blank spaces at end of line with nothing # print "-$test- substituted -$index- \n"; if ($val != "") { $hash{$index} =$hash{$index}.$val.","; #push @lines, $ID.",".$index.",".$date.",".$val.",".$orgtest.",".$orgindex.",".$presub.",".$test.",".$testother."\n"; #HanDY TO TEST push @lines, $ID.",".$index.",".$date.",".$val."\n"; #HanDY TO TEST push @keys, $index."\n"; } } } #print "Sorting and collating records.....takes some time.....\n Wait for this window to close before continuing \n "; my @unique = (); my %seen = (); my @unique = grep { ! $seen{ $_ }++ } @keys; @sorted = sort { $a cmp $b } @unique ; #print @sorted; print OUTFILE @lines; print "\n$. lines of data Processed. "; close (OUTFILE); open( OUTFILE2, "> $ARGV[2]" ) || die "Cannot open $ARGV[2] $!\n"; print OUTFILE2 @sorted; close (OUTFILE2); close (INFILE); $path="lookup.CSV"; open (OUTFILE3, "> $path") || die "Cannot open LOOKUP.CSV $!\n"; print OUTFILE3 $lookup; close (OUTFILE3);