Beefy Boxes and Bandwidth Generously Provided by pair Networks
Perl-Sensitive Sunglasses
 
PerlMonks  

Searching for two elements in two different lines

by noob_mas (Novice)
on Aug 30, 2013 at 08:16 UTC ( #1051570=perlquestion: print w/ replies, xml ) Need Help??
noob_mas has asked for the wisdom of the Perl Monks concerning the following question:

Hello everyone, i write an executable script where it will take in 3 files as input, 1st file is the user setup file. 2nd and 3rd file is a reference file where, i can get the rest of the data input by the user, at the 1st file. The 1st file will have 2columns <column_a> <column_b> , where whenever if both of this element exist in the file2, it will take a set of data from that line and will store it in an array, then using that array, it will search in file 3 for matching data then it will store it in another array. I do realize that this code can be further optimize, just hope that you guys can give a good point on where to optimise it. Thank you in advance. Snippet of my code is as follow;

#!/tools/perl/5.8.8/linux/bin/perl use strict; #use warnings; use Getopt::Long; my ($file1,$file2,$file3); my $output = 'OUTPUT_FILE';##output file name my $count = 0; GetOptions ('file1=s' => \$file1, 'file2=s' => \$file2, 'file3=s' => \ +$file3); open my $fh1, '<', $file1 or die "Can't open $file1: $!"; #file1 list +by user output list open my $fh2, '<', $file2 or die "Can't open $file2: $!"; #XX.file2 fi +le open my $fh3, '<', $file3 or die "Can't open $file3: $!"; #xx.file3 fi +le open my $fh4, '>', $output or die "Can't open $output: $!"; #output fi +le my ($el1,$el2,$output,$input); my (@data,@file2_data,@array_2,@data_1,@array_4,@data_4,@array_5,@arra +y_6,@data_7,@array_8); my ($input_1,$input_2,$input_3,$input_4,$stage_1,$stage_2,$stage_3,$st +age_4,$output_3,$final_join); my ($marc_data,$marc_add,$V_output,$join_1,$join_2,$join_3,$join_4); my ($data,$address,$stage_output,$element2,$marc_data_1,$marc_add_1,$j +oin_for_2nd_stage); my ($element_4,$file3_data,$file3_add,$dataout,$marc_array,$file3_data +_1,$file3_add_1,$dataout_1,$marc_array_1,$join_1st_stage_file2_file3, +$nothing,$nothing_1,$nothing_2,$nothing_3,$dataout_final,$marc_array_ +final); my @array_1 = <$fh2>; foreach (@array_1) { chomp $_; } #can make this as an option 1st... while (<$fh1>){ ##my config file chomp $_; # print "$_\n"; if ($_ !~ /^\s*$/) { #print "$_\n"; @data = split ' ', $_; $el1 = $data[1]; #output $el2 = $data[0]; #input $output = quotemeta ($el1); $input = quotemeta ($el2); foreach my $element(@array_1) { chomp $element; if ($element !~ /^\s*$/) { if (($element =~ m/$output/) && ($element =~ m/$input/)) { # +print out all the matching output lines @file2_data = split ' ',$element; $marc_data = $file2_data[2]; $marc_add = $file2_data[3]; $V_output = $file2_data[7]; $input_1 = $file2_data[11]; $stage_1 = $file2_data[13]; $input_2 = $file2_data[15]; $stage_2 = $file2_data[17]; $input_3 = $file2_data[19]; $stage_3 = $file2_data[21]; $input_4 = $file2_data[23]; $stage_4 = $file2_data[25]; if ($input_1 =~ m/$input/) { $join_1 = join (" ", ("$input_1","$V_output","$marc_data +","$marc_add","$stage_1")); push (@array_2,$join_1); } elsif ($input_2 =~ m/$input/) { $join_2 = join (" ", ("$input_2","$V_output","$marc_data +","$marc_add","$stage_2")); push (@array_2,$join_2); } elsif ($input_3 =~ m/$input/) { $join_3 = join (" ", ("$input_3","$V_output","$marc_data +","$marc_add","$stage_3")); push (@array_2,$join_3); } elsif ($input_4 =~ m/$input/) { $join_4 = join (" ", ("$input_4","$V_output","$marc_data +","$marc_add","$stage_4")); push (@array_2,$join_4); } } } } } else { } } my @array_4 = <$fh3>; foreach (@array_4) { chomp $_; } foreach (@array_2) { chomp $_; my ($input_2,$output_2,$data,$address,$stage_output_1) = split (/ /,$_ +); foreach my $element_4(@array_4) { chomp $element_4; @data_4 = split ' ', $element_4; $file3_data = $data_4[3]; $file3_add = $data_4[4]; $dataout = $data_4[7]; $marc_array = $data_4[9]; if (($file3_data == $data) && ($file3_add == $address)) { $join_1st_stage_file2_file3 = join (" ", ("$input_2","$output_2","$da +ta","$address","$stage_output_1","$file3_data","$file3_add","$dataout +","$marc_array")); push (@array_5,$join_1st_stage_file2_file3); } } } open my $fh5, '<', $file2 or die "Can't open $file2: $!"; #XX.file2 fi +le my @array_3 = <$fh5>; foreach (@array_3) { chomp $_; } foreach (@array_5) { chomp $_; my($input_2,$output_2,$data,$address,$stage_output_1,$file3_data,$fil +e3_add,$dataout,$marc_array) = split (/ /,$_); $stage_output = quotemeta($stage_output_1); $output = quotemeta($output_2); $input = quotemeta($input_2); foreach my $element_2(@array_3) { @data_1 = split ' ',$element_2; $marc_data_1 = $data_1[2]; $marc_add_1 = $data_1[3]; $output_3 = $data_1[7]; $input_1 = $data_1[11]; $stage_1 = $data_1[13]; if ($output_3 =~ m/$output/) { if (($input_1 =~ m/$input/) && ($stage_1 =~ m/$output/)) { $nothing = 'N/A'; $nothing_1 = 'N/A'; $join_for_2nd_stage = join(" ",("$input_2","$output_2","$data","$a +ddress","$stage_output_1","$file3_data","$file3_add","$dataout","$mar +c_array","$nothing","$nothing_1")); push (@array_6,$join_for_2nd_stage); } elsif (($input_1 =~ m/$stage_output/) && ($stage_1 =~ m/$output/)) + { $nothing = $marc_data_1; $nothing_1 = $marc_add_1; $join_for_2nd_stage = join(" ",("$input_2","$output_2","$data","$a +ddress","$stage_output_1","$file3_data","$file3_add","$dataout","$mar +c_array","$nothing","$nothing_1")); push (@array_6,$join_for_2nd_stage); } } } } open my $fh6, '<', $file3 or die "Can't open $file2: $!"; #XX.file2 fi +le my @array_7 = <$fh6>; foreach (@array_7) { chomp $_; #print "$_\n"; } foreach my $element_6(@array_6) { chomp $_; my($input_2,$output_2,$data,$address,$stage_output_1,$file3_data,$file +3_add,$dataout,$marc_array,$nothing,$nothing_1) = split (/ /,$element +_6); foreach my $element_7(@array_7) { #print "$element_7\n"; @data_7 = split ' ', $element_7; $file3_data_1 = $data_7[3]; $file3_add_1 = $data_7[4]; $dataout_1 = $data_7[7]; $marc_array_1 = $data_7[9]; $nothing_2 = 'N/A'; $nothing_3 = 'N/A'; if(($nothing_2 =~ m/$nothing/) && ($nothing_3 =~ m/$nothing_1/)) { + $dataout_final = 'N/A'; $marc_array_final = 'N/A'; $final_join = join (" ",("$input_2","$output_2","$data","$address" +,"$stage_output_1","$file3_data","$file3_add","$dataout","$marc_array +","$nothing","$nothing_1","$dataout_final","$marc_array_final")); #print "$final_join\n"; push (@array_8,$final_join); last; } elsif(($file3_data_1 =~ m/$nothing/) && ($file3_add_1 =~ m/$nothin +g_1/)){ $dataout_final = $dataout_1; $marc_array_final = $marc_array_1; $final_join = join (" ",("$input_2","$output_2","$data","$address" +,"$stage_output_1","$file3_data","$file3_add","$dataout","$marc_array +","$nothing","$nothing_1","$dataout_final","$marc_array_final")); #print "$final_join\n"; push (@array_8,$final_join); last; } } } print $fh4 " STAGE_N + STAGE_N+1\n"; print $fh4 "<<<<<<INPUT>>>>>>>>. <<<<<OUTPUt>>>>>>> <<<<MARC>>><<<<<<D +ATAOUT>>>>>>>>>>>>>>><<<<<<<<<<<<<<MARC_ARRAY>>>>>>>>>>>><<<<<MARC>>> +>><<<<<DATAOUT>>>>>>>>>>>>>>>><<<<<<<<MARC_ARRAY>>>>>>>>>>>>\n"; print $fh4 " DATA ADD + DATA ADD + \n"; foreach my $element_8(@array_8) { chomp $element_8; #print "$element_8\n"; my($input_2,$output_2,$data,$address,$stage_output_1,$file3_data,$file +3_add,$dataout,$marc_array,$nothing,$nothing_1,$dataout_final,$marc_a +rray_final) = split ' ',$element_8; printf $fh4 "%-25s %12s %4s %6s %-30s %30s %2s %4s %20s %20s\n",$input +_2,$output_2,$data,$address,$dataout,$marc_array,$nothing,$nothing_1, +$dataout_final,$marc_array_final; }

Comment on Searching for two elements in two different lines
Download Code
Re: Searching for two elements in two different lines
by hdb (Prior) on Aug 30, 2013 at 08:32 UTC

    Just some humble comments:

    foreach (@array_1) { chomp $_; }

    can be written as

    chomp @array_1;

    Generally, there seems to be too much chomping.

    Instead of

    foreach my $element(@array_1) { if ($element !~ /^\s*$/) { if (($element =~ m/$output/) && ($element =~ m/$input/)) { #print +out all the matching output lines

    you could

    foreach my $element (grep { /$output/ && /$input/ } @array_1) {

    Instead of

    $join_1 = join (" ", "$input_1","$V_output","$marc_data","$marc_add"," +$stage_1")); push (@array_2,$join_1);

    you could

    push @array_2, "$input_1 $V_output $marc_data $marc_add $stage_1";

    Instead of joining it into one string and then splitting it later again (hopefully the data does not contain any spaces...), you could store it in an array of arrays like this:

    push @array_2, [ $input_1, $V_output, $marc_data, $marc_add, $stage_1 +]; # and later foreach (@array_2) { my ($input_2,$output_2,$data,$address,$stage_output_1) = @$_;
Re: Searching for two elements in two different lines
by mtmcc (Hermit) on Aug 30, 2013 at 08:45 UTC
Re: Searching for two elements in two different lines
by Eily (Deacon) on Aug 30, 2013 at 12:45 UTC

    Your code is hard to read, and I only say hard because I gave up too early to end up saying impossible.

    If you have too many variables with names ending with a number, there's probably a design problem. In your case that's already obvious with the files. Either all three of them have the same role, and one can be used as the other without the whole program losing all its sense, and you can store their name and filehandles in an array or whatever data structure seems the best. Or your files have different purposes and you should give them names accordingly.

    The same goes for arrays, you can't have 8 of them called "array" and use them for 8 different purposes.

    As a rule, you should try to name your things by what they mean and not what they do. An array of colour names should be @colour, not @array. And how the hex do you end up with more than one kind of nothing?

    And Perl warns you for a reason, because code that seems to work but has unexpected behaviour is actually worse than code that doesn't work but explicitly fails. So the first optimisation is use warnings

Re: Searching for two elements in two different lines
by jwkrahn (Monsignor) on Aug 31, 2013 at 05:01 UTC
    #use warnings;

    You shouldn't disable warnings.



    my $count = 0;

    You never use this variable anywhere.



    my ($el1,$el2,$output,$input); my (@data,@file2_data,@array_2,@data_1,@array_4,@data_4,@array_5,@arra +y_6,@data_7,@array_8); my ($input_1,$input_2,$input_3,$input_4,$stage_1,$stage_2,$stage_3,$st +age_4,$output_3,$final_join); my ($marc_data,$marc_add,$V_output,$join_1,$join_2,$join_3,$join_4); my ($data,$address,$stage_output,$element2,$marc_data_1,$marc_add_1,$j +oin_for_2nd_stage); my ($element_4,$file3_data,$file3_add,$dataout,$marc_array,$file3_data +_1,$file3_add_1,$dataout_1,$marc_array_1,$join_1st_stage_file2_file3, +$nothing,$nothing_1,$nothing_2,$nothing_3,$dataout_final,$marc_array_ +final);

    Most of these variables are not needed, or at least not here at file scope.



    open my $fh2, '<', $file2 or die "Can't open $file2: $!"; #XX.file2 fi +le open my $fh3, '<', $file3 or die "Can't open $file3: $!"; #xx.file3 fi +le ... my @array_1 = <$fh2>; ... my @array_4 = <$fh3>; ... open my $fh5, '<', $file2 or die "Can't open $file2: $!"; #XX.file2 fi +le my @array_3 = <$fh5>; ... open my $fh6, '<', $file3 or die "Can't open $file2: $!"; #XX.file2 fi +le my @array_7 = <$fh6>;

    You already have $file2 stored in @array_1 and $file3 stored in @array_4 so there is no reason to reopen those files and store them again in @array_3 and @array_7.    Also, the error message for $fh6 says $file2 when it should say $file3.



    $join_1 = join (" ", ("$input_1","$V_output","$marc_data +","$marc_add","$stage_1")); ... $join_2 = join (" ", ("$input_2","$V_output","$marc_data +","$marc_add","$stage_2")); ... $join_3 = join (" ", ("$input_3","$V_output","$marc_data +","$marc_add","$stage_3")); ... $join_4 = join (" ", ("$input_4","$V_output","$marc_data +","$marc_add","$stage_4")); ... $join_1st_stage_file2_file3 = join (" ", ("$input_2","$output_2","$da +ta","$address","$stage_output_1","$file3_data","$file3_add","$dataout +","$marc_array")); ... $join_for_2nd_stage = join(" ",("$input_2","$output_2","$data","$a +ddress","$stage_output_1","$file3_data","$file3_add","$dataout","$mar +c_array","$nothing","$nothing_1")); ... $join_for_2nd_stage = join(" ",("$input_2","$output_2","$data","$a +ddress","$stage_output_1","$file3_data","$file3_add","$dataout","$mar +c_array","$nothing","$nothing_1")); ... $final_join = join (" ",("$input_2","$output_2","$data","$address" +,"$stage_output_1","$file3_data","$file3_add","$dataout","$marc_array +","$nothing","$nothing_1","$dataout_final","$marc_array_final")); ... $final_join = join (" ",("$input_2","$output_2","$data","$address" +,"$stage_output_1","$file3_data","$file3_add","$dataout","$marc_array +","$nothing","$nothing_1","$dataout_final","$marc_array_final"));

    You shouldn't quote variables.    You are basically making copies of all those variables when you don't have to.



    just hope that you guys can give a good point on where to optimise it.

    Here is how I would write it:

    #!/usr/bin/perl use strict; use warnings; use Getopt::Long; my $output_file = 'OUTPUT_FILE'; ## output file name GetOptions( 'file1=s' => \my $file1, 'file2=s' => \my $file2, 'file3=s +' => \my $file3 ); open my $fh1, '<', $file1 or die "Can't open $file1: $!"; # file1 li +st by user output list open my $fh2, '<', $file2 or die "Can't open $file2: $!"; # XX.file2 + file # can make this as an option 1st... my @array_1 = <$fh2>; my @array_2; while ( <$fh1> ) { ## my config file next unless /\S/; my ( $input, $output ) = split; for ( @array_1 ) { next unless /\S/; next unless /\Q$output/ && /\Q$input/; # print out all the matching output lines my @V_output = ( split )[ 7, 2, 3 ]; my %stages = ( split )[ 11, 13, 15, 17, 19, 21, 23, 25 ]; for my $key ( grep /\Q$input/, keys %stages ) { push @array_2, "$key @V_output $stages{$key}"; last; } } } open my $fh3, '<', $file3 or die "Can't open $file3: $!"; # xx.file3 + file my @array_3 = <$fh3>; my @array_4; for ( @array_2 ) { my @data_1 = split; for ( @array_3 ) { my @data_2 = ( split )[ 3, 4, 7, 9 ]; if ( $data_2[ 0 ] == $data_1[ 2 ] && $data_2[ 0 ] == $data_1[ +3 ] ) { push @array_4, "@data_1 @data_2"; } } } my @array_5; for ( @array_4 ) { my @data = split; for ( @array_1 ) { my ( $marc_data, $marc_add, $output, $input, $stage ) = ( spli +t )[ 2, 3, 7, 11, 13 ]; if ( $output =~ /\Q$data[1]/ ) { if ( $input =~ /\Q$data[0]/ && $stage =~ /\Q$data[1]/ ) { push @array_5, "@data N/A N/A"; } elsif ( $input =~ /\Q$data[4]/ && $stage =~ /\Q$data[1]/ ) + { push @array_5, "@data $marc_data $marc_add"; } } } } my @array_6; for ( @array_5 ) { my @data = split; for ( @array_3 ) { my ( $file3_data, $file3_add, $dataout, $marc_array ) = ( spli +t )[ 3, 4, 7, 9 ]; if ( 'N/A' =~ /$data[9]/ && 'N/A' =~ /$data[10]/ ) { push @array_6, "@data N/A N/A"; last; } elsif ( $file3_data =~ /$data[9]/ && $file3_add =~ /$data[10]/ + ) { push @array_6, "@data $dataout $marc_array"; last; } } } open my $fh4, '>', $output_file or die "Can't open $output_file: $!"; + #output file print $fh4 <<'HEADER'; STAGE_N + STAGE_N+1 <<<<<<INPUT>>>>>>>>. <<<<<OUTPUt>>>>>>> <<<<MARC>>><<<<<<DATAOUT>>>>>> +>>>>>>>>><<<<<<<<<<<<<<MARC_ARRAY>>>>>>>>>>>><<<<<MARC>>>>><<<<<DATAO +UT>>>>>>>>>>>>>>>><<<<<<<<MARC_ARRAY>>>>>>>>>>>> DATA ADD + DATA ADD HEADER printf $fh4 "%-25s %12s %4s %6s %-30s %30s %2s %4s %20s %20s\n", ( spl +it )[ 0 .. 3, 7 .. 12 ] for @array_6;

      I am more than happy :-) for the feedback given by u guys, i will surely apply all the tips to optimize the script. Many do's and dont's are learned through here. Thank you for spending time to identify and correct my mistakes.FYI: this is my 1st time (definitely not the last time) writing in this forum, if there are any mistake in my response please let me know.. .THANK YOU GUYS..

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: perlquestion [id://1051570]
Approved by Corion
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others contemplating the Monastery: (5)
As of 2014-12-20 16:12 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    Is guessing a good strategy for surviving in the IT business?





    Results (96 votes), past polls