Re: It's all getting messy - remove whitespace

Instead of creating separate arrays like this

# One separate array per input column (the layout the text suggests replacing).
my @col1; ## column 1
my @col_ID; ## column 2
my @col3; ## column 3
[download]

you could use a single array of hashes (see perldsc).

 # Array of hashes: each element is one row, keyed by column name.
 # Declared with `my` so the snippet also compiles under `use strict`.
 my @AoH = (
     {
         col1   => "col1",
         col_ID => "col_ID",
         col3   => "col3",
     },
 );
[download]

For example, something like this:

#!/usr/bin/perl
# Demo: keep only the input rows that mention "splic", store each row as a
# hash in a single array, derive an offset from the signed distance found in
# column 3, then print a 20-character window of column 1 starting at that
# offset and append it (prefixed by '>' and the ID) to the output file.
use warnings;
use strict;
use Data::Dump 'pp';

# Name of the real input file. The demo reads the inline DATA section
# instead; swap in the commented-out open below to read this file.
my $inputfile1 = $ARGV[0];
my $outputfile = 'fasta';

#open my $in, '<', $inputfile1
#   or die "Uh oh.. unable to find file $inputfile1 : $!";

# Lexical handle rather than a bareword; '>>' appends to any existing file.
open my $out, '>>', $outputfile
   or die "Could not open $outputfile : $!";

# Starting position that the signed distance is added to or subtracted from.
my $base_offset   = 13;
my $window_length = 20;

my @match;
while ( my $line = <DATA> ) {    # use $in to read the real file
    chomp $line;
    next unless $line =~ m/splic/;

    my @colsplit = split /,/, $line;    # use \t for tab-separated input
    my $record = {
        'col1'                 => $colsplit[0],
        'col_ID'               => $colsplit[1],
        'col3'                 => $colsplit[2],
        'col_strand_direction' => $colsplit[5],
    };

    ## pulls out + or - and subsequent number and [base change]
    # Guard against short rows so the match never sees an undefined value.
    my ( $distance, $base );
    if ( defined $record->{'col3'} ) {
        ( $distance, $base )
            = $record->{'col3'} =~ m/([+-]\d+)\w+(\[[ACTG]])/;
    }

    if ( defined $distance ) {
        $record->{'intron_from_boundary'} = $distance;
        $record->{'baseref'}              = $base;

        # A '+' in the strand column adds the distance; anything else
        # (including a missing strand column) subtracts it.
        my $strand  = $record->{'col_strand_direction'};
        my $is_plus = defined $strand && $strand =~ /\+/;
        $record->{'offset'}
            = $is_plus
            ? $base_offset + $distance
            : $base_offset - $distance;
    }

    # Rows without a usable column 3 are still kept so the dump shows them.
    push @match, $record;
}

# show data structure
pp @match;

# need to take each intronmatch value
# and work out its position relative
# to intron/exon boundary
foreach my $rec (@match) {
    my $id     = defined $rec->{'col_ID'} ? $rec->{'col_ID'} : '(no ID)';
    my $offset = $rec->{'offset'};

    # No offset means column 3 did not match; substr would warn on undef.
    if ( !defined $offset ) {
        warn "Skipping $id: no boundary distance found in column 3\n";
        next;
    }

    # A negative offset would make substr count from the end of the string,
    # and one past the end yields undef; neither is a valid position here.
    my $sequence = $rec->{'col1'};
    if ( $offset < 0 or $offset > length($sequence) ) {
        warn "Skipping $id: offset $offset is outside column 1\n";
        next;
    }

    my $string = substr( $sequence, $offset, $window_length );
    print "offset = $offset : $string\n";

    # NOTE(review): ID and sequence are written on the same line; standard
    # FASTA puts the sequence on the line after '>ID' - confirm intended.
    print {$out} '>' . $rec->{'col_ID'} . $string . "\n";
}

# Buffered write errors only surface at close, so check it.
close $out
   or die "Could not close $outputfile : $!";

__DATA__
1col1abcdefghijklmnopqrstuvwxyz0123456789,1col_ID,+1col3[A],1col4,splic,+
2col1abcdefghijklmnopqrstuvwxyz0123456789,2col_ID,-2col3[C],2col4,splic,-
3col1abcdefghijklmnopqrstuvwxyz0123456789,3col_ID,+3col3[T],3col4,splic,+
4col1abcdefghijklmnopqrstuvwxyz0123456789,4col_ID,-4col3[G],4col4,splic,-
[download]

poj

Comment on Re: It's all getting messy - remove whitespace Select or Download Code


Problems? Is your data what you think it is?
	PerlMonks