#!C:/Perl/bin/perl
use strict;
use warnings;
use File::Path;

# This script processes a FASTA file containing DNA sequences.

# Part 1: declare variables, constants, ...

# Forward (F) barcodes.
my @forward = (
    "AGCCTAAGCT", "TCAAGTTAGC", "AGCCTGGCAT", "ACGGTCCATG",
    "ACTTGCCGAT", "ACGGTGGATC", "ATCCGCCTAG", "ATGGCGGTAC",
);

# Reverse (R) barcodes.
my @reverse = (
    "AGCTTAGGCT", "TAGCCTAAGC", "AGCTTGCCAT", "ACGTTCAATG",
    "ACTGGCGGAT", "ACGTTGAATC", "ATCGGCAAGT", "ATGCCGTTAC",
);

# Primers used for Variable Region 1 (V1) and Variable Region 3 (V3)
# of 16S rRNA.
# Forward primer (V1 region).
my $V1 = 'AGAGTTTGATCCTGGCTCAG';

# Reverse primer (V3 region).
my $V3 = 'GTATTACCGCGGCTGCTGGCA';

# Part 2: import the data

# Location of the import file with the data.
my $input_file = "C:/../input.txt";

# Open the import file read-only on a lexical filehandle and stop with the
# system error message if that fails, rather than silently continuing with
# an empty data set.
open my $fasta_in, '<', $input_file
    or die "Cannot open '$input_file' for reading: $!\n";

# Import the data (FASTA formatted) as an array, one element per line of the
# file. Each element still carries its line ending (no chomp is applied).
my @raw_DNA = <$fasta_in>;

# test imported data
# print "@raw_DNA\n";

# Close the import file.
close $fasta_in;

# Part 3: start processing sequences

# 3.1 Create arrays to hold processed results.
my @Processed_Sequences = ();
my @Rejected_Sequences  = ();

# 3.2 Concatenate each barcode with the appropriate primer.
for my $current_barcode (0 .. $#forward) {

    # Forward barcode immediately followed by the V1 primer. No newline is
    # appended: this string is meant to be used as a match pattern, and an
    # embedded newline would stop it from matching inside a sequence.
    my $F = $forward[$current_barcode] . $V1;

    # test concatenation
    # print "$F\n";

    # TODO: test the current concatenated barcode+primer against the
    # sequences and, on a match, remove the barcode and primer.
    # ... =~ m/$F/;
    # if match, print match $F
}