#! C:/Perl/bin
use strict;
use warnings;
use File::Path;

# This script processes a fasta file containing DNA sequences

# Part 1: declare variables, constants, ...

# Forward (F) barcodes: eight 10-character tags, kept in index order.

my @forward = qw(
    AGCCTAAGCT
    TCAAGTTAGC
    AGCCTGGCAT
    ACGGTCCATG
    ACTTGCCGAT
    ACGGTGGATC
    ATCCGCCTAG
    ATGGCGGTAC
);

# Reverse (R) barcodes: eight 10-character tags, kept in index order.

my @reverse = qw(
    AGCTTAGGCT
    TAGCCTAAGC
    AGCTTGCCAT
    ACGTTCAATG
    ACTGGCGGAT
    ACGTTGAATC
    ATCGGCAAGT
    ATGCCGTTAC
);

# Primers for Variable Region 1 (V1) and Variable Region 3 (V3) of 16S rRNA.
my ($V1, $V3) = (
    'AGAGTTTGATCCTGGCTCAG',     # forward primer (V1 region)
    'GTATTACCGCGGCTGCTGGCA',    # reverse primer (V3 region)
);


# Locate the import file with the data (FASTA-formatted text).
my $input_file = "C:/../input.txt";

# Open the file read-only. The three-argument form with a lexical filehandle
# keeps the file name from ever being parsed as an open mode, and the handle
# cannot collide with a bareword of the same name elsewhere. Stop right away
# if the file cannot be opened rather than silently continuing with no data.
open(my $fasta_in, '<', $input_file)
    or die "Cannot open '$input_file' for reading: $!\n";

# Import the data as an array holding every line of the file
# (each element keeps its line ending).
my @raw_DNA = <$fasta_in>;

#test imported data
#print "@raw_DNA\n";

# Close the import file. A failed close on a read handle does not invalidate
# the lines already read, so it is reported but not fatal.
close($fasta_in)
    or warn "Could not close '$input_file': $!\n";

# Part 3: start processing sequences

# 3.1 Arrays that will hold the processed results; both start out empty.
my (@Processed_Sequences, @Rejected_Sequences);

# 3.2 Concatenate each forward barcode with the appropriate (V1) primer.

for my $current_barcode (0 .. $#forward) {
    # Barcode immediately followed by the forward primer. No trailing newline
    # is appended here: this string is meant to be used as a match pattern
    # against the sequences, and a "\n" inside it would only match when the
    # primer happens to end a line.
    my $F = $forward[$current_barcode] . $V1;

    # test concatenation (the newline belongs to the output, not to $F)
    # print "$F\n";

    # TODO: test the current concatenated barcode+primer against the
    # sequences and, if it matches, remove the barcode and primer:
    #
    #     ... =~ m/$F/;
    #
    # TODO: if it matches, print the matching $F
}