#!C:/Perl/bin/perl
use strict;
use warnings;
use File::Path;

# Split the lines of a FASTA input file into two output files:
#   matched.txt     - lines containing any forward barcode immediately
#                     followed by the V1 forward primer
#   notmatched.txt  - every other line
#
# NOTE(review): matching is done per line, so FASTA header lines and
# sequences that wrap across several lines are classified line by line;
# confirm that the input holds one complete sequence per line.

# Part 1: declare variables, constants, ...

# Forward (F) barcodes.
my @forward = (
    "AGCCTAAGCT", "TCAAGTTAGC", "AGCCTGGCAT", "ACGGTCCATG",
    "ACTTGCCGAT", "ACGGTGGATC", "ATCCGCCTAG", "ATGGCGGTAC",
);

# Reverse (R) barcodes (declared for reference; not used by the search below).
my @reverse = (
    "AGCTTAGGCT", "TAGCCTAAGC", "AGCTTGCCAT", "ACGTTCAATG",
    "ACTGGCGGAT", "ACGTTGAATC", "ATCGGCAAGT", "ATGCCGTTAC",
);

# Primers used for Variable Region 1 (V1) and Variable Region 3 (V3)
# of 16S rRNA.
my $V1 = 'AGAGTTTGATCCTGGCTCAG';     # forward primer (V1 region)
my $V3 = 'GTATTACCGCGGCTGCTGGCA';    # reverse primer (V3 region); unused here

# Location of the input file and names of the two output files.
my $input_file       = "C:/../input.txt";
my $matched_file     = 'matched.txt';
my $not_matched_file = 'notmatched.txt';

# Part 2: build the search pattern.

# Append the V1 primer to every forward barcode; quotemeta keeps each
# sequence literal inside the regex.
my @processed = map { quotemeta( $_ . $V1 ) } @forward;

# Combine all barcode+primer sequences into one precompiled alternation.
my $alternation = join '|', @processed;
my $search_for  = qr{$alternation};

# Part 3: split the input into matched / not matched lines.

open my $FASTA_IN, '<', $input_file
    or die "Cannot open '$input_file' for reading: $!";
open my $MATCHED_OUT, '>', $matched_file
    or die "Cannot open '$matched_file' for writing: $!";
open my $NOT_MATCHED_OUT, '>', $not_matched_file
    or die "Cannot open '$not_matched_file' for writing: $!";

# Don't read all the data at once; process it line by line instead.
while ( my $line = <$FASTA_IN> ) {
    if ( $line =~ $search_for ) {
        print {$MATCHED_OUT} $line;
    }
    else {
        print {$NOT_MATCHED_OUT} $line;
    }
}

close $FASTA_IN;

# Buffered write errors only surface at close, so check the output handles.
close $MATCHED_OUT
    or die "Cannot close '$matched_file': $!";
close $NOT_MATCHED_OUT
    or die "Cannot close '$not_matched_file': $!";