#!/usr/bin/perl
use warnings;
use strict;
use v5.10;

# Read a series of VCF files and write 'master-site-list': a two-column,
# tab-separated file holding one "chromosome<TAB>coordinate" line for every
# site seen in one or more of the input files.
#
# Usage:   <this script> FILE.vcf [FILE.vcf ...]
# Input:   the first two whitespace-separated fields of every line that does
#          not start with '#' are taken as chromosome and coordinate.
# Output:  'master-site-list' in the current directory. Each site appears
#          once; chromosomes are ordered numerically when their names are
#          plain integers (integer names first), otherwise as strings, and
#          coordinates are ordered numerically within each chromosome.
# Errors:  dies if no file names are given or if a file cannot be opened or
#          closed; warns about and skips data lines without a numeric
#          coordinate in the second field.

# Comparator for chromosome names: integer names compare numerically and
# sort ahead of non-integer names (e.g. 'X', 'chr1'), which compare as
# strings. Returns -1, 0 or 1 like <=> / cmp.
sub compare_chromosomes {
    my ( $left, $right ) = @_;

    my $left_is_integer  = $left  =~ /\A\d+\z/ ? 1 : 0;
    my $right_is_integer = $right =~ /\A\d+\z/ ? 1 : 0;

    return $left <=> $right if $left_is_integer && $right_is_integer;
    return -1               if $left_is_integer;
    return 1                if $right_is_integer;
    return $left cmp $right;
}

die "Give the names of each VCF" if @ARGV < 1;

my $output_name = 'master-site-list';

# $seen_at{$chrom}{$coord} is true for every site encountered. Using the
# coordinate as a hash key (rather than an array index) is what makes the
# duplicate removal correct.
my %seen_at;

for my $vcf_name (@ARGV) {
    open my $infile, '<', $vcf_name
        or die "Cannot open VCF file '$vcf_name': $!";

    LINE:
    while ( my $line = <$infile> ) {
        chomp $line;
        next LINE if $line =~ /^#/;     # header / meta-information lines
        next LINE if $line !~ /\S/;     # blank lines

        # split ' ' ignores leading whitespace and also swallows a
        # trailing carriage return from CRLF files.
        my ( $chr, $coord ) = split ' ', $line;

        if ( !defined $coord || $coord !~ /\A\d+\z/ ) {
            warn "Skipping malformed line $. of '$vcf_name'\n";
            next LINE;
        }

        $seen_at{$chr}{$coord} = 1;
    }

    close $infile or die "Cannot close VCF file '$vcf_name': $!";
}

# The output is opened only after every input has been read, so a failure
# while reading does not leave a truncated site list behind.
open my $outfile, '>', $output_name
    or die "Cannot open output file '$output_name': $!";

for my $chrom ( sort { compare_chromosomes( $a, $b ) } keys %seen_at ) {
    for my $coord ( sort { $a <=> $b } keys %{ $seen_at{$chrom} } ) {
        say {$outfile} "$chrom\t$coord";
    }
}

# Buffered write errors only surface at close, so check it.
close $outfile or die "Cannot close output file '$output_name': $!";