-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsplitGeno.pl
executable file
·57 lines (47 loc) · 1.3 KB
/
splitGeno.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/perl
## split genotype file by chromosome
use strict;
use warnings;
use File::Basename;
use FileHandle;
use Getopt::Long;
sub usage{
print STDERR basename($0) . " [options] <index> <genotypes>\n";
print STDERR " index: Tabix indexed file of rsIDs.\n";
print STDERR " genotypes: Comma separated file with genotypes, samples in columns and sites in rows.\n";
print STDERR " Options:\n";
print STDERR " --out| -o <prefix>: Prefix for output files.\n";
exit;
}
## generate a new file handle
sub file_handle{
my ($prefix, $chrom) = @_;
my $fh = FileHandle->new();
open $fh, ">$prefix.$chrom.geno" or die "Cannot write to $prefix.$chrom.geno: $!";
return $fh;
}
my ($out, $idx, $geno, $header, $id, $prefix, @chrom, $line, %file, $record);
$out = '';
my $status = GetOptions("out|o=s" => \$out);
usage() if not $status or @ARGV != 2;
($idx, $geno) = @ARGV;
open IN, $geno or die "Cannot read $geno: $!";
$header = <IN>;
while($record = <IN>){
$record =~ /^(\D+)(\d+),/;
$prefix = $1;
$id = $2;
$line = `tabix $idx $prefix:$id-$id`;
chomp $line;
if($line){
@chrom = split /\t/, $line;
@chrom = split / /, $chrom[2];
for my $chr (@chrom){
if(not defined $file{$chr}){
$file{$chr} = file_handle($out, $chr);
$file{$chr}->print($header);
}
$file{$chr}->print($record);
}
}
}