-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsubsetGeno.pl
executable file
·49 lines (40 loc) · 1.29 KB
/
subsetGeno.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/perl
## remove samples from genotype file to match the samples present in expression file
use warnings;
use strict;
use File::Basename;
use Getopt::Long;
sub usage(){
print STDERR basename($0) . " [options] <genotype file> <expression file>\n";
print STDERR " genotype file: File with genotype calls (coded as 0/1/2).\n";
print STDERR " expression file: File with gene expression values.\n";
print STDERR " Options:\n";
print STDERR " --delim | -d <delimiter>: Delimiter used to separate records in input files. [\\t]";
exit;
}
my ($geno, $express, %present, @index, @sample, @entry, $sep);
my $status = GetOptions("delim|d=s" => \$sep);
usage() if @ARGV != 2;
$geno = shift;
$express = shift;
## get sample names present in expression file
open EXPRESS, $express or die "Cannot read $express: $!";
my $line = <EXPRESS>;
chomp $line;
close EXPRESS;
@sample = split /$sep/, $line;
@present{@sample} = (1) x @sample;
open GENO, $geno or die "Cannot read $geno: $!";
$line = <GENO>;
chomp $line;
@sample = split /$sep/, $line;
@index = grep {defined $present{$sample[$_]}} (0..$#sample);
## print header
print join(',', @sample[@index]) . "\n";
@index = map {$_+1} @index;
while(<GENO>){
@entry = split /$sep/;
print "$entry[0]$sep";
print join($sep, @entry[@index]) . "\n";
}
close GENO;