forked from helengracehuang/MsCaviar
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathMsCaviar.cpp
More file actions
163 lines (150 loc) · 5.61 KB
/
MsCaviar.cpp
File metadata and controls
163 lines (150 loc) · 5.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#include <iostream>
#include <fstream>
#include <string>
#include <cmath>
#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include <armadillo>
#include <unistd.h>
#include <vector>
#include "MsUtil.h"
#include "MsPostCal.h"
#include "MsCaviarModel.h"
using namespace std;
/*
Reads the content of the file, return a vector of paths
@param fileName the name of file that contains all the paths to z or ld files
@return vector of paths
*/
vector<string> read_dir(string fileName){
vector<string> dirs;
string data;
ifstream fin(fileName.c_str(), std::ifstream::in);
if (!fin) {
cout << "Error: unable to open " << fileName << endl;
exit(1); // terminate with error
}
while(fin.good()){
getline(fin,data);
if(data != "") {
dirs.push_back(data); }}
fin.close();
return dirs;
}
vector<int> read_sigma(string sample_size) {
vector<int> sizes;
string current_size = "";
for (int i=0; i < sample_size.size(); i++) {
if (sample_size[i] == ',') {
sizes.push_back(stod(current_size)); // push back previous sample size
current_size = ""; // reset
}
else if (isdigit(sample_size[i])) {
current_size += sample_size[i];
}
else {
cout << "Error: sample size is not in the right format" << endl;
exit(1);
}
}
if (current_size != "") {
sizes.push_back(stod(current_size)); // push back last sample size
}
return sizes;
}
int main( int argc, char *argv[] ){
int totalCausalSNP = 3;
double gamma = 0.01;
double rho = 0.95;
bool histFlag = false;
int oc = 0;
double tau_sqr = 0.52;
double sigma_g_squared = 5.2;
double cutoff_threshold = 0;
string ldFile = "";
string zFile = "";
string outputFileName = "";
string sample_s = "";
while ((oc = getopt(argc, argv, "vhl:o:z:r:c:g:f:t:s:n:a:")) != -1) {
switch (oc) {
case 'v':
cout << "Version 0.1\n" << endl;
case 'h':
cout << "Options: " << endl;
cout << "-h, --help show this help message and exit " << endl;
cout << "-o OUTFILE, --out=OUTFILE specify the output file" << endl;
cout << "-l LDFile, --ld_file=LDFILE REQUIRED: the ld input file that contains paths to ld files" << endl;
cout << "-z ZFile, --z_file=ZFILE REQUIRED: the z-score and rsID file that contains paths to z files" << endl;
cout << "-r RHO, --rho-prob=RHO set $rho$ probability (default 0.95)" << endl;
cout << "-g GAMMA, --gamma set $gamma$ the prior of a SNP being causal (default 0.01)" << endl;
cout << "-c causal set the maximum number of causal SNPs (default 3)" << endl;
cout << "-f 1 to out the probaility of different number of causal SNP" << endl;
cout << "-t TAU_SQR, --tau_sqr=TAU_SQR set the heterogeneity (t^2) across studies, default is 0.52" << endl;
cout << "-s SIGMA_G_SQR, --sigma_g_squared=SIGMA_G_SQR set the NCP variance for the smallest study, default is 5.2" << endl;
cout << "-n SAMPLE_SIZE, --sample_size REQUIRED: set the sample sizes (integer) of individual studies, format: enter 50,100 for study 1 with 50 sample size and 2 with 100" << endl;
cout << "-a THRESHOLD set the threshold for cut-off (default 0) when we select the final causal set; if the SNP has a posterior below the threshold, we do not include it in the set" << endl;
exit(0);
// required options: LD and Z-score filenames, and output file name
case 'l':
ldFile = string(optarg);
break;
case 'o':
outputFileName = string(optarg);
break;
case 'z':
zFile = string(optarg);
break;
case 'n':
sample_s = string(optarg);
break;
// optional arguments: parameters for fine mapping
case 'r':
rho = atof(optarg);
break;
case 'c':
totalCausalSNP = atoi(optarg);
break;
case 'g':
gamma = atof(optarg);
break;
case 'f':
histFlag = true;
break;
case 't':
tau_sqr = atof(optarg);
break;
case 's':
sigma_g_squared = atof(optarg);
break;
case ':':
case '?':
case 'a':
cutoff_threshold = atof(optarg);
break;
default:
break;
}
}
if (ldFile == "" or zFile == "" or outputFileName == "" or sample_s == "") {
cout << "Error: -l, -z, -o, and -n are required" << endl;
exit(1);
}
vector<string> ldDir = read_dir(ldFile);
vector<string> zDir = read_dir(zFile);
vector<int> sample_sizes = read_sigma(sample_s);
if (ldDir.size() != zDir.size() || ldDir.size() != sample_sizes.size()) {
cout << "Error: LD files, Z files, and sample sizes do not match in number" << endl;
exit(1);
}
MCaviarModel Mcaviar(ldDir, zDir, sample_sizes, outputFileName, totalCausalSNP, rho, histFlag, gamma, tau_sqr, sigma_g_squared, cutoff_threshold);
Mcaviar.run();
Mcaviar.finishUp();
return 0;
}