forked from joyceyiyiwang/Portability_Questions
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path04a_clumping.sh
69 lines (59 loc) · 2.41 KB
/
04a_clumping.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/bin/bash
#SBATCH -J CT
#SBATCH -o CT.o%j
#SBATCH -e CT.o%j
#SBATCH -p normal
#SBATCH -N 3
#SBATCH -n 10
#SBATCH -t 48:00:00
#SBATCH -A OTH21148
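# NOTE: the notification address was redacted in this copy; replace the placeholder below.
#SBATCH --mail-user=your_email@example.com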
#SBATCH --mail-type=begin
#SBATCH --mail-type=end
# Abort the job as soon as any command fails, so later steps never run on
# missing or partial outputs.
set -e

# Load conda's shell integration into this non-interactive shell and activate
# the environment used by the Python helper scripts below.
# `conda init bash` is deliberately not run here: it edits the user's shell
# startup files and is not needed once conda.sh has been sourced.
source /work2/06568/joyce_w/stampede2/software/anaconda3/etc/profile.d/conda.sh
conda activate pgs

# Directory holding the filtered per-chromosome genotype files
scratch='/scratch/06568/joyce_w/pgs_portability_questions/data'
# Plink executable used for clumping
plink='/work2/06568/joyce_w/stampede2/software/plink/plink/plink'
# P-value thresholds; the array index (0-4) is used to number the PGS SNP files.
thresholds=(5e-8 1e-5 1e-4 1e-3 1e-2)
# Clump GWAS results and build the per-phenotype PGS SNP lists.
#
# For every phenotype:
#   1. per chromosome, convert the Plink 2 .glm.linear output to a Plink 1
#      style .glm.assoc file and run `plink --clump` on it, restricted to the
#      samples listed in data/ukb_populations/wb_gwas_id.txt;
#   2. concatenate the per-chromosome .clumped files under a single header;
#   3. run 04c_filter_snps_for_pgs.py once per entry of `thresholds`;
#   4. run 04d_combine_glm_threshold_4.py on the per-chromosome GWAS results,
#      keeping the SNPs listed in the threshold-4 file.
#
# Uses `scratch`, `plink` and `thresholds` defined earlier in this script.
# All data/ paths and helper scripts are resolved relative to the current
# working directory.
phenotypes=(
  BMI Lymphocyte Height Eosinophil MCH MCV Monocyte Platelet RBC WBC
  LDL Weight Triglycerides Cystatin_C Body_Fat_Perc
)

# The threshold files are written here; make sure the directory exists.
mkdir -p data/pgs

for phenotype in "${phenotypes[@]}"; do
  for chromosome in {1..22}; do
    # Common prefix of every per-chromosome file for this phenotype
    prefix="data/gwas_results/${phenotype}.chr${chromosome}.${phenotype}"

    # Convert the GWAS output file from the Plink 2 to Plink 1
    python 04b_convert_plink2_glm_to_plink1.py \
      "${prefix}.glm.linear" \
      --output "${prefix}.glm.assoc"

    # Clump GWAS results using GWAS set
    "$plink" \
      --memory 70000 \
      --bfile "${scratch}/ukb_filtered/chr${chromosome}_filtered" \
      --keep data/ukb_populations/wb_gwas_id.txt \
      --clump "${prefix}.glm.assoc" \
      --clump-p1 0.01 \
      --clump-r2 0.2 \
      --clump-kb 250 \
      --out "${prefix}"
  done

  # Combine clumped SNPs across chromosomes.
  # The header is taken from the first .clumped file that actually exists
  # rather than from chr1 specifically, so a missing chr1 file does not abort
  # the whole job. If the glob matches nothing it stays a literal pattern,
  # which the -e test below rejects.
  clumped_files=(data/gwas_results/"${phenotype}".chr*."${phenotype}".clumped)
  if [[ ! -e "${clumped_files[0]}" ]]; then
    echo "error: no .clumped files found for phenotype ${phenotype}" >&2
    exit 1
  fi
  combined="data/gwas_results/${phenotype}_combined.clumped"
  head -n 1 "${clumped_files[0]}" > "$combined"
  tail -n +2 -q "${clumped_files[@]}" >> "$combined"

  # Create files of SNPs meeting several p-value thresholds. Files are
  # numbered by the index of the threshold in `thresholds` (0-4).
  for threshold in "${!thresholds[@]}"; do
    # Further filter clumped SNPs using p-value thresholds (removes multiallelic SNPs)
    python 04c_filter_snps_for_pgs.py \
      "$combined" \
      --threshold "${thresholds[$threshold]}" \
      --output "data/pgs/${phenotype}_threshold_${threshold}.txt"
  done

  # Create combined GWAS result files for each phenotype, keeping the SNPs
  # from the threshold-4 file (index 4 is the last entry of `thresholds`, 1e-2).
  python 04d_combine_glm_threshold_4.py \
    data/gwas_results/"${phenotype}".chr*."${phenotype}".glm.linear \
    --keep "data/pgs/${phenotype}_threshold_4.txt" \
    --output "data/gwas_results/${phenotype}_combined.glm.linear"
done

conda deactivate