Skip to content

Commit

Permalink
added some changes
Browse files Browse the repository at this point in the history
  • Loading branch information
guhanrv committed Feb 13, 2020
1 parent f6be4c3 commit 0ee2e56
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 51 deletions.
4 changes: 2 additions & 2 deletions analyses/GWAS/fet.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@
)
)
pop_ca_co_mt = pop_ca_co_mt.annotate_rows(
se=1.0 / (pop_ca_co_mt.caac + 0.5)
se=hl.sqrt(1.0 / (pop_ca_co_mt.caac + 0.5)
+ 1.0 / (pop_ca_co_mt.canac + 0.5)
+ 1.0 / (pop_ca_co_mt.coac + 0.5)
+ 1.0 / (pop_ca_co_mt.conac + 0.5)
+ 1.0 / (pop_ca_co_mt.conac + 0.5))
)
pop_ca_co_mt = pop_ca_co_mt.annotate_rows(fet_p_value=pop_ca_co_mt.fet.p_value)

Expand Down
89 changes: 50 additions & 39 deletions analyses/GWAS/liftover_sumstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,42 +6,53 @@
rg37.add_liftover("gs://hail-common/references/grch37_to_grch38.over.chain.gz", rg38)

# Loop through summary statistics and assign new loci
for disease in ["cd", "ibd", "uc"]:
for pop in ["AFR", "HISZ12", "HISZ3"]:
tbl = hl.import_table(
"gs://ibd-exomes/v36meta/curated."
+ pop
+ "_"
+ disease
+ "_FET_results.tsv.gz",
force=True,
)
print(tbl.count())
tbl = tbl.annotate(variant=hl.parse_variant(tbl.V, reference_genome="GRCh37"))
tbl = tbl.annotate(locus=tbl.variant.locus, alleles=tbl.variant.alleles)
tbl = tbl.annotate(new_locus=hl.liftover(tbl.locus, "GRCh38"))
tbl = tbl.filter(hl.is_defined(tbl.new_locus))
tbl = tbl.annotate(
new_V=hl.variant_str(tbl.new_locus, tbl.alleles).replace("chr", "")
)
tbl.select(
V=tbl.new_V,
P=tbl.P,
OR=tbl.OR,
SE=tbl.SE,
CaAC=tbl.CaAC,
CaNAC=tbl.CaNAC,
CoAC=tbl.CoAC,
CoNAC=tbl.CoNAC,
maf=tbl.maf,
call_rate=tbl.call_rate,
mean_dp=tbl.mean_dp,
case_phwe=tbl.case_phwe,
control_phwe=tbl.control_phwe,
).export(
"gs://ibd-exomes/v36meta/curated."
+ pop
+ "_"
+ disease
+ "_FET_results_liftover.tsv.gz"
)
# for disease in ["cd", "ibd", "uc"]:
# for pop in ["AFR", "HISZ12", "HISZ3"]:
# tbl = hl.import_table(
# "gs://ibd-exomes/v36meta/curated."
# + pop
# + "_"
# + disease
# + "_FET_results.tsv.gz",
# force=True,
# )
# print(tbl.count())
# tbl = tbl.annotate(variant=hl.parse_variant(tbl.V, reference_genome="GRCh37"))
# tbl = tbl.annotate(locus=tbl.variant.locus, alleles=tbl.variant.alleles)
# tbl = tbl.annotate(new_locus=hl.liftover(tbl.locus, "GRCh38"))
# tbl = tbl.filter(hl.is_defined(tbl.new_locus))
# tbl = tbl.annotate(
# new_V=hl.variant_str(tbl.new_locus, tbl.alleles).replace("chr", "")
# )
# tbl.select(
# V=tbl.new_V,
# P=tbl.P,
# OR=tbl.OR,
# SE=tbl.SE,
# CaAC=tbl.CaAC,
# CaNAC=tbl.CaNAC,
# CoAC=tbl.CoAC,
# CoNAC=tbl.CoNAC,
# maf=tbl.maf,
# call_rate=tbl.call_rate,
# mean_dp=tbl.mean_dp,
# case_phwe=tbl.case_phwe,
# control_phwe=tbl.control_phwe,
# ).export(
# "gs://ibd-exomes/v36meta/curated."
# + pop
# + "_"
# + disease
# + "_FET_results_liftover.tsv.gz"
# )
tbl = hl.import_table("gs://ukbb-exome/ld_indep_array.tsv")
tbl = tbl.annotate(variant=hl.parse_variant(tbl.V, reference_genome="GRCh37"))
tbl = tbl.annotate(locus=tbl.variant.locus, alleles=tbl.variant.alleles)
tbl = tbl.annotate(new_locus=hl.liftover(tbl.locus, "GRCh38"))
tbl = tbl.filter(hl.is_defined(tbl.new_locus))
tbl = tbl.annotate(
new_V=hl.variant_str(tbl.new_locus, tbl.alleles).replace("chr", "")
)
tbl.select(
V=tbl.new_V,
).export("gs://ukbb-exome/ld_indep_exome.tsv")
28 changes: 28 additions & 0 deletions analyses/GWAS/test_adam.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import hail as hl




#ADAM17: 2-9661450-A-G
#RELA: 11:65658293:C:T
#SDF2L1: 22-21998280-G-A

chrs = ["chr2", "chr11", "chr22"]
positions = [9521321, 65658293, 21643991]
refs = ["A", "C", "G"]
alts = ["G", "T", "A"]


for pop in ["AJ", "FIN", "LIT", "NFE"]:
for disease in ["cd", "ibd", "uc"]:
print("Reading in " + pop + " " + disease + " MT...")
mt = hl.read_matrix_table("gs://ibd-exomes/v36meta/" + pop + "_" + disease + ".mt/")

for chrom, position, ref, alt in zip(chrs, positions, refs, alts):
print(chrom, position, ref, alt)
var_mt = mt.filter_rows(
(mt.locus == hl.locus(chrom, position, reference_genome="GRCh38"))
& (mt.alleles == hl.array([ref, alt]))
)

print(var_mt.count())
9 changes: 6 additions & 3 deletions analyses/meta_analysis/cmh.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,12 @@ def write_file(vardict, varids, pops, disease, test):


def main():
pops = ["curated.AFR", "AJ", "curated.HISZ12", "curated.HISZ3", "FIN", "LIT", "NFE"]
diseases = ["cd", "ibd", "uc"]
tests = ["FET", "logreg"]
#pops = ["curated.AFR", "AJ", "curated.HISZ12", "curated.HISZ3", "FIN", "LIT", "NFE"]
pops = ["AJ", "FIN", "LIT", "NFE"]
#diseases = ["cd", "ibd", "uc"]
diseases = ["ibd", "uc"]
#tests = ["FET", "logreg"]
tests = ["FET"]
for disease in diseases:
for test in tests:
print("Storing data for " + test + " meta-analysis of " + disease + "...")
Expand Down
8 changes: 1 addition & 7 deletions qc/complete_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,18 +95,12 @@
print("Variants left:")
print(variants.count())

# Annotate back in
# Annotate back in, Autosomal Filtration
mt = mt.annotate_rows(vep=variants[mt.row_key].vep)
mt = mt.filter_rows(hl.is_defined(mt.vep))

variants = None

print("Checkpointing...")
# mt = mt.checkpoint('gs://ibd-exomes/v36meta/v36+ccdg_varnarrow.mt', overwrite=True)
mt = mt.checkpoint("gs://ibd-exomes/v36meta/v36_varnarrow.mt", overwrite=True)

# Autosomal Filtration###########################################

print("Filtering out X, Y, MT...")
mt = mt.filter_rows(
(mt.locus.contig != "X") & (mt.locus.contig != "Y") & (mt.locus.contig != "MT")
Expand Down

0 comments on commit 0ee2e56

Please sign in to comment.