-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy path5_ProteinProperties.py
76 lines (71 loc) · 3.26 KB
/
5_ProteinProperties.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# -*- coding: utf-8 -*-
"""
Investigation of the properties of proteins with different cell cycle regulation using PTMs and stability measurements
- PTMs are observed using previously published bulk and phospho-enriched mass spectrometry (MS) proteomic data
- Differences in PTM regulation are inferred using PTM occupancy for each PTM site
- Protein stability was measured by MS thermal profiling in an external study
- Differences in thermal shifts indicate different stabilities and propensity for unfolding
@author: Anthony J. Cesnik, [email protected]
"""
from SingleCellProteogenomics import (FucciCellCycle, Loaders,
ProteinPropertyAnalysis,
RNADataPreparation, utils)
import matplotlib.pyplot as plt
import numpy as np
# Figure export settings: embed fonts as Type 42 (TrueType) so that text in
# saved PDF/PS figures stays readable, and save figures at 300 dpi.
plt.rcParams.update(
    {
        "pdf.fonttype": 42,
        "ps.fonttype": 42,
        "savefig.dpi": 300,
    }
)

# NOTE(review): `fucci` is not referenced again in this file; it is kept
# because constructing it may have side effects -- confirm before removing.
fucci = FucciCellCycle.FucciCellCycle()

# Plate identifiers handed to RNADataPreparation.read_counts_and_phases below.
u_rna_plates = ["355", "356", "357"]
#%% Import the gene names we're analyzing
# Read in the RNA-Seq data again, then filter and zero-center it.
valuetype = "Tpms"
use_spikeins = False
biotype_to_use = "protein_coding"
adata, phases = RNADataPreparation.read_counts_and_phases(
    valuetype, use_spikeins, biotype_to_use, u_rna_plates
)
adata, phasesfilt = RNADataPreparation.qc_filtering(
    adata, do_log_normalize=True, do_remove_blob=True
)
adata = RNADataPreparation.zero_center_fucci(adata)

# Load the PTM/stability inputs and pull the needed entries out of the
# returned dictionary (all keys are looked up before any name is bound).
import_dict = Loaders.load_ptm_and_stability(adata)
wp_ensg, ccd_comp, nonccd_comp, ccdtranscript, wp_max_pol = (
    import_dict[key]
    for key in (
        "wp_ensg",
        "ccd_comp",
        "nonccd_comp",
        "ccdtranscript",
        "wp_max_pol",
    )
)

# Gene groups by category: `ensg_results` and `name_results` are unpacked in
# parallel, the first into `ensg_*`/ID variables and the second into `names_*`.
ensg_results, name_results = utils.save_gene_names_by_category(
    adata, wp_ensg, ccd_comp, nonccd_comp, ccdtranscript
)
(
    ensg_ccdtranscript,
    ensg_nonccdtranscript,
    ensg_ccdprotein,
    ensg_nonccdprotein,
    ensg_ccdprotein_transcript_regulated,
    ensg_ccdprotein_nontranscript_regulated,
    genes_analyzed,
    ccd_regev_filtered,
    ccd_filtered,
) = ensg_results
(
    names_ccdtranscript,
    names_nonccdtranscript,
    names_ccdprotein,
    names_nonccdprotein,
    names_ccdprotein_transcript_regulated,
    names_ccdprotein_nontranscript_regulated,
    names_genes_analyzed,
    names_ccd_regev_filtered,
    names_ccd_filtered,
) = name_results

# Biologically defined CCD genes (from mitotic structures), read as strings,
# plus their gene names.
bioccd = np.genfromtxt(
    "input/ProteinData/BiologicallyDefinedCCD.txt",
    dtype="str",
)
names_bioccd = utils.ccd_gene_names(bioccd, utils.getGeneNameDict())
#%% Analyze properties of the different groups relative to melting points
# Arguments are positional, so their order must stay exactly as listed.
proteinProperties = ProteinPropertyAnalysis.ProteinProperties(
    # Identifier groups
    wp_ensg,
    ensg_ccdprotein,
    ensg_ccdprotein_transcript_regulated,
    ensg_ccdprotein_nontranscript_regulated,
    bioccd,
    ensg_nonccdprotein,
    ensg_ccdtranscript,
    # Gene-name groups
    names_bioccd,
    names_ccdprotein,
    names_ccdprotein_transcript_regulated,
    names_ccdprotein_nontranscript_regulated,
    names_nonccdprotein,
    names_ccdtranscript,
)

# Run every analysis/reporting step in this fixed order; each method is looked
# up and called with no arguments, one after the other.
_ANALYSIS_STEPS = (
    "analyze_melting_points",
    "analyze_disorder",
    "analyze_length",
    "statistical_properties_table",
    "generate_properties_table",
    "generate_statistical_boxplots",
    "tm_scatters",
    "kinase_families",
)
for _step_name in _ANALYSIS_STEPS:
    getattr(proteinProperties, _step_name)()