Skip to content

Commit 77d84d1

Browse files
Notebook with very minimal starting point
1 parent 6b35c8e commit 77d84d1

File tree

2 files changed

+107
-0
lines changed

2 files changed

+107
-0
lines changed

notebooks/cnv_matching.ipynb

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 12,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"from ga4gh.vrs.dataproxy import create_dataproxy\n",
10+
"seqrepo_rest_service_url = \"seqrepo+https://services.genomicmedlab.org/seqrepo\"\n",
11+
"seqrepo_dataproxy = create_dataproxy(uri=seqrepo_rest_service_url)\n",
12+
"\n",
13+
"import os\n",
14+
"os.environ[\"UTA_DB_URL\"] = \"postgresql://anonymous:anonymous@uta.biocommons.org:5432/uta/uta_20210129b\"\n",
15+
"os.environ[\"SEQREPO_ROOT_DIR\"] = \"https://services.genomicmedlab.org/seqrepo\"\n"
16+
]
17+
},
18+
{
19+
"cell_type": "code",
20+
"execution_count": 13,
21+
"metadata": {},
22+
"outputs": [
23+
{
24+
"ename": "OSError",
25+
"evalue": "Unable to open SeqRepo directory /usr/local/share/seqrepo/latest",
26+
"output_type": "error",
27+
"traceback": [
28+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
29+
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
30+
"Cell \u001b[0;32mIn[13], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mga4gh\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcat_vrs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore_models\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CategoricalVariant, Constraint, CopyCountConstraint, DefiningContextConstraint, CopyChangeConstraint\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mga4gh\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mvrs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CopyNumberCount, CopyNumberChange, Range\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mvariation\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmain\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m parsed_to_cn_var\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmatch_copy_number_count\u001b[39m(copy_number_count: CopyNumberCount,\n\u001b[1;32m 6\u001b[0m categorical_variation: CategoricalVariant):\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m(member \u001b[38;5;241m==\u001b[39m copy_number_count \u001b[38;5;28;01mfor\u001b[39;00m member \u001b[38;5;129;01min\u001b[39;00m categorical_variation\u001b[38;5;241m.\u001b[39mmembers):\n",
31+
"File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/variation/main.py:68\u001b[0m\n\u001b[1;32m 64\u001b[0m TO_COPY_NUMBER_VARIATION \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTo Copy Number Variation\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 65\u001b[0m ALIGNMENT_MAPPER \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAlignment Mapper\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m---> 68\u001b[0m query_handler \u001b[38;5;241m=\u001b[39m \u001b[43mQueryHandler\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;129m@asynccontextmanager\u001b[39m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlifespan\u001b[39m(app: FastAPI) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m AsyncGenerator: \u001b[38;5;66;03m# noqa: ARG001\u001b[39;00m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Configure FastAPI instance lifespan.\u001b[39;00m\n\u001b[1;32m 74\u001b[0m \n\u001b[1;32m 75\u001b[0m \u001b[38;5;124;03m :param app: FastAPI app instance\u001b[39;00m\n\u001b[1;32m 76\u001b[0m \u001b[38;5;124;03m :return: async context handler\u001b[39;00m\n\u001b[1;32m 77\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n",
32+
"File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/variation/query.py:32\u001b[0m, in \u001b[0;36mQueryHandler.__init__\u001b[0;34m(self, gene_query_handler)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 25\u001b[0m gene_query_handler: GeneQueryHandler \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 26\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 27\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Initialize QueryHandler instance.\u001b[39;00m\n\u001b[1;32m 28\u001b[0m \u001b[38;5;124;03m :param gene_query_handler: Gene normalizer query handler instance. If this is\u001b[39;00m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;124;03m provided, will use a current instance. If this is not provided, will create\u001b[39;00m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;124;03m a new instance.\u001b[39;00m\n\u001b[1;32m 31\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 32\u001b[0m cool_seq_tool \u001b[38;5;241m=\u001b[39m \u001b[43mCoolSeqTool\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mseqrepo_access \u001b[38;5;241m=\u001b[39m cool_seq_tool\u001b[38;5;241m.\u001b[39mseqrepo_access\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m gene_query_handler:\n",
33+
"File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/cool_seq_tool/app.py:85\u001b[0m, in \u001b[0;36mCoolSeqTool.__init__\u001b[0;34m(self, transcript_file_path, lrg_refseqgene_path, mane_data_path, db_url, sr, force_local_files)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Initialize CoolSeqTool class.\u001b[39;00m\n\u001b[1;32m 48\u001b[0m \n\u001b[1;32m 49\u001b[0m \u001b[38;5;124;03mInitialization with default resource locations is straightforward:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;124;03m versions of static data files -- just use most recently available, if any\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m sr:\n\u001b[0;32m---> 85\u001b[0m sr \u001b[38;5;241m=\u001b[39m \u001b[43mSeqRepo\u001b[49m\u001b[43m(\u001b[49m\u001b[43mroot_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mSEQREPO_ROOT_DIR\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mseqrepo_access \u001b[38;5;241m=\u001b[39m SeqRepoAccess(sr)\n\u001b[1;32m 87\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtranscript_mappings \u001b[38;5;241m=\u001b[39m TranscriptMappings(\n\u001b[1;32m 88\u001b[0m transcript_file_path\u001b[38;5;241m=\u001b[39mtranscript_file_path,\n\u001b[1;32m 89\u001b[0m lrg_refseqgene_path\u001b[38;5;241m=\u001b[39mlrg_refseqgene_path,\n\u001b[1;32m 90\u001b[0m from_local\u001b[38;5;241m=\u001b[39mforce_local_files,\n\u001b[1;32m 91\u001b[0m )\n",
34+
"File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/biocommons/seqrepo/seqrepo.py:120\u001b[0m, in \u001b[0;36mSeqRepo.__init__\u001b[0;34m(self, root_dir, writeable, upcase, translate_ncbi_namespace, check_same_thread, use_sequenceproxy, fd_cache_size)\u001b[0m\n\u001b[1;32m 117\u001b[0m os\u001b[38;5;241m.\u001b[39mmakedirs(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_root_dir, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 119\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_root_dir):\n\u001b[0;32m--> 120\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnable to open SeqRepo directory \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_root_dir))\n\u001b[1;32m 122\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msequences \u001b[38;5;241m=\u001b[39m FastaDir(\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_seq_path,\n\u001b[1;32m 124\u001b[0m writeable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_writeable,\n\u001b[1;32m 125\u001b[0m check_same_thread\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_same_thread,\n\u001b[1;32m 126\u001b[0m fd_cache_size\u001b[38;5;241m=\u001b[39mSEQREPO_FD_CACHE_MAXSIZE \u001b[38;5;28;01mif\u001b[39;00m SEQREPO_FD_CACHE_MAXSIZE \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m fd_cache_size\n\u001b[1;32m 127\u001b[0m )\n\u001b[1;32m 128\u001b[0m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maliases \u001b[38;5;241m=\u001b[39m SeqAliasDB(\n\u001b[1;32m 129\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_db_path,\n\u001b[1;32m 130\u001b[0m writeable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_writeable,\n\u001b[1;32m 131\u001b[0m check_same_thread\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_same_thread,\n\u001b[1;32m 132\u001b[0m )\n",
35+
"\u001b[0;31mOSError\u001b[0m: Unable to open SeqRepo directory /usr/local/share/seqrepo/latest"
36+
]
37+
}
38+
],
39+
"source": [
40+
"from ga4gh.cat_vrs.core_models import CategoricalVariant, Constraint, CopyCountConstraint, DefiningContextConstraint, CopyChangeConstraint\n",
41+
"from ga4gh.vrs.models import CopyNumberCount, CopyNumberChange, Range, SequenceLocation\n",
42+
"\n",
43+
"def match_copy_number_count(copy_number_count: CopyNumberCount,\n",
44+
" categorical_variation: CategoricalVariant):\n",
45+
" if any(member == copy_number_count for member in categorical_variation.members):\n",
46+
" return True\n",
47+
"\n",
48+
" count_constraints = get_constraints_of_type(categorical_variation.constraints, CopyCountConstraint)\n",
49+
" if not all(check_overlap(copy_number_count.copies, constraint.copies) for constraint in count_constraints):\n",
50+
" return False\n",
51+
"\n",
52+
" location_constraints = get_constraints_of_type(categorical_variation.constraints, DefiningContextConstraint)\n",
53+
" for constraint in location_constraints:\n",
54+
" if isinstance(constraint.definingContext, SequenceLocation):\n",
55+
" if not constraint.definingContext.sequenceReference.id == \\\n",
56+
" copy_number_count.location.location.sequenceReference.id:\n",
57+
" return False\n",
58+
" \n",
59+
" # TODO: location coordinates\n",
60+
"\n",
61+
" else:\n",
62+
" pass\n",
63+
"\n",
64+
"def get_constraints_of_type(constraints_list: list[Constraint], constraint_type:type):\n",
65+
" return [c for c in constraints_list if isinstance(c, constraint_type)]\n",
66+
"\n",
67+
"def check_overlap(val1: int|Range, val2: int|Range):\n",
68+
" if isinstance(val1, Range):\n",
69+
" if isinstance(val2, Range):\n",
70+
" return val1[0] < val2[0] < val1[1] \\\n",
71+
" or val1[0] < val2[1] < val1[1]\n",
72+
" elif isinstance(val2, int):\n",
73+
" return val1[0] < val2 < val1[1]\n",
74+
" \n",
75+
" elif isinstance(val1, int):\n",
76+
" if isinstance(val2, Range):\n",
77+
" return val2[0] < val1 < val2[1]\n",
78+
" \n",
79+
" elif isinstance(val2, int):\n",
80+
" return val1 == val2"
81+
]
82+
}
83+
],
84+
"metadata": {
85+
"kernelspec": {
86+
"display_name": "3.12",
87+
"language": "python",
88+
"name": "python3"
89+
},
90+
"language_info": {
91+
"codemirror_mode": {
92+
"name": "ipython",
93+
"version": 3
94+
},
95+
"file_extension": ".py",
96+
"mimetype": "text/x-python",
97+
"name": "python",
98+
"nbconvert_exporter": "python",
99+
"pygments_lexer": "ipython3",
100+
"version": "3.12.1"
101+
}
102+
},
103+
"nbformat": 4,
104+
"nbformat_minor": 2
105+
}

requirements.txt

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
ga4gh.vrs[extras]
2+
variation-normalizer

0 commit comments

Comments
 (0)