|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "code", |
| 5 | + "execution_count": 12, |
| 6 | + "metadata": {}, |
| 7 | + "outputs": [], |
| 8 | + "source": [ |
import os

from ga4gh.vrs.dataproxy import create_dataproxy

# Build a data proxy backed by the hosted SeqRepo REST service.
seqrepo_rest_service_url = "seqrepo+https://services.genomicmedlab.org/seqrepo"
seqrepo_dataproxy = create_dataproxy(uri=seqrepo_rest_service_url)

# Environment configuration read by downstream libraries at import time.
# NOTE(review): SEQREPO_ROOT_DIR is set to a URL here, but the traceback stored in
# this notebook shows SeqRepo checking the root dir with os.path.exists(), which
# suggests a local filesystem path is expected -- confirm the intended value.
os.environ.update(
    {
        "UTA_DB_URL": "postgresql://anonymous:[email protected]:5432/uta/uta_20210129b",
        "SEQREPO_ROOT_DIR": "https://services.genomicmedlab.org/seqrepo",
    }
)
| 16 | + ] |
| 17 | + }, |
| 18 | + { |
| 19 | + "cell_type": "code", |
| 20 | + "execution_count": 13, |
| 21 | + "metadata": {}, |
| 22 | + "outputs": [ |
| 23 | + { |
| 24 | + "ename": "OSError", |
| 25 | + "evalue": "Unable to open SeqRepo directory /usr/local/share/seqrepo/latest", |
| 26 | + "output_type": "error", |
| 27 | + "traceback": [ |
| 28 | + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
| 29 | + "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)", |
| 30 | + "Cell \u001b[0;32mIn[13], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mga4gh\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcat_vrs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore_models\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CategoricalVariant, Constraint, CopyCountConstraint, DefiningContextConstraint, CopyChangeConstraint\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mga4gh\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mvrs\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodels\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CopyNumberCount, CopyNumberChange, Range\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mvariation\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmain\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m parsed_to_cn_var\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmatch_copy_number_count\u001b[39m(copy_number_count: CopyNumberCount,\n\u001b[1;32m 6\u001b[0m categorical_variation: CategoricalVariant):\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28many\u001b[39m(member \u001b[38;5;241m==\u001b[39m copy_number_count \u001b[38;5;28;01mfor\u001b[39;00m member \u001b[38;5;129;01min\u001b[39;00m categorical_variation\u001b[38;5;241m.\u001b[39mmembers):\n", |
| 31 | + "File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/variation/main.py:68\u001b[0m\n\u001b[1;32m 64\u001b[0m TO_COPY_NUMBER_VARIATION \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTo Copy Number Variation\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 65\u001b[0m ALIGNMENT_MAPPER \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAlignment Mapper\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m---> 68\u001b[0m query_handler \u001b[38;5;241m=\u001b[39m \u001b[43mQueryHandler\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;129m@asynccontextmanager\u001b[39m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlifespan\u001b[39m(app: FastAPI) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m AsyncGenerator: \u001b[38;5;66;03m# noqa: ARG001\u001b[39;00m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Configure FastAPI instance lifespan.\u001b[39;00m\n\u001b[1;32m 74\u001b[0m \n\u001b[1;32m 75\u001b[0m \u001b[38;5;124;03m :param app: FastAPI app instance\u001b[39;00m\n\u001b[1;32m 76\u001b[0m \u001b[38;5;124;03m :return: async context handler\u001b[39;00m\n\u001b[1;32m 77\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n", |
| 32 | + "File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/variation/query.py:32\u001b[0m, in \u001b[0;36mQueryHandler.__init__\u001b[0;34m(self, gene_query_handler)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 25\u001b[0m gene_query_handler: GeneQueryHandler \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 26\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 27\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Initialize QueryHandler instance.\u001b[39;00m\n\u001b[1;32m 28\u001b[0m \u001b[38;5;124;03m :param gene_query_handler: Gene normalizer query handler instance. If this is\u001b[39;00m\n\u001b[1;32m 29\u001b[0m \u001b[38;5;124;03m provided, will use a current instance. If this is not provided, will create\u001b[39;00m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;124;03m a new instance.\u001b[39;00m\n\u001b[1;32m 31\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 32\u001b[0m cool_seq_tool \u001b[38;5;241m=\u001b[39m \u001b[43mCoolSeqTool\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mseqrepo_access \u001b[38;5;241m=\u001b[39m cool_seq_tool\u001b[38;5;241m.\u001b[39mseqrepo_access\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m gene_query_handler:\n", |
| 33 | + "File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/cool_seq_tool/app.py:85\u001b[0m, in \u001b[0;36mCoolSeqTool.__init__\u001b[0;34m(self, transcript_file_path, lrg_refseqgene_path, mane_data_path, db_url, sr, force_local_files)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Initialize CoolSeqTool class.\u001b[39;00m\n\u001b[1;32m 48\u001b[0m \n\u001b[1;32m 49\u001b[0m \u001b[38;5;124;03mInitialization with default resource locations is straightforward:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;124;03m versions of static data files -- just use most recently available, if any\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m sr:\n\u001b[0;32m---> 85\u001b[0m sr \u001b[38;5;241m=\u001b[39m \u001b[43mSeqRepo\u001b[49m\u001b[43m(\u001b[49m\u001b[43mroot_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mSEQREPO_ROOT_DIR\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mseqrepo_access \u001b[38;5;241m=\u001b[39m SeqRepoAccess(sr)\n\u001b[1;32m 87\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtranscript_mappings \u001b[38;5;241m=\u001b[39m TranscriptMappings(\n\u001b[1;32m 88\u001b[0m transcript_file_path\u001b[38;5;241m=\u001b[39mtranscript_file_path,\n\u001b[1;32m 89\u001b[0m lrg_refseqgene_path\u001b[38;5;241m=\u001b[39mlrg_refseqgene_path,\n\u001b[1;32m 90\u001b[0m from_local\u001b[38;5;241m=\u001b[39mforce_local_files,\n\u001b[1;32m 91\u001b[0m )\n", |
| 34 | + "File \u001b[0;32m/mnt/c/EpicSource/GA4GH/cat-vrs-python/venv/3.12/lib/python3.12/site-packages/biocommons/seqrepo/seqrepo.py:120\u001b[0m, in \u001b[0;36mSeqRepo.__init__\u001b[0;34m(self, root_dir, writeable, upcase, translate_ncbi_namespace, check_same_thread, use_sequenceproxy, fd_cache_size)\u001b[0m\n\u001b[1;32m 117\u001b[0m os\u001b[38;5;241m.\u001b[39mmakedirs(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_root_dir, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 119\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_root_dir):\n\u001b[0;32m--> 120\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnable to open SeqRepo directory \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_root_dir))\n\u001b[1;32m 122\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msequences \u001b[38;5;241m=\u001b[39m FastaDir(\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_seq_path,\n\u001b[1;32m 124\u001b[0m writeable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_writeable,\n\u001b[1;32m 125\u001b[0m check_same_thread\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_same_thread,\n\u001b[1;32m 126\u001b[0m fd_cache_size\u001b[38;5;241m=\u001b[39mSEQREPO_FD_CACHE_MAXSIZE \u001b[38;5;28;01mif\u001b[39;00m SEQREPO_FD_CACHE_MAXSIZE \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m fd_cache_size\n\u001b[1;32m 127\u001b[0m )\n\u001b[1;32m 128\u001b[0m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maliases \u001b[38;5;241m=\u001b[39m SeqAliasDB(\n\u001b[1;32m 129\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_db_path,\n\u001b[1;32m 130\u001b[0m writeable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_writeable,\n\u001b[1;32m 131\u001b[0m check_same_thread\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_same_thread,\n\u001b[1;32m 132\u001b[0m )\n", |
| 35 | + "\u001b[0;31mOSError\u001b[0m: Unable to open SeqRepo directory /usr/local/share/seqrepo/latest" |
| 36 | + ] |
| 37 | + } |
| 38 | + ], |
| 39 | + "source": [ |
| 40 | + "from ga4gh.cat_vrs.core_models import CategoricalVariant, Constraint, CopyCountConstraint, DefiningContextConstraint, CopyChangeConstraint\n", |
| 41 | + "from ga4gh.vrs.models import CopyNumberCount, CopyNumberChange, Range, SequenceLocation\n", |
| 42 | + "\n", |
def match_copy_number_count(copy_number_count: CopyNumberCount,
                            categorical_variation: CategoricalVariant) -> bool:
    """Decide whether a copy-number-count variation belongs to a categorical variant.

    The variation matches when it is listed explicitly in
    ``categorical_variation.members``, or when the categorical variant carries at
    least one copy-count / defining-context constraint and every such constraint
    is satisfied.

    :param copy_number_count: the variation being tested
    :param categorical_variation: the categorical variant to test against
    :return: ``True`` on a match, ``False`` otherwise
    """
    # Explicit membership short-circuits all constraint checks.
    # ``members`` may be unset, so fall back to an empty list.
    members = categorical_variation.members or []
    if any(member == copy_number_count for member in members):
        return True

    constraints = categorical_variation.constraints or []
    count_constraints = get_constraints_of_type(constraints, CopyCountConstraint)
    location_constraints = get_constraints_of_type(constraints, DefiningContextConstraint)

    # With no constraint this function knows how to evaluate, there is nothing
    # to satisfy, so the variation is not considered a match.
    if not count_constraints and not location_constraints:
        return False

    if not all(check_overlap(copy_number_count.copies, constraint.copies)
               for constraint in count_constraints):
        return False

    for constraint in location_constraints:
        context = constraint.definingContext
        # Only sequence-location contexts are compared; other kinds are ignored.
        if not isinstance(context, SequenceLocation):
            continue

        # NOTE(review): the previous code read ``location.location``; this assumes
        # CopyNumberCount.location is itself the SequenceLocation -- confirm
        # against the installed ga4gh.vrs models.
        if context.sequenceReference.id != copy_number_count.location.sequenceReference.id:
            return False

        # TODO: location coordinates

    # Every evaluated constraint passed (previously this fell through to None).
    return True
| 63 | + "\n", |
def get_constraints_of_type(constraints_list: "list[Constraint] | None", constraint_type: type) -> list:
    """Return the entries of ``constraints_list`` that are instances of ``constraint_type``.

    :param constraints_list: constraints to filter; ``None`` is treated as empty
    :param constraint_type: class (or tuple of classes) accepted by ``isinstance``
    :return: a new list, in the original order, of the matching constraints
    """
    # NOTE(review): if constraints arrive wrapped in a container model, the
    # isinstance check on the wrapper will not match the inner type -- confirm.
    return [c for c in (constraints_list or []) if isinstance(c, constraint_type)]
| 66 | + "\n", |
def _copy_bounds(value: "int | Range") -> tuple:
    """Normalise an int or a two-element range into an inclusive ``(low, high)`` pair."""
    if isinstance(value, int):
        return value, value

    # Range-like value: read the two bounds from a ``root`` attribute when
    # present, otherwise index the object directly.
    # NOTE(review): presumably ga4gh.vrs ``Range`` exposes its bounds via
    # ``.root`` -- confirm against the installed version.
    bounds = getattr(value, "root", value)
    low, high = bounds[0], bounds[1]

    # Assumption: a ``None`` bound means the range is open on that side.
    return (
        float("-inf") if low is None else low,
        float("inf") if high is None else high,
    )


def check_overlap(val1: "int | Range", val2: "int | Range") -> bool:
    """Report whether two copy counts (ints or inclusive ranges) share any value.

    Fixes over the previous version: two ints are compared with each other
    (it used to compare ``val2`` with itself), a range fully containing the
    other now counts as overlapping, and endpoints are treated as inclusive.

    :param val1: an integer or a two-element range
    :param val2: an integer or a two-element range
    :return: ``True`` if the two values intersect, else ``False``
    :raises TypeError: if a value is neither an int nor indexable as a range
    """
    low1, high1 = _copy_bounds(val1)
    low2, high2 = _copy_bounds(val2)
    # Two closed intervals intersect when each starts no later than the other ends.
    return low1 <= high2 and low2 <= high1
| 81 | + ] |
| 82 | + } |
| 83 | + ], |
| 84 | + "metadata": { |
| 85 | + "kernelspec": { |
| 86 | + "display_name": "3.12", |
| 87 | + "language": "python", |
| 88 | + "name": "python3" |
| 89 | + }, |
| 90 | + "language_info": { |
| 91 | + "codemirror_mode": { |
| 92 | + "name": "ipython", |
| 93 | + "version": 3 |
| 94 | + }, |
| 95 | + "file_extension": ".py", |
| 96 | + "mimetype": "text/x-python", |
| 97 | + "name": "python", |
| 98 | + "nbconvert_exporter": "python", |
| 99 | + "pygments_lexer": "ipython3", |
| 100 | + "version": "3.12.1" |
| 101 | + } |
| 102 | + }, |
| 103 | + "nbformat": 4, |
| 104 | + "nbformat_minor": 2 |
| 105 | +} |
0 commit comments