Creating our Table
Before we load our data in, we need to create our table.
-create table "sift" ('chrom' TEXT, 'pos' INT,
+CREATE TABLE "sift" ('chrom' TEXT, 'pos' INT,
'ref' TEXT, 'alt' TEXT,
'score' REAL, 'nseq' INT);
CREATE INDEX main_index on sift (chrom, pos, ref, alt);
Now that the table is created, we can load our sift.csv
file. We need to change the mode to csv
.
.mode csv
-Now we can import our data using the .import
dot command. Because our sift.csv
+Now we can import our data using the .import
dot command. Because our sift.csv
has a header row, we need to skip it, so we use the --skip 1
argument.
.import --skip 1 sift.csv sift
-We can check that we loaded in our data correctly by using the .schema
command
+We can check that we loaded in our data correctly by using the .schema
command and a SELECT *
query:
.schema
CREATE TABLE sift (chrom text, pos int, ref text, alt text, score real, nseq int);
CREATE INDEX main_index on sift (chrom, pos, ref, alt);
+cha
mode box
- .select * from sift limit 5;
-
- .exit
+SELECT * FROM sift LIMIT 5;
┌───────┬──────────┬─────┬─────┬───────┬──────┐
│ chrom │ pos │ ref │ alt │ score │ nseq │
├───────┼──────────┼─────┼─────┼───────┼──────┤
@@ -241,17 +243,18 @@ Creating our Table
│ chr17 │ 43045682 │ T │ C │ 0.0 │ 7 │
│ chr17 │ 43045682 │ T │ G │ 0.0 │ 7 │
└───────┴──────────┴─────┴─────┴───────┴──────┘
-mode tabs
- .create table "hpo" ('ncbi_gene_id' int, "gene_symbol" TEXT,
-"hpo_id" TEXT, "hpo_name" TEXT, "frequency" TEXT,
- "disease_id" TEXT);
-
---skip 1 genes_to_phenotype.txt hpo
- .import
-
-mode box
- .schema
- .select * from hpo limit 10;
When we’re done, we can
+ .exit
mode tabs
.create table "vcf" ("chrom" TEXT, "pos" INT, "id" TEXT,
"ref" TEXT, "alt" TEXT, "qual" INT,
@@ -261,10 +264,13 @@ Creating our Table
mode box.m
.schema
.select * from vcf limit 10;
Fill out sift_annotator.py
+Fill out sift.py
+Now that our data is loaded into our .sqlite
file, we need to set up our mapping. If we look in sift.py
, we’ll see there are stubs for three methods: setup()
, annotate()
, and cleanup()
:
Fill out
cat /Users/Shared/open-cravat/modules/annotators/sift/sift.py
-annotate()
function
+This is what they look like:
+cat /Users/Shared/open-cravat/modules/annotators/sift/sift.py
import sys
+from cravat import BaseAnnotator
+from cravat import InvalidData
+import sqlite3
+import os
+
+class CravatAnnotator(BaseAnnotator):
+
+ def setup(self):
+ """
+ Set up data sources.
+ Cravat will automatically make a connection to
+ data/example_annotator.sqlite using the sqlite3 python module. The
+ sqlite3.Connection object is stored as self.dbconn, and the
+ sqlite3.Cursor object is stored as self.cursor.
+ """
+ pass
+
+ def annotate(self, input_data, secondary_data=None):
+ """
+ The annotator parent class will call annotate for each line of the
+ input file. It takes one positional argument, input_data, and one
+ keyword argument, secondary_data.
+
+ input_data is a dictionary containing the data from the current input
+ line. The keys depend on what file is used as the input, which can
+ be changed in the module_name.yml file.
+ Variant level includes the following keys:
+ ('uid', 'chrom', 'pos', 'ref_base', 'alt_base')
+ Variant level crx files expand the key set to include:
+ ('hugo', 'transcript','so','all_mappings')
+ Gene level files include
+ ('hugo', 'num_variants', 'so', 'all_so')
+
+ secondary_data is used to allow an annotator to access the output of
+ other annotators. It is described in more detail in the CRAVAT
+ documentation.
+
+ annotate should return a dictionary with keys matching the column names
+ defined in example_annotator.yml. Extra column names will be ignored,
+ and absent column names will be filled with None. Check your output
+ carefully to ensure that your data is ending up where you intend.
+ """
+ out = {}
+ out['placeholder_annotation'] = 'placeholder value'
+ return out
+
+ def cleanup(self):
+ """
+ cleanup is called after every input line has been processed. Use it to
+ close database connections and file handlers. Automatically opened
+ database connections are also automatically closed.
+ """
+ pass
+
+if __name__ == '__main__':
+ annotator = CravatAnnotator(sys.argv)
+ annotator.run()
+We will focus on the annotate()
method first.
annotate()
method
+Our annotate()
method is where we
annotate()
= input_data["chrom"]
- chrom = input_data["pos"]
- pos query = (f'select score, nseq from sift' \
-'where chrom="{chrom}"'\
- 'and pos="{pos}"')
- cursor.execute(query)
- self.= self.cursor.fetchone() result
chrom = input_data["chrom"]
+ pos = input_data["pos"]
+ query = (f'select score, nseq from sift' \
+ 'where chrom="{chrom}"'\
+ 'and pos="{pos}"')
+ self.cursor.execute(query)
+ result = self.cursor.fetchone()
def annotate(self, input_data, secondary_data=None):
-= input_data['chrom']
- chrom = input_data['pos']
- pos = input_data['ref_base']
- ref_base = input_data['alt_base']
- alt_base = f'select score, nseq from sift where chrom="{chrom}" and pos={pos} and ref="{ref_base}" and alt="{alt_base}";'
- query self.cursor.execute(query)
- = self.cursor.fetchone()
- result if result is not None:
- = result[0]
- score = result[1]
- num_seq if score <= 0.05:
- = 'Damaging'
- prediction else:
- = 'Tolerated'
- prediction return {
- 'score': score,
- 'seq_count': num_seq,
- 'prediction': prediction,
-
- }else:
- return None
def annotate(self, input_data, secondary_data=None):
+    chrom = input_data['chrom']
+    pos = input_data['pos']
+    ref_base = input_data['ref_base']
+    alt_base = input_data['alt_base']
+    query = f'select score, nseq from sift where chrom="{chrom}" and pos={pos} and ref="{ref_base}" and alt="{alt_base}";'
+    self.cursor.execute(query)
+    result = self.cursor.fetchone()
+    if result is not None:
+        score = result[0]
+        num_seq = result[1]
+        if score <= 0.05:
+            prediction = 'Damaging'
+        else:
+            prediction = 'Tolerated'
+        return {
+            'score': score,
+            'seq_count': num_seq,
+            'prediction': prediction,
+        }
+    else:
+        return None
Configure sift_annotator.yml
+Now that our annotate()
method is filled in, we need to configure how our annotations will be displayed.
#| eval: false
cat /Users/Shared/open-cravat/modules/annotators/sift_annotator/sift_annotator.yml
# 'title' is the name of the module that will be displayed to the user
diff --git a/making_annotator_modules.qmd b/making_annotator_modules.qmd
index 46afb13..b06ad10 100644
--- a/making_annotator_modules.qmd
+++ b/making_annotator_modules.qmd
@@ -10,7 +10,9 @@ format: html
Creating an annotator module requires the following:
1. Creating a new annotator skeleton using `oc new annotator `
-2. Loading an annotator file into a SQLite database using
+2. Loading an annotator file into a SQLite database (`.sqlite`) using `sqlite3`
+3. Mapping the annotator sqlite file in the `.py` file
+4. Customizing the output using the `.yml` file
```{mermaid}
flowchart LR