diff --git a/making_annotator_modules.html b/making_annotator_modules.html index 795b4ed..345049f 100644 --- a/making_annotator_modules.html +++ b/making_annotator_modules.html @@ -106,7 +106,9 @@

Annotator Basic

Creating an annotator module requires the following:

  1. Creating a new annotator skeleton using oc new annotator <modulename>
  2. -
  3. Loading an annotator file into a SQLite database using
  4. +
  5. Loading an annotator file into a SQLite database (<modulename>.sqlite) using sqlite3
  6. +
  7. Mapping the annotator sqlite file in the <modulename>.py file
  8. +
  9. Customizing the output using the <modulename>.yml file
@@ -210,28 +212,28 @@

Creating our Table

Before we load our data in, we need to create our table.

-
create table "sift" ('chrom' TEXT, 'pos' INT, 
+
CREATE TABLE "sift" ('chrom' TEXT, 'pos' INT, 
                      'ref' TEXT, 'alt' TEXT, 
                      'score' REAL, 'nseq' INT);
 CREATE INDEX main_index on sift (chrom, pos, ref, alt);

Now that the table is created, we can load our sift.csv file. We need to change the mode to csv.

.mode csv
-

Now we can import our data using the .import dot command. Because our sift.csv

+

Now we can import our data using the .import dot command. Because our sift.csv has a header row, we need to skip it, so we use the --skip 1 argument.

.import --skip 1 sift.csv sift
-

We can check that we loaded in our data correctly by using the .schema command

+

We can check that we loaded in our data correctly by using the .schema command and a SELECT * query:

.schema
CREATE TABLE sift (chrom text, pos int, ref text, alt text, score real, nseq int);
 CREATE INDEX main_index on sift (chrom, pos, ref, alt);
+

cha

.mode box
-select * from sift limit 5;
-
-.exit
+SELECT * FROM sift LIMIT 5;
┌───────┬──────────┬─────┬─────┬───────┬──────┐
 │ chrom │   pos    │ ref │ alt │ score │ nseq │
 ├───────┼──────────┼─────┼─────┼───────┼──────┤
@@ -241,17 +243,18 @@ 

Creating our Table

│ chr17 │ 43045682 │ T │ C │ 0.0 │ 7 │ │ chr17 │ 43045682 │ T │ G │ 0.0 │ 7 │ └───────┴──────────┴─────┴─────┴───────┴──────┘
-
.mode tabs
-create table "hpo" ('ncbi_gene_id' int, "gene_symbol" TEXT, 
-                    "hpo_id" TEXT, "hpo_name" TEXT, "frequency" TEXT, 
-                    "disease_id" TEXT);
-
-.import --skip 1 genes_to_phenotype.txt hpo
-
-
-.mode box
-.schema
-select * from hpo limit 10;
+

When we’re done, we can leave the sqlite3 shell with the .exit command:

+
.exit
+
+
+
+ +
+
+Loading VCF Files as Annotations +
+
+
.mode tabs
 create table "vcf" ("chrom" TEXT, "pos" INT, "id" TEXT, 
                     "ref" TEXT, "alt" TEXT, "qual" INT, 
@@ -261,10 +264,13 @@ 

Creating our Table

.mode box .schema select * from vcf limit 10;
+
+
-
-

Fill out sift_annotator.py

+
+

Fill out sift.py

+

Now that our data is loaded into our .sqlite file, we need to set up our mapping. If we look in sift.py, we’ll see there are stubs for three methods: setup(), annotate(), and cleanup():

@@ -280,9 +286,70 @@

Fill out

-
cat /Users/Shared/open-cravat/modules/annotators/sift/sift.py
-
-

annotate() function

+

This is what they look like:

+
cat /Users/Shared/open-cravat/modules/annotators/sift/sift.py
+
import sys
+from cravat import BaseAnnotator
+from cravat import InvalidData
+import sqlite3
+import os
+
+class CravatAnnotator(BaseAnnotator):
+
+    def setup(self): 
+        """
+        Set up data sources. 
+        Cravat will automatically make a connection to 
+        data/example_annotator.sqlite using the sqlite3 python module. The 
+        sqlite3.Connection object is stored as self.dbconn, and the 
+        sqlite3.Cursor object is stored as self.cursor.
+        """
+        pass
+    
+    def annotate(self, input_data, secondary_data=None):
+        """
+        The annotator parent class will call annotate for each line of the 
+        input file. It takes one positional argument, input_data, and one
+        keyword argument, secondary_data.
+        
+        input_data is a dictionary containing the data from the current input 
+        line. The keys depend on what what file is used as the input, which can 
+        be changed in the module_name.yml file. 
+        Variant level includes the following keys: 
+            ('uid', 'chrom', 'pos', 'ref_base', 'alt_base')
+        Variant level crx files expand the key set to include:
+            ('hugo', 'transcript','so','all_mappings')
+        Gene level files include
+            ('hugo', 'num_variants', 'so', 'all_so')
+        
+        secondary_data is used to allow an annotator to access the output of
+        other annotators. It is described in more detail in the CRAVAT 
+        documentation.
+        
+        annotate should return a dictionary with keys matching the column names
+        defined in example_annotator.yml. Extra column names will be ignored, 
+        and absent column names will be filled with None. Check your output
+        carefully to ensure that your data is ending up where you intend.
+        """
+        out = {}
+        out['placeholder_annotation'] = 'placeholder value'
+        return out
+    
+    def cleanup(self):
+        """
+        cleanup is called after every input line has been processed. Use it to
+        close database connections and file handlers. Automatically opened
+        database connections are also automatically closed.
+        """
+        pass
+        
+if __name__ == '__main__':
+    annotator = CravatAnnotator(sys.argv)
+    annotator.run()
+

We will focus on the annotate() method first.

+
+

annotate() method

+

Our annotate() method is where we

@@ -312,41 +379,42 @@

annotate()

-
chrom = input_data["chrom"]
-pos = input_data["pos"]
-query = (f'select score, nseq from sift' \
-          'where chrom="{chrom}"'\
-          'and pos="{pos}"')
-self.cursor.execute(query)
-result = self.cursor.fetchone()
+
chrom = input_data["chrom"]
+pos = input_data["pos"]
+query = (f'select score, nseq from sift' \
+          'where chrom="{chrom}"'\
+          'and pos="{pos}"')
+self.cursor.execute(query)
+result = self.cursor.fetchone()
-
def annotate(self, input_data, secondary_data=None):
-    chrom = input_data['chrom']
-    pos = input_data['pos']
-    ref_base = input_data['ref_base']
-    alt_base = input_data['alt_base']
-    query = f'select score, nseq from sift where chrom="{chrom}" and pos={pos} and ref="{ref_base}" and alt="{alt_base}";'
-    self.cursor.execute(query)
-    result = self.cursor.fetchone()
-    if result is not None:
-        score = result[0]
-        num_seq = result[1]
-        if score <= 0.05:
-            prediction = 'Damaging'
-        else:
-            prediction = 'Tolerated'
-        return {
-            'score': score,
-            'seq_count': num_seq,
-            'prediction': prediction,
-        }
-    else:
-        return None
+
def annotate(self, input_data, secondary_data=None):
+    chrom = input_data['chrom']
+    pos = input_data['pos']
+    ref_base = input_data['ref_base']
+    alt_base = input_data['alt_base']
+    query = f'select score, nseq from sift where chrom="{chrom}" and pos={pos} and ref="{ref_base}" and alt="{alt_base}";'
+    self.cursor.execute(query)
+    result = self.cursor.fetchone()
+    if result is not None:
+        score = result[0]
+        num_seq = result[1]
+        if score <= 0.05:
+            prediction = 'Damaging'
+        else:
+            prediction = 'Tolerated'
+        return {
+            'score': score,
+            'seq_count': num_seq,
+            'prediction': prediction,
+        }
+    else:
+        return None

Configure sift_annotator.yml

+

Now that our annotate() method is filled in, we need to configure how our annotations will be displayed.

#| eval: false
 cat /Users/Shared/open-cravat/modules/annotators/sift_annotator/sift_annotator.yml
# 'title' is the name of the module that will be displayed to the user
diff --git a/making_annotator_modules.qmd b/making_annotator_modules.qmd
index 46afb13..b06ad10 100644
--- a/making_annotator_modules.qmd
+++ b/making_annotator_modules.qmd
@@ -10,7 +10,9 @@ format: html
 Creating an annotator module requires the following:
 
 1. Creating a new annotator skeleton using `oc new annotator <modulename>`
-2. Loading an annotator file into a SQLite database using 
+2. Loading an annotator file into a SQLite database (`<modulename>.sqlite`) using `sqlite3`
+3. Mapping the annotator sqlite file in the `<modulename>.py` file
+4. Customizing the output using the `<modulename>.yml` file
 
 ```{mermaid}
 flowchart LR