Skip to content

Commit

Permalink
First draft ncbigene and fix hgnc import (#7380)
Browse files Browse the repository at this point in the history
* First draft ncbigene import

* Changes the HGNC ingest to use the symbol as the label

---------

Co-authored-by: Sabrina Toro <[email protected]>
  • Loading branch information
matentzn and sabrinatoro authored Mar 30, 2024
1 parent d7df62e commit 6ec68b6
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 26 deletions.
20 changes: 15 additions & 5 deletions src/ontology/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ all: test all_artefacts mappings
# Artefacts
# ----------------------------------------

IMPORTS = uberon cl go pato envo ro hp mf ncbitaxon chebi ncit hgnc foodon so ecto omo chr hsapdv nbo maxo mfomd
IMPORTS = uberon cl go pato envo ro hp mf ncbitaxon chebi ncit hgnc foodon so ecto omo chr hsapdv nbo maxo mfomd ncbigene

IMPORT_ROOTS = $(IMPORTDIR)/merged_import
IMPORT_OWL_FILES = $(foreach n,$(IMPORT_ROOTS), $(n).owl)
Expand Down Expand Up @@ -678,15 +678,25 @@ $(MIRRORDIR)/%.owl: mirror-% | $(MIRRORDIR)

###### END PARTIAL ODK MIGRATION ##############
###### BEGIN CUSTOM IMPORTS ###################
mirror/dipper-%.ttl: | $(MIRRORDIR)
if [ $(MIR) = true ] && [ $(IMP) = true ]; then wget --no-check-certificate https://archive.monarchinitiative.org/latest/rdf/$*.ttl -O $@.tmp &&\
# Fetch the HGNC gene N-Triples dump (gzip-compressed) from the Monarch KG
# and unpack it to $@.tmp, then write the final file with the genenames.org
# gene-symbol-report URLs rewritten to http://identifiers.org/hgnc/ IRIs.
# The recipe only does work when both MIR and IMP are "true".
# $(MIRRORDIR) is an order-only prerequisite.
mirror/hgnc_gene.nt: | $(MIRRORDIR)
if [ $(MIR) = true ] && [ $(IMP) = true ]; then curl -L https://data.monarchinitiative.org/monarch-kg/latest/rdf/hgnc_gene.nt.gz | gzip -d > $@.tmp &&\
perl -npe 's@https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:@http://identifiers.org/hgnc/@g' $@.tmp > $@; fi
.PRECIOUS: mirror/dipper-%.ttl
.PRECIOUS: mirror/hgnc_gene.nt

mirror-hgnc: mirror/dipper-hgnc.ttl | $(TMPDIR)
# Fetch the NCBI gene N-Triples dump (gzip-compressed) from the Monarch KG,
# unpack it to $@.tmp and write the final file through the perl filter below.
# The recipe only does work when both MIR and IMP are "true".
# $(MIRRORDIR) is an order-only prerequisite.
# NOTE(review): the perl substitution is the same HGNC URL rewrite used for
# mirror/hgnc_gene.nt -- presumably carried over from that rule; confirm the
# NCBI dump actually contains genenames.org URLs, otherwise this step only
# copies $@.tmp to $@.
mirror/ncbi_gene.nt: | $(MIRRORDIR)
if [ $(MIR) = true ] && [ $(IMP) = true ]; then curl -L https://data.monarchinitiative.org/monarch-kg/latest/rdf/ncbi_gene.nt.gz | gzip -d > $@.tmp &&\
perl -npe 's@https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:@http://identifiers.org/hgnc/@g' $@.tmp > $@; fi
.PRECIOUS: mirror/ncbi_gene.nt

# Build the HGNC mirror from the downloaded N-Triples: ROBOT merges
# mirror/hgnc_gene.nt ($<), runs the construct-hgnc.sparql CONSTRUCT query and
# writes the result to $(TMPDIR)/mirror-hgnc.owl.
# The output is requested in Turtle syntax (--format ttl) despite the .owl suffix.
# The recipe only does work when both MIR and IMP are "true".
mirror-hgnc: mirror/hgnc_gene.nt | $(TMPDIR)
if [ $(MIR) = true ] && [ $(IMP) = true ]; then $(ROBOT) merge -i $< \
query --format ttl --query ../sparql/construct/construct-hgnc.sparql $(TMPDIR)/$@.owl; fi

# Build the NCBI gene mirror from the downloaded N-Triples: ROBOT merges
# mirror/ncbi_gene.nt ($<), runs the construct-ncbigene.sparql CONSTRUCT query
# and writes the result to $(TMPDIR)/mirror-ncbigene.owl.
# The output is requested in Turtle syntax (--format ttl) despite the .owl suffix.
# The recipe only does work when both MIR and IMP are "true".
mirror-ncbigene: mirror/ncbi_gene.nt | $(TMPDIR)
if [ $(MIR) = true ] && [ $(IMP) = true ]; then $(ROBOT) merge -i $< \
query --format ttl --query ../sparql/construct/construct-ncbigene.sparql $(TMPDIR)/$@.owl; fi


IMPORTSEED=$(IMPORTDIR)/seed.txt # SHOULD BE TMP
###### END CUSTOM IMPORTS ###################

Expand Down
32 changes: 16 additions & 16 deletions src/ontology/imports/merged_import.owl
Original file line number Diff line number Diff line change
Expand Up @@ -27776,9 +27776,9 @@ SubClassOf(<http://identifiers.org/hgnc/10892> <http://purl.obolibrary.org/obo/S
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/10896> "SKI")
SubClassOf(<http://identifiers.org/hgnc/10896> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/10898> (SKIV2L)
# Class: <http://identifiers.org/hgnc/10898> (SKIC2)

AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/10898> "SKIV2L")
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/10898> "SKIC2")
SubClassOf(<http://identifiers.org/hgnc/10898> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/1090> (DST)
Expand Down Expand Up @@ -28106,9 +28106,9 @@ SubClassOf(<http://identifiers.org/hgnc/11071> <http://purl.obolibrary.org/obo/S
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/11073> "SLC9A3")
SubClassOf(<http://identifiers.org/hgnc/11073> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/11075> (SLC9A3R1)
# Class: <http://identifiers.org/hgnc/11075> (NHERF1)

AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/11075> "SLC9A3R1")
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/11075> "NHERF1")
SubClassOf(<http://identifiers.org/hgnc/11075> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/11079> (SLC9A6)
Expand Down Expand Up @@ -32716,9 +32716,9 @@ SubClassOf(<http://identifiers.org/hgnc/1810> <http://purl.obolibrary.org/obo/SO
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/18111> "CCDC50")
SubClassOf(<http://identifiers.org/hgnc/18111> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/18119> (SPATA5)
# Class: <http://identifiers.org/hgnc/18119> (AFG2A)

AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/18119> "SPATA5")
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/18119> "AFG2A")
SubClassOf(<http://identifiers.org/hgnc/18119> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/18121> (MFRP)
Expand Down Expand Up @@ -33391,9 +33391,9 @@ SubClassOf(<http://identifiers.org/hgnc/19087> <http://purl.obolibrary.org/obo/S
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/19100> "IL23R")
SubClassOf(<http://identifiers.org/hgnc/19100> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/19102> (DDX58)
# Class: <http://identifiers.org/hgnc/19102> (RIGI)

AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/19102> "DDX58")
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/19102> "RIGI")
SubClassOf(<http://identifiers.org/hgnc/19102> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/19104> (NPHP4)
Expand Down Expand Up @@ -35351,9 +35351,9 @@ SubClassOf(<http://identifiers.org/hgnc/236> <http://purl.obolibrary.org/obo/SO_
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/23631> "NPSR1")
SubClassOf(<http://identifiers.org/hgnc/23631> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/23639> (TTC37)
# Class: <http://identifiers.org/hgnc/23639> (SKIC3)

AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/23639> "TTC37")
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/23639> "SKIC3")
SubClassOf(<http://identifiers.org/hgnc/23639> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/2364> (CRLF1)
Expand Down Expand Up @@ -35736,9 +35736,9 @@ SubClassOf(<http://identifiers.org/hgnc/24576> <http://purl.obolibrary.org/obo/S
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/24579> "CIB2")
SubClassOf(<http://identifiers.org/hgnc/24579> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/24587> (FAM126A)
# Class: <http://identifiers.org/hgnc/24587> (HYCC1)

AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/24587> "FAM126A")
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/24587> "HYCC1")
SubClassOf(<http://identifiers.org/hgnc/24587> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/24592> (STEAP3)
Expand Down Expand Up @@ -40181,9 +40181,9 @@ SubClassOf(<http://identifiers.org/hgnc/4174> <http://purl.obolibrary.org/obo/SO
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/4175> "GATM")
SubClassOf(<http://identifiers.org/hgnc/4175> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/4177> (GBA)
# Class: <http://identifiers.org/hgnc/4177> (GBA1)

AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/4177> "GBA")
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/4177> "GBA1")
SubClassOf(<http://identifiers.org/hgnc/4177> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/4180> (GBE1)
Expand Down Expand Up @@ -44971,9 +44971,9 @@ SubClassOf(<http://identifiers.org/hgnc/8907> <http://purl.obolibrary.org/obo/SO
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/8910> "PGR")
SubClassOf(<http://identifiers.org/hgnc/8910> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/8912> (PHB)
# Class: <http://identifiers.org/hgnc/8912> (PHB1)

AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/8912> "PHB")
AnnotationAssertion(rdfs:label <http://identifiers.org/hgnc/8912> "PHB1")
SubClassOf(<http://identifiers.org/hgnc/8912> <http://purl.obolibrary.org/obo/SO_0000704>)

# Class: <http://identifiers.org/hgnc/8918> (PHEX)
Expand Down
10 changes: 6 additions & 4 deletions src/sparql/construct/construct-hgnc.sparql
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,16 @@ prefix dc: <http://purl.org/dc/elements/1.1/>
CONSTRUCT {
?gene a owl:Class ;
rdfs:subClassOf <http://purl.obolibrary.org/obo/SO_0000704> ;
rdfs:label ?label ;
<http://purl.org/dc/elements/1.1/description> ?description .
rdfs:label ?symbol ;
<http://purl.org/dc/terms/description> ?description .
}
WHERE {
?gene rdfs:label ?label .
?gene <https://w3id.org/biolink/vocab/category> <https://w3id.org/biolink/vocab/Gene> ;
<https://w3id.org/biolink/vocab/in_taxon> ?taxon_xref ;
<https://w3id.org/biolink/vocab/symbol> ?symbol .

OPTIONAL {
?gene <http://purl.org/dc/elements/1.1/description> ?description .
?gene <http://purl.org/dc/terms/description> ?description .
}

FILTER (!isBlank(?gene))
Expand Down
29 changes: 29 additions & 0 deletions src/sparql/construct/construct-ncbigene.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Builds OWL classes for NCBI genes from Biolink-style gene records.
#
# For every non-blank ?gene whose IRI starts with http://identifiers.org/ncbigene/
# and that has category biolink Gene, an in_taxon value and a symbol, emit:
#   - an owl:Class that is a subclass of SO_0000704,
#   - rdfs:label set to the gene symbol,
#   - an existential restriction over RO_0002162 pointing at the taxon class,
#   - dcterms:description, when the source provides one.
#
# The restriction blank node is typed owl:Restriction explicitly, as the
# OWL 2 RDF mapping requires, so parsers do not have to guess its kind.
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix dcterms: <http://purl.org/dc/terms/>

CONSTRUCT {
  ?gene a owl:Class ;
    rdfs:subClassOf <http://purl.obolibrary.org/obo/SO_0000704> ;
    rdfs:label ?symbol ;
    rdfs:subClassOf [
      a owl:Restriction ;
      owl:onProperty <http://purl.obolibrary.org/obo/RO_0002162> ;
      owl:someValuesFrom ?taxon ] ;
    dcterms:description ?description .
}
WHERE {
  ?gene <https://w3id.org/biolink/vocab/category> <https://w3id.org/biolink/vocab/Gene> ;
    <https://w3id.org/biolink/vocab/in_taxon> ?taxon_xref ;
    <https://w3id.org/biolink/vocab/symbol> ?symbol .

  OPTIONAL {
    ?gene dcterms:description ?description .
  }

  FILTER (!isBlank(?gene))
  # Plain prefix test: unlike regex(), the dots in the namespace are matched literally.
  FILTER (STRSTARTS(str(?gene), "http://identifiers.org/ncbigene/"))
  # Turn an "NCBITaxon:" CURIE into the OBO PURL; values without that prefix pass through unchanged.
  BIND(IRI(REPLACE(str(?taxon_xref), "NCBITaxon:", "http://purl.obolibrary.org/obo/NCBITaxon_")) AS ?taxon)
}


Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ prefix owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

CONSTRUCT {
?cls rdfs:subClassOf+ [
?cls rdfs:subClassOf [
owl:onProperty <http://purl.obolibrary.org/obo/RO_0000053> ;
owl:someValuesFrom <http://purl.obolibrary.org/obo/MONDO_0021136> ] .
}
Expand Down

0 comments on commit 6ec68b6

Please sign in to comment.