Skip to content

Commit

Permalink
Changes for pub (#106)
Browse files Browse the repository at this point in the history
* fix: fasta write compression

* fix: option for compressed sg_object to gbk output

* fix: file compression suffix

* fix: account for input max proteins being greater than actual proteins

* fix: npatlas import

* fix: lint

* fix: pytest

* chore: remove temporary function

* style: flake8

* feat: add args for blast options

* feat: add genomes to the neo4j database

* fix: antismash as nodes

* fix: jaccard score

* fix: bgc comparison scoring

* fix: blast arg in bgc search

* fix: neo4j escaping

I had no idea Neo4j removed '\', which means SMILES (and other) strings have to be stored with the backslash escaped as '\\'.

https://neo4j.com/docs/cypher-manual/current/queries/expressions/#string

* feat: BGC search, adaptive width

* fix: genbank output

* fix: to conform to antismash and gbk output

* fix: ability to modify clustermap assembly name without modifying uid

* fix: bgc search scoring

* fix: clustermap.js links color based on a 0-1 scale

* fix: tests for release

* fix: metavar deprecation

* style: black, isort, flake8

* fix: make pytest

* style: clean and recreate package and class diagrams

* chore: bump dependencies

* style: black
  • Loading branch information
chasemc authored Jul 23, 2024
1 parent 02f8342 commit bbab371
Show file tree
Hide file tree
Showing 50 changed files with 1,828 additions and 989 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ install:

## pytest : Run Python package unit tests
pytest: clean install
pytest tests -v --ignore=socialgene/entrypoints/export_protein_loci_assembly_tables.py --cov=./socialgene --cov-report=xml:./coverage.xml --cov-report html
pytest tests/python -v --ignore=socialgene/entrypoints/export_protein_loci_assembly_tables.py --cov=./socialgene --cov-report=xml:./coverage.xml --cov-report html tests/python
xdg-open htmlcov/index.html

## pytestnf : Run Nextflow pytest tests (first runs clean, install python and nextflow test run)
Expand Down
30 changes: 0 additions & 30 deletions Take an input BGC.md

This file was deleted.

153 changes: 0 additions & 153 deletions Untitled-2.cql

This file was deleted.

Binary file modified classes_sgpy.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified packages_sgpy.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
14 changes: 7 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@ classifiers = [ #! Update me
"Programming Language :: Python :: 3",
]
dependencies = [
"rdkit==2023.9.4",
"rdkit==2024.3.3",
"pandas>=2.2",
"numpy>=1.26",
"neo4j>=5.17.0",
"biopython>=1.83",
"textdistance>=4.6.1",
"rich",
"requests"
"numpy>=1.26", # rdkit requires <2.0
"neo4j>=5.22.0",
"biopython>=1.84",
"textdistance>=4.6.3",
"rich>=13.7.1",
"requests>=2.32.3"
]

[project.license]
Expand Down
Empty file.
40 changes: 40 additions & 0 deletions socialgene/addons/antismash/nr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from socialgene.neo4j.neo4j_element import Node, Relationship


class Product(Node):
    """Node definition for the product of an antiSMASH BGC.

    Declares the ``product`` label and a single ``uid`` property mapped to
    ``int``; ``uid`` is also listed under ``constraints_unique``.
    """

    description = "Represents the product of an antiSMASH BGC"
    neo4j_label = ["product"]
    constraints_unique = ["uid"]
    property_specification = {"uid": int}


class Category(Node):
    """Node definition for the category of an antiSMASH BGC.

    Declares the ``category`` label and a single ``uid`` property mapped to
    ``int``; ``uid`` is also listed under ``constraints_unique``.
    """

    description = "Represents the category of an antiSMASH BGC"
    neo4j_label = ["category"]
    constraints_unique = ["uid"]
    property_specification = {"uid": int}


class ProductToCategory(Relationship):
    """Relationship definition labelled ``IS_A`` from Product to Category."""

    # Endpoints first: the relationship starts at a Product node and ends at
    # a Category node.
    start_class = Product
    end_class = Category
    neo4j_label = "IS_A"
    description = "Connects an antiSMASH product to category "


class GeneClusterToProduct(Relationship):
    """Relationship definition labelled ``IS_A`` that carries location properties.

    Declares the properties ``tool`` (str) and ``start``, ``end``,
    ``core_start``, ``core_end`` (all int).
    """

    # NOTE(review): the description says this connects a BGC to a product, but
    # the endpoints below are Product -> Category, identical to
    # ProductToCategory (which also uses the "IS_A" label). This looks like a
    # copy-paste leftover -- confirm the intended start/end node classes.
    start_class = Product
    end_class = Category
    neo4j_label = "IS_A"
    description = "Connects an antiSMASH BGC to product "
    property_specification = dict(
        tool=str,
        start=int,
        end=int,
        core_start=int,
        core_end=int,
    )
3 changes: 0 additions & 3 deletions socialgene/addons/chebi/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +0,0 @@
# from socialgene.neo4j.neo4j_element import Node, Relationship
# try to use classes already created that inherit from Nodes() or Relationship()
# to do that create a mixin class in a separate file and then import that
6 changes: 3 additions & 3 deletions socialgene/addons/chemistry/cli_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,16 @@ def get_db_inchis():
res = db.run(
f"""
MATCH (c1:{cmpd_label})
RETURN c1.inchi as chem
RETURN c1.CanonicalSmiles as chem
""",
).value()
return res
return [i.replace("\\\\", "\\") for i in res if i is not None]


def inchi_list_to_compound_dict(x):
res = {}
for i in x:
temp = ChemicalCompound(i)
temp = ChemicalCompound(i, sanitize=True)
res[i] = (temp, temp.node)
return res

Expand Down
27 changes: 8 additions & 19 deletions socialgene/addons/chemistry/nr.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ class ChemicalCompoundNode(Node):
"MolMR": float,
"AnonymousGraph": str,
"ElementGraph": str,
"CanonicalSmiles": str,
"MurckoScaffold": str,
"ExtendedMurcko": str,
"MolFormula": str,
Expand All @@ -50,7 +49,7 @@ class ChemicalCompoundNode(Node):
"inchi": str,
"CanonicalSmiles": str,
}
constraints_unique = ["inchi", "CanonicalSmiles"]
constraints_unique = ["inchi"]


class TanimotoSimilarity(Relationship):
Expand All @@ -74,29 +73,19 @@ class McsSimilarity(Relationship):


class ChemicalSubstructure(ChemicalCompoundNode):
neo4j_label = ChemicalCompoundNode.neo4j_label + ["substructure"]
neo4j_label = ["substructure"]
description = "Represents a chemical substructure"


class ChemicalFragment(Node):
neo4j_label = ["chemical_fragment"]
description = "Represents a chemical fragment as defined by rdkit.Chem.Descriptors"
required_properties = ["uid"]
properties = {
required_properties = ["inchi", "CanonicalSmiles"]
property_specification = {
"uid": str,
"inchi": str,
"CanonicalSmiles": str,
}
constraints_unique = ["uid"]


class ContainsFragment(Relationship):
neo4j_label = "CONTAINS"
description = "Connects a chemical compound to a chemical fragment"
start_class = ChemicalCompoundNode
end_class = ChemicalFragment
constraints_unique = ["inchi", "CanonicalSmiles"]


class ContainsSubstructure(Relationship):
neo4j_label = "CONTAINS"
neo4j_label = "SUBSTRUCTURE"
description = "Connects a chemical compound to a chemical substructure"
start_class = ChemicalCompoundNode
end_class = ChemicalSubstructure
64 changes: 64 additions & 0 deletions socialgene/addons/chemistry/substruct.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# from socialgene.neo4j.neo4j import GraphDriver
# import logging
# import os
# from socialgene.base.chem import ChemicalCompound
# from rich.progress import Progress
# from socialgene.addons.chemistry.nr import ChemicalSubstructure, ContainsSubstructure, ChemicalCompoundNode
# from rdkit import Chem
# from multiprocessing import Pool
# from rdkit.Chem.MolStandardize import rdMolStandardize
# from itertools import batched
# logging.getLogger("neo4j").setLevel(logging.WARNING)
# logging.getLogger().setLevel(logging.INFO)

# if __name__ == "__main__":

# #inspect(a)
# with GraphDriver() as db:
# results = db.run(
# """
# MATCH (n:chemical_compound) RETURN n.inchi as inchi
# """
# ).value()


# nodes=set()
# rels=set()

# def process_subgraph(batched_results):
# nodes=set()
# rels=set()
# for i in batched_results:
# sgmol = ChemicalCompound(i)
# subgraphs = Chem.FindAllSubgraphsOfLengthN(sgmol.mol, 5)
# for subgraph in subgraphs:
# sub_mol = Chem.PathToSubmol(sgmol.mol, subgraph, useQuery=True)
# sub_mol = rdMolStandardize.Cleanup(sub_mol)
# sub_mol = rdMolStandardize.Normalize(sub_mol)
# r = rdMolStandardize.Reionizer()
# sub_mol = r.reionize(sub_mol)
# Chem.RemoveStereochemistry( sub_mol )
# #sub_mol=Chem.MolToSmiles(sub_mol, canonical=True)
# node = ChemicalSubstructure()
# temp_compound = ChemicalCompound(sub_mol)
# node.fill_from_dict(temp_compound.base_properties | temp_compound.hash_dict)
# del temp_compound
# nodes.add(node)
# cn = ChemicalCompoundNode()
# cn.fill_from_dict(sgmol.base_properties | sgmol.hash_dict)
# rel = ContainsSubstructure(cn, node)
# rels.add(rel)
# nodes[0].add_multiple_to_neo4j(nodes)
# rels[0].add_multiple_to_neo4j(rels)

# def update_progress(*args):
# pg.update(task, advance=1)

# with Progress(transient=True) as pg:
# task = pg.add_task("Progress...", total=len(results))
# batched_results = batched(results, 100)
# with Pool() as p:
# for i in p.imap_unordered(process_subgraph, batched_results):
# nodes.update(i[0])
# rels.update(i[1])
# update_progress()
Empty file.
Empty file.
Loading

0 comments on commit bbab371

Please sign in to comment.