Skip to content

Commit

Permalink
Merge pull request #44 from RasmussenLab/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
joacjo authored Sep 29, 2022
2 parents d971b48 + 532b5ac commit c329a57
Show file tree
Hide file tree
Showing 25 changed files with 18,946 additions and 2,594 deletions.
18 changes: 14 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,19 @@ mamba install -c conda-forge -c bioconda snakemake
mamba install -c conda-forge -c bioconda scikit-learn=1.0.2
mamba install -c conda-forge -c bioconda cython
mamba install -c conda-forge -c bioconda pygraphviz
```


```
### Clone repository
git clone https://github.com/RasmussenLab/phamb.git
### Quick install
pip install -e .
### Old dependencies
conda create -c conda-forge -c bioconda -n phamb python=3.6 cython scikit-learn=0.21.3 snakemake pygraphviz
### Test installation
mkdir -p testout
run_RF.py test/contigs.fna.gz test/clusters.tsv test testout
```


Expand All @@ -51,7 +61,7 @@ mkdir -p projectdir
cd projectdir
git clone https://github.com/RasmussenLab/phamb.git
cp -r phamb/workflows/mag_annotation .
python mag_annotation/scripts/split_contigs.py -c contigs.fna.gz
python split_contigs.py -c contigs.fna.gz
```

- Now `contigs.fna.gz` has been split into individual assemblies, i.e. `assembly/{sample}/{sample}.fna`
Expand Down Expand Up @@ -98,7 +108,7 @@ gzip contigs.fna
### Run the RF model
When you run the provided script, the virome bins are written to a FASTA file and the bin annotations are summarised in `vambbins_aggregated_annotation.txt`.
```bash
python mag_annotation/scripts/run_RF.py contigs.fna.gz vamb/clusters.tsv annotations resultdir
run_RF.py contigs.fna.gz vamb/clusters.tsv annotations resultdir

ls resultdir
resultdir/vambbins_aggregated_annotation.txt
Expand Down
8 changes: 8 additions & 0 deletions phamb/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@


"""Phamb - Phages from metagenomic binning
Documentation: https://github.com/RasmussenLab/phamb
"""

# Package-level metadata.
# NOTE(review): the attribute is spelled `__licence__` (British spelling); confirm
# nothing expects the more common `__license__` name before relying on it.
__licence__ = 'MIT'
# Version as a (major, minor, patch) tuple; setup.py declares the same release
# as the string "1.0.1", so the two must be updated together.
__version__ = (1, 0, 1)
File renamed without changes.
File renamed without changes.
12 changes: 8 additions & 4 deletions workflows/mag_annotation/scripts/run_RF.py → phamb/run_RF.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
#!/usr/bin/python
import sys
import argparse
import vambtools as _vambtools
import run_RF_modules
#import vambtools as _vambtools
from phamb import vambtools as _vambtools
from phamb import run_RF_modules
#import run_RF_modules
import collections as _collections
import os
import numpy as _np
from pathlib import Path


parser = argparse.ArgumentParser(
Expand Down Expand Up @@ -185,7 +188,8 @@ def _run_RF_model(cls,RF_model,genome_order, sparse_df):

print('Loading Model and annotation table')
trained_model = joblib.load(RF_model)

trained_model.n_estimators = 300
trained_model.max_features = 'sqrt'
predicted_genome_labels = trained_model.predict(sparse_df)
prediction_probabilities = trained_model.predict_proba(sparse_df)
predicted_genome_labels = [label.lower() for label in list(predicted_genome_labels) ]
Expand Down Expand Up @@ -226,7 +230,7 @@ def _run_RF_model(cls,RF_model,genome_order, sparse_df):

viral_annotation = run_RF_modules.Viral_annotation(annotation_files=viral_annotation_files,genomes=reference)

rf_model_file = 'mag_annotation/dbs/RF_model.python39.sav'
rf_model_file = Path(__file__).parent / "dbs/RF_model.python39.sav"
RF_results = RF_model(rf_model_file, genomes = viral_annotation.genomes)

bins = {binname:clusters[binname] for binname in RF_results.RF_non_bacteria}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/bin/python
'''Helper modules'''
import vambtools as _vambtools
#import vambtools as _vambtools
from phamb import vambtools as _vambtools

import collections as _collections
import os
import numpy as _np
Expand Down
File renamed without changes.
File renamed without changes.
25 changes: 25 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import sys
from setuptools import setup, find_packages
from setuptools import Extension
import os

# Keyword arguments for setuptools.setup(); unpacked into the call at the bottom.
SETUP_METADATA = \
{
"name": "phamb",
"description": "Phages from metagenomic binning",
"url": "https://github.com/RasmussenLab/phamb",
# NOTE(review): phamb/__init__.py also declares the version, as the tuple
# (1, 0, 1); keep both in sync when releasing.
"version": "1.0.1",
"license": "MIT",
"packages": ['phamb'],
# Ship the serialized random-forest model inside the package: run_RF.py loads
# it from Path(__file__).parent / "dbs/RF_model.python39.sav".
"package_data": {'phamb': ['dbs/RF_model.python39.sav']},
"python_requires": ">=3.9",
# Exact pin. Presumably required because the bundled .sav model was serialized
# with this scikit-learn release -- TODO confirm before loosening.
"install_requires": ["scikit-learn==1.0.2"],
"classifiers":[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
],
# Files installed as command-line scripts (the README invokes run_RF.py directly).
# NOTE(review): vambtools.py and run_RF_modules.py are imported by run_RF.py via
# `from phamb import ...`; confirm they also need to be installed as scripts.
"scripts":['phamb/run_RF.py','phamb/split_contigs.py','phamb/vambtools.py','phamb/run_RF_modules.py']
}

# Runs the setuptools command given on the command line (e.g. via `pip install -e .`).
setup(**SETUP_METADATA)
3,001 changes: 3,001 additions & 0 deletions test/all.DVF.predictions.txt

Large diffs are not rendered by default.

874 changes: 874 additions & 0 deletions test/all.hmmMiComplete105.tbl

Large diffs are not rendered by default.

12,013 changes: 12,013 additions & 0 deletions test/all.hmmVOG.tbl

Large diffs are not rendered by default.

3,000 changes: 3,000 additions & 0 deletions test/clusters.tsv

Large diffs are not rendered by default.

Binary file added test/contigs.fna.gz
Binary file not shown.
191 changes: 0 additions & 191 deletions workflows/crispr/Snakefile

This file was deleted.

3 changes: 0 additions & 3 deletions workflows/crispr/config.yaml

This file was deleted.

7 changes: 0 additions & 7 deletions workflows/crispr/envs/cctyper.yaml

This file was deleted.

Loading

0 comments on commit c329a57

Please sign in to comment.