Skip to content

Commit

Permalink
update readme, fixed weight parsing, change yielder in metadata
Browse files Browse the repository at this point in the history
Former-commit-id: c9798de
  • Loading branch information
PedroMTQ committed Jan 5, 2022
1 parent 2ab0012 commit bd7b582
Show file tree
Hide file tree
Showing 9 changed files with 53 additions and 39 deletions.
2 changes: 1 addition & 1 deletion MANTIS.config
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@ pfam_weight=0.9
#####<custom_ref> defines the folder or file where Mantis will look for a SINGLE custom reference (HMM or DMND). The path will look something like this: /path/to/custom1/custom1.hmm or /path/to/custom1/
#custom_ref=path/to/hmm/custom1.hmm
#custom_ref=path/to/hmm/custom2.dmnd
#to set the weight, simply add a line with the hmm file name followed by _weight , like so:
#to set the weight, add a line with the hmm file name (without its extension) followed by _weight. For example, if the file path is path/to/hmm/custom1.hmm, take the file name <custom1> and set the weight like so:
#custom1_weight=0.5

12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ If you have only loose reads, you need to assemble them first; when you have ass
# Citation
If you use Mantis, please make sure you cite the respective paper https://doi.org/10.1093/gigascience/giab042

# Wiki

Do you have any questions you can't find the answer to in here? Please read the [wiki](https://github.com/PedroMTQ/mantis/wiki).

Still can't find the answer? Just post an issue and I'll answer as soon as possible!


# Workflow overview

Expand All @@ -39,8 +45,9 @@ If you use Mantis, please make sure you cite the respective paper https://doi.or
- [HMMER](#10-references-and-acknowledgements), tested with v3.2.1
- **GCC**, for compilation of cython code (most systems should have it by default)

**Mantis can only run on Linux-based systems**
**Mantis can only run on Linux or macOS systems. If you want to run Mantis on macOS, make sure you use Python 3.7.**

The multiprocessing package in Python versions above 3.7 has some issues outside of Linux environments.

### Quick configuration
1. `git clone [email protected]:PedroMTQ/mantis.git`
Expand All @@ -67,6 +74,7 @@ You may also redefine the **custom_refs** folder path by adding your preferred p

custom_refs_folder=path/to/custom_refs/

To integrate metadata, each custom reference folder should contain a `metadata.tsv` file - see [Custom References](https://github.com/PedroMTQ/mantis/wiki/Configuration#custom-references) for more details.

### Functions

Expand Down Expand Up @@ -108,7 +116,7 @@ There are 3 output files:

The first two files can have the same query sequence in several lines (query sequence/reference source) while the `consensus_annotation.tsv` will only have one line per query sequence (consensus/query).

Mantis can additionally output in gff format and also a kegg module matrix completeness. Please see [Output](https://github.com/PedroMTQ/mantis/wiki/Output) for more details.
**GFF-formatted output files can also be generated, as well as a KEGG module completeness TSV file. Please see the [Output](https://github.com/PedroMTQ/mantis/wiki/output) page for information on the additional output files.**

# Further details

Expand Down
9 changes: 5 additions & 4 deletions source/Assembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ def __init__(self, verbose=True, redirect_verbose=None,no_taxonomy=False, mantis
else:
self.hmm_chunk_size = hmm_chunk_size
self.read_config_file()
Taxonomy_SQLITE_Connector.__init__(self,resources_folder=self.mantis_paths['resources'])

#self.requirements_met()
# I use manager instead of queue since I need to be able to add records to the end and start of the 'queue' (actually a list) which is not possible with the multiprocessing.Queue
Expand Down Expand Up @@ -88,10 +87,9 @@ def __str__(self):
ref_weights=', '.join([f'{i}:{self.mantis_ref_weights[i]}' for i in self.mantis_ref_weights if i!='else'])
if ref_weights:
res+= f'# Weights:\n{ref_weights}\n'

nog_tax=', '.join([i for i in self.mantis_nogt_tax])
if nog_tax:
res+= f'# NOG tax IDs:\n{nog_tax}\n'
res+= f'\n# NOG tax IDs:\n{nog_tax}\n'

return res

Expand Down Expand Up @@ -204,7 +202,7 @@ def setup_paths_config_file(self):
if line_path: self.mantis_paths['tcdb'] = line_path


elif line.startswith('_weight='):
elif '_weight=' in line:
ref_source, weight = line.split('_weight=')
self.mantis_ref_weights[ref_source] = float(weight)

Expand Down Expand Up @@ -247,6 +245,7 @@ def read_config_file(self):
# if there's no path, we just assume its in the default folder
if not default_ref_path: default_ref_path = add_slash(MANTIS_FOLDER + 'References')
resources_path = add_slash(MANTIS_FOLDER + 'Resources')

self.mantis_paths = {'default': default_ref_path,
'resources': resources_path,
'custom': add_slash(default_ref_path + 'Custom_references'),
Expand All @@ -256,6 +255,8 @@ def read_config_file(self):
'NCBI': add_slash(default_ref_path + 'NCBI'),
'tcdb': add_slash(default_ref_path + 'tcdb'),
}
Taxonomy_SQLITE_Connector.__init__(self,resources_folder=self.mantis_paths['resources'])

self.setup_paths_config_file()
if not self.use_taxonomy:
self.mantis_paths['NOG']=f'NA{SPLITTER}'
Expand Down
1 change: 0 additions & 1 deletion source/Metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ def is_essential(self, dict_hits):
self.add_to_dict(dict_hits[hit], 'is_essential_gene', 'True')

def get_hit_links(self, dict_hits, ref_file):

if re.search('NOG[GT]',ref_file):
if 'NOGG' in ref_file:
taxon_id = 'NOGG'
Expand Down
66 changes: 36 additions & 30 deletions source/Metadata_SQLITE_Connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,29 +68,28 @@ def generate_fetch_command(self,ref_id):
return fetch_command

def yield_metadata(self):
res=[]
with open(self.metadata_file_tsv, 'r') as file:
for line in file:
row_info={}
line = line.strip('\n')
line = line.split('\t')
current_ref = line[0]
if '|' in line: line.remove('|')
annotations = line[1:]
for link in annotations:
if link:
temp_link = link.split(':')
link_type = temp_link[0]
link_text = ':'.join(temp_link[1:])
link_text=link_text.strip()
if link_type not in row_info: row_info[link_type]=set()
row_info[link_type].add(link_text)
if link_type == 'description' and link_text == 'NA':
link_text = None
if link_text and link_type == 'description':
get_common_links_metadata(link_text, res=row_info)
res.append(self.convert_row_to_sql(current_ref,row_info))
return res
if line:
line = line.split('\t')
current_ref = line[0]
if '|' in line: line.remove('|')
annotations = line[1:]
for link in annotations:
if link:
temp_link = link.split(':')
link_type = temp_link[0]
link_text = ':'.join(temp_link[1:])
link_text=link_text.strip()
if link_type not in row_info: row_info[link_type]=set()
row_info[link_type].add(link_text)
if link_type == 'description' and link_text == 'NA':
link_text = None
if link_text and link_type == 'description':
get_common_links_metadata(link_text, res=row_info)
yield self.convert_row_to_sql(current_ref,row_info)

def get_db_headers(self):
res = set()
Expand Down Expand Up @@ -135,19 +134,26 @@ def create_sql_table(self):

self.commit_and_close_sqlite_cursor()

def generate_inserts(self, metadata):
def generate_inserts(self, metadata_yielder):
step=self.insert_step
for i in range(0, len(metadata), step):
yield metadata[i:i + step]

temp=[]
for i in metadata_yielder:
if len(temp)<step:
temp.append(i)
else:
yield temp
temp=[]
temp.append(i)
yield temp

def store_metadata(self):
insert_command=self.generate_insert_command()
metadata_yielder=self.yield_metadata()
generator_insert = self.generate_inserts(metadata_yielder)
for table_chunk in generator_insert:
self.cursor.executemany(insert_command, table_chunk)
self.sqlite_connection.commit()
if metadata_yielder:
generator_insert = self.generate_inserts(metadata_yielder)
for table_chunk in generator_insert:
self.cursor.executemany(insert_command, table_chunk)
self.sqlite_connection.commit()

def convert_sql_to_dict(self,sql_result):
sql_result=sql_result[1:]
Expand All @@ -165,8 +171,8 @@ def fetch_metadata(self,ref_id):
if not file_exists(self.db_file):
return {}
fetch_command=self.generate_fetch_command(ref_id)
res_fetch=self.cursor.execute(fetch_command).fetchone()
try:
res_fetch = self.cursor.execute(fetch_command).fetchone()
res=self.convert_sql_to_dict(res_fetch)
return res
except:
Expand All @@ -188,9 +194,9 @@ def test_database(self):

if __name__ == '__main__':
import time
metadata_connector=Metadata_SQLITE_Connector('/media/HDD/data/mantis_references/NOG_dmnd/10/metadata.tsv')
metadata_connector=Metadata_SQLITE_Connector('/media/HDD/data/mantis_references/tcdb/metadata.tsv')
metadata_connector.test_database()
start=time.time()
for i in range(10000):
res=metadata_connector.fetch_metadata('1134474.O59_000005')
res=metadata_connector.fetch_metadata('P0A2U6')
print(time.time()-start)
2 changes: 1 addition & 1 deletion source/Taxonomy_SQLITE_Connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ def get_taxa_ncbi(self,organism_name):
if __name__ == '__main__':
gtdb_connector=Taxonomy_SQLITE_Connector(resources_folder='/home/pedroq/Desktop/test_cr/')
gtdb_connector.launch_taxonomy_connector()
gtdb_connector.create_taxonomy_db()
#gtdb_connector.create_taxonomy_db()
#gtdb_connector.process_gtdb_taxonomy('d__Archaea;p__Thermoproteota;c__Nitrososphaeria;o__Nitrososphaerales;f__Nitrosopumilaceae_C;g__JACEMX01;s__JACEMX01 sp011773785')
a=gtdb_connector.fetch_ncbi_id('d__Archaea;p__Halobacteriota;c__Methanosarcinia;o__Methanosarcinales;f__Methanosarcinaceae;g__Methanolobus;s__Methanolobus psychrophilus')
print(a)
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit bd7b582

Please sign in to comment.