|
9 | 9 | from os.path import join |
10 | 10 | from time import strftime |
11 | 11 | from copy import deepcopy |
12 | | -import warnings |
13 | 12 | from skbio.util import find_duplicates |
14 | 13 |
|
15 | | -import pandas as pd |
16 | | - |
17 | 14 | from qiita_core.exceptions import IncompetentQiitaDeveloperError |
18 | 15 | import qiita_db as qdb |
19 | 16 | from .constants import (PREP_TEMPLATE_COLUMNS, TARGET_GENE_DATA_TYPES, |
@@ -519,116 +516,6 @@ def generate_files(self, samples=None, columns=None): |
519 | 516 | fp_id = qdb.util.convert_to_id("prep_template", "filepath_type") |
520 | 517 | self.add_filepath(fp, fp_id=fp_id) |
521 | 518 |
|
522 | | - # creating QIIME mapping file |
523 | | - self.create_qiime_mapping_file() |
524 | | - |
525 | | - def create_qiime_mapping_file(self): |
526 | | - """This creates the QIIME mapping file and links it in the db. |
527 | | -
|
528 | | - Returns |
529 | | - ------- |
530 | | - filepath : str |
531 | | - The filepath of the created QIIME mapping file |
532 | | -
|
533 | | - Raises |
534 | | - ------ |
535 | | - ValueError |
536 | | - If the prep template is not a subset of the sample template |
537 | | - QiitaDBWarning |
538 | | - If the QIIME-required columns are not present in the template |
539 | | -
|
540 | | - Notes |
541 | | - ----- |
542 | | - We cannot ensure that the QIIME-required columns are present in the |
543 | | - metadata map. However, we have to generate a QIIME-compliant mapping |
544 | | - file. Since the user may need a QIIME mapping file, but not these |
545 | | - QIIME-required columns, we are going to create them and |
546 | | - populate them with the value XXQIITAXX. |
547 | | - """ |
548 | | - with qdb.sql_connection.TRN: |
549 | | - rename_cols = { |
550 | | - 'barcode': 'BarcodeSequence', |
551 | | - 'primer': 'LinkerPrimerSequence', |
552 | | - 'description': 'Description', |
553 | | - } |
554 | | - |
555 | | - if 'reverselinkerprimer' in self.categories(): |
556 | | - rename_cols['reverselinkerprimer'] = 'ReverseLinkerPrimer' |
557 | | - new_cols = ['BarcodeSequence', 'LinkerPrimerSequence', |
558 | | - 'ReverseLinkerPrimer'] |
559 | | - else: |
560 | | - new_cols = ['BarcodeSequence', 'LinkerPrimerSequence'] |
561 | | - |
562 | | - # Retrieve the latest sample template |
563 | | - # Since we sorted the filepath retrieval, the first result contains |
564 | | - # the filepath that we want. The first result returned by |
565 | | - # `retrieve_filepaths` is indexed by key; the path is under 'fp' |
566 | | - sample_template_fp = qdb.util.retrieve_filepaths( |
567 | | - "sample_template_filepath", "study_id", self.study_id, |
568 | | - sort='descending')[0]['fp'] |
569 | | - |
570 | | - # reading files via pandas |
571 | | - st = qdb.metadata_template.util.load_template_to_dataframe( |
572 | | - sample_template_fp) |
573 | | - pt = self.to_dataframe() |
574 | | - |
575 | | - st_sample_names = set(st.index) |
576 | | - pt_sample_names = set(pt.index) |
577 | | - |
578 | | - if not pt_sample_names.issubset(st_sample_names): |
579 | | - raise ValueError( |
580 | | - "Prep template is not a sub set of the sample template, " |
581 | | - "file: %s - samples: %s" |
582 | | - % (sample_template_fp, |
583 | | - ', '.join(pt_sample_names-st_sample_names))) |
584 | | - |
585 | | - mapping = pt.join(st, lsuffix="_prep") |
586 | | - mapping.rename(columns=rename_cols, inplace=True) |
587 | | - |
588 | | - # Pre-populate the QIIME-required columns with the value XXQIITAXX |
589 | | - index = mapping.index |
590 | | - placeholder = ['XXQIITAXX'] * len(index) |
591 | | - missing = [] |
592 | | - for val in rename_cols.values(): |
593 | | - if val not in mapping: |
594 | | - missing.append(val) |
595 | | - mapping[val] = pd.Series(placeholder, index=index) |
596 | | - |
597 | | - if missing: |
598 | | - warnings.warn( |
599 | | - "Some columns required to generate a QIIME-compliant " |
600 | | - "mapping file are not present in the template. A " |
601 | | - "placeholder value (XXQIITAXX) has been used to populate " |
602 | | - "these columns. Missing columns: %s" |
603 | | - % ', '.join(sorted(missing)), |
604 | | - qdb.exceptions.QiitaDBWarning) |
605 | | - |
606 | | - # Gets the original mapping columns and readjusts the order to comply |
607 | | - # with QIIME requirements |
608 | | - cols = mapping.columns.values.tolist() |
609 | | - cols.remove('BarcodeSequence') |
610 | | - cols.remove('LinkerPrimerSequence') |
611 | | - cols.remove('Description') |
612 | | - new_cols.extend(cols) |
613 | | - new_cols.append('Description') |
614 | | - mapping = mapping[new_cols] |
615 | | - |
616 | | - # figuring out the filepath for the QIIME map file |
617 | | - _id, fp = qdb.util.get_mountpoint('templates')[0] |
618 | | - filepath = join(fp, '%d_prep_%d_qiime_%s.txt' % (self.study_id, |
619 | | - self.id, strftime("%Y%m%d-%H%M%S"))) |
620 | | - |
621 | | - # Save the mapping file |
622 | | - mapping.to_csv(filepath, index_label='#SampleID', na_rep='', |
623 | | - sep='\t', encoding='utf-8') |
624 | | - |
625 | | - # adding the fp to the object |
626 | | - self.add_filepath( |
627 | | - filepath, |
628 | | - fp_id=qdb.util.convert_to_id("qiime_map", "filepath_type")) |
629 | | - |
630 | | - return filepath |
631 | | - |
632 | 519 | @property |
633 | 520 | def status(self): |
634 | 521 | """The status of the prep template |
|
0 commit comments