Skip to content

Commit

Permalink
Merge pull request #19 from galaxy-genome-annotation/expression_fix
Browse files Browse the repository at this point in the history
Fix loading of expression data when first column header is not empty
  • Loading branch information
abretaud committed Jun 23, 2021
2 parents 27f5edc + 442f6a3 commit 1b4551f
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 4 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ $ chakin feature load_fasta \
## History
- 2.3.7
- Fix loading of expression data when first column header is not empty
- 2.3.6
- Fix loading of GO terms from GFF
Expand Down
7 changes: 4 additions & 3 deletions chado/expression/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,10 +584,11 @@ def _process_matrix_file(self, file_path, separator):
with open(file_path) as f:
reader = csv.reader(f, delimiter=separator)
# Get headers (biomat list)
- # TODO : python2 compat (reader.next())
- biomaterial__full_list = next(reader)
+ biomaterial_full_list = next(reader)
+ # Remove first col (=transcript ids)
+ biomaterial_full_list = biomaterial_full_list[1:]
# Cleanup empty strings
- biomaterial_list = [x for x in biomaterial__full_list if x]
+ biomaterial_list = [x for x in biomaterial_full_list if x]
expected_len = len(biomaterial_list)
for line in reader:
# Get feature name
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name="chado",
- version='2.3.6',
+ version='2.3.7',
description="Chado library",
author="Anthony Bretaudeau",
author_email="[email protected]",
Expand Down
4 changes: 4 additions & 0 deletions test-data/expression_title.matrix
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
transcript Biomat1 Biomat2 Biomat3 Biomat4 Biomat5 Biomat6 Biomat7 Biomat8
Q02123|VNBP_POPMV 0.01 0.02 0.06 0.08 1.11111 1.21 1.98 1.88855
P16654|VNBP_PVSP 2.68 9.84 2.65 4.2 0 0.2 85.1 1.0
Q00572|VNBP_HELVS 2.01 2.5 1.1 8.65 50.0 0 1.6 9
22 changes: 22 additions & 0 deletions test/expression_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,28 @@ def test_add_expression(self):

assert elements.count() == 24

def test_add_expression_title(self):
    """Load an expression matrix whose header row starts with a title cell.

    The fixture ``expression_title.matrix`` has a non-empty first header
    cell ("transcript") followed by 8 biomaterial names, and 3 data rows.
    The test therefore expects 8 biomaterials and 8 * 3 = 24 element
    results after loading.
    """
    # Input fixtures: features come from the fasta, values from the matrix.
    fasta_path = "./test-data/proteins.fa"
    matrix_path = "./test-data/expression_title.matrix"

    # Organism is created first, then the analysis (same order as before).
    organism = self._create_fake_org()
    analysis = self._create_fake_an()
    organism_id = organism['organism_id']
    analysis_id = analysis['analysis_id']

    # Features must exist before expression values can be attached to them.
    self.ci.feature.load_fasta(
        fasta=fasta_path,
        analysis_id=analysis_id,
        organism_id=organism_id,
        sequence_type='mRNA',
    )
    self.ci.expression.add_expression(organism_id, analysis_id, matrix_path, separator="\t")

    biomaterials = self.ci.expression.get_biomaterials()
    assert len(biomaterials) == 8, "Unexpected number of biomaterials created"

    # Count element results by joining each element to its result row.
    model = self.ci.model
    joined_results = self.ci.session.query(
        model.element.arraydesign_id,
        model.elementresult.quantification_id,
        model.elementresult.signal,
    ).join(
        model.elementresult,
        model.element.element_id == model.elementresult.element_id,
    )

    assert joined_results.count() == 24

def setUp(self):

self.ci = ci
Expand Down

0 comments on commit 1b4551f

Please sign in to comment.