Skip to content

Commit

Permalink
Merge pull request #24 from waterflow80/database-populator-test-suite
Browse files Browse the repository at this point in the history
Test suite enhancements - SeqColWriter & AssemblyDataGenerator
  • Loading branch information
waterflow80 authored Aug 12, 2023
2 parents 514e755 + 83588ee commit 8212eee
Show file tree
Hide file tree
Showing 11 changed files with 370 additions and 192 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,8 @@ public interface SeqColExtendedDataRepository extends JpaRepository<SeqColExtend
public Optional<List<String>> getSeqColExtendedDataByLevel0Digest(@Param("level0Digest") String seqColDigest);

/**
 * Look up a single extended data entity by its digest.
 * The result is wrapped in an Optional.
 * NOTE(review): presumably a Spring Data derived query resolved from the method name - confirm.
 */
public Optional<SeqColExtendedDataEntity> getSeqColExtendedDataEntityByDigest(String digest);

/**
 * Remove the extended data entity that has the given digest.
 * NOTE(review): presumably a Spring Data derived delete query; such queries need an active
 * transaction, so confirm that every caller runs inside one.
 */
void removeSeqColExtendedDataEntityByDigest(String digest);

/**
 * Remove every extended data entity.
 * NOTE(review): the JpaRepository parent hierarchy should already provide deleteAll() - confirm
 * whether this explicit redeclaration is needed.
 */
void deleteAll();
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,8 @@ public interface SeqColLevelOneRepository extends JpaRepository<SeqColLevelOneEn
/**
 * Look up the level one seqCol entity that has the given digest.
 * Returns the bare entity rather than an Optional.
 * NOTE(review): presumably null when no entity matches - confirm.
 */
SeqColLevelOneEntity findSeqColLevelOneEntityByDigest(String digest);

/**
 * Count the level one seqCol entities that have the given digest.
 */
long countSeqColLevelOneEntitiesByDigest(String digest);

/**
 * Remove the level one seqCol entity that has the given digest.
 * NOTE(review): presumably a Spring Data derived delete query; such queries need an active
 * transaction, so confirm that every caller runs inside one.
 */
void removeSeqColLevelOneEntityByDigest(String digest);

/**
 * Remove every level one seqCol entity.
 * NOTE(review): the JpaRepository parent hierarchy should already provide deleteAll() - confirm
 * whether this explicit redeclaration is needed.
 */
void deleteAll();
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,27 @@ public List<SeqColExtendedDataEntity> addAll(List<SeqColExtendedDataEntity> seqC
return repository.saveAll(seqColExtendedDataList);
}

/**
 * Remove each of the given seqCol extended data entities.
 * Every entity is removed individually, using its own digest
 * (not the level 0 digest of the seqCol it belongs to).
 *
 * @param extendedDataEntities the extended data entities to remove
 */
@Transactional
public void removeSeqColExtendedDataEntities(List<SeqColExtendedDataEntity> extendedDataEntities) {
    extendedDataEntities.forEach(
            extendedData -> removeSeqColExtendedDataEntityByDigest(extendedData.getDigest()));
}

/**
 * Remove one extended data entity by its digest.
 * NOTE!: The given digest is not the seqCol level 0 digest
 *
 * Delegates directly to the repository's removeSeqColExtendedDataEntityByDigest.
 * NOTE(review): this method has no @Transactional annotation of its own; confirm that
 * callers invoke it inside a transaction.
 *
 * @param digest the digest of the extended data entity to remove
 */
public void removeSeqColExtendedDataEntityByDigest(String digest) {
repository.removeSeqColExtendedDataEntityByDigest(digest);
}

/**
 * Remove all extended data entities by delegating to the repository's deleteAll().*/
public void removeAllSeqColExtendedEntities() {
repository.deleteAll();
}

/**
* Return the extendedData object for the given digest*/
public Optional<SeqColExtendedDataEntity> getExtendedAttributeByDigest(String digest) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ public Optional<SeqColLevelOneEntity> addSequenceCollectionL1(SeqColLevelOneEnti
return Optional.of(seqCol);
}


public Optional<SeqColLevelOneEntity> getSeqColLevelOneByDigest(String digest){
SeqColLevelOneEntity seqColL11 = repository.findSeqColLevelOneEntityByDigest(digest);
if (seqColL11 != null) {
Expand All @@ -44,6 +43,14 @@ public Optional<SeqColLevelOneEntity> getSeqColLevelOneByDigest(String digest){
}
}

/**
 * Remove the level one seqCol entity that has the given digest.
 * Delegates directly to the repository's removeSeqColLevelOneEntityByDigest.
 * NOTE(review): this method has no @Transactional annotation of its own; confirm that
 * callers invoke it inside a transaction.
 *
 * @param digest the digest of the level one entity to remove
 */
public void removeSeqColLevelOneByDigest(String digest) {
repository.removeSeqColLevelOneEntityByDigest(digest);
}

/**
 * Remove all level one seqCol entities by delegating to the repository's deleteAll().*/
public void removeAllSeqCols() {
repository.deleteAll();
}

/**
 * Return the number of level one seqCol entities that have the given digest,
 * as reported by the repository.
 *
 * @param digest the digest to count entities for
 * @return the count returned by the repository
 */
public long countSeqColLevelOneEntitiesByDigest(String digest) {
return repository.countSeqColLevelOneEntitiesByDigest(digest);
}
Expand Down
16 changes: 16 additions & 0 deletions src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,22 @@ public Optional<? extends SeqColEntity> getSeqColByDigestAndLevel(String digest,
}
}

/**
 * Full remove of the seqCol object (level one and its extended data).
 * Annotated with @Transactional, so both removals are meant to happen in one transaction:
 * first the level one entity with the given digest, then each of the given extended data entities.
 * The extended data entities are not looked up from the digest; the caller must supply them.
 *
 * @param digest digest of the level one entity to remove
 *               (presumably the seqCol level 0 digest - confirm)
 * @param extendedDataEntities the extended data entities to remove along with it
 */
@Transactional
public void deleteFullSeqCol(String digest, List<SeqColExtendedDataEntity> extendedDataEntities) {
levelOneService.removeSeqColLevelOneByDigest(digest);
extendedDataService.removeSeqColExtendedDataEntities(extendedDataEntities);
}

/**
 * Remove all seqCol entities (level 1 and the extended entities) from the database.
 * Annotated with @Transactional, so both removals are meant to happen in one transaction:
 * the level one entities are removed first, then the extended data entities.
 */
@Transactional
public void removeAllSeqCol() {
levelOneService.removeAllSeqCols();
extendedDataService.removeAllSeqColExtendedEntities();
}

/**
* Fetch and insert all possible seqCol objects for the given assembly accession.
* NOTE: All possible seqCol objects means with all possible/provided naming conventions that could be found in the
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package uk.ac.ebi.eva.evaseqcol.io;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import uk.ac.ebi.eva.evaseqcol.dus.NCBIAssemblyReportReader;
import uk.ac.ebi.eva.evaseqcol.dus.NCBIAssemblyReportReaderFactory;
import uk.ac.ebi.eva.evaseqcol.dus.NCBIAssemblySequenceReader;
import uk.ac.ebi.eva.evaseqcol.dus.NCBIAssemblySequenceReaderFactory;
import uk.ac.ebi.eva.evaseqcol.entities.AssemblyEntity;
import uk.ac.ebi.eva.evaseqcol.entities.AssemblySequenceEntity;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

/**
 * Builds assembly entities from the assembly report and the sequences FASTA file
 * stored under "src/test/resources/" for the assembly accession "GCA_000146045.2".
 *
 * Each call opens its own stream and closes it before returning, so no stream state
 * is shared between calls or between instances.
 */
@Component
public class AssemblyDataGenerator {

    private static final String REPORT_FILE_PATH = "src/test/resources/GCA_000146045.2_R64_assembly_report.txt";
    private static final String SEQUENCES_FILE_PATH = "src/test/resources/GCA_000146045.2_genome_sequence.fna";

    private static final String GCA_ACCESSION = "GCA_000146045.2";

    @Autowired
    private NCBIAssemblyReportReaderFactory reportReaderFactory;

    @Autowired
    private NCBIAssemblySequenceReaderFactory sequenceReaderFactory;

    /**
     * Return the Assembly entity that corresponds to the assembly report located
     * under REPORT_FILE_PATH, having as assembly accession: "GCA_000146045.2".
     *
     * @return the assembly entity produced by the report reader
     * @throws IOException if the report file cannot be opened, read or closed
     */
    public AssemblyEntity generateAssemblyEntity() throws IOException {
        // try-with-resources: the stream and its reader are closed even when parsing fails
        try (InputStream reportStream = new FileInputStream(new File(REPORT_FILE_PATH));
             InputStreamReader reportStreamReader = new InputStreamReader(reportStream)) {
            NCBIAssemblyReportReader reportReader = reportReaderFactory.build(reportStreamReader);
            return reportReader.getAssemblyEntity();
        }
    }

    /**
     * Return the AssemblySequenceEntity that corresponds to the sequences FASTA file
     * located under SEQUENCES_FILE_PATH, having as assembly accession: "GCA_000146045.2".
     *
     * @return the assembly sequences entity produced by the sequence reader
     * @throws IOException if the FASTA file cannot be opened, read or closed
     */
    public AssemblySequenceEntity generateAssemblySequenceEntity() throws IOException {
        // try-with-resources: the stream and its reader are closed even when parsing fails
        try (InputStream sequencesStream = new FileInputStream(new File(SEQUENCES_FILE_PATH));
             InputStreamReader sequencesStreamReader = new InputStreamReader(sequencesStream)) {
            NCBIAssemblySequenceReader sequenceReader =
                    sequenceReaderFactory.build(sequencesStreamReader, GCA_ACCESSION);
            return sequenceReader.getAssemblySequencesEntity();
        }
    }
}
176 changes: 176 additions & 0 deletions src/test/java/uk/ac/ebi/eva/evaseqcol/io/SeqColWriter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
package uk.ac.ebi.eva.evaseqcol.io;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;


import uk.ac.ebi.eva.evaseqcol.dus.NCBIAssemblyReportReader;
import uk.ac.ebi.eva.evaseqcol.dus.NCBIAssemblyReportReaderFactory;
import uk.ac.ebi.eva.evaseqcol.dus.NCBIAssemblySequenceReader;
import uk.ac.ebi.eva.evaseqcol.dus.NCBIAssemblySequenceReaderFactory;
import uk.ac.ebi.eva.evaseqcol.entities.AssemblyEntity;
import uk.ac.ebi.eva.evaseqcol.entities.AssemblySequenceEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColExtendedDataEntity;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;
import uk.ac.ebi.eva.evaseqcol.service.SeqColExtendedDataService;
import uk.ac.ebi.eva.evaseqcol.service.SeqColLevelOneService;
import uk.ac.ebi.eva.evaseqcol.service.SeqColService;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;

/**
 * This class is responsible for saving and deleting seqCol
 * objects to and from the database.
 *
 * The data written comes from the assembly report and the sequences FASTA file of the
 * assembly GCA_000146045.2, both stored under "src/test/resources/".
 */
@Component
public class SeqColWriter {

    private static final Logger logger = LoggerFactory.getLogger(SeqColWriter.class);

    private static final String REPORT_FILE_PATH_1 = "src/test/resources/GCA_000146045.2_R64_assembly_report.txt";

    private static final String SEQUENCES_FILE_PATH_1 = "src/test/resources/GCA_000146045.2_genome_sequence.fna";

    private static final String GCA_ACCESSION = "GCA_000146045.2";

    // Holds the digests of the seqCol objects inserted by the most recent write() call
    private final List<String> insertedSeqColDigests = new ArrayList<>();

    private final NCBIAssemblyReportReaderFactory reportReaderFactory;

    private final NCBIAssemblySequenceReaderFactory sequenceReaderFactory;

    private final SeqColLevelOneService levelOneService;

    private final SeqColExtendedDataService extendedDataService;

    private final SeqColService seqColService;

    @Autowired
    public SeqColWriter(NCBIAssemblyReportReaderFactory reportReaderFactory, NCBIAssemblySequenceReaderFactory sequenceReaderFactory,
                        SeqColLevelOneService levelOneService, SeqColExtendedDataService extendedDataService, SeqColService seqColService) {
        this.reportReaderFactory = reportReaderFactory;
        this.sequenceReaderFactory = sequenceReaderFactory;
        this.levelOneService = levelOneService;
        this.extendedDataService = extendedDataService;
        this.seqColService = seqColService;
    }

    /**
     * Save seqCol objects of assembly GCA_ACCESSION for naming conventions UCSC and GENBANK.
     * NOTE: The assembly report and the sequences FASTA file for this assembly are already downloaded
     * and put into "src/test/resources/"
     *
     * A failed insertion (empty result digest) is logged as an error and skipped; it does not
     * abort the remaining insertions.
     *
     * @throws IOException if one of the files cannot be opened, read or closed
     */
    public void write() throws IOException {
        // Start from a clean list so it only reflects this call
        insertedSeqColDigests.clear();

        // try-with-resources: all streams and readers are closed even when an insertion fails
        try (InputStream reportStream = new FileInputStream(new File(REPORT_FILE_PATH_1));
             InputStreamReader reportStreamReader = new InputStreamReader(reportStream);
             InputStream sequencesStream = new FileInputStream(new File(SEQUENCES_FILE_PATH_1));
             InputStreamReader sequencesStreamReader = new InputStreamReader(sequencesStream)) {

            NCBIAssemblyReportReader reportReader = reportReaderFactory.build(reportStreamReader);
            NCBIAssemblySequenceReader sequenceReader =
                    sequenceReaderFactory.build(sequencesStreamReader, GCA_ACCESSION);

            AssemblyEntity assemblyEntity = reportReader.getAssemblyEntity();
            AssemblySequenceEntity assemblySequenceEntity = sequenceReader.getAssemblySequencesEntity();

            insertSeqCol(assemblyEntity, assemblySequenceEntity, SeqColEntity.NamingConvention.UCSC);
            insertSeqCol(assemblyEntity, assemblySequenceEntity, SeqColEntity.NamingConvention.GENBANK);
        }
    }

    /**
     * Construct and insert the seqCol object of the given assembly for one naming convention,
     * and record its digest when the insertion returned one.
     */
    private void insertSeqCol(AssemblyEntity assemblyEntity, AssemblySequenceEntity assemblySequenceEntity,
                              SeqColEntity.NamingConvention namingConvention) throws IOException {
        List<SeqColExtendedDataEntity> extendedDataEntities = extendedDataService.constructExtendedSeqColDataList(
                assemblyEntity, assemblySequenceEntity, namingConvention);
        SeqColLevelOneEntity levelOneEntity = levelOneService.constructSeqColLevelOne(
                extendedDataEntities, namingConvention);
        Optional<String> resultDigest = seqColService.addFullSequenceCollection(levelOneEntity, extendedDataEntities);
        if (resultDigest.isPresent()) {
            logger.info("Successfully inserted seqCol object with the assembly accession {} for naming convention {}",
                        GCA_ACCESSION, namingConvention);
            // Only record digests that exist; calling get() on an empty result would throw
            insertedSeqColDigests.add(resultDigest.get());
        } else {
            logger.error("Could not insert seqCol object with the assembly accession {} for naming convention {}",
                         GCA_ACCESSION, namingConvention);
        }
    }

    /**
     * Remove all seqCol objects from the database.
     * NOTE: this delegates to SeqColService.removeAllSeqCol(), so it is not limited to the
     * objects inserted by this class.
     * */
    public void clearData() {
        seqColService.removeAllSeqCol();
    }

}
Loading

0 comments on commit 8212eee

Please sign in to comment.