From 33795c31c8fc77ad9b3cfe8815cccf0999374493 Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Fri, 14 Feb 2025 14:24:49 -0700 Subject: [PATCH 1/5] feat: SamSource.query should handle out of range queries --- .../fulcrumgenomics/bam/api/SamSource.scala | 10 +++++++- .../fulcrumgenomics/bam/api/SamIoTest.scala | 23 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala b/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala index c9ea7703e..73c658ab6 100644 --- a/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala +++ b/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala @@ -91,9 +91,17 @@ class SamSource private(private val reader: SamReader) extends View[SamRecord] w /** Returns an iterator over all the records in the source. */ override def iterator: SamIterator = new SamIterator(reader.getFileHeader, reader.iterator()) + private def newQueryInterval(region: Locatable): QueryInterval = { + val contig = dict(region.getContig) + val contigIndex = contig.index + val start = Math.max(l.getStart, 1); + val end = Math.min(l.getEnd, contig.length) + new QueryInterval(contigIndex, start, end) + } + /** Returns an iterator over the records in the regions provided. */ def query(regions: IterableOnce[Locatable], queryType: QueryType = QueryType.Overlapping): SamIterator = { - val queries = QueryInterval.optimizeIntervals(regions.iterator.map(l => new QueryInterval(dict(l.getContig).index, l.getStart, l.getEnd)).toArray) + val queries = QueryInterval.optimizeIntervals(regions.iterator.map(l => newQueryInterval(l)).toArray) val contained = queryType == QueryType.Contained new SamIterator(header, reader.query(queries, contained)) } diff --git a/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala b/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala index c6c0fb9b1..1cf0b53c5 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala @@ -164,4 +164,27 @@ class SamIoTest extends UnitSpec { filterCount shouldBe 10 mapCount shouldBe 10 } + + "SamSource.query" should "" in { + val builder = new SamBuilder(readLength=10, baseQuality=20) + Range(0, 10).foreach { _ => builder.addFrag(start=100) } + val source = builder.toSource + + // test a query before the contig start + source.query("chr1", 0, 1000).length shouldBe 10 + source.query("chr1", -100, 1000).length shouldBe 10 + + // test a query after the contig end + val contigEnd = builder.dict("chr1").length + source.query("chr1", 100, contigEnd+1).length shouldBe 10 + source.query("chr1", 100, contigEnd+100).length shouldBe 10 + + // test a query both before and after the contig start and end respectively + source.query("chr1", -100, contigEnd+100).length shouldBe 10 + + // at the start and end + source.query("chr1", 1, 1000).length shouldBe 10 + source.query("chr1", 100, contigEnd).length shouldBe 10 + source.query("chr1", 1, contigEnd).length shouldBe 10 + } } From 721d9a152c6f89e9e7bb9e645b56d14a7c618976 Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Fri, 14 Feb 2025 14:26:43 -0700 Subject: [PATCH 2/5] fix --- src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala b/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala index 73c658ab6..f53e8635f 100644 --- a/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala +++ b/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala @@ -94,8 +94,8 @@ class SamSource private(private val reader: SamReader) extends View[SamRecord] w private def newQueryInterval(region: Locatable): QueryInterval = { val contig = dict(region.getContig) val contigIndex = contig.index - val start = Math.max(l.getStart, 1); - val end = Math.min(l.getEnd, contig.length) + val start = Math.max(region.getStart, 1); + val end = Math.min(region.getEnd, contig.length) new QueryInterval(contigIndex, start, end) } From a5037dc28a91d97771dbb00a9d8b62b12b84008c Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Fri, 14 Feb 2025 14:31:08 -0700 Subject: [PATCH 3/5] fixes --- .../fulcrumgenomics/bam/api/SamSource.scala | 9 ++++----- .../fulcrumgenomics/bam/api/SamIoTest.scala | 20 ++++++++++--------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala b/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala index f53e8635f..32852cebd 100644 --- a/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala +++ b/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala @@ -92,11 +92,10 @@ class SamSource private(private val reader: SamReader) extends View[SamRecord] w override def iterator: SamIterator = new SamIterator(reader.getFileHeader, reader.iterator()) private def newQueryInterval(region: Locatable): QueryInterval = { - val contig = dict(region.getContig) - val contigIndex = contig.index - val start = Math.max(region.getStart, 1); - val end = Math.min(region.getEnd, contig.length) - new QueryInterval(contigIndex, start, end) + val contig = dict(region.getContig) + val start = Math.max(region.getStart, 1); + val end = Math.min(region.getEnd, contig.length) + new QueryInterval(contig.index, start, end) } /** Returns an iterator over the records in the regions provided. */ diff --git a/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala b/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala index 1cf0b53c5..5628a0d4f 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala @@ -28,6 +28,7 @@ import java.nio.file.Files import java.util.concurrent.{Callable, Executors, TimeUnit} import com.fulcrumgenomics.FgBioDef._ +import com.fulcrumgenomics.bam.api.QueryType.QueryType import com.fulcrumgenomics.fasta.{SequenceDictionary, SequenceMetadata} import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec} import com.fulcrumgenomics.util.Io @@ -166,25 +167,26 @@ class SamIoTest extends UnitSpec { } "SamSource.query" should "" in { + val queryType = QueryType.Overlapping val builder = new SamBuilder(readLength=10, baseQuality=20) Range(0, 10).foreach { _ => builder.addFrag(start=100) } val source = builder.toSource // test a query before the contig start - source.query("chr1", 0, 1000).length shouldBe 10 - source.query("chr1", -100, 1000).length shouldBe 10 + source.query("chr1", 0, 1000, queryType).length shouldBe 10 + source.query("chr1", -100, 1000, queryType).length shouldBe 10 // test a query after the contig end - val contigEnd = builder.dict("chr1").length - source.query("chr1", 100, contigEnd+1).length shouldBe 10 - source.query("chr1", 100, contigEnd+100).length shouldBe 10 + val contigEnd = builder.dict("chr1", queryType).length + source.query("chr1", 100, contigEnd+1, queryType).length shouldBe 10 + source.query("chr1", 100, contigEnd+100, queryType).length shouldBe 10 // test a query both before and after the contig start and end respectively - source.query("chr1", -100, contigEnd+100).length shouldBe 10 + source.query("chr1", -100, contigEnd+100, queryType).length shouldBe 10 // at the start and end - source.query("chr1", 1, 1000).length shouldBe 10 - source.query("chr1", 100, contigEnd).length shouldBe 10 - source.query("chr1", 1, contigEnd).length shouldBe 10 + source.query("chr1", 1, 1000, queryType).length shouldBe 10 + source.query("chr1", 100, contigEnd, queryType).length shouldBe 10 + source.query("chr1", 1, contigEnd, queryType).length shouldBe 10 } } From 836e3d36f3399018052167d687749f2b52bbfb2f Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Fri, 14 Feb 2025 14:34:27 -0700 Subject: [PATCH 4/5] fix --- src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala b/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala index 5628a0d4f..692b2fea2 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala @@ -177,7 +177,7 @@ class SamIoTest extends UnitSpec { source.query("chr1", -100, 1000, queryType).length shouldBe 10 // test a query after the contig end - val contigEnd = builder.dict("chr1", queryType).length + val contigEnd = builder.dict("chr1").length source.query("chr1", 100, contigEnd+1, queryType).length shouldBe 10 source.query("chr1", 100, contigEnd+100, queryType).length shouldBe 10 From f1bfc3622ac71ee2c2dfcca135313f82115618d4 Mon Sep 17 00:00:00 2001 From: Nils Homer Date: Fri, 14 Feb 2025 14:40:32 -0700 Subject: [PATCH 5/5] sort --- src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala b/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala index 692b2fea2..d1b129b1e 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala @@ -168,7 +168,7 @@ class SamIoTest extends UnitSpec { "SamSource.query" should "" in { val queryType = QueryType.Overlapping - val builder = new SamBuilder(readLength=10, baseQuality=20) + val builder = new SamBuilder(readLength=10, baseQuality=20, sort=Some(SamOrder.Coordinate)) Range(0, 10).foreach { _ => builder.addFrag(start=100) } val source = builder.toSource