Handle Windows (CRLF) line endings in FASTA headers.

byte_iterfasta now strips the line terminator from header lines, whether it
is "\n" or "\r\n". The terminator is removed with rstrip, so a header on a
final line that has no trailing newline keeps all of its characters and a
bare ">" line does not raise IndexError.

diff --git a/test/test_parsecontigs.py b/test/test_parsecontigs.py
index dadc7dee..20adbe50 100644
--- a/test/test_parsecontigs.py
+++ b/test/test_parsecontigs.py
@@ -108,3 +108,27 @@ def test_save_load(self):
         self.assertTrue(np.all(md1.lengths == md2.lengths))
         self.assertTrue(np.all(md1.refhash == md2.refhash))
         self.assertTrue(np.all(md1.minlength == md2.minlength))
+
+    def test_windows_newlines(self):
+        """CRLF and LF FASTA input must yield identical compositions."""
+        rng = random.Random()
+        buf1 = io.BytesIO()
+        buf2 = io.BytesIO()
+        for _ in range(10):
+            record = testtools.make_randseq(rng, 10, 20)
+            buf1.write(b">" + record.header.encode())
+            buf2.write(b">" + record.header.encode())
+            buf1.write(b"\r\n")
+            buf2.write(b"\n")
+            buf1.write(record.sequence)
+            buf2.write(record.sequence)
+            buf1.write(b"\r\n")
+            buf2.write(b"\n")
+
+        buf1.seek(0)
+        buf2.seek(0)
+        comp1 = Composition.from_file(buf1)
+        comp2 = Composition.from_file(buf2)
+
+        self.assertEqual(comp1.metadata.refhash, comp2.metadata.refhash)
+        self.assertTrue(np.all(comp1.matrix == comp2.matrix))
diff --git a/vamb/vambtools.py b/vamb/vambtools.py
index f9035e9c..93aee3e4 100644
--- a/vamb/vambtools.py
+++ b/vamb/vambtools.py
@@ -309,7 +309,8 @@ def byte_iterfasta(
         )
         raise TypeError(errormsg) from None
 
-    header = probeline[1:]
+    # Drop the line ending (\n or \r\n); a final line may have none at all
+    header = probeline[1:].rstrip(b"\r\n")
     buffer: list[bytes] = list()
 
     # Iterate over lines
@@ -320,7 +321,7 @@ def byte_iterfasta(
         elif line.startswith(b">"):
             yield FastaEntry(header, bytearray().join(buffer))
             buffer.clear()
-            header = line[1:]
+            header = line[1:].rstrip(b"\r\n")
 
         else:
             buffer.append(line)