Skip to content

Commit

Permalink
Update parse_fastqc.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
swatiebi authored May 8, 2024
1 parent d9039ee commit b1e9081
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/python/ensembl/genes/metadata/qc/parse_fastqc.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -28,6 +28,7 @@ def parse_fastqc_data(fastqc_data_path):
data = f.read()
# Regex patterns to extract total sequences, sequence length and %GC
total_sequences_pattern = r"Total Sequences\s+(\d+)"
sequence_length_pattern = r"Sequence length\s+(\d+)"
gc_content_pattern = r"%GC\s+(\d+)"
# Search for total sequences
total_sequences_match = re.search(total_sequences_pattern, data)
Expand All @@ -37,6 +38,9 @@ def parse_fastqc_data(fastqc_data_path):
gc_content_match = re.search(gc_content_pattern, data)
if gc_content_match:
fastqc_data['gc_content'] = int(gc_content_match.group(1))
sequence_length_match = re.search(sequence_length_pattern, data)
if sequence_length_match:
fastqc_data['sequence_length'] = int(sequence_length_match.group(1))

return fastqc_data

Expand Down

0 comments on commit b1e9081

Please sign in to comment.