Skip to content

Commit

Permalink
Resolve main conflicts (#556)
Browse files Browse the repository at this point in the history
  • Loading branch information
illusional committed Sep 25, 2023
1 parent d6f9bb4 commit 6d1f9f1
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 41 deletions.
25 changes: 5 additions & 20 deletions metamist/audit/audithelper.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,21 @@
# pylint: disable=no-member
import csv
from collections import defaultdict
import logging
import os
from collections import defaultdict
from typing import Any

from cloudpathlib import AnyPath
from cpg_utils.cloud import get_path_components_from_gcp_path

from metamist.parser.cloudhelper import CloudHelper


class AuditHelper(CloudHelper):
"""General helper class for bucket auditing"""

EXCLUDED_SGS = [
'CPG11783', # acute-care, no FASTQ data
'CPG13409', # perth-neuro, coverage ~0x
'CPG243717', # validation, NA12878_KCCG low coverage https://main-web.populationgenomics.org.au/validation/qc/cram/multiqc.html,
'CPG246645', # ag-hidden, eof issue https://batch.hail.populationgenomics.org.au/batches/97645/jobs/440
'CPG246678', # ag-hidden, diff fastq size https://batch.hail.populationgenomics.org.au/batches/97645/jobs/446
'CPG261792', # rdp-kidney misformated fastq - https://batch.hail.populationgenomics.org.au/batches/378736/jobs/43
# acute care fasq parsing errors https://batch.hail.populationgenomics.org.au/batches/379303/jobs/24
'CPG259150',
'CPG258814',
'CPG258137',
'CPG258111',
'CPG258012',
# ohmr4 cram parsing in align issues
'CPG261339',
'CPG261347',
# IBMDX truncated sample? https://batch.hail.populationgenomics.org.au/batches/422181/jobs/99
'CPG265876',
]
# IDs listed in the comma-separated SM_AUDIT_HELPER_EXCLUDED_SGS environment
# variable. Entries are stripped of surrounding whitespace and blank entries
# are dropped, because ''.split(',') returns [''] and would otherwise leave a
# bogus empty-string ID in the list when the variable is unset or empty.
EXCLUDED_SGS = [
    sg.strip()
    for sg in os.getenv('SM_AUDIT_HELPER_EXCLUDED_SGS', '').split(',')
    if sg.strip()
]

@staticmethod
def get_gcs_bucket_subdirs_to_search(paths: list[str]) -> defaultdict[str, list]:
Expand Down
19 changes: 6 additions & 13 deletions scripts/find_sequence_files_to_delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@

# pylint: disable=W0703

import os
import asyncio
import logging
import os
from collections import defaultdict
from typing import Any

import click
from google.cloud import storage

from metamist.apis import ProjectApi
from metamist.graphql import query_async

Expand All @@ -33,17 +34,7 @@
projapi = ProjectApi()

# TODO: fetch this from metamist
CPG_SEQUENCING_GROUP_IDS_TO_SKIP = {
'CPG11783', # acute-care, no FASTQ data
'CPG255232', # perth-neuro: new
'CPG255240', # perth-neuro: new
'CPG253328', # perth-neuro, contamination rate 32%
'CPG13409', # perth-neuro, coverage ~0x
'CPG243717',
'CPG246645', # ag-hidden, eof issue https://batch.hail.populationgenomics.org.au/batches/97645/jobs/440
'CPG246678', # ag-hidden, diff fastq size https://batch.hail.populationgenomics.org.au/batches/97645/jobs/446
'CPG246561', # ag-hidden, coverage ~0x https://main-web.populationgenomics.org.au/ag-hidden/qc/cram/multiqc.html
}
# Intentionally empty. Note that `{}` would create a dict, not a set, so the
# empty set is spelled `set()` to keep the type of the original ID set.
CPG_SEQUENCING_GROUP_IDS_TO_SKIP: set[str] = set()


def get_bucket_name_from_path(path_input):
Expand Down Expand Up @@ -183,7 +174,9 @@ def find_existing_files_in_bucket(


async def find_files_to_delete(
sequencing_types_to_remove: list[str], projects_to_ignore: list[str], output_path: str
sequencing_types_to_remove: list[str],
projects_to_ignore: list[str],
output_path: str,
):
"""
Get all the sequences across all the projects from metamist
Expand Down
4 changes: 2 additions & 2 deletions scripts/parse_existing_cohort.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ def sequence_meta_map():

def fastq_file_name_to_sample_id(filename: str) -> str:
    """
    Extract the sample ID from a FASTQ file name.

    The sample ID is the third and fourth underscore-separated fields of the
    file name, re-joined with an underscore, e.g.:

    SOMEID1_2_220101_SAMPLEID_Homo-sapiens_AACGAGGCCG-ATCCAGGTAT_R_220101_FIRLAS1_PROJECT_M002_R1.
    -> 220101_SAMPLEID

    A name with fewer than three underscore-separated fields yields an empty
    string; a name with exactly three yields only the third field.
    """
    return '_'.join(filename.split('_')[2:4])

Expand Down
8 changes: 2 additions & 6 deletions scripts/sync_seqr.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,7 @@
import yaml
from cloudpathlib import AnyPath

from metamist.apis import (
AnalysisApi,
ProjectApi,
SeqrApi,
)
from metamist.apis import AnalysisApi, ProjectApi, SeqrApi
from metamist.graphql import query_async
from metamist.model.analysis_query_model import AnalysisQueryModel
from metamist.model.analysis_status import AnalysisStatus
Expand Down Expand Up @@ -59,7 +55,7 @@
),
}

SGS_TO_IGNORE = {'CPG227355', 'CPG227397'}
# Intentionally empty. `{}` would create a dict, not a set, so the empty set
# is spelled `set()` to keep the type of the original ID set.
SGS_TO_IGNORE = set()
BASE, SEQR_AUDIENCE = ENVS[ENVIRONMENT]

url_individuals_sync = '/api/project/sa/{projectGuid}/individuals/sync'
Expand Down

0 comments on commit 6d1f9f1

Please sign in to comment.