Skip to content

Commit

Permalink
Merge branch 'dev' into graphql-mutations-v2
Browse files Browse the repository at this point in the history
  • Loading branch information
nevoodoo authored Nov 27, 2024
2 parents 098b3a2 + 9c5aca1 commit d4e3da9
Show file tree
Hide file tree
Showing 7 changed files with 244 additions and 15 deletions.
24 changes: 24 additions & 0 deletions db/project.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1865,4 +1865,28 @@
remarks="Migration of family external IDs to separate table"
/>
</changeSet>
<changeSet author="dan.coates" id="2024-11-27_clean_up_project_table">
<sql>SET @@system_versioning_alter_history = 1;</sql>

<dropForeignKeyConstraint baseTableName="project"
constraintName="FK_PROJECT_READ_GROUP_GROUP_ID" />
<dropForeignKeyConstraint baseTableName="project"
constraintName="FK_PROJECT_WRITE_GROUP_GROUP_ID" />
<dropColumn
tableName="project"
columnName="read_group_id"
/>
<dropColumn
tableName="project"
columnName="write_group_id"
/>
<dropColumn
tableName="project"
columnName="read_group_name"
/>
<dropColumn
tableName="project"
columnName="write_group_name"
/>
</changeSet>
</databaseChangeLog>
10 changes: 5 additions & 5 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#
aiohappyeyeballs==2.4.0
# via aiohttp
aiohttp==3.10.5
aiohttp==3.10.*
# via -r requirements.in
aiomysql==0.2.0
# via databases
Expand Down Expand Up @@ -110,7 +110,7 @@ docker==7.1.0
# via testcontainers
email-validator==2.2.0
# via fastapi
fastapi[all]==0.112.2
fastapi[all]==0.115.5
# via
# -r requirements.in
# strawberry-graphql
Expand Down Expand Up @@ -413,7 +413,7 @@ sqlalchemy==2.0.34
# via
# databases
# testcontainers
starlette==0.38.4
starlette==0.41.2
# via
# fastapi
# strawberry-graphql
Expand Down Expand Up @@ -471,13 +471,13 @@ watchfiles==0.24.0
# via uvicorn
websockets==13.0.1
# via uvicorn
werkzeug==3.0.4
werkzeug==3.0.6
# via
# flask
# functions-framework
wrapt==1.16.0
# via
# deprecated
# testcontainers
yarl==1.9.10
yarl==1.12.*
# via aiohttp
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ google-cloud-logging==2.7.0
google-cloud-pubsub==2.18.3
google-cloud-storage==1.43.0
uvicorn==0.29.0
fastapi[all]==0.112.2
fastapi[all]==0.115.5
strawberry-graphql[fastapi]==0.243.0
databases[mysql]==0.9.0
cryptography>=41.0.0
Expand Down
8 changes: 4 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#
aiohappyeyeballs==2.4.0
# via aiohttp
aiohttp==3.10.5
aiohttp==3.10.*
# via -r requirements.in
aiomysql==0.2.0
# via databases
Expand Down Expand Up @@ -79,7 +79,7 @@ dnspython==2.6.1
# via email-validator
email-validator==2.2.0
# via fastapi
fastapi[all]==0.112.2
fastapi[all]==0.115.5
# via
# -r requirements.in
# strawberry-graphql
Expand Down Expand Up @@ -298,7 +298,7 @@ sniffio==1.3.1
# httpx
sqlalchemy==2.0.34
# via databases
starlette==0.38.4
starlette==0.41.2
# via fastapi
strawberry-graphql[fastapi]==0.243.0
# via -r requirements.in
Expand Down Expand Up @@ -338,5 +338,5 @@ websockets==13.0.1
# via uvicorn
wrapt==1.16.0
# via deprecated
yarl==1.9.10
yarl==1.12.*
# via aiohttp
10 changes: 8 additions & 2 deletions scripts/back_populate_library_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
# VCGS fastq
# This is the current name format
vcgs_fastq_regex = (
r'(?P<run_date>\d{6})_(?P<g2>[A-Z\d]+)_(?P<g3>\d{4})_'
r'(?P<g4>[A-Z]{2}\d+)_(?P<sample_id>[\w\d-]+)_(?P<library_id>[A-Z\d-]+)_'
r'(?P<run_date>\d{6,8})_(?P<g2>[A-Z\d]+)_(?P<g3>\d{4})_'
r'(?P<g4>[A-Z]{2}\d+)_(?P<sample_id>[\w\d-]+)_(?P<library_id>[A-Z\d-]+\D?)_'
r'(?P<library_type>[\w\d]+)_(?P<lane>L\d+)_(?P<read>R[12])\.fastq\.gz'
)
# Pre mid 2018 the library id was not included:
Expand Down Expand Up @@ -115,6 +115,12 @@ def check_assay_meta_fields(assays: list[dict], update_sequencing_groups: bool):
'design_description'
)

# Some fastq files are not named in the standard format, but contain the word 'TWIST'
# which we can use to label then with the generic 'TWIST' library type from VCGS
elif 'TWIST' in fastq_filename:
assay_meta_fields_to_update['facility'] = 'vcgs'
assay_meta_fields_to_update['library_type'] = 'TWIST'

else:
logging.warning(
f'No file name match found for assay {assay_id}. Skipping {fastq_filename}.'
Expand Down
188 changes: 188 additions & 0 deletions scripts/upsert_participants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
import csv
import logging
from collections import defaultdict
from io import StringIO
from typing import Dict, List

import click
from google.cloud import storage

from metamist.apis import ParticipantApi
from metamist.graphql import gql, query
from metamist.models import (
ParticipantUpsert,
SampleUpsert,
)

SAMPLE_QUERY = gql(
"""
query getSampleData($project: String!) {
project(name: $project) {
id
}
sample(project: {eq: $project}) {
id
active
externalId
sequencingGroups {
id
assays {
id
meta
}
analyses {
id
output
outputs
meta
}
}
}
}
"""
)

PARTICIPANT_QUERY = gql(
"""
query getParticipantData($project: String!) {
project(name: $project) {
participants {
id
externalId
}
}
}
"""
)

papi = ParticipantApi()


def map_sample_eid_to_sample_data(data: Dict[str, list]):
"""Map sample external ID to sample data"""
return {sample['externalId']: sample for sample in data['sample']}


def get_sample_data(project: str):
"""Get sample data from metamist"""
response = query(SAMPLE_QUERY, {'project': project})
if not isinstance(response, dict):
raise ValueError('Expected a dictionary response from query')
return response


def create_participant(sample_data: list, peid: str, sex: int, external_org_name: str):
"""Create participant upsert object"""
return ParticipantUpsert(
id=None,
external_ids={external_org_name: peid},
reported_sex=sex,
samples=sample_data,
)


def create_sample(project_id: int, seid: str, siid: str, external_org_name: str):
"""Create sample upsert object"""
return SampleUpsert(
id=siid,
external_ids={external_org_name: seid},
project=project_id,
)


def read_files_from_gcs(bucket_name: str, file_path: str):
"""Read files from GCS"""
client = storage.Client()
bucket = client.get_bucket(bucket_name)
blob = bucket.blob(file_path)
blob = blob.download_as_text(encoding='utf-8')
return StringIO(blob)


@click.command()
@click.option('--bucket', help='Bucket containing files. e.g. cpg-bucket-test-upload')
@click.option('--participant-meta', help='Path to participant metadata CSV.')
@click.option('--sample-meta', help='Path to sample metadata CSV.')
@click.option('--sample-external-id-column', help='Column name for sample external ID.')
@click.option('--external-org-name', required=False, help='External organization name.')
def main(
bucket: str,
participant_meta: str,
sample_meta: str,
sample_external_id_column: str,
external_org_name: str | None = None,
): # pylint: disable=too-many-locals
"""Upsert participants to metamist"""
# Query metamist
project = bucket.split('-')[1] # project name derived from bucket name
data_response = get_sample_data(project)
project_id = data_response.get('project')['id']

# allow empty external_org_name for backwards compatibility
if external_org_name is None:
external_org_name = ''

sample_eid_mapping = map_sample_eid_to_sample_data(data_response)

# Set up logging
logging.basicConfig(level=logging.INFO)

# Read in csv
participant_data_io = read_files_from_gcs(bucket, participant_meta)
sample_data_io = read_files_from_gcs(bucket, sample_meta)

participant_data_list = list(csv.DictReader(participant_data_io))
sample_data_list = list(csv.DictReader(sample_data_io, delimiter='\t'))

# Count of samples for each participant
participant_sample_count: Dict[str, int] = defaultdict(int)
metadata_map = {}
for meta_row in participant_data_list:
metadata_map[meta_row['sampleID']] = meta_row
peid = meta_row['externalID']
participant_sample_count[peid] += 1

# Map peid to a list of sample upsert objects
participant_sex_map = {}
participant_sample_map: Dict[str, List[SampleUpsert]] = defaultdict(list)

for name_row in sample_data_list:
# Get the external ID
seid = '.'.join(name_row['CRAM'].split('.')[:2])

meta_row = metadata_map.get(name_row[sample_external_id_column])
# Get participant external ID
peid = meta_row['externalID']

# Code sex
sex = 2 if meta_row['isFemale'].upper() == 'TRUE' else 1

# Get the sample internal ID
if seid not in sample_eid_mapping:
print(f'Sample {seid} not found in metamist.')
continue
siid = sample_eid_mapping[seid]['id']

if peid not in participant_sample_map:
participant_sample_map[peid] = []
participant_sex_map[peid] = sex
participant_sample_map[peid].append(
create_sample(project_id, seid, siid, external_org_name)
)

participant_upserts = []
for peid, sample_upserts in participant_sample_map.items():
participant_upserts.append(
create_participant(
sample_upserts, peid, participant_sex_map[peid], external_org_name
)
)

# Upsert participants
api_response = papi.upsert_participants(project, participant_upserts)
print(api_response)


if __name__ == '__main__':
# pylint: disable=no-value-for-parameter
main()
17 changes: 14 additions & 3 deletions web/src/shared/components/Header/NavBar.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import * as React from 'react'
import { Link } from 'react-router-dom'
import { Link, LinkProps } from 'react-router-dom'
import { Dropdown, Menu, Popup } from 'semantic-ui-react'

import { BillingApi } from '../../../sm-api'
Expand Down Expand Up @@ -55,24 +55,34 @@ interface MenuItem {
url: string
icon: JSX.Element
submenu?: MenuItem[]
reloadDocument?: boolean
}
interface MenuItemProps {
index: number
item: MenuItem
}

// A link component that reloads the document when clicked
// This is used for the graphiql link which doesn't render
// properly without a reload
const ReloadingLink = (props: LinkProps) => {
return <Link reloadDocument={true} {...props} />
}

const MenuItem: React.FC<MenuItemProps> = ({ index, item }) => {
const theme = React.useContext(ThemeContext)
const isDarkMode = theme.theme === 'dark-mode'

const MenuLinkComponent = item.reloadDocument ? ReloadingLink : Link

const dropdown = (item: MenuItem) => (
<Dropdown id="navDrop" text={item.title} key={index} inverted={isDarkMode}>
<Dropdown.Menu id="navDropMenu">
{item.submenu &&
item.submenu.map((subitem, subindex) => (
<Dropdown.Item
id="navDropMenuItem"
as={Link}
as={MenuLinkComponent}
to={subitem.url}
key={subindex}
>
Expand Down Expand Up @@ -105,7 +115,7 @@ const MenuItem: React.FC<MenuItemProps> = ({ index, item }) => {
return item.submenu ? (
<Menu.Item className="headerMenuItem">{popup(dropdown(item), item.icon)}</Menu.Item>
) : (
<Menu.Item className="headerMenuItem" as={Link} to={item.url} key={index}>
<Menu.Item className="headerMenuItem" as={MenuLinkComponent} to={item.url} key={index}>
{popup(item.title, item.icon)}
</Menu.Item>
)
Expand Down Expand Up @@ -141,6 +151,7 @@ const NavBar = () => {
{
title: 'GraphQL',
url: '/graphql',
reloadDocument: true,
icon: <TroubleshootIcon />,
},
InsightsPages,
Expand Down

0 comments on commit d4e3da9

Please sign in to comment.