Skip to content

Commit

Permalink
Merge pull request #40 from gwaygenomics/update-docs
Browse files Browse the repository at this point in the history
Update docs for release
  • Loading branch information
gwaybio authored Feb 16, 2021
2 parents 3b2a44a + 6d40210 commit f7f5b29
Show file tree
Hide file tree
Showing 11 changed files with 400 additions and 117 deletions.
3 changes: 2 additions & 1 deletion LICENSE.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# BSD 3-Clause License

Copyright (c) 2020, Broad Institute of MIT and Harvard
## Copyright (c) 2021, Broad Institute of MIT and Harvard

All rights reserved.

Redistribution and use in source and binary forms, with or without
Expand Down
4 changes: 4 additions & 0 deletions cytominer_eval/__about__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
__project__ = "cytominer_eval"
__author__ = "Gregory Way"
__version__ = "0.1"
__license__ = "BSD 3-Clause License"
20 changes: 12 additions & 8 deletions cytominer_eval/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,17 @@ def evaluate(
An important variable indicating which metadata columns denote replicate
information. All metric operations require replicate profiles.
`replicate_groups` indicates a str or list of columns to use. For
`operation="grit"`, `replicate_groups` is a dict with two keys: "replicate_id"
and "group_id". "replicate_id" is the column name that stores the unique
identifier for each profile, while "group_id" is the column name indicating
how replicates are defined. See also :py:func:`cytominer_eval.operations.grit`
and :py:func:`cytominer_eval.transform.util.check_replicate_groups`
`operation="grit"`, `replicate_groups` is a dict with two keys: "profile_col"
and "replicate_group_col". "profile_col" is the column name that stores
identifiers for each profile (can be unique), while "replicate_group_col" is the
column name indicating higher-order replicate information. E.g.
"replicate_group_col" can be a gene column in a CRISPR experiment with multiple
guides targeting the same genes. See also
:py:func:`cytominer_eval.operations.grit` and
:py:func:`cytominer_eval.transform.util.check_replicate_groups`.
operation : {'replicate_reproducibility', 'precision_recall', 'grit', 'mp_value'}, optional
The specific evaluation metric to calculate. The default is "replicate_reproducibility".
The specific evaluation metric to calculate. The default is
"replicate_reproducibility".
similarity_metric: {'pearson', 'spearman', 'kendall'}, optional
How to calculate pairwise similarity. The input is passed to
pandas.DataFrame.corr(). The default is "pearson".
Expand Down Expand Up @@ -127,8 +131,8 @@ def evaluate(
metric_result = grit(
similarity_melted_df=similarity_melted_df,
control_perts=grit_control_perts,
replicate_id=replicate_groups["replicate_id"],
group_id=replicate_groups["group_id"],
profile_col=replicate_groups["profile_col"],
replicate_group_col=replicate_groups["replicate_group_col"],
replicate_summary_method=grit_replicate_summary_method,
)
elif operation == "mp_value":
Expand Down
29 changes: 17 additions & 12 deletions cytominer_eval/operations/grit.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Grit describes phenotype strength of replicate profiles along two distinct axes:
- Similarity to other perturbations that target the same larger group (e.g. gene, MOA)
- Similarity to other perturbations that target the same larger group (e.g. gene, MOA),
with respect to:
- Similarity to control perturbations
"""
import numpy as np
Expand All @@ -18,8 +19,8 @@
def grit(
similarity_melted_df: pd.DataFrame,
control_perts: List[str],
replicate_id: str,
group_id: str,
profile_col: str,
replicate_group_col: str,
replicate_summary_method: str = "mean",
) -> pd.DataFrame:
r"""Calculate grit
Expand All @@ -30,10 +31,12 @@ def grit(
a long pandas dataframe output from cytominer_eval.transform.metric_melt
control_perts : list
a list of control perturbations to calculate a null distribution
replicate_id : str
the metadata identifier marking which column tracks unique identifiers
group_id : str
the metadata identifier marking which column defines how replicates are grouped
profile_col : str
the metadata column storing profile ids. The column can have unique or replicate
identifiers.
replicate_group_col : str
the metadata column indicating a higher order structure (group) than the
profile column. E.g. target gene vs. guide in a CRISPR experiment.
replicate_summary_method : {'mean', 'median'}, optional
how replicate z-scores to control perts are summarized. Defaults to "mean".
Expand All @@ -48,24 +51,26 @@ def grit(
# Determine pairwise replicates
similarity_melted_df = assign_replicates(
similarity_melted_df=similarity_melted_df,
replicate_groups=[replicate_id, group_id],
replicate_groups=[profile_col, replicate_group_col],
)

# Check to make sure that the melted dataframe is full
assert_melt(similarity_melted_df, eval_metric="grit")

# Extract out specific columns
pair_ids = set_pair_ids()
replicate_col_name = "{x}{suf}".format(
x=replicate_id, suf=pair_ids[list(pair_ids)[0]]["suffix"]
profile_col_name = "{x}{suf}".format(
x=profile_col, suf=pair_ids[list(pair_ids)[0]]["suffix"]
)

# Define the columns to use in the calculation
column_id_info = set_grit_column_info(replicate_id=replicate_id, group_id=group_id)
column_id_info = set_grit_column_info(
profile_col=profile_col, replicate_group_col=replicate_group_col
)

# Calculate grit for each perturbation
grit_df = (
similarity_melted_df.groupby(replicate_col_name)
similarity_melted_df.groupby(profile_col_name)
.apply(
lambda x: calculate_grit(
replicate_group_df=x,
Expand Down
24 changes: 16 additions & 8 deletions cytominer_eval/operations/precision_recall.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,22 @@ def precision_recall(
"""Determine the precision and recall at k for all unique replicate groups
based on a predefined similarity metric (see cytominer_eval.transform.metric_melt)
Arguments:
similarity_melted_df - a long pandas dataframe output from transform.metric_melt
replicate_groups - a list of metadata column names in the original profile dataframe
to use as replicate columns
k - an integer indicating how many pairwise comparisons to threshold
Output:
pandas DataFrame of precision and recall metrics for all replicate groups
Parameters
----------
similarity_melted_df : pandas.DataFrame
An elongated symmetrical matrix indicating pairwise correlations between
samples. Importantly, it must follow the exact structure of the output from
:py:func:`cytominer_eval.transform.transform.metric_melt`.
replicate_groups : List
a list of metadata column names in the original profile dataframe to use as
replicate columns.
k : int
an integer indicating how many pairwise comparisons to threshold.
Returns
-------
pandas.DataFrame
precision and recall metrics for all replicate groups given k
"""
# Determine pairwise replicates and make sure to sort based on the metric!
similarity_melted_df = assign_replicates(
Expand Down
Loading

0 comments on commit f7f5b29

Please sign in to comment.